diff --git a/Cargo.lock b/Cargo.lock index 01269091dd..56dbf7ba89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -468,9 +468,9 @@ checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" [[package]] name = "digest" -version = "0.10.3" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506" +checksum = "adfbc57365a37acbd2ebf2b64d7e69bb766e2fea813521ed536f5d0520dcf86c" dependencies = [ "block-buffer", "crypto-common", @@ -754,6 +754,7 @@ dependencies = [ "bytes", "chrono", "config", + "digest", "git2", "hex", "insta", diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 5e3365b142..026ac561fc 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -24,6 +24,7 @@ bytes = "1.2.1" byteorder = "1.4.3" chrono = { version = "0.4.22", default-features = false, features = ["std", "clock"] } config = { version = "0.13.2", default-features = false, features = ["toml"] } +digest = "0.10.5" git2 = "0.15.0" hex = "0.4.3" itertools = "0.10.5" diff --git a/lib/src/backend.rs b/lib/src/backend.rs index 08cd46fbed..49534cadd4 100644 --- a/lib/src/backend.rs +++ b/lib/src/backend.rs @@ -20,10 +20,13 @@ use std::vec::Vec; use thiserror::Error; +use crate::content_hash::ContentHash; use crate::repo_path::{RepoPath, RepoPathComponent}; -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct CommitId(Vec<u8>); +content_hash! { + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct CommitId(Vec<u8>); +} impl Debug for CommitId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -57,8 +60,10 @@ impl CommitId { } } -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct ChangeId(Vec<u8>); +content_hash! 
{ + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct ChangeId(Vec<u8>); +} impl Debug for ChangeId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -92,8 +97,10 @@ impl ChangeId { } } -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct TreeId(Vec<u8>); +content_hash! { + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct TreeId(Vec<u8>); +} impl Debug for TreeId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -127,8 +134,10 @@ impl TreeId { } } -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct FileId(Vec<u8>); +content_hash! { + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct FileId(Vec<u8>); +} impl Debug for FileId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -158,8 +167,10 @@ impl FileId { } } -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct SymlinkId(Vec<u8>); +content_hash! { + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct SymlinkId(Vec<u8>); +} impl Debug for SymlinkId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -189,8 +200,10 @@ impl SymlinkId { } } -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct ConflictId(Vec<u8>); +content_hash! { + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct ConflictId(Vec<u8>); +} impl Debug for ConflictId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -225,14 +238,18 @@ pub enum Phase { Draft, } -#[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)] -pub struct MillisSinceEpoch(pub i64); +content_hash! { + #[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)] + pub struct MillisSinceEpoch(pub i64); +} -#[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)] -pub struct Timestamp { - pub timestamp: MillisSinceEpoch, - // time zone offset in minutes - pub tz_offset: i32, +content_hash! 
{ + #[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)] + pub struct Timestamp { + pub timestamp: MillisSinceEpoch, + // time zone offset in minutes + pub tz_offset: i32, + } } impl Timestamp { @@ -250,39 +267,47 @@ impl Timestamp { } } -#[derive(Debug, PartialEq, Eq, Clone)] -pub struct Signature { - pub name: String, - pub email: String, - pub timestamp: Timestamp, +content_hash! { + #[derive(Debug, PartialEq, Eq, Clone)] + pub struct Signature { + pub name: String, + pub email: String, + pub timestamp: Timestamp, + } } -#[derive(Debug, Clone)] -pub struct Commit { - pub parents: Vec, - pub predecessors: Vec, - pub root_tree: TreeId, - pub change_id: ChangeId, - pub description: String, - pub author: Signature, - pub committer: Signature, +content_hash! { + #[derive(Debug, Clone)] + pub struct Commit { + pub parents: Vec, + pub predecessors: Vec, + pub root_tree: TreeId, + pub change_id: ChangeId, + pub description: String, + pub author: Signature, + pub committer: Signature, + } } -#[derive(Debug, PartialEq, Eq, Clone)] -pub struct ConflictPart { - // TODO: Store e.g. CommitId here too? Labels (theirs/ours/base)? Would those still be - // useful e.g. after rebasing this conflict? - pub value: TreeValue, +content_hash! { + #[derive(Debug, PartialEq, Eq, Clone)] + pub struct ConflictPart { + // TODO: Store e.g. CommitId here too? Labels (theirs/ours/base)? Would those still be + // useful e.g. after rebasing this conflict? + pub value: TreeValue, + } } -#[derive(Default, Debug, PartialEq, Eq, Clone)] -pub struct Conflict { - // A conflict is represented by a list of positive and negative states that need to be applied. - // In a simple 3-way merge of B and C with merge base A, the conflict will be { add: [B, C], - // remove: [A] }. Also note that a conflict of the form { add: [A], remove: [] } is the - // same as non-conflict A. - pub removes: Vec, - pub adds: Vec, +content_hash! 
{ + #[derive(Default, Debug, PartialEq, Eq, Clone)] + pub struct Conflict { + // A conflict is represented by a list of positive and negative states that need to be applied. + // In a simple 3-way merge of B and C with merge base A, the conflict will be { add: [B, C], + // remove: [A] }. Also note that a conflict of the form { add: [A], remove: [] } is the + // same as non-conflict A. + pub removes: Vec, + pub adds: Vec, + } } #[derive(Debug, Error, PartialEq, Eq)] @@ -304,6 +329,35 @@ pub enum TreeValue { Conflict(ConflictId), } +impl ContentHash for TreeValue { + fn hash(&self, state: &mut impl digest::Update) { + use TreeValue::*; + match *self { + Normal { ref id, executable } => { + state.update(&0u32.to_le_bytes()); + id.hash(state); + executable.hash(state); + } + Symlink(ref id) => { + state.update(&1u32.to_le_bytes()); + id.hash(state); + } + Tree(ref id) => { + state.update(&2u32.to_le_bytes()); + id.hash(state); + } + GitSubmodule(ref id) => { + state.update(&3u32.to_le_bytes()); + id.hash(state); + } + Conflict(ref id) => { + state.update(&4u32.to_le_bytes()); + id.hash(state); + } + } + } +} + #[derive(Debug, PartialEq, Eq, Clone)] pub struct TreeEntry<'a> { name: &'a RepoPathComponent, @@ -338,9 +392,11 @@ impl<'a> Iterator for TreeEntriesNonRecursiveIterator<'a> { } } -#[derive(Default, Debug, Clone)] -pub struct Tree { - entries: BTreeMap, +content_hash! { + #[derive(Default, Debug, Clone)] + pub struct Tree { + entries: BTreeMap, + } } impl Tree { diff --git a/lib/src/content_hash.rs b/lib/src/content_hash.rs new file mode 100644 index 0000000000..a727e4305a --- /dev/null +++ b/lib/src/content_hash.rs @@ -0,0 +1,227 @@ +use itertools::Itertools as _; + +/// Portable, stable hashing suitable for identifying values +/// +/// Variable-length sequences should hash a 64-bit little-endian representation +/// of their length, then their elements in order. Unordered containers should +/// order their elements according to their `Ord` implementation. 
Enums should +/// hash a 32-bit little-endian encoding of the ordinal number of the enum +/// variant, then the variant's fields in lexical order. +pub trait ContentHash { + fn hash(&self, state: &mut impl digest::Update); +} + +impl ContentHash for () { + fn hash(&self, _: &mut impl digest::Update) {} +} + +impl ContentHash for bool { + fn hash(&self, state: &mut impl digest::Update) { + u8::from(*self).hash(state); + } +} + +impl ContentHash for u8 { + fn hash(&self, state: &mut impl digest::Update) { + state.update(&[*self]); + } +} + +impl ContentHash for i32 { + fn hash(&self, state: &mut impl digest::Update) { + state.update(&self.to_le_bytes()); + } +} + +impl ContentHash for i64 { + fn hash(&self, state: &mut impl digest::Update) { + state.update(&self.to_le_bytes()); + } +} + +// TODO: Specialize for [u8] once specialization exists +impl<T: ContentHash> ContentHash for [T] { + fn hash(&self, state: &mut impl digest::Update) { + state.update(&(self.len() as u64).to_le_bytes()); + for x in self { + x.hash(state); + } + } +} + +impl<T: ContentHash> ContentHash for Vec<T> { + fn hash(&self, state: &mut impl digest::Update) { + self.as_slice().hash(state) + } +} + +impl ContentHash for String { + fn hash(&self, state: &mut impl digest::Update) { + self.as_bytes().hash(state); + } +} + +impl<T: ContentHash> ContentHash for Option<T> { + fn hash(&self, state: &mut impl digest::Update) { + match *self { + None => state.update(&[0]), + Some(ref x) => { + state.update(&[1]); + x.hash(state) + } + } + } +} + +impl<K, V> ContentHash for std::collections::HashMap<K, V> +where + K: ContentHash + Ord, + V: ContentHash, +{ + fn hash(&self, state: &mut impl digest::Update) { + state.update(&(self.len() as u64).to_le_bytes()); + let mut kv = self.iter().collect_vec(); + kv.sort_unstable_by_key(|&(k, _)| k); + for (k, v) in kv { + k.hash(state); + v.hash(state); + } + } +} + +impl<K> ContentHash for std::collections::HashSet<K> +where + K: ContentHash + Ord, +{ + fn hash(&self, state: &mut impl digest::Update) { + state.update(&(self.len() as 
u64).to_le_bytes()); + for k in self.iter().sorted() { + k.hash(state); + } + } +} + +impl<K, V> ContentHash for std::collections::BTreeMap<K, V> +where + K: ContentHash, + V: ContentHash, +{ + fn hash(&self, state: &mut impl digest::Update) { + state.update(&(self.len() as u64).to_le_bytes()); + for (k, v) in self.iter() { + k.hash(state); + v.hash(state); + } + } +} + +macro_rules! content_hash { + ($(#[$meta:meta])* $vis:vis struct $name:ident { + $($(#[$field_meta:meta])* $field_vis:vis $field:ident : $ty:ty),* $(,)? + }) => { + $(#[$meta])* + $vis struct $name { + $($(#[$field_meta])* $field_vis $field : $ty),* + } + + impl crate::content_hash::ContentHash for $name { + fn hash(&self, state: &mut impl digest::Update) { + $(<$ty as crate::content_hash::ContentHash>::hash(&self.$field, state);)* + } + } + }; + ($(#[$meta:meta])* $vis:vis struct $name:ident($field_vis:vis $ty:ty);) => { + $(#[$meta])* + $vis struct $name($field_vis $ty); + + impl crate::content_hash::ContentHash for $name { + fn hash(&self, state: &mut impl digest::Update) { + <$ty as crate::content_hash::ContentHash>::hash(&self.0, state); + } + } + }; +} + +#[cfg(test)] +mod tests { + use std::collections::{BTreeMap, HashMap}; + + use blake2::{Blake2b512, Digest}; + + use super::*; + + #[test] + fn test_string_sanity() { + let a = "a".to_string(); + let b = "b".to_string(); + assert_eq!(hash(&a), hash(&a.clone())); + assert_ne!(hash(&a), hash(&b)); + assert_ne!(hash(&"a".to_string()), hash(&"a\0".to_string())); + } + + #[test] + fn test_hash_map_key_value_distinction() { + let a = [("ab".to_string(), "cd".to_string())] + .into_iter() + .collect::<HashMap<_, _>>(); + let b = [("a".to_string(), "bcd".to_string())] + .into_iter() + .collect::<HashMap<_, _>>(); + + assert_ne!(hash(&a), hash(&b)); + } + + #[test] + fn test_btree_map_key_value_distinction() { + let a = [("ab".to_string(), "cd".to_string())] + .into_iter() + .collect::<BTreeMap<_, _>>(); + let b = [("a".to_string(), "bcd".to_string())] + .into_iter() + .collect::<BTreeMap<_, _>>(); + + 
assert_ne!(hash(&a), hash(&b)); + } + + #[test] + fn test_struct_sanity() { + content_hash! { + struct Foo { x: i32 } + } + assert_ne!(hash(&Foo { x: 42 }), hash(&Foo { x: 12 })); + } + + #[test] + fn test_option_sanity() { + assert_ne!(hash(&Some(42)), hash(&42)); + assert_ne!(hash(&None::<i32>), hash(&42i32)); + } + + #[test] + fn test_slice_sanity() { + assert_ne!(hash(&[42i32][..]), hash(&[12i32][..])); + assert_ne!(hash(&([] as [i32; 0])[..]), hash(&[42i32][..])); + assert_ne!(hash(&([] as [i32; 0])[..]), hash(&())); + assert_ne!(hash(&42i32), hash(&[42i32][..])); + } + + #[test] + fn test_consistent_hashing() { + content_hash! { + struct Foo { x: Vec<Option<i32>>, y: i64 } + } + insta::assert_snapshot!( + hex::encode(hash(&Foo { + x: vec![None, Some(42)], + y: 17 + })), + @"14e42ea3d680bc815d0cea8ac20d3e872120014fb7bba8d82c3ffa7a8e6d63c41ef9631c60b73b150e3dd72efe50e8b0248321fe2b7eea09d879f3757b879372" + ); + } + + fn hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> { + let mut hasher = Blake2b512::default(); + x.hash(&mut hasher); + hasher.finalize() + } +} diff --git a/lib/src/lib.rs b/lib/src/lib.rs index a705396763..612285aa5e 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -14,6 +14,9 @@ #![deny(unused_must_use)] +#[macro_use] +mod content_hash; + pub mod backend; pub mod commit; pub mod commit_builder; diff --git a/lib/src/local_backend.rs b/lib/src/local_backend.rs index 73b3159a9b..353bf0721f 100644 --- a/lib/src/local_backend.rs +++ b/lib/src/local_backend.rs @@ -27,6 +27,7 @@ use crate::backend::{ ConflictId, ConflictPart, FileId, MillisSinceEpoch, Signature, SymlinkId, Timestamp, Tree, TreeId, TreeValue, }; +use crate::content_hash::ContentHash; use crate::file_util::persist_content_addressed_temp_file; use crate::repo_path::{RepoPath, RepoPathComponent}; @@ -72,7 +73,7 @@ impl LocalBackend { pub fn load(store_path: &Path) -> Self { let root_commit_id = CommitId::from_bytes(&[0; 64]); - let empty_tree_id = 
TreeId::from_hex("786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce"); + let empty_tree_id = TreeId::from_hex("482ae5a29fbe856c7272f2071b8b0f0359ee2d89ff392b8a900643fbd0836eccd067b8bf41909e206c90d45d6e7d8b6686b93ecaee5fe1a9060d87b672101310"); LocalBackend { path: store_path.to_path_buf(), root_commit_id, @@ -192,12 +193,9 @@ impl Backend for LocalBackend { let temp_file = NamedTempFile::new_in(&self.path)?; let proto = tree_to_proto(tree); - let mut proto_bytes: Vec<u8> = Vec::new(); - proto.write_to_writer(&mut proto_bytes)?; + proto.write_to_writer(&mut temp_file.as_file())?; - temp_file.as_file().write_all(&proto_bytes)?; - - let id = TreeId::new(Blake2b512::digest(&proto_bytes).to_vec()); + let id = TreeId::new(hash(tree).to_vec()); persist_content_addressed_temp_file(temp_file, self.tree_path(&id))?; Ok(id) @@ -215,12 +213,9 @@ impl Backend for LocalBackend { let temp_file = NamedTempFile::new_in(&self.path)?; let proto = conflict_to_proto(conflict); - let mut proto_bytes: Vec<u8> = Vec::new(); - proto.write_to_writer(&mut proto_bytes)?; - - temp_file.as_file().write_all(&proto_bytes)?; + proto.write_to_writer(&mut temp_file.as_file())?; - let id = ConflictId::new(Blake2b512::digest(&proto_bytes).to_vec()); + let id = ConflictId::new(hash(conflict).to_vec()); persist_content_addressed_temp_file(temp_file, self.conflict_path(&id))?; Ok(id) @@ -242,12 +237,9 @@ impl Backend for LocalBackend { let temp_file = NamedTempFile::new_in(&self.path)?; let proto = commit_to_proto(commit); - let mut proto_bytes: Vec<u8> = Vec::new(); - proto.write_to_writer(&mut proto_bytes)?; - - temp_file.as_file().write_all(&proto_bytes)?; + proto.write_to_writer(&mut temp_file.as_file())?; - let id = CommitId::new(Blake2b512::digest(&proto_bytes).to_vec()); + let id = CommitId::new(hash(commit).to_vec()); persist_content_addressed_temp_file(temp_file, self.commit_path(&id))?; Ok(id) @@ -412,3 +404,9 @@ fn 
conflict_part_to_proto(part: &ConflictPart) -> crate::protos::store::conflict proto.content = MessageField::some(tree_value_to_proto(&part.value)); proto } + +fn hash(x: &impl ContentHash) -> digest::Output<Blake2b512> { + let mut hasher = Blake2b512::default(); + x.hash(&mut hasher); + hasher.finalize() +} diff --git a/lib/src/op_store.rs b/lib/src/op_store.rs index 3d74c7f555..1e7387fe6c 100644 --- a/lib/src/op_store.rs +++ b/lib/src/op_store.rs @@ -18,9 +18,12 @@ use std::fmt::{Debug, Error, Formatter}; use thiserror::Error; use crate::backend::{CommitId, Timestamp}; +use crate::content_hash::ContentHash; -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct WorkspaceId(String); +content_hash! { + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct WorkspaceId(String); +} impl Debug for WorkspaceId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -44,8 +47,10 @@ impl WorkspaceId { } } -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct ViewId(Vec<u8>); +content_hash! { + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct ViewId(Vec<u8>); +} impl Debug for ViewId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -75,8 +80,10 @@ impl ViewId { } } -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct OperationId(Vec<u8>); +content_hash! 
{ + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct OperationId(Vec); +} impl Debug for OperationId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -115,6 +122,26 @@ pub enum RefTarget { }, } +impl ContentHash for RefTarget { + fn hash(&self, state: &mut impl digest::Update) { + use RefTarget::*; + match *self { + Normal(ref id) => { + state.update(&0u32.to_le_bytes()); + id.hash(state); + } + Conflict { + ref removes, + ref adds, + } => { + state.update(&1u32.to_le_bytes()); + removes.hash(state); + adds.hash(state); + } + } + } +} + impl RefTarget { pub fn is_conflict(&self) -> bool { matches!(self, RefTarget::Conflict { .. }) @@ -146,67 +173,75 @@ impl RefTarget { } } -#[derive(Default, PartialEq, Eq, Clone, Debug)] -pub struct BranchTarget { - /// The commit the branch points to locally. `None` if the branch has been - /// deleted locally. - pub local_target: Option, - // TODO: Do we need to support tombstones for remote branches? For example, if the branch - // has been deleted locally and you pull from a remote, maybe it should make a difference - // whether the branch is known to have existed on the remote. We may not want to resurrect - // the branch if the branch's state on the remote was just not known. - pub remote_targets: BTreeMap, -} - -/// Represents the way the repo looks at a given time, just like how a Tree -/// object represents how the file system looks at a given time. -#[derive(PartialEq, Eq, Clone, Debug, Default)] -pub struct View { - /// All head commits - pub head_ids: HashSet, - /// Heads of the set of public commits. - pub public_head_ids: HashSet, - pub branches: BTreeMap, - pub tags: BTreeMap, - pub git_refs: BTreeMap, - /// The commit the Git HEAD points to. - // TODO: Support multiple Git worktrees? - // TODO: Do we want to store the current branch name too? - pub git_head: Option, - // The commit that *should be* checked out in the workspace. 
Note that the working copy - // (.jj/working_copy/) has the source of truth about which commit *is* checked out (to be - // precise: the commit to which we most recently completed an update to). - pub wc_commit_ids: HashMap, -} - -/// Represents an operation (transaction) on the repo view, just like how a -/// Commit object represents an operation on the tree. -/// -/// Operations and views are not meant to be exchanged between repos or users; -/// they represent local state and history. -/// -/// The operation history will almost always be linear. It will only have -/// forks when parallel operations occurred. The parent is determined when -/// the transaction starts. When the transaction commits, a lock will be -/// taken and it will be checked that the current head of the operation -/// graph is unchanged. If the current head has changed, there has been -/// concurrent operation. -#[derive(PartialEq, Eq, Clone, Debug)] -pub struct Operation { - pub view_id: ViewId, - pub parents: Vec, - pub metadata: OperationMetadata, +content_hash! { + #[derive(Default, PartialEq, Eq, Clone, Debug)] + pub struct BranchTarget { + /// The commit the branch points to locally. `None` if the branch has been + /// deleted locally. + pub local_target: Option, + // TODO: Do we need to support tombstones for remote branches? For example, if the branch + // has been deleted locally and you pull from a remote, maybe it should make a difference + // whether the branch is known to have existed on the remote. We may not want to resurrect + // the branch if the branch's state on the remote was just not known. + pub remote_targets: BTreeMap, + } } -#[derive(PartialEq, Eq, Clone, Debug)] -pub struct OperationMetadata { - pub start_time: Timestamp, - pub end_time: Timestamp, - // Whatever is useful to the user, such as exact command line call - pub description: String, - pub hostname: String, - pub username: String, - pub tags: HashMap, +content_hash! 
{ + /// Represents the way the repo looks at a given time, just like how a Tree + /// object represents how the file system looks at a given time. + #[derive(PartialEq, Eq, Clone, Debug, Default)] + pub struct View { + /// All head commits + pub head_ids: HashSet, + /// Heads of the set of public commits. + pub public_head_ids: HashSet, + pub branches: BTreeMap, + pub tags: BTreeMap, + pub git_refs: BTreeMap, + /// The commit the Git HEAD points to. + // TODO: Support multiple Git worktrees? + // TODO: Do we want to store the current branch name too? + pub git_head: Option, + // The commit that *should be* checked out in the workspace. Note that the working copy + // (.jj/working_copy/) has the source of truth about which commit *is* checked out (to be + // precise: the commit to which we most recently completed an update to). + pub wc_commit_ids: HashMap, + } +} + +content_hash! { + /// Represents an operation (transaction) on the repo view, just like how a + /// Commit object represents an operation on the tree. + /// + /// Operations and views are not meant to be exchanged between repos or users; + /// they represent local state and history. + /// + /// The operation history will almost always be linear. It will only have + /// forks when parallel operations occurred. The parent is determined when + /// the transaction starts. When the transaction commits, a lock will be + /// taken and it will be checked that the current head of the operation + /// graph is unchanged. If the current head has changed, there has been + /// concurrent operation. + #[derive(PartialEq, Eq, Clone, Debug)] + pub struct Operation { + pub view_id: ViewId, + pub parents: Vec, + pub metadata: OperationMetadata, + } +} + +content_hash! 
{ + #[derive(PartialEq, Eq, Clone, Debug)] + pub struct OperationMetadata { + pub start_time: Timestamp, + pub end_time: Timestamp, + // Whatever is useful to the user, such as exact command line call + pub description: String, + pub hostname: String, + pub username: String, + pub tags: HashMap, + } } impl OperationMetadata { diff --git a/lib/src/repo_path.rs b/lib/src/repo_path.rs index 81332fedf1..81cf2ba589 100644 --- a/lib/src/repo_path.rs +++ b/lib/src/repo_path.rs @@ -20,9 +20,11 @@ use thiserror::Error; use crate::file_util; -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] -pub struct RepoPathComponent { - value: String, +content_hash! { + #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] + pub struct RepoPathComponent { + value: String, + } } impl RepoPathComponent { diff --git a/lib/src/simple_op_store.rs b/lib/src/simple_op_store.rs index 372b19cff5..54a950184e 100644 --- a/lib/src/simple_op_store.rs +++ b/lib/src/simple_op_store.rs @@ -16,15 +16,16 @@ use std::collections::BTreeMap; use std::fmt::Debug; use std::fs; use std::fs::File; -use std::io::{ErrorKind, Write}; +use std::io::ErrorKind; use std::path::PathBuf; -use blake2::{Blake2b512, Digest}; +use blake2::Blake2b512; use itertools::Itertools; use protobuf::{Message, MessageField}; use tempfile::{NamedTempFile, PersistError}; use crate::backend::{CommitId, MillisSinceEpoch, Timestamp}; +use crate::content_hash::ContentHash; use crate::file_util::persist_content_addressed_temp_file; use crate::op_store::{ BranchTarget, OpStore, OpStoreError, OpStoreResult, Operation, OperationId, OperationMetadata, @@ -95,12 +96,9 @@ impl OpStore for SimpleOpStore { let temp_file = NamedTempFile::new_in(&self.path)?; let proto = view_to_proto(view); - let mut proto_bytes: Vec = Vec::new(); - proto.write_to_writer(&mut proto_bytes)?; + proto.write_to_writer(&mut temp_file.as_file())?; - temp_file.as_file().write_all(&proto_bytes)?; - - let id = 
ViewId::new(Blake2b512::digest(&proto_bytes).to_vec()); + let id = ViewId::new(hash(view).to_vec()); persist_content_addressed_temp_file(temp_file, self.view_path(&id))?; Ok(id) @@ -118,12 +116,9 @@ impl OpStore for SimpleOpStore { let temp_file = NamedTempFile::new_in(&self.path)?; let proto = operation_to_proto(operation); - let mut proto_bytes: Vec<u8> = Vec::new(); - proto.write_to_writer(&mut proto_bytes)?; - - temp_file.as_file().write_all(&proto_bytes)?; + proto.write_to_writer(&mut temp_file.as_file())?; - let id = OperationId::new(Blake2b512::digest(&proto_bytes).to_vec()); + let id = OperationId::new(hash(operation).to_vec()); persist_content_addressed_temp_file(temp_file, self.operation_path(&id))?; Ok(id) @@ -363,6 +358,13 @@ fn ref_target_from_proto(proto: &crate::protos::op_store::RefTarget) -> RefTarge } } +fn hash(x: &impl ContentHash) -> digest::Output<Blake2b512> { + use digest::Digest; + let mut hasher = Blake2b512::default(); + x.hash(&mut hasher); + hasher.finalize() +} + #[cfg(test)] mod tests { use maplit::{btreemap, hashmap, hashset};