From 23bdddfdce130886fa0210306ebb8092c7275285 Mon Sep 17 00:00:00 2001 From: Benjamin Saunders Date: Sat, 12 Nov 2022 11:19:03 -0800 Subject: [PATCH] local_backend: use ContentHash rather than hashing protos Insulates identifiers from the unstable serialized form. --- lib/src/backend.rs | 130 +++++++++++++++++++++++++++------------ lib/src/content_hash.rs | 6 ++ lib/src/local_backend.rs | 13 +++- lib/src/repo_path.rs | 8 ++- 4 files changed, 111 insertions(+), 46 deletions(-) diff --git a/lib/src/backend.rs b/lib/src/backend.rs index b3a323adadd..49534cadd4c 100644 --- a/lib/src/backend.rs +++ b/lib/src/backend.rs @@ -20,6 +20,7 @@ use std::vec::Vec; use thiserror::Error; +use crate::content_hash::ContentHash; use crate::repo_path::{RepoPath, RepoPathComponent}; content_hash! { @@ -59,8 +60,10 @@ impl CommitId { } } -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct ChangeId(Vec); +content_hash! { + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct ChangeId(Vec); +} impl Debug for ChangeId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -94,8 +97,10 @@ impl ChangeId { } } -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct TreeId(Vec); +content_hash! { + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct TreeId(Vec); +} impl Debug for TreeId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -129,8 +134,10 @@ impl TreeId { } } -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct FileId(Vec); +content_hash! { + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct FileId(Vec); +} impl Debug for FileId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -160,8 +167,10 @@ impl FileId { } } -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct SymlinkId(Vec); +content_hash! 
{ + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct SymlinkId(Vec); +} impl Debug for SymlinkId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -191,8 +200,10 @@ impl SymlinkId { } } -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct ConflictId(Vec); +content_hash! { + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct ConflictId(Vec); +} impl Debug for ConflictId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -256,39 +267,47 @@ impl Timestamp { } } -#[derive(Debug, PartialEq, Eq, Clone)] -pub struct Signature { - pub name: String, - pub email: String, - pub timestamp: Timestamp, +content_hash! { + #[derive(Debug, PartialEq, Eq, Clone)] + pub struct Signature { + pub name: String, + pub email: String, + pub timestamp: Timestamp, + } } -#[derive(Debug, Clone)] -pub struct Commit { - pub parents: Vec, - pub predecessors: Vec, - pub root_tree: TreeId, - pub change_id: ChangeId, - pub description: String, - pub author: Signature, - pub committer: Signature, +content_hash! { + #[derive(Debug, Clone)] + pub struct Commit { + pub parents: Vec, + pub predecessors: Vec, + pub root_tree: TreeId, + pub change_id: ChangeId, + pub description: String, + pub author: Signature, + pub committer: Signature, + } } -#[derive(Debug, PartialEq, Eq, Clone)] -pub struct ConflictPart { - // TODO: Store e.g. CommitId here too? Labels (theirs/ours/base)? Would those still be - // useful e.g. after rebasing this conflict? - pub value: TreeValue, +content_hash! { + #[derive(Debug, PartialEq, Eq, Clone)] + pub struct ConflictPart { + // TODO: Store e.g. CommitId here too? Labels (theirs/ours/base)? Would those still be + // useful e.g. after rebasing this conflict? + pub value: TreeValue, + } } -#[derive(Default, Debug, PartialEq, Eq, Clone)] -pub struct Conflict { - // A conflict is represented by a list of positive and negative states that need to be applied. 
- // In a simple 3-way merge of B and C with merge base A, the conflict will be { add: [B, C], - // remove: [A] }. Also note that a conflict of the form { add: [A], remove: [] } is the - // same as non-conflict A. - pub removes: Vec, - pub adds: Vec, +content_hash! { + #[derive(Default, Debug, PartialEq, Eq, Clone)] + pub struct Conflict { + // A conflict is represented by a list of positive and negative states that need to be applied. + // In a simple 3-way merge of B and C with merge base A, the conflict will be { add: [B, C], + // remove: [A] }. Also note that a conflict of the form { add: [A], remove: [] } is the + // same as non-conflict A. + pub removes: Vec, + pub adds: Vec, + } } #[derive(Debug, Error, PartialEq, Eq)] @@ -310,6 +329,35 @@ pub enum TreeValue { Conflict(ConflictId), } +impl ContentHash for TreeValue { + fn hash(&self, state: &mut impl digest::Update) { + use TreeValue::*; + match *self { + Normal { ref id, executable } => { + state.update(&0u32.to_le_bytes()); + id.hash(state); + executable.hash(state); + } + Symlink(ref id) => { + state.update(&1u32.to_le_bytes()); + id.hash(state); + } + Tree(ref id) => { + state.update(&2u32.to_le_bytes()); + id.hash(state); + } + GitSubmodule(ref id) => { + state.update(&3u32.to_le_bytes()); + id.hash(state); + } + Conflict(ref id) => { + state.update(&4u32.to_le_bytes()); + id.hash(state); + } + } + } +} + #[derive(Debug, PartialEq, Eq, Clone)] pub struct TreeEntry<'a> { name: &'a RepoPathComponent, @@ -344,9 +392,11 @@ impl<'a> Iterator for TreeEntriesNonRecursiveIterator<'a> { } } -#[derive(Default, Debug, Clone)] -pub struct Tree { - entries: BTreeMap, +content_hash! 
{ + #[derive(Default, Debug, Clone)] + pub struct Tree { + entries: BTreeMap, + } } impl Tree { diff --git a/lib/src/content_hash.rs b/lib/src/content_hash.rs index d0bc460a663..f86ac6e5c01 100644 --- a/lib/src/content_hash.rs +++ b/lib/src/content_hash.rs @@ -11,6 +11,12 @@ pub trait ContentHash { fn hash(&self, state: &mut impl digest::Update); } +impl ContentHash for bool { + fn hash(&self, state: &mut impl digest::Update) { + u8::from(*self).hash(state); + } +} + impl ContentHash for u8 { fn hash(&self, state: &mut impl digest::Update) { state.update(&[*self]); diff --git a/lib/src/local_backend.rs b/lib/src/local_backend.rs index 73b3159a9be..ec0ed98bbc1 100644 --- a/lib/src/local_backend.rs +++ b/lib/src/local_backend.rs @@ -27,6 +27,7 @@ use crate::backend::{ ConflictId, ConflictPart, FileId, MillisSinceEpoch, Signature, SymlinkId, Timestamp, Tree, TreeId, TreeValue, }; +use crate::content_hash::ContentHash; use crate::file_util::persist_content_addressed_temp_file; use crate::repo_path::{RepoPath, RepoPathComponent}; @@ -197,7 +198,7 @@ impl Backend for LocalBackend { temp_file.as_file().write_all(&proto_bytes)?; - let id = TreeId::new(Blake2b512::digest(&proto_bytes).to_vec()); + let id = TreeId::new(hash(tree).to_vec()); persist_content_addressed_temp_file(temp_file, self.tree_path(&id))?; Ok(id) @@ -220,7 +221,7 @@ impl Backend for LocalBackend { temp_file.as_file().write_all(&proto_bytes)?; - let id = ConflictId::new(Blake2b512::digest(&proto_bytes).to_vec()); + let id = ConflictId::new(hash(conflict).to_vec()); persist_content_addressed_temp_file(temp_file, self.conflict_path(&id))?; Ok(id) @@ -247,7 +248,7 @@ impl Backend for LocalBackend { temp_file.as_file().write_all(&proto_bytes)?; - let id = CommitId::new(Blake2b512::digest(&proto_bytes).to_vec()); + let id = CommitId::new(hash(commit).to_vec()); persist_content_addressed_temp_file(temp_file, self.commit_path(&id))?; Ok(id) @@ -412,3 +413,9 @@ fn conflict_part_to_proto(part: &ConflictPart) -> 
crate::protos::store::conflict proto.content = MessageField::some(tree_value_to_proto(&part.value)); proto } + +fn hash(x: &impl ContentHash) -> digest::Output { + let mut hasher = Blake2b512::default(); + x.hash(&mut hasher); + hasher.finalize() +} diff --git a/lib/src/repo_path.rs b/lib/src/repo_path.rs index 81332fedf1e..81cf2ba589c 100644 --- a/lib/src/repo_path.rs +++ b/lib/src/repo_path.rs @@ -20,9 +20,11 @@ use thiserror::Error; use crate::file_util; -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] -pub struct RepoPathComponent { - value: String, +content_hash! { + #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] + pub struct RepoPathComponent { + value: String, + } } impl RepoPathComponent {