Skip to content

Commit

Permalink
Merge branch 'index-verification'
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Jan 31, 2022
2 parents a605b67 + afdeca1 commit ad3c803
Show file tree
Hide file tree
Showing 26 changed files with 532 additions and 130 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ Please see _'Development Status'_ for a listing of all crates and their capabili
* [x] **verify** - assure that a commit-graph is consistent
* **repository**
* **verify** - validate a whole repository, for now only the object database.
* **index**
* [x] **entries** - show detailed entry information for human or machine consumption (via JSON)
* [x] **info** - display general information about the index itself, with detailed extension information by default
* **remote**
* [ref-list](https://asciinema.org/a/359320) - list all (or given) references from a remote at the given URL

Expand Down
3 changes: 2 additions & 1 deletion git-index/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ internal-testing-to-avoid-being-run-by-cargo-test-all = []
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
git-features = { version = "^0.19.1", path = "../git-features", features = ["rustsha1"] }
git-features = { version = "^0.19.1", path = "../git-features", features = ["rustsha1", "progress"] }
git-hash = { version = "^0.9.1", path = "../git-hash" }
git-bitmap = { version = "^0.0.1", path = "../git-bitmap" }
git-object = { version = "^0.17.0", path = "../git-object" }

quick-error = "2.0.0"
memmap2 = "0.5.0"
Expand Down
26 changes: 26 additions & 0 deletions git-index/src/access.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
use crate::{extension, Entry, State, Version};

/// Read-only accessors for the fields of a decoded index `State`.
impl State {
/// Return the `Version` stored in this state.
pub fn version(&self) -> Version {
self.version
}

/// Return all entries of this state as a slice.
pub fn entries(&self) -> &[Entry] {
&self.entries
}
/// Return the `Tree` extension data, or `None` if this state has none.
pub fn tree(&self) -> Option<&extension::Tree> {
self.tree.as_ref()
}
/// Return the `Link` extension data, or `None` if this state has none.
pub fn link(&self) -> Option<&extension::Link> {
self.link.as_ref()
}
/// Return the resolve-undo paths, or `None` if this state has none.
pub fn resolve_undo(&self) -> Option<&extension::resolve_undo::Paths> {
self.resolve_undo.as_ref()
}
/// Return the `UntrackedCache` extension data, or `None` if this state has none.
pub fn untracked(&self) -> Option<&extension::UntrackedCache> {
self.untracked.as_ref()
}
/// Return the `FsMonitor` extension data, or `None` if this state has none.
pub fn fs_monitor(&self) -> Option<&extension::FsMonitor> {
self.fs_monitor.as_ref()
}
}
24 changes: 24 additions & 0 deletions git-index/src/entry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,9 @@ impl Flags {
// The derived ordering compares fields in declaration order: `secs` first, then `nsecs`.
#[derive(PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub struct Time {
/// The number of seconds elapsed since EPOCH.
pub secs: u32,
/// The number of nanoseconds elapsed within the current second, ranging from 0 to 999_999_999.
pub nsecs: u32,
}

Expand All @@ -140,5 +142,27 @@ mod access {
/// Return the path of this entry as a byte string borrowed from the path storage of `state`.
///
/// The entry only stores a range; `state` must be the `State` whose `path_backing` that range
/// refers to, otherwise indexing may panic or yield unrelated bytes.
pub fn path<'a>(&self, state: &'a State) -> &'a BStr {
    let range = self.path.clone();
    state.path_backing[range].as_bstr()
}

/// Return the stage of this entry as reported by `stage()` on its flags.
// NOTE(review): presumably git's merge stage (0 for regular entries) — confirm against `Flags::stage()`.
pub fn stage(&self) -> u32 {
self.flags.stage()
}
}
}

mod _impls {
    use std::cmp::Ordering;

    use crate::{Entry, State};

    impl Entry {
        /// Compare this entry with `other`, resolving both paths through `state`.
        ///
        /// Entries are ordered by the bytes of their paths, where a path that is a strict
        /// prefix of another sorts first. Entries with equal paths are ordered by their stage.
        pub fn cmp(&self, other: &Self, state: &State) -> Ordering {
            let (ours, theirs) = (self.path(state), other.path(state));
            // Lexicographic byte comparison already means "common prefix first, then length".
            match ours.cmp(theirs) {
                Ordering::Equal => self.stage().cmp(&other.stage()),
                by_path => by_path,
            }
        }
    }
}
11 changes: 8 additions & 3 deletions git-index/src/extension/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@ pub struct Iter<'a> {
/// if there was no change to them. Portions of this tree are invalidated as the index is changed.
pub struct Tree {
/// The name of this tree as read from the extension data.
/// `Tree::verify()` requires the name of the top-level tree to be empty.
pub name: SmallVec<[u8; 23]>,
/// Only set if there are any entries in the index we are associated with.
pub id: Option<tree::NodeId>,
/// The id of the directory tree of the associated tree object.
pub id: git_hash::ObjectId,
/// The amount of non-tree items in this directory tree, including sub-trees, recursively.
/// The value of the top-level tree is thus equal to the value of the total amount of entries.
pub num_entries: u32,
/// The sub-trees of this tree. `Tree::verify()` expects them in strictly ascending order by `name`.
pub children: Vec<Tree>,
}

Expand All @@ -26,6 +29,7 @@ pub struct Link {
pub bitmaps: Option<link::Bitmaps>,
}

#[allow(dead_code)]
pub struct UntrackedCache {
/// Something identifying the location and machine that this cache is for.
/// Should the repository be copied to a different machine, the entire cache can immediately be invalidated.
Expand All @@ -42,6 +46,7 @@ pub struct UntrackedCache {
directories: Vec<untracked_cache::Directory>,
}

#[allow(dead_code)]
pub struct FsMonitor {
token: fs_monitor::Token,
/// If a bit is true, the respective entry is NOT valid as per the fs monitor.
Expand All @@ -54,7 +59,7 @@ pub(crate) mod fs_monitor;

pub(crate) mod decode;

pub(crate) mod tree;
pub mod tree;

pub(crate) mod end_of_index_entry;

Expand Down
2 changes: 2 additions & 0 deletions git-index/src/extension/resolve_undo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::{

pub type Paths = Vec<ResolvePath>;

#[allow(dead_code)]
pub struct ResolvePath {
/// relative to the root of the repository, or what would be stored in the index
name: BString,
Expand All @@ -16,6 +17,7 @@ pub struct ResolvePath {
stages: [Option<Stage>; 3],
}

#[allow(dead_code)]
pub struct Stage {
mode: u32,
id: ObjectId,
Expand Down
157 changes: 136 additions & 21 deletions git-index/src/extension/tree.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,136 @@
use std::cmp::Ordering;

use bstr::ByteSlice;
use git_hash::ObjectId;

use crate::{
extension::{Signature, Tree},
util::split_at_byte_exclusive,
util::{split_at_byte_exclusive, split_at_pos},
};

pub const SIGNATURE: Signature = *b"TREE";

pub struct NodeId {
/// The id of the directory tree of the associated tree object.
pub id: git_hash::ObjectId,
/// The amount of non-tree entries contained within, and definitely not zero.
pub entry_count: u32,
/// The error type returned by `Tree::verify()`.
pub mod verify {
use bstr::BString;
use quick_error::quick_error;

quick_error! {
/// The ways in which the verification of a cached tree can fail.
#[derive(Debug)]
pub enum Error {
/// With object lookup enabled: a tree-mode entry of the looked-up tree object has no cached child of the same name.
MissingTreeDirectory { parent_id: git_hash::ObjectId, entry_id: git_hash::ObjectId, name: BString } {
display("The entry {} at path '{}' in parent tree {} wasn't found in the nodes children, making it incomplete", entry_id, name, parent_id)
}
/// With object lookup enabled: the lookup function returned nothing for the tree with the given id.
TreeNodeNotFound { oid: git_hash::ObjectId } {
display("The tree with id {} wasn't found in the object database", oid)
}
/// With object lookup enabled: the number of tree-mode entries in the tree object (`expected_childcount`)
/// differs from the number of cached children (`actual_childcount`).
TreeNodeChildcountMismatch { oid: git_hash::ObjectId, expected_childcount: usize, actual_childcount: usize } {
display("The tree with id {} should have {} children, but its cached representation had {} of them", oid, expected_childcount, actual_childcount)
}
/// The top-level tree has a non-empty name.
RootWithName { name: BString } {
display("The root tree was named '{}', even though it should be empty", name)
}
/// The sum of the children's entry counts (`actual`) exceeds the entry count of their parent (`expected`).
EntriesCount {actual: u32, expected: u32 } {
display("Expected not more than {} entries to be reachable from the top-level, but actual count was {}", expected, actual)
}
/// Two adjacent children of a tree are not in strictly ascending order by name.
OutOfOrder { parent_id: git_hash::ObjectId, current_path: BString, previous_path: BString } {
display("Parent tree '{}' contained out-of order trees prev = '{}' and next = '{}'", parent_id, previous_path, current_path)
}
}
}
}

impl Tree {
    /// Verify the consistency of this cached tree, which must be the top-level tree.
    ///
    /// The following is checked, recursively for every tree that has children:
    ///
    /// * the top-level tree has an empty `name`
    /// * children are in strictly ascending order by `name`
    /// * the sum of the children's `num_entries` does not exceed the `num_entries` of their parent
    ///
    /// Trees without children are not inspected any further.
    ///
    /// If `use_find` is `true`, `find` is additionally called with the id of each tree that has
    /// children along with a scratch buffer. It is expected to return an iterator over the entries
    /// of that tree object, or `None` if the object could not be found. Every entry with tree mode
    /// must then have a cached child of the same name, and the number of such entries must equal
    /// the number of cached children. Entries which fail to decode are skipped.
    /// If `use_find` is `false`, `find` is never called.
    ///
    /// # Errors
    ///
    /// Returns the `verify::Error` variant describing the first violated check.
    pub fn verify<F>(&self, use_find: bool, mut find: F) -> Result<(), verify::Error>
    where
        F: for<'a> FnMut(&git_hash::oid, &'a mut Vec<u8>) -> Option<git_object::TreeRefIter<'a>>,
    {
        /// Check `children` of the tree identified by `parent_id` and recurse into each of them.
        ///
        /// Returns the sum of the `num_entries` declared by `children`, or `None` if there are no
        /// children and thus nothing to compare against.
        fn verify_recursive<F>(
            parent_id: git_hash::ObjectId,
            children: &[Tree],
            mut find_buf: Option<&mut Vec<u8>>,
            find: &mut F,
        ) -> Result<Option<u32>, verify::Error>
        where
            F: for<'a> FnMut(&git_hash::oid, &'a mut Vec<u8>) -> Option<git_object::TreeRefIter<'a>>,
        {
            if children.is_empty() {
                return Ok(None);
            }
            let mut entries: u32 = 0;
            let mut prev = None::<&Tree>;
            for child in children {
                // The counts come straight from the index file and may be bogus. Saturate instead of
                // overflowing so a corrupt file can neither panic nor wrap around to a small sum that
                // would slip through the caller's `actual > expected` comparison.
                entries = entries.saturating_add(child.num_entries);
                if let Some(prev) = prev {
                    if prev.name.cmp(&child.name) != Ordering::Less {
                        return Err(verify::Error::OutOfOrder {
                            parent_id,
                            previous_path: prev.name.as_bstr().into(),
                            current_path: child.name.as_bstr().into(),
                        });
                    }
                }
                prev = Some(child);
            }
            if let Some(buf) = find_buf.as_mut() {
                let tree_entries = find(&parent_id, *buf).ok_or(verify::Error::TreeNodeNotFound { oid: parent_id })?;
                let mut num_entries = 0;
                for entry in tree_entries
                    .filter_map(Result::ok)
                    .filter(|e| e.mode == git_object::tree::EntryMode::Tree)
                {
                    // Binary search is valid here as the ordering of `children` was established above.
                    children
                        .binary_search_by(|e| e.name.as_bstr().cmp(entry.filename))
                        .map_err(|_| verify::Error::MissingTreeDirectory {
                            parent_id,
                            entry_id: entry.oid.to_owned(),
                            name: entry.filename.to_owned(),
                        })?;
                    num_entries += 1;
                }

                if num_entries != children.len() {
                    return Err(verify::Error::TreeNodeChildcountMismatch {
                        oid: parent_id,
                        expected_childcount: num_entries,
                        actual_childcount: children.len(),
                    });
                }
            }
            for child in children {
                // This is actually needed here as it's a mut ref, which isn't copy. We do a re-borrow here.
                #[allow(clippy::needless_option_as_deref)]
                let actual_num_entries = verify_recursive(child.id, &child.children, find_buf.as_deref_mut(), find)?;
                if let Some(actual) = actual_num_entries {
                    if actual > child.num_entries {
                        return Err(verify::Error::EntriesCount {
                            actual,
                            expected: child.num_entries,
                        });
                    }
                }
            }
            Ok(Some(entries))
        }

        if !self.name.is_empty() {
            return Err(verify::Error::RootWithName {
                name: self.name.as_bstr().into(),
            });
        }

        // The buffer is only handed out if object lookup was requested, which also controls whether
        // `find` is called at all.
        let mut buf = Vec::new();
        let declared_entries = verify_recursive(self.id, &self.children, use_find.then(|| &mut buf), &mut find)?;
        if let Some(actual) = declared_entries {
            if actual > self.num_entries {
                return Err(verify::Error::EntriesCount {
                    actual,
                    expected: self.num_entries,
                });
            }
        }

        Ok(())
    }
}

/// A recursive data structure
Expand All @@ -29,32 +148,28 @@ pub fn one_recursive(data: &[u8], hash_len: usize) -> Option<(Tree, &[u8])> {
let (path, data) = split_at_byte_exclusive(data, 0)?;

let (entry_count, data) = split_at_byte_exclusive(data, b' ')?;
let entry_count: u32 = atoi::atoi(entry_count)?;
let num_entries: u32 = atoi::atoi(entry_count)?;

let (subtree_count, mut data) = split_at_byte_exclusive(data, b'\n')?;
let (subtree_count, data) = split_at_byte_exclusive(data, b'\n')?;
let subtree_count: usize = atoi::atoi(subtree_count)?;

let node_id = (entry_count != 0)
.then(|| {
(data.len() >= hash_len).then(|| {
let (hash, rest) = data.split_at(hash_len);
data = rest;
ObjectId::from(hash)
})
})
.flatten()
.map(|id| NodeId { id, entry_count });
let (hash, mut data) = split_at_pos(data, hash_len)?;
let id = ObjectId::from(hash);

let mut subtrees = Vec::with_capacity(subtree_count);
for _ in 0..subtree_count {
let (tree, rest) = one_recursive(data, hash_len)?;
subtrees.push(tree);
match subtrees.binary_search_by(|t: &Tree| t.name.cmp(&tree.name)) {
Ok(_existing_index) => return None,
Err(insert_position) => subtrees.insert(insert_position, tree),
}
data = rest;
}

Some((
Tree {
id: node_id,
id,
num_entries,
name: path.into(),
children: subtrees,
},
Expand All @@ -68,6 +183,6 @@ mod tests {

// Pins the exact in-memory size of `Tree`, in bytes.
#[test]
fn size_of_tree() {
assert_eq!(std::mem::size_of::<Tree>(), 88);
assert_eq!(std::mem::size_of::<Tree>(), 80);
}
}
48 changes: 2 additions & 46 deletions git-index/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,49 +18,5 @@ mod impls {
}
}

pub mod init {
    #![allow(unused)]

    use std::path::{Path, PathBuf};

    use memmap2::Mmap;

    use crate::{decode, extension, File, State};

    mod error {
        use quick_error::quick_error;

        quick_error! {
            /// The error returned by `File::at()`.
            #[derive(Debug)]
            pub enum Error {
                /// Opening, memory-mapping or querying the metadata of the index file failed.
                Io(err: std::io::Error) {
                    display("An IO error occurred while opening the index")
                    source(err)
                    from()
                }
                /// The content of the index file could not be decoded into a `State`.
                Decode(err: crate::decode::Error) {
                    display("The file could not be decoded")
                    source(err)
                    from()
                }
            }
        }
    }
    pub use error::Error;

    impl File {
        /// Open the index file at `path`, memory-map it and decode its content using `options`.
        ///
        /// The last modification time of the file is passed along to the decoder.
        ///
        /// # Errors
        ///
        /// Fails with `Error::Io` if the file cannot be opened, mapped or have its metadata read,
        /// and with `Error::Decode` if its content cannot be decoded.
        pub fn at(path: impl Into<PathBuf>, options: decode::Options) -> Result<Self, Error> {
            let path = path.into();
            let index_file = std::fs::File::open(&path)?;

            // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file.
            #[allow(unsafe_code)]
            let mapped = unsafe { Mmap::map(&index_file)? };
            let mtime = filetime::FileTime::from_last_modification_time(&index_file.metadata()?);
            // The handle is no longer needed once the mapping and the modification time are obtained.
            drop(index_file);

            let (state, checksum) = State::from_bytes(&mapped, mtime, options)?;
            Ok(File { state, path, checksum })
        }
    }
}
pub mod init;
pub mod verify;
Loading

0 comments on commit ad3c803

Please sign in to comment.