Skip to content

Commit

Permalink
prepare full 'verify' implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Jul 28, 2020
1 parent 0a33b24 commit ee45c7f
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 28 deletions.
4 changes: 4 additions & 0 deletions git-odb/src/pack/data/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,10 @@ impl File {
// FIXME(Performance) If delta-chains are uneven, we know we will have to copy bytes over here
// Instead we could use a different start buffer, to naturally end up with the result in the
// right one.
// However, this is a bit more complicated than just that - you have to deal with the base
// object, which should also be placed in the second buffer right away. You don't have that
// control/knowledge for out-of-pack bases, so this is a special case to deal with, too.
// Maybe these invariants can be represented in the type system though.
if chain.len() % 2 == 1 {
// this seems inverted, but remember: we swapped the buffers on the last iteration
target_buf[..last_result_size].copy_from_slice(&source_buf[..last_result_size]);
Expand Down
64 changes: 42 additions & 22 deletions gitoxide-core/src/pack/explode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@ use anyhow::{anyhow, Result};
use git_features::progress::{self, Progress};
use git_object::{owned, HashKind};
use git_odb::{loose, pack, Write};
use std::{
fs,
io::{self, Read},
path::{Path, PathBuf},
};
use std::{fs, io::Read, path::Path};

#[derive(PartialEq, Debug)]
pub enum SafetyCheck {
Expand Down Expand Up @@ -79,8 +75,11 @@ quick_error! {
ObjectEncodeMismatch(kind: git_object::Kind, actual: owned::Id, expected: owned::Id) {
display("{} object {} wasn't re-encoded without change - new hash is {}", kind, expected, actual)
}
RemoveFile(err: io::Error, index: PathBuf, data: PathBuf) {
display("Failed to delete pack index file at '{} or data file at '{}'", index.display(), data.display())
WrittenFileMissing(id: owned::Id) {
display("The recently written file for loose object {} could not be found", id)
}
WrittenFileCorrupt(err: loose::db::locate::Error, id: owned::Id) {
display("The recently written file for loose object {} could not be read", id)
source(err)
}
}
Expand Down Expand Up @@ -124,10 +123,12 @@ impl OutputWriter {
}
}

#[derive(Default)]
pub struct Context {
pub thread_limit: Option<usize>,
pub delete_pack: bool,
pub sink_compress: bool,
pub verify: bool,
}

pub fn pack_or_pack_index<P>(
Expand All @@ -139,6 +140,7 @@ pub fn pack_or_pack_index<P>(
thread_limit,
delete_pack,
sink_compress,
verify,
}: Context,
) -> Result<()>
where
Expand Down Expand Up @@ -183,22 +185,34 @@ where
{
let object_path = object_path.map(|p| p.as_ref().to_owned());
move || {
let out = OutputWriter::new(object_path.clone(), sink_compress);
move |object_kind, buf, index_entry, _entry_stats, progress| {
let written_id = out
.write_buf(object_kind, buf, HashKind::Sha1)
.map_err(|err| Error::Write(Box::new(err) as Box<dyn std::error::Error + Send + Sync>, object_kind, index_entry.oid))
.map_err(|err| Box::new(err) as Box<dyn std::error::Error + Send + Sync>)?;
if written_id != index_entry.oid {
if let git_object::Kind::Tree = object_kind {
progress.info(format!("The tree in pack named {} was written as {} due to modes 100664 and 100640 rewritten as 100644.", index_entry.oid, written_id));
} else {
return Err(Box::new(Error::ObjectEncodeMismatch(object_kind, index_entry.oid, written_id)))
}
let out = OutputWriter::new(object_path.clone(), sink_compress);
let object_verifier = if verify {
object_path.as_ref().map(|p| loose::Db::at(p))
} else {
None
};
move |object_kind, buf, index_entry, _entry_stats, progress| {
let written_id = out
.write_buf(object_kind, buf, HashKind::Sha1)
.map_err(|err| Error::Write(Box::new(err) as Box<dyn std::error::Error + Send + Sync>, object_kind, index_entry.oid))
.map_err(|err| Box::new(err) as Box<dyn std::error::Error + Send + Sync>)?;
if written_id != index_entry.oid {
if let git_object::Kind::Tree = object_kind {
progress.info(format!("The tree in pack named {} was written as {} due to modes 100664 and 100640 rewritten as 100644.", index_entry.oid, written_id));
} else {
return Err(Box::new(Error::ObjectEncodeMismatch(object_kind, index_entry.oid, written_id)))
}
}
if let Some(verifier) = object_verifier.as_ref() {
let obj = verifier.locate(written_id.to_borrowed())
.ok_or_else(|| Error::WrittenFileMissing(written_id))?
.map_err(|err| Error::WrittenFileCorrupt(err, written_id))?;
// obj.checksum_matches(written_id);
}
Ok(())
}
Ok(())
}
}},
},
pack::cache::DecodeEntryLRU::default,
).map(|(_,_,c)|progress::DoOrDiscard::from(c)).with_context(|| "Failed to explode the entire pack - some loose objects may have been created nonetheless")?;

Expand All @@ -208,7 +222,13 @@ where
if delete_pack {
fs::remove_file(&index_path)
.and_then(|_| fs::remove_file(&data_path))
.map_err(|err| Error::RemoveFile(err, index_path.clone(), data_path.clone()))?;
.with_context(|| {
format!(
"Failed to delete pack index file at '{}' or data file at '{}'",
index_path.display(),
data_path.display()
)
})?;
progress.info(format!(
"Removed '{}' and '{}'",
index_path.display(),
Expand Down
8 changes: 8 additions & 0 deletions src/plumbing/lean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ mod options {
#[derive(FromArgs, PartialEq, Debug)]
#[argh(subcommand, name = "pack-explode")]
pub struct PackExplode {
#[argh(switch)]
/// read written objects back and assert they match their source. Fail the operation otherwise.
///
/// Only relevant if an object directory is set.
pub verify: bool,

/// delete the pack and index file after the operation is successful
#[argh(switch)]
pub delete_pack: bool,
Expand Down Expand Up @@ -147,6 +153,7 @@ pub fn main() -> Result<()> {
sink_compress,
object_path,
verbose,
verify,
check,
delete_pack,
}) => {
Expand All @@ -160,6 +167,7 @@ pub fn main() -> Result<()> {
thread_limit,
delete_pack,
sink_compress,
verify,
},
)
}
Expand Down
12 changes: 10 additions & 2 deletions src/plumbing/pretty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,13 @@ mod options {
/// Verify the integrity of a pack or index file
#[structopt(setting = AppSettings::ColoredHelp)]
PackExplode {
/// Delete the pack and index file after the operation is successful
#[structopt(long, requires("object_path"))]
/// Read written objects back and assert they match their source. Fail the operation otherwise.
///
/// Only relevant if an object directory is set.
verify: bool,

/// delete the pack and index file after the operation is successful
#[structopt(long)]
delete_pack: bool,

Expand All @@ -48,7 +54,7 @@ mod options {
/// This helps to determine overhead related to compression. If unset, the sink will
/// only create hashes from bytes, which is usually limited by the speed at which input
/// can be obtained.
#[structopt(long)]
#[structopt(long, conflicts_with("object_path"))]
sink_compress: bool,

/// Display verbose messages and progress information
Expand Down Expand Up @@ -236,6 +242,7 @@ pub fn main() -> Result<()> {
delete_pack,
pack_path,
object_path,
verify,
} => prepare_and_run(
"pack-explode",
verbose,
Expand All @@ -251,6 +258,7 @@ pub fn main() -> Result<()> {
thread_limit,
delete_pack,
sink_compress,
verify,
},
)
},
Expand Down
2 changes: 1 addition & 1 deletion tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
* [x] to disk
* [x] progress
* [x] option to compress sink input too
* [ ] unrelated: see if delta-decode buffer optimization can work easily
* [x] unrelated: see if delta-decode buffer optimization can work easily
* [ ] --verify
* [ ] statistics

Expand Down
6 changes: 3 additions & 3 deletions tests/stateless-journey.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ title "CLI ${kind}"
(with "and all safety checks"
it "does not explode the file at all" && {
WITH_SNAPSHOT="$snapshot/plumbing-broken-pack-explode-delete-pack-to-sink-failure" \
expect_run $WITH_FAILURE "$exe_plumbing" pack-explode --check all --delete-pack "${PACK_FILE}.pack"
expect_run $WITH_FAILURE "$exe_plumbing" pack-explode --sink-compress --check all --delete-pack "${PACK_FILE}.pack"
}

it "did not touch index or pack file" && {
Expand All @@ -84,8 +84,8 @@ title "CLI ${kind}"
(with "and no safety checks at all (and an output directory)"
it "does explode the file" && {
WITH_SNAPSHOT="$snapshot/plumbing-broken-pack-explode-delete-pack-to-sink-skip-checks-success" \
expect_run $SUCCESSFULLY "$exe_plumbing" pack-explode --check skip-file-and-object-checksum-and-no-abort-on-decode \
--delete-pack --sink-compress "${PACK_FILE}.pack" .
expect_run $SUCCESSFULLY "$exe_plumbing" pack-explode --verify --check skip-file-and-object-checksum-and-no-abort-on-decode \
--delete-pack "${PACK_FILE}.pack" .
}

it "removes the original files" && {
Expand Down

0 comments on commit ee45c7f

Please sign in to comment.