Revert "Less memory for look up mode, faster start" - too slow
Iterating in pack order uses the cache far better, which gives a huge speedup.

This reverts commit 395c7e7.
Byron committed Aug 6, 2020
1 parent 7d6abef commit 584350a
Showing 4 changed files with 12 additions and 11 deletions.
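
Why iterating in pack order helps: a `.idx` file lists objects sorted by object id, so consuming it directly scatters reads all over the `.pack` file. Re-sorting the entries by pack offset first, as the restored code does, turns the decode pass into a mostly sequential scan that the page cache serves well. A minimal illustration of that idea; `Entry` and `pack_offset` are assumed stand-ins, not gitoxide's actual types:

```rust
// Hypothetical index entry; the real type lives in git-odb.
#[derive(Clone)]
struct Entry {
    pack_offset: u64, // byte offset of the object's data in the .pack file
}

// The .idx file orders entries by object id, which makes pack reads random.
// Sorting by offset means each pack page is touched once, front to back.
fn entries_sorted_by_offset(mut entries: Vec<Entry>) -> Vec<Entry> {
    entries.sort_by_key(|e| e.pack_offset);
    entries
}
```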
2 changes: 1 addition & 1 deletion in git-odb/src/pack/index/access.rs

```diff
@@ -126,7 +126,7 @@ impl index::File {
         None
     }
 
-    pub fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Entry> + 'a + Send> {
+    pub fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Entry> + 'a> {
         match self.kind {
             index::Kind::V2 => Box::new(self.iter_v2()),
             index::Kind::V1 => Box::new(self.iter_v1()),
```
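
Dropping `+ Send` is safe here because the iterator no longer crosses a thread boundary: entries are collected into a `Vec` before any workers start, and only `&[Entry]` chunks are handed out. A small sketch of what the bound buys, and why it is no longer needed:

```rust
// A function that moves an iterator to another thread would need `Send`.
fn requires_send<I: Iterator<Item = u32> + Send>(_iter: I) {}

fn main() {
    // Without `+ Send`, the trait object cannot leave the current thread:
    let plain: Box<dyn Iterator<Item = u32>> = Box::new(0..10);
    // requires_send(plain); // compile error: `dyn Iterator<...>` is not `Send`
    drop(plain);

    // With `+ Send`, it can; the old streaming-chunks design relied on this:
    let sendable: Box<dyn Iterator<Item = u32> + Send> = Box::new(0..10);
    requires_send(sendable);
}
```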
17 changes: 9 additions & 8 deletions in git-odb/src/pack/index/traverse/lookup.rs

```diff
@@ -27,9 +27,13 @@ impl index::File {
             &mut <<P as Progress>::SubProgress as Progress>::SubProgress,
         ) -> Result<(), Box<dyn std::error::Error + Send + Sync>>,
     {
-        let (chunk_size, thread_limit, _) =
-            parallel::optimize_chunk_size_and_thread_limit(1000, Some(self.num_objects as usize), thread_limit, None);
-        let there_are_enough_entries_to_process = || self.num_objects > 10_000;
+        let index_entries =
+            util::index_entries_sorted_by_offset_ascending(self, root.add_child("collecting sorted index"));
+
+        let (chunk_size, thread_limit, available_cores) =
+            parallel::optimize_chunk_size_and_thread_limit(1000, Some(index_entries.len()), thread_limit, None);
+        let there_are_enough_entries_to_process = || index_entries.len() > chunk_size * available_cores;
+        let input_chunks = index_entries.chunks(chunk_size.max(chunk_size));
         let reduce_progress = parking_lot::Mutex::new({
             let mut p = root.add_child("Traversing");
             p.init(Some(self.num_objects()), Some("objects"));
@@ -46,13 +50,10 @@
 
         in_parallel_if(
             there_are_enough_entries_to_process,
-            util::Chunks {
-                iter: self.iter(),
-                size: chunk_size,
-            },
+            input_chunks,
             thread_limit,
             state_per_thread,
-            |entries: Vec<index::Entry>,
+            |entries: &[index::Entry],
             (cache, ref mut processor, buf, progress)|
             -> Result<Vec<decode::Outcome>, Error> {
                 progress.init(Some(entries.len() as u32), Some("entries"));
```
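
The restored flow is: collect all entries up front, size the chunks against the available cores, and fan the chunks out to worker threads only when there is enough work to amortize the thread startup cost. A self-contained sketch of that pattern; `process_in_chunks` is a simplified stand-in, not the signature of gitoxide's `in_parallel_if`:

```rust
use std::thread;

// Run `worker` over fixed-size chunks, in parallel only when it pays off.
fn process_in_chunks<T: Sync, R: Send>(
    entries: &[T],
    chunk_size: usize,
    worth_threading: bool,
    worker: impl Fn(&[T]) -> Vec<R> + Sync,
) -> Vec<R> {
    if !worth_threading {
        // Small inputs: a single-threaded pass avoids thread startup cost.
        return entries.chunks(chunk_size).flat_map(|c| worker(c)).collect();
    }
    let worker = &worker; // shared reference, copied into each spawn below
    thread::scope(|scope| {
        let handles: Vec<_> = entries
            .chunks(chunk_size)
            .map(|chunk| scope.spawn(move || worker(chunk)))
            .collect();
        handles
            .into_iter()
            .flat_map(|handle| handle.join().expect("worker panicked"))
            .collect()
    })
}

fn main() {
    let entries: Vec<u64> = (0..10_000).collect();
    let chunk_size = 1_000;
    // Mirrors `there_are_enough_entries_to_process` in the diff above.
    let cores = thread::available_parallelism().map_or(1, |n| n.get());
    let enough_work = entries.len() > chunk_size * cores;
    let sums = process_in_chunks(&entries, chunk_size, enough_work, |chunk| {
        vec![chunk.iter().copied().sum::<u64>()]
    });
    assert_eq!(sums.iter().sum::<u64>(), (0..10_000u64).sum::<u64>());
}
```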
2 changes: 1 addition & 1 deletion in src/plumbing/lean.rs

```diff
@@ -130,7 +130,7 @@ mod options {
         /// owned objects, causing plenty of allocation to occur.
         pub re_encode: bool,
 
-        #[argh(option, short = 'a')]
+        #[argh(option)]
         /// the algorithm used to verify the pack. They differ in costs.
         ///
         /// Possible values are "less-time" and "less-memory". Default is "less-memory".
```
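
On the command line this means the verification algorithm is now reachable only as `--algorithm`; the `-a` shorthand from the reverted commit is gone. A minimal sketch using argh's derive syntax (struct and field names are illustrative, not the exact ones in lean.rs):

```rust
use argh::FromArgs;

/// Verify a pack (illustrative subset of the real options).
#[derive(FromArgs)]
struct Args {
    #[argh(option)]
    /// the algorithm used to verify the pack
    algorithm: Option<String>,
}

fn main() {
    // After this commit: `cmd --algorithm less-time` parses, `cmd -a less-time`
    // does not; restoring `short = 'a'` in the attribute re-enables the latter.
    let args: Args = argh::from_env();
    println!("{}", args.algorithm.as_deref().unwrap_or("less-memory"));
}
```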
2 changes: 1 addition & 1 deletion in tasks.md

```diff
@@ -25,7 +25,7 @@
 * [x] figure out why resolving the linux pack is so slow and fix it
 * [x] Allow to provide a pre-resolve phase to initialize the resolver
 * [x] Use Tree in verify impl
-* [x] fix lookup todos
+* [x] ~~fix lookup todos~~ - it's nearly twice as slow
 * [ ] per-object counts for statistics (and use that to optimize order when matching on object types)
 * [ ] nicer errors with descriptive messages
 * [ ] handle ctrl+c similarly to the pretty version to prevent leakage (or find a way to use
```
