Revert "Less memory for look up mode, faster start" - too slow
Iterating in pack order uses the cache far better, which gives a huge speedup.

This reverts commit 395c7e7.
Byron committed Aug 6, 2020
1 parent 7d6abef commit 584350a
Showing 4 changed files with 12 additions and 11 deletions.
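
Why iterating in pack order helps: a `.idx` file lists objects sorted by object id, so consuming it directly scatters reads all over the `.pack` file. Re-sorting the entries by pack offset first, as the restored code does, turns the decode pass into a mostly sequential scan that the page cache serves well. A minimal illustration of that idea; `Entry` and `pack_offset` are assumed stand-ins, not gitoxide's actual types:

```rust
// Hypothetical index entry; the real type lives in git-odb.
#[derive(Clone)]
struct Entry {
    pack_offset: u64, // byte offset of the object's data in the .pack file
}

// The .idx file orders entries by object id, which makes pack reads random.
// Sorting by offset means each pack page is touched once, front to back.
fn entries_sorted_by_offset(mut entries: Vec<Entry>) -> Vec<Entry> {
    entries.sort_by_key(|e| e.pack_offset);
    entries
}
```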
2 changes: 1 addition & 1 deletion in git-odb/src/pack/index/access.rs

```diff
@@ -126,7 +126,7 @@ impl index::File {
         None
     }
 
-    pub fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Entry> + 'a + Send> {
+    pub fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Entry> + 'a> {
         match self.kind {
             index::Kind::V2 => Box::new(self.iter_v2()),
             index::Kind::V1 => Box::new(self.iter_v1()),
```
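
Dropping `+ Send` is safe here because the iterator no longer crosses a thread boundary: entries are collected into a `Vec` before any workers start, and only `&[Entry]` chunks are handed out. A small sketch of what the bound buys, and why it is no longer needed:

```rust
// A function that moves an iterator to another thread would need `Send`.
fn requires_send<I: Iterator<Item = u32> + Send>(_iter: I) {}

fn main() {
    // Without `+ Send`, the trait object cannot leave the current thread:
    let plain: Box<dyn Iterator<Item = u32>> = Box::new(0..10);
    // requires_send(plain); // compile error: `dyn Iterator<...>` is not `Send`
    drop(plain);

    // With `+ Send`, it can; the old streaming-chunks design relied on this:
    let sendable: Box<dyn Iterator<Item = u32> + Send> = Box::new(0..10);
    requires_send(sendable);
}
```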
17 changes: 9 additions & 8 deletions in git-odb/src/pack/index/traverse/lookup.rs

```diff
@@ -27,9 +27,13 @@ impl index::File {
             &mut <<P as Progress>::SubProgress as Progress>::SubProgress,
         ) -> Result<(), Box<dyn std::error::Error + Send + Sync>>,
     {
-        let (chunk_size, thread_limit, _) =
-            parallel::optimize_chunk_size_and_thread_limit(1000, Some(self.num_objects as usize), thread_limit, None);
-        let there_are_enough_entries_to_process = || self.num_objects > 10_000;
+        let index_entries =
+            util::index_entries_sorted_by_offset_ascending(self, root.add_child("collecting sorted index"));
+
+        let (chunk_size, thread_limit, available_cores) =
+            parallel::optimize_chunk_size_and_thread_limit(1000, Some(index_entries.len()), thread_limit, None);
+        let there_are_enough_entries_to_process = || index_entries.len() > chunk_size * available_cores;
+        let input_chunks = index_entries.chunks(chunk_size.max(chunk_size));
         let reduce_progress = parking_lot::Mutex::new({
             let mut p = root.add_child("Traversing");
             p.init(Some(self.num_objects()), Some("objects"));
@@ -46,13 +50,10 @@
 
         in_parallel_if(
             there_are_enough_entries_to_process,
-            util::Chunks {
-                iter: self.iter(),
-                size: chunk_size,
-            },
+            input_chunks,
             thread_limit,
             state_per_thread,
-            |entries: Vec<index::Entry>,
+            |entries: &[index::Entry],
             (cache, ref mut processor, buf, progress)|
             -> Result<Vec<decode::Outcome>, Error> {
                 progress.init(Some(entries.len() as u32), Some("entries"));
```
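
The restored flow is: collect all entries up front, size the chunks against the available cores, and fan the chunks out to worker threads only when there is enough work to amortize the thread startup cost. A self-contained sketch of that pattern; `process_in_chunks` is a simplified stand-in, not the signature of gitoxide's `in_parallel_if`:

```rust
use std::thread;

// Run `worker` over fixed-size chunks, in parallel only when it pays off.
fn process_in_chunks<T: Sync, R: Send>(
    entries: &[T],
    chunk_size: usize,
    worth_threading: bool,
    worker: impl Fn(&[T]) -> Vec<R> + Sync,
) -> Vec<R> {
    if !worth_threading {
        // Small inputs: a single-threaded pass avoids thread startup cost.
        return entries.chunks(chunk_size).flat_map(|c| worker(c)).collect();
    }
    let worker = &worker; // shared reference, copied into each spawn below
    thread::scope(|scope| {
        let handles: Vec<_> = entries
            .chunks(chunk_size)
            .map(|chunk| scope.spawn(move || worker(chunk)))
            .collect();
        handles
            .into_iter()
            .flat_map(|handle| handle.join().expect("worker panicked"))
            .collect()
    })
}

fn main() {
    let entries: Vec<u64> = (0..10_000).collect();
    let chunk_size = 1_000;
    // Mirrors `there_are_enough_entries_to_process` in the diff above.
    let cores = thread::available_parallelism().map_or(1, |n| n.get());
    let enough_work = entries.len() > chunk_size * cores;
    let sums = process_in_chunks(&entries, chunk_size, enough_work, |chunk| {
        vec![chunk.iter().copied().sum::<u64>()]
    });
    assert_eq!(sums.iter().sum::<u64>(), (0..10_000u64).sum::<u64>());
}
```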
2 changes: 1 addition & 1 deletion in src/plumbing/lean.rs

```diff
@@ -130,7 +130,7 @@ mod options {
         /// owned objects, causing plenty of allocation to occur.
         pub re_encode: bool,
 
-        #[argh(option, short = 'a')]
+        #[argh(option)]
         /// the algorithm used to verify the pack. They differ in costs.
         ///
         /// Possible values are "less-time" and "less-memory". Default is "less-memory".
```
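
On the command line this means the verification algorithm is now reachable only as `--algorithm`; the `-a` shorthand from the reverted commit is gone. A minimal sketch using argh's derive syntax (struct and field names are illustrative, not the exact ones in lean.rs):

```rust
use argh::FromArgs;

/// Verify a pack (illustrative subset of the real options).
#[derive(FromArgs)]
struct Args {
    #[argh(option)]
    /// the algorithm used to verify the pack
    algorithm: Option<String>,
}

fn main() {
    // After this commit: `cmd --algorithm less-time` parses, `cmd -a less-time`
    // does not; restoring `short = 'a'` in the attribute re-enables the latter.
    let args: Args = argh::from_env();
    println!("{}", args.algorithm.as_deref().unwrap_or("less-memory"));
}
```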
2 changes: 1 addition & 1 deletion in tasks.md

```diff
@@ -25,7 +25,7 @@
 * [x] figure out why resolving the linux pack is so slow and fix it
 * [x] Allow to provide a pre-resolve phase to initialize the resolver
 * [x] Use Tree in verify impl
-* [x] fix lookup todos
+* [x] ~~fix lookup todos~~ - it's nearly twice as slow
 * [ ] per-object counts for statistics (and use that to optimize order when matching on object types)
 * [ ] nicer errors with descriptive messages
 * [ ] handle ctrl+c similarly to the pretty version to prevent leakage (or find a way to use
```
