Skip to content

Commit

Permalink
pack-objects: refactor path-walk delta phase
Browse files Browse the repository at this point in the history
Previously, the --path-walk option to 'git pack-objects' would compute
deltas inline with the path-walk logic. This would make the progress
indicator look like it is taking a long time to enumerate objects, and
then very quickly computed deltas.

Instead of computing deltas on each region of objects organized by tree,
store a list of regions corresponding to these groups. These can later
be pulled from the list for delta compression before doing the "global"
delta search.

This presents a new progress indicator that can be used in tests to
verify that this stage is happening.

The current implementation is not integrated with threads, but could be
done in a future update.

Since we do not attempt to sort objects by size until after exploring
all trees, we can remove the previous change to t5530 due to a different
error message appearing first.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
  • Loading branch information
derrickstolee authored and dscho committed Oct 3, 2024
1 parent 5b2135e commit d832880
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 33 deletions.
81 changes: 56 additions & 25 deletions builtin/pack-objects.c
Original file line number Diff line number Diff line change
Expand Up @@ -3206,6 +3206,50 @@ static int should_attempt_deltas(struct object_entry *entry)
return 1;
}

static void find_deltas_for_region(struct object_entry *list UNUSED,
struct packing_region *region,
unsigned int *processed)
{
struct object_entry **delta_list;
uint32_t delta_list_nr = 0;

ALLOC_ARRAY(delta_list, region->nr);
for (uint32_t i = 0; i < region->nr; i++) {
struct object_entry *entry = to_pack.objects + region->start + i;
if (should_attempt_deltas(entry))
delta_list[delta_list_nr++] = entry;
}

QSORT(delta_list, delta_list_nr, type_size_sort);
find_deltas(delta_list, &delta_list_nr, window, depth, processed);
free(delta_list);
}

static void find_deltas_by_region(struct object_entry *list,
struct packing_region *regions,
uint32_t start, uint32_t nr)
{
unsigned int processed = 0;
uint32_t progress_nr;

if (!nr)
return;

progress_nr = regions[nr - 1].start + regions[nr - 1].nr;

if (progress)
progress_state = start_progress(_("Compressing objects by path"),
progress_nr);

while (nr--)
find_deltas_for_region(list,
&regions[start++],
&processed);

display_progress(progress_state, progress_nr);
stop_progress(&progress_state);
}

static void prepare_pack(int window, int depth)
{
struct object_entry **delta_list;
Expand All @@ -3230,6 +3274,10 @@ static void prepare_pack(int window, int depth)
if (!to_pack.nr_objects || !window || !depth)
return;

if (path_walk)
find_deltas_by_region(to_pack.objects, to_pack.regions,
0, to_pack.nr_regions);

ALLOC_ARRAY(delta_list, to_pack.nr_objects);
nr_deltas = n = 0;

Expand Down Expand Up @@ -4167,10 +4215,8 @@ static int add_objects_by_path(const char *path,
enum object_type type,
void *data)
{
struct object_entry **delta_list;
size_t oe_start = to_pack.nr_objects;
size_t oe_end;
unsigned int sub_list_size;
unsigned int *processed = data;

/*
Expand Down Expand Up @@ -4203,32 +4249,17 @@ static int add_objects_by_path(const char *path,
if (oe_end == oe_start || !window)
return 0;

sub_list_size = 0;
ALLOC_ARRAY(delta_list, oe_end - oe_start);
ALLOC_GROW(to_pack.regions,
to_pack.nr_regions + 1,
to_pack.nr_regions_alloc);

for (size_t i = 0; i < oe_end - oe_start; i++) {
struct object_entry *entry = to_pack.objects + oe_start + i;
to_pack.regions[to_pack.nr_regions].start = oe_start;
to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start;
to_pack.nr_regions++;

if (!should_attempt_deltas(entry))
continue;
*processed += oids->nr;
display_progress(progress_state, *processed);

delta_list[sub_list_size++] = entry;
}

/*
* Find delta bases among this list of objects that all match the same
* path. This causes the delta compression to be interleaved in the
* object walk, which can lead to confusing progress indicators. This is
* also incompatible with threaded delta calculations. In the future,
* consider creating a list of regions in the full to_pack.objects array
* that could be picked up by the threaded delta computation.
*/
if (sub_list_size && window) {
QSORT(delta_list, sub_list_size, type_size_sort);
find_deltas(delta_list, &sub_list_size, window, depth, processed);
}

free(delta_list);
return 0;
}

Expand Down
12 changes: 12 additions & 0 deletions pack-objects.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,23 @@ struct object_entry {
unsigned ext_base:1; /* delta_idx points outside packlist */
};

/**
* A packing region is a section of the packing_data.objects array
* as given by a starting index and a number of elements.
*/
struct packing_region {
uint32_t start;
uint32_t nr;
};

struct packing_data {
struct repository *repo;
struct object_entry *objects;
uint32_t nr_objects, nr_alloc;

struct packing_region *regions;
uint32_t nr_regions, nr_regions_alloc;

int32_t *index;
uint32_t index_size;

Expand Down
8 changes: 6 additions & 2 deletions t/t5300-pack-object.sh
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,9 @@ test_expect_success '--full-name-hash and --write-bitmap-index are incompatible'
# Basic "repack everything" test
test_expect_success '--path-walk pack everything' '
git -C server rev-parse HEAD >in &&
git -C server pack-objects --stdout --revs --path-walk <in >out.pack &&
GIT_PROGRESS_DELAY=0 git -C server pack-objects \
--stdout --revs --path-walk --progress <in >out.pack 2>err &&
grep "Compressing objects by path" err &&
git -C server index-pack --stdin <out.pack
'

Expand All @@ -702,7 +704,9 @@ test_expect_success '--path-walk thin pack' '
$(git -C server rev-parse HEAD)
^$(git -C server rev-parse HEAD~2)
EOF
git -C server pack-objects --thin --stdout --revs --path-walk <in >out.pack &&
GIT_PROGRESS_DELAY=0 git -C server pack-objects \
--thin --stdout --revs --path-walk --progress <in >out.pack 2>err &&
grep "Compressing objects by path" err &&
git -C server index-pack --fix-thin --stdin <out.pack
'

Expand Down
6 changes: 0 additions & 6 deletions t/t5530-upload-pack-error.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,6 @@ test_expect_success 'upload-pack fails due to error in pack-objects packing' '
hexsz=$(test_oid hexsz) &&
printf "%04xwant %s\n00000009done\n0000" \
$(($hexsz + 10)) $head >input &&
# The current implementation of path-walk causes a different
# error message. This will be changed by a future refactoring.
GIT_TEST_PACK_PATH_WALK=0 &&
export GIT_TEST_PACK_PATH_WALK &&
test_must_fail git upload-pack . <input >/dev/null 2>output.err &&
test_grep "unable to read" output.err &&
test_grep "pack-objects died" output.err
Expand Down

0 comments on commit d832880

Please sign in to comment.