Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test performance of optimization to unpack_trees() #18

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cache-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@ static int update_one(struct cache_tree *it,

int cache_tree_update(struct index_state *istate, int flags)
{
uint64_t start = getnanotime();
struct cache_tree *it = istate->cache_tree;
struct cache_entry **cache = istate->cache;
int entries = istate->cache_nr;
Expand All @@ -460,6 +461,7 @@ int cache_tree_update(struct index_state *istate, int flags)
if (i < 0)
return i;
istate->cache_changed |= CACHE_TREE_CHANGED;
trace_performance_since(start, "repair cache-tree");
return 0;
}

Expand Down
1 change: 1 addition & 0 deletions cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,7 @@ extern int index_name_pos(const struct index_state *, const char *name, int name
#define ADD_CACHE_JUST_APPEND 8 /* Append only; tree.c::read_tree() */
#define ADD_CACHE_NEW_ONLY 16 /* Do not replace existing ones */
#define ADD_CACHE_KEEP_CACHE_TREE 32 /* Do not invalidate cache-tree */
#define ADD_CACHE_SKIP_VERIFY_PATH 64 /* Do not verify path */
extern int add_index_entry(struct index_state *, struct cache_entry *ce, int option);
extern void rename_index_entry_at(struct index_state *, int pos, const char *new_name);

Expand Down
3 changes: 2 additions & 1 deletion read-cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -1237,6 +1237,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
int ok_to_add = option & ADD_CACHE_OK_TO_ADD;
int ok_to_replace = option & ADD_CACHE_OK_TO_REPLACE;
int skip_df_check = option & ADD_CACHE_SKIP_DFCHECK;
int skip_verify_path = option & ADD_CACHE_SKIP_VERIFY_PATH;
int new_only = option & ADD_CACHE_NEW_ONLY;

if (!(option & ADD_CACHE_KEEP_CACHE_TREE))
Expand Down Expand Up @@ -1277,7 +1278,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e

if (!ok_to_add)
return -1;
if (!verify_path(ce->name, ce->ce_mode))
if (!skip_verify_path && !verify_path(ce->name, ce->ce_mode))
return error("Invalid path '%s'", ce->name);

if (!skip_df_check &&
Expand Down
152 changes: 152 additions & 0 deletions unpack-trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ static int do_add_entry(struct unpack_trees_options *o, struct cache_entry *ce,

ce->ce_flags = (ce->ce_flags & ~clear) | set;
return add_index_entry(&o->result, ce,
o->extra_add_index_flags |
ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
}

Expand Down Expand Up @@ -342,6 +343,7 @@ static int check_updates(struct unpack_trees_options *o)
struct progress *progress = NULL;
struct index_state *index = &o->result;
struct checkout state = CHECKOUT_INIT;
uint64_t start = getnanotime();
int i;

state.force = 1;
Expand Down Expand Up @@ -413,6 +415,7 @@ static int check_updates(struct unpack_trees_options *o)
errs |= finish_delayed_checkout(&state);
if (o->update)
git_attr_set_direction(GIT_ATTR_CHECKIN, NULL);
trace_performance_since(start, "update worktree after a merge");
return errs != 0;
}

Expand Down Expand Up @@ -634,6 +637,132 @@ static inline int are_same_oid(struct name_entry *name_j, struct name_entry *nam
return name_j->oid && name_k->oid && !oidcmp(name_j->oid, name_k->oid);
}

static int all_trees_same_as_cache_tree(int n, unsigned long dirmask,
struct name_entry *names,
struct traverse_info *info)
{
struct unpack_trees_options *o = info->data;
int i;

if (!o->merge || dirmask != ((1 << n) - 1))
return 0;

for (i = 1; i < n; i++)
if (!are_same_oid(names, names + i))
return 0;

return cache_tree_matches_traversal(o->src_index->cache_tree, names, info);
}

static int index_pos_by_traverse_info(struct name_entry *names,
struct traverse_info *info)
{
struct unpack_trees_options *o = info->data;
int len = traverse_path_len(info, names);
char *name = xmalloc(len + 1 /* slash */ + 1 /* NUL */);
int pos;

make_traverse_path(name, info, names);
name[len++] = '/';
name[len] = '\0';
pos = index_name_pos(o->src_index, name, len);
if (pos >= 0)
BUG("This is a directory and should not exist in index");
pos = -pos - 1;
if (!starts_with(o->src_index->cache[pos]->name, name) ||
(pos > 0 && starts_with(o->src_index->cache[pos-1]->name, name)))
BUG("pos must point at the first entry in this directory");
free(name);
return pos;
}

/*
* Fast path if we detect that all trees are the same as cache-tree at this
* path. We'll walk these trees recursively using cache-tree/index instead of
* ODB since already know what these trees contain.
*/
static int traverse_by_cache_tree(int pos, int nr_entries, int nr_names,
struct name_entry *names,
struct traverse_info *info)
{
struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
struct unpack_trees_options *o = info->data;
struct cache_entry *tree_ce = NULL;
int ce_len = 0;
int i, d;

if (!o->merge)
BUG("We need cache-tree to do this optimization");

/*
* Try to keep add_index_entry() as fast as possible since
* we're going to do a lot of them.
*
* Skipping verify_path() should totally be safe because these
* paths are from the source index, which must have been
* verified.
*
* Skipping D/F and cache-tree validation checks is trickier
* because it assumes what n-merge code would do when all
* trees and the index are the same. We probably could just
* optimize those code instead (e.g. we don't invalidate that
* many cache-tree, but the searching for them is very
* expensive).
*/
o->extra_add_index_flags = ADD_CACHE_SKIP_DFCHECK;
o->extra_add_index_flags |= ADD_CACHE_SKIP_VERIFY_PATH;

/*
* Do what unpack_callback() and unpack_nondirectories() normally
* do. But we walk all paths recursively in just one loop instead.
*
* D/F conflicts and staged entries are not a concern because
* cache-tree would be invalidated and we would never get here
* in the first place.
*/
for (i = 0; i < nr_entries; i++) {
int new_ce_len, len, rc;

src[0] = o->src_index->cache[pos + i];

len = ce_namelen(src[0]);
new_ce_len = cache_entry_size(len);

if (new_ce_len > ce_len) {
new_ce_len <<= 1;
tree_ce = xrealloc(tree_ce, new_ce_len);
memset(tree_ce, 0, new_ce_len);
ce_len = new_ce_len;

tree_ce->ce_flags = create_ce_flags(0);

for (d = 1; d <= nr_names; d++)
src[d] = tree_ce;
}

tree_ce->ce_mode = src[0]->ce_mode;
tree_ce->ce_namelen = len;
oidcpy(&tree_ce->oid, &src[0]->oid);
memcpy(tree_ce->name, src[0]->name, len + 1);

rc = call_unpack_fn((const struct cache_entry * const *)src, o);
if (rc < 0) {
free(tree_ce);
return rc;
}

mark_ce_used(src[0], o);
}
o->extra_add_index_flags = 0;
free(tree_ce);
if (o->debug_unpack)
printf("Unpacked %d entries from %s to %s using cache-tree\n",
nr_entries,
o->src_index->cache[pos]->name,
o->src_index->cache[pos + nr_entries - 1]->name);
return 0;
}

static int traverse_trees_recursive(int n, unsigned long dirmask,
unsigned long df_conflicts,
struct name_entry *names,
Expand All @@ -645,6 +774,17 @@ static int traverse_trees_recursive(int n, unsigned long dirmask,
void *buf[MAX_UNPACK_TREES];
struct traverse_info newinfo;
struct name_entry *p;
int nr_entries;

nr_entries = all_trees_same_as_cache_tree(n, dirmask, names, info);
if (nr_entries > 0) {
struct unpack_trees_options *o = info->data;
int pos = index_pos_by_traverse_info(names, info);

if (!o->merge || df_conflicts)
BUG("Wrong condition to get here buddy");
return traverse_by_cache_tree(pos, nr_entries, n, names, info);
}

p = names;
while (!p->mode)
Expand Down Expand Up @@ -811,6 +951,11 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info,
return ce;
}

/*
* Note that traverse_by_cache_tree() duplicates some logic in this function
* without actually calling it. If you change the logic here you may need to
* check and change there as well.
*/
static int unpack_nondirectories(int n, unsigned long mask,
unsigned long dirmask,
struct cache_entry **src,
Expand Down Expand Up @@ -995,6 +1140,11 @@ static void debug_unpack_callback(int n,
debug_name_entry(i, names + i);
}

/*
* Note that traverse_by_cache_tree() duplicates some logic in this funciton
* without actually calling it. If you change the logic here you may need to
* check and change there as well.
*/
static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *names, struct traverse_info *info)
{
struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
Expand Down Expand Up @@ -1282,6 +1432,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
int i, ret;
static struct cache_entry *dfc;
struct exclude_list el;
uint64_t start = getnanotime();

if (len > MAX_UNPACK_TREES)
die("unpack_trees takes at most %d trees", MAX_UNPACK_TREES);
Expand Down Expand Up @@ -1423,6 +1574,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
goto done;
}
}
trace_performance_since(start, "unpack trees");

o->src_index = NULL;
ret = check_updates(o) ? (-2) : 0;
Expand Down
1 change: 1 addition & 0 deletions unpack-trees.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ struct unpack_trees_options {
struct index_state result;

struct exclude_list *el; /* for internal use */
unsigned int extra_add_index_flags;
};

extern int unpack_trees(unsigned n, struct tree_desc *t,
Expand Down