From 77e51986be84b52470ff2e313cf433dce0652594 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 18 Dec 2019 12:13:46 -0500 Subject: [PATCH] gvfs-helper: better support for concurrent packfile fetches Teach gvfs-helper to better support the concurrent fetching of the same packfile by multiple instances. If 2 instances of gvfs-helper did a POST and requested the same set of OIDs, they might receive the exact same packfile (same checksum SHA). Both processes would then race to install their copy of the .pack and .idx files into the ODB/pack directory. This is not a problem on Unix (because of filesystem semantics). On Windows, this can cause an EBUSY/EPERM problem for the loser while the winner is holding a handle to the target files. (The existing packfile code already handled simple the existence and/or replacement case.) The solution presented here is to silently let the loser claim victory IIF the .pack and .idx are already present in the ODB. (We can't check this in advance because we don't know the packfile SHA checksum until after we receive it and run index-pack.) We avoid using a per-packfile lockfile (or a single lockfile for the `vfs-` prefix) to avoid the usual issues with stale lockfiles. Signed-off-by: Jeff Hostetler --- gvfs-helper.c | 29 ++++++++++- t/t5799-gvfs-helper.sh | 114 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+), 2 deletions(-) diff --git a/gvfs-helper.c b/gvfs-helper.c index 396082c3739bde..3f291a56a16df8 100644 --- a/gvfs-helper.c +++ b/gvfs-helper.c @@ -1871,12 +1871,36 @@ static void my_finalize_packfile(struct gh__request_params *params, struct strbuf *final_path_idx, struct strbuf *final_filename) { + /* + * Install the .pack and .idx into the ODB pack directory. + * + * We might be racing with other instances of gvfs-helper if + * we, in parallel, both downloaded the exact same packfile + * (with the same checksum SHA) and try to install it at the + * same time. This might happen on Windows where the loser + * can get an EBUSY or EPERM trying to move/rename the + * tempfile into the pack dir, for example. + * + * So, we always install the .pack before the .idx for + * consistency. And only if *WE* created the .pack and .idx + * files, do we create the matching .keep (when requested). + * + * If we get an error and the target files already exist, we + * silently eat the error. Note that finalize_object_file() + * has already munged errno (and it has various creation + * strategies), so we don't bother looking at it. + */ if (finalize_object_file(temp_path_pack->buf, final_path_pack->buf) || finalize_object_file(temp_path_idx->buf, final_path_idx->buf)) { unlink(temp_path_pack->buf); unlink(temp_path_idx->buf); - unlink(final_path_pack->buf); - unlink(final_path_idx->buf); + + if (file_exists(final_path_pack->buf) && + file_exists(final_path_idx->buf)) { + trace2_printf("%s: assuming ok for %s", TR2_CAT, final_path_pack->buf); + goto assume_ok; + } + strbuf_addf(&status->error_message, "could not install packfile '%s'", final_path_pack->buf); @@ -1899,6 +1923,7 @@ static void my_finalize_packfile(struct gh__request_params *params, strbuf_release(&keep); } +assume_ok: if (params->result_list) { struct strbuf result_msg = STRBUF_INIT; diff --git a/t/t5799-gvfs-helper.sh b/t/t5799-gvfs-helper.sh index b654b1e4cdb757..1ac8523970ac28 100755 --- a/t/t5799-gvfs-helper.sh +++ b/t/t5799-gvfs-helper.sh @@ -370,6 +370,10 @@ verify_objects_in_shared_cache () { return 0 } +# gvfs-helper prints a "packfile " message for each received +# packfile to stdout. Verify that we received the expected number +# of packfiles. +# verify_received_packfile_count () { if test $# -eq 1 then @@ -412,6 +416,19 @@ verify_prefetch_keeps () { return 0 } +# Verify that the number of vfs- packfile present in the shared-cache +# matches our expectations. +# +verify_vfs_packfile_count () { + count=$(( $(ls -1 "$SHARED_CACHE_T1"/pack/vfs-*.pack | wc -l) )) + if test $count -ne $1 + then + echo "verify_vfs_packfile_count: expected $1; actual $count" + return 1 + fi + return 0 +} + per_test_cleanup () { stop_gvfs_protocol_server @@ -1174,4 +1191,101 @@ test_expect_success 'integration: fully implicit: diff 2 commits' ' >OUT.output 2>OUT.stderr ' +################################################################# +# Duplicate packfile tests. +# +# If we request a fixed set of blobs, we should get a unique packfile +# of the form "vfs-.{pack,idx}". It we request that same set +# again, the server should create and send the exact same packfile. +# True webservers might build the custom packfile in random order, +# but our test webserver should give us consistent results. +# +# Verify that we can handle the duplicate pack and idx file properly. +################################################################# + +test_expect_success 'duplicate: vfs- packfile' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + post \ + <"$OIDS_BLOBS_FILE" >OUT.output 2>OUT.stderr && + verify_received_packfile_count 1 && + verify_vfs_packfile_count 1 && + + # Re-fetch the same packfile. We do not care if it replaces + # first one or if it silently fails to overwrite the existing + # one. We just confirm that afterwards we only have 1 packfile. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + post \ + <"$OIDS_BLOBS_FILE" >OUT.output 2>OUT.stderr && + verify_received_packfile_count 1 && + verify_vfs_packfile_count 1 && + + stop_gvfs_protocol_server +' + +# Return the absolute pathname of the first received packfile. +# +first_received_packfile_pathname () { + fn=$(sed -n '/^packfile/p' OUT.output \ + 2>OUT.stderr && + verify_received_packfile_count 1 && + verify_vfs_packfile_count 1 && + + # Re-fetch the same packfile, but hold the existing packfile + # open for writing on an obscure (and randomly-chosen) file + # descriptor. + # + # This should cause the replacement-install to fail (at least + # on Windows) with an EBUSY or EPERM or something. + # + # Verify that that error is eaten. We do not care if the + # replacement is retried or if gvfs-helper simply discards the + # second instance. We just confirm that afterwards we only + # have 1 packfile on disk and that the command "lies" and reports + # that it created the existing packfile. (We want the lie because + # in normal usage, gh-client has already built the packed-git list + # in memory and is using gvfs-helper to fetch missing objects; + # gh-client does not care who does the fetch, but it needs to + # update its packed-git list and restart the object lookup.) + # + PACK=$(first_received_packfile_pathname) && + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + post \ + <"$OIDS_BLOBS_FILE" \ + >OUT.output \ + 2>OUT.stderr \ + 9>>"$PACK" && + verify_received_packfile_count 1 && + verify_vfs_packfile_count 1 && + + stop_gvfs_protocol_server +' + test_done