From e717a31ae72e7f1bd58a148cb2f68ea3e6db9d34 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 28 Jun 2024 10:53:38 -0400 Subject: [PATCH 1/4] t5799: explicitly test gvfs-helper --fallback and --no-fallback Construct 2 new unit tests to explicitly verify the use of `--fallback` and `--no-fallback` arguments to `gvfs-helper`. When a cache-server is enabled, `gvfs-helper` will try to fetch objects from it rather than the origin server. If the cache-server fails (and all cache-server retry attempts have been exhausted), `gvfs-helper` can optionally "fallback" and try to fetch the objects from the origin server. (The retry logic is also applied to the origin server, if the origin server fails on the first request.) Add new unit tests to verify that `gvfs-helper` respects both the `--max-retries` and `--[no-]fallback` arguments. We use the "http_503" mayhem feature of the `test_gvfs_protocol` server to force a 503 response on all requests to the cache-server and the origin server end-points. We can then count the number of connection requests that `gvfs-helper` makes to the server and confirm both the per-server retries and whether fallback was attempted. Signed-off-by: Jeff Hostetler --- t/t5799-gvfs-helper.sh | 64 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/t/t5799-gvfs-helper.sh b/t/t5799-gvfs-helper.sh index 0a2d6180430b50..8fb81d1fb32c45 100755 --- a/t/t5799-gvfs-helper.sh +++ b/t/t5799-gvfs-helper.sh @@ -1036,6 +1036,70 @@ test_expect_success 'successful retry after http-error: origin get' ' verify_connection_count 2 ' +################################################################# +# So far we have confirmed that gvfs-helper can recover from a network +# error (with retries, since the cache-server was disabled in all of +# the above tests). Try again with fallback turned on. +# +# With mayhem "http_503" turned on both the cache and origin server +# will always throw a 503 error. +# +# Confirm that we tried to make six connections: we should hit the +# cache-server 3 times (one initial attempt and two retries) and then +# try the origin server 3 times. +# +################################################################# + +test_expect_success 'http-error: 503 Service Unavailable (with retry and fallback)' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem http_503 && + + test_expect_code $GH__ERROR_CODE__HTTP_503 \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=trust \ + --remote=origin \ + --fallback \ + get \ + --max-retries=2 \ + <"$OIDS_FILE" >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + grep -q "error: get: (http:503)" OUT.stderr && + verify_connection_count 6 +' + +################################################################# +# Now repeat the above, but explicitly turn off fallback. +# +# Again, we use mayhem "http_503". However, with fallback turned +# off, we will only attempt the 3 connections to the cache server. +# We will not try to hit the origin server. +# +# So we should only see a total of 3 connections rather than the +# six in the previous test. +# +################################################################# + +test_expect_success 'http-error: 503 Service Unavailable (with retry and no-fallback)' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem http_503 && + + test_expect_code $GH__ERROR_CODE__HTTP_503 \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=trust \ + --remote=origin \ + --no-fallback \ + get \ + --max-retries=2 \ + <"$OIDS_FILE" >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + grep -q "error: get: (http:503)" OUT.stderr && + verify_connection_count 3 +' + ################################################################# # Test HTTP Auth # From c2df878a796046b2b164a58bf7819aea192a26ae Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 27 Jun 2024 11:02:36 -0400 Subject: [PATCH 2/4] gvfs-helper: don't fallback with new config By default, GVFS Protocol-enabled Scalar clones will fall back to the origin server if there is a network issue with the cache servers. However (and especially for the prefetch endpoint) this may be a very expensive operation for the origin server, leading to the user being throttled. This shows up later in cases such as 'git push' or other web operations. To avoid this, create a new config option, 'gvfs.fallback', which defaults to true. When set to 'false', pass '--no-fallback' from the gvfs-helper client to the child gvfs-helper server process. This will allow users who have hit this problem to avoid it in the future. In case this becomes a more widespread problem, engineering systems can enable the config option more broadly. Enabling the config will of course lead to immediate failures for users, but at least that will help diagnose the problem when it occurs instead of later when the throttling shows up and the server load has already passed, damage done. Signed-off-by: Derrick Stolee --- Documentation/config/gvfs.txt | 5 +++++ gvfs-helper-client.c | 11 ++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Documentation/config/gvfs.txt b/Documentation/config/gvfs.txt index 6ab221ded36c91..7224939ac0b270 100644 --- a/Documentation/config/gvfs.txt +++ b/Documentation/config/gvfs.txt @@ -3,3 +3,8 @@ gvfs.cache-server:: gvfs.sharedcache:: TODO + +gvfs.fallback:: + If set to `false`, then never fallback to the origin server when the cache + server fails to connect. This will alert users to failures with the cache + server, but avoid causing throttling on the origin server. diff --git a/gvfs-helper-client.c b/gvfs-helper-client.c index 6f38fe1c4353d0..45d4d86783c73b 100644 --- a/gvfs-helper-client.c +++ b/gvfs-helper-client.c @@ -14,6 +14,7 @@ #include "quote.h" #include "packfile.h" #include "hex.h" +#include "config.h" static struct oidset gh_client__oidset_queued = OIDSET_INIT; static unsigned long gh_client__oidset_count; @@ -339,6 +340,7 @@ static struct gh_server__process *gh_client__find_long_running_process( struct gh_server__process *entry; struct strvec argv = STRVEC_INIT; struct strbuf quoted = STRBUF_INIT; + int fallback; gh_client__choose_odb(); @@ -346,10 +348,17 @@ static struct gh_server__process *gh_client__find_long_running_process( * TODO decide what defaults we want. */ strvec_push(&argv, "gvfs-helper"); - strvec_push(&argv, "--fallback"); strvec_push(&argv, "--cache-server=trust"); strvec_pushf(&argv, "--shared-cache=%s", gh_client__chosen_odb->path); + + /* If gvfs.fallback=false, then don't add --fallback. */ + if (!git_config_get_bool("gvfs.fallback", &fallback) && + !fallback) + strvec_push(&argv, "--no-fallback"); + else + strvec_push(&argv, "--fallback"); + strvec_push(&argv, "server"); sq_quote_argv_pretty("ed, argv.v); From 151da772a348527509b08a701bbfb9a02a57f5c4 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 28 Jun 2024 13:07:31 -0400 Subject: [PATCH 3/4] test-gvfs-protocol: add cache_http_503 to mayhem Create new `cache_http_503` mayhem method where only the cache server sends a 503. The normal `http_503` directs both cache and origin server to send 503s. This will be used to help test fallback. Signed-off-by: Jeff Hostetler --- t/helper/test-gvfs-protocol.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/t/helper/test-gvfs-protocol.c b/t/helper/test-gvfs-protocol.c index 21a0d3bc78fada..6fb62adad452d8 100644 --- a/t/helper/test-gvfs-protocol.c +++ b/t/helper/test-gvfs-protocol.c @@ -1575,6 +1575,14 @@ static enum worker_result dispatch(struct req *req) const char *method; enum worker_result wr; + if (strstr(req->uri_base.buf, MY_SERVER_TYPE__CACHE)) { + if (string_list_has_string(&mayhem_list, "cache_http_503")) { + logmayhem("cache_http_503"); + return send_http_error(1, 503, "Service Unavailable", 2, + WR_MAYHEM | WR_HANGUP); + } + } + if (string_list_has_string(&mayhem_list, "close_no_write")) { logmayhem("close_no_write"); return WR_MAYHEM | WR_HANGUP; From c457a81e817e3bc5ff0276bf5af643c0f1465496 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 28 Jun 2024 13:09:35 -0400 Subject: [PATCH 4/4] t5799: add unit tests for new `gvfs.fallback` config setting Signed-off-by: Jeff Hostetler --- t/t5799-gvfs-helper.sh | 98 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/t/t5799-gvfs-helper.sh b/t/t5799-gvfs-helper.sh index 8fb81d1fb32c45..5a118e522c8b07 100755 --- a/t/t5799-gvfs-helper.sh +++ b/t/t5799-gvfs-helper.sh @@ -24,8 +24,12 @@ test_set_port GIT_TEST_GVFS_PROTOCOL_PORT # actually use it). We are only testing explicit object # fetching using gvfs-helper.exe in isolation. # +# repo_t2: +# Another empty repo to use after we contaminate t1. +# REPO_SRC="$(pwd)"/repo_src REPO_T1="$(pwd)"/repo_t1 +REPO_T2="$(pwd)"/repo_t2 # Setup some loopback URLs where test-gvfs-protocol.exe will be # listening. We will spawn it directly inside the repo_src directory, @@ -45,6 +49,7 @@ ORIGIN_URL=http://$HOST_PORT/servertype/origin CACHE_URL=http://$HOST_PORT/servertype/cache SHARED_CACHE_T1="$(pwd)"/shared_cache_t1 +SHARED_CACHE_T2="$(pwd)"/shared_cache_t2 # The pid-file is created by test-gvfs-protocol.exe when it starts. # The server will shut down if/when we delete it. (This is a little @@ -182,6 +187,10 @@ test_expect_success 'setup repos' ' mkdir "$SHARED_CACHE_T1/pack" && mkdir "$SHARED_CACHE_T1/info" && # + mkdir "$SHARED_CACHE_T2" && + mkdir "$SHARED_CACHE_T2/pack" && + mkdir "$SHARED_CACHE_T2/info" && + # # setup repo_t1 and point all of the gvfs.* values to repo_src. # test_create_repo "$REPO_T1" && @@ -191,6 +200,13 @@ test_expect_success 'setup repos' ' git -C "$REPO_T1" config --local gvfs.sharedCache "$SHARED_CACHE_T1" && echo "$SHARED_CACHE_T1" >> "$REPO_T1"/.git/objects/info/alternates && # + test_create_repo "$REPO_T2" && + git -C "$REPO_T2" branch -M main && + git -C "$REPO_T2" remote add origin $ORIGIN_URL && + git -C "$REPO_T2" config --local gvfs.cache-server $CACHE_URL && + git -C "$REPO_T2" config --local gvfs.sharedCache "$SHARED_CACHE_T2" && + echo "$SHARED_CACHE_T2" >> "$REPO_T2"/.git/objects/info/alternates && + # # # cat <<-EOF >creds.txt && @@ -203,6 +219,7 @@ test_expect_success 'setup repos' ' EOF chmod 755 creds.sh && git -C "$REPO_T1" config --local credential.helper "!f() { cat \"$(pwd)\"/creds.txt; }; f" && + git -C "$REPO_T2" config --local credential.helper "!f() { cat \"$(pwd)\"/creds.txt; }; f" && # # Create some test data sets. # @@ -1257,6 +1274,87 @@ test_expect_success 'integration: fully implicit: diff 2 commits' ' >OUT.output 2>OUT.stderr ' +# T1 should be considered contaminated at this point. + +################################################################# +# gvfs-helper.exe defaults to no fallback. +# gvfs-helper-client.c defaults to adding `--fallback` to child process. +# +# `gvfs.fallback` was added to change the default behavior in the +# gvfs-helper-client.c code to add either `--fallback` or `--no-fallback` +# (for origin server load reasons). +# +# When `gvfs.fallback` is unset, we default to TRUE and pass `--fallback`. +# Otherwise, we use the boolean value to decide. +# +# NOTE: We DO NOT attempt to count connection requests in the +# following tests. Since we are using a normal `git` command to drive +# the `gvfs-helper-client.c` code (and spawn `git-gvfs-helper.exe`) we +# cannot make assumptions on the number of child processes or +# reqeusts. The "promisor" logic may drive one or more single-item +# GETs or a series of bulk POST attempts. Therefore, we must rely +# only on the result of the command and (implicitly) whether all +# missing objects were resolved. We use mayhem features to selectively +# break the cache and origin servers. +################################################################# + +test_expect_success 'integration: implicit-get: http_503: diff 2 commits' ' + test_when_finished "per_test_cleanup" && + + # Tell both servers to always send 503. + start_gvfs_protocol_server_with_mayhem http_503 && + + # Implicitly demand-load everything without any pre-seeding. + # (We cannot tell from whether fallback was used or not in this + # limited test.) + # + test_must_fail \ + git -C "$REPO_T2" -c core.useGVFSHelper=true \ + diff $(cat m1.branch)..$(cat m3.branch) \ + >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server +' + +test_expect_success 'integration: implicit-get: cache_http_503,no-fallback: diff 2 commits' ' + test_when_finished "per_test_cleanup" && + + # Tell cache server to send 503 and origin server to send 200. + start_gvfs_protocol_server_with_mayhem cache_http_503 && + + # Implicitly demand-load everything without any pre-seeding. + # This should fail because we do not allow fallback. + # + test_must_fail \ + git -C "$REPO_T2" \ + -c core.useGVFSHelper=true \ + -c gvfs.fallback=false \ + diff $(cat m1.branch)..$(cat m3.branch) \ + >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server +' + +test_expect_success 'integration: implicit-get: cache_http_503,with-fallback: diff 2 commits' ' + test_when_finished "per_test_cleanup" && + + # Tell cache server to send 503 and origin server to send 200. + start_gvfs_protocol_server_with_mayhem cache_http_503 && + + # Implicitly demand-load everything without any pre-seeding. + # + git -C "$REPO_T2" \ + -c core.useGVFSHelper=true \ + -c gvfs.fallback=true \ + diff $(cat m1.branch)..$(cat m3.branch) \ + >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server +' + +# T2 should be considered contaminated at this point. + + ################################################################# # Duplicate packfile tests. #