From fb682b5865f01c2bc898d696ddae484ff727ed84 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 12 Apr 2023 10:52:22 -0400 Subject: [PATCH 1/3] gvfs-helper: add --max-retries to prefetch verb Signed-off-by: Jeff Hostetler --- gvfs-helper.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/gvfs-helper.c b/gvfs-helper.c index e8e6fbbdd9f361..87f1a8ec563f55 100644 --- a/gvfs-helper.c +++ b/gvfs-helper.c @@ -105,6 +105,11 @@ // The GVFS Protocol defines this value as a way to // request cached packfiles NEWER THAN this timestamp. // +// --max-retries= // defaults to "6" +// +// Number of retries after transient network errors. +// Set to zero to disable such retries. +// // server // // Interactive/sub-process mode. Listen for a series of commands @@ -3781,6 +3786,8 @@ static enum gh__error_code do_sub_cmd__prefetch(int argc, const char **argv) static const char *since_str; static struct option prefetch_options[] = { OPT_STRING(0, "since", &since_str, N_("since"), N_("seconds since epoch")), + OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries, + N_("retries for transient network errors")), OPT_END(), }; @@ -3800,6 +3807,8 @@ static enum gh__error_code do_sub_cmd__prefetch(int argc, const char **argv) if (my_parse_since(since_str, &seconds_since_epoch)) die("could not parse 'since' field"); } + if (gh__cmd_opts.max_retries < 0) + gh__cmd_opts.max_retries = 0; finish_init(1); From 3fb9a26308f6c1a503ef35ab7cceec4f1e016f29 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 13 Apr 2023 14:16:01 -0400 Subject: [PATCH 2/3] t5799: add tests to detect corrupt pack/idx files in prefetch Add "mayhem" keys to generate corrupt packfiles and/or corrupt idx files in prefetch by trashing the trailing checksum SHA. Add unit tests to t5799 to verify that `gvfs-helper` detects these corrupt pack/idx files. Currently, only the (bad-pack, no-idx) case is correctly detected, Because `gvfs-helper` needs to locally compute the idx file itself. A test for the (bad-pack, any-idx) case was also added (as a known breakage) because `gvfs-helper` assumes that when the cache server provides both, it doesn't need to verify them. We will fix that assumption in the next commit. Signed-off-by: Jeff Hostetler --- t/helper/test-gvfs-protocol.c | 82 ++++++++++++++++++++++++++++++++++- t/t5799-gvfs-helper.sh | 68 ++++++++++++++++++++++++++++- 2 files changed, 147 insertions(+), 3 deletions(-) diff --git a/t/helper/test-gvfs-protocol.c b/t/helper/test-gvfs-protocol.c index a437248a10feff..a1c287c6ce225f 100644 --- a/t/helper/test-gvfs-protocol.c +++ b/t/helper/test-gvfs-protocol.c @@ -1136,6 +1136,82 @@ static int ct_pack_sort_compare(const void *_a, const void *_b) return (a->ph.timestamp < b->ph.timestamp) ? -1 : (a->ph.timestamp != b->ph.timestamp); } +#define MY_MIN(a, b) ((a) < (b) ? (a) : (b)) + +/* + * Like copy.c:copy_fd(), but corrupt part of the trailing SHA (if the + * given mayhem key is defined) as we copy it to the destination file. + * + * We don't know (or care) if the input file is a pack file or idx + * file, just that the final bytes are part of a SHA that we can + * corrupt. + */ +static int copy_fd_with_checksum_mayhem(int ifd, int ofd, + const char *mayhem_key, + ssize_t nr_wrong_bytes) +{ + off_t in_cur, in_len; + ssize_t bytes_to_copy; + ssize_t bytes_remaining_to_copy; + char buffer[8192]; + + if (!mayhem_key || !*mayhem_key || !nr_wrong_bytes || + !string_list_has_string(&mayhem_list, mayhem_key)) + return copy_fd(ifd, ofd); + + in_cur = lseek(ifd, 0, SEEK_CUR); + if (in_cur < 0) + return in_cur; + + in_len = lseek(ifd, 0, SEEK_END); + if (in_len < 0) + return in_len; + + if (lseek(ifd, in_cur, SEEK_SET) < 0) + return -1; + + /* Copy the entire file except for the last few bytes. */ + + bytes_to_copy = (ssize_t)in_len - nr_wrong_bytes; + bytes_remaining_to_copy = bytes_to_copy; + while (bytes_remaining_to_copy) { + ssize_t to_read = MY_MIN(sizeof(buffer), bytes_remaining_to_copy); + ssize_t len = xread(ifd, buffer, to_read); + + if (!len) + return -1; /* error on unexpected EOF */ + if (len < 0) + return -1; + if (write_in_full(ofd, buffer, len) < 0) + return -1; + + bytes_remaining_to_copy -= len; + } + + /* Read the trailing bytes so that we can alter them before copying. */ + + while (nr_wrong_bytes) { + ssize_t to_read = MY_MIN(sizeof(buffer), nr_wrong_bytes); + ssize_t len = xread(ifd, buffer, to_read); + ssize_t k; + + if (!len) + return -1; /* error on unexpected EOF */ + if (len < 0) + return -1; + + for (k = 0; k < len; k++) + buffer[k] ^= 0xff; + + if (write_in_full(ofd, buffer, len) < 0) + return -1; + + nr_wrong_bytes -= len; + } + + return 0; +} + static enum worker_result send_ct_item(const struct ct_pack_item *item) { struct ph ph_le; @@ -1157,7 +1233,8 @@ static enum worker_result send_ct_item(const struct ct_pack_item *item) trace2_printf("%s: sending prefetch pack '%s'", TR2_CAT, item->path_pack.buf); fd_pack = git_open_cloexec(item->path_pack.buf, O_RDONLY); - if (fd_pack == -1 || copy_fd(fd_pack, 1)) { + if (fd_pack == -1 || + copy_fd_with_checksum_mayhem(fd_pack, 1, "bad_prefetch_pack_sha", 4)) { logerror("could not send packfile"); wr = WR_IO_ERROR; goto done; @@ -1167,7 +1244,8 @@ static enum worker_result send_ct_item(const struct ct_pack_item *item) trace2_printf("%s: sending prefetch idx '%s'", TR2_CAT, item->path_idx.buf); fd_idx = git_open_cloexec(item->path_idx.buf, O_RDONLY); - if (fd_idx == -1 || copy_fd(fd_idx, 1)) { + if (fd_idx == -1 || + copy_fd_with_checksum_mayhem(fd_idx, 1, "bad_prefetch_idx_sha", 4)) { logerror("could not send idx"); wr = WR_IO_ERROR; goto done; diff --git a/t/t5799-gvfs-helper.sh b/t/t5799-gvfs-helper.sh index 3a10b6b68ff87d..8ceb71e69b4205 100755 --- a/t/t5799-gvfs-helper.sh +++ b/t/t5799-gvfs-helper.sh @@ -1299,7 +1299,7 @@ test_expect_success 'duplicate and busy: vfs- packfile' ' # content matches the requested SHA. # test_expect_success 'catch corrupted loose object' ' -# test_when_finished "per_test_cleanup" && + test_when_finished "per_test_cleanup" && start_gvfs_protocol_server_with_mayhem corrupt_loose && test_must_fail \ @@ -1322,4 +1322,70 @@ test_expect_success 'catch corrupted loose object' ' git -C "$REPO_T1" fsck ' +################################################################# +# Ensure that we can detect when we receive a corrupted packfile +# from the server. This is not concerned with network IO errors, +# but rather cases when the cache or origin server generates or +# sends an invalid packfile. +# +# For example, if the server throws an exception and writes the +# stack trace to the socket rather than or in addition to the +# packfile content. +# +# Or for example, if the packfile on the server's disk is corrupt +# and it sends it correctly, but the original data was already +# garbage, so the client still has garbage (and retrying won't +# help). +################################################################# + +# Send corrupt PACK files w/o IDX files (so that `gvfs-helper` +# must use `index-pack` to create it. (And as a side-effect, +# validate the PACK file is not corrupt.) +test_expect_success 'prefetch corrupt pack without idx' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem \ + bad_prefetch_pack_sha \ + no_prefetch_idx && + + test_must_fail \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + prefetch \ + --max-retries=0 \ + --since="1000000000" \ + >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + # Verify corruption detected in pack when building + # local idx file for it. + + grep -q "error: .* index-pack failed" OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server +' + test_done From e3f2759e2872f9b9dc1661087a7e7a974e6d417e Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 13 Apr 2023 16:35:16 -0400 Subject: [PATCH 3/3] gvfs-helper: ignore .idx files in prefetch multi-part responses The GVFS cache server can return multiple pairs of (.pack, .idx) files. If both are provided, `gvfs-helper` assumes that they are valid without any validation. This might cause problems if the .pack file is corrupt inside the data stream. (This might happen if the cache server sends extra unexpected STDERR data or if the .pack file is corrupt on the cache server's disk.) All of the .pack file verification logic is already contained within `git index-pack`, so let's ignore the .idx from the data stream and force compute it. This defeats the purpose of some of the data cacheing on the cache server, but safety is more important. Signed-off-by: Jeff Hostetler --- gvfs-helper.c | 57 +++++++++++++++++------------------------- t/t5799-gvfs-helper.sh | 5 +--- 2 files changed, 24 insertions(+), 38 deletions(-) diff --git a/gvfs-helper.c b/gvfs-helper.c index 87f1a8ec563f55..3d6ca2b9c1708f 100644 --- a/gvfs-helper.c +++ b/gvfs-helper.c @@ -2121,7 +2121,6 @@ static void extract_packfile_from_multipack( { struct ph ph; struct tempfile *tempfile_pack = NULL; - struct tempfile *tempfile_idx = NULL; int result = -1; int b_no_idx_in_multipack; struct object_id packfile_checksum; @@ -2155,16 +2154,14 @@ static void extract_packfile_from_multipack( b_no_idx_in_multipack = (ph.idx_len == maximum_unsigned_value_of_type(uint64_t) || ph.idx_len == 0); - if (b_no_idx_in_multipack) { - my_create_tempfile(status, 0, "pack", &tempfile_pack, NULL, NULL); - if (!tempfile_pack) - goto done; - } else { - /* create a pair of tempfiles with the same basename */ - my_create_tempfile(status, 0, "pack", &tempfile_pack, "idx", &tempfile_idx); - if (!tempfile_pack || !tempfile_idx) - goto done; - } + /* + * We are going to harden `gvfs-helper` here and ignore the .idx file + * if it is provided and always compute it locally so that we get the + * added verification that `git index-pack` provides. + */ + my_create_tempfile(status, 0, "pack", &tempfile_pack, NULL, NULL); + if (!tempfile_pack) + goto done; /* * Copy the current packfile from the open stream and capture @@ -2191,38 +2188,31 @@ static void extract_packfile_from_multipack( oid_to_hex_r(hex_checksum, &packfile_checksum); - if (b_no_idx_in_multipack) { - /* - * The server did not send the corresponding .idx, so - * we have to compute it ourselves. - */ - strbuf_addbuf(&temp_path_idx, &temp_path_pack); - strbuf_strip_suffix(&temp_path_idx, ".pack"); - strbuf_addstr(&temp_path_idx, ".idx"); + /* + * Always compute the .idx file from the .pack file. + */ + strbuf_addbuf(&temp_path_idx, &temp_path_pack); + strbuf_strip_suffix(&temp_path_idx, ".pack"); + strbuf_addstr(&temp_path_idx, ".idx"); - my_run_index_pack(params, status, - &temp_path_pack, &temp_path_idx, - NULL); - if (status->ec != GH__ERROR_CODE__OK) - goto done; + my_run_index_pack(params, status, + &temp_path_pack, &temp_path_idx, + NULL); + if (status->ec != GH__ERROR_CODE__OK) + goto done; - } else { + if (!b_no_idx_in_multipack) { /* * Server sent the .idx immediately after the .pack in the - * data stream. I'm tempted to verify it, but that defeats - * the purpose of having it cached... + * data stream. Skip over it. */ - if (my_copy_fd_len(fd_multipack, get_tempfile_fd(tempfile_idx), - ph.idx_len) < 0) { + if (lseek(fd_multipack, ph.idx_len, SEEK_CUR) < 0) { strbuf_addf(&status->error_message, - "could not extract index[%d] in multipack", + "could not skip index[%d] in multipack", k); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; goto done; } - - strbuf_addstr(&temp_path_idx, get_tempfile_path(tempfile_idx)); - close_tempfile_gently(tempfile_idx); } strbuf_addf(&buf_timestamp, "%u", (unsigned int)ph.timestamp); @@ -2237,7 +2227,6 @@ static void extract_packfile_from_multipack( done: delete_tempfile(&tempfile_pack); - delete_tempfile(&tempfile_idx); strbuf_release(&temp_path_pack); strbuf_release(&temp_path_idx); strbuf_release(&final_path_pack); diff --git a/t/t5799-gvfs-helper.sh b/t/t5799-gvfs-helper.sh index 8ceb71e69b4205..73604aabb78f7f 100755 --- a/t/t5799-gvfs-helper.sh +++ b/t/t5799-gvfs-helper.sh @@ -1367,14 +1367,11 @@ test_expect_success 'prefetch corrupt pack without idx' ' # Send corrupt PACK files with IDX files. Since the cache server # sends both, `gvfs-helper` might fail to verify both of them. -test_expect_failure 'prefetch corrupt pack with corrupt idx' ' +test_expect_success 'prefetch corrupt pack with corrupt idx' ' test_when_finished "per_test_cleanup" && start_gvfs_protocol_server_with_mayhem \ bad_prefetch_pack_sha && - # TODO This is a false-positive since `gvfs-helper` - # TODO does not verify either of them when a pair - # TODO is sent. test_must_fail \ git -C "$REPO_T1" gvfs-helper \ --cache-server=disable \