From 99bd372a63f984c0f6f23832caa681a521bfdb72 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 13 Apr 2023 14:16:01 -0400 Subject: [PATCH] t5799: add tests to detect corrupt pack/idx files in prefetch Add "mayhem" keys to generate corrupt packfiles and/or corrupt idx files in prefetch by trashing the trailing checksum SHA. Add unit tests to t5799 to verify that `gvfs-helper` detects these corrupt pack/idx files. Currently, only the (bad-pack, no-idx) case is correctly detected, Because `gvfs-helper` needs to locally compute the idx file itself. A test for the (bad-pack, any-idx) case was also added (as a known breakage) because `gvfs-helper` assumes that when the cache server provides both, it doesn't need to verify them. We will fix that assumption in the next commit. Signed-off-by: Jeff Hostetler --- t/helper/test-gvfs-protocol.c | 82 ++++++++++++++++++++++++++++++++++- t/t5799-gvfs-helper.sh | 68 ++++++++++++++++++++++++++++- 2 files changed, 147 insertions(+), 3 deletions(-) diff --git a/t/helper/test-gvfs-protocol.c b/t/helper/test-gvfs-protocol.c index 9632e3205a7aee..e64311113f48a7 100644 --- a/t/helper/test-gvfs-protocol.c +++ b/t/helper/test-gvfs-protocol.c @@ -1145,6 +1145,82 @@ static int ct_pack_sort_compare(const void *_a, const void *_b) return (a->ph.timestamp < b->ph.timestamp) ? -1 : (a->ph.timestamp != b->ph.timestamp); } +#define MY_MIN(a, b) ((a) < (b) ? (a) : (b)) + +/* + * Like copy.c:copy_fd(), but corrupt part of the trailing SHA (if the + * given mayhem key is defined) as we copy it to the destination file. + * + * We don't know (or care) if the input file is a pack file or idx + * file, just that the final bytes are part of a SHA that we can + * corrupt. + */ +static int copy_fd_with_checksum_mayhem(int ifd, int ofd, + const char *mayhem_key, + ssize_t nr_wrong_bytes) +{ + off_t in_cur, in_len; + ssize_t bytes_to_copy; + ssize_t bytes_remaining_to_copy; + char buffer[8192]; + + if (!mayhem_key || !*mayhem_key || !nr_wrong_bytes || + !string_list_has_string(&mayhem_list, mayhem_key)) + return copy_fd(ifd, ofd); + + in_cur = lseek(ifd, 0, SEEK_CUR); + if (in_cur < 0) + return in_cur; + + in_len = lseek(ifd, 0, SEEK_END); + if (in_len < 0) + return in_len; + + if (lseek(ifd, in_cur, SEEK_SET) < 0) + return -1; + + /* Copy the entire file except for the last few bytes. */ + + bytes_to_copy = (ssize_t)in_len - nr_wrong_bytes; + bytes_remaining_to_copy = bytes_to_copy; + while (bytes_remaining_to_copy) { + ssize_t to_read = MY_MIN(sizeof(buffer), bytes_remaining_to_copy); + ssize_t len = xread(ifd, buffer, to_read); + + if (!len) + return -1; /* error on unexpected EOF */ + if (len < 0) + return -1; + if (write_in_full(ofd, buffer, len) < 0) + return -1; + + bytes_remaining_to_copy -= len; + } + + /* Read the trailing bytes so that we can alter them before copying. */ + + while (nr_wrong_bytes) { + ssize_t to_read = MY_MIN(sizeof(buffer), nr_wrong_bytes); + ssize_t len = xread(ifd, buffer, to_read); + ssize_t k; + + if (!len) + return -1; /* error on unexpected EOF */ + if (len < 0) + return -1; + + for (k = 0; k < len; k++) + buffer[k] ^= 0xff; + + if (write_in_full(ofd, buffer, len) < 0) + return -1; + + nr_wrong_bytes -= len; + } + + return 0; +} + static enum worker_result send_ct_item(const struct ct_pack_item *item) { struct ph ph_le; @@ -1166,7 +1242,8 @@ static enum worker_result send_ct_item(const struct ct_pack_item *item) trace2_printf("%s: sending prefetch pack '%s'", TR2_CAT, item->path_pack.buf); fd_pack = git_open_cloexec(item->path_pack.buf, O_RDONLY); - if (fd_pack == -1 || copy_fd(fd_pack, 1)) { + if (fd_pack == -1 || + copy_fd_with_checksum_mayhem(fd_pack, 1, "bad_prefetch_pack_sha", 4)) { logerror("could not send packfile"); wr = WR_IO_ERROR; goto done; @@ -1176,7 +1253,8 @@ static enum worker_result send_ct_item(const struct ct_pack_item *item) trace2_printf("%s: sending prefetch idx '%s'", TR2_CAT, item->path_idx.buf); fd_idx = git_open_cloexec(item->path_idx.buf, O_RDONLY); - if (fd_idx == -1 || copy_fd(fd_idx, 1)) { + if (fd_idx == -1 || + copy_fd_with_checksum_mayhem(fd_idx, 1, "bad_prefetch_idx_sha", 4)) { logerror("could not send idx"); wr = WR_IO_ERROR; goto done; diff --git a/t/t5799-gvfs-helper.sh b/t/t5799-gvfs-helper.sh index 3a10b6b68ff87d..8ceb71e69b4205 100755 --- a/t/t5799-gvfs-helper.sh +++ b/t/t5799-gvfs-helper.sh @@ -1299,7 +1299,7 @@ test_expect_success 'duplicate and busy: vfs- packfile' ' # content matches the requested SHA. # test_expect_success 'catch corrupted loose object' ' -# test_when_finished "per_test_cleanup" && + test_when_finished "per_test_cleanup" && start_gvfs_protocol_server_with_mayhem corrupt_loose && test_must_fail \ @@ -1322,4 +1322,70 @@ test_expect_success 'catch corrupted loose object' ' git -C "$REPO_T1" fsck ' +################################################################# +# Ensure that we can detect when we receive a corrupted packfile +# from the server. This is not concerned with network IO errors, +# but rather cases when the cache or origin server generates or +# sends an invalid packfile. +# +# For example, if the server throws an exception and writes the +# stack trace to the socket rather than or in addition to the +# packfile content. +# +# Or for example, if the packfile on the server's disk is corrupt +# and it sends it correctly, but the original data was already +# garbage, so the client still has garbage (and retrying won't +# help). +################################################################# + +# Send corrupt PACK files w/o IDX files (so that `gvfs-helper` +# must use `index-pack` to create it. (And as a side-effect, +# validate the PACK file is not corrupt.) +test_expect_success 'prefetch corrupt pack without idx' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem \ + bad_prefetch_pack_sha \ + no_prefetch_idx && + + test_must_fail \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + prefetch \ + --max-retries=0 \ + --since="1000000000" \ + >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + # Verify corruption detected in pack when building + # local idx file for it. + + grep -q "error: .* index-pack failed" OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server +' + test_done