Skip to content

Commit

Permalink
t5799: add tests to detect corrupt pack/idx files in prefetch
Browse files Browse the repository at this point in the history
Add "mayhem" keys to generate corrupt packfiles and/or corrupt idx
files in prefetch by trashing the trailing checksum SHA.

Add unit tests to t5799 to verify that `gvfs-helper` detects these
corrupt pack/idx files.

Currently, only the (bad-pack, no-idx) case is correctly detected,
Because `gvfs-helper` needs to locally compute the idx file itself.

A test for the (bad-pack, any-idx) case was also added (as a known
breakage) because `gvfs-helper` assumes that when the cache server
provides both, it doesn't need to verify them.  We will fix that
assumption in the next commit.

Signed-off-by: Jeff Hostetler <jeffhostetler@github.com>
  • Loading branch information
jeffhostetler authored and dscho committed Aug 11, 2023
1 parent cf9ceb6 commit fa7358e
Show file tree
Hide file tree
Showing 2 changed files with 147 additions and 3 deletions.
82 changes: 80 additions & 2 deletions t/helper/test-gvfs-protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -1145,6 +1145,82 @@ static int ct_pack_sort_compare(const void *_a, const void *_b)
return (a->ph.timestamp < b->ph.timestamp) ? -1 : (a->ph.timestamp != b->ph.timestamp);
}

#define MY_MIN(a, b) ((a) < (b) ? (a) : (b))

/*
* Like copy.c:copy_fd(), but corrupt part of the trailing SHA (if the
* given mayhem key is defined) as we copy it to the destination file.
*
* We don't know (or care) if the input file is a pack file or idx
* file, just that the final bytes are part of a SHA that we can
* corrupt.
*/
static int copy_fd_with_checksum_mayhem(int ifd, int ofd,
const char *mayhem_key,
ssize_t nr_wrong_bytes)
{
off_t in_cur, in_len;
ssize_t bytes_to_copy;
ssize_t bytes_remaining_to_copy;
char buffer[8192];

if (!mayhem_key || !*mayhem_key || !nr_wrong_bytes ||
!string_list_has_string(&mayhem_list, mayhem_key))
return copy_fd(ifd, ofd);

in_cur = lseek(ifd, 0, SEEK_CUR);
if (in_cur < 0)
return in_cur;

in_len = lseek(ifd, 0, SEEK_END);
if (in_len < 0)
return in_len;

if (lseek(ifd, in_cur, SEEK_SET) < 0)
return -1;

/* Copy the entire file except for the last few bytes. */

bytes_to_copy = (ssize_t)in_len - nr_wrong_bytes;
bytes_remaining_to_copy = bytes_to_copy;
while (bytes_remaining_to_copy) {
ssize_t to_read = MY_MIN(sizeof(buffer), bytes_remaining_to_copy);
ssize_t len = xread(ifd, buffer, to_read);

if (!len)
return -1; /* error on unexpected EOF */
if (len < 0)
return -1;
if (write_in_full(ofd, buffer, len) < 0)
return -1;

bytes_remaining_to_copy -= len;
}

/* Read the trailing bytes so that we can alter them before copying. */

while (nr_wrong_bytes) {
ssize_t to_read = MY_MIN(sizeof(buffer), nr_wrong_bytes);
ssize_t len = xread(ifd, buffer, to_read);
ssize_t k;

if (!len)
return -1; /* error on unexpected EOF */
if (len < 0)
return -1;

for (k = 0; k < len; k++)
buffer[k] ^= 0xff;

if (write_in_full(ofd, buffer, len) < 0)
return -1;

nr_wrong_bytes -= len;
}

return 0;
}

static enum worker_result send_ct_item(const struct ct_pack_item *item)
{
struct ph ph_le;
Expand All @@ -1166,7 +1242,8 @@ static enum worker_result send_ct_item(const struct ct_pack_item *item)
trace2_printf("%s: sending prefetch pack '%s'", TR2_CAT, item->path_pack.buf);

fd_pack = git_open_cloexec(item->path_pack.buf, O_RDONLY);
if (fd_pack == -1 || copy_fd(fd_pack, 1)) {
if (fd_pack == -1 ||
copy_fd_with_checksum_mayhem(fd_pack, 1, "bad_prefetch_pack_sha", 4)) {
logerror("could not send packfile");
wr = WR_IO_ERROR;
goto done;
Expand All @@ -1176,7 +1253,8 @@ static enum worker_result send_ct_item(const struct ct_pack_item *item)
trace2_printf("%s: sending prefetch idx '%s'", TR2_CAT, item->path_idx.buf);

fd_idx = git_open_cloexec(item->path_idx.buf, O_RDONLY);
if (fd_idx == -1 || copy_fd(fd_idx, 1)) {
if (fd_idx == -1 ||
copy_fd_with_checksum_mayhem(fd_idx, 1, "bad_prefetch_idx_sha", 4)) {
logerror("could not send idx");
wr = WR_IO_ERROR;
goto done;
Expand Down
68 changes: 67 additions & 1 deletion t/t5799-gvfs-helper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1299,7 +1299,7 @@ test_expect_success 'duplicate and busy: vfs- packfile' '
# content matches the requested SHA.
#
test_expect_success 'catch corrupted loose object' '
# test_when_finished "per_test_cleanup" &&
test_when_finished "per_test_cleanup" &&
start_gvfs_protocol_server_with_mayhem corrupt_loose &&
test_must_fail \
Expand All @@ -1322,4 +1322,70 @@ test_expect_success 'catch corrupted loose object' '
git -C "$REPO_T1" fsck
'

#################################################################
# Ensure that we can detect when we receive a corrupted packfile
# from the server. This is not concerned with network IO errors,
# but rather cases when the cache or origin server generates or
# sends an invalid packfile.
#
# For example, if the server throws an exception and writes the
# stack trace to the socket rather than or in addition to the
# packfile content.
#
# Or for example, if the packfile on the server's disk is corrupt
# and it sends it correctly, but the original data was already
# garbage, so the client still has garbage (and retrying won't
# help).
#################################################################

# Send corrupt PACK files w/o IDX files (so that `gvfs-helper`
# must use `index-pack` to create it. (And as a side-effect,
# validate the PACK file is not corrupt.)
test_expect_success 'prefetch corrupt pack without idx' '
test_when_finished "per_test_cleanup" &&
start_gvfs_protocol_server_with_mayhem \
bad_prefetch_pack_sha \
no_prefetch_idx &&
test_must_fail \
git -C "$REPO_T1" gvfs-helper \
--cache-server=disable \
--remote=origin \
--no-progress \
prefetch \
--max-retries=0 \
--since="1000000000" \
>OUT.output 2>OUT.stderr &&
stop_gvfs_protocol_server &&
# Verify corruption detected in pack when building
# local idx file for it.
grep -q "error: .* index-pack failed" <OUT.stderr
'

# Send corrupt PACK files with IDX files. Since the cache server
# sends both, `gvfs-helper` might fail to verify both of them.
test_expect_failure 'prefetch corrupt pack with corrupt idx' '
test_when_finished "per_test_cleanup" &&
start_gvfs_protocol_server_with_mayhem \
bad_prefetch_pack_sha &&
# TODO This is a false-positive since `gvfs-helper`
# TODO does not verify either of them when a pair
# TODO is sent.
test_must_fail \
git -C "$REPO_T1" gvfs-helper \
--cache-server=disable \
--remote=origin \
--no-progress \
prefetch \
--max-retries=0 \
--since="1000000000" \
>OUT.output 2>OUT.stderr &&
stop_gvfs_protocol_server
'

test_done

0 comments on commit fa7358e

Please sign in to comment.