diff --git a/gvfs-helper.c b/gvfs-helper.c index 04c9c460c6ce36..234c3162cc524e 100644 --- a/gvfs-helper.c +++ b/gvfs-helper.c @@ -105,6 +105,11 @@ // The GVFS Protocol defines this value as a way to // request cached packfiles NEWER THAN this timestamp. // +// --max-retries= // defaults to "6" +// +// Number of retries after transient network errors. +// Set to zero to disable such retries. +// // server // // Interactive/sub-process mode. Listen for a series of commands @@ -2119,7 +2124,6 @@ static void extract_packfile_from_multipack( { struct ph ph; struct tempfile *tempfile_pack = NULL; - struct tempfile *tempfile_idx = NULL; int result = -1; int b_no_idx_in_multipack; struct object_id packfile_checksum; @@ -2153,16 +2157,14 @@ static void extract_packfile_from_multipack( b_no_idx_in_multipack = (ph.idx_len == maximum_unsigned_value_of_type(uint64_t) || ph.idx_len == 0); - if (b_no_idx_in_multipack) { - my_create_tempfile(status, 0, "pack", &tempfile_pack, NULL, NULL); - if (!tempfile_pack) - goto done; - } else { - /* create a pair of tempfiles with the same basename */ - my_create_tempfile(status, 0, "pack", &tempfile_pack, "idx", &tempfile_idx); - if (!tempfile_pack || !tempfile_idx) - goto done; - } + /* + * We are going to harden `gvfs-helper` here and ignore the .idx file + * if it is provided and always compute it locally so that we get the + * added verification that `git index-pack` provides. + */ + my_create_tempfile(status, 0, "pack", &tempfile_pack, NULL, NULL); + if (!tempfile_pack) + goto done; /* * Copy the current packfile from the open stream and capture @@ -2189,38 +2191,31 @@ static void extract_packfile_from_multipack( oid_to_hex_r(hex_checksum, &packfile_checksum); - if (b_no_idx_in_multipack) { - /* - * The server did not send the corresponding .idx, so - * we have to compute it ourselves. - */ - strbuf_addbuf(&temp_path_idx, &temp_path_pack); - strbuf_strip_suffix(&temp_path_idx, ".pack"); - strbuf_addstr(&temp_path_idx, ".idx"); + /* + * Always compute the .idx file from the .pack file. + */ + strbuf_addbuf(&temp_path_idx, &temp_path_pack); + strbuf_strip_suffix(&temp_path_idx, ".pack"); + strbuf_addstr(&temp_path_idx, ".idx"); - my_run_index_pack(params, status, - &temp_path_pack, &temp_path_idx, - NULL); - if (status->ec != GH__ERROR_CODE__OK) - goto done; + my_run_index_pack(params, status, + &temp_path_pack, &temp_path_idx, + NULL); + if (status->ec != GH__ERROR_CODE__OK) + goto done; - } else { + if (!b_no_idx_in_multipack) { /* * Server sent the .idx immediately after the .pack in the - * data stream. I'm tempted to verify it, but that defeats - * the purpose of having it cached... + * data stream. Skip over it. */ - if (my_copy_fd_len(fd_multipack, get_tempfile_fd(tempfile_idx), - ph.idx_len) < 0) { + if (lseek(fd_multipack, ph.idx_len, SEEK_CUR) < 0) { strbuf_addf(&status->error_message, - "could not extract index[%d] in multipack", + "could not skip index[%d] in multipack", k); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; goto done; } - - strbuf_addstr(&temp_path_idx, get_tempfile_path(tempfile_idx)); - close_tempfile_gently(tempfile_idx); } strbuf_addf(&buf_timestamp, "%u", (unsigned int)ph.timestamp); @@ -2235,7 +2230,6 @@ static void extract_packfile_from_multipack( done: delete_tempfile(&tempfile_pack); - delete_tempfile(&tempfile_idx); strbuf_release(&temp_path_pack); strbuf_release(&temp_path_idx); strbuf_release(&final_path_pack); @@ -3753,6 +3747,8 @@ static enum gh__error_code do_sub_cmd__prefetch(int argc, const char **argv) static const char *since_str; static struct option prefetch_options[] = { OPT_STRING(0, "since", &since_str, N_("since"), N_("seconds since epoch")), + OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries, + N_("retries for transient network errors")), OPT_END(), }; @@ -3772,6 +3768,8 @@ static enum gh__error_code do_sub_cmd__prefetch(int argc, const char **argv) if (my_parse_since(since_str, &seconds_since_epoch)) die("could not parse 'since' field"); } + if (gh__cmd_opts.max_retries < 0) + gh__cmd_opts.max_retries = 0; finish_init(1); diff --git a/t/helper/test-gvfs-protocol.c b/t/helper/test-gvfs-protocol.c index 98a94234fbb8b4..00504e375f72ea 100644 --- a/t/helper/test-gvfs-protocol.c +++ b/t/helper/test-gvfs-protocol.c @@ -1154,6 +1154,82 @@ static int ct_pack_sort_compare(const void *_a, const void *_b) return (a->ph.timestamp < b->ph.timestamp) ? -1 : (a->ph.timestamp != b->ph.timestamp); } +#define MY_MIN(a, b) ((a) < (b) ? (a) : (b)) + +/* + * Like copy.c:copy_fd(), but corrupt part of the trailing SHA (if the + * given mayhem key is defined) as we copy it to the destination file. + * + * We don't know (or care) if the input file is a pack file or idx + * file, just that the final bytes are part of a SHA that we can + * corrupt. + */ +static int copy_fd_with_checksum_mayhem(int ifd, int ofd, + const char *mayhem_key, + ssize_t nr_wrong_bytes) +{ + off_t in_cur, in_len; + ssize_t bytes_to_copy; + ssize_t bytes_remaining_to_copy; + char buffer[8192]; + + if (!mayhem_key || !*mayhem_key || !nr_wrong_bytes || + !string_list_has_string(&mayhem_list, mayhem_key)) + return copy_fd(ifd, ofd); + + in_cur = lseek(ifd, 0, SEEK_CUR); + if (in_cur < 0) + return in_cur; + + in_len = lseek(ifd, 0, SEEK_END); + if (in_len < 0) + return in_len; + + if (lseek(ifd, in_cur, SEEK_SET) < 0) + return -1; + + /* Copy the entire file except for the last few bytes. */ + + bytes_to_copy = (ssize_t)in_len - nr_wrong_bytes; + bytes_remaining_to_copy = bytes_to_copy; + while (bytes_remaining_to_copy) { + ssize_t to_read = MY_MIN(sizeof(buffer), bytes_remaining_to_copy); + ssize_t len = xread(ifd, buffer, to_read); + + if (!len) + return -1; /* error on unexpected EOF */ + if (len < 0) + return -1; + if (write_in_full(ofd, buffer, len) < 0) + return -1; + + bytes_remaining_to_copy -= len; + } + + /* Read the trailing bytes so that we can alter them before copying. */ + + while (nr_wrong_bytes) { + ssize_t to_read = MY_MIN(sizeof(buffer), nr_wrong_bytes); + ssize_t len = xread(ifd, buffer, to_read); + ssize_t k; + + if (!len) + return -1; /* error on unexpected EOF */ + if (len < 0) + return -1; + + for (k = 0; k < len; k++) + buffer[k] ^= 0xff; + + if (write_in_full(ofd, buffer, len) < 0) + return -1; + + nr_wrong_bytes -= len; + } + + return 0; +} + static enum worker_result send_ct_item(const struct ct_pack_item *item) { struct ph ph_le; @@ -1175,7 +1251,8 @@ static enum worker_result send_ct_item(const struct ct_pack_item *item) trace2_printf("%s: sending prefetch pack '%s'", TR2_CAT, item->path_pack.buf); fd_pack = git_open_cloexec(item->path_pack.buf, O_RDONLY); - if (fd_pack == -1 || copy_fd(fd_pack, 1)) { + if (fd_pack == -1 || + copy_fd_with_checksum_mayhem(fd_pack, 1, "bad_prefetch_pack_sha", 4)) { logerror("could not send packfile"); wr = WR_IO_ERROR; goto done; @@ -1185,7 +1262,8 @@ static enum worker_result send_ct_item(const struct ct_pack_item *item) trace2_printf("%s: sending prefetch idx '%s'", TR2_CAT, item->path_idx.buf); fd_idx = git_open_cloexec(item->path_idx.buf, O_RDONLY); - if (fd_idx == -1 || copy_fd(fd_idx, 1)) { + if (fd_idx == -1 || + copy_fd_with_checksum_mayhem(fd_idx, 1, "bad_prefetch_idx_sha", 4)) { logerror("could not send idx"); wr = WR_IO_ERROR; goto done; diff --git a/t/t5799-gvfs-helper.sh b/t/t5799-gvfs-helper.sh index 5c79654a63f0fa..0a2d6180430b50 100755 --- a/t/t5799-gvfs-helper.sh +++ b/t/t5799-gvfs-helper.sh @@ -1299,7 +1299,7 @@ test_expect_success 'duplicate and busy: vfs- packfile' ' # content matches the requested SHA. # test_expect_success 'catch corrupted loose object' ' -# test_when_finished "per_test_cleanup" && + test_when_finished "per_test_cleanup" && start_gvfs_protocol_server_with_mayhem corrupt_loose && test_must_fail \ @@ -1322,4 +1322,67 @@ test_expect_success 'catch corrupted loose object' ' git -C "$REPO_T1" fsck ' +################################################################# +# Ensure that we can detect when we receive a corrupted packfile +# from the server. This is not concerned with network IO errors, +# but rather cases when the cache or origin server generates or +# sends an invalid packfile. +# +# For example, if the server throws an exception and writes the +# stack trace to the socket rather than or in addition to the +# packfile content. +# +# Or for example, if the packfile on the server's disk is corrupt +# and it sends it correctly, but the original data was already +# garbage, so the client still has garbage (and retrying won't +# help). +################################################################# + +# Send corrupt PACK files w/o IDX files (so that `gvfs-helper` +# must use `index-pack` to create it. (And as a side-effect, +# validate the PACK file is not corrupt.) +test_expect_success 'prefetch corrupt pack without idx' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem \ + bad_prefetch_pack_sha \ + no_prefetch_idx && + + test_must_fail \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + prefetch \ + --max-retries=0 \ + --since="1000000000" \ + >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + # Verify corruption detected in pack when building + # local idx file for it. + + grep -q "error: .* index-pack failed" OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server +' + test_done