diff --git a/.gitignore b/.gitignore index e8f283f4a2701b..857af0fff99edc 100644 --- a/.gitignore +++ b/.gitignore @@ -75,6 +75,7 @@ /git-gc /git-get-tar-commit-id /git-grep +/git-gvfs-helper /git-hash-object /git-help /git-hook diff --git a/Documentation/config.txt b/Documentation/config.txt index 939cc1387992f8..b87cb7a593b368 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -448,6 +448,8 @@ include::config/gui.txt[] include::config/guitool.txt[] +include::config/gvfs.txt[] + include::config/help.txt[] include::config/http.txt[] diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 50e4146af1d24e..7014cc204cc7c7 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -793,6 +793,9 @@ core.gvfs:: flag just blocks them from occurring at all. -- +core.useGvfsHelper:: + TODO + core.sparseCheckout:: Enable "sparse checkout" feature. See linkgit:git-sparse-checkout[1] for more information. diff --git a/Documentation/config/gvfs.txt b/Documentation/config/gvfs.txt new file mode 100644 index 00000000000000..6ab221ded36c91 --- /dev/null +++ b/Documentation/config/gvfs.txt @@ -0,0 +1,5 @@ +gvfs.cache-server:: + TODO + +gvfs.sharedcache:: + TODO diff --git a/Documentation/lint-manpages.sh b/Documentation/lint-manpages.sh index 92cfc0a15abd56..8bc316ba7646e3 100755 --- a/Documentation/lint-manpages.sh +++ b/Documentation/lint-manpages.sh @@ -27,6 +27,7 @@ check_missing_docs () ( git-init-db) continue;; git-remote-*) continue;; git-stage) continue;; + git-gvfs-helper) continue;; git-legacy-*) continue;; git-?*--?* ) continue ;; esac diff --git a/Makefile b/Makefile index a199ef5dead752..124e3d4cae615c 100644 --- a/Makefile +++ b/Makefile @@ -1043,6 +1043,7 @@ LIB_OBJS += gpg-interface.o LIB_OBJS += graph.o LIB_OBJS += grep.o LIB_OBJS += gvfs.o +LIB_OBJS += gvfs-helper-client.o LIB_OBJS += hash-lookup.o LIB_OBJS += hashmap.o LIB_OBJS += help.o @@ -1679,6 +1680,9 @@ endif endif BASIC_CFLAGS += 
$(CURL_CFLAGS) + PROGRAM_OBJS += gvfs-helper.o + TEST_PROGRAMS_NEED_X += test-gvfs-protocol + REMOTE_CURL_PRIMARY = git-remote-http$X REMOTE_CURL_ALIASES = git-remote-https$X git-remote-ftp$X git-remote-ftps$X REMOTE_CURL_NAMES = $(REMOTE_CURL_PRIMARY) $(REMOTE_CURL_ALIASES) @@ -2959,6 +2963,10 @@ scalar$X: scalar.o GIT-LDFLAGS $(GITLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) \ $(filter %.o,$^) $(LIBS) +git-gvfs-helper$X: gvfs-helper.o http.o GIT-LDFLAGS $(GITLIBS) $(LAZYLOAD_LIBCURL_OBJ) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \ + $(CURL_LIBCURL) $(EXPAT_LIBEXPAT) $(LIBS) + $(LIB_FILE): $(LIB_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) $(ARFLAGS) $@ $^ diff --git a/builtin/index-pack.c b/builtin/index-pack.c index d2073c8a84f26c..4677ae3a34eac7 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -812,7 +812,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, read_lock(); collision_test_needed = repo_has_object_file_with_flags(the_repository, oid, - OBJECT_INFO_QUICK); + OBJECT_INFO_FOR_PREFETCH); read_unlock(); } diff --git a/config.c b/config.c index 7c927b8c91dc7f..df7609fc403935 100644 --- a/config.c +++ b/config.c @@ -42,6 +42,7 @@ #include "wildmatch.h" #include "ws.h" #include "write-or-die.h" +#include "transport.h" struct config_source { struct config_source *prev; @@ -1648,6 +1649,11 @@ int git_default_core_config(const char *var, const char *value, return 0; } + if (!strcmp(var, "core.usegvfshelper")) { + core_use_gvfs_helper = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "core.sparsecheckout")) { /* virtual file system relies on the sparse checkout logic so force it on */ if (core_virtualfilesystem) @@ -1799,6 +1805,37 @@ static int git_default_mailmap_config(const char *var, const char *value) return 0; } +static int git_default_gvfs_config(const char *var, const char *value) +{ + if (!strcmp(var, "gvfs.cache-server")) { + char *v2 = NULL; + + if 
(!git_config_string(&v2, var, value) && v2 && *v2) { + free(gvfs_cache_server_url); + gvfs_cache_server_url = transport_anonymize_url(v2); + } + free(v2); + return 0; + } + + if (!strcmp(var, "gvfs.sharedcache") && value && *value) { + strbuf_setlen(&gvfs_shared_cache_pathname, 0); + strbuf_addstr(&gvfs_shared_cache_pathname, value); + if (strbuf_normalize_path(&gvfs_shared_cache_pathname) < 0) { + /* + * Pretend it wasn't set. This will cause us to + * fallback to ".git/objects" effectively. + */ + strbuf_release(&gvfs_shared_cache_pathname); + return 0; + } + strbuf_trim_trailing_dir_sep(&gvfs_shared_cache_pathname); + return 0; + } + + return 0; +} + static int git_default_attr_config(const char *var, const char *value) { if (!strcmp(var, "attr.tree")) { @@ -1866,6 +1903,9 @@ int git_default_config(const char *var, const char *value, if (starts_with(var, "sparse.")) return git_default_sparse_config(var, value); + if (starts_with(var, "gvfs.")) + return git_default_gvfs_config(var, value); + /* Add other config variables here and to Documentation/config.txt. 
*/ return 0; } diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 760024240198a4..4f99db39f045f6 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -649,7 +649,7 @@ if(NOT CURL_FOUND) add_compile_definitions(NO_CURL) message(WARNING "git-http-push and git-http-fetch will not be built") else() - list(APPEND PROGRAMS_BUILT git-http-fetch git-http-push git-imap-send git-remote-http) + list(APPEND PROGRAMS_BUILT git-http-fetch git-http-push git-imap-send git-remote-http git-gvfs-helper) if(CURL_VERSION_STRING VERSION_GREATER_EQUAL 7.34.0) add_compile_definitions(USE_CURL_FOR_IMAP_SEND) endif() @@ -818,6 +818,9 @@ if(CURL_FOUND) add_executable(git-http-push ${CMAKE_SOURCE_DIR}/http-push.c) target_link_libraries(git-http-push http_obj common-main ${CURL_LIBRARIES} ${EXPAT_LIBRARIES}) endif() + + add_executable(git-gvfs-helper ${CMAKE_SOURCE_DIR}/gvfs-helper.c) + target_link_libraries(git-gvfs-helper http_obj common-main ${CURL_LIBRARIES} ) endif() parse_makefile_for_executables(git_builtin_extra "BUILT_INS") @@ -1108,6 +1111,20 @@ set(wrapper_scripts set(wrapper_test_scripts test-fake-ssh test-tool) +if(CURL_FOUND) + list(APPEND wrapper_test_scripts test-gvfs-protocol) + + add_executable(test-gvfs-protocol ${CMAKE_SOURCE_DIR}/t/helper/test-gvfs-protocol.c) + target_link_libraries(test-gvfs-protocol common-main) + + if(MSVC) + set_target_properties(test-gvfs-protocol + PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/t/helper) + set_target_properties(test-gvfs-protocol + PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/t/helper) + endif() +endif() + foreach(script ${wrapper_scripts}) file(STRINGS ${CMAKE_SOURCE_DIR}/wrap-for-bin.sh content NEWLINE_CONSUME) diff --git a/credential.c b/credential.c index 6dea3859ece338..1e138862015a75 100644 --- a/credential.c +++ b/credential.c @@ -443,6 +443,8 @@ static int run_credential_helper(struct credential *c, else 
helper.no_stdout = 1; + helper.trace2_child_class = "cred"; + if (start_command(&helper) < 0) return -1; diff --git a/environment.c b/environment.c index 19700245e10471..a20d0659ec37d5 100644 --- a/environment.c +++ b/environment.c @@ -101,6 +101,9 @@ int protect_hfs = PROTECT_HFS_DEFAULT; #define PROTECT_NTFS_DEFAULT 1 #endif int protect_ntfs = PROTECT_NTFS_DEFAULT; +int core_use_gvfs_helper; +char *gvfs_cache_server_url; +struct strbuf gvfs_shared_cache_pathname = STRBUF_INIT; /* * The character that begins a commented line in user-editable file diff --git a/environment.h b/environment.h index e34e9b61ca8b5a..bcba6889be478c 100644 --- a/environment.h +++ b/environment.h @@ -176,6 +176,9 @@ extern int core_gvfs; extern int precomposed_unicode; extern int protect_hfs; extern int protect_ntfs; +extern int core_use_gvfs_helper; +extern char *gvfs_cache_server_url; +extern struct strbuf gvfs_shared_cache_pathname; extern int core_apply_sparse_checkout; extern int core_sparse_checkout_cone; diff --git a/gvfs-helper-client.c b/gvfs-helper-client.c new file mode 100644 index 00000000000000..a5dc279406cf43 --- /dev/null +++ b/gvfs-helper-client.c @@ -0,0 +1,469 @@ +#define USE_THE_REPOSITORY_VARIABLE +#include "git-compat-util.h" +#include "environment.h" +#include "hex.h" +#include "strvec.h" +#include "trace2.h" +#include "oidset.h" +#include "object.h" +#include "object-store.h" +#include "gvfs-helper-client.h" +#include "sub-process.h" +#include "sigchain.h" +#include "pkt-line.h" +#include "quote.h" +#include "packfile.h" +#include "hex.h" + +static struct oidset gh_client__oidset_queued = OIDSET_INIT; +static unsigned long gh_client__oidset_count; + +struct gh_server__process { + struct subprocess_entry subprocess; /* must be first */ + unsigned int supported_capabilities; +}; + +static int gh_server__subprocess_map_initialized; +static struct hashmap gh_server__subprocess_map; +static struct object_directory *gh_client__chosen_odb; + +/* + * The "objects" 
capability has 2 verbs: "get" and "post". + */ +#define CAP_OBJECTS (1u<<1) +#define CAP_OBJECTS_NAME "objects" + +#define CAP_OBJECTS__VERB_GET1_NAME "get" +#define CAP_OBJECTS__VERB_POST_NAME "post" + +static int gh_client__start_fn(struct subprocess_entry *subprocess) +{ + static int versions[] = {1, 0}; + static struct subprocess_capability capabilities[] = { + { CAP_OBJECTS_NAME, CAP_OBJECTS }, + { NULL, 0 } + }; + + struct gh_server__process *entry = (struct gh_server__process *)subprocess; + + return subprocess_handshake(subprocess, "gvfs-helper", versions, + NULL, capabilities, + &entry->supported_capabilities); +} + +/* + * Send the queued OIDs in the OIDSET to gvfs-helper for it to + * fetch from the cache-server or main Git server using "/gvfs/objects" + * POST semantics. + * + * objects.post LF + * ( LF)* + * + * + */ +static int gh_client__send__objects_post(struct child_process *process) +{ + struct oidset_iter iter; + struct object_id *oid; + int err; + + /* + * We assume that all of the packet_ routines call error() + * so that we don't have to. + */ + + err = packet_write_fmt_gently( + process->in, + (CAP_OBJECTS_NAME "." CAP_OBJECTS__VERB_POST_NAME "\n")); + if (err) + return err; + + oidset_iter_init(&gh_client__oidset_queued, &iter); + while ((oid = oidset_iter_next(&iter))) { + err = packet_write_fmt_gently(process->in, "%s\n", + oid_to_hex(oid)); + if (err) + return err; + } + + err = packet_flush_gently(process->in); + if (err) + return err; + + return 0; +} + +/* + * Send the given OID to gvfs-helper for it to fetch from the + * cache-server or main Git server using "/gvfs/objects" GET + * semantics. + * + * This ignores any queued OIDs. + * + * objects.get LF + * LF + * + * + */ +static int gh_client__send__objects_get(struct child_process *process, + const struct object_id *oid) +{ + int err; + + /* + * We assume that all of the packet_ routines call error() + * so that we don't have to. 
+ */ + + err = packet_write_fmt_gently( + process->in, + (CAP_OBJECTS_NAME "." CAP_OBJECTS__VERB_GET1_NAME "\n")); + if (err) + return err; + + err = packet_write_fmt_gently(process->in, "%s\n", + oid_to_hex(oid)); + if (err) + return err; + + err = packet_flush_gently(process->in); + if (err) + return err; + + return 0; +} + +/* + * Update the loose object cache to include the newly created + * object. + */ +static void gh_client__update_loose_cache(const char *line) +{ + const char *v1_oid; + struct object_id oid; + + if (!skip_prefix(line, "loose ", &v1_oid)) + BUG("update_loose_cache: invalid line '%s'", line); + + if (get_oid_hex(v1_oid, &oid)) + BUG("update_loose_cache: invalid line '%s'", line); + + odb_loose_cache_add_new_oid(gh_client__chosen_odb, &oid); +} + +/* + * Update the packed-git list to include the newly created packfile. + * + * `line` is a "packfile <filename>" response line from gvfs-helper; the + * matching ".idx" in the chosen ODB's "pack" directory is registered. + */ +static void gh_client__update_packed_git(const char *line) +{ + struct strbuf path = STRBUF_INIT; + const char *v1_filename; + struct packed_git *p; + int is_local; + + if (!skip_prefix(line, "packfile ", &v1_filename)) + BUG("update_packed_git: invalid line '%s'", line); + + /* + * ODB[0] is the local .git/objects. All others are alternates. + */ + is_local = (gh_client__chosen_odb == the_repository->objects->odb); + + strbuf_addf(&path, "%s/pack/%s", + gh_client__chosen_odb->path, v1_filename); + strbuf_strip_suffix(&path, ".pack"); + strbuf_addstr(&path, ".idx"); + + p = add_packed_git(path.buf, path.len, is_local); + if (p) + install_packed_git_and_mru(the_repository, p); + + /* The temporary pathname is no longer needed. */ + strbuf_release(&path); +} + +/* + * Both CAP_OBJECTS verbs return the same format response: + * + * <odb> + * <data>* + * <status> + * <flush> + * + * Where: + * + * <odb> ::= odb SP <directory> LF + * + * <data> ::= <packfile> / <loose> + * + * <packfile> ::= packfile SP <filename> LF + * + * <loose> ::= loose SP <hex-oid> LF + * + * <status> ::= ok LF + * / partial LF + * / error SP <message> LF + * + * Note that `gvfs-helper` controls how/if it chunks the request when + * it talks to the cache-server and/or main Git server. 
So it is + * possible for us to receive many packfiles and/or loose objects *AND + * THEN* get a hard network error or a 404 on an individual object. + * + * If we get a partial result, we can let the caller try to continue + * -- for example, maybe an immediate request for a tree object was + * grouped with a queued request for a blob. The tree-walk *might* be + * able to continue and let the 404 blob be handled later. + */ +static int gh_client__objects__receive_response( + struct child_process *process, + enum gh_client__created *p_ghc, + int *p_nr_loose, int *p_nr_packfile) +{ + enum gh_client__created ghc = GHC__CREATED__NOTHING; + const char *v1; + char *line; + int len; + int err = 0; + + while (1) { + /* + * Warning: packet_read_line_gently() calls die() + * despite the _gently moniker. + */ + len = packet_read_line_gently(process->out, NULL, &line); + if ((len < 0) || !line) + break; + + if (starts_with(line, "odb")) { + /* trust that this matches what we expect */ + } + + else if (starts_with(line, "packfile")) { + gh_client__update_packed_git(line); + ghc |= GHC__CREATED__PACKFILE; + *p_nr_packfile += 1; + } + + else if (starts_with(line, "loose")) { + gh_client__update_loose_cache(line); + ghc |= GHC__CREATED__LOOSE; + *p_nr_loose += 1; + } + + else if (starts_with(line, "ok")) + ; + else if (starts_with(line, "partial")) + ; + else if (skip_prefix(line, "error ", &v1)) { + error("gvfs-helper error: '%s'", v1); + err = -1; + } + } + + *p_ghc = ghc; + + return err; +} + +/* + * Select the preferred ODB for fetching missing objects. + * This should be the alternate with the same directory + * name as set in `gvfs.sharedCache`. + * + * Fallback to .git/objects if necessary. 
+ */ +static void gh_client__choose_odb(void) +{ + struct object_directory *odb; + + if (gh_client__chosen_odb) + return; + + prepare_alt_odb(the_repository); + gh_client__chosen_odb = the_repository->objects->odb; + + if (!gvfs_shared_cache_pathname.len) + return; + + for (odb = the_repository->objects->odb->next; odb; odb = odb->next) { + if (!fspathcmp(odb->path, gvfs_shared_cache_pathname.buf)) { + gh_client__chosen_odb = odb; + return; + } + } +} + +/* + * Find (or start) the long-running `gvfs-helper server` child that uses + * the chosen ODB as its shared-cache and verify that it advertises the + * capabilities in `cap_needed`. Returns NULL on failure. + */ +static struct gh_server__process *gh_client__find_long_running_process( + unsigned int cap_needed) +{ + struct gh_server__process *entry; + struct strvec argv = STRVEC_INIT; + struct strbuf quoted = STRBUF_INIT; + + gh_client__choose_odb(); + + /* + * TODO decide what defaults we want. + */ + strvec_push(&argv, "gvfs-helper"); + strvec_push(&argv, "--fallback"); + strvec_push(&argv, "--cache-server=trust"); + strvec_pushf(&argv, "--shared-cache=%s", + gh_client__chosen_odb->path); + strvec_push(&argv, "server"); + + sq_quote_argv_pretty(&quoted, argv.v); + + /* + * Find an existing long-running process with the above command + * line -or- create a new long-running process for this and + * subsequent 'get' requests. 
+ */ + if (!gh_server__subprocess_map_initialized) { + gh_server__subprocess_map_initialized = 1; + hashmap_init(&gh_server__subprocess_map, + (hashmap_cmp_fn)cmd2process_cmp, NULL, 0); + entry = NULL; + } else + entry = (struct gh_server__process *)subprocess_find_entry( + &gh_server__subprocess_map, quoted.buf); + + if (!entry) { + entry = xmalloc(sizeof(*entry)); + entry->supported_capabilities = 0; + + if (subprocess_start_strvec(&gh_server__subprocess_map, + &entry->subprocess, 1, + &argv, gh_client__start_fn)) + FREE_AND_NULL(entry); + } + + if (entry && + (entry->supported_capabilities & cap_needed) != cap_needed) { + error("gvfs-helper: does not support needed capabilities"); + subprocess_stop(&gh_server__subprocess_map, + (struct subprocess_entry *)entry); + FREE_AND_NULL(entry); + } + + strvec_clear(&argv); + strbuf_release(&quoted); + + return entry; +} + +void gh_client__queue_oid(const struct object_id *oid) +{ + // TODO consider removing this trace2. it is useful for interactive + // TODO debugging, but may generate way too much noise for a data + // TODO event. + trace2_printf("gh_client__queue_oid: %s", oid_to_hex(oid)); + + if (!oidset_insert(&gh_client__oidset_queued, oid)) + gh_client__oidset_count++; +} + +/* + * This routine should actually take a "const struct oid_array *" + * rather than the component parts, but fetch_objects() uses + * this model (because of the call in sha1-file.c). + */ +void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr) +{ + int k; + + for (k = 0; k < oid_nr; k++) + gh_client__queue_oid(&oids[k]); +} + +/* + * Bulk fetch all of the queued OIDs in the OIDSET. 
+ */ +int gh_client__drain_queue(enum gh_client__created *p_ghc) +{ + struct gh_server__process *entry; + struct child_process *process; + int nr_loose = 0; + int nr_packfile = 0; + int err = 0; + + *p_ghc = GHC__CREATED__NOTHING; + + if (!gh_client__oidset_count) + return 0; + + entry = gh_client__find_long_running_process(CAP_OBJECTS); + if (!entry) + return -1; + + trace2_region_enter("gh-client", "objects/post", the_repository); + + process = &entry->subprocess.process; + + sigchain_push(SIGPIPE, SIG_IGN); + + err = gh_client__send__objects_post(process); + if (!err) + err = gh_client__objects__receive_response( + process, p_ghc, &nr_loose, &nr_packfile); + + sigchain_pop(SIGPIPE); + + if (err) { + subprocess_stop(&gh_server__subprocess_map, + (struct subprocess_entry *)entry); + FREE_AND_NULL(entry); + } + + trace2_data_intmax("gh-client", the_repository, + "objects/post/nr_objects", gh_client__oidset_count); + trace2_region_leave("gh-client", "objects/post", the_repository); + + oidset_clear(&gh_client__oidset_queued); + gh_client__oidset_count = 0; + + return err; +} + +/* + * Get exactly 1 object immediately. + * Ignore any queued objects. + */ +int gh_client__get_immediate(const struct object_id *oid, + enum gh_client__created *p_ghc) +{ + struct gh_server__process *entry; + struct child_process *process; + int nr_loose = 0; + int nr_packfile = 0; + int err = 0; + + // TODO consider removing this trace2. it is useful for interactive + // TODO debugging, but may generate way too much noise for a data + // TODO event. 
+ trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid)); + + entry = gh_client__find_long_running_process(CAP_OBJECTS); + if (!entry) + return -1; + + trace2_region_enter("gh-client", "objects/get", the_repository); + + process = &entry->subprocess.process; + + sigchain_push(SIGPIPE, SIG_IGN); + + err = gh_client__send__objects_get(process, oid); + if (!err) + err = gh_client__objects__receive_response( + process, p_ghc, &nr_loose, &nr_packfile); + + sigchain_pop(SIGPIPE); + + if (err) { + subprocess_stop(&gh_server__subprocess_map, + (struct subprocess_entry *)entry); + FREE_AND_NULL(entry); + } + + trace2_region_leave("gh-client", "objects/get", the_repository); + + return err; +} diff --git a/gvfs-helper-client.h b/gvfs-helper-client.h new file mode 100644 index 00000000000000..c1e38fad75f841 --- /dev/null +++ b/gvfs-helper-client.h @@ -0,0 +1,69 @@ +#ifndef GVFS_HELPER_CLIENT_H +#define GVFS_HELPER_CLIENT_H + +struct repository; +struct commit; +struct object_id; + +enum gh_client__created { + /* + * The _get_ operation did not create anything. It doesn't + * matter if `gvfs-helper` had errors or not -- just that + * nothing was created. + */ + GHC__CREATED__NOTHING = 0, + + /* + * The _get_ operation created one or more packfiles. + */ + GHC__CREATED__PACKFILE = 1<<1, + + /* + * The _get_ operation created one or more loose objects. + * (Not necessarily for the individual OID you requested.) + */ + GHC__CREATED__LOOSE = 1<<2, + + /* + * The _get_ operation created one or more packfiles *and* + * one or more loose objects. + */ + GHC__CREATED__PACKFILE_AND_LOOSE = (GHC__CREATED__PACKFILE | + GHC__CREATED__LOOSE), +}; + +/* + * Ask `gvfs-helper server` to immediately fetch a single object + * using "/gvfs/objects" GET semantics. + * + * A long-running background process is used to make subsequent + * requests more efficient. + * + * A loose object will be created in the shared-cache ODB and + * in-memory cache updated. 
+ */ +int gh_client__get_immediate(const struct object_id *oid, + enum gh_client__created *p_ghc); + +/* + * Queue this OID for a future fetch using `gvfs-helper server`. + * It does not wait. + * + * Callers should not rely on the queued object being on disk until + * the queue has been drained. + */ +void gh_client__queue_oid(const struct object_id *oid); +void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr); + +/* + * Ask `gvfs-helper server` to fetch the set of queued OIDs using + * "/gvfs/objects" POST semantics. + * + * A long-running background process is used to make subsequent requests + * more efficient. + * + * One or more packfiles will be created in the shared-cache ODB. + */ +int gh_client__drain_queue(enum gh_client__created *p_ghc); + +#endif /* GVFS_HELPER_CLIENT_H */ diff --git a/gvfs-helper.c b/gvfs-helper.c new file mode 100644 index 00000000000000..6f865aac7a4758 --- /dev/null +++ b/gvfs-helper.c @@ -0,0 +1,3430 @@ +// TODO Write a man page. Here are some notes for dogfooding. +// TODO +// +// Usage: git gvfs-helper [<main_options>] <sub-command> [<sub-command-options>] +// +// <main_options>: +// +// --remote=<remote-name> // defaults to "origin" +// +// --fallback // boolean. defaults to off +// +// When a fetch from the cache-server fails, automatically +// fallback to the main Git server. This option has no effect +// if no cache-server is defined. +// +// --cache-server=<use> // defaults to "verify" +// +// verify := lookup the set of defined cache-servers using +// "gvfs/config" and confirm that the selected +// cache-server is well-known. Silently disable the +// cache-server if not. (See security notes later.) +// +// error := verify cache-server and abort if not well-known. +// +// trust := do not verify cache-server. just use it. +// +// disable := disable the cache-server and always use the main +// Git server. +// +// --shared-cache=<odb-directory-pathname> +// +// A relative or absolute pathname to the ODB directory to store +// fetched objects. 
+// +// If this option is not specified, we default to the value +// in the "gvfs.sharedcache" config setting and then to the +// local ".git/objects" directory. +// +// : +// +// config +// +// Fetch the "gvfs/config" string from the main Git server. +// (The cache-server setting is ignored because cache-servers +// do not support this REST API.) +// +// get +// +// Fetch 1 or more objects one at a time using a "/gvfs/objects" +// GET request. +// +// If a cache-server is configured, +// try it first. Optionally fallback to the main Git server. +// +// The set of objects is given on stdin and is assumed to be +// a list of , one per line. +// +// : +// +// --max-retries= // defaults to "6" +// +// Number of retries after transient network errors. +// Set to zero to disable such retries. +// +// post +// +// Fetch 1 or more objects in bulk using a "/gvfs/objects" POST +// request. +// +// If a cache-server is configured, +// try it first. Optionally fallback to the main Git server. +// +// The set of objects is given on stdin and is assumed to be +// a list of , one per line. +// +// : +// +// --block-size= // defaults to "4000" +// +// Request objects from server in batches of at +// most n objects (not bytes). +// +// --depth= // defaults to "1" +// +// --max-retries= // defaults to "6" +// +// Number of retries after transient network errors. +// Set to zero to disable such retries. +// +// server +// +// Interactive/sub-process mode. Listen for a series of commands +// and data on stdin and return results on stdout. This command +// uses pkt-line format [1] and implements the long-running process +// protocol [2] to communicate with the foreground/parent process. +// +// : +// +// --block-size= // defaults to "4000" +// +// Request objects from server in batches of at +// most n objects (not bytes) when using POST +// requests. +// +// --depth= // defaults to "1" +// +// --max-retries= // defaults to "6" +// +// Number of retries after transient network errors. 
+// Set to zero to disable such retries. +// +// Interactive verb: objects.get +// +// Fetch 1 or more objects, one at a time, using a +// "/gvfs/objects" GET requests. +// +// Each object will be created as a loose object in the ODB. +// +// Interactive verb: objects.post +// +// Fetch 1 or more objects, in bulk, using one or more +// "/gvfs/objects" POST requests. +// +// For both verbs, if a cache-server is configured, try it first. +// Optionally fallback to the main Git server. +// +// Create 1 or more loose objects and/or packfiles in the +// shared-cache ODB. (The pathname of the selected ODB is +// reported at the beginning of the response; this should +// match the pathname given on the command line). +// +// git> objects.get | objects.post +// git> +// git> +// git> ... +// git> +// git> 0000 +// +// git< odb +// git< loose | packfile +// git< loose | packfile +// gid< ... +// git< loose | packfile +// git< ok | partial | error +// git< 0000 +// +// [1] Documentation/technical/protocol-common.txt +// [2] Documentation/technical/long-running-process-protocol.txt +// [3] See GIT_TRACE_PACKET +// +////////////////////////////////////////////////////////////////// + +#define USE_THE_REPOSITORY_VARIABLE +#include "git-compat-util.h" +#include "git-curl-compat.h" +#include "environment.h" +#include "hex.h" +#include "setup.h" +#include "config.h" +#include "remote.h" +#include "connect.h" +#include "strbuf.h" +#include "walker.h" +#include "http.h" +#include "exec-cmd.h" +#include "run-command.h" +#include "pkt-line.h" +#include "string-list.h" +#include "sideband.h" +#include "strvec.h" +#include "credential.h" +#include "oid-array.h" +#include "send-pack.h" +#include "protocol.h" +#include "quote.h" +#include "transport.h" +#include "parse-options.h" +#include "object-file.h" +#include "object-store.h" +#include "json-writer.h" +#include "tempfile.h" +#include "oidset.h" +#include "dir.h" +#include "url.h" +#include "abspath.h" +#include "progress.h" 
+#include "trace2.h" + +static const char * const main_usage[] = { + N_("git gvfs-helper [<main_options>] config [<options>]"), + N_("git gvfs-helper [<main_options>] get [<options>]"), + N_("git gvfs-helper [<main_options>] post [<options>]"), + N_("git gvfs-helper [<main_options>] server [<options>]"), + NULL +}; + +static const char *const objects_get_usage[] = { + N_("git gvfs-helper [<main_options>] get [<options>]"), + NULL +}; + +static const char *const objects_post_usage[] = { + N_("git gvfs-helper [<main_options>] post [<options>]"), + NULL +}; + +static const char *const server_usage[] = { + N_("git gvfs-helper [<main_options>] server [<options>]"), + NULL +}; + +/* + * "commitDepth" field in gvfs protocol + */ +#define GH__DEFAULT__OBJECTS_POST__COMMIT_DEPTH 1 + +/* + * Chunk/block size in number of objects we request in each packfile + */ +#define GH__DEFAULT__OBJECTS_POST__BLOCK_SIZE 4000 + +/* + * Retry attempts (after the initial request) for transient errors and 429s. + */ +#define GH__DEFAULT_MAX_RETRIES 6 + +/* + * Maximum delay in seconds for transient (network) error retries. + */ +#define GH__DEFAULT_MAX_TRANSIENT_BACKOFF_SEC 300 + +/* + * Our exit-codes. Each failure has a distinct value. + */ +enum gh__error_code { + GH__ERROR_CODE__USAGE = -1, /* will be mapped to usage() */ + GH__ERROR_CODE__OK = 0, + GH__ERROR_CODE__ERROR = 1, /* unspecified */ + GH__ERROR_CODE__CURL_ERROR = 2, + GH__ERROR_CODE__HTTP_401 = 3, + GH__ERROR_CODE__HTTP_404 = 4, + GH__ERROR_CODE__HTTP_429 = 5, + GH__ERROR_CODE__HTTP_503 = 6, + GH__ERROR_CODE__HTTP_OTHER = 7, + GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE = 8, + GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE = 9, + GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE = 10, + GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE = 11, + GH__ERROR_CODE__SUBPROCESS_SYNTAX = 12, + GH__ERROR_CODE__INDEX_PACK_FAILED = 13, +}; + +enum gh__cache_server_mode { + /* verify URL. disable if unknown. */ + GH__CACHE_SERVER_MODE__VERIFY_DISABLE = 0, + /* verify URL. error if unknown. 
*/ + GH__CACHE_SERVER_MODE__VERIFY_ERROR, + /* disable the cache-server, if defined */ + GH__CACHE_SERVER_MODE__DISABLE, + /* trust any cache-server */ + GH__CACHE_SERVER_MODE__TRUST_WITHOUT_VERIFY, +}; + +/* + * The set of command line, config, and environment variables + * that we use as input to decide how we should operate. + */ +static struct gh__cmd_opts { + const char *remote_name; + + int try_fallback; /* to git server if cache-server fails */ + int show_progress; + + int depth; + int block_size; + int max_retries; + int max_transient_backoff_sec; + + enum gh__cache_server_mode cache_server_mode; +} gh__cmd_opts; + +/* + * The chosen global state derrived from the inputs in gh__cmd_opts. + */ +static struct gh__global { + struct remote *remote; + + struct credential main_creds; + struct credential cache_creds; + + const char *main_url; + const char *cache_server_url; + + struct strbuf buf_odb_path; + + int http_is_initialized; + int cache_server_is_initialized; /* did sub-command look for one */ + int main_creds_need_approval; /* try to only approve them once */ + +} gh__global; + +enum gh__server_type { + GH__SERVER_TYPE__MAIN = 0, + GH__SERVER_TYPE__CACHE = 1, + + GH__SERVER_TYPE__NR, +}; + +static const char *gh__server_type_label[GH__SERVER_TYPE__NR] = { + "(main)", + "(cs)" +}; + +enum gh__objects_mode { + /* + * Bulk fetch objects. + * + * But also, force the use of HTTP POST regardless of how many + * objects we are requesting. + * + * The GVFS Protocol treats requests for commit objects + * differently in GET and POST requests WRT whether it + * automatically also fetches the referenced trees. + */ + GH__OBJECTS_MODE__POST, + + /* + * Fetch objects one at a time using HTTP GET. + * + * Force the use of GET (primarily because of the commit + * object treatment). 
+ */ + GH__OBJECTS_MODE__GET, +}; + +struct gh__azure_throttle +{ + unsigned long tstu_limit; + unsigned long tstu_remaining; + + unsigned long reset_sec; + unsigned long retry_after_sec; +}; + +static void gh__azure_throttle__zero(struct gh__azure_throttle *azure) +{ + azure->tstu_limit = 0; + azure->tstu_remaining = 0; + azure->reset_sec = 0; + azure->retry_after_sec = 0; +} + +#define GH__AZURE_THROTTLE_INIT { \ + .tstu_limit = 0, \ + .tstu_remaining = 0, \ + .reset_sec = 0, \ + .retry_after_sec = 0, \ + } + +static struct gh__azure_throttle gh__global_throttle[GH__SERVER_TYPE__NR] = { + GH__AZURE_THROTTLE_INIT, + GH__AZURE_THROTTLE_INIT, +}; + +/* + * Stolen from http.c + */ +static CURLcode gh__curlinfo_strbuf(CURL *curl, CURLINFO info, struct strbuf *buf) +{ + char *ptr; + CURLcode ret; + + strbuf_reset(buf); + ret = curl_easy_getinfo(curl, info, &ptr); + if (!ret && ptr) + strbuf_addstr(buf, ptr); + return ret; +} + +enum gh__progress_state { + GH__PROGRESS_STATE__START = 0, + GH__PROGRESS_STATE__PHASE1, + GH__PROGRESS_STATE__PHASE2, + GH__PROGRESS_STATE__PHASE3, +}; + +/* + * Parameters to drive an HTTP request (with any necessary retries). + */ +struct gh__request_params { + /* + * b_is_post indicates if the current HTTP request is a POST=1 or + * a GET=0. This is a lower level field used to setup CURL and + * the tempfile used to receive the content. + * + * It is related to, but different from the GH__OBJECTS_MODE__ + * field that we present to the gvfs-helper client or in the CLI + * (which only concerns the semantics of the /gvfs/objects protocol + * on the set of requested OIDs). + * + * For example, we use an HTTP GET to get the /gvfs/config data + * into a buffer. 
+ */ + int b_is_post; + int b_write_to_file; /* write to file=1 or strbuf=0 */ + int b_permit_cache_server_if_defined; + + enum gh__server_type server_type; + + int k_attempt; /* robust retry attempt */ + int k_transient_delay_sec; /* delay before transient error retries */ + + unsigned long object_count; /* number of objects being fetched */ + + const struct strbuf *post_payload; /* POST body to send */ + + struct curl_slist *headers; /* additional http headers to send */ + struct tempfile *tempfile; /* for response content when file */ + struct strbuf *buffer; /* for response content when strbuf */ + struct strbuf tr2_label; /* for trace2 regions */ + + struct strbuf loose_path; + struct object_id loose_oid; + + struct strbuf temp_path_pack; + struct strbuf temp_path_idx; + struct strbuf final_path_pack; + struct strbuf final_path_idx; + struct strbuf final_packfile_filename; + + /* + * Note that I am putting all of the progress-related instance data + * inside the request-params in the hope that we can eventually + * do multi-threaded/concurrent HTTP requests when chunking + * large requests. However, the underlying "struct progress" API + * is not thread safe (that is, it doesn't allow concurrent progress + * reports (since that might require multiple lines on the screen + * or something)). + */ + enum gh__progress_state progress_state; + struct strbuf progress_base_phase2_msg; + struct strbuf progress_base_phase3_msg; + + /* + * The buffer for the formatted progress message is shared by the + * "struct progress" API and must remain valid for the duration of + * the start_progress..stop_progress lifespan. 
+ */ + struct strbuf progress_msg; + struct progress *progress; + + struct strbuf e2eid; + + struct string_list *result_list; /* we do not own this */ +}; + +#define GH__REQUEST_PARAMS_INIT { \ + .b_is_post = 0, \ + .b_write_to_file = 0, \ + .b_permit_cache_server_if_defined = 1, \ + .server_type = GH__SERVER_TYPE__MAIN, \ + .k_attempt = 0, \ + .k_transient_delay_sec = 0, \ + .object_count = 0, \ + .post_payload = NULL, \ + .headers = NULL, \ + .tempfile = NULL, \ + .buffer = NULL, \ + .tr2_label = STRBUF_INIT, \ + .loose_path = STRBUF_INIT, \ + .loose_oid = {{0}}, \ + .temp_path_pack = STRBUF_INIT, \ + .temp_path_idx = STRBUF_INIT, \ + .final_path_pack = STRBUF_INIT, \ + .final_path_idx = STRBUF_INIT, \ + .final_packfile_filename = STRBUF_INIT, \ + .progress_state = GH__PROGRESS_STATE__START, \ + .progress_base_phase2_msg = STRBUF_INIT, \ + .progress_base_phase3_msg = STRBUF_INIT, \ + .progress_msg = STRBUF_INIT, \ + .progress = NULL, \ + .e2eid = STRBUF_INIT, \ + .result_list = NULL, \ + } + +static void gh__request_params__release(struct gh__request_params *params) +{ + if (!params) + return; + + params->post_payload = NULL; /* we do not own this */ + + curl_slist_free_all(params->headers); + params->headers = NULL; + + delete_tempfile(¶ms->tempfile); + + params->buffer = NULL; /* we do not own this */ + + strbuf_release(¶ms->tr2_label); + strbuf_release(¶ms->loose_path); + strbuf_release(¶ms->temp_path_pack); + strbuf_release(¶ms->temp_path_idx); + strbuf_release(¶ms->final_path_pack); + strbuf_release(¶ms->final_path_idx); + strbuf_release(¶ms->final_packfile_filename); + + strbuf_release(¶ms->progress_base_phase2_msg); + strbuf_release(¶ms->progress_base_phase3_msg); + strbuf_release(¶ms->progress_msg); + + stop_progress(¶ms->progress); + params->progress = NULL; + + strbuf_release(¶ms->e2eid); + + params->result_list = NULL; /* we do not own this */ +} + +/* + * How we handle retries for various unexpected network errors. 
+ */ +enum gh__retry_mode { + /* + * The operation was successful, so no retry is needed. + * Use this for HTTP 200, for example. + */ + GH__RETRY_MODE__SUCCESS = 0, + + /* + * Retry using the normal 401 Auth mechanism. + */ + GH__RETRY_MODE__HTTP_401, + + /* + * Fail because at least one of the requested OIDs does not exist. + */ + GH__RETRY_MODE__FAIL_404, + + /* + * A transient network error, such as dropped connection + * or network IO error. Our belief is that a retry MAY + * succeed. (See Gremlins and Cosmic Rays....) + */ + GH__RETRY_MODE__TRANSIENT, + + /* + * Request was blocked completely because of a 429. + */ + GH__RETRY_MODE__HTTP_429, + + /* + * Request failed because the server was (temporarily?) offline. + */ + GH__RETRY_MODE__HTTP_503, + + /* + * The operation had a hard failure and we have no + * expectation that a second attempt will give a different + * answer, such as a bad hostname or a mal-formed URL. + */ + GH__RETRY_MODE__HARD_FAIL, +}; + +/* + * Bucket to describe the results of an HTTP requests (may be + * overwritten during retries so that it describes the final attempt). 
+ */ +struct gh__response_status { + struct strbuf error_message; + struct strbuf content_type; + enum gh__error_code ec; + enum gh__retry_mode retry; + intmax_t bytes_received; + struct gh__azure_throttle *azure; +}; + +#define GH__RESPONSE_STATUS_INIT { \ + .error_message = STRBUF_INIT, \ + .content_type = STRBUF_INIT, \ + .ec = GH__ERROR_CODE__OK, \ + .retry = GH__RETRY_MODE__SUCCESS, \ + .bytes_received = 0, \ + .azure = NULL, \ + } + +static void gh__response_status__zero(struct gh__response_status *s) +{ + strbuf_setlen(&s->error_message, 0); + strbuf_setlen(&s->content_type, 0); + s->ec = GH__ERROR_CODE__OK; + s->retry = GH__RETRY_MODE__SUCCESS; + s->bytes_received = 0; + s->azure = NULL; +} + +static void install_packfile(struct gh__request_params *params, + struct gh__response_status *status); +static void install_loose(struct gh__request_params *params, + struct gh__response_status *status); + +/* + * Log the E2EID for the current request. + * + * Since every HTTP request to the cache-server and to the main Git server + * will send back a unique E2EID (probably a GUID), we don't want to overload + * telemetry with each ID -- rather, only the ones for which there was a + * problem and that may be helpful in a post mortem. 
+ */ +static void log_e2eid(struct gh__request_params *params, + struct gh__response_status *status) +{ + if (!params->e2eid.len) + return; + + switch (status->retry) { + default: + case GH__RETRY_MODE__SUCCESS: + case GH__RETRY_MODE__HTTP_401: + case GH__RETRY_MODE__FAIL_404: + return; + + case GH__RETRY_MODE__HARD_FAIL: + case GH__RETRY_MODE__TRANSIENT: + case GH__RETRY_MODE__HTTP_429: + case GH__RETRY_MODE__HTTP_503: + break; + } + + if (trace2_is_enabled()) { + struct strbuf key = STRBUF_INIT; + + strbuf_addstr(&key, "e2eid"); + strbuf_addstr(&key, gh__server_type_label[params->server_type]); + + trace2_data_string("gvfs-helper", NULL, key.buf, + params->e2eid.buf); + + strbuf_release(&key); + } +} + +/* + * Normalize a few HTTP response codes before we try to decide + * how to dispatch on them. + */ +static long gh__normalize_odd_codes(struct gh__request_params *params, + long http_response_code) +{ + if (params->server_type == GH__SERVER_TYPE__CACHE && + http_response_code == 400) { + /* + * The cache-server sends a somewhat bogus 400 instead of + * the normal 401 when AUTH is required. Fixup the status + * to hide that. + * + * TODO Technically, the cache-server could send a 400 + * TODO for many reasons, not just for their bogus + * TODO pseudo-401, but we're going to assume it is a + * TODO 401 for now. We should confirm the expected + * TODO error message in the response-body. + */ + return 401; + } + + if (http_response_code == 203) { + /* + * A proxy server transformed a 200 from the origin server + * into a 203. We don't care about the subtle distinction. + */ + return 200; + } + + return http_response_code; +} + +/* + * Map HTTP response codes into a retry strategy. 
+ * See https://en.wikipedia.org/wiki/List_of_HTTP_status_codes + * + * https://docs.microsoft.com/en-us/azure/devops/integrate/concepts/rate-limits?view=azure-devops + */ +static void compute_retry_mode_from_http_response( + struct gh__response_status *status, + long http_response_code) +{ + switch (http_response_code) { + + case 200: + status->retry = GH__RETRY_MODE__SUCCESS; + status->ec = GH__ERROR_CODE__OK; + return; + + case 301: /* all the various flavors of HTTP Redirect */ + case 302: + case 303: + case 304: + case 305: + case 306: + case 307: + case 308: + /* + * TODO Consider a redirected-retry (with or without + * TODO a Retry-After header). + */ + goto hard_fail; + + case 401: + strbuf_addstr(&status->error_message, + "(http:401) Not Authorized"); + status->retry = GH__RETRY_MODE__HTTP_401; + status->ec = GH__ERROR_CODE__HTTP_401; + return; + + case 404: + /* + * TODO if params->object_count > 1, consider + * TODO splitting the request into 2 halves + * TODO and retrying each half in series. + */ + strbuf_addstr(&status->error_message, + "(http:404) Not Found"); + status->retry = GH__RETRY_MODE__FAIL_404; + status->ec = GH__ERROR_CODE__HTTP_404; + return; + + case 429: + /* + * This is a hard block because we've been bad. + */ + strbuf_addstr(&status->error_message, + "(http:429) Too Many Requests [throttled]"); + status->retry = GH__RETRY_MODE__HTTP_429; + status->ec = GH__ERROR_CODE__HTTP_429; + + trace2_data_string("gvfs-helper", NULL, "error/http", + status->error_message.buf); + return; + + case 503: + /* + * We assume that this comes with a "Retry-After" header like 429s. 
+ */ + strbuf_addstr(&status->error_message, + "(http:503) Server Unavailable [throttled]"); + status->retry = GH__RETRY_MODE__HTTP_503; + status->ec = GH__ERROR_CODE__HTTP_503; + + trace2_data_string("gvfs-helper", NULL, "error/http", + status->error_message.buf); + return; + + default: + goto hard_fail; + } + +hard_fail: + strbuf_addf(&status->error_message, "(http:%d) Other [hard_fail]", + (int)http_response_code); + status->retry = GH__RETRY_MODE__HARD_FAIL; + status->ec = GH__ERROR_CODE__HTTP_OTHER; + + trace2_data_string("gvfs-helper", NULL, "error/http", + status->error_message.buf); + return; +} + +/* + * Map CURLE errors code to a retry strategy. + * See and + * https://curl.haxx.se/libcurl/c/libcurl-errors.html + * + * This could be a static table rather than a switch, but + * that is harder to debug and we may want to selectively + * log errors. + * + * I've commented out all of the hard-fail cases for now + * and let the default handle them. This is to indicate + * that I considered them and found them to be not actionable. + * Also, the spelling of some of the CURLE_ symbols seem + * to change between curl releases on different platforms, + * so I'm not going to fight that. 
+ */ +static void compute_retry_mode_from_curl_error( + struct gh__response_status *status, + CURLcode curl_code) +{ + switch (curl_code) { + case CURLE_OK: + status->retry = GH__RETRY_MODE__SUCCESS; + status->ec = GH__ERROR_CODE__OK; + return; + + //se CURLE_UNSUPPORTED_PROTOCOL: goto hard_fail; + //se CURLE_FAILED_INIT: goto hard_fail; + //se CURLE_URL_MALFORMAT: goto hard_fail; + //se CURLE_NOT_BUILT_IN: goto hard_fail; + //se CURLE_COULDNT_RESOLVE_PROXY: goto hard_fail; + //se CURLE_COULDNT_RESOLVE_HOST: goto hard_fail; + case CURLE_COULDNT_CONNECT: goto transient; + //se CURLE_WEIRD_SERVER_REPLY: goto hard_fail; + //se CURLE_REMOTE_ACCESS_DENIED: goto hard_fail; + //se CURLE_FTP_ACCEPT_FAILED: goto hard_fail; + //se CURLE_FTP_WEIRD_PASS_REPLY: goto hard_fail; + //se CURLE_FTP_ACCEPT_TIMEOUT: goto hard_fail; + //se CURLE_FTP_WEIRD_PASV_REPLY: goto hard_fail; + //se CURLE_FTP_WEIRD_227_FORMAT: goto hard_fail; + //se CURLE_FTP_CANT_GET_HOST: goto hard_fail; + case CURLE_HTTP2: goto transient; + //se CURLE_FTP_COULDNT_SET_TYPE: goto hard_fail; + case CURLE_PARTIAL_FILE: goto transient; + //se CURLE_FTP_COULDNT_RETR_FILE: goto hard_fail; + //se CURLE_OBSOLETE20: goto hard_fail; + //se CURLE_QUOTE_ERROR: goto hard_fail; + //se CURLE_HTTP_RETURNED_ERROR: goto hard_fail; + case CURLE_WRITE_ERROR: goto transient; + //se CURLE_OBSOLETE24: goto hard_fail; + case CURLE_UPLOAD_FAILED: goto transient; + //se CURLE_READ_ERROR: goto hard_fail; + //se CURLE_OUT_OF_MEMORY: goto hard_fail; + case CURLE_OPERATION_TIMEDOUT: goto transient; + //se CURLE_OBSOLETE29: goto hard_fail; + //se CURLE_FTP_PORT_FAILED: goto hard_fail; + //se CURLE_FTP_COULDNT_USE_REST: goto hard_fail; + //se CURLE_OBSOLETE32: goto hard_fail; + //se CURLE_RANGE_ERROR: goto hard_fail; + case CURLE_HTTP_POST_ERROR: goto transient; + //se CURLE_SSL_CONNECT_ERROR: goto hard_fail; + //se CURLE_BAD_DOWNLOAD_RESUME: goto hard_fail; + //se CURLE_FILE_COULDNT_READ_FILE: goto hard_fail; + //se CURLE_LDAP_CANNOT_BIND: 
goto hard_fail; + //se CURLE_LDAP_SEARCH_FAILED: goto hard_fail; + //se CURLE_OBSOLETE40: goto hard_fail; + //se CURLE_FUNCTION_NOT_FOUND: goto hard_fail; + //se CURLE_ABORTED_BY_CALLBACK: goto hard_fail; + //se CURLE_BAD_FUNCTION_ARGUMENT: goto hard_fail; + //se CURLE_OBSOLETE44: goto hard_fail; + //se CURLE_INTERFACE_FAILED: goto hard_fail; + //se CURLE_OBSOLETE46: goto hard_fail; + //se CURLE_TOO_MANY_REDIRECTS: goto hard_fail; + //se CURLE_UNKNOWN_OPTION: goto hard_fail; + //se CURLE_TELNET_OPTION_SYNTAX: goto hard_fail; + //se CURLE_OBSOLETE50: goto hard_fail; + //se CURLE_PEER_FAILED_VERIFICATION: goto hard_fail; + //se CURLE_GOT_NOTHING: goto hard_fail; + //se CURLE_SSL_ENGINE_NOTFOUND: goto hard_fail; + //se CURLE_SSL_ENGINE_SETFAILED: goto hard_fail; + case CURLE_SEND_ERROR: goto transient; + case CURLE_RECV_ERROR: goto transient; + //se CURLE_OBSOLETE57: goto hard_fail; + //se CURLE_SSL_CERTPROBLEM: goto hard_fail; + //se CURLE_SSL_CIPHER: goto hard_fail; + //se CURLE_SSL_CACERT: goto hard_fail; + //se CURLE_BAD_CONTENT_ENCODING: goto hard_fail; + //se CURLE_LDAP_INVALID_URL: goto hard_fail; + //se CURLE_FILESIZE_EXCEEDED: goto hard_fail; + //se CURLE_USE_SSL_FAILED: goto hard_fail; + //se CURLE_SEND_FAIL_REWIND: goto hard_fail; + //se CURLE_SSL_ENGINE_INITFAILED: goto hard_fail; + //se CURLE_LOGIN_DENIED: goto hard_fail; + //se CURLE_TFTP_NOTFOUND: goto hard_fail; + //se CURLE_TFTP_PERM: goto hard_fail; + //se CURLE_REMOTE_DISK_FULL: goto hard_fail; + //se CURLE_TFTP_ILLEGAL: goto hard_fail; + //se CURLE_TFTP_UNKNOWNID: goto hard_fail; + //se CURLE_REMOTE_FILE_EXISTS: goto hard_fail; + //se CURLE_TFTP_NOSUCHUSER: goto hard_fail; + //se CURLE_CONV_FAILED: goto hard_fail; + //se CURLE_CONV_REQD: goto hard_fail; + //se CURLE_SSL_CACERT_BADFILE: goto hard_fail; + //se CURLE_REMOTE_FILE_NOT_FOUND: goto hard_fail; + //se CURLE_SSH: goto hard_fail; + //se CURLE_SSL_SHUTDOWN_FAILED: goto hard_fail; + case CURLE_AGAIN: goto transient; + //se 
CURLE_SSL_CRL_BADFILE: goto hard_fail; + //se CURLE_SSL_ISSUER_ERROR: goto hard_fail; + //se CURLE_FTP_PRET_FAILED: goto hard_fail; + //se CURLE_RTSP_CSEQ_ERROR: goto hard_fail; + //se CURLE_RTSP_SESSION_ERROR: goto hard_fail; + //se CURLE_FTP_BAD_FILE_LIST: goto hard_fail; + //se CURLE_CHUNK_FAILED: goto hard_fail; + //se CURLE_NO_CONNECTION_AVAILABLE: goto hard_fail; + //se CURLE_SSL_PINNEDPUBKEYNOTMATCH: goto hard_fail; + //se CURLE_SSL_INVALIDCERTSTATUS: goto hard_fail; +#ifdef CURLE_HTTP2_STREAM + case CURLE_HTTP2_STREAM: goto transient; +#endif + default: goto hard_fail; + } + +hard_fail: + strbuf_addf(&status->error_message, "(curl:%d) %s [hard_fail]", + curl_code, curl_easy_strerror(curl_code)); + status->retry = GH__RETRY_MODE__HARD_FAIL; + status->ec = GH__ERROR_CODE__CURL_ERROR; + + trace2_data_string("gvfs-helper", NULL, "error/curl", + status->error_message.buf); + return; + +transient: + strbuf_addf(&status->error_message, "(curl:%d) %s [transient]", + curl_code, curl_easy_strerror(curl_code)); + status->retry = GH__RETRY_MODE__TRANSIENT; + status->ec = GH__ERROR_CODE__CURL_ERROR; + + trace2_data_string("gvfs-helper", NULL, "error/curl", + status->error_message.buf); + return; +} + +/* + * Create a single normalized 'ec' error-code from the status we + * received from the HTTP request. Map a few of the expected HTTP + * status code to 'ec', but don't get too crazy here. 
+ */ +static void gh__response_status__set_from_slot( + struct gh__request_params *params, + struct gh__response_status *status, + const struct active_request_slot *slot) +{ + long http_response_code; + CURLcode curl_code; + + curl_code = slot->results->curl_result; + gh__curlinfo_strbuf(slot->curl, CURLINFO_CONTENT_TYPE, + &status->content_type); + curl_easy_getinfo(slot->curl, CURLINFO_RESPONSE_CODE, + &http_response_code); + + strbuf_setlen(&status->error_message, 0); + + http_response_code = gh__normalize_odd_codes(params, + http_response_code); + + /* + * Use normalized response/status codes form curl/http to decide + * how to set the error-code we propagate *AND* to decide if we + * we should retry because of transient network problems. + */ + if (curl_code == CURLE_OK || + curl_code == CURLE_HTTP_RETURNED_ERROR) + compute_retry_mode_from_http_response(status, + http_response_code); + else + compute_retry_mode_from_curl_error(status, curl_code); + + if (status->ec != GH__ERROR_CODE__OK) + status->bytes_received = 0; + else if (params->b_write_to_file) + status->bytes_received = (intmax_t)ftell(params->tempfile->fp); + else + status->bytes_received = (intmax_t)params->buffer->len; +} + +static void gh__response_status__release(struct gh__response_status *status) +{ + if (!status) + return; + strbuf_release(&status->error_message); + strbuf_release(&status->content_type); +} + +static int gh__curl_progress_cb(void *clientp, + curl_off_t dltotal, curl_off_t dlnow, + curl_off_t ultotal, curl_off_t ulnow) +{ + struct gh__request_params *params = clientp; + + /* + * From what I can tell, CURL progress arrives in 3 phases. + * + * [1] An initial connection setup phase where we get [0,0] [0,0]. + * [2] An upload phase where we start sending the request headers + * and body. ulnow will be > 0. ultotal may or may not be 0. + * [3] A download phase where we start receiving the response + * headers and payload body. dlnow will be > 0. dltotal may + * or may not be 0. 
+ * + * If we pass zero for the total to the "struct progress" API, we + * get simple numbers rather than percentages. So our progress + * output format may vary depending. + * + * It is unclear if CURL will give us a final callback after + * everything is finished, so we leave the progress handle open + * and let the caller issue the final stop_progress(). + * + * There is a bit of a mismatch between the CURL API and the + * "struct progress" API. The latter requires us to set the + * progress message when we call one of the start_progress + * methods. We cannot change the progress message while we are + * showing progress state. And we cannot change the denominator + * (total) after we start. CURL may or may not give us the total + * sizes for each phase. + * + * Also be advised that the "struct progress" API eats messages + * so that the screen is only updated every second or so. And + * may not print anything if the start..stop happen in less then + * 2 seconds. Whereas CURL calls this callback very frequently. + * The net-net is that we may not actually see this progress + * message for small/fast HTTP requests. + */ + + switch (params->progress_state) { + case GH__PROGRESS_STATE__START: /* first callback */ + if (dlnow == 0 && ulnow == 0) + goto enter_phase_1; + + if (ulnow) + goto enter_phase_2; + else + goto enter_phase_3; + + case GH__PROGRESS_STATE__PHASE1: + if (dlnow == 0 && ulnow == 0) + return 0; + + if (ulnow) + goto enter_phase_2; + else + goto enter_phase_3; + + case GH__PROGRESS_STATE__PHASE2: + display_progress(params->progress, ulnow); + if (dlnow == 0) + return 0; + + stop_progress(¶ms->progress); + goto enter_phase_3; + + case GH__PROGRESS_STATE__PHASE3: + display_progress(params->progress, dlnow); + return 0; + + default: + return 0; + } + +enter_phase_1: + /* + * Don't bother to create a progress handle during phase [1]. 
+ * Because we get [0,0,0,0], we don't have any data to report + * and would just have to synthesize some type of progress. + * From my testing, phase [1] is fairly quick (probably just + * the SSL handshake), so the "struct progress" API will most + * likely completely eat any messages that we did produce. + */ + params->progress_state = GH__PROGRESS_STATE__PHASE1; + return 0; + +enter_phase_2: + strbuf_setlen(¶ms->progress_msg, 0); + if (params->progress_base_phase2_msg.len) { + if (params->k_attempt > 0) + strbuf_addf(¶ms->progress_msg, "%s [retry %d/%d] (bytes sent)", + params->progress_base_phase2_msg.buf, + params->k_attempt, gh__cmd_opts.max_retries); + else + strbuf_addf(¶ms->progress_msg, "%s (bytes sent)", + params->progress_base_phase2_msg.buf); + params->progress = start_progress(params->progress_msg.buf, ultotal); + display_progress(params->progress, ulnow); + } + params->progress_state = GH__PROGRESS_STATE__PHASE2; + return 0; + +enter_phase_3: + strbuf_setlen(¶ms->progress_msg, 0); + if (params->progress_base_phase3_msg.len) { + if (params->k_attempt > 0) + strbuf_addf(¶ms->progress_msg, "%s [retry %d/%d] (bytes received)", + params->progress_base_phase3_msg.buf, + params->k_attempt, gh__cmd_opts.max_retries); + else + strbuf_addf(¶ms->progress_msg, "%s (bytes received)", + params->progress_base_phase3_msg.buf); + params->progress = start_progress(params->progress_msg.buf, dltotal); + display_progress(params->progress, dlnow); + } + params->progress_state = GH__PROGRESS_STATE__PHASE3; + return 0; +} + +/* + * Run the request without using "run_one_slot()" because we + * don't want the post-request normalization, error handling, + * and auto-reauth handling in http.c. 
+ */ +static void gh__run_one_slot(struct active_request_slot *slot, + struct gh__request_params *params, + struct gh__response_status *status) +{ + struct strbuf key = STRBUF_INIT; + + strbuf_addbuf(&key, ¶ms->tr2_label); + strbuf_addstr(&key, gh__server_type_label[params->server_type]); + + params->progress_state = GH__PROGRESS_STATE__START; + strbuf_setlen(¶ms->e2eid, 0); + + trace2_region_enter("gvfs-helper", key.buf, NULL); + + if (!start_active_slot(slot)) { + compute_retry_mode_from_curl_error(status, + CURLE_FAILED_INIT); + } else { + run_active_slot(slot); + if (params->b_write_to_file) + fflush(params->tempfile->fp); + + gh__response_status__set_from_slot(params, status, slot); + + log_e2eid(params, status); + + if (status->ec == GH__ERROR_CODE__OK) { + int old_len = key.len; + + /* + * We only log the number of bytes received. + * We do not log the number of objects requested + * because the server may give us more than that + * (such as when we request a commit). + */ + strbuf_addstr(&key, "/nr_bytes"); + trace2_data_intmax("gvfs-helper", NULL, + key.buf, + status->bytes_received); + strbuf_setlen(&key, old_len); + } + } + + if (params->progress) + stop_progress(¶ms->progress); + + if (status->ec == GH__ERROR_CODE__OK && params->b_write_to_file) { + if (params->b_is_post && + !strcmp(status->content_type.buf, + "application/x-git-packfile")) + install_packfile(params, status); + else + install_loose(params, status); + } + + trace2_region_leave("gvfs-helper", key.buf, NULL); + + strbuf_release(&key); +} + +static int option_parse_cache_server_mode(const struct option *opt, + const char *arg, int unset) +{ + if (unset) /* should not happen */ + return error(_("missing value for switch '%s'"), + opt->long_name); + + else if (!strcmp(arg, "verify")) + gh__cmd_opts.cache_server_mode = + GH__CACHE_SERVER_MODE__VERIFY_DISABLE; + + else if (!strcmp(arg, "error")) + gh__cmd_opts.cache_server_mode = + GH__CACHE_SERVER_MODE__VERIFY_ERROR; + + else if (!strcmp(arg, 
"disable")) + gh__cmd_opts.cache_server_mode = + GH__CACHE_SERVER_MODE__DISABLE; + + else if (!strcmp(arg, "trust")) + gh__cmd_opts.cache_server_mode = + GH__CACHE_SERVER_MODE__TRUST_WITHOUT_VERIFY; + + else + return error(_("invalid value for switch '%s'"), + opt->long_name); + + return 0; +} + +/* + * Let command line args override "gvfs.sharedcache" config setting + * and override the value set by git_default_config(). + * + * The command line is parsed *AFTER* the config is loaded, so + * prepared_alt_odb() has already been called any default or inherited + * shared-cache has already been set. + * + * We have a chance to override it here. + */ +static int option_parse_shared_cache_directory(const struct option *opt, + const char *arg, int unset) +{ + struct strbuf buf_arg = STRBUF_INIT; + + if (unset) /* should not happen */ + return error(_("missing value for switch '%s'"), + opt->long_name); + + strbuf_addstr(&buf_arg, arg); + if (strbuf_normalize_path(&buf_arg) < 0) { + /* + * Pretend command line wasn't given. Use whatever + * settings we already have from the config. + */ + strbuf_release(&buf_arg); + return 0; + } + strbuf_trim_trailing_dir_sep(&buf_arg); + + if (!strbuf_cmp(&buf_arg, &gvfs_shared_cache_pathname)) { + /* + * The command line argument matches what we got from + * the config, so we're already setup correctly. (And + * we have already verified that the directory exists + * on disk.) + */ + strbuf_release(&buf_arg); + return 0; + } + + else if (!gvfs_shared_cache_pathname.len) { + /* + * A shared-cache was requested and we did not inherit one. + * Try it, but let alt_odb_usable() secretly disable it if + * it cannot create the directory on disk. + */ + strbuf_addbuf(&gvfs_shared_cache_pathname, &buf_arg); + + add_to_alternates_memory(buf_arg.buf); + + strbuf_release(&buf_arg); + return 0; + } + + else { + /* + * The requested shared-cache is different from the one + * we inherited. 
Replace the inherited value with this + * one, but smartly fallback if necessary. + */ + struct strbuf buf_prev = STRBUF_INIT; + + strbuf_addbuf(&buf_prev, &gvfs_shared_cache_pathname); + + strbuf_setlen(&gvfs_shared_cache_pathname, 0); + strbuf_addbuf(&gvfs_shared_cache_pathname, &buf_arg); + + add_to_alternates_memory(buf_arg.buf); + + /* + * alt_odb_usable() releases gvfs_shared_cache_pathname + * if it cannot create the directory on disk, so fallback + * to the previous choice when it fails. + */ + if (!gvfs_shared_cache_pathname.len) + strbuf_addbuf(&gvfs_shared_cache_pathname, + &buf_prev); + + strbuf_release(&buf_arg); + strbuf_release(&buf_prev); + return 0; + } +} + +/* + * Lookup the URL for this remote (defaults to 'origin'). + */ +static void lookup_main_url(void) +{ + /* + * Both VFS and Scalar only work with 'origin', so we expect this. + * The command line arg is mainly for debugging. + */ + if (!gh__cmd_opts.remote_name || !*gh__cmd_opts.remote_name) + gh__cmd_opts.remote_name = "origin"; + + gh__global.remote = remote_get(gh__cmd_opts.remote_name); + if (!gh__global.remote->url.v[0] || !*gh__global.remote->url.v[0]) + die("unknown remote '%s'", gh__cmd_opts.remote_name); + + /* + * Strip out any in-line auth in the origin server URL so that + * we can control which creds we fetch. + * + * Azure DevOps has been known to suggest https URLS of the + * form "https://@dev.azure.com//". + * + * Break that so that we can force the use of a PAT. + */ + gh__global.main_url = transport_anonymize_url(gh__global.remote->url.v[0]); + + trace2_data_string("gvfs-helper", NULL, "remote/url", gh__global.main_url); +} + +static void do__http_get__gvfs_config(struct gh__response_status *status, + struct strbuf *config_data); + +/* + * Find the URL of the cache-server, if we have one. + * + * This routine is called by the initialization code and is allowed + * to call die() rather than returning an 'ec'. 
+ */ +static void select_cache_server(void) +{ + struct gh__response_status status = GH__RESPONSE_STATUS_INIT; + struct strbuf config_data = STRBUF_INIT; + const char *match = NULL; + + /* + * This only indicates that the sub-command actually called + * this routine. We rely on gh__global.cache_server_url to tell + * us if we actually have a cache-server configured. + */ + gh__global.cache_server_is_initialized = 1; + gh__global.cache_server_url = NULL; + + if (gh__cmd_opts.cache_server_mode == GH__CACHE_SERVER_MODE__DISABLE) { + trace2_data_string("gvfs-helper", NULL, "cache/url", "disabled"); + return; + } + + /* + * If the cache-server and main Git server have the same URL, we + * can silently disable the cache-server (by NOT setting the field + * in gh__global and explicitly disable the fallback logic.) + */ + if (!strcmp(gvfs_cache_server_url, gh__global.main_url)) { + gh__cmd_opts.try_fallback = 0; + trace2_data_string("gvfs-helper", NULL, "cache/url", "same"); + return; + } + + if (gh__cmd_opts.cache_server_mode == + GH__CACHE_SERVER_MODE__TRUST_WITHOUT_VERIFY) { + gh__global.cache_server_url = gvfs_cache_server_url; + trace2_data_string("gvfs-helper", NULL, "cache/url", + gvfs_cache_server_url); + return; + } + + /* + * GVFS cache-servers use the main Git server's creds rather + * than having their own creds. This feels like a security + * hole. For example, if the cache-server URL is pointed to a + * bad site, we'll happily send them our creds to the main Git + * server with each request to the cache-server. This would + * allow an attacker to later use our creds to impersonate us + * on the main Git server. + * + * So we optionally verify that the URL to the cache-server is + * well-known by the main Git server. + */ + + do__http_get__gvfs_config(&status, &config_data); + + if (status.ec == GH__ERROR_CODE__OK) { + /* + * The gvfs/config response is in JSON, but I don't think + * we need to parse it and all that. 
Lets just do a simple + * strstr() and assume it is sufficient. + * + * We do add some context to the pattern to guard against + * some attacks. + */ + struct strbuf pattern = STRBUF_INIT; + + strbuf_addf(&pattern, "\"Url\":\"%s\"", gvfs_cache_server_url); + match = strstr(config_data.buf, pattern.buf); + + strbuf_release(&pattern); + } + + strbuf_release(&config_data); + + if (match) { + gh__global.cache_server_url = gvfs_cache_server_url; + trace2_data_string("gvfs-helper", NULL, "cache/url", + gvfs_cache_server_url); + } + + else if (gh__cmd_opts.cache_server_mode == + GH__CACHE_SERVER_MODE__VERIFY_ERROR) { + if (status.ec != GH__ERROR_CODE__OK) + die("could not verify cache-server '%s': %s", + gvfs_cache_server_url, + status.error_message.buf); + else + die("could not verify cache-server '%s'", + gvfs_cache_server_url); + } + + else if (gh__cmd_opts.cache_server_mode == + GH__CACHE_SERVER_MODE__VERIFY_DISABLE) { + if (status.ec != GH__ERROR_CODE__OK) + warning("could not verify cache-server '%s': %s", + gvfs_cache_server_url, + status.error_message.buf); + else + warning("could not verify cache-server '%s'", + gvfs_cache_server_url); + trace2_data_string("gvfs-helper", NULL, "cache/url", + "disabled"); + } + + gh__response_status__release(&status); +} + +/* + * Read stdin until EOF (or a blank line) and add the desired OIDs + * to the oidset. + * + * Stdin should contain a list of OIDs. Lines may have additional + * text following the OID that we ignore. 
+ */ +static unsigned long read_stdin_for_oids(struct oidset *oids) +{ + struct object_id oid; + struct strbuf buf_stdin = STRBUF_INIT; + unsigned long count = 0; + + do { + if (strbuf_getline(&buf_stdin, stdin) == EOF || !buf_stdin.len) + break; + + if (get_oid_hex(buf_stdin.buf, &oid)) + continue; /* just silently eat it */ + + if (!oidset_insert(oids, &oid)) + count++; + } while (1); + + strbuf_release(&buf_stdin); + return count; +} + +/* + * Build a complete JSON payload for a gvfs/objects POST request + * containing the first `nr_in_block` OIDs found in the OIDSET + * indexed by the given iterator. + * + * https://github.com/microsoft/VFSForGit/blob/master/Protocol.md + * + * Return the number of OIDs we actually put into the payload. + * If only 1 OID was found, also return it. + */ +static unsigned long build_json_payload__gvfs_objects( + struct json_writer *jw_req, + struct oidset_iter *iter, + unsigned long nr_in_block, + struct object_id *oid_out) +{ + unsigned long k; + const struct object_id *oid; + const struct object_id *oid_prev = NULL; + + k = 0; + + jw_init(jw_req); + jw_object_begin(jw_req, 0); + jw_object_intmax(jw_req, "commitDepth", gh__cmd_opts.depth); + jw_object_inline_begin_array(jw_req, "objectIds"); + while (k < nr_in_block && (oid = oidset_iter_next(iter))) { + jw_array_string(jw_req, oid_to_hex(oid)); + k++; + oid_prev = oid; + } + jw_end(jw_req); + jw_end(jw_req); + + if (oid_out) { + if (k == 1) + oidcpy(oid_out, oid_prev); + else + oidclr(oid_out, the_repository->hash_algo); + } + + return k; +} + +/* + * Lookup the creds for the main/origin Git server. 
+ */ +static void lookup_main_creds(void) +{ + if (gh__global.main_creds.username && *gh__global.main_creds.username) + return; + + credential_from_url(&gh__global.main_creds, gh__global.main_url); + credential_fill(&gh__global.main_creds, 0); + gh__global.main_creds_need_approval = 1; +} + +/* + * If we have a set of creds for the main Git server, tell the credential + * manager to throw them away and ask it to reacquire them. + */ +static void refresh_main_creds(void) +{ + if (gh__global.main_creds.username && *gh__global.main_creds.username) + credential_reject(&gh__global.main_creds); + + lookup_main_creds(); + + // TODO should we compare before and after values of u/p and + // TODO shortcut reauth if we already know it will fail? + // TODO if so, return a bool if same/different. +} + +static void approve_main_creds(void) +{ + if (!gh__global.main_creds_need_approval) + return; + + credential_approve(&gh__global.main_creds); + gh__global.main_creds_need_approval = 0; +} + +/* + * Build a set of creds for the cache-server based upon the main Git + * server (assuming we have a cache-server configured). + * + * That is, we NEVER fill them directly for the cache-server -- we + * only synthesize them from the filled main creds. + */ +static void synthesize_cache_server_creds(void) +{ + if (!gh__global.cache_server_is_initialized) + BUG("sub-command did not initialize cache-server vars"); + + if (!gh__global.cache_server_url) + return; + + if (gh__global.cache_creds.username && *gh__global.cache_creds.username) + return; + + /* + * Get the main Git server creds so we can borrow the username + * and password when we talk to the cache-server. + */ + lookup_main_creds(); + gh__global.cache_creds.username = xstrdup(gh__global.main_creds.username); + gh__global.cache_creds.password = xstrdup(gh__global.main_creds.password); +} + +/* + * Flush and refresh the cache-server creds. 
Because the cache-server + * does not do 401s (or manage creds), we have to reload the main Git + * server creds first. + * + * That is, we NEVER reject them directly because we never filled them. + */ +static void refresh_cache_server_creds(void) +{ + credential_clear(&gh__global.cache_creds); + + refresh_main_creds(); + synthesize_cache_server_creds(); +} + +/* + * We NEVER approve cache-server creds directly because we never directly + * filled them. However, we should be able to infer that the main ones + * are valid and can approve them if necessary. + */ +static void approve_cache_server_creds(void) +{ + approve_main_creds(); +} + +/* + * Get the pathname to the ODB where we write objects that we download. + */ +static void select_odb(void) +{ + prepare_alt_odb(the_repository); + + strbuf_init(&gh__global.buf_odb_path, 0); + + if (gvfs_shared_cache_pathname.len) + strbuf_addbuf(&gh__global.buf_odb_path, + &gvfs_shared_cache_pathname); + else + strbuf_addstr(&gh__global.buf_odb_path, + the_repository->objects->odb->path); +} + +/* + * Create a tempfile to stream the packfile into. + * + * We create a tempfile in the chosen ODB directory and let CURL + * automatically stream data to the file. If successful, we can + * later rename it to a proper .pack and run "git index-pack" on + * it to create the corresponding .idx file. + * + * TODO I would rather to just stream the packfile directly into + * TODO "git index-pack --stdin" (and save some I/O) because it + * TODO will automatically take care of the rename of both files + * TODO and any other cleanup. BUT INDEX-PACK WILL ONLY WRITE + * TODO TO THE PRIMARY ODB -- it will not write into the alternates + * TODO (this is considered bad form). So we would need to add + * TODO an option to index-pack to handle this. I don't want to + * TODO deal with this issue right now. + * + * TODO Consider using lockfile for this rather than naked tempfile. 
+ */ +static void create_tempfile_for_packfile( + struct gh__request_params *params, + struct gh__response_status *status) +{ + static unsigned int nth = 0; + static struct timeval tv = {0}; + static struct tm tm = {0}; + static time_t secs = 0; + static char date[32] = {0}; + + struct strbuf basename = STRBUF_INIT; + struct strbuf buf = STRBUF_INIT; + int len_p; + enum scld_error scld; + + gh__response_status__zero(status); + + if (!nth) { + /* + * Create a string to use in the name of all packfiles + * created by this process. + */ + gettimeofday(&tv, NULL); + secs = tv.tv_sec; + gmtime_r(&secs, &tm); + + xsnprintf(date, sizeof(date), "%4d%02d%02d-%02d%02d%02d-%06ld", + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, + (long)tv.tv_usec); + } + + /* + * Create a for this packfile using a series number , + * so that all of the chunks we download will group together. + */ + strbuf_addf(&basename, "vfs-%s-%04d", date, nth++); + + /* + * We will stream the data into a managed tempfile() in: + * + * "/pack/tempPacks/vfs--.temp" + */ + strbuf_setlen(&buf, 0); + strbuf_addbuf(&buf, &gh__global.buf_odb_path); + strbuf_complete(&buf, '/'); + strbuf_addstr(&buf, "pack/"); + len_p = buf.len; + strbuf_addstr(&buf, "tempPacks/"); + strbuf_addbuf(&buf, &basename); + strbuf_addstr(&buf, ".temp"); + + scld = safe_create_leading_directories(buf.buf); + if (scld != SCLD_OK && scld != SCLD_EXISTS) { + strbuf_addf(&status->error_message, + "could not create directory for packfile: '%s'", + buf.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; + goto cleanup; + } + + params->tempfile = create_tempfile(buf.buf); + if (!params->tempfile) { + strbuf_addf(&status->error_message, + "could not create tempfile for packfile: '%s'", + buf.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; + goto cleanup; + } + + fdopen_tempfile(params->tempfile, "w"); + + /* + * After the download is complete, we will need to steal the file + * from 
the tempfile() class (so that it doesn't magically delete + * it when we close the file handle) and then index it. + * + * We do this into the tempPacks directory to avoid contaminating + * the real pack directory until we know there is no corruption. + * + * "/pack/tempPacks/vfs--.temp.pack" + * "/pack/tempPacks/vfs--.temp.idx" + */ + strbuf_setlen(¶ms->temp_path_pack, 0); + strbuf_addf(¶ms->temp_path_pack, "%s.pack", buf.buf); + + strbuf_setlen(¶ms->temp_path_idx, 0); + strbuf_addf(¶ms->temp_path_idx, "%s.idx", buf.buf); + + /* + * Later, if all goes well, we will install them as: + * + * "/pack/vfs--.pack" + * "/pack/vfs--.idx" + */ + strbuf_setlen(&buf, len_p); + strbuf_setlen(¶ms->final_path_pack, 0); + strbuf_addf(¶ms->final_path_pack, "%s%s.pack", + buf.buf, basename.buf); + strbuf_setlen(¶ms->final_path_idx, 0); + strbuf_addf(¶ms->final_path_idx, "%s%s.idx", + buf.buf, basename.buf); + strbuf_setlen(¶ms->final_packfile_filename, 0); + strbuf_addf(¶ms->final_packfile_filename, "%s.pack", + basename.buf); + +cleanup: + strbuf_release(&buf); + strbuf_release(&basename); +} + +/* + * Create a pathname to the loose object in the shared-cache ODB + * with the given OID. Try to "mkdir -p" to ensure the parent + * directories exist. + */ +static int create_loose_pathname_in_odb(struct strbuf *buf_path, + const struct object_id *oid) +{ + enum scld_error scld; + const char *hex; + + hex = oid_to_hex(oid); + + strbuf_setlen(buf_path, 0); + strbuf_addbuf(buf_path, &gh__global.buf_odb_path); + strbuf_complete(buf_path, '/'); + strbuf_add(buf_path, hex, 2); + strbuf_addch(buf_path, '/'); + strbuf_addstr(buf_path, hex+2); + + scld = safe_create_leading_directories(buf_path->buf); + if (scld != SCLD_OK && scld != SCLD_EXISTS) + return -1; + + return 0; +} + +/* + * Create a tempfile to stream a loose object into. + * + * We create a tempfile in the chosen ODB directory and let CURL + * automatically stream data to the file. 
+ * + * We put it directly in the "/xx/" directory. + */ +static void create_tempfile_for_loose( + struct gh__request_params *params, + struct gh__response_status *status) +{ + static int nth = 0; + struct strbuf buf_path = STRBUF_INIT; + + gh__response_status__zero(status); + + if (create_loose_pathname_in_odb(&buf_path, ¶ms->loose_oid)) { + strbuf_addf(&status->error_message, + "cannot create directory for loose object '%s'", + buf_path.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; + goto cleanup; + } + + /* Remember the full path of the final destination. */ + strbuf_setlen(¶ms->loose_path, 0); + strbuf_addbuf(¶ms->loose_path, &buf_path); + + /* + * Build a unique tempfile pathname based upon it. We avoid + * using lockfiles to avoid issues with stale locks after + * crashes. + */ + strbuf_addf(&buf_path, ".%08u.%.06u.temp", getpid(), nth++); + + params->tempfile = create_tempfile(buf_path.buf); + if (!params->tempfile) { + strbuf_addstr(&status->error_message, + "could not create tempfile for loose object"); + status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; + goto cleanup; + } + + fdopen_tempfile(params->tempfile, "w"); + +cleanup: + strbuf_release(&buf_path); +} + +/* + * Convert the tempfile into a temporary .pack, index it into a temporary .idx + * file, and then install the pair into ODB. + */ +static void install_packfile(struct gh__request_params *params, + struct gh__response_status *status) +{ + struct child_process ip = CHILD_PROCESS_INIT; + + /* + * When we request more than 1 object, the server should always + * send us a packfile. 
+ */ + if (strcmp(status->content_type.buf, + "application/x-git-packfile")) { + strbuf_addf(&status->error_message, + "install_packfile: received unknown content-type '%s'", + status->content_type.buf); + status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; + goto cleanup; + } + + gh__response_status__zero(status); + + if (rename_tempfile(¶ms->tempfile, + params->temp_path_pack.buf) == -1) { + strbuf_addf(&status->error_message, + "could not rename packfile to '%s'", + params->temp_path_pack.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; + goto cleanup; + } + + strvec_push(&ip.args, "index-pack"); + if (gh__cmd_opts.show_progress) + strvec_push(&ip.args, "-v"); + strvec_pushl(&ip.args, "-o", params->temp_path_idx.buf, NULL); + strvec_push(&ip.args, params->temp_path_pack.buf); + ip.git_cmd = 1; + ip.no_stdin = 1; + ip.no_stdout = 1; + + /* + * Note that I DO NOT have a trace2 region around the + * index-pack process by itself. Currently, we are inside the + * trace2 region for running the request and that's fine. + * Later, if/when we stream the download directly to + * index-pack, it will be inside under the same region anyway. + * So, I'm not going to introduce it here. + */ + if (run_command(&ip)) { + unlink(params->temp_path_pack.buf); + unlink(params->temp_path_idx.buf); + strbuf_addf(&status->error_message, + "index-pack failed on '%s'", + params->temp_path_pack.buf); + /* + * Lets assume that index-pack failed because the + * downloaded file is corrupt (truncated). + * + * Retry it as if the network had dropped. 
+ */ + status->retry = GH__RETRY_MODE__TRANSIENT; + status->ec = GH__ERROR_CODE__INDEX_PACK_FAILED; + goto cleanup; + } + + if (finalize_object_file(params->temp_path_pack.buf, + params->final_path_pack.buf) || + finalize_object_file(params->temp_path_idx.buf, + params->final_path_idx.buf)) { + unlink(params->temp_path_pack.buf); + unlink(params->temp_path_idx.buf); + unlink(params->final_path_pack.buf); + unlink(params->final_path_idx.buf); + strbuf_addf(&status->error_message, + "could not install packfile '%s'", + params->final_path_pack.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; + goto cleanup; + } + + + if (params->result_list) { + struct strbuf result_msg = STRBUF_INIT; + + strbuf_addf(&result_msg, "packfile %s", + params->final_packfile_filename.buf); + string_list_append(params->result_list, result_msg.buf); + strbuf_release(&result_msg); + } + +cleanup: + child_process_clear(&ip); +} + +/* + * Wrapper for read_loose_object() to read and verify the hash of a + * loose object, and discard the contents buffer. + * + * Returns 0 on success, negative on error (details may be written to stderr). + */ +static int verify_loose_object(const char *path, + const struct object_id *expected_oid) +{ + enum object_type type; + void *contents = NULL; + unsigned long size; + struct strbuf type_name = STRBUF_INIT; + int ret; + struct object_info oi = OBJECT_INFO_INIT; + struct object_id real_oid = *null_oid(); + oi.typep = &type; + oi.sizep = &size; + oi.type_name = &type_name; + + ret = read_loose_object(path, expected_oid, &real_oid, &contents, &oi); + if (!ret) + free(contents); + strbuf_release(&type_name); + + return ret; +} + +/* + * Convert the tempfile into a permanent loose object in the ODB. + */ +static void install_loose(struct gh__request_params *params, + struct gh__response_status *status) +{ + struct strbuf tmp_path = STRBUF_INIT; + + /* + * We expect a loose object when we do a GET -or- when we + * do a POST with only 1 object. 
+ * + * Note that this content type is singular, not plural. + */ + if (strcmp(status->content_type.buf, + "application/x-git-loose-object")) { + strbuf_addf(&status->error_message, + "install_loose: received unknown content-type '%s'", + status->content_type.buf); + status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; + return; + } + + gh__response_status__zero(status); + + /* + * close tempfile to steal ownership away from tempfile class. + */ + strbuf_addstr(&tmp_path, get_tempfile_path(params->tempfile)); + close_tempfile_gently(params->tempfile); + + /* + * Compute the hash of the received content (while it is still + * in a temp file) and verify that it matches the OID that we + * requested and was not corrupted. + */ + if (verify_loose_object(tmp_path.buf, ¶ms->loose_oid)) { + strbuf_addf(&status->error_message, + "hash failed for received loose object '%s'", + oid_to_hex(¶ms->loose_oid)); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE; + goto cleanup; + } + + /* + * Try to install the tempfile as the actual loose object. + * + * If the loose object already exists, finalize_object_file() + * will NOT overwrite/replace it. It will silently eat the + * EEXIST error and unlink the tempfile as it if was + * successful. We just let it lie to us. + * + * Since our job is to back-fill missing objects needed by a + * foreground git process -- git should have called + * oid_object_info_extended() and loose_object_info() BEFORE + * asking us to download the missing object. So if we get a + * collision we have to assume something else is happening in + * parallel and we lost the race. And that's OK. 
+ */ + if (finalize_object_file(tmp_path.buf, params->loose_path.buf)) { + unlink(tmp_path.buf); + strbuf_addf(&status->error_message, + "could not install loose object '%s'", + params->loose_path.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE; + goto cleanup; + } + + if (params->result_list) { + struct strbuf result_msg = STRBUF_INIT; + + strbuf_addf(&result_msg, "loose %s", + oid_to_hex(¶ms->loose_oid)); + string_list_append(params->result_list, result_msg.buf); + strbuf_release(&result_msg); + } + +cleanup: + strbuf_release(&tmp_path); +} + +/* + * Our wrapper to initialize the HTTP layer. + * + * We always use the real origin server, not the cache-server, when + * initializing the http/curl layer. + */ +static void gh_http_init(void) +{ + if (gh__global.http_is_initialized) + return; + + http_init(gh__global.remote, gh__global.main_url, 0); + gh__global.http_is_initialized = 1; +} + +static void gh_http_cleanup(void) +{ + if (!gh__global.http_is_initialized) + return; + + http_cleanup(); + gh__global.http_is_initialized = 0; +} + +/* + * buffer has ": [\r]\n" + */ +static void parse_resp_hdr_1(const char *buffer, size_t size, size_t nitems, + struct strbuf *key, struct strbuf *value) +{ + const char *end = buffer + (size * nitems); + const char *p; + + p = strchr(buffer, ':'); + + strbuf_setlen(key, 0); + strbuf_add(key, buffer, (p - buffer)); + + p++; /* skip ':' */ + p++; /* skip ' ' */ + + strbuf_setlen(value, 0); + strbuf_add(value, p, (end - p)); + strbuf_trim_trailing_newline(value); +} + +static size_t parse_resp_hdr(char *buffer, size_t size, size_t nitems, + void *void_params) +{ + struct gh__request_params *params = void_params; + struct gh__azure_throttle *azure = &gh__global_throttle[params->server_type]; + + if (starts_with(buffer, "X-RateLimit-")) { + struct strbuf key = STRBUF_INIT; + struct strbuf val = STRBUF_INIT; + + parse_resp_hdr_1(buffer, size, nitems, &key, &val); + + /* + * The following X- headers are specific to 
AzureDevOps. + * Other servers have similar sets of values, but I haven't + * compared them in depth. + */ + // trace2_printf("Throttle: %s %s", key.buf, val.buf); + + if (!strcmp(key.buf, "X-RateLimit-Resource")) { + /* + * The name of the resource that is complaining. + * Just log it because we can't do anything with it. + */ + strbuf_setlen(&key, 0); + strbuf_addstr(&key, "ratelimit/resource"); + strbuf_addstr(&key, gh__server_type_label[params->server_type]); + + trace2_data_string("gvfs-helper", NULL, key.buf, val.buf); + } + + else if (!strcmp(key.buf, "X-RateLimit-Delay")) { + /* + * The amount of delay added to our response. + * Just log it because we can't do anything with it. + */ + unsigned long tarpit_delay_ms; + + strbuf_setlen(&key, 0); + strbuf_addstr(&key, "ratelimit/delay_ms"); + strbuf_addstr(&key, gh__server_type_label[params->server_type]); + + git_parse_ulong(val.buf, &tarpit_delay_ms); + + trace2_data_intmax("gvfs-helper", NULL, key.buf, tarpit_delay_ms); + } + + else if (!strcmp(key.buf, "X-RateLimit-Limit")) { + /* + * The resource limit/quota before we get a 429. + */ + git_parse_ulong(val.buf, &azure->tstu_limit); + } + + else if (!strcmp(key.buf, "X-RateLimit-Remaining")) { + /* + * The amount of our quota remaining. When zero, we + * should get 429s on futher requests until the reset + * time. + */ + git_parse_ulong(val.buf, &azure->tstu_remaining); + } + + else if (!strcmp(key.buf, "X-RateLimit-Reset")) { + /* + * The server gave us a time-in-seconds-since-the-epoch + * for when our quota will be reset (if we stop all + * activity right now). + * + * Checkpoint the local system clock so we can do some + * sanity checks on any clock skew. Also, since we get + * the headers before we get the content, we can adjust + * our delay to compensate for the full download time. 
+ */ + unsigned long now = time(NULL); + unsigned long reset_time; + + git_parse_ulong(val.buf, &reset_time); + if (reset_time > now) + azure->reset_sec = reset_time - now; + } + + strbuf_release(&key); + strbuf_release(&val); + } + + else if (starts_with(buffer, "Retry-After")) { + struct strbuf key = STRBUF_INIT; + struct strbuf val = STRBUF_INIT; + + parse_resp_hdr_1(buffer, size, nitems, &key, &val); + + /* + * We get this header with a 429 and 503 and possibly a 30x. + * + * Curl does have CURLINFO_RETRY_AFTER that nicely parses and + * normalizes the value (and supports HTTP/1.1 usage), but it + * is not present yet in the version shipped with the Mac, so + * we do it directly here. + */ + git_parse_ulong(val.buf, &azure->retry_after_sec); + + strbuf_release(&key); + strbuf_release(&val); + } + + else if (starts_with(buffer, "X-VSS-E2EID")) { + struct strbuf key = STRBUF_INIT; + + /* + * Capture the E2EID as it goes by, but don't log it until we + * know the request result. + */ + parse_resp_hdr_1(buffer, size, nitems, &key, ¶ms->e2eid); + + strbuf_release(&key); + } + + return nitems * size; +} + +/* + * Wait "duration" seconds and drive the progress mechanism. + * + * We spin slightly faster than we need to to keep the progress bar + * drawn (especially if the user presses return while waiting) and to + * compensate for delay factors built into the progress class (which + * might wait for 2 seconds before drawing the first message). 
+ */ +static void do_throttle_spin(struct gh__request_params *params, + const char *tr2_label, + const char *progress_msg, + int duration) +{ + struct strbuf region = STRBUF_INIT; + struct progress *progress = NULL; + unsigned long begin = time(NULL); + unsigned long now = begin; + unsigned long end = begin + duration; + + strbuf_addstr(®ion, tr2_label); + strbuf_addstr(®ion, gh__server_type_label[params->server_type]); + trace2_region_enter("gvfs-helper", region.buf, NULL); + + if (gh__cmd_opts.show_progress) + progress = start_progress(progress_msg, duration); + + while (now < end) { + display_progress(progress, (now - begin)); + + sleep_millisec(100); + + now = time(NULL); + } + + display_progress(progress, duration); + stop_progress(&progress); + + trace2_region_leave("gvfs-helper", region.buf, NULL); + strbuf_release(®ion); +} + +/* + * Delay the outbound request if necessary in response to previous throttle + * blockages or hints. Throttle data is somewhat orthogonal to the status + * results from any previous request and/or the request params of the next + * request. + * + * Note that the throttle info also is cross-process information, such as + * 2 concurrent fetches in 2 different terminal windows to the same server + * will be sharing the same server quota. These could be coordinated too, + * so that a blockage received in one process would prevent the other + * process from starting another request (and also blocked or extending + * the delay interval). We're NOT going to do that level of integration. + * We will let both processes independently attempt the next request. + * This may cause us to miss the end-of-quota boundary if the server + * extends it because of the second request. + * + * TODO Should we have a max-wait option and then return a hard-error + * TODO of some type? 
+ */ +static void do_throttle_wait(struct gh__request_params *params, + struct gh__response_status *status UNUSED) +{ + struct gh__azure_throttle *azure = + &gh__global_throttle[params->server_type]; + + if (azure->retry_after_sec) { + /* + * We were given a hard delay (such as after a 429). + * Spin until the requested time. + */ + do_throttle_spin(params, "throttle/hard", + "Waiting on hard throttle (sec)", + azure->retry_after_sec); + return; + } + + if (azure->reset_sec > 0) { + /* + * We were given a hint that we are overloading + * the server. Voluntarily backoff (before we + * get tarpitted or blocked). + */ + do_throttle_spin(params, "throttle/soft", + "Waiting on soft throttle (sec)", + azure->reset_sec); + return; + } + + if (params->k_transient_delay_sec) { + /* + * Insert an arbitrary delay before retrying after a + * transient (network) failure. + */ + do_throttle_spin(params, "throttle/transient", + "Waiting to retry after network error (sec)", + params->k_transient_delay_sec); + return; + } +} + +static void set_main_creds_on_slot(struct active_request_slot *slot, + const struct credential *creds) +{ + assert(creds == &gh__global.main_creds); + + /* + * When talking to the main/origin server, we have 3 modes + * of operation: + * + * [1] The initial request is sent without loading creds + * and with ANY-AUTH set. (And the `":"` is a magic + * value.) + * + * This allows libcurl to negotiate for us if it can. + * For example, this allows NTLM to work by magic and + * we get 200s without ever seeing a 401. If libcurl + * cannot negotiate for us, it gives us a 401 (and all + * of the 401 code in this file responds to that). + * + * [2] A 401 retry will load the main creds and try again. + * This causes `creds->username`to be non-NULL (even + * if refers to a zero-length string). And we assume + * BASIC Authentication. 
(And a zero-length username + * is a convention for PATs, but then sometimes users + * put the PAT in their `username` field and leave the + * `password` field blank. And that works too.) + * + * [3] Subsequent requests on the same connection use + * whatever worked before. + */ + if (creds && creds->username) { + curl_easy_setopt(slot->curl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC); + curl_easy_setopt(slot->curl, CURLOPT_USERNAME, creds->username); + curl_easy_setopt(slot->curl, CURLOPT_PASSWORD, creds->password); + } else { + curl_easy_setopt(slot->curl, CURLOPT_HTTPAUTH, CURLAUTH_ANY); + curl_easy_setopt(slot->curl, CURLOPT_USERPWD, ":"); + } +} + +static void set_cache_server_creds_on_slot(struct active_request_slot *slot, + const struct credential *creds) +{ + assert(creds == &gh__global.cache_creds); + assert(creds->username); + + /* + * Things are weird when talking to a cache-server: + * + * [1] They don't send 401s on an auth error, rather they send + * a 400 (with a nice human-readable string in the html body). + * This prevents libcurl from doing any negotiation for us. + * + * [2] Cache-servers don't manage their own passwords, but + * rather require us to send the Basic Authentication + * username & password that we would send to the main + * server. (So yes, we have to get creds validated + * against the main server creds and substitute them when + * talking to the cache-server.) + * + * This means that: + * + * [a] We cannot support cache-servers that want to use NTLM. + * + * [b] If we want to talk to a cache-server, we have get the + * Basic Auth creds for the main server. And this may be + * problematic if the libcurl and/or the credential manager + * insists on using NTLM and prevents us from getting them. + * + * So we never try AUTH-ANY and force Basic Auth (if possible). 
+ */ + if (creds && creds->username) { + curl_easy_setopt(slot->curl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC); + curl_easy_setopt(slot->curl, CURLOPT_USERNAME, creds->username); + curl_easy_setopt(slot->curl, CURLOPT_PASSWORD, creds->password); + } +} + +/* + * Do a single HTTP request WITHOUT robust-retry, auth-retry or fallback. + */ +static void do_req(const char *url_base, + const char *url_component, + const struct credential *creds, + struct gh__request_params *params, + struct gh__response_status *status) +{ + struct active_request_slot *slot; + struct slot_results results; + struct strbuf rest_url = STRBUF_INIT; + + gh__response_status__zero(status); + + if (params->b_write_to_file) { + /* Delete dirty tempfile from a previous attempt. */ + if (params->tempfile) + delete_tempfile(¶ms->tempfile); + + if (params->b_is_post) + create_tempfile_for_packfile(params, status); + + create_tempfile_for_loose(params, status); + + if (!params->tempfile || status->ec != GH__ERROR_CODE__OK) + return; + } else { + /* Guard against caller using dirty buffer */ + strbuf_setlen(params->buffer, 0); + } + + end_url_with_slash(&rest_url, url_base); + strbuf_addstr(&rest_url, url_component); + + do_throttle_wait(params, status); + gh__azure_throttle__zero(&gh__global_throttle[params->server_type]); + + slot = get_active_slot(); + slot->results = &results; + + curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 0); /* not a HEAD request */ + curl_easy_setopt(slot->curl, CURLOPT_URL, rest_url.buf); + curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, params->headers); + + if (params->b_is_post) { + curl_easy_setopt(slot->curl, CURLOPT_POST, 1); + curl_easy_setopt(slot->curl, CURLOPT_ENCODING, NULL); + curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDS, + params->post_payload->buf); + curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDSIZE, + (long)params->post_payload->len); + } else { + curl_easy_setopt(slot->curl, CURLOPT_POST, 0); + } + + if (params->b_write_to_file) { + curl_easy_setopt(slot->curl, 
CURLOPT_WRITEFUNCTION, fwrite); + curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, + (void*)params->tempfile->fp); + } else { + curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, + fwrite_buffer); + curl_easy_setopt(slot->curl, CURLOPT_FILE, params->buffer); + } + + curl_easy_setopt(slot->curl, CURLOPT_HEADERFUNCTION, parse_resp_hdr); + curl_easy_setopt(slot->curl, CURLOPT_HEADERDATA, params); + + if (params->server_type == GH__SERVER_TYPE__MAIN) + set_main_creds_on_slot(slot, creds); + else + set_cache_server_creds_on_slot(slot, creds); + + if (params->progress_base_phase2_msg.len || + params->progress_base_phase3_msg.len) { + curl_easy_setopt(slot->curl, CURLOPT_XFERINFOFUNCTION, + gh__curl_progress_cb); + curl_easy_setopt(slot->curl, CURLOPT_XFERINFODATA, params); + curl_easy_setopt(slot->curl, CURLOPT_NOPROGRESS, 0); + } else { + curl_easy_setopt(slot->curl, CURLOPT_NOPROGRESS, 1); + } + + gh__run_one_slot(slot, params, status); + strbuf_release(&rest_url); +} + +/* + * Compute the delay for the nth attempt. + * + * No delay for the first attempt. Then use a normal exponential backoff + * starting from 8. + */ +static int compute_transient_delay(int attempt) +{ + int v; + + if (attempt < 1) + return 0; + + /* + * Let 8K be our hard limit (for integer overflow protection). + * That's over 2 hours. This is 8<<10. + */ + if (attempt > 10) + attempt = 10; + + v = 8 << (attempt - 1); + + if (v > gh__cmd_opts.max_transient_backoff_sec) + v = gh__cmd_opts.max_transient_backoff_sec; + + return v; +} + +/* + * Robustly make an HTTP request. Retry if necessary to hide common + * transient network errors and/or 429 blockages. + * + * For a transient (network) failure (where we do not have a throttle + * delay factor), we should insert a small delay to let the network + * recover. The outage might be because the VPN dropped, or the + * machine went to sleep or something and we want to give the network + * time to come back up. 
Insert AI here :-) + */ +static void do_req__with_robust_retry(const char *url_base, + const char *url_component, + const struct credential *creds, + struct gh__request_params *params, + struct gh__response_status *status) +{ + for (params->k_attempt = 0; + params->k_attempt < gh__cmd_opts.max_retries + 1; + params->k_attempt++) { + + do_req(url_base, url_component, creds, params, status); + + switch (status->retry) { + default: + case GH__RETRY_MODE__SUCCESS: + case GH__RETRY_MODE__HTTP_401: /* caller does auth-retry */ + case GH__RETRY_MODE__HARD_FAIL: + case GH__RETRY_MODE__FAIL_404: + return; + + case GH__RETRY_MODE__HTTP_429: + case GH__RETRY_MODE__HTTP_503: + /* + * We should have gotten a "Retry-After" header with + * these and that gives us the wait time. If not, + * fallthru and use the backoff delay. + */ + if (gh__global_throttle[params->server_type].retry_after_sec) + continue; + /*fallthru*/ + + case GH__RETRY_MODE__TRANSIENT: + params->k_transient_delay_sec = + compute_transient_delay(params->k_attempt); + continue; + } + } +} + +static void do_req__to_main(const char *url_component, + struct gh__request_params *params, + struct gh__response_status *status) +{ + params->server_type = GH__SERVER_TYPE__MAIN; + + /* + * When talking to the main Git server, we DO NOT preload the + * creds before the first request. + */ + + do_req__with_robust_retry(gh__global.main_url, url_component, + &gh__global.main_creds, + params, status); + + if (status->retry == GH__RETRY_MODE__HTTP_401) { + refresh_main_creds(); + + do_req__with_robust_retry(gh__global.main_url, url_component, + &gh__global.main_creds, + params, status); + } + + if (status->retry == GH__RETRY_MODE__SUCCESS) + approve_main_creds(); +} + +static void do_req__to_cache_server(const char *url_component, + struct gh__request_params *params, + struct gh__response_status *status) +{ + params->server_type = GH__SERVER_TYPE__CACHE; + + /* + * When talking to a cache-server, DO force load the creds. 
+ * This implicitly preloads the creds to the main server. + */ + synthesize_cache_server_creds(); + + do_req__with_robust_retry(gh__global.cache_server_url, url_component, + &gh__global.cache_creds, + params, status); + + if (status->retry == GH__RETRY_MODE__HTTP_401) { + refresh_cache_server_creds(); + + do_req__with_robust_retry(gh__global.cache_server_url, + url_component, + &gh__global.cache_creds, + params, status); + } + + if (status->retry == GH__RETRY_MODE__SUCCESS) + approve_cache_server_creds(); +} + +/* + * Try the cache-server (if configured) then fall-back to the main Git server. + */ +static void do_req__with_fallback(const char *url_component, + struct gh__request_params *params, + struct gh__response_status *status) +{ + if (gh__global.cache_server_url && + params->b_permit_cache_server_if_defined) { + do_req__to_cache_server(url_component, params, status); + + if (status->retry == GH__RETRY_MODE__SUCCESS) + return; + + if (!gh__cmd_opts.try_fallback) + return; + + /* + * The cache-server shares creds with the main Git server, + * so if our creds failed against the cache-server, they + * will also fail against the main Git server. We just let + * this fail. + * + * Falling-back would likely just cause the 3rd (or maybe + * 4th) cred prompt. + */ + if (status->retry == GH__RETRY_MODE__HTTP_401) + return; + } + + do_req__to_main(url_component, params, status); +} + +/* + * Call "gvfs/config" REST API. + * + * Return server's response buffer. This is probably a raw JSON string. 
+ */ +static void do__http_get__gvfs_config(struct gh__response_status *status, + struct strbuf *config_data) +{ + struct gh__request_params params = GH__REQUEST_PARAMS_INIT; + + strbuf_addstr(&params.tr2_label, "GET/config"); + + params.b_is_post = 0; + params.b_write_to_file = 0; + /* cache-servers do not handle gvfs/config REST calls */ + params.b_permit_cache_server_if_defined = 0; + params.buffer = config_data; + + params.object_count = 1; /* a bit of a lie */ + + /* + * "X-TFS-FedAuthRedirect: Suppress" disables the 302 + 203 redirect + * sequence to a login page and forces the main Git server to send a + * normal 401. + */ + params.headers = http_copy_default_headers(); + params.headers = curl_slist_append(params.headers, + "X-TFS-FedAuthRedirect: Suppress"); + params.headers = curl_slist_append(params.headers, + "Pragma: no-cache"); + + if (gh__cmd_opts.show_progress) { + /* + * gvfs/config has a very small request payload, so I don't + * see any need to report progress on the upload side of + * the GET. So just report progress on the download side. + */ + strbuf_addstr(&params.progress_base_phase3_msg, + "Receiving gvfs/config"); + } + + do_req__with_fallback("gvfs/config", &params, status); + + gh__request_params__release(&params); +} + +static void setup_gvfs_objects_progress(struct gh__request_params *params, + unsigned long num, unsigned long den) +{ + if (!gh__cmd_opts.show_progress) + return; + + if (params->b_is_post) { + strbuf_addf(&params->progress_base_phase3_msg, + "Receiving packfile %ld/%ld with %ld objects", + num, den, params->object_count); + } + /* If requesting only one object, then do not show progress */ +} + +/* + * Call "gvfs/objects/<oid>" REST API to fetch a loose object + * and write it to the ODB. 
+ */ +static void do__http_get__gvfs_object(struct gh__response_status *status, + const struct object_id *oid, + unsigned long l_num, unsigned long l_den, + struct string_list *result_list) +{ + struct gh__request_params params = GH__REQUEST_PARAMS_INIT; + struct strbuf component_url = STRBUF_INIT; + + gh__response_status__zero(status); + + strbuf_addf(&component_url, "gvfs/objects/%s", oid_to_hex(oid)); + + strbuf_addstr(&params.tr2_label, "GET/objects"); + + params.b_is_post = 0; + params.b_write_to_file = 1; + params.b_permit_cache_server_if_defined = 1; + + params.object_count = 1; + + params.result_list = result_list; + + params.headers = http_copy_default_headers(); + params.headers = curl_slist_append(params.headers, + "X-TFS-FedAuthRedirect: Suppress"); + params.headers = curl_slist_append(params.headers, + "Pragma: no-cache"); + + oidcpy(&params.loose_oid, oid); + + setup_gvfs_objects_progress(&params, l_num, l_den); + + do_req__with_fallback(component_url.buf, &params, status); + + gh__request_params__release(&params); + strbuf_release(&component_url); +} + +/* + * Call "gvfs/objects" POST REST API to fetch a batch of objects + * from the OIDSET. Normally, this results in a packfile containing + * `nr_wanted_in_block` objects. And we return the number actually + * consumed (along with the filename of the resulting packfile). + * + * However, if we only have 1 oid (remaining) in the OIDSET, the + * server *MAY* respond to our POST with a loose object rather than + * a packfile with 1 object. + * + * Append a message to the result_list describing the result. + * + * Return the number of OIDs consumed from the OIDSET in `*nr_oid_taken`. 
+ */ +static void do__http_post__gvfs_objects(struct gh__response_status *status, + struct oidset_iter *iter, + unsigned long nr_wanted_in_block, + int j_pack_num, int j_pack_den, + struct string_list *result_list, + unsigned long *nr_oid_taken) +{ + struct json_writer jw_req = JSON_WRITER_INIT; + struct gh__request_params params = GH__REQUEST_PARAMS_INIT; + + gh__response_status__zero(status); + + params.object_count = build_json_payload__gvfs_objects( + &jw_req, iter, nr_wanted_in_block, &params.loose_oid); + *nr_oid_taken = params.object_count; + + strbuf_addstr(&params.tr2_label, "POST/objects"); + + params.b_is_post = 1; + params.b_write_to_file = 1; + params.b_permit_cache_server_if_defined = 1; + + params.post_payload = &jw_req.json; + + params.result_list = result_list; + + params.headers = http_copy_default_headers(); + params.headers = curl_slist_append(params.headers, + "X-TFS-FedAuthRedirect: Suppress"); + params.headers = curl_slist_append(params.headers, + "Pragma: no-cache"); + params.headers = curl_slist_append(params.headers, + "Content-Type: application/json"); + /* + * If our POST contains more than one object, we want the + * server to send us a packfile. 
We DO NOT want the non-standard + * concatenated loose object format, so we DO NOT send: + * "Accept: application/x-git-loose-objects" (plural) + * + * However, if the payload only requests 1 OID, the server + * will send us a single loose object instead of a packfile, + * so we ACK that and send: + * "Accept: application/x-git-loose-object" (singular) + */ + params.headers = curl_slist_append(params.headers, + "Accept: application/x-git-packfile"); + params.headers = curl_slist_append(params.headers, + "Accept: application/x-git-loose-object"); + + setup_gvfs_objects_progress(&params, j_pack_num, j_pack_den); + + do_req__with_fallback("gvfs/objects", &params, status); + + gh__request_params__release(&params); + jw_release(&jw_req); +} + +/* + * Drive one or more HTTP GET requests to fetch the objects + * in the given OIDSET. These are received into loose objects. + * + * Accumulate results for each request in `result_list` until we get a + * hard error and have to stop. + */ +static void do__http_get__fetch_oidset(struct gh__response_status *status, + struct oidset *oids, + unsigned long nr_oid_total, + struct string_list *result_list) +{ + struct oidset_iter iter; + struct strbuf err404 = STRBUF_INIT; + const struct object_id *oid; + unsigned long k; + int had_404 = 0; + + gh__response_status__zero(status); + if (!nr_oid_total) + return; + + oidset_iter_init(oids, &iter); + + for (k = 0; k < nr_oid_total; k++) { + oid = oidset_iter_next(&iter); + + do__http_get__gvfs_object(status, oid, k+1, nr_oid_total, + result_list); + + /* + * If we get a 404 for an individual object, ignore + * it and get the rest. We'll fixup the 'ec' later. + */ + if (status->ec == GH__ERROR_CODE__HTTP_404) { + if (!err404.len) + strbuf_addf(&err404, "%s: from GET %s", + status->error_message.buf, + oid_to_hex(oid)); + /* + * Mark the fetch as "incomplete", but don't + * stop trying to get other chunks. 
+ */ + had_404 = 1; + continue; + } + + if (status->ec != GH__ERROR_CODE__OK) { + /* Stop at the first hard error. */ + strbuf_addf(&status->error_message, ": from GET %s", + oid_to_hex(oid)); + goto cleanup; + } + } + +cleanup: + if (had_404 && status->ec == GH__ERROR_CODE__OK) { + strbuf_setlen(&status->error_message, 0); + strbuf_addbuf(&status->error_message, &err404); + status->ec = GH__ERROR_CODE__HTTP_404; + } + + strbuf_release(&err404); +} + +/* + * Drive one or more HTTP POST requests to bulk fetch the objects in + * the given OIDSET. Create one or more packfiles and/or loose objects. + * + * Accumulate results for each request in `result_list` until we get a + * hard error and have to stop. + */ +static void do__http_post__fetch_oidset(struct gh__response_status *status, + struct oidset *oids, + unsigned long nr_oid_total, + struct string_list *result_list) +{ + struct oidset_iter iter; + struct strbuf err404 = STRBUF_INIT; + unsigned long k; + unsigned long nr_oid_taken; + int j_pack_den = 0; + int j_pack_num = 0; + int had_404 = 0; + + gh__response_status__zero(status); + if (!nr_oid_total) + return; + + oidset_iter_init(oids, &iter); + + j_pack_den = ((nr_oid_total + gh__cmd_opts.block_size - 1) + / gh__cmd_opts.block_size); + + for (k = 0; k < nr_oid_total; k += nr_oid_taken) { + j_pack_num++; + + do__http_post__gvfs_objects(status, &iter, + gh__cmd_opts.block_size, + j_pack_num, j_pack_den, + result_list, + &nr_oid_taken); + + /* + * Because the oidset iterator has random + * order, it does no good to say the k-th or + * n-th chunk was incomplete; the client + * cannot use that index for anything. + * + * We get a 404 when at least one object in + * the chunk was not found. + * + * For now, ignore the 404 and go on to the + * next chunk and then fixup the 'ec' later. 
+ */ + if (status->ec == GH__ERROR_CODE__HTTP_404) { + if (!err404.len) + strbuf_addf(&err404, + "%s: from POST", + status->error_message.buf); + /* + * Mark the fetch as "incomplete", but don't + * stop trying to get other chunks. + */ + had_404 = 1; + continue; + } + + if (status->ec != GH__ERROR_CODE__OK) { + /* Stop at the first hard error. */ + strbuf_addstr(&status->error_message, + ": from POST"); + goto cleanup; + } + } + +cleanup: + if (had_404 && status->ec == GH__ERROR_CODE__OK) { + strbuf_setlen(&status->error_message, 0); + strbuf_addbuf(&status->error_message, &err404); + status->ec = GH__ERROR_CODE__HTTP_404; + } + + strbuf_release(&err404); +} + +/* + * Finish with initialization. This happens after the main option + * parsing, dispatch to sub-command, and sub-command option parsing + * and before actually doing anything. + * + * Optionally configure the cache-server if the sub-command will + * use it. + */ +static void finish_init(int setup_cache_server) +{ + select_odb(); + + lookup_main_url(); + gh_http_init(); + + if (setup_cache_server) + select_cache_server(); +} + +/* + * Request gvfs/config from main Git server. (Config data is not + * available from a GVFS cache-server.) + * + * Print the received server configuration (as the raw JSON string). + */ +static enum gh__error_code do_sub_cmd__config(int argc UNUSED, const char **argv UNUSED) +{ + struct gh__response_status status = GH__RESPONSE_STATUS_INIT; + struct strbuf config_data = STRBUF_INIT; + enum gh__error_code ec = GH__ERROR_CODE__OK; + + trace2_cmd_mode("config"); + + finish_init(0); + + do__http_get__gvfs_config(&status, &config_data); + ec = status.ec; + + if (ec == GH__ERROR_CODE__OK) + printf("%s\n", config_data.buf); + else + error("config: %s", status.error_message.buf); + + gh__response_status__release(&status); + strbuf_release(&config_data); + + return ec; +} + +/* + * Read a list of objects from stdin and fetch them as a series of + * single object HTTP GET requests. 
+ */ +static enum gh__error_code do_sub_cmd__get(int argc, const char **argv) +{ + static struct option get_options[] = { + OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries, + N_("retries for transient network errors")), + OPT_END(), + }; + + struct gh__response_status status = GH__RESPONSE_STATUS_INIT; + struct oidset oids = OIDSET_INIT; + struct string_list result_list = STRING_LIST_INIT_DUP; + enum gh__error_code ec = GH__ERROR_CODE__OK; + unsigned long nr_oid_total; + int k; + + trace2_cmd_mode("get"); + + if (argc > 1 && !strcmp(argv[1], "-h")) + usage_with_options(objects_get_usage, get_options); + + argc = parse_options(argc, argv, NULL, get_options, objects_get_usage, 0); + if (gh__cmd_opts.max_retries < 0) + gh__cmd_opts.max_retries = 0; + + finish_init(1); + + nr_oid_total = read_stdin_for_oids(&oids); + + do__http_get__fetch_oidset(&status, &oids, nr_oid_total, &result_list); + + ec = status.ec; + + for (k = 0; k < result_list.nr; k++) + printf("%s\n", result_list.items[k].string); + + if (ec != GH__ERROR_CODE__OK) + error("get: %s", status.error_message.buf); + + gh__response_status__release(&status); + oidset_clear(&oids); + string_list_clear(&result_list, 0); + + return ec; +} + +/* + * Read a list of objects from stdin and fetch them in a single request (or + * multiple block-size requests) using one or more HTTP POST requests. 
+ */ +static enum gh__error_code do_sub_cmd__post(int argc, const char **argv) +{ + static struct option post_options[] = { + OPT_MAGNITUDE('b', "block-size", &gh__cmd_opts.block_size, + N_("number of objects to request at a time")), + OPT_INTEGER('d', "depth", &gh__cmd_opts.depth, + N_("Commit depth")), + OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries, + N_("retries for transient network errors")), + OPT_END(), + }; + + struct gh__response_status status = GH__RESPONSE_STATUS_INIT; + struct oidset oids = OIDSET_INIT; + struct string_list result_list = STRING_LIST_INIT_DUP; + enum gh__error_code ec = GH__ERROR_CODE__OK; + unsigned long nr_oid_total; + int k; + + trace2_cmd_mode("post"); + + if (argc > 1 && !strcmp(argv[1], "-h")) + usage_with_options(objects_post_usage, post_options); + + argc = parse_options(argc, argv, NULL, post_options, objects_post_usage, 0); + if (gh__cmd_opts.depth < 1) + gh__cmd_opts.depth = 1; + if (gh__cmd_opts.max_retries < 0) + gh__cmd_opts.max_retries = 0; + + finish_init(1); + + nr_oid_total = read_stdin_for_oids(&oids); + + do__http_post__fetch_oidset(&status, &oids, nr_oid_total, &result_list); + + ec = status.ec; + + for (k = 0; k < result_list.nr; k++) + printf("%s\n", result_list.items[k].string); + + if (ec != GH__ERROR_CODE__OK) + error("post: %s", status.error_message.buf); + + gh__response_status__release(&status); + oidset_clear(&oids); + string_list_clear(&result_list, 0); + + return ec; +} + +/* + * Handle the 'objects.get' and 'objects.post' verbs in "server mode". + * + * Only call error() and set ec for hard errors where we cannot + * communicate correctly with the foreground client process. Pass any + * actual data errors (such as 404's or 401's from the fetch) back to + * the client process. 
+ */ +static enum gh__error_code do_server_subprocess__objects(const char *verb_line) +{ + struct gh__response_status status = GH__RESPONSE_STATUS_INIT; + struct oidset oids = OIDSET_INIT; + struct object_id oid; + struct string_list result_list = STRING_LIST_INIT_DUP; + enum gh__error_code ec = GH__ERROR_CODE__OK; + char *line; + int len; + int err; + int k; + enum gh__objects_mode objects_mode; + unsigned long nr_oid_total = 0; + + if (!strcmp(verb_line, "objects.get")) + objects_mode = GH__OBJECTS_MODE__GET; + else if (!strcmp(verb_line, "objects.post")) + objects_mode = GH__OBJECTS_MODE__POST; + else { + error("server: unexpected objects-mode verb '%s'", verb_line); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + + while (1) { + len = packet_read_line_gently(0, NULL, &line); + if (len < 0 || !line) + break; + + if (get_oid_hex(line, &oid)) { + error("server: invalid oid syntax '%s'", line); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + + if (!oidset_insert(&oids, &oid)) + nr_oid_total++; + } + + if (!nr_oid_total) { + if (packet_write_fmt_gently(1, "ok\n")) { + error("server: cannot write 'get' result to client"); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + } else + ec = GH__ERROR_CODE__OK; + goto cleanup; + } + + if (objects_mode == GH__OBJECTS_MODE__GET) + do__http_get__fetch_oidset(&status, &oids, nr_oid_total, &result_list); + else + do__http_post__fetch_oidset(&status, &oids, nr_oid_total, &result_list); + + /* + * Write pathname of the ODB where we wrote all of the objects + * we fetched. 
+ */ + if (packet_write_fmt_gently(1, "odb %s\n", + gh__global.buf_odb_path.buf)) { + error("server: cannot write 'odb' to client"); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + + for (k = 0; k < result_list.nr; k++) + if (packet_write_fmt_gently(1, "%s\n", + result_list.items[k].string)) + { + error("server: cannot write result to client: '%s'", + result_list.items[k].string); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + + /* + * We only use status.ec to tell the client whether the request + * was complete, incomplete, or had IO errors. We DO NOT return + * this value to our caller. + */ + err = 0; + if (status.ec == GH__ERROR_CODE__OK) + err = packet_write_fmt_gently(1, "ok\n"); + else if (status.ec == GH__ERROR_CODE__HTTP_404) + err = packet_write_fmt_gently(1, "partial\n"); + else + err = packet_write_fmt_gently(1, "error %s\n", + status.error_message.buf); + if (err) { + error("server: cannot write result to client"); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + + if (packet_flush_gently(1)) { + error("server: cannot flush result to client"); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + +cleanup: + oidset_clear(&oids); + string_list_clear(&result_list, 0); + + return ec; +} + +typedef enum gh__error_code (fn_subprocess_cmd)(const char *verb_line); + +struct subprocess_capability { + const char *name; + int client_has; + fn_subprocess_cmd *pfn; +}; + +static struct subprocess_capability caps[] = { + { "objects", 0, do_server_subprocess__objects }, + { NULL, 0, NULL }, +}; + +/* + * Handle the subprocess protocol handshake as described in: + * [] Documentation/technical/protocol-common.txt + * [] Documentation/technical/long-running-process-protocol.txt + */ +static int do_protocol_handshake(void) +{ +#define OUR_SUBPROCESS_VERSION "1" + + char *line; + int len; + int k; + int b_support_our_version = 0; + + len = packet_read_line_gently(0, NULL, &line); + if (len < 0 || !line || 
strcmp(line, "gvfs-helper-client")) { + error("server: subprocess welcome handshake failed: %s", line); + return -1; + } + + while (1) { + const char *v; + len = packet_read_line_gently(0, NULL, &line); + if (len < 0 || !line) + break; + if (!skip_prefix(line, "version=", &v)) { + error("server: subprocess version handshake failed: %s", + line); + return -1; + } + b_support_our_version |= (!strcmp(v, OUR_SUBPROCESS_VERSION)); + } + if (!b_support_our_version) { + error("server: client does not support our version: %s", + OUR_SUBPROCESS_VERSION); + return -1; + } + + if (packet_write_fmt_gently(1, "gvfs-helper-server\n") || + packet_write_fmt_gently(1, "version=%s\n", + OUR_SUBPROCESS_VERSION) || + packet_flush_gently(1)) { + error("server: cannot write version handshake"); + return -1; + } + + while (1) { + const char *v; + int k; + + len = packet_read_line_gently(0, NULL, &line); + if (len < 0 || !line) + break; + if (!skip_prefix(line, "capability=", &v)) { + error("server: subprocess capability handshake failed: %s", + line); + return -1; + } + for (k = 0; caps[k].name; k++) + if (!strcmp(v, caps[k].name)) + caps[k].client_has = 1; + } + + for (k = 0; caps[k].name; k++) + if (caps[k].client_has) + if (packet_write_fmt_gently(1, "capability=%s\n", + caps[k].name)) { + error("server: cannot write capabilities handshake: %s", + caps[k].name); + return -1; + } + if (packet_flush_gently(1)) { + error("server: cannot write capabilities handshake"); + return -1; + } + + return 0; +} + +/* + * Interactively listen to stdin for a series of commands and execute them. 
+ */ +static enum gh__error_code do_sub_cmd__server(int argc, const char **argv) +{ + static struct option server_options[] = { + OPT_MAGNITUDE('b', "block-size", &gh__cmd_opts.block_size, + N_("number of objects to request at a time")), + OPT_INTEGER('d', "depth", &gh__cmd_opts.depth, + N_("Commit depth")), + OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries, + N_("retries for transient network errors")), + OPT_END(), + }; + + enum gh__error_code ec = GH__ERROR_CODE__OK; + char *line; + int len; + int k; + + trace2_cmd_mode("server"); + + if (argc > 1 && !strcmp(argv[1], "-h")) + usage_with_options(server_usage, server_options); + + argc = parse_options(argc, argv, NULL, server_options, server_usage, 0); + if (gh__cmd_opts.depth < 1) + gh__cmd_opts.depth = 1; + if (gh__cmd_opts.max_retries < 0) + gh__cmd_opts.max_retries = 0; + + finish_init(1); + + if (do_protocol_handshake()) { + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + +top_of_loop: + while (1) { + len = packet_read_line_gently(0, NULL, &line); + if (len < 0 || !line) { + /* use extra FLUSH as a QUIT */ + ec = GH__ERROR_CODE__OK; + goto cleanup; + } + + for (k = 0; caps[k].name; k++) { + if (caps[k].client_has && + starts_with(line, caps[k].name)) { + ec = (caps[k].pfn)(line); + if (ec != GH__ERROR_CODE__OK) + goto cleanup; + goto top_of_loop; + } + } + + error("server: unknown command '%s'", line); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + +cleanup: + return ec; +} + +static enum gh__error_code do_sub_cmd(int argc, const char **argv) +{ + if (!strcmp(argv[0], "get")) + return do_sub_cmd__get(argc, argv); + + if (!strcmp(argv[0], "post")) + return do_sub_cmd__post(argc, argv); + + if (!strcmp(argv[0], "config")) + return do_sub_cmd__config(argc, argv); + + if (!strcmp(argv[0], "server")) + return do_sub_cmd__server(argc, argv); + + // TODO have "test" mode that could be used to drive + // TODO unit testing. 
+ + return GH__ERROR_CODE__USAGE; +} + +/* + * Communicate with the primary Git server or a GVFS cache-server using the + * GVFS Protocol. + * + * https://github.com/microsoft/VFSForGit/blob/master/Protocol.md + */ +int cmd_main(int argc, const char **argv) +{ + static struct option main_options[] = { + OPT_STRING('r', "remote", &gh__cmd_opts.remote_name, + N_("remote"), + N_("Remote name")), + OPT_BOOL('f', "fallback", &gh__cmd_opts.try_fallback, + N_("Fallback to Git server if cache-server fails")), + OPT_CALLBACK(0, "cache-server", NULL, + N_("cache-server"), + N_("cache-server=disable|trust|verify|error"), + option_parse_cache_server_mode), + OPT_CALLBACK(0, "shared-cache", NULL, + N_("pathname"), + N_("Pathname to shared objects directory"), + option_parse_shared_cache_directory), + OPT_BOOL('p', "progress", &gh__cmd_opts.show_progress, + N_("Show progress")), + OPT_END(), + }; + + enum gh__error_code ec = GH__ERROR_CODE__OK; + + if (argc > 1 && !strcmp(argv[1], "-h")) + usage_with_options(main_usage, main_options); + + trace2_cmd_name("gvfs-helper"); + packet_trace_identity("gvfs-helper"); + + setup_git_directory_gently(NULL); + + /* Set any non-zero initial values in gh__cmd_opts. */ + gh__cmd_opts.depth = GH__DEFAULT__OBJECTS_POST__COMMIT_DEPTH; + gh__cmd_opts.block_size = GH__DEFAULT__OBJECTS_POST__BLOCK_SIZE; + gh__cmd_opts.max_retries = GH__DEFAULT_MAX_RETRIES; + gh__cmd_opts.max_transient_backoff_sec = + GH__DEFAULT_MAX_TRANSIENT_BACKOFF_SEC; + + gh__cmd_opts.show_progress = !!isatty(2); + + // TODO use existing gvfs config settings to override our GH__DEFAULT_ + // TODO values in gh__cmd_opts. (And maybe add/remove our command line + // TODO options for them.) 
+ // TODO + // TODO See "scalar.max-retries" (and maybe "gvfs.max-retries") + + git_config(git_default_config, NULL); + + argc = parse_options(argc, argv, NULL, main_options, main_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + if (argc == 0) + usage_with_options(main_usage, main_options); + + ec = do_sub_cmd(argc, argv); + + gh_http_cleanup(); + + if (ec == GH__ERROR_CODE__USAGE) + usage_with_options(main_usage, main_options); + + return ec; +} diff --git a/object-file.c b/object-file.c index 5144a339a8fd2b..0a0b292d807d60 100644 --- a/object-file.c +++ b/object-file.c @@ -46,6 +46,7 @@ #include "sigchain.h" #include "sub-process.h" #include "pkt-line.h" +#include "gvfs-helper-client.h" /* The maximum size for an object header. */ #define MAX_HEADER_LEN 32 @@ -530,6 +531,8 @@ const char *loose_object_path(struct repository *r, struct strbuf *buf, return odb_loose_path(r->objects->odb, buf, oid); } +static int gvfs_matched_shared_cache_to_alternate; + /* * Return non-zero iff the path is usable as an alternate object database. */ @@ -539,6 +542,52 @@ static int alt_odb_usable(struct raw_object_store *o, { int r; + if (!strbuf_cmp(path, &gvfs_shared_cache_pathname)) { + /* + * `gvfs.sharedCache` is the preferred alternate that we + * will use with `gvfs-helper.exe` to dynamically fetch + * missing objects. It is set during git_default_config(). + * + * Make sure the directory exists on disk before we let the + * stock code discredit it. + */ + struct strbuf buf_pack_foo = STRBUF_INIT; + enum scld_error scld; + + /* + * Force create the "<odb>" and "<odb>/pack" directories, if + * not present on disk. Append an extra bogus directory to + * get safe_create_leading_directories() to see "<odb>/pack" + * as a leading directory of something deeper (which it + * won't create). 
+ */ + strbuf_addf(&buf_pack_foo, "%s/pack/foo", path->buf); + + scld = safe_create_leading_directories(buf_pack_foo.buf); + if (scld != SCLD_OK && scld != SCLD_EXISTS) { + error_errno(_("could not create shared-cache ODB '%s'"), + gvfs_shared_cache_pathname.buf); + + strbuf_release(&buf_pack_foo); + + /* + * Pretend no shared-cache was requested and + * effectively fallback to ".git/objects" for + * fetching missing objects. + */ + strbuf_release(&gvfs_shared_cache_pathname); + return 0; + } + + /* + * We know that there is an alternate (either from + * .git/objects/info/alternates or from a memory-only + * entry) associated with the shared-cache directory. + */ + gvfs_matched_shared_cache_to_alternate++; + strbuf_release(&buf_pack_foo); + } + /* Detect cases where alternate disappeared */ if (!is_directory(path->buf)) { error(_("object directory %s does not exist; " @@ -1022,6 +1071,33 @@ void prepare_alt_odb(struct repository *r) link_alt_odb_entries(r, r->objects->alternate_db, PATH_SEP, NULL, 0); read_info_alternates(r, r->objects->odb->path, 0); + + if (gvfs_shared_cache_pathname.len && + !gvfs_matched_shared_cache_to_alternate) { + /* + * There is no entry in .git/objects/info/alternates for + * the requested shared-cache directory. Therefore, the + * odb-list does not contain this directory. + * + * Force this directory into the odb-list as an in-memory + * alternate. Implicitly create the directory on disk, if + * necessary. + * + * See GIT_ALTERNATE_OBJECT_DIRECTORIES for another example + * of this kind of usage. + * + * Note: This has the net-effect of allowing Git to treat + * `gvfs.sharedCache` as an unofficial alternate. This + * usage should be discouraged for compatibility reasons + * with other tools in the overall Git ecosystem (that + * won't know about this trick). It would be much better + * for us to update .git/objects/info/alternates instead. + * The code here is considered a backstop. 
+ */ + link_alt_odb_entries(r, gvfs_shared_cache_pathname.buf, + '\n', NULL, 0); + } + r->objects->loaded_alternates = 1; } @@ -1770,7 +1846,7 @@ static int do_oid_object_info_extended(struct repository *r, const struct object_id *real = oid; int already_retried = 0; int tried_hook = 0; - + int tried_gvfs_helper = 0; if (flags & OBJECT_INFO_LOOKUP_REPLACE) real = lookup_replace_object(r, oid); @@ -1808,13 +1884,41 @@ static int do_oid_object_info_extended(struct repository *r, if (!loose_object_info(r, real, oi, flags)) return 0; + if (core_use_gvfs_helper && !tried_gvfs_helper) { + enum gh_client__created ghc; + + if (flags & OBJECT_INFO_SKIP_FETCH_OBJECT) + return -1; + + gh_client__get_immediate(real, &ghc); + tried_gvfs_helper = 1; + + /* + * Retry the lookup IIF `gvfs-helper` created one + * or more new packfiles or loose objects. + */ + if (ghc != GHC__CREATED__NOTHING) + continue; + + /* + * If `gvfs-helper` fails, we just want to return -1. + * But allow the other providers to have a shot at it. + * (At least until we have a chance to consolidate + * them.) + */ + } + /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { reprepare_packed_git(r); if (find_pack_entry(r, real, &e)) break; if (core_virtualize_objects && !tried_hook) { + // TODO Assert or at least trace2 if gvfs-helper + // TODO was tried and failed and then read-object-hook + // TODO is successful at getting this object. tried_hook = 1; + // TODO BUG? Should 'oid' be 'real' ? 
if (!read_object_process(oid)) goto retry; } @@ -3152,6 +3256,13 @@ struct oidtree *odb_loose_cache(struct object_directory *odb, return odb->loose_objects_cache; } +void odb_loose_cache_add_new_oid(struct object_directory *odb, + const struct object_id *oid) +{ + struct oidtree *cache = odb_loose_cache(odb, oid); + append_loose_object(oid, NULL, cache); +} + void odb_clear_loose_cache(struct object_directory *odb) { oidtree_clear(odb->loose_objects_cache); diff --git a/object-store-ll.h b/object-store-ll.h index 63611f9f45f224..dca6548b9cb1c6 100644 --- a/object-store-ll.h +++ b/object-store-ll.h @@ -94,6 +94,14 @@ void restore_primary_odb(struct object_directory *restore_odb, const char *old_p struct oidtree *odb_loose_cache(struct object_directory *odb, const struct object_id *oid); +/* + * Add a new object to the loose object cache (possibly after the + * cache was populated). This might be used after dynamically + * fetching a missing object. + */ +void odb_loose_cache_add_new_oid(struct object_directory *odb, + const struct object_id *oid); + /* Empty the loose object cache for the specified object directory. 
*/ void odb_clear_loose_cache(struct object_directory *odb); diff --git a/packfile.c b/packfile.c index c3346197a08a2c..5f0b8c54219cc3 100644 --- a/packfile.c +++ b/packfile.c @@ -769,6 +769,12 @@ void install_packed_git(struct repository *r, struct packed_git *pack) hashmap_add(&r->objects->pack_map, &pack->packmap_ent); } +void install_packed_git_and_mru(struct repository *r, struct packed_git *pack) +{ + install_packed_git(r, pack); + list_add(&pack->mru, &r->objects->packed_git_mru); +} + void (*report_garbage)(unsigned seen_bits, const char *path); static void report_helper(const struct string_list *list, diff --git a/packfile.h b/packfile.h index 33efd0d4c0bfe1..4f2777cdce3748 100644 --- a/packfile.h +++ b/packfile.h @@ -71,6 +71,7 @@ extern void (*report_garbage)(unsigned seen_bits, const char *path); void reprepare_packed_git(struct repository *r); void install_packed_git(struct repository *r, struct packed_git *pack); +void install_packed_git_and_mru(struct repository *r, struct packed_git *pack); struct packed_git *get_packed_git(struct repository *r); struct list_head *get_packed_git_mru(struct repository *r); diff --git a/promisor-remote.c b/promisor-remote.c index 9345ae3db235fb..6e8c6094d0f326 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -1,9 +1,11 @@ #define USE_THE_REPOSITORY_VARIABLE #include "git-compat-util.h" +#include "environment.h" #include "gettext.h" #include "hex.h" #include "object-store-ll.h" +#include "gvfs-helper-client.h" #include "promisor-remote.h" #include "config.h" #include "trace2.h" @@ -218,7 +220,7 @@ struct promisor_remote *repo_promisor_remote_find(struct repository *r, int repo_has_promisor_remote(struct repository *r) { - return !!repo_promisor_remote_find(r, NULL); + return core_use_gvfs_helper || !!repo_promisor_remote_find(r, NULL); } static int remove_fetched_oids(struct repository *repo, @@ -265,6 +267,15 @@ void promisor_remote_get_direct(struct repository *repo, if (oid_nr == 0) return; + if 
(core_use_gvfs_helper) { + enum gh_client__created ghc = GHC__CREATED__NOTHING; + + trace2_data_intmax("bug", the_repository, "fetch_objects/gvfs-helper", oid_nr); + gh_client__queue_oid_array(oids, oid_nr); + if (!gh_client__drain_queue(&ghc)) + return; + die(_("failed to fetch missing objects from the remote")); + } promisor_remote_init(repo); diff --git a/sub-process.c b/sub-process.c index 1daf5a975254b9..9a4951fdccf218 100644 --- a/sub-process.c +++ b/sub-process.c @@ -5,6 +5,7 @@ #include "sub-process.h" #include "sigchain.h" #include "pkt-line.h" +#include "quote.h" int cmd2process_cmp(const void *cmp_data UNUSED, const struct hashmap_entry *eptr, @@ -81,7 +82,12 @@ int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, co int err; struct child_process *process; - entry->cmd = cmd; + // BUGBUG most callers to subprocess_start() pass in "cmd" the value + // BUGBUG of find_hook() which returns a static buffer (that's only + // BUGBUG good until the next call to find_hook()). + // BUGFIX Defer assignment until we copy the string in our argv. 
+ // entry->cmd = cmd; + process = &entry->process; child_process_init(process); @@ -93,6 +99,8 @@ int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, co process->clean_on_exit_handler = subprocess_exit_handler; process->trace2_child_class = "subprocess"; + entry->cmd = process->args.v[0]; + err = start_command(process); if (err) { error("cannot fork to run subprocess '%s'", cmd); @@ -112,6 +120,52 @@ int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, co return 0; } +int subprocess_start_strvec(struct hashmap *hashmap, + struct subprocess_entry *entry, + int is_git_cmd, + const struct strvec *argv, + subprocess_start_fn startfn) +{ + int err; + int k; + struct child_process *process; + struct strbuf quoted = STRBUF_INIT; + + process = &entry->process; + + child_process_init(process); + for (k = 0; k < argv->nr; k++) + strvec_push(&process->args, argv->v[k]); + process->use_shell = 1; + process->in = -1; + process->out = -1; + process->git_cmd = is_git_cmd; + process->clean_on_exit = 1; + process->clean_on_exit_handler = subprocess_exit_handler; + process->trace2_child_class = "subprocess"; + + sq_quote_argv_pretty(&quoted, argv->v); + entry->cmd = strbuf_detach(&quoted, NULL); + + err = start_command(process); + if (err) { + error("cannot fork to run subprocess '%s'", entry->cmd); + return err; + } + + hashmap_entry_init(&entry->ent, strhash(entry->cmd)); + + err = startfn(entry); + if (err) { + error("initialization for subprocess '%s' failed", entry->cmd); + subprocess_stop(hashmap, entry); + return err; + } + + hashmap_add(hashmap, &entry->ent); + return 0; +} + static int handshake_version(struct child_process *process, const char *welcome_prefix, int *versions, int *chosen_version) diff --git a/sub-process.h b/sub-process.h index 6a61638a8ace0b..73cc536646df79 100644 --- a/sub-process.h +++ b/sub-process.h @@ -56,6 +56,12 @@ typedef int(*subprocess_start_fn)(struct subprocess_entry *entry); int 
subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, const char *cmd, subprocess_start_fn startfn); +int subprocess_start_strvec(struct hashmap *hashmap, + struct subprocess_entry *entry, + int is_git_cmd, + const struct strvec *argv, + subprocess_start_fn startfn); + /* Kill a subprocess and remove it from the subprocess hashmap. */ void subprocess_stop(struct hashmap *hashmap, struct subprocess_entry *entry); diff --git a/t/helper/.gitignore b/t/helper/.gitignore index 8c2ddcce95f7aa..4687ed470c5978 100644 --- a/t/helper/.gitignore +++ b/t/helper/.gitignore @@ -1,2 +1,3 @@ +/test-gvfs-protocol /test-tool /test-fake-ssh diff --git a/t/helper/test-gvfs-protocol.c b/t/helper/test-gvfs-protocol.c new file mode 100644 index 00000000000000..a5b39149cf1f15 --- /dev/null +++ b/t/helper/test-gvfs-protocol.c @@ -0,0 +1,1852 @@ +#define USE_THE_REPOSITORY_VARIABLE +#include "git-compat-util.h" +#include "environment.h" +#include "hex.h" +#include "alloc.h" +#include "setup.h" +#include "protocol.h" +#include "config.h" +#include "pkt-line.h" +#include "run-command.h" +#include "strbuf.h" +#include "string-list.h" +#include "trace2.h" +#include "object.h" +#include "object-store.h" +#include "replace-object.h" +#include "repository.h" +#include "version.h" +#include "dir.h" +#include "json-writer.h" +#include "oidset.h" +#include "date.h" +#include "wrapper.h" +#include "git-zlib.h" + +#define TR2_CAT "test-gvfs-protocol" + +static const char *pid_file; +static int verbose; +static int reuseaddr; +static struct string_list mayhem_list = STRING_LIST_INIT_DUP; +static int mayhem_child = 0; +static struct json_writer jw_config = JSON_WRITER_INIT; + +/* + * We look for one of these "servertypes" in the uri-base + * so we can behave differently when we need to. 
+ */ +#define MY_SERVER_TYPE__ORIGIN "servertype/origin" +#define MY_SERVER_TYPE__CACHE "servertype/cache" + +static const char test_gvfs_protocol_usage[] = +"gvfs-protocol [--verbose]\n" +" [--timeout=] [--init-timeout=] [--max-connections=]\n" +" [--reuseaddr] [--pid-file=]\n" +" [--listen=]* [--port=]\n" +" [--mayhem=]*\n" +; + +/* Timeout, and initial timeout */ +static unsigned int timeout; +static unsigned int init_timeout; + +static void logreport(const char *label, const char *err, va_list params) +{ + struct strbuf msg = STRBUF_INIT; + + strbuf_addf(&msg, "[%"PRIuMAX"] %s: ", (uintmax_t)getpid(), label); + strbuf_vaddf(&msg, err, params); + strbuf_addch(&msg, '\n'); + + fwrite(msg.buf, sizeof(char), msg.len, stderr); + fflush(stderr); + + strbuf_release(&msg); +} + +__attribute__((format (printf, 1, 2))) +static void logerror(const char *err, ...) +{ + va_list params; + va_start(params, err); + logreport("error", err, params); + va_end(params); +} + +__attribute__((format (printf, 1, 2))) +static void loginfo(const char *err, ...) +{ + va_list params; + if (!verbose) + return; + va_start(params, err); + logreport("info", err, params); + va_end(params); +} + +__attribute__((format (printf, 1, 2))) +static void logmayhem(const char *err, ...) +{ + va_list params; + if (!verbose) + return; + va_start(params, err); + logreport("mayhem", err, params); + va_end(params); +} + +static void set_keep_alive(int sockfd) +{ + int ka = 1; + + if (setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, &ka, sizeof(ka)) < 0) { + if (errno != ENOTSOCK) + logerror("unable to set SO_KEEPALIVE on socket: %s", + strerror(errno)); + } +} + +////////////////////////////////////////////////////////////////// +// The code in this section is used by "worker" instances to service +// a single connection from a client. The worker talks to the client +// on 0 and 1. +////////////////////////////////////////////////////////////////// + +enum worker_result { + /* + * Operation successful. 
+ * Caller *might* keep the socket open and allow keep-alive. + */ + WR_OK = 0, + /* + * Various errors while processing the request and/or the response. + * Close the socket and clean up. + * Exit child-process with non-zero status. + */ + WR_IO_ERROR = 1<<0, + /* + * Close the socket and clean up. Does not imply an error. + */ + WR_HANGUP = 1<<1, + /* + * The result of a function was influenced by the mayhem settings. + * Does not imply that we need to exit or close the socket. + * Just advice to callers in the worker stack. + */ + WR_MAYHEM = 1<<2, + + WR_STOP_THE_MUSIC = (WR_IO_ERROR | WR_HANGUP), +}; + +/* + * Fields from a parsed HTTP request. + */ +struct req { + struct strbuf start_line; + struct string_list start_line_fields; + + struct strbuf uri_base; + struct strbuf gvfs_api; + struct strbuf slash_args; + struct strbuf quest_args; + + struct string_list header_list; +}; + +#define REQ__INIT { \ + .start_line = STRBUF_INIT, \ + .start_line_fields = STRING_LIST_INIT_DUP, \ + .uri_base = STRBUF_INIT, \ + .gvfs_api = STRBUF_INIT, \ + .slash_args = STRBUF_INIT, \ + .quest_args = STRBUF_INIT, \ + .header_list = STRING_LIST_INIT_DUP, \ + } + +static void req__release(struct req *req) +{ + strbuf_release(&req->start_line); + string_list_clear(&req->start_line_fields, 0); + + strbuf_release(&req->uri_base); + strbuf_release(&req->gvfs_api); + strbuf_release(&req->slash_args); + strbuf_release(&req->quest_args); + + string_list_clear(&req->header_list, 0); +} + +/* + * Generate a somewhat bogus UUID/GUID that is good enough for + * a test suite, but without requiring platform-specific UUID + * or GUID libraries. 
+ */ +static void gen_fake_uuid(struct strbuf *uuid) +{ + static unsigned int seq = 0; + static struct timeval tv; + static struct tm tm; + static time_t secs; + + strbuf_setlen(uuid, 0); + + if (!seq) { + gettimeofday(&tv, NULL); + secs = tv.tv_sec; + gmtime_r(&secs, &tm); + } + + /* + * Build a string that looks like: + * + * "ffffffff-eeee-dddd-cccc-bbbbbbbbbbbb" + * + * Note that the first digit in the "dddd" section gives the + * UUID type. We set it to zero so that we won't collide with + * any "real" UUIDs. + */ + strbuf_addf(uuid, "%04d%02d%02d-%02d%02d-00%02d-%04x-%08x%04x", + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, + tm.tm_sec, + (unsigned)(getpid() & 0xffff), + (unsigned)(tv.tv_usec & 0xffffffff), + (seq++ & 0xffff)); +} + +/* + * Send a chunk of data to the client using HTTP chunked + * transfer coding rules. + * + * https://tools.ietf.org/html/rfc7230#section-4.1 + */ +static enum worker_result send_chunk(int fd, const unsigned char *buf, + size_t len_buf) +{ + char chunk_size[100]; + int chunk_size_len = xsnprintf(chunk_size, sizeof(chunk_size), + "%x\r\n", (unsigned int)len_buf); + + if ((write_in_full(fd, chunk_size, chunk_size_len) < 0) || + (write_in_full(fd, buf, len_buf) < 0) || + (write_in_full(fd, "\r\n", 2) < 0)) { + logerror("unable to send chunk"); + return WR_IO_ERROR; + } + + return WR_OK; +} + +static enum worker_result send_final_chunk(int fd) +{ + if (write_in_full(fd, "0\r\n\r\n", 5) < 0) { + logerror("unable to send final chunk"); + return WR_IO_ERROR; + } + + return WR_OK; +} + +static enum worker_result send_http_error( + int fd, + int http_code, const char *http_code_name, + int retry_after_seconds, enum worker_result wr_in) +{ + struct strbuf response_header = STRBUF_INIT; + struct strbuf response_content = STRBUF_INIT; + struct strbuf uuid = STRBUF_INIT; + enum worker_result wr; + + strbuf_addf(&response_content, "Error: %d %s\r\n", + http_code, http_code_name); + if (retry_after_seconds > 0) + 
strbuf_addf(&response_content, "Retry-After: %d\r\n", + retry_after_seconds); + + strbuf_addf (&response_header, "HTTP/1.1 %d %s\r\n", http_code, http_code_name); + strbuf_addstr(&response_header, "Cache-Control: private\r\n"); + strbuf_addstr(&response_header, "Content-Type: text/plain\r\n"); + strbuf_addf (&response_header, "Content-Length: %d\r\n", (int)response_content.len); + if (retry_after_seconds > 0) + strbuf_addf (&response_header, "Retry-After: %d\r\n", retry_after_seconds); + strbuf_addf( &response_header, "Server: test-gvfs-protocol/%s\r\n", git_version_string); + strbuf_addf( &response_header, "Date: %s\r\n", show_date(time(NULL), 0, DATE_MODE(RFC2822))); + gen_fake_uuid(&uuid); + strbuf_addf( &response_header, "X-VSS-E2EID: %s\r\n", uuid.buf); + strbuf_addstr(&response_header, "\r\n"); + + if (write_in_full(fd, response_header.buf, response_header.len) < 0) { + logerror("unable to write response header"); + wr = WR_IO_ERROR; + goto done; + } + + if (write_in_full(fd, response_content.buf, response_content.len) < 0) { + logerror("unable to write response content body"); + wr = WR_IO_ERROR; + goto done; + } + + wr = wr_in; + +done: + strbuf_release(&uuid); + strbuf_release(&response_header); + strbuf_release(&response_content); + + return wr; +} + +/* + * Return 1 if we send an AUTH error to the client. 
+ */ +static int mayhem_try_auth(struct req *req, enum worker_result *wr_out) +{ + *wr_out = WR_OK; + + if (string_list_has_string(&mayhem_list, "http_401")) { + struct string_list_item *item; + int has_auth = 0; + for_each_string_list_item(item, &req->header_list) { + if (starts_with(item->string, "Authorization: Basic")) { + has_auth = 1; + break; + } + } + if (!has_auth) { + if (strstr(req->uri_base.buf, MY_SERVER_TYPE__ORIGIN)) { + logmayhem("http_401 (origin)"); + *wr_out = send_http_error(1, 401, "Unauthorized", -1, + WR_MAYHEM); + return 1; + } + + else if (strstr(req->uri_base.buf, MY_SERVER_TYPE__CACHE)) { + /* + * Cache servers use a non-standard 400 rather than a 401. + */ + logmayhem("http_400 (cacheserver)"); + *wr_out = send_http_error(1, 400, "Bad Request", -1, + WR_MAYHEM); + return 1; + } + + else { + /* + * Non-qualified server type. + */ + logmayhem("http_401"); + *wr_out = send_http_error(1, 401, "Unauthorized", -1, + WR_MAYHEM); + return 1; + } + } + } + + return 0; +} + +/* + * Build fake gvfs/config data using our IP address and port. + * + * The Min/Max data is just random noise copied from the example + * in the documentation. 
+ */ +static void build_gvfs_config_json(struct json_writer *jw, + struct string_list *listen_addr, + int listen_port) +{ + jw_object_begin(jw, 0); + { + jw_object_inline_begin_array(jw, "AllowedGvfsClientVersions"); + { + jw_array_inline_begin_object(jw); + { + jw_object_inline_begin_object(jw, "Max"); + { + jw_object_intmax(jw, "Major", 0); + jw_object_intmax(jw, "Minor", 4); + jw_object_intmax(jw, "Build", 0); + jw_object_intmax(jw, "Revision", 0); + } + jw_end(jw); + + jw_object_inline_begin_object(jw, "Min"); + { + jw_object_intmax(jw, "Major", 0); + jw_object_intmax(jw, "Minor", 2); + jw_object_intmax(jw, "Build", 0); + jw_object_intmax(jw, "Revision", 0); + } + jw_end(jw); + } + jw_end(jw); + + jw_array_inline_begin_object(jw); + { + jw_object_null(jw, "Max"); + jw_object_inline_begin_object(jw, "Min"); + { + jw_object_intmax(jw, "Major", 0); + jw_object_intmax(jw, "Minor", 5); + jw_object_intmax(jw, "Build", 16326); + jw_object_intmax(jw, "Revision", 1); + } + jw_end(jw); + } + jw_end(jw); + } + jw_end(jw); + + jw_object_inline_begin_array(jw, "CacheServers"); + { + struct string_list_item *item; + int k = 0; + + for_each_string_list_item(item, listen_addr) { + jw_array_inline_begin_object(jw); + { + struct strbuf buf = STRBUF_INIT; + + strbuf_addf(&buf, "http://%s:%d/%s", + item->string, + listen_port, + MY_SERVER_TYPE__CACHE); + jw_object_string(jw, "Url", buf.buf); + strbuf_release(&buf); + + strbuf_addf(&buf, "cs%02d", k); + jw_object_string(jw, "Name", buf.buf); + strbuf_release(&buf); + + jw_object_bool(jw, "GlobalDefault", + k++ == 0); + } + jw_end(jw); + } + } + jw_end(jw); + } + jw_end(jw); +} +/* + * Per the GVFS Protocol, this should only be recognized on the origin + * server (not the cache-server). It returns a JSON payload of config + * data. 
+ */ +static enum worker_result do__gvfs_config__get(struct req *req) +{ + struct strbuf response_header = STRBUF_INIT; + struct strbuf uuid = STRBUF_INIT; + enum worker_result wr; + + if (strstr(req->uri_base.buf, MY_SERVER_TYPE__CACHE)) + return send_http_error(1, 404, "Not Found", -1, WR_OK); + + strbuf_addstr(&response_header, "HTTP/1.1 200 OK\r\n"); + strbuf_addstr(&response_header, "Cache-Control: private\r\n"); + strbuf_addstr(&response_header, "Content-Type: text/plain\r\n"); + strbuf_addf( &response_header, "Content-Length: %d\r\n", (int)jw_config.json.len); + strbuf_addf( &response_header, "Server: test-gvfs-protocol/%s\r\n", git_version_string); + strbuf_addf( &response_header, "Date: %s\r\n", show_date(time(NULL), 0, DATE_MODE(RFC2822))); + gen_fake_uuid(&uuid); + strbuf_addf( &response_header, "X-VSS-E2EID: %s\r\n", uuid.buf); + strbuf_addstr(&response_header, "\r\n"); + + if (write_in_full(1, response_header.buf, response_header.len) < 0) { + logerror("unable to write response header"); + wr = WR_IO_ERROR; + goto done; + } + + if (write_in_full(1, jw_config.json.buf, jw_config.json.len) < 0) { + logerror("unable to write response content body"); + wr = WR_IO_ERROR; + goto done; + } + + wr = WR_OK; + +done: + strbuf_release(&uuid); + strbuf_release(&response_header); + + return wr; +} + +/* + * Send the contents of the in-memory inflated object in "compressed + * loose object" format over the socket. + * + * Because we are using keep-alive and are streaming the compressed + * chunks as we produce them, we set the transport-encoding and not + * the content-length. + * + * Our usage here is different from `git-http-backend` because it will + * only send a loose object if it exists as a loose object in the ODB + * (see the "/objects/[0-9a-f]{2}/[0-9a-f]{38}$" regex_t declarations) + * by doing a file-copy. + * + * We want to send an arbitrary object without regard for how it is + * currently stored in the local ODB. 
+ * + * Also, we don't want any of the type-specific branching found in the + * sha1-file.c functions (such as special casing BLOBs). Specifically, + * we DO NOT want any of the content conversion filters. We just want + * to send the raw content as is. + * + * So, we steal freely from sha1-file.c routines: + * write_object_file_prepare() + * write_loose_object() + */ +static enum worker_result send_loose_object(const struct object_id *oid, + int fd) +{ +#define MAX_HEADER_LEN 32 + struct strbuf response_header = STRBUF_INIT; + struct strbuf uuid = STRBUF_INIT; + char object_header[MAX_HEADER_LEN]; + unsigned char compressed[4096]; + git_zstream stream; + struct object_id oid_check; + git_hash_ctx c; + int object_header_len; + int ret; + unsigned flags = 0; + void *content; + unsigned long size; + enum object_type type; + struct object_info oi = OBJECT_INFO_INIT; + int mayhem__corrupt_loose = string_list_has_string(&mayhem_list, + "corrupt_loose"); + + /* + * Since `test-gvfs-protocol` is mocking a real GVFS server (cache or + * main), we don't want a request for a missing object to cause the + * implicit dynamic fetch mechanism to try to fault-it-in (and cause + * our call to oid_object_info_extended() to launch another instance + * of `gvfs-helper` to magically fetch it (which would connect to a + * new instance of `test-gvfs-protocol`)). + * + * Rather, we want a missing object to fail, so we can respond with + * a 404, for example. 
+ */ + flags |= OBJECT_INFO_FOR_PREFETCH; + flags |= OBJECT_INFO_LOOKUP_REPLACE; + + oi.typep = &type; + oi.sizep = &size; + oi.contentp = &content; + + if (oid_object_info_extended(the_repository, oid, &oi, flags)) { + logerror("Could not find OID: '%s'", oid_to_hex(oid)); + free(content); + return send_http_error(1, 404, "Not Found", -1, WR_OK); + } + + if (string_list_has_string(&mayhem_list, "http_404")) { + logmayhem("http_404"); + free(content); + return send_http_error(1, 404, "Not Found", -1, WR_MAYHEM); + } + + trace2_printf("%s: OBJECT type=%d len=%ld '%.40s'", TR2_CAT, + type, size, (const char *)content); + + /* + * We are blending several somewhat independent concepts here: + * + * [1] reconstructing the object format in parts: + * + * ::= + * + * [1a] ::= SP NUL + * [1b] ::= + * + * [2] verify that we constructed [1] correctly by computing + * the hash of [1] and verify it matches the passed OID. + * + * [3] compress [1] because that is how loose objects are + * stored on disk. We compress it as we stream it to + * the client. + * + * [4] send HTTP response headers to the client. + * + * [5] stream each chunk from [3] to the client using the HTTP + * chunked transfer coding. + * + * [6] for extra credit, we repeat the hash construction in [2] + * as we stream it. 
+ */ + + /* [4] */ + strbuf_addstr(&response_header, "HTTP/1.1 200 OK\r\n"); + strbuf_addstr(&response_header, "Cache-Control: private\r\n"); + strbuf_addstr(&response_header, "Content-Type: application/x-git-loose-object\r\n"); + strbuf_addf( &response_header, "Server: test-gvfs-protocol/%s\r\n", git_version_string); + strbuf_addstr(&response_header, "Transfer-Encoding: chunked\r\n"); + strbuf_addf( &response_header, "Date: %s\r\n", show_date(time(NULL), 0, DATE_MODE(RFC2822))); + gen_fake_uuid(&uuid); + strbuf_addf( &response_header, "X-VSS-E2EID: %s\r\n", uuid.buf); + strbuf_addstr(&response_header, "\r\n"); + + if (write_in_full(fd, response_header.buf, response_header.len) < 0) { + logerror("unable to write response header"); + free(content); + return WR_IO_ERROR; + } + + strbuf_release(&uuid); + strbuf_release(&response_header); + + if (string_list_has_string(&mayhem_list, "close_write")) { + logmayhem("close_write"); + free(content); + return WR_MAYHEM | WR_HANGUP; + } + + /* [1a] */ + object_header_len = 1 + xsnprintf(object_header, MAX_HEADER_LEN, + "%s %"PRIuMAX, + type_name(*oi.typep), + (uintmax_t)*oi.sizep); + + /* [2] */ + memset(&oid_check, 0, sizeof(oid_check)); + the_hash_algo->init_fn(&c); + the_hash_algo->update_fn(&c, object_header, object_header_len); + the_hash_algo->update_fn(&c, *oi.contentp, *oi.sizep); + the_hash_algo->final_fn(oid_check.hash, &c); + if (!oideq(oid, &oid_check)) + BUG("send_loose_object[2]: invalid construction '%s' '%s'", + oid_to_hex(oid), oid_to_hex(&oid_check)); + + /* [3, 6] */ + git_deflate_init(&stream, zlib_compression_level); + stream.next_out = compressed; + stream.avail_out = sizeof(compressed); + the_hash_algo->init_fn(&c); + + /* [3, 1a, 6] */ + stream.next_in = (unsigned char *)object_header; + stream.avail_in = object_header_len; + while (git_deflate(&stream, 0) == Z_OK) + ; /* nothing */ + the_hash_algo->update_fn(&c, object_header, object_header_len); + + /* [3, 1b, 5, 6] */ + stream.next_in = 
*oi.contentp; + stream.avail_in = *oi.sizep; + do { + enum worker_result wr; + unsigned char *in0 = stream.next_in; + + /* + * Corrupt a byte in the buffer we compress, but undo it + * before we compute the SHA on the portion of the raw + * buffer included in the chunk we compressed. + */ + if (mayhem__corrupt_loose) { + logmayhem("corrupt_loose"); + *in0 = *in0 ^ 0xff; + } + + ret = git_deflate(&stream, Z_FINISH); + + if (mayhem__corrupt_loose) + *in0 = *in0 ^ 0xff; + + the_hash_algo->update_fn(&c, in0, stream.next_in - in0); + + /* [5] */ + wr = send_chunk(fd, compressed, stream.next_out - compressed); + if (wr & WR_STOP_THE_MUSIC) { + free(content); + return wr; + } + + stream.next_out = compressed; + stream.avail_out = sizeof(compressed); + + } while (ret == Z_OK); + + /* [3] */ + if (ret != Z_STREAM_END) + BUG("unable to deflate object '%s' (%d)", oid_to_hex(oid), ret); + ret = git_deflate_end_gently(&stream); + if (ret != Z_OK) + BUG("deflateEnd on object '%s' failed (%d)", oid_to_hex(oid), ret); + + /* [6] */ + the_hash_algo->final_fn(oid_check.hash, &c); + if (!oideq(oid, &oid_check)) + BUG("send_loose_object[6]: invalid construction '%s' '%s'", + oid_to_hex(oid), oid_to_hex(&oid_check)); + + /* [5] */ + free(content); + return send_final_chunk(fd); +} + +/* + * Per the GVFS Protocol, a single OID should be in the slash-arg: + * + * GET /gvfs/objects/fc3fff3a25559d2d30d1719c4f4a6d9fe7e05170 HTTP/1.1 + * + * Look it up in our repo (loose or packed) and send it to gvfs-helper + * over the socket as a loose object. 
+ */ +static enum worker_result do__gvfs_objects__get(struct req *req) +{ + struct object_id oid; + + if (!req->slash_args.len || + get_oid_hex(req->slash_args.buf, &oid)) { + logerror("invalid OID in GET gvfs/objects: '%s'", + req->slash_args.buf); + return WR_IO_ERROR; + } + + trace2_printf("%s: GET %s", TR2_CAT, oid_to_hex(&oid)); + + return send_loose_object(&oid, 1); +} + +static enum worker_result read_json_post_body( + struct req *req, + struct oidset *oids, + int *nr_oids) +{ + struct object_id oid; + struct string_list_item *item; + char *post_body = NULL; + const char *v; + ssize_t len_expected = 0; + ssize_t len_received; + const char *pkey; + const char *plbracket; + const char *pstart; + const char *pend; + + for_each_string_list_item(item, &req->header_list) { + if (skip_prefix(item->string, "Content-Length: ", &v)) { + char *p; + len_expected = strtol(v, &p, 10); + break; + } + } + if (!len_expected) { + logerror("no content length in POST"); + return WR_IO_ERROR; + } + post_body = xcalloc(1, len_expected + 1); + if (!post_body) { + logerror("could not malloc buffer for POST body"); + return WR_IO_ERROR; + } + len_received = read_in_full(0, post_body, len_expected); + if (len_received != len_expected) { + logerror("short read in POST (expected %d, received %d)", + (int)len_expected, (int)len_received); + return WR_IO_ERROR; + } + + /* + * A very primitive JSON parser for a very fixed and well-known + * message format. Please don't judge me. + * + * We expect: + * + * ..."objectIds":["","",...""]... + * + * We expect compact (non-pretty) JSON, but do allow it. 
+ */ + pkey = strstr(post_body, "\"objectIds\""); + if (!pkey) + goto could_not_parse_json; + plbracket = strchr(pkey, '['); + if (!plbracket) + goto could_not_parse_json; + pstart = plbracket + 1; + + while (1) { + /* Eat leading whitespace before opening DQUOTE */ + while (*pstart && isspace(*pstart)) + pstart++; + if (!*pstart) + goto could_not_parse_json; + pstart++; + + /* find trailing DQUOTE */ + pend = strchr(pstart, '"'); + if (!pend) + goto could_not_parse_json; + + if (get_oid_hex(pstart, &oid)) + goto could_not_parse_json; + if (!oidset_insert(oids, &oid)) + *nr_oids += 1; + trace2_printf("%s: POST %s", TR2_CAT, oid_to_hex(&oid)); + + /* Eat trailing whitespace after trailing DQUOTE */ + pend++; + while (*pend && isspace(*pend)) + pend++; + if (!*pend) + goto could_not_parse_json; + + /* End of list or is there another OID */ + if (*pend == ']') + break; + if (*pend != ',') + goto could_not_parse_json; + + pstart = pend + 1; + } + + /* + * We do not care about the "commitDepth" parameter. + */ + + free(post_body); + return WR_OK; + +could_not_parse_json: + logerror("could not parse JSON in POST body"); + free(post_body); + return WR_IO_ERROR; +} + +/* + * Since this is a test helper, I'm going to be lazy and + * run pack-objects as a background child using pipe_command + * and get the resulting packfile into a buffer. And then + * the caller can pump it to the client over the socket. + * + * This avoids the need to set up a custom loop (like in + * upload-pack) to drive it and/or the use of a bunch of + * tempfiles. + * + * My assumption here is that we're not testing with GBs + * of data.... 
+ */ +static enum worker_result get_packfile_from_oids( + struct oidset *oids, + struct strbuf *buf_packfile) +{ + struct child_process pack_objects = CHILD_PROCESS_INIT; + struct strbuf buf_child_stdin = STRBUF_INIT; + struct strbuf buf_child_stderr = STRBUF_INIT; + struct oidset_iter iter; + struct object_id *oid; + enum worker_result wr; + int result; + + strvec_push(&pack_objects.args, "git"); + strvec_push(&pack_objects.args, "pack-objects"); + strvec_push(&pack_objects.args, "-q"); + strvec_push(&pack_objects.args, "--revs"); + strvec_push(&pack_objects.args, "--delta-base-offset"); + strvec_push(&pack_objects.args, "--window=0"); + strvec_push(&pack_objects.args, "--depth=4095"); + strvec_push(&pack_objects.args, "--compression=1"); + strvec_push(&pack_objects.args, "--stdout"); + + pack_objects.in = -1; + pack_objects.out = -1; + pack_objects.err = -1; + + oidset_iter_init(oids, &iter); + while ((oid = oidset_iter_next(&iter))) + strbuf_addf(&buf_child_stdin, "%s\n", oid_to_hex(oid)); + strbuf_addstr(&buf_child_stdin, "\n"); + + result = pipe_command(&pack_objects, + buf_child_stdin.buf, buf_child_stdin.len, + buf_packfile, 0, + &buf_child_stderr, 0); + if (result) { + logerror("pack-objects failed: %s", buf_child_stderr.buf); + wr = WR_IO_ERROR; + goto done; + } + + trace2_printf("%s: pack-objects returned %d bytes", TR2_CAT, buf_packfile->len); + wr = WR_OK; + +done: + strbuf_release(&buf_child_stdin); + strbuf_release(&buf_child_stderr); + + return wr; +} + +static enum worker_result send_packfile_from_buffer(const struct strbuf *packfile) +{ + struct strbuf response_header = STRBUF_INIT; + struct strbuf uuid = STRBUF_INIT; + enum worker_result wr; + + strbuf_addstr(&response_header, "HTTP/1.1 200 OK\r\n"); + strbuf_addstr(&response_header, "Cache-Control: private\r\n"); + strbuf_addstr(&response_header, "Content-Type: application/x-git-packfile\r\n"); + strbuf_addf( &response_header, "Content-Length: %d\r\n", (int)packfile->len); + strbuf_addf( 
&response_header, "Server: test-gvfs-protocol/%s\r\n", git_version_string); + strbuf_addf( &response_header, "Date: %s\r\n", show_date(time(NULL), 0, DATE_MODE(RFC2822))); + gen_fake_uuid(&uuid); + strbuf_addf( &response_header, "X-VSS-E2EID: %s\r\n", uuid.buf); + strbuf_addstr(&response_header, "\r\n"); + + if (write_in_full(1, response_header.buf, response_header.len) < 0) { + logerror("unable to write response header"); + wr = WR_IO_ERROR; + goto done; + } + + if (write_in_full(1, packfile->buf, packfile->len) < 0) { + logerror("unable to write response content body"); + wr = WR_IO_ERROR; + goto done; + } + + wr = WR_OK; + +done: + strbuf_release(&uuid); + strbuf_release(&response_header); + + return wr; +} + +/* + * The GVFS Protocol POST verb behaves like GET for non-commit objects + * (in that it just returns the requested object), but for commit + * objects POST *also* returns all trees referenced by the commit. + * + * The goal of this test is to confirm that: + * [] `gvfs-helper post` can request and receive a packfile at all. + * [] `gvfs-helper post` can handle getting either a packfile or a + * loose object. + * + * Therefore, I'm not going to blur the issue and support the custom + * semantics for commit objects. + * + * If one of the OIDs is a commit, `git pack-objects` will completely + * walk the trees and blobs for it and we get that for free. This is + * good enough for our testing. + * + * TODO A proper solution would separate the commit objects and do a + * TODO `rev-list --filter=blobs:none` for them (or use the internal + * TODO list-objects API) and a regular enumeration for the non-commit + * TODO objects. And build an new oidset with union of those and then + * TODO call pack-objects on it instead. + * TODO + * TODO But that's too much trouble for now. + * + * For now, we just need to know if the post asks for a single object, + * is it a commit or non-commit. That is sufficient to know whether + * we should send a packfile or loose object. 
+*/ +static enum worker_result classify_oids_in_post( + struct oidset *oids, int nr_oids, int *need_packfile) +{ + struct oidset_iter iter; + struct object_id *oid; + enum object_type type; + struct object_info oi = OBJECT_INFO_INIT; + unsigned flags = 0; + + if (nr_oids > 1) { + *need_packfile = 1; + return WR_OK; + } + + /* disable missing-object faulting */ + flags |= OBJECT_INFO_FOR_PREFETCH; + flags |= OBJECT_INFO_LOOKUP_REPLACE; + + oi.typep = &type; + + oidset_iter_init(oids, &iter); + while ((oid = oidset_iter_next(&iter))) { + if (!oid_object_info_extended(the_repository, oid, &oi, flags) && + type == OBJ_COMMIT) { + *need_packfile = 1; + return WR_OK; + } + } + + *need_packfile = 0; + return WR_OK; +} + +static enum worker_result do__gvfs_objects__post(struct req *req) +{ + struct oidset oids = OIDSET_INIT; + struct strbuf packfile = STRBUF_INIT; + enum worker_result wr; + int nr_oids = 0; + int need_packfile = 0; + + wr = read_json_post_body(req, &oids, &nr_oids); + if (wr & WR_STOP_THE_MUSIC) + goto done; + + wr = classify_oids_in_post(&oids, nr_oids, &need_packfile); + if (wr & WR_STOP_THE_MUSIC) + goto done; + + if (!need_packfile) { + struct oidset_iter iter; + struct object_id *oid; + + oidset_iter_init(&oids, &iter); + oid = oidset_iter_next(&iter); + + wr = send_loose_object(oid, 1); + } else { + wr = get_packfile_from_oids(&oids, &packfile); + if (wr & WR_STOP_THE_MUSIC) + goto done; + + wr = send_packfile_from_buffer(&packfile); + } + +done: + oidset_clear(&oids); + strbuf_release(&packfile); + + return wr; +} + +/* + * Read the HTTP request up to the start of the optional message-body. + * We do this byte-by-byte because we have keep-alive turned on and + * cannot rely on an EOF. 
+ * + * https://tools.ietf.org/html/rfc7230 + * https://github.com/microsoft/VFSForGit/blob/master/Protocol.md + * + * We cannot call die() here because our caller needs to properly + * respond to the client and/or close the socket before this + * child exits so that the client doesn't get a connection reset + * by peer error. + */ +static enum worker_result req__read(struct req *req, int fd) +{ + struct strbuf h = STRBUF_INIT; + int nr_start_line_fields; + const char *uri_target; + const char *http_version; + const char *gvfs; + + /* + * Read line 0 of the request and split it into component parts: + * + * SP SP CRLF + * + */ + if (strbuf_getwholeline_fd(&req->start_line, fd, '\n') == EOF) + return WR_OK | WR_HANGUP; + + if (string_list_has_string(&mayhem_list, "close_read")) { + logmayhem("close_read"); + return WR_MAYHEM | WR_HANGUP; + } + + if (string_list_has_string(&mayhem_list, "close_read_1") && + mayhem_child == 0) { + /* + * Mayhem: fail the first request, but let retries succeed. + */ + logmayhem("close_read_1"); + return WR_MAYHEM | WR_HANGUP; + } + + strbuf_trim_trailing_newline(&req->start_line); + + nr_start_line_fields = string_list_split(&req->start_line_fields, + req->start_line.buf, + ' ', -1); + if (nr_start_line_fields != 3) { + logerror("could not parse request start-line '%s'", + req->start_line.buf); + return WR_IO_ERROR; + } + uri_target = req->start_line_fields.items[1].string; + http_version = req->start_line_fields.items[2].string; + + if (strcmp(http_version, "HTTP/1.1")) { + logerror("unsupported version '%s' (expecting HTTP/1.1)", + http_version); + return WR_IO_ERROR; + } + + /* + * Next, extract the GVFS terms from the . The + * GVFS Protocol defines a REST API containing several GVFS + * commands of the form: + * + * []/gvfs/[/] + * []/gvfs/[?] 
+ * + * For example: + * "GET /gvfs/config HTTP/1.1" + * "GET /gvfs/objects/aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd HTTP/1.1" + * "GET /gvfs/prefetch?lastPackTimestamp=123456789 HTTP/1.1" + * + * "GET //gvfs/config HTTP/1.1" + * "GET //gvfs/objects/aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd HTTP/1.1" + * "GET //gvfs/prefetch?lastPackTimestamp=123456789 HTTP/1.1" + * + * "POST //gvfs/objects HTTP/1.1" + * + * For other testing later, we also allow non-gvfs URLs of the form: + * "GET /[?] HTTP/1.1" + * + * We do not attempt to split the query-params within the args. + * The caller can do that if they need to. + */ + gvfs = strstr(uri_target, "/gvfs/"); + if (gvfs) { + strbuf_add(&req->uri_base, uri_target, (gvfs - uri_target)); + strbuf_trim_trailing_dir_sep(&req->uri_base); + + gvfs += 6; /* skip "/gvfs/" */ + strbuf_add(&req->gvfs_api, "gvfs/", 5); + while (*gvfs && *gvfs != '/' && *gvfs != '?') + strbuf_addch(&req->gvfs_api, *gvfs++); + + /* + */ + if (*gvfs == '/') + strbuf_addstr(&req->slash_args, gvfs + 1); + else if (*gvfs == '?') + strbuf_addstr(&req->quest_args, gvfs + 1); + } else { + + const char *quest = strchr(uri_target, '?'); + + if (quest) { + strbuf_add(&req->uri_base, uri_target, (quest - uri_target)); + strbuf_trim_trailing_dir_sep(&req->uri_base); + strbuf_addstr(&req->quest_args, quest + 1); + } else { + strbuf_addstr(&req->uri_base, uri_target); + strbuf_trim_trailing_dir_sep(&req->uri_base); + } + } + + /* + * Read the set of HTTP headers into a string-list. 
+ */ + while (1) { + if (strbuf_getwholeline_fd(&h, fd, '\n') == EOF) + goto done; + strbuf_trim_trailing_newline(&h); + + if (!h.len) + goto done; /* a blank line ends the header */ + + string_list_append(&req->header_list, h.buf); + } + + /* + * TODO If the set of HTTP headers includes things like: + * TODO + * TODO Connection: Upgrade, HTTP2-Settings + * TODO Upgrade: h2c + * TODO HTTP2-Settings: AAMAAABkAARAAAAAAAIAAAAA + * TODO + * TODO then the client is asking to optionally switch to HTTP/2. + * TODO + * TODO We currently DO NOT support that (and I don't currently + * TODO see a need to do so (because we don't need the multiplexed + * TODO streams feature (because the client never asks for n packfiles + * TODO at the same time))). + * TODO + * TODO https://en.wikipedia.org/wiki/HTTP/1.1_Upgrade_header + */ + + /* + * We do not attempt to read the , if it exists. + * We let our caller read/chunk it in as appropriate. + */ +done: + if (trace2_is_enabled()) { + struct string_list_item *item; + trace2_printf("%s: %s", TR2_CAT, req->start_line.buf); + for_each_string_list_item(item, &req->start_line_fields) + trace2_printf("%s: Field: %s", TR2_CAT, item->string); + trace2_printf("%s: [uri-base '%s'][gvfs '%s'][args '%s' '%s']", + TR2_CAT, + req->uri_base.buf, + req->gvfs_api.buf, + req->slash_args.buf, + req->quest_args.buf); + for_each_string_list_item(item, &req->header_list) + trace2_printf("%s: Hdrs: %s", TR2_CAT, item->string); + } + + strbuf_release(&h); + + return WR_OK; +} + +static enum worker_result dispatch(struct req *req) +{ + const char *method; + enum worker_result wr; + + if (string_list_has_string(&mayhem_list, "close_no_write")) { + logmayhem("close_no_write"); + return WR_MAYHEM | WR_HANGUP; + } + if (string_list_has_string(&mayhem_list, "http_503")) { + logmayhem("http_503"); + return send_http_error(1, 503, "Service Unavailable", 2, + WR_MAYHEM | WR_HANGUP); + } + if (string_list_has_string(&mayhem_list, "http_429")) { + 
logmayhem("http_429"); + return send_http_error(1, 429, "Too Many Requests", 2, + WR_MAYHEM | WR_HANGUP); + } + if (string_list_has_string(&mayhem_list, "http_429_1") && + mayhem_child == 0) { + logmayhem("http_429_1"); + return send_http_error(1, 429, "Too Many Requests", 2, + WR_MAYHEM | WR_HANGUP); + } + if (mayhem_try_auth(req, &wr)) + return wr; + + method = req->start_line_fields.items[0].string; + + if (!strcmp(req->gvfs_api.buf, "gvfs/objects")) { + + if (!strcmp(method, "GET")) + return do__gvfs_objects__get(req); + if (!strcmp(method, "POST")) + return do__gvfs_objects__post(req); + } + + if (!strcmp(req->gvfs_api.buf, "gvfs/config")) { + + if (!strcmp(method, "GET")) + return do__gvfs_config__get(req); + } + + return send_http_error(1, 501, "Not Implemented", -1, + WR_OK | WR_HANGUP); +} + +static enum worker_result worker(void) +{ + struct req req = REQ__INIT; + char *client_addr = getenv("REMOTE_ADDR"); + char *client_port = getenv("REMOTE_PORT"); + enum worker_result wr = WR_OK; + + if (client_addr) + loginfo("Connection from %s:%s", client_addr, client_port); + + set_keep_alive(0); + + while (1) { + req__release(&req); + + alarm(init_timeout ? init_timeout : timeout); + wr = req__read(&req, 0); + alarm(0); + + if (wr & WR_STOP_THE_MUSIC) + break; + + wr = dispatch(&req); + if (wr & WR_STOP_THE_MUSIC) + break; + } + + close(0); + close(1); + + return !!(wr & WR_IO_ERROR); +} + +////////////////////////////////////////////////////////////////// +// This section contains the listener and child-process management +// code used by the primary instance to accept incoming connections +// and dispatch them to async child process "worker" instances. 
//////////////////////////////////////////////////////////////////

/*
 * Order two socket addresses: first by address family, then (for
 * AF_INET and, unless NO_IPV6, AF_INET6) by the raw address bytes.
 * Ports are not compared.  Returns 0 when they are considered equal.
 */
static int addrcmp(const struct sockaddr_storage *s1,
		   const struct sockaddr_storage *s2)
{
	const struct sockaddr *sa1 = (const struct sockaddr *)s1;
	const struct sockaddr *sa2 = (const struct sockaddr *)s2;

	if (sa1->sa_family != sa2->sa_family)
		return sa1->sa_family - sa2->sa_family;
	if (sa1->sa_family == AF_INET)
		return memcmp(&((const struct sockaddr_in *)s1)->sin_addr,
			      &((const struct sockaddr_in *)s2)->sin_addr,
			      sizeof(struct in_addr));
#ifndef NO_IPV6
	if (sa1->sa_family == AF_INET6)
		return memcmp(&((const struct sockaddr_in6 *)s1)->sin6_addr,
			      &((const struct sockaddr_in6 *)s2)->sin6_addr,
			      sizeof(struct in6_addr));
#endif
	return 0;
}

static int max_connections = 32;

static unsigned int live_children;

/* Singly-linked list of running worker children. */
static struct child {
	struct child *next;
	struct child_process cld;
	struct sockaddr_storage address;
} *firstborn;

/*
 * Record a newly started child.  The new entry is inserted directly
 * in front of the first existing entry with the same address (or at
 * the end of the list if there is none), so children from the same
 * address stay adjacent.
 */
static void add_child(struct child_process *cld, struct sockaddr *addr, socklen_t addrlen)
{
	struct child *newborn, **cradle;

	newborn = xcalloc(1, sizeof(*newborn));
	live_children++;
	memcpy(&newborn->cld, cld, sizeof(*cld));
	memcpy(&newborn->address, addr, addrlen);
	for (cradle = &firstborn; *cradle; cradle = &(*cradle)->next)
		if (!addrcmp(&(*cradle)->address, &newborn->address))
			break;
	newborn->next = *cradle;
	*cradle = newborn;
}

/*
 * This gets called if the number of connections grows
 * past "max_connections".
 *
 * We kill the newest connection from a duplicate IP.
 */
static void kill_some_child(void)
{
	const struct child *blanket, *next;

	if (!(blanket = firstborn))
		return;

	for (; (next = blanket->next); blanket = next)
		if (!addrcmp(&blanket->address, &next->address)) {
			kill(blanket->cld.pid, SIGTERM);
			break;
		}
}

/*
 * Reap (without blocking) every child that has exited, log it, and
 * unlink it from the list.
 */
static void check_dead_children(void)
{
	int status;
	pid_t pid;

	struct child **cradle, *blanket;
	for (cradle = &firstborn; (blanket = *cradle);)
		if ((pid = waitpid(blanket->cld.pid, &status, WNOHANG)) > 1) {
			const char *dead = "";
			if (status)
				dead = " (with error)";
			loginfo("[%"PRIuMAX"] Disconnected%s", (uintmax_t)pid, dead);

			/* remove the child */
			*cradle = blanket->next;
			live_children--;
			child_process_clear(&blanket->cld);
			free(blanket);
		} else
			cradle = &blanket->next;
}

static struct strvec cld_argv = STRVEC_INIT;

/*
 * Hand an accepted connection to a freshly spawned "--worker" child.
 *
 * The socket becomes the child's stdin and (via dup()) its stdout.
 * The peer address/port and, when mayhem is active, a per-child
 * sequence number are passed through the environment.
 */
static void handle(int incoming, struct sockaddr *addr, socklen_t addrlen)
{
	struct child_process cld = CHILD_PROCESS_INIT;

	if (max_connections && live_children >= max_connections) {
		kill_some_child();
		sleep(1);  /* give it some time to die */
		check_dead_children();
		if (live_children >= max_connections) {
			close(incoming);
			logerror("Too many children, dropping connection");
			return;
		}
	}

	if (addr->sa_family == AF_INET) {
		char buf[128] = "";
		struct sockaddr_in *sin_addr = (void *) addr;
		inet_ntop(addr->sa_family, &sin_addr->sin_addr, buf, sizeof(buf));
		strvec_pushf(&cld.env, "REMOTE_ADDR=%s", buf);
		strvec_pushf(&cld.env, "REMOTE_PORT=%d",
			     ntohs(sin_addr->sin_port));
#ifndef NO_IPV6
	} else if (addr->sa_family == AF_INET6) {
		char buf[128] = "";
		struct sockaddr_in6 *sin6_addr = (void *) addr;
		inet_ntop(AF_INET6, &sin6_addr->sin6_addr, buf, sizeof(buf));
		strvec_pushf(&cld.env, "REMOTE_ADDR=[%s]", buf);
		strvec_pushf(&cld.env, "REMOTE_PORT=%d",
			     ntohs(sin6_addr->sin6_port));
#endif
	}

	if (mayhem_list.nr) {
		strvec_pushf(&cld.env, "MAYHEM_CHILD=%d",
			     mayhem_child++);
	}

	strvec_pushv(&cld.args, cld_argv.v);
	cld.in = incoming;
	cld.out = dup(incoming);

	if (cld.out < 0) {
		/*
		 * The child was never started, so nobody else will close
		 * the socket or release the env/args we just built.
		 */
		logerror("could not dup() `incoming`");
		close(incoming);
		child_process_clear(&cld);
	} else if (start_command(&cld)) {
		/*
		 * NOTE(review): assumes start_command() cleans up `cld`
		 * and its descriptors on failure -- confirm.
		 */
		logerror("unable to fork");
	} else {
		add_child(&cld, addr, addrlen);
	}
}

static void child_handler(int signo UNUSED)
{
	/*
	 * Otherwise empty handler because systemcalls will get interrupted
	 * upon signal receipt
	 * SysV needs the handler to be rearmed
	 */
	signal(SIGCHLD, child_handler);
}

/* Set SO_REUSEADDR on `sockfd` when --reuseaddr was requested. */
static int set_reuse_addr(int sockfd)
{
	int on = 1;

	if (!reuseaddr)
		return 0;
	return setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR,
			  &on, sizeof(on));
}

/* Growable array of listening socket descriptors. */
struct socketlist {
	int *list;
	size_t nr;
	size_t alloc;
};

/*
 * Format the address in `sin` as text for log messages.
 *
 * Returns a pointer to a static buffer that is overwritten by the
 * next call.  `len` (the sockaddr length) is accepted for the benefit
 * of existing callers but is not the right bound for the text buffer:
 * inet_ntop() must be given the size of the *destination*, otherwise
 * long IPv6 addresses fail to convert and stale text is returned.
 */
static const char *ip2str(int family, struct sockaddr *sin, socklen_t len UNUSED)
{
#ifdef NO_IPV6
	static char ip[INET_ADDRSTRLEN];
#else
	static char ip[INET6_ADDRSTRLEN];
#endif
	const void *raw = NULL;

	switch (family) {
#ifndef NO_IPV6
	case AF_INET6:
		raw = &((struct sockaddr_in6*)sin)->sin6_addr;
		break;
#endif
	case AF_INET:
		raw = &((struct sockaddr_in*)sin)->sin_addr;
		break;
	default:
		break;
	}

	/* Unknown family or failed conversion: never return stale text. */
	if (!raw || !inet_ntop(family, raw, ip, sizeof(ip)))
		xsnprintf(ip, sizeof(ip), "");
	return ip;
}

/*
 * Final step shared by both setup_named_sock() variants: mark a
 * bound+listening socket close-on-exec and append it to `socklist`.
 */
static void socklist_add_listener(struct socketlist *socklist, int sockfd)
{
	long flags = fcntl(sockfd, F_GETFD, 0);

	if (flags >= 0)
		fcntl(sockfd, F_SETFD, flags | FD_CLOEXEC);

	ALLOC_GROW(socklist->list, socklist->nr + 1, socklist->alloc);
	socklist->list[socklist->nr++] = sockfd;
}

#ifndef NO_IPV6

/*
 * Resolve `listen_addr` and open one listening TCP socket per
 * resulting address.  Failures on individual addresses are logged
 * and skipped.  Returns the number of sockets added to `socklist`.
 */
static int setup_named_sock(const char *listen_addr, int listen_port, struct socketlist *socklist)
{
	int socknum = 0;
	char pbuf[NI_MAXSERV];
	struct addrinfo hints, *ai0, *ai;
	int gai;

	xsnprintf(pbuf, sizeof(pbuf), "%d", listen_port);
	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_protocol = IPPROTO_TCP;
	hints.ai_flags = AI_PASSIVE;

	gai = getaddrinfo(listen_addr, pbuf, &hints, &ai0);
	if (gai) {
		logerror("getaddrinfo() for %s failed: %s", listen_addr, gai_strerror(gai));
		return 0;
	}

	for (ai = ai0; ai; ai = ai->ai_next) {
		int sockfd;

		sockfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
		if (sockfd < 0)
			continue;
		if (sockfd >= FD_SETSIZE) {
			logerror("Socket descriptor too large");
			close(sockfd);
			continue;
		}

#ifdef IPV6_V6ONLY
		if (ai->ai_family == AF_INET6) {
			int on = 1;
			setsockopt(sockfd, IPPROTO_IPV6, IPV6_V6ONLY,
				   &on, sizeof(on));
			/* Note: error is not fatal */
		}
#endif

		if (set_reuse_addr(sockfd)) {
			logerror("Could not set SO_REUSEADDR: %s", strerror(errno));
			close(sockfd);
			continue;
		}

		set_keep_alive(sockfd);

		if (bind(sockfd, ai->ai_addr, ai->ai_addrlen) < 0) {
			logerror("Could not bind to %s: %s",
				 ip2str(ai->ai_family, ai->ai_addr, ai->ai_addrlen),
				 strerror(errno));
			close(sockfd);
			continue;	/* not fatal */
		}
		if (listen(sockfd, 5) < 0) {
			logerror("Could not listen to %s: %s",
				 ip2str(ai->ai_family, ai->ai_addr, ai->ai_addrlen),
				 strerror(errno));
			close(sockfd);
			continue;	/* not fatal */
		}

		socklist_add_listener(socklist, sockfd);
		socknum++;
	}

	freeaddrinfo(ai0);

	return socknum;
}

#else /* NO_IPV6 */

/*
 * IPv4-only variant: `listen_addr` must be a dotted-quad address, or
 * NULL to listen on INADDR_ANY.  Returns 1 on success, 0 on failure.
 */
static int setup_named_sock(const char *listen_addr, int listen_port, struct socketlist *socklist)
{
	struct sockaddr_in sin;
	int sockfd;

	memset(&sin, 0, sizeof sin);
	sin.sin_family = AF_INET;
	sin.sin_port = htons(listen_port);

	if (listen_addr) {
		/* Well, host better be an IP address here. */
		if (inet_pton(AF_INET, listen_addr, &sin.sin_addr.s_addr) <= 0)
			return 0;
	} else {
		sin.sin_addr.s_addr = htonl(INADDR_ANY);
	}

	sockfd = socket(AF_INET, SOCK_STREAM, 0);
	if (sockfd < 0)
		return 0;

	if (set_reuse_addr(sockfd)) {
		logerror("Could not set SO_REUSEADDR: %s", strerror(errno));
		close(sockfd);
		return 0;
	}

	set_keep_alive(sockfd);

	if ( bind(sockfd, (struct sockaddr *)&sin, sizeof sin) < 0 ) {
		logerror("Could not bind to %s: %s",
			 ip2str(AF_INET, (struct sockaddr *)&sin, sizeof(sin)),
			 strerror(errno));
		close(sockfd);
		return 0;
	}

	if (listen(sockfd, 5) < 0) {
		logerror("Could not listen to %s: %s",
			 ip2str(AF_INET, (struct sockaddr *)&sin, sizeof(sin)),
			 strerror(errno));
		close(sockfd);
		return 0;
	}

	socklist_add_listener(socklist, sockfd);
	return 1;
}

#endif

/*
 * Open listening sockets for every address in `listen_addr`, or for
 * the loopback address when the list is empty.
 */
static void socksetup(struct string_list *listen_addr, int listen_port, struct socketlist *socklist)
{
	if (!listen_addr->nr)
		setup_named_sock("127.0.0.1", listen_port, socklist);
	else {
		size_t i;
		int socknum;
		for (i = 0; i < listen_addr->nr; i++) {
			socknum = setup_named_sock(listen_addr->items[i].string,
						   listen_port, socklist);

			if (socknum == 0)
				logerror("unable to allocate any listen sockets for host %s on port %u",
					 listen_addr->items[i].string, listen_port);
		}
	}
}

/*
 * Accept connections on every socket in `socklist` and hand each one
 * to handle().  When a pid-file is in use, poll() wakes up every
 * 100ms so that we can notice the file being deleted, which is the
 * signal to shut down.  Returns 0 after a graceful shutdown.
 */
static int service_loop(struct socketlist *socklist)
{
	struct pollfd *pfd;
	size_t i;

	CALLOC_ARRAY(pfd, socklist->nr);

	for (i = 0; i < socklist->nr; i++) {
		pfd[i].fd = socklist->list[i];
		pfd[i].events = POLLIN;
	}

	signal(SIGCHLD, child_handler);

	for (;;) {
		int nr_ready;
		/* Named to avoid shadowing the file-level `timeout`. */
		int poll_timeout = (pid_file ? 100 : -1);

		check_dead_children();

		nr_ready = poll(pfd, socklist->nr, poll_timeout);
		if (nr_ready < 0) {
			if (errno != EINTR) {
				logerror("Poll failed, resuming: %s",
					 strerror(errno));
				sleep(1);
			}
			continue;
		}
		if (nr_ready == 0) {
			/*
			 * If we have a pid_file, then we watch it.
			 * If someone deletes it, we shutdown the service.
			 * The shell scripts in the test suite will use this.
			 */
			if (!pid_file || file_exists(pid_file))
				continue;
			break; /* pid-file gone: shut down */
		}

		for (i = 0; i < socklist->nr; i++) {
			if (pfd[i].revents & POLLIN) {
				union {
					struct sockaddr sa;
					struct sockaddr_in sai;
#ifndef NO_IPV6
					struct sockaddr_in6 sai6;
#endif
				} ss;
				socklen_t sslen = sizeof(ss);
				int incoming = accept(pfd[i].fd, &ss.sa, &sslen);
				if (incoming < 0) {
					switch (errno) {
					case EAGAIN:
					case EINTR:
					case ECONNABORTED:
						continue;
					default:
						die_errno("accept returned");
					}
				}
				handle(incoming, &ss.sa, sslen);
			}
		}
	}

	loginfo("Starting graceful shutdown (pid-file gone)");
	for (i = 0; i < socklist->nr; i++)
		close(socklist->list[i]);
	free(pfd);

	return 0;
}

/*
 * Primary-instance entry point: open the listening sockets, publish
 * the pid-file, and run the accept loop.
 */
static int serve(struct string_list *listen_addr, int listen_port)
{
	struct socketlist socklist = { NULL, 0, 0 };

	socksetup(listen_addr, listen_port, &socklist);
	if (socklist.nr == 0)
		die("unable to allocate any listen sockets on port %u",
		    listen_port);

	loginfo("Ready to rumble");

	/*
	 * Wait to create the pid-file until we've setup the sockets
	 * and are open for business.
	 */
	if (pid_file)
		write_file(pid_file, "%"PRIuMAX, (uintmax_t) getpid());

	return service_loop(&socklist);
}

//////////////////////////////////////////////////////////////////
// This section is executed by both the primary instance and all
// worker instances. So, yes, each child-process re-parses the
// command line argument and re-discovers how it should behave.
+////////////////////////////////////////////////////////////////// + +int cmd_main(int argc, const char **argv) +{ + int listen_port = 0; + static struct string_list listen_addr = STRING_LIST_INIT_NODUP; + int worker_mode = 0; + int i; + + trace2_cmd_name("test-gvfs-protocol"); + setup_git_directory_gently(NULL); + + for (i = 1; i < argc; i++) { + const char *arg = argv[i]; + const char *v; + + if (skip_prefix(arg, "--listen=", &v)) { + string_list_append_nodup(&listen_addr, xstrdup_tolower(v)); + continue; + } + if (skip_prefix(arg, "--port=", &v)) { + char *end; + unsigned long n; + n = strtoul(v, &end, 0); + if (*v && !*end) { + listen_port = n; + continue; + } + } + if (!strcmp(arg, "--worker")) { + worker_mode = 1; + trace2_cmd_mode("worker"); + continue; + } + if (!strcmp(arg, "--verbose")) { + verbose = 1; + continue; + } + if (skip_prefix(arg, "--timeout=", &v)) { + timeout = atoi(v); + continue; + } + if (skip_prefix(arg, "--init-timeout=", &v)) { + init_timeout = atoi(v); + continue; + } + if (skip_prefix(arg, "--max-connections=", &v)) { + max_connections = atoi(v); + if (max_connections < 0) + max_connections = 0; /* unlimited */ + continue; + } + if (!strcmp(arg, "--reuseaddr")) { + reuseaddr = 1; + continue; + } + if (skip_prefix(arg, "--pid-file=", &v)) { + pid_file = v; + continue; + } + if (skip_prefix(arg, "--mayhem=", &v)) { + string_list_append(&mayhem_list, v); + continue; + } + + usage(test_gvfs_protocol_usage); + } + + /* avoid splitting a message in the middle */ + setvbuf(stderr, NULL, _IOFBF, 4096); + + if (listen_port == 0) + listen_port = DEFAULT_GIT_PORT; + + /* + * If no --listen= args are given, the setup_named_sock() + * code will use receive a NULL address and set INADDR_ANY. + * This exposes both internal and external interfaces on the + * port. + * + * Disallow that and default to the internal-use-only loopback + * address. 
+ */ + if (!listen_addr.nr) + string_list_append(&listen_addr, "127.0.0.1"); + + /* + * worker_mode is set in our own child process instances + * (that are bound to a connected socket from a client). + */ + if (worker_mode) { + if (mayhem_list.nr) { + const char *string = getenv("MAYHEM_CHILD"); + if (string && *string) + mayhem_child = atoi(string); + } + + build_gvfs_config_json(&jw_config, &listen_addr, listen_port); + + return worker(); + } + + /* + * `cld_argv` is a bit of a clever hack. The top-level instance + * of test-gvfs-protocol.exe does the normal bind/listen/accept + * stuff. For each incoming socket, the top-level process spawns + * a child instance of test-gvfs-protocol.exe *WITH* the additional + * `--worker` argument. This causes the child to set `worker_mode` + * and immediately call `worker()` using the connected socket (and + * without the usual need for fork() or threads). + * + * The magic here is made possible because `cld_argv` is static + * and handle() (called by service_loop()) knows about it. + */ + strvec_push(&cld_argv, argv[0]); + strvec_push(&cld_argv, "--worker"); + for (i = 1; i < argc; ++i) + strvec_push(&cld_argv, argv[i]); + + /* + * Setup primary instance to listen for connections. + */ + return serve(&listen_addr, listen_port); +} diff --git a/t/t5799-gvfs-helper.sh b/t/t5799-gvfs-helper.sh new file mode 100755 index 00000000000000..3cb1459ea32476 --- /dev/null +++ b/t/t5799-gvfs-helper.sh @@ -0,0 +1,1082 @@ +#!/bin/sh + +test_description='test gvfs-helper and GVFS Protocol' + +. ./test-lib.sh + +# Set the port for t/helper/test-gvfs-protocol.exe from either the +# environment or from the test number of this shell script. +# +test_set_port GIT_TEST_GVFS_PROTOCOL_PORT + +# Setup the following repos: +# +# repo_src: +# A normal, no-magic, fully-populated clone of something. +# No GVFS (aka VFS4G). No Scalar. No partial-clone. +# This will be used by "t/helper/test-gvfs-protocol.exe" +# to serve objects. 
+# +# repo_t1: +# An empty repo with no contents nor commits. That is, +# everything is missing. For the tests based on this repo, +# we don't care why it is missing objects (or if we could +# actually use it). We are only testing explicit object +# fetching using gvfs-helper.exe in isolation. +# +REPO_SRC="$PWD"/repo_src +REPO_T1="$PWD"/repo_t1 + +# Setup some loopback URLs where test-gvfs-protocol.exe will be +# listening. We will spawn it directly inside the repo_src directory, +# so we don't need any of the directory mapping or configuration +# machinery found in "git-daemon.exe" or "git-http-backend.exe". +# +# This lets us use the "uri-base" part of the URL (prior to the REST +# API "/gvfs/") to control how our mock server responds. For +# example, only the origin (main Git) server supports "/gvfs/config". +# +# For example, this means that if we add a remote containing $ORIGIN_URL, +# it will work with gvfs-helper, but not for fetch (without some mapping +# tricks). +# +HOST_PORT=127.0.0.1:$GIT_TEST_GVFS_PROTOCOL_PORT +ORIGIN_URL=http://$HOST_PORT/servertype/origin +CACHE_URL=http://$HOST_PORT/servertype/cache + +SHARED_CACHE_T1="$PWD"/shared_cache_t1 + +# The pid-file is created by test-gvfs-protocol.exe when it starts. +# The server will shut down if/when we delete it. (This is a little +# easier than killing it by PID.) +# +PID_FILE="$PWD"/pid-file.pid +SERVER_LOG="$PWD"/OUT.server.log + +PATH="$GIT_BUILD_DIR/t/helper/:$PATH" && export PATH + +OIDS_FILE="$PWD"/oid_list.txt +OIDS_CT_FILE="$PWD"/oid_ct_list.txt +OIDS_BLOBS_FILE="$PWD"/oids_blobs_file.txt +OID_ONE_BLOB_FILE="$PWD"/oid_one_blob_file.txt +OID_ONE_COMMIT_FILE="$PWD"/oid_one_commit_file.txt + +# Get a list of available OIDs in repo_src so that we can try to fetch +# them and so that we don't have to hard-code a list of known OIDs. +# This doesn't need to be a complete list -- just enough to drive some +# representative tests. +# +# Optionally require that we find a minimum number of OIDs. 
#
get_list_of_oids () {
	git -C "$REPO_SRC" rev-list --objects HEAD | sed 's/ .*//' | sort >"$OIDS_FILE"

	if test $# -eq 1
	then
		actual_nr=$(wc -l <"$OIDS_FILE")
		if test $actual_nr -lt $1
		then
			echo "get_list_of_oids: insufficient data. Need $1 OIDs."
			return 1
		fi
	fi
	return 0
}

# Write the sorted OIDs of the blobs in HEAD's top-level tree to
# $OIDS_BLOBS_FILE and the first of them to $OID_ONE_BLOB_FILE.
#
get_list_of_blobs_oids () {
	git -C "$REPO_SRC" ls-tree HEAD | grep ' blob ' | awk "{print \$3}" | sort >"$OIDS_BLOBS_FILE"
	head -n 1 <"$OIDS_BLOBS_FILE" >"$OID_ONE_BLOB_FILE"
}

# Write the sorted OIDs of every commit and tree in repo_src to
# $OIDS_CT_FILE.  Optionally require a minimum number of OIDs.
#
get_list_of_commit_and_tree_oids () {
	git -C "$REPO_SRC" cat-file --batch-check --batch-all-objects | awk "/commit|tree/ {print \$1}" | sort >"$OIDS_CT_FILE"

	if test $# -eq 1
	then
		actual_nr=$(wc -l <"$OIDS_CT_FILE")
		if test $actual_nr -lt $1
		then
			echo "get_list_of_commit_and_tree_oids: insufficient data. Need $1 OIDs."
			return 1
		fi
	fi
	return 0
}

# Write the OID of repo_src's HEAD commit to $OID_ONE_COMMIT_FILE.
#
get_one_commit_oid () {
	git -C "$REPO_SRC" rev-parse HEAD >"$OID_ONE_COMMIT_FILE"
	return 0
}

test_expect_success 'setup repos' '
	test_create_repo "$REPO_SRC" &&
	git -C "$REPO_SRC" branch -M main &&
	#
	# test_commit_bulk() does magic to create a packfile containing
	# the new commits.
	#
	test_commit_bulk -C "$REPO_SRC" --filename="batch_a.%s.t" 9 &&
	git -C "$REPO_SRC" rev-parse refs/heads/main >m1.branch &&
	test_commit_bulk -C "$REPO_SRC" --filename="batch_b.%s.t" 9 &&
	git -C "$REPO_SRC" rev-parse refs/heads/main >m2.branch &&
	#
	# test_commit() creates commits, trees, tags, and blobs and leave
	# them loose.
	#
	test_config gc.auto 0 &&
	#
	test_commit -C "$REPO_SRC" file1.txt &&
	test_commit -C "$REPO_SRC" file2.txt &&
	test_commit -C "$REPO_SRC" file3.txt &&
	test_commit -C "$REPO_SRC" file4.txt &&
	test_commit -C "$REPO_SRC" file5.txt &&
	test_commit -C "$REPO_SRC" file6.txt &&
	test_commit -C "$REPO_SRC" file7.txt &&
	test_commit -C "$REPO_SRC" file8.txt &&
	test_commit -C "$REPO_SRC" file9.txt &&
	git -C "$REPO_SRC" rev-parse refs/heads/main >m3.branch &&
	#
	# gvfs-helper.exe writes downloaded objects to a shared-cache directory
	# rather than the ODB inside the .git directory.
	#
	mkdir "$SHARED_CACHE_T1" &&
	mkdir "$SHARED_CACHE_T1/pack" &&
	mkdir "$SHARED_CACHE_T1/info" &&
	#
	# setup repo_t1 and point all of the gvfs.* values to repo_src.
	#
	test_create_repo "$REPO_T1" &&
	git -C "$REPO_T1" branch -M main &&
	git -C "$REPO_T1" remote add origin $ORIGIN_URL &&
	git -C "$REPO_T1" config --local gvfs.cache-server $CACHE_URL &&
	git -C "$REPO_T1" config --local gvfs.sharedCache "$SHARED_CACHE_T1" &&
	echo "$SHARED_CACHE_T1" >> "$REPO_T1"/.git/objects/info/alternates &&
	#
	#
	#
	cat <<-EOF >creds.txt &&
		username=x
		password=y
	EOF
	cat <<-EOF >creds.sh &&
		#!/bin/sh
		cat "$PWD"/creds.txt
	EOF
	chmod 755 creds.sh &&
	git -C "$REPO_T1" config --local credential.helper "!f() { cat \"$PWD\"/creds.txt; }; f" &&
	#
	# Create some test data sets.
	#
	get_list_of_oids 30 &&
	get_list_of_commit_and_tree_oids 30 &&
	get_list_of_blobs_oids &&
	get_one_commit_oid
'

stop_gvfs_protocol_server () {
	if ! test -f "$PID_FILE"
	then
		return 0
	fi
	#
	# The server will shutdown automatically when we delete the pid-file.
	#
	rm -f "$PID_FILE"
	#
	# Give it a few seconds to shutdown (mainly to completely release the
	# port before the next test start another instance and it attempts to
	# bind to it).
	#
	for k in 0 1 2 3 4
	do
		if grep -q "Starting graceful shutdown" "$SERVER_LOG"
		then
			return 0
		fi
		sleep 1
	done

	echo "stop_gvfs_protocol_server: timeout waiting for server shutdown"
	return 1
}

# Give the server a few seconds to get started; it creates the
# pid-file once it is ready.  $1 names the calling function so that
# the timeout message points at the right place.
#
wait_for_gvfs_protocol_server_startup () {
	for k in 0 1 2 3 4
	do
		if test -f "$PID_FILE"
		then
			return 0
		fi
		sleep 1
	done

	echo "$1: timeout waiting for server startup"
	return 1
}

start_gvfs_protocol_server () {
	#
	# Launch our server into the background in repo_src.
	#
	(
		cd "$REPO_SRC"
		test-gvfs-protocol --verbose \
			--listen=127.0.0.1 \
			--port=$GIT_TEST_GVFS_PROTOCOL_PORT \
			--reuseaddr \
			--pid-file="$PID_FILE" \
			2>"$SERVER_LOG" &
	)

	wait_for_gvfs_protocol_server_startup start_gvfs_protocol_server
}

# Like start_gvfs_protocol_server, but pass each argument to the
# server as a "--mayhem=<arg>" option.
#
start_gvfs_protocol_server_with_mayhem () {
	if test $# -lt 1
	then
		echo "start_gvfs_protocol_server_with_mayhem: need mayhem args"
		return 1
	fi

	mayhem=""
	for k in "$@"
	do
		mayhem="$mayhem --mayhem=$k"
	done
	#
	# Launch our server into the background in repo_src.
	# ($mayhem is deliberately unquoted so that it splits into
	# one option per mayhem keyword.)
	#
	(
		cd "$REPO_SRC"
		test-gvfs-protocol --verbose \
			--listen=127.0.0.1 \
			--port=$GIT_TEST_GVFS_PROTOCOL_PORT \
			--reuseaddr \
			--pid-file="$PID_FILE" \
			$mayhem \
			2>"$SERVER_LOG" &
	)

	wait_for_gvfs_protocol_server_startup start_gvfs_protocol_server_with_mayhem
}

# Verify the number of connections from the client.
#
# If keep-alive is working, a series of successful sequential requests to the
# same server should use the same TCP connection, so a simple multi-get would
# only have one connection.
#
# On the other hand, an auto-retry after a network error (mayhem) will have
# more than one for a single object request.
#
# TODO This may generate false alarm when we get to complicated tests, so
# TODO we might only want to use it for basic tests.
+# +verify_connection_count () { + if test $# -eq 1 + then + expected_nr=$1 + else + expected_nr=1 + fi + + actual_nr=$(grep -c "Connection from" "$SERVER_LOG") + + if test $actual_nr -ne $expected_nr + then + echo "verify_keep_live: expected $expected_nr; actual $actual_nr" + return 1 + fi + return 0 +} + +# Verify that the set of requested objects are present in +# the shared-cache and that there is no corruption. We use +# cat-file to hide whether the object is packed or loose in +# the test repo. +# +# Usage: +# +verify_objects_in_shared_cache () { + # + # See if any of the objects are missing from repo_t1. + # + git -C "$REPO_T1" cat-file --batch-check <"$1" >OUT.bc_actual || return 1 + grep -q " missing" OUT.bc_actual && return 1 + # + # See if any of the objects have different sizes or types than repo_src. + # + git -C "$REPO_SRC" cat-file --batch-check <"$1" >OUT.bc_expect || return 1 + test_cmp OUT.bc_expect OUT.bc_actual || return 1 + # + # See if any of the objects are corrupt in repo_t1. This fully + # reconstructs the objects and verifies the hash and therefore + # detects corruption not found by the earlier "batch-check" step. + # + git -C "$REPO_T1" cat-file --batch <"$1" >OUT.b_actual || return 1 + # + # TODO move the shared-cache directory (and/or the + # TODO .git/objects/info/alternates and temporarily unset + # TODO gvfs.sharedCache) and repeat the first "batch-check" + # TODO and make sure that they are ALL missing. + # + return 0 +} + +verify_received_packfile_count () { + if test $# -eq 1 + then + expected_nr=$1 + else + expected_nr=1 + fi + + actual_nr=$(grep -c "packfile " OUT.output && + + # Stop the server to prevent the verification steps from faulting-in + # any missing objects. + # + stop_gvfs_protocol_server && + + # gvfs-helper prints a "loose " message for each received object. + # Verify that gvfs-helper received each of the requested objects. 
+ # + sed "s/loose //" OUT.actual && + test_cmp "$OIDS_FILE" OUT.actual && + + verify_objects_in_shared_cache "$OIDS_FILE" && + verify_connection_count 1 +' + +test_expect_success 'basic: GET cache-server multi-get trust-mode' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # Connect to the cache-server and make a series of + # single-object GET requests. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=trust \ + --remote=origin \ + get \ + <"$OIDS_FILE" >OUT.output && + + # Stop the server to prevent the verification steps from faulting-in + # any missing objects. + # + stop_gvfs_protocol_server && + + # gvfs-helper prints a "loose " message for each received object. + # Verify that gvfs-helper received each of the requested objects. + # + sed "s/loose //" OUT.actual && + test_cmp "$OIDS_FILE" OUT.actual && + + verify_objects_in_shared_cache "$OIDS_FILE" && + verify_connection_count 1 +' + +test_expect_success 'basic: GET gvfs/config' ' +# test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # Connect to the cache-server and make a series of + # single-object GET requests. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + config \ + <"$OIDS_FILE" >OUT.output && + + # Stop the server to prevent the verification steps from faulting-in + # any missing objects. + # + stop_gvfs_protocol_server && + + # The cache-server URL should be listed in the gvfs/config output. + # We confirm this before assuming error-mode will work. + # + grep -q "$CACHE_URL" OUT.output +' + +test_expect_success 'basic: GET cache-server multi-get error-mode' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # Connect to the cache-server and make a series of + # single-object GET requests. 
+ # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=error \ + --remote=origin \ + get \ + <"$OIDS_FILE" >OUT.output && + + # Stop the server to prevent the verification steps from faulting-in + # any missing objects. + # + stop_gvfs_protocol_server && + + # gvfs-helper prints a "loose " message for each received object. + # Verify that gvfs-helper received each of the requested objects. + # + sed "s/loose //" OUT.actual && + test_cmp "$OIDS_FILE" OUT.actual && + + verify_objects_in_shared_cache "$OIDS_FILE" && + + # Technically, we have 1 connection to the origin server + # for the "gvfs/config" request and 1 to cache server to + # get the objects, but because we are using the same port + # for both, keep-alive will handle it. So 1 connection. + # + verify_connection_count 1 +' + +# The GVFS Protocol POST verb behaves like GET for non-commit objects +# (in that it just returns the requested object), but for commit +# objects POST *also* returns all trees referenced by the commit. +# +# The goal of this test is to confirm that gvfs-helper can send us +# a packfile at all. So, this test only passes blobs to not blur +# the issue. +# +test_expect_success 'basic: POST origin blobs' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # Connect to the origin server (w/o auth) and make + # multi-object POST request. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + post \ + <"$OIDS_BLOBS_FILE" >OUT.output && + + # Stop the server to prevent the verification steps from faulting-in + # any missing objects. + # + stop_gvfs_protocol_server && + + # gvfs-helper prints a "packfile " message for each received + # packfile. 
We verify the number of expected packfile(s) and we + # individually verify that each requested object is present in the + # shared cache (and index-pack already verified the integrity of + # the packfile), so we do not bother to run "git verify-pack -v" + # and do an exact matchup here. + # + verify_received_packfile_count 1 && + + verify_objects_in_shared_cache "$OIDS_BLOBS_FILE" && + verify_connection_count 1 +' + +# Request a single blob via POST. Per the GVFS Protocol, the server +# should implicitly send a loose object for it. Confirm that. +# +test_expect_success 'basic: POST-request a single blob' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # Connect to the origin server (w/o auth) and request a single + # blob via POST. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + post \ + <"$OID_ONE_BLOB_FILE" >OUT.output && + + # Stop the server to prevent the verification steps from faulting-in + # any missing objects. + # + stop_gvfs_protocol_server && + + # gvfs-helper prints a "loose " message for each received + # loose object. + # + sed "s/loose //" OUT.actual && + test_cmp "$OID_ONE_BLOB_FILE" OUT.actual && + + verify_connection_count 1 +' + +# Request a single commit via POST. Per the GVFS Protocol, the server +# should implicitly send us a packfile containing the commit and the +# trees it references. Confirm that properly handled the receipt of +# the packfile. (Here, we are testing that asking for a single object +# yields a packfile rather than a loose object.) +# +# We DO NOT verify that the packfile contains commits/trees and no blobs +# because our test helper doesn't implement the filtering. +# +test_expect_success 'basic: POST-request a single commit' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # Connect to the origin server (w/o auth) and request a single + # commit via POST. 
+ # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + post \ + <"$OID_ONE_COMMIT_FILE" >OUT.output && + + # Stop the server to prevent the verification steps from faulting-in + # any missing objects. + # + stop_gvfs_protocol_server && + + # gvfs-helper prints a "packfile " message for each received + # packfile. + # + verify_received_packfile_count 1 && + + verify_connection_count 1 +' + +################################################################# +# Tests to see how gvfs-helper responds to network problems. +# +# We use a small --max-retries value because of exponential backoff. +# +# These mayhem tests are interested in how gvfs-helper gracefully +# retries when there is a network error. And verify that it gives +# up gracefully too. +################################################################# + +mayhem_observed__close__connections () { + if grep -q "transient" OUT.stderr + then + # Transient errors should retry. + # 1 for initial request + 2 retries. + # + verify_connection_count 3 + return $? + elif grep -q "hard_fail" OUT.stderr + then + # Hard errors should not retry. + # + verify_connection_count 1 + return $? + else + echo "mayhem_observed__close__connections: unexpected mayhem-induced error type" + return 1 + fi +} + +mayhem_observed__close () { + # Expected error codes for mayhem events: + # close_read + # close_write + # close_no_write + # + # CURLE_PARTIAL_FILE 18 + # CURLE_GOT_NOTHING 52 + # CURLE_SEND_ERROR 55 + # CURLE_RECV_ERROR 56 + # + # I don't want to pin it down to an exact error for each because there may + # be races here because of network buffering. + # + # Also, it is unclear which of these network errors should be transient + # (with retry) and which should be a hard-fail (without retry). I'm only + # going to verify the connection counts based upon what type of error + # gvfs-helper claimed it to be. 
+ # + if grep -q "error: get: (curl:18)" OUT.stderr || + grep -q "error: get: (curl:52)" OUT.stderr || + grep -q "error: get: (curl:55)" OUT.stderr || + grep -q "error: get: (curl:56)" OUT.stderr + then + mayhem_observed__close__connections + return $? + else + echo "mayhem_observed__close: unexpected mayhem-induced error" + return 1 + fi +} + +test_expect_success 'curl-error: no server' ' + test_when_finished "per_test_cleanup" && + + # Try to do a multi-get without a server. + # + # Use small max-retry value because of exponential backoff, + # but yet do exercise retry some. + # + test_must_fail \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OIDS_FILE" >OUT.output 2>OUT.stderr && + + # CURLE_COULDNT_CONNECT 7 + grep -q "error: get: (curl:7)" OUT.stderr +' + +test_expect_success 'curl-error: close socket while reading request' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem close_read && + + test_must_fail \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OIDS_FILE" >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + mayhem_observed__close +' + +test_expect_success 'curl-error: close socket while writing response' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem close_write && + + test_must_fail \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OIDS_FILE" >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + mayhem_observed__close +' + +test_expect_success 'curl-error: close socket before writing response' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem close_no_write && + + test_must_fail \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OIDS_FILE" 
>OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + mayhem_observed__close +' + +################################################################# +# Tests to confirm that gvfs-helper does silently recover when +# a retry succeeds. +# +# Note: I'm only going to do this for 1 of the close_* mayhem events. +################################################################# + +test_expect_success 'successful retry after curl-error: origin get' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem close_read_1 && + + # Connect to the origin server (w/o auth). + # Make a single-object GET request. + # Confirm that it succeeds without error. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OID_ONE_BLOB_FILE" >OUT.output && + + stop_gvfs_protocol_server && + + # gvfs-helper prints a "loose <oid>" message for each received object. + # Verify that gvfs-helper received each of the requested objects. + # + sed "s/loose //" <OUT.output | sort >OUT.actual && + test_cmp "$OID_ONE_BLOB_FILE" OUT.actual && + + verify_objects_in_shared_cache "$OID_ONE_BLOB_FILE" && + verify_connection_count 2 +' + +################################################################# +# Tests to see how gvfs-helper responds to HTTP errors/problems. 
+# +################################################################# + +# See "enum gh__error_code" in gvfs-helper.c +# +GH__ERROR_CODE__HTTP_404=4 +GH__ERROR_CODE__HTTP_429=5 +GH__ERROR_CODE__HTTP_503=6 + +test_expect_success 'http-error: 503 Service Unavailable (with retry)' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem http_503 && + + test_expect_code $GH__ERROR_CODE__HTTP_503 \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OIDS_FILE" >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + grep -q "error: get: (http:503)" OUT.stderr && + verify_connection_count 3 +' + +test_expect_success 'http-error: 429 Too Many Requests (with retry)' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem http_429 && + + test_expect_code $GH__ERROR_CODE__HTTP_429 \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OIDS_FILE" >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + grep -q "error: get: (http:429)" OUT.stderr && + verify_connection_count 3 +' + +test_expect_success 'http-error: 404 Not Found (no retry)' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem http_404 && + + test_expect_code $GH__ERROR_CODE__HTTP_404 \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OID_ONE_BLOB_FILE" >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + grep -q "error: get: (http:404)" OUT.stderr && + verify_connection_count 1 +' + +################################################################# +# Tests to confirm that gvfs-helper does silently recover when an +# HTTP request succeeds after a failure. 
+# +################################################################# + +test_expect_success 'successful retry after http-error: origin get' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem http_429_1 && + + # Connect to the origin server (w/o auth). + # Make a single-object GET request. + # Confirm that it succeeds without error. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OID_ONE_BLOB_FILE" >OUT.output && + + stop_gvfs_protocol_server && + + # gvfs-helper prints a "loose <oid>" message for each received object. + # Verify that gvfs-helper received each of the requested objects. + # + sed "s/loose //" <OUT.output | sort >OUT.actual && + test_cmp "$OID_ONE_BLOB_FILE" OUT.actual && + + verify_objects_in_shared_cache "$OID_ONE_BLOB_FILE" && + verify_connection_count 2 +' + +################################################################# +# Test HTTP Auth +# +################################################################# + +test_expect_success 'HTTP GET Auth on Origin Server' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem http_401 && + + # Force server to require auth. + # Connect to the origin server without auth. + # Make a single-object GET request. + # Confirm that it gets a 401 and then retries with auth. + # + GIT_CONFIG_NOSYSTEM=1 \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OID_ONE_BLOB_FILE" >OUT.output && + + stop_gvfs_protocol_server && + + # gvfs-helper prints a "loose <oid>" message for each received object. + # Verify that gvfs-helper received each of the requested objects. 
+ # + sed "s/loose //" <OUT.output | sort >OUT.actual && + test_cmp "$OID_ONE_BLOB_FILE" OUT.actual && + + verify_objects_in_shared_cache "$OID_ONE_BLOB_FILE" && + verify_connection_count 2 +' + +test_expect_success 'HTTP POST Auth on Origin Server' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem http_401 && + + # Connect to the origin server and make a multi-object POST + # request and verify that it automatically handles the 401. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + post \ + <"$OIDS_BLOBS_FILE" >OUT.output && + + # Stop the server to prevent the verification steps from faulting-in + # any missing objects. + # + stop_gvfs_protocol_server && + + # gvfs-helper prints a "packfile " message for each received + # packfile. We verify the number of expected packfile(s) and we + # individually verify that each requested object is present in the + # shared cache (and index-pack already verified the integrity of + # the packfile), so we do not bother to run "git verify-pack -v" + # and do an exact matchup here. + # + verify_received_packfile_count 1 && + + verify_objects_in_shared_cache "$OIDS_BLOBS_FILE" && + verify_connection_count 2 +' + +test_expect_success 'HTTP GET Auth on Cache Server' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem http_401 && + + # Try auth to cache-server. Note that gvfs-helper *ALWAYS* sends + # creds to cache-servers, so we will never see the "400 Bad Request" + # response. And we are using "trust" mode, so we only expect 1 + # connection to the server. + # + GIT_CONFIG_NOSYSTEM=1 \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=trust \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OID_ONE_BLOB_FILE" >OUT.output && + + stop_gvfs_protocol_server && + + # gvfs-helper prints a "loose <oid>" message for each received object. + # Verify that gvfs-helper received each of the requested objects. 
+ # + sed "s/loose //" <OUT.output | sort >OUT.actual && + test_cmp "$OID_ONE_BLOB_FILE" OUT.actual && + + verify_objects_in_shared_cache "$OID_ONE_BLOB_FILE" && + verify_connection_count 1 +' + +################################################################# +# Integration tests with Git.exe +# +# Now that we have confirmed that gvfs-helper works in isolation, +# run a series of tests using random Git commands that fault-in +# objects as needed. +# +# At this point, I'm going to stop verifying the shape of the ODB +# (loose vs packfiles) and the number of connections required to +# get them. The tests from here on are to verify that objects are +# magically fetched whenever required. +################################################################# + +test_expect_success 'integration: explicit commit/trees, implicit blobs: log file' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # We have a very empty repo. Seed it with all of the commits + # and trees. The purpose of this test is to demand-load the + # needed blobs only, so we prefetch the commits and trees. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + <"$OIDS_CT_FILE" >OUT.output && + + # Confirm that we do not have the blobs locally. + # With gvfs-helper turned off, we should fail. + # NOTE(review): "m3.brach" below looks like a typo of "m3.branch"; confirm this step fails for the intended reason. + test_must_fail \ + git -C "$REPO_T1" -c core.useGVFSHelper=false \ + log $(cat m3.brach) -- file9.txt \ + >OUT.output 2>OUT.stderr && + + # Turn on gvfs-helper and retry. This should implicitly fetch + # any needed blobs. + # + git -C "$REPO_T1" -c core.useGVFSHelper=true \ + log $(cat m3.branch) -- file9.txt \ + >OUT.output 2>OUT.stderr && + + # Verify that gvfs-helper wrote the fetched blobs to the + # local ODB, such that a second attempt with gvfs-helper + # turned off should succeed. 
+ # + git -C "$REPO_T1" -c core.useGVFSHelper=false \ + log $(cat m3.branch) -- file9.txt \ + >OUT.output 2>OUT.stderr +' + +test_expect_success 'integration: explicit commit/trees, implicit blobs: diff 2 commits' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # We have a very empty repo. Seed it with all of the commits + # and trees. The purpose of this test is to demand-load the + # needed blobs only, so we prefetch the commits and trees. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + <"$OIDS_CT_FILE" >OUT.output && + + # Confirm that we do not have the blobs locally. + # With gvfs-helper turned off, we should fail. + # + test_must_fail \ + git -C "$REPO_T1" -c core.useGVFSHelper=false \ + diff $(cat m1.branch)..$(cat m3.branch) \ + >OUT.output 2>OUT.stderr && + + # Turn on gvfs-helper and retry. This should implicitly fetch + # any needed blobs. + # + git -C "$REPO_T1" -c core.useGVFSHelper=true \ + diff $(cat m1.branch)..$(cat m3.branch) \ + >OUT.output 2>OUT.stderr && + + # Verify that gvfs-helper wrote the fetched blobs to the + # local ODB, such that a second attempt with gvfs-helper + # turned off should succeed. + # + git -C "$REPO_T1" -c core.useGVFSHelper=false \ + diff $(cat m1.branch)..$(cat m3.branch) \ + >OUT.output 2>OUT.stderr +' + +test_expect_success 'integration: fully implicit: diff 2 commits' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # Implicitly demand-load everything without any pre-seeding. + # + git -C "$REPO_T1" -c core.useGVFSHelper=true \ + diff $(cat m1.branch)..$(cat m3.branch) \ + >OUT.output 2>OUT.stderr +' + +################################################################# +# Ensure that the SHA of the blob we received matches the SHA of +# the blob we requested. +################################################################# + +# Request a loose blob from the server. 
Verify that the received +# content matches the requested SHA. +# +test_expect_success 'catch corrupted loose object' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem corrupt_loose && + + test_must_fail \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=trust \ + --remote=origin \ + get \ + <"$OID_ONE_BLOB_FILE" >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + # Verify corruption detected. + # Verify valid blob not included in response to client. + + grep "hash failed for received loose object" OUT.stderr && + + # Verify that we did not write the corrupted blob to the ODB. + + ! verify_objects_in_shared_cache "$OID_ONE_BLOB_FILE" && + git -C "$REPO_T1" fsck +' + +test_done