diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt
index 7014cc204cc7c7..d0d64e4edab19e 100644
--- a/Documentation/config/core.txt
+++ b/Documentation/config/core.txt
@@ -791,6 +791,10 @@ core.gvfs::
 		is first accessed and brought down to the client. Git.exe
 		can't currently tell the first access vs subsequent accesses
 		so this flag just blocks them from occurring at all.
+	GVFS_PREFETCH_DURING_FETCH::
+		Bit value 128
+		While performing a `git fetch` command, use the gvfs-helper to
+		perform a "prefetch" of commits and trees.
 --
 
 core.useGvfsHelper::
diff --git a/Makefile b/Makefile
index 14592f7f7de5b7..b4cabd43eae276 100644
--- a/Makefile
+++ b/Makefile
@@ -2863,7 +2863,7 @@ gettext.sp gettext.s gettext.o: GIT-PREFIX
 gettext.sp gettext.s gettext.o: EXTRA_CPPFLAGS = \
 	-DGIT_LOCALE_PATH='"$(localedir_relative_SQ)"'
 
-http-push.sp http.sp http-walker.sp remote-curl.sp imap-send.sp: SP_EXTRA_FLAGS += \
+http-push.sp http.sp http-walker.sp remote-curl.sp imap-send.sp gvfs-helper.sp: SP_EXTRA_FLAGS += \
 	-DCURL_DISABLE_TYPECHECK
 
 pack-revindex.sp: SP_EXTRA_FLAGS += -Wno-memcpy-max-count
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 693f02b95802bc..c99f47dac4806c 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -18,6 +18,9 @@
 #include "string-list.h"
 #include "remote.h"
 #include "transport.h"
+#include "gvfs.h"
+#include "gvfs-helper-client.h"
+#include "packfile.h"
 #include "run-command.h"
 #include "parse-options.h"
 #include "sigchain.h"
@@ -1160,6 +1163,13 @@ static int store_updated_refs(struct display_state *display_state,
 		opt.exclude_hidden_refs_section = "fetch";
 
 	rm = ref_map;
+
+	/*
+	 * Before checking connectivity, be really sure we have the
+	 * latest pack-files loaded into memory.
+	 */
+	reprepare_packed_git(the_repository);
+
 	if (check_connected(iterate_ref_map, &rm, &opt)) {
 		rc = error(_("%s did not send all necessary objects\n"),
 			   display_state->url);
@@ -2402,6 +2412,9 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
 		}
 		string_list_remove_duplicates(&list, 0);
 
+		if (core_gvfs & GVFS_PREFETCH_DURING_FETCH)
+			gh_client__prefetch(0, NULL);
+
 		if (negotiate_only) {
 			struct oidset acked_commits = OIDSET_INIT;
 			struct oidset_iter iter;
diff --git a/gvfs-helper-client.c b/gvfs-helper-client.c
index a5dc279406cf43..6f38fe1c4353d0 100644
--- a/gvfs-helper-client.c
+++ b/gvfs-helper-client.c
@@ -28,13 +28,14 @@ static struct hashmap gh_server__subprocess_map;
 static struct object_directory *gh_client__chosen_odb;
 
 /*
- * The "objects" capability has 2 verbs: "get" and "post".
+ * The "objects" capability has 3 verbs: "get", "post", and "prefetch".
 */
 #define CAP_OBJECTS      (1u<<1)
 #define CAP_OBJECTS_NAME "objects"
 
 #define CAP_OBJECTS__VERB_GET1_NAME "get"
 #define CAP_OBJECTS__VERB_POST_NAME "post"
+#define CAP_OBJECTS__VERB_PREFETCH_NAME "prefetch"
 
 static int gh_client__start_fn(struct subprocess_entry *subprocess)
 {
@@ -133,6 +134,44 @@ static int gh_client__send__objects_get(struct child_process *process,
 	return 0;
 }
 
+/*
+ * Send a request to gvfs-helper to prefetch packfiles from either the
+ * cache-server or the main Git server using "/gvfs/prefetch".
+ *
+ *     objects.prefetch LF
+ *     [<seconds-since-epoch> LF]
+ *     <flush>
+ */
+static int gh_client__send__objects_prefetch(struct child_process *process,
+					     timestamp_t seconds_since_epoch)
+{
+	int err;
+
+	/*
+	 * We assume that all of the packet_ routines call error()
+	 * so that we don't have to.
+	 */
+
+	err = packet_write_fmt_gently(
+		process->in,
+		(CAP_OBJECTS_NAME "." CAP_OBJECTS__VERB_PREFETCH_NAME "\n"));
+	if (err)
+		return err;
+
+	if (seconds_since_epoch) {
+		err = packet_write_fmt_gently(process->in, "%" PRItime "\n",
					      seconds_since_epoch);
+		if (err)
+			return err;
+	}
+
+	err = packet_flush_gently(process->in);
+	if (err)
+		return err;
+
+	return 0;
+}
+
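For orientation, the request this helper writes is a plain pkt-line stream.
Assuming a hypothetical timestamp of 1657568400, the bytes on the wire would
look like this (each packet carries a four-hex-digit length prefix that counts
the prefix itself; `0000` is the flush packet):

    0015objects.prefetch
    000f1657568400
    0000

The response comes back in the same `<odb> <data>* <status> <flush>` shape
shared by the other `objects` verbs: an `odb <directory>` line, one
`packfile <name>` line per pack received, then `ok`.
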
 /*
  * Update the loose object cache to include the newly created
  * object.
@@ -180,7 +219,7 @@ static void gh_client__update_packed_git(const char *line)
 }
 
 /*
- * Both CAP_OBJECTS verbs return the same format response:
+ * CAP_OBJECTS verbs return the same format response:
  *
  *    <odb>
  *    <data>*
@@ -220,6 +259,8 @@ static int gh_client__objects__receive_response(
 	const char *v1;
 	char *line;
 	int len;
+	int nr_loose = 0;
+	int nr_packfile = 0;
 	int err = 0;
 
 	while (1) {
@@ -238,13 +279,13 @@ static int gh_client__objects__receive_response(
 
 		else if (starts_with(line, "packfile")) {
 			gh_client__update_packed_git(line);
 			ghc |= GHC__CREATED__PACKFILE;
-			*p_nr_packfile += 1;
+			nr_packfile++;
 		}
 
 		else if (starts_with(line, "loose")) {
 			gh_client__update_loose_cache(line);
 			ghc |= GHC__CREATED__LOOSE;
-			*p_nr_loose += 1;
+			nr_loose++;
 		}
 
 		else if (starts_with(line, "ok"))
@@ -258,6 +299,8 @@ static int gh_client__objects__receive_response(
 	}
 
 	*p_ghc = ghc;
+	*p_nr_loose = nr_loose;
+	*p_nr_packfile = nr_packfile;
 
 	return err;
 }
@@ -314,7 +357,7 @@ static struct gh_server__process *gh_client__find_long_running_process(
 	/*
 	 * Find an existing long-running process with the above command
 	 * line -or- create a new long-running process for this and
-	 * subsequent 'get' requests.
+	 * subsequent requests.
 	 */
 	if (!gh_server__subprocess_map_initialized) {
 		gh_server__subprocess_map_initialized = 1;
@@ -351,10 +394,14 @@ static struct gh_server__process *gh_client__find_long_running_process(
 
 void gh_client__queue_oid(const struct object_id *oid)
 {
-	// TODO consider removing this trace2. it is useful for interactive
-	// TODO debugging, but may generate way too much noise for a data
-	// TODO event.
-	trace2_printf("gh_client__queue_oid: %s", oid_to_hex(oid));
+	/*
+	 * Keep this trace as a printf only, so that it goes to the
+	 * perf log, but not the event log.  It is useful for interactive
+	 * debugging, but generates way too much (useless) noise for the
+	 * database.
+	 */
+	if (trace2_is_enabled())
+		trace2_printf("gh_client__queue_oid: %s", oid_to_hex(oid));
 
 	if (!oidset_insert(&gh_client__oidset_queued, oid))
 		gh_client__oidset_count++;
@@ -435,10 +482,14 @@ int gh_client__get_immediate(const struct object_id *oid,
 	int nr_packfile = 0;
 	int err = 0;
 
-	// TODO consider removing this trace2. it is useful for interactive
-	// TODO debugging, but may generate way too much noise for a data
-	// TODO event.
-	trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid));
+	/*
+	 * Keep this trace as a printf only, so that it goes to the
+	 * perf log, but not the event log.  It is useful for interactive
+	 * debugging, but generates way too much (useless) noise for the
+	 * database.
+	 */
+	if (trace2_is_enabled())
+		trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid));
 
 	entry = gh_client__find_long_running_process(CAP_OBJECTS);
 	if (!entry)
@@ -467,3 +518,55 @@ int gh_client__get_immediate(const struct object_id *oid,
 
 	return err;
 }
+
+/*
+ * Ask gvfs-helper to prefetch commits-and-trees packfiles since a
+ * given timestamp.
+ *
+ * If seconds_since_epoch is zero, gvfs-helper will scan the ODB for
+ * the last received prefetch and ask for ones newer than that.
+ */
+int gh_client__prefetch(timestamp_t seconds_since_epoch,
+			int *nr_packfiles_received)
+{
+	struct gh_server__process *entry;
+	struct child_process *process;
+	enum gh_client__created ghc;
+	int nr_loose = 0;
+	int nr_packfile = 0;
+	int err = 0;
+
+	entry = gh_client__find_long_running_process(CAP_OBJECTS);
+	if (!entry)
+		return -1;
+
+	trace2_region_enter("gh-client", "objects/prefetch", the_repository);
+	trace2_data_intmax("gh-client", the_repository, "prefetch/since",
+			   seconds_since_epoch);
+
+	process = &entry->subprocess.process;
+
+	sigchain_push(SIGPIPE, SIG_IGN);
+
+	err = gh_client__send__objects_prefetch(process, seconds_since_epoch);
+	if (!err)
+		err = gh_client__objects__receive_response(
+			process, &ghc, &nr_loose, &nr_packfile);
+
+	sigchain_pop(SIGPIPE);
+
+	if (err) {
+		subprocess_stop(&gh_server__subprocess_map,
+				(struct subprocess_entry *)entry);
+		FREE_AND_NULL(entry);
+	}
+
+	trace2_data_intmax("gh-client", the_repository,
+			   "prefetch/packfile_count", nr_packfile);
+	trace2_region_leave("gh-client", "objects/prefetch", the_repository);
+
+	if (nr_packfiles_received)
+		*nr_packfiles_received = nr_packfile;
+
+	return err;
+}
diff --git a/gvfs-helper-client.h b/gvfs-helper-client.h
index c1e38fad75f841..7692534ecda54c 100644
--- a/gvfs-helper-client.h
+++ b/gvfs-helper-client.h
@@ -66,4 +66,22 @@ void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr);
 */
 int gh_client__drain_queue(enum gh_client__created *p_ghc);
 
+/*
+ * Ask `gvfs-helper server` to fetch any "prefetch packs"
+ * available on the server more recent than the requested time.
+ *
+ * If seconds_since_epoch is zero, gvfs-helper will scan the ODB for
+ * the last received prefetch and ask for ones newer than that.
+ *
+ * A long-running background process is used to make subsequent
+ * requests (either prefetch or regular immediate/queued requests)
+ * more efficient.
+ *
+ * One or more packfiles will be created in the shared-cache ODB.
+ *
+ * Returns 0 on success, -1 on error.  Optionally also returns the
+ * number of prefetch packs received.
+ */
+int gh_client__prefetch(timestamp_t seconds_since_epoch,
+			int *nr_packfiles_received);
+
 #endif /* GVFS_HELPER_CLIENT_H */
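A minimal sketch of how a caller might drive this API, assuming a repository
with `core.useGvfsHelper` enabled (the surrounding function and the cutoff
timestamp are illustrative, not part of this change):

    #include "git-compat-util.h"
    #include "repository.h"
    #include "packfile.h"
    #include "gvfs-helper-client.h"

    /* hypothetical caller: refresh commits/trees before an expensive walk */
    static void example_prefetch(void)
    {
    	int nr_packs = 0;

    	/* ask for prefetch packs newer than 2022-07-11T17:00:00Z */
    	if (gh_client__prefetch(1657558800, &nr_packs))
    		warning("gvfs-helper prefetch failed");
    	else if (nr_packs)
    		reprepare_packed_git(the_repository);
    }

`builtin/fetch.c` above passes `(0, NULL)` instead, which lets gvfs-helper
derive the cutoff from the newest prefetch pack already in the ODB.
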
diff --git a/gvfs-helper.c b/gvfs-helper.c
index d462ef47926700..04c9c460c6ce36 100644
--- a/gvfs-helper.c
+++ b/gvfs-helper.c
@@ -22,7 +22,7 @@
 //
 //            error     := verify cache-server and abort if not well-known.
 //
-//            trust     := do not verify cache-server.  just use it.
+//            trust     := do not verify cache-server.  just use it, if set.
 //
 //            disable   := disable the cache-server and always use the main
 //                         Git server.
@@ -87,6 +87,24 @@
 //            Number of retries after transient network errors.
 //            Set to zero to disable such retries.
 //
+//     prefetch
+//
+//            Use "/gvfs/prefetch" REST API to fetch 1 or more commits-and-trees
+//            prefetch packs from the server.
+//
+//            <prefetch-options>:
+//
+//                 --since=<seconds-since-epoch>     // defaults to "0"
+//
+//                        Time in seconds since the epoch.  If omitted or
+//                        zero, the timestamp from the newest prefetch
+//                        packfile found in the shared-cache ODB is used.
+//                        (This is based upon the packfile name, not the
+//                        mtime.)
+//
+//                        The GVFS Protocol defines this value as a way to
+//                        request cached packfiles NEWER THAN this timestamp.
+//
 //     server
 //
 //            Interactive/sub-process mode.  Listen for a series of commands
@@ -116,20 +134,36 @@
 //
 //            Each object will be created as a loose object in the ODB.
 //
+//            Create 1 or more loose objects in the shared-cache ODB.
+//            (The pathname of the selected ODB is reported at the
+//            beginning of the response; this should match the pathname
+//            given on the command line).
+//
+//            git> objects.get
+//            git> <oid>
+//            git> <oid>
+//            git> ...
+//            git> <oid>
+//            git> 0000
+//
+//            git< odb <directory>
+//            git< loose <oid>
+//            git< loose <oid>
+//            git< ...
+//            git< loose <oid>
+//            git< ok | partial | error <message>
+//            git< 0000
+//
 //     Interactive verb: objects.post
 //
 //            Fetch 1 or more objects, in bulk, using one or more
 //            "/gvfs/objects" POST requests.
 //
-//            For both verbs, if a cache-server is configured, try it first.
-//            Optionally fallback to the main Git server.
-//
 //            Create 1 or more loose objects and/or packfiles in the
-//            shared-cache ODB.  (The pathname of the selected ODB is
-//            reported at the beginning of the response; this should
-//            match the pathname given on the command line).
+//            shared-cache ODB.  A POST is allowed to respond with
+//            either loose or packed objects.
 //
-//            git> objects.get | objects.post
+//            git> objects.post
 //            git> <oid>
 //            git> <oid>
 //            git> ...
@@ -139,11 +173,31 @@
 //            git< odb <directory>
 //            git< loose <oid> | packfile <filename>
 //            git< loose <oid> | packfile <filename>
-//            gid< ...
+//            git< ...
 //            git< loose <oid> | packfile <filename>
 //            git< ok | partial | error <message>
 //            git< 0000
 //
+//     Interactive verb: objects.prefetch
+//
+//            Fetch 1 or more prefetch packs using a "/gvfs/prefetch"
+//            request.
+//
+//            git> objects.prefetch
+//            git> <seconds-since-epoch>      // optional
+//            git> 0000
+//
+//            git< odb <directory>
+//            git< packfile <filename>
+//            git< packfile <filename>
+//            git< ...
+//            git< packfile <filename>
+//            git< ok | error <message>
+//            git< 0000
+//
+//            If a cache-server is configured, try it first.
+//            Optionally fallback to the main Git server.
+//
 // [1] Documentation/technical/protocol-common.txt
 // [2] Documentation/technical/long-running-process-protocol.txt
 // [3] See GIT_TRACE_PACKET
@@ -185,11 +239,17 @@
 #include "abspath.h"
 #include "progress.h"
 #include "trace2.h"
+#include "wrapper.h"
+#include "packfile.h"
+#include "date.h"
+
+#define TR2_CAT "gvfs-helper"
 
 static const char * const main_usage[] = {
 	N_("git gvfs-helper [<main_options>] config      [<options>]"),
 	N_("git gvfs-helper [<main_options>] get         [<options>]"),
 	N_("git gvfs-helper [<main_options>] post        [<options>]"),
+	N_("git gvfs-helper [<main_options>] prefetch    [<options>]"),
 	N_("git gvfs-helper [<main_options>] server      [<options>]"),
 	NULL
 };
@@ -204,6 +264,11 @@ static const char *const objects_post_usage[] = {
 	NULL
 };
 
+static const char *const prefetch_usage[] = {
+	N_("git gvfs-helper [<main_options>] prefetch [<options>]"),
+	NULL
+};
+
 static const char *const server_usage[] = {
 	N_("git gvfs-helper [<main_options>] server [<options>]"),
 	NULL
 };
@@ -248,6 +313,7 @@ enum gh__error_code {
 	GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE = 11,
 	GH__ERROR_CODE__SUBPROCESS_SYNTAX = 12,
 	GH__ERROR_CODE__INDEX_PACK_FAILED = 13,
+	GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH = 14,
 };
 
 enum gh__cache_server_mode {
@@ -312,6 +378,8 @@ static const char *gh__server_type_label[GH__SERVER_TYPE__NR] = {
 };
 
 enum gh__objects_mode {
+	GH__OBJECTS_MODE__NONE = 0,
+
 	/*
 	 * Bulk fetch objects.
 	 *
@@ -331,6 +399,12 @@ enum gh__objects_mode {
 	 * object treatment).
 	 */
 	GH__OBJECTS_MODE__GET,
+
+	/*
+	 * Fetch one or more pre-computed "prefetch packs" containing
+	 * commits and trees.
+ */ + GH__OBJECTS_MODE__PREFETCH, }; struct gh__azure_throttle @@ -405,6 +479,7 @@ struct gh__request_params { int b_write_to_file; /* write to file=1 or strbuf=0 */ int b_permit_cache_server_if_defined; + enum gh__objects_mode objects_mode; enum gh__server_type server_type; int k_attempt; /* robust retry attempt */ @@ -419,15 +494,8 @@ struct gh__request_params { struct strbuf *buffer; /* for response content when strbuf */ struct strbuf tr2_label; /* for trace2 regions */ - struct strbuf loose_path; struct object_id loose_oid; - struct strbuf temp_path_pack; - struct strbuf temp_path_idx; - struct strbuf final_path_pack; - struct strbuf final_path_idx; - struct strbuf final_packfile_filename; - /* * Note that I am putting all of the progress-related instance data * inside the request-params in the hope that we can eventually @@ -467,13 +535,7 @@ struct gh__request_params { .tempfile = NULL, \ .buffer = NULL, \ .tr2_label = STRBUF_INIT, \ - .loose_path = STRBUF_INIT, \ .loose_oid = {{0}}, \ - .temp_path_pack = STRBUF_INIT, \ - .temp_path_idx = STRBUF_INIT, \ - .final_path_pack = STRBUF_INIT, \ - .final_path_idx = STRBUF_INIT, \ - .final_packfile_filename = STRBUF_INIT, \ .progress_state = GH__PROGRESS_STATE__START, \ .progress_base_phase2_msg = STRBUF_INIT, \ .progress_base_phase3_msg = STRBUF_INIT, \ @@ -498,12 +560,6 @@ static void gh__request_params__release(struct gh__request_params *params) params->buffer = NULL; /* we do not own this */ strbuf_release(¶ms->tr2_label); - strbuf_release(¶ms->loose_path); - strbuf_release(¶ms->temp_path_pack); - strbuf_release(¶ms->temp_path_idx); - strbuf_release(¶ms->final_path_pack); - strbuf_release(¶ms->final_path_idx); - strbuf_release(¶ms->final_packfile_filename); strbuf_release(¶ms->progress_base_phase2_msg); strbuf_release(¶ms->progress_base_phase3_msg); @@ -594,9 +650,7 @@ static void gh__response_status__zero(struct gh__response_status *s) s->azure = NULL; } -static void install_packfile(struct gh__request_params *params, - struct gh__response_status *status); -static void install_loose(struct gh__request_params *params, +static void install_result(struct gh__request_params *params, struct gh__response_status *status); /* @@ -633,7 +687,7 @@ static void log_e2eid(struct gh__request_params *params, strbuf_addstr(&key, "e2eid"); strbuf_addstr(&key, gh__server_type_label[params->server_type]); - trace2_data_string("gvfs-helper", NULL, key.buf, + trace2_data_string(TR2_CAT, NULL, key.buf, params->e2eid.buf); strbuf_release(&key); @@ -733,7 +787,7 @@ static void compute_retry_mode_from_http_response( status->retry = GH__RETRY_MODE__HTTP_429; status->ec = GH__ERROR_CODE__HTTP_429; - trace2_data_string("gvfs-helper", NULL, "error/http", + trace2_data_string(TR2_CAT, NULL, "error/http", status->error_message.buf); return; @@ -746,7 +800,7 @@ static void compute_retry_mode_from_http_response( status->retry = GH__RETRY_MODE__HTTP_503; status->ec = GH__ERROR_CODE__HTTP_503; - trace2_data_string("gvfs-helper", NULL, "error/http", + trace2_data_string(TR2_CAT, NULL, "error/http", status->error_message.buf); return; @@ -760,7 +814,7 @@ static void compute_retry_mode_from_http_response( status->retry = GH__RETRY_MODE__HARD_FAIL; status->ec = GH__ERROR_CODE__HTTP_OTHER; - trace2_data_string("gvfs-helper", NULL, "error/http", + trace2_data_string(TR2_CAT, NULL, "error/http", status->error_message.buf); return; } @@ -894,7 +948,7 @@ static void compute_retry_mode_from_curl_error( status->retry = GH__RETRY_MODE__HARD_FAIL; status->ec = 
GH__ERROR_CODE__CURL_ERROR; - trace2_data_string("gvfs-helper", NULL, "error/curl", + trace2_data_string(TR2_CAT, NULL, "error/curl", status->error_message.buf); return; @@ -904,7 +958,7 @@ static void compute_retry_mode_from_curl_error( status->retry = GH__RETRY_MODE__TRANSIENT; status->ec = GH__ERROR_CODE__CURL_ERROR; - trace2_data_string("gvfs-helper", NULL, "error/curl", + trace2_data_string(TR2_CAT, NULL, "error/curl", status->error_message.buf); return; } @@ -1098,7 +1152,7 @@ static void gh__run_one_slot(struct active_request_slot *slot, params->progress_state = GH__PROGRESS_STATE__START; strbuf_setlen(¶ms->e2eid, 0); - trace2_region_enter("gvfs-helper", key.buf, NULL); + trace2_region_enter(TR2_CAT, key.buf, NULL); if (!start_active_slot(slot)) { compute_retry_mode_from_curl_error(status, @@ -1122,7 +1176,7 @@ static void gh__run_one_slot(struct active_request_slot *slot, * (such as when we request a commit). */ strbuf_addstr(&key, "/nr_bytes"); - trace2_data_intmax("gvfs-helper", NULL, + trace2_data_intmax(TR2_CAT, NULL, key.buf, status->bytes_received); strbuf_setlen(&key, old_len); @@ -1132,16 +1186,10 @@ static void gh__run_one_slot(struct active_request_slot *slot, if (params->progress) stop_progress(¶ms->progress); - if (status->ec == GH__ERROR_CODE__OK && params->b_write_to_file) { - if (params->b_is_post && - !strcmp(status->content_type.buf, - "application/x-git-packfile")) - install_packfile(params, status); - else - install_loose(params, status); - } + if (status->ec == GH__ERROR_CODE__OK && params->b_write_to_file) + install_result(params, status); - trace2_region_leave("gvfs-helper", key.buf, NULL); + trace2_region_leave(TR2_CAT, key.buf, NULL); strbuf_release(&key); } @@ -1288,7 +1336,7 @@ static void lookup_main_url(void) */ gh__global.main_url = transport_anonymize_url(gh__global.remote->url.v[0]); - trace2_data_string("gvfs-helper", NULL, "remote/url", gh__global.main_url); + trace2_data_string(TR2_CAT, NULL, "remote/url", gh__global.main_url); } static void do__http_get__gvfs_config(struct gh__response_status *status, @@ -1315,10 +1363,23 @@ static void select_cache_server(void) gh__global.cache_server_url = NULL; if (gh__cmd_opts.cache_server_mode == GH__CACHE_SERVER_MODE__DISABLE) { - trace2_data_string("gvfs-helper", NULL, "cache/url", "disabled"); + trace2_data_string(TR2_CAT, NULL, "cache/url", "disabled"); return; } + if (!gvfs_cache_server_url || !*gvfs_cache_server_url) { + switch (gh__cmd_opts.cache_server_mode) { + default: + case GH__CACHE_SERVER_MODE__TRUST_WITHOUT_VERIFY: + case GH__CACHE_SERVER_MODE__VERIFY_DISABLE: + trace2_data_string(TR2_CAT, NULL, "cache/url", "unset"); + return; + + case GH__CACHE_SERVER_MODE__VERIFY_ERROR: + die("cache-server not set"); + } + } + /* * If the cache-server and main Git server have the same URL, we * can silently disable the cache-server (by NOT setting the field @@ -1326,14 +1387,14 @@ static void select_cache_server(void) */ if (!strcmp(gvfs_cache_server_url, gh__global.main_url)) { gh__cmd_opts.try_fallback = 0; - trace2_data_string("gvfs-helper", NULL, "cache/url", "same"); + trace2_data_string(TR2_CAT, NULL, "cache/url", "same"); return; } if (gh__cmd_opts.cache_server_mode == GH__CACHE_SERVER_MODE__TRUST_WITHOUT_VERIFY) { gh__global.cache_server_url = gvfs_cache_server_url; - trace2_data_string("gvfs-helper", NULL, "cache/url", + trace2_data_string(TR2_CAT, NULL, "cache/url", gvfs_cache_server_url); return; } @@ -1374,7 +1435,7 @@ static void select_cache_server(void) if (match) { gh__global.cache_server_url 
= gvfs_cache_server_url;
-		trace2_data_string("gvfs-helper", NULL, "cache/url",
+		trace2_data_string(TR2_CAT, NULL, "cache/url",
 				   gvfs_cache_server_url);
 	}
@@ -1398,7 +1459,7 @@ static void select_cache_server(void)
 		else
 			warning("could not verify cache-server '%s'",
 				gvfs_cache_server_url);
-		trace2_data_string("gvfs-helper", NULL, "cache/url",
+		trace2_data_string(TR2_CAT, NULL, "cache/url",
 				   "disabled");
 	}
 
@@ -1585,27 +1646,14 @@ static void select_odb(void)
 }
 
 /*
- * Create a tempfile to stream the packfile into.
- *
- * We create a tempfile in the chosen ODB directory and let CURL
- * automatically stream data to the file.  If successful, we can
- * later rename it to a proper .pack and run "git index-pack" on
- * it to create the corresponding .idx file.
- *
- * TODO I would rather to just stream the packfile directly into
- * TODO "git index-pack --stdin" (and save some I/O) because it
- * TODO will automatically take care of the rename of both files
- * TODO and any other cleanup.  BUT INDEX-PACK WILL ONLY WRITE
- * TODO TO THE PRIMARY ODB -- it will not write into the alternates
- * TODO (this is considered bad form).  So we would need to add
- * TODO an option to index-pack to handle this.  I don't want to
- * TODO deal with this issue right now.
- *
- * TODO Consider using lockfile for this rather than naked tempfile.
+ * Create a unique tempfile or tempfile-pair inside the
+ * tempPacks directory.
 */
-static void create_tempfile_for_packfile(
-	struct gh__request_params *params,
-	struct gh__response_status *status)
+static void my_create_tempfile(
+	struct gh__response_status *status,
+	int b_fdopen,
+	const char *suffix1, struct tempfile **t1,
+	const char *suffix2, struct tempfile **t2)
 {
 	static unsigned int nth = 0;
 	static struct timeval tv = {0};
@@ -1615,15 +1663,16 @@ static void create_tempfile_for_packfile(
 	struct strbuf basename = STRBUF_INIT;
 	struct strbuf buf = STRBUF_INIT;
-	int len_p;
+	int len_tp;
 	enum scld_error scld;
+	int retries;
 
 	gh__response_status__zero(status);
 
 	if (!nth) {
 		/*
-		 * Create a <date> string to use in the name of all packfiles
-		 * created by this process.
+		 * Create a unique <date> string to use in the name of all
+		 * tempfiles created by this process.
 		 */
 		gettimeofday(&tv, NULL);
 		secs = tv.tv_sec;
@@ -1636,84 +1685,129 @@ static void create_tempfile_for_packfile(
 	}
 
 	/*
-	 * Create a <basename> for this packfile using a series number <nth>,
-	 * so that all of the chunks we download will group together.
+	 * Create a <basename> for this instance/pair using a series
+	 * number <nth>.
 	 */
-	strbuf_addf(&basename, "vfs-%s-%04d", date, nth++);
+	strbuf_addf(&basename, "t-%s-%04d", date, nth++);
+
+	if (!suffix1 || !*suffix1)
+		suffix1 = "temp";
 
 	/*
-	 * We will stream the data into a managed tempfile() in:
+	 * Create full pathname as:
 	 *
-	 *     "<odb>/pack/tempPacks/vfs-<date>-<nth>.temp"
+	 *     "<odb>/pack/tempPacks/<basename>.<suffix1>"
 	 */
 	strbuf_setlen(&buf, 0);
 	strbuf_addbuf(&buf, &gh__global.buf_odb_path);
 	strbuf_complete(&buf, '/');
-	strbuf_addstr(&buf, "pack/");
-	len_p = buf.len;
-	strbuf_addstr(&buf, "tempPacks/");
-	strbuf_addbuf(&buf, &basename);
-	strbuf_addstr(&buf, ".temp");
+	strbuf_addstr(&buf, "pack/tempPacks/");
+	len_tp = buf.len;
+	strbuf_addf( &buf, "%s.%s", basename.buf, suffix1);
 
 	scld = safe_create_leading_directories(buf.buf);
 	if (scld != SCLD_OK &&
 	    scld != SCLD_EXISTS) {
 		strbuf_addf(&status->error_message,
-			    "could not create directory for packfile: '%s'",
+			    "could not create directory for tempfile: '%s'",
 			    buf.buf);
 		status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE;
 		goto cleanup;
 	}
 
-	params->tempfile = create_tempfile(buf.buf);
-	if (!params->tempfile) {
+	retries = 0;
+	*t1 = create_tempfile(buf.buf);
+	while (!*t1 && retries < 5) {
+		retries++;
+		strbuf_setlen(&buf, len_tp);
+		strbuf_addf(&buf, "%s-%d.%s", basename.buf, retries, suffix1);
+		*t1 = create_tempfile(buf.buf);
+	}
+
+	if (!*t1) {
 		strbuf_addf(&status->error_message,
-			    "could not create tempfile for packfile: '%s'",
+			    "could not create tempfile: '%s'",
 			    buf.buf);
 		status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE;
 		goto cleanup;
 	}
-
-	fdopen_tempfile(params->tempfile, "w");
+	if (b_fdopen)
+		fdopen_tempfile(*t1, "w");
 
 	/*
-	 * After the download is complete, we will need to steal the file
-	 * from the tempfile() class (so that it doesn't magically delete
-	 * it when we close the file handle) and then index it.
-	 *
-	 * We do this into the tempPacks directory to avoid contaminating
-	 * the real pack directory until we know there is no corruption.
+	 * Optionally create a peer tempfile with the same basename.
+	 * (This is useful for prefetching .pack and .idx pairs.)
 	 *
-	 *     "<odb>/pack/tempPacks/vfs-<date>-<nth>.temp.pack"
-	 *     "<odb>/pack/tempPacks/vfs-<date>-<nth>.temp.idx"
+	 *     "<odb>/pack/tempPacks/<basename>.<suffix2>"
 	 */
-	strbuf_setlen(&params->temp_path_pack, 0);
-	strbuf_addf(&params->temp_path_pack, "%s.pack", buf.buf);
-
-	strbuf_setlen(&params->temp_path_idx, 0);
-	strbuf_addf(&params->temp_path_idx, "%s.idx", buf.buf);
+	if (suffix2 && *suffix2 && t2) {
+		strbuf_setlen(&buf, len_tp);
+		strbuf_addf( &buf, "%s.%s", basename.buf, suffix2);
+
+		*t2 = create_tempfile(buf.buf);
+		while (!*t2 && retries < 5) {
+			retries++;
+			strbuf_setlen(&buf, len_tp);
+			strbuf_addf(&buf, "%s-%d.%s", basename.buf, retries, suffix2);
+			*t2 = create_tempfile(buf.buf);
+		}
 
-	/*
-	 * Later, if all goes well, we will install them as:
-	 *
-	 *     "<odb>/pack/vfs-<date>-<nth>.pack"
-	 *     "<odb>/pack/vfs-<date>-<nth>.idx"
-	 */
-	strbuf_setlen(&buf, len_p);
-	strbuf_setlen(&params->final_path_pack, 0);
-	strbuf_addf(&params->final_path_pack, "%s%s.pack",
-		    buf.buf, basename.buf);
-	strbuf_setlen(&params->final_path_idx, 0);
-	strbuf_addf(&params->final_path_idx, "%s%s.idx",
-		    buf.buf, basename.buf);
-	strbuf_setlen(&params->final_packfile_filename, 0);
-	strbuf_addf(&params->final_packfile_filename, "%s.pack",
-		    basename.buf);
+		if (!*t2) {
+			strbuf_addf(&status->error_message,
+				    "could not create tempfile: '%s'",
+				    buf.buf);
+			status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE;
+			goto cleanup;
+		}
+		if (b_fdopen)
+			fdopen_tempfile(*t2, "w");
+	}
 
 cleanup:
 	strbuf_release(&buf);
 	strbuf_release(&basename);
 }
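To make the naming scheme concrete, a single prefetch pack flows through names
like the following (all values illustrative; `<odb>` is the shared-cache
object directory and the checksum comes from `git index-pack`):

    <odb>/pack/tempPacks/t-<date>-0000.pack          (streamed download)
    <odb>/pack/tempPacks/t-<date>-0000.idx           (peer tempfile)
    <odb>/pack/prefetch-1657563600-<checksum>.pack   (installed)
    <odb>/pack/prefetch-1657563600-<checksum>.idx    (installed)
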
+
+/*
+ * Create pathnames to the final location of the .pack and .idx
+ * files in the ODB.  These are of the form:
+ *
+ *     "<odb>/pack/<term_1>-<term_2>[-<term_3>].<suffix>"
+ *
+ * For example, for prefetch packs, <term_2> will be the epoch
+ * timestamp and <term_3> will be the packfile hash.
+ */
+static void create_final_packfile_pathnames(
+	const char *term_1, const char *term_2, const char *term_3,
+	struct strbuf *pack_path, struct strbuf *idx_path,
+	struct strbuf *pack_filename)
+{
+	struct strbuf base = STRBUF_INIT;
+	struct strbuf path = STRBUF_INIT;
+
+	if (term_3 && *term_3)
+		strbuf_addf(&base, "%s-%s-%s", term_1, term_2, term_3);
+	else
+		strbuf_addf(&base, "%s-%s", term_1, term_2);
+
+	strbuf_setlen(pack_filename, 0);
+	strbuf_addf( pack_filename, "%s.pack", base.buf);
+
+	strbuf_addbuf(&path, &gh__global.buf_odb_path);
+	strbuf_complete(&path, '/');
+	strbuf_addstr(&path, "pack/");
+
+	strbuf_setlen(pack_path, 0);
+	strbuf_addbuf(pack_path, &path);
+	strbuf_addf( pack_path, "%s.pack", base.buf);
+
+	strbuf_setlen(idx_path, 0);
+	strbuf_addbuf(idx_path, &path);
+	strbuf_addf( idx_path, "%s.idx", base.buf);
+
+	strbuf_release(&base);
+	strbuf_release(&path);
+}
+
 /*
  * Create a pathname to the loose object in the shared-cache ODB
  * with the given OID.  Try to "mkdir -p" to ensure the parent
@@ -1741,54 +1835,134 @@ static int create_loose_pathname_in_odb(struct strbuf *buf_path,
 	return 0;
 }
 
-/*
- * Create a tempfile to stream a loose object into.
- *
- * We create a tempfile in the chosen ODB directory and let CURL
- * automatically stream data to the file.
- *
- * We put it directly in the "<odb>/xx/" directory.
- */
-static void create_tempfile_for_loose(
-	struct gh__request_params *params,
-	struct gh__response_status *status)
+static void my_run_index_pack(struct gh__request_params *params,
+			      struct gh__response_status *status,
+			      const struct strbuf *temp_path_pack,
+			      const struct strbuf *temp_path_idx,
+			      struct strbuf *packfile_checksum)
 {
-	static int nth = 0;
-	struct strbuf buf_path = STRBUF_INIT;
+	struct child_process ip = CHILD_PROCESS_INIT;
+	struct strbuf ip_stdout = STRBUF_INIT;
 
-	gh__response_status__zero(status);
+	strvec_push(&ip.args, "git");
+	strvec_push(&ip.args, "index-pack");
 
-	if (create_loose_pathname_in_odb(&buf_path, &params->loose_oid)) {
+	ip.err = -1;
+	ip.no_stderr = 1;
+
+	/* Skip generating the rev index, we don't need it. */
+	strvec_push(&ip.args, "--no-rev-index");
+
+	strvec_pushl(&ip.args, "-o", temp_path_idx->buf, NULL);
+	strvec_push(&ip.args, temp_path_pack->buf);
+	ip.no_stdin = 1;
+	ip.out = -1;
+
+	if (pipe_command(&ip, NULL, 0, &ip_stdout, 0, NULL, 0)) {
+		unlink(temp_path_pack->buf);
+		unlink(temp_path_idx->buf);
 		strbuf_addf(&status->error_message,
-			    "cannot create directory for loose object '%s'",
-			    buf_path.buf);
-		status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE;
+			    "index-pack failed on '%s'",
+			    temp_path_pack->buf);
+		/*
+		 * Let's assume that index-pack failed because the
+		 * downloaded file is corrupt (truncated).
+		 *
+		 * Retry it as if the network had dropped.
+		 */
+		status->retry = GH__RETRY_MODE__TRANSIENT;
+		status->ec = GH__ERROR_CODE__INDEX_PACK_FAILED;
 		goto cleanup;
 	}
 
-	/* Remember the full path of the final destination. */
-	strbuf_setlen(&params->loose_path, 0);
-	strbuf_addbuf(&params->loose_path, &buf_path);
+	if (packfile_checksum) {
+		/*
+		 * stdout from index-pack should have the packfile hash.
+		 * Extract it and use it in the final packfile name.
+		 *
+		 * TODO What kind of validation should we do on the
+		 * TODO string and is there ever any other output besides
+		 * TODO just the checksum?
+ */ + strbuf_trim_trailing_newline(&ip_stdout); + strbuf_addbuf(packfile_checksum, &ip_stdout); + } + +cleanup: + strbuf_release(&ip_stdout); + child_process_clear(&ip); +} + +static void my_finalize_packfile(struct gh__request_params *params, + struct gh__response_status *status, + int b_keep, + const struct strbuf *temp_path_pack, + const struct strbuf *temp_path_idx, + struct strbuf *final_path_pack, + struct strbuf *final_path_idx, + struct strbuf *final_filename) +{ /* - * Build a unique tempfile pathname based upon it. We avoid - * using lockfiles to avoid issues with stale locks after - * crashes. + * Install the .pack and .idx into the ODB pack directory. + * + * We might be racing with other instances of gvfs-helper if + * we, in parallel, both downloaded the exact same packfile + * (with the same checksum SHA) and try to install it at the + * same time. This might happen on Windows where the loser + * can get an EBUSY or EPERM trying to move/rename the + * tempfile into the pack dir, for example. + * + * So, we always install the .pack before the .idx for + * consistency. And only if *WE* created the .pack and .idx + * files, do we create the matching .keep (when requested). + * + * If we get an error and the target files already exist, we + * silently eat the error. Note that finalize_object_file() + * has already munged errno (and it has various creation + * strategies), so we don't bother looking at it. */ - strbuf_addf(&buf_path, ".%08u.%.06u.temp", getpid(), nth++); + if (finalize_object_file(temp_path_pack->buf, final_path_pack->buf) || + finalize_object_file(temp_path_idx->buf, final_path_idx->buf)) { + unlink(temp_path_pack->buf); + unlink(temp_path_idx->buf); + + if (file_exists(final_path_pack->buf) && + file_exists(final_path_idx->buf)) { + trace2_printf("%s: assuming ok for %s", TR2_CAT, final_path_pack->buf); + goto assume_ok; + } - params->tempfile = create_tempfile(buf_path.buf); - if (!params->tempfile) { - strbuf_addstr(&status->error_message, - "could not create tempfile for loose object"); - status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; - goto cleanup; + strbuf_addf(&status->error_message, + "could not install packfile '%s'", + final_path_pack->buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; + return; } - fdopen_tempfile(params->tempfile, "w"); + if (b_keep) { + struct strbuf keep = STRBUF_INIT; + int fd_keep; -cleanup: - strbuf_release(&buf_path); + strbuf_addbuf(&keep, final_path_pack); + strbuf_strip_suffix(&keep, ".pack"); + strbuf_addstr(&keep, ".keep"); + + fd_keep = xopen(keep.buf, O_WRONLY | O_CREAT | O_TRUNC, 0666); + if (fd_keep >= 0) + close(fd_keep); + + strbuf_release(&keep); + } + +assume_ok: + if (params->result_list) { + struct strbuf result_msg = STRBUF_INIT; + + strbuf_addf(&result_msg, "packfile %s", final_filename->buf); + string_list_append(params->result_list, result_msg.buf); + strbuf_release(&result_msg); + } } /* @@ -1798,93 +1972,403 @@ static void create_tempfile_for_loose( static void install_packfile(struct gh__request_params *params, struct gh__response_status *status) { - struct child_process ip = CHILD_PROCESS_INIT; + struct strbuf temp_path_pack = STRBUF_INIT; + struct strbuf temp_path_idx = STRBUF_INIT; + struct strbuf packfile_checksum = STRBUF_INIT; + struct strbuf final_path_pack = STRBUF_INIT; + struct strbuf final_path_idx = STRBUF_INIT; + struct strbuf final_filename = STRBUF_INIT; + + gh__response_status__zero(status); /* - * When we request more than 1 object, the server should always - * 
send us a packfile. + * After the download is complete, we will need to steal the file + * from the tempfile() class (so that it doesn't magically delete + * it when we close the file handle) and then index it. */ - if (strcmp(status->content_type.buf, - "application/x-git-packfile")) { - strbuf_addf(&status->error_message, - "install_packfile: received unknown content-type '%s'", - status->content_type.buf); - status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; - goto cleanup; - } - - gh__response_status__zero(status); + strbuf_addf(&temp_path_pack, "%s.pack", + get_tempfile_path(params->tempfile)); + strbuf_addf(&temp_path_idx, "%s.idx", + get_tempfile_path(params->tempfile)); if (rename_tempfile(¶ms->tempfile, - params->temp_path_pack.buf) == -1) { + temp_path_pack.buf) == -1) { strbuf_addf(&status->error_message, "could not rename packfile to '%s'", - params->temp_path_pack.buf); + temp_path_pack.buf); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; goto cleanup; } - strvec_push(&ip.args, "index-pack"); - if (gh__cmd_opts.show_progress) - strvec_push(&ip.args, "-v"); - strvec_pushl(&ip.args, "-o", params->temp_path_idx.buf, NULL); - strvec_push(&ip.args, params->temp_path_pack.buf); - ip.git_cmd = 1; - ip.no_stdin = 1; - ip.no_stdout = 1; + my_run_index_pack(params, status, &temp_path_pack, &temp_path_idx, + &packfile_checksum); + if (status->ec != GH__ERROR_CODE__OK) + goto cleanup; + + create_final_packfile_pathnames("vfs", packfile_checksum.buf, NULL, + &final_path_pack, &final_path_idx, + &final_filename); + my_finalize_packfile(params, status, 0, + &temp_path_pack, &temp_path_idx, + &final_path_pack, &final_path_idx, + &final_filename); + +cleanup: + strbuf_release(&temp_path_pack); + strbuf_release(&temp_path_idx); + strbuf_release(&packfile_checksum); + strbuf_release(&final_path_pack); + strbuf_release(&final_path_idx); + strbuf_release(&final_filename); +} + +/* + * bswap.h only defines big endian functions. + * The GVFS Protocol defines fields in little endian. + */ +static inline uint64_t my_get_le64(uint64_t le_val) +{ +#if GIT_BYTE_ORDER == GIT_LITTLE_ENDIAN + return le_val; +#else + return default_bswap64(le_val); +#endif +} + +#define MY_MIN(x,y) (((x) < (y)) ? (x) : (y)) +#define MY_MAX(x,y) (((x) > (y)) ? (x) : (y)) + +/* + * Copy the `nr_bytes_total` from `fd_in` to `fd_out`. + * + * This could be used to extract a single packfile from + * a multipart file, for example. + */ +static int my_copy_fd_len(int fd_in, int fd_out, ssize_t nr_bytes_total) +{ + char buffer[8192]; + + while (nr_bytes_total > 0) { + ssize_t len_to_read = MY_MIN(nr_bytes_total, sizeof(buffer)); + ssize_t nr_read = xread(fd_in, buffer, len_to_read); + + if (!nr_read) + break; + if (nr_read < 0) + return -1; + + if (write_in_full(fd_out, buffer, nr_read) < 0) + return -1; + + nr_bytes_total -= nr_read; + } + + return 0; +} + +/* + * Copy the `nr_bytes_total` from `fd_in` to `fd_out` AND save the + * final `tail_len` bytes in the given buffer. + * + * This could be used to extract a single packfile from + * a multipart file and read the final SHA into the buffer. 
+ */
+static int my_copy_fd_len_tail(int fd_in, int fd_out, ssize_t nr_bytes_total,
+			       unsigned char *buf_tail, ssize_t tail_len)
+{
+	memset(buf_tail, 0, tail_len);
+
+	if (my_copy_fd_len(fd_in, fd_out, nr_bytes_total) < 0)
+		return -1;
+
+	if (nr_bytes_total < tail_len)
+		return 0;
+
+	/* Reset the position to read the tail */
+	lseek(fd_in, -tail_len, SEEK_CUR);
+
+	if (xread(fd_in, (char *)buf_tail, tail_len) != tail_len)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * See the protocol document for the per-packfile header.
+ */
+struct ph {
+	uint64_t timestamp;
+	uint64_t pack_len;
+	uint64_t idx_len;
+};
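For orientation, the multipart stream that install_prefetch() (below) walks is
laid out roughly as follows: the magic/version and pack count come first, then
each pack is one `struct ph` followed by its raw bytes.  All integers are
little-endian; an idx_len of zero or all-ones means no .idx follows and one
must be computed locally:

    "GPRE \x01"      6 bytes    magic + format version 1
    <np>             2 bytes    number of packfiles (LE16)
    then, for each of the <np> packfiles:
      <timestamp>    8 bytes    (LE64)
      <pack_len>     8 bytes    (LE64)
      <idx_len>      8 bytes    (LE64)
      <pack bytes>   pack_len bytes
      <idx bytes>    idx_len bytes, when present
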
*/ - if (run_command(&ip)) { - unlink(params->temp_path_pack.buf); - unlink(params->temp_path_idx.buf); + result = my_copy_fd_len_tail(fd_multipack, + get_tempfile_fd(tempfile_pack), + ph.pack_len, + packfile_checksum.hash, + GIT_SHA1_RAWSZ); + packfile_checksum.algo = GIT_HASH_SHA1; + + if (result < 0){ strbuf_addf(&status->error_message, - "index-pack failed on '%s'", - params->temp_path_pack.buf); + "could not extract packfile[%d] from multipack", + k); + goto done; + } + strbuf_addstr(&temp_path_pack, get_tempfile_path(tempfile_pack)); + close_tempfile_gently(tempfile_pack); + + oid_to_hex_r(hex_checksum, &packfile_checksum); + + if (b_no_idx_in_multipack) { /* - * Lets assume that index-pack failed because the - * downloaded file is corrupt (truncated). - * - * Retry it as if the network had dropped. + * The server did not send the corresponding .idx, so + * we have to compute it ourselves. */ - status->retry = GH__RETRY_MODE__TRANSIENT; - status->ec = GH__ERROR_CODE__INDEX_PACK_FAILED; + strbuf_addbuf(&temp_path_idx, &temp_path_pack); + strbuf_strip_suffix(&temp_path_idx, ".pack"); + strbuf_addstr(&temp_path_idx, ".idx"); + + my_run_index_pack(params, status, + &temp_path_pack, &temp_path_idx, + NULL); + if (status->ec != GH__ERROR_CODE__OK) + goto done; + + } else { + /* + * Server sent the .idx immediately after the .pack in the + * data stream. I'm tempted to verify it, but that defeats + * the purpose of having it cached... + */ + if (my_copy_fd_len(fd_multipack, get_tempfile_fd(tempfile_idx), + ph.idx_len) < 0) { + strbuf_addf(&status->error_message, + "could not extract index[%d] in multipack", + k); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; + goto done; + } + + strbuf_addstr(&temp_path_idx, get_tempfile_path(tempfile_idx)); + close_tempfile_gently(tempfile_idx); + } + + strbuf_addf(&buf_timestamp, "%u", (unsigned int)ph.timestamp); + create_final_packfile_pathnames("prefetch", buf_timestamp.buf, hex_checksum, + &final_path_pack, &final_path_idx, + &final_filename); + + my_finalize_packfile(params, status, 1, + &temp_path_pack, &temp_path_idx, + &final_path_pack, &final_path_idx, + &final_filename); + +done: + delete_tempfile(&tempfile_pack); + delete_tempfile(&tempfile_idx); + strbuf_release(&temp_path_pack); + strbuf_release(&temp_path_idx); + strbuf_release(&final_path_pack); + strbuf_release(&final_path_idx); + strbuf_release(&final_filename); +} + +struct keep_files_data { + timestamp_t max_timestamp; + int pos_of_max; + struct string_list *keep_files; +}; + +static void cb_keep_files(const char *full_path, size_t full_path_len, + const char *file_path, void *void_data) +{ + struct keep_files_data *data = void_data; + const char *val; + timestamp_t t; + + /* + * We expect prefetch packfiles named like: + * + * prefetch--.keep + */ + if (!skip_prefix(file_path, "prefetch-", &val)) + return; + if (!ends_with(val, ".keep")) + return; + + t = strtol(val, NULL, 10); + if (t > data->max_timestamp) { + data->pos_of_max = data->keep_files->nr; + data->max_timestamp = t; + } + + string_list_append(data->keep_files, full_path); +} + +static void delete_stale_keep_files( + struct gh__request_params *params, + struct gh__response_status *status) +{ + struct string_list keep_files = STRING_LIST_INIT_DUP; + struct keep_files_data data = { 0, 0, &keep_files }; + int k; + + for_each_file_in_pack_dir(gh__global.buf_odb_path.buf, + cb_keep_files, &data); + for (k = 0; k < keep_files.nr; k++) { + if (k != data.pos_of_max) + unlink(keep_files.items[k].string); + } + + 
string_list_clear(&keep_files, 0); +} + +/* + * Cut apart the received multipart response into individual packfiles + * and install each one. + */ +static void install_prefetch(struct gh__request_params *params, + struct gh__response_status *status) +{ + static unsigned char v1_h[6] = { 'G', 'P', 'R', 'E', ' ', 0x01 }; + + struct mh { + unsigned char h[6]; + unsigned char np[2]; + }; + + struct mh mh; + unsigned short np; + unsigned short k; + int fd = -1; + int nr_installed = 0; + + struct strbuf temp_path_mp = STRBUF_INIT; + + /* + * Steal the multi-part file from the tempfile class. + */ + strbuf_addf(&temp_path_mp, "%s.mp", get_tempfile_path(params->tempfile)); + if (rename_tempfile(¶ms->tempfile, temp_path_mp.buf) == -1) { + strbuf_addf(&status->error_message, + "could not rename prefetch tempfile to '%s'", + temp_path_mp.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; goto cleanup; } - if (finalize_object_file(params->temp_path_pack.buf, - params->final_path_pack.buf) || - finalize_object_file(params->temp_path_idx.buf, - params->final_path_idx.buf)) { - unlink(params->temp_path_pack.buf); - unlink(params->temp_path_idx.buf); - unlink(params->final_path_pack.buf); - unlink(params->final_path_idx.buf); + fd = git_open_cloexec(temp_path_mp.buf, O_RDONLY); + if (fd == -1) { strbuf_addf(&status->error_message, - "could not install packfile '%s'", - params->final_path_pack.buf); - status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; + "could not reopen prefetch tempfile '%s'", + temp_path_mp.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; + goto cleanup; + } + + if ((xread(fd, &mh, sizeof(mh)) != sizeof(mh)) || + (memcmp(mh.h, &v1_h, sizeof(mh.h)))) { + strbuf_addstr(&status->error_message, + "invalid prefetch multipart header"); goto cleanup; } + np = (unsigned short)mh.np[0] + ((unsigned short)mh.np[1] << 8); + if (np) + trace2_data_intmax(TR2_CAT, NULL, + "prefetch/packfile_count", np); - if (params->result_list) { - struct strbuf result_msg = STRBUF_INIT; + if (gh__cmd_opts.show_progress) + params->progress = start_progress("Installing prefetch packfiles", np); - strbuf_addf(&result_msg, "packfile %s", - params->final_packfile_filename.buf); - string_list_append(params->result_list, result_msg.buf); - strbuf_release(&result_msg); + for (k = 0; k < np; k++) { + extract_packfile_from_multipack(params, status, fd, k); + display_progress(params->progress, k + 1); + if (status->ec != GH__ERROR_CODE__OK) + break; + nr_installed++; } + stop_progress(¶ms->progress); + + if (nr_installed) + delete_stale_keep_files(params, status); cleanup: - child_process_clear(&ip); + if (fd != -1) + close(fd); + + unlink(temp_path_mp.buf); + strbuf_release(&temp_path_mp); } /* @@ -1922,21 +2406,7 @@ static void install_loose(struct gh__request_params *params, struct gh__response_status *status) { struct strbuf tmp_path = STRBUF_INIT; - - /* - * We expect a loose object when we do a GET -or- when we - * do a POST with only 1 object. - * - * Note that this content type is singular, not plural. 
- */ - if (strcmp(status->content_type.buf, - "application/x-git-loose-object")) { - strbuf_addf(&status->error_message, - "install_loose: received unknown content-type '%s'", - status->content_type.buf); - status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; - return; - } + struct strbuf loose_path = STRBUF_INIT; gh__response_status__zero(status); @@ -1974,11 +2444,19 @@ static void install_loose(struct gh__request_params *params, * collision we have to assume something else is happening in * parallel and we lost the race. And that's OK. */ - if (finalize_object_file(tmp_path.buf, params->loose_path.buf)) { + if (create_loose_pathname_in_odb(&loose_path, ¶ms->loose_oid)) { + strbuf_addf(&status->error_message, + "cannot create directory for loose object '%s'", + loose_path.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE; + goto cleanup; + } + + if (finalize_object_file(tmp_path.buf, loose_path.buf)) { unlink(tmp_path.buf); strbuf_addf(&status->error_message, "could not install loose object '%s'", - params->loose_path.buf); + loose_path.buf); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE; goto cleanup; } @@ -1994,6 +2472,57 @@ static void install_loose(struct gh__request_params *params, cleanup: strbuf_release(&tmp_path); + strbuf_release(&loose_path); +} + +static void install_result(struct gh__request_params *params, + struct gh__response_status *status) +{ + if (params->objects_mode == GH__OBJECTS_MODE__PREFETCH) { + /* + * The "gvfs/prefetch" API is the only thing that sends + * these multi-part packfiles. According to the protocol + * documentation, they will have this x- content type. + * + * However, it appears that there is a BUG in the origin + * server causing it to sometimes send "text/html" instead. + * So, we silently handle both. + */ + if (!strcmp(status->content_type.buf, + "application/x-gvfs-timestamped-packfiles-indexes")) { + install_prefetch(params, status); + return; + } + + if (!strcmp(status->content_type.buf, "text/html")) { + install_prefetch(params, status); + return; + } + } else { + if (!strcmp(status->content_type.buf, "application/x-git-packfile")) { + assert(params->b_is_post); + assert(params->objects_mode == GH__OBJECTS_MODE__POST); + + install_packfile(params, status); + return; + } + + if (!strcmp(status->content_type.buf, + "application/x-git-loose-object")) { + /* + * We get these for "gvfs/objects" GET and POST requests. + * + * Note that this content type is singular, not plural. + */ + install_loose(params, status); + return; + } + } + + strbuf_addf(&status->error_message, + "install_result: received unknown content-type '%s'", + status->content_type.buf); + status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; } /* @@ -2059,7 +2588,7 @@ static size_t parse_resp_hdr(char *buffer, size_t size, size_t nitems, * Other servers have similar sets of values, but I haven't * compared them in depth. 
 	 */
-	// trace2_printf("Throttle: %s %s", key.buf, val.buf);
+	// trace2_printf("%s: Throttle: %s %s", TR2_CAT, key.buf, val.buf);
 
 	if (!strcmp(key.buf, "X-RateLimit-Resource")) {
 		/*
@@ -2070,7 +2599,7 @@ static size_t parse_resp_hdr(char *buffer, size_t size, size_t nitems,
 		strbuf_addstr(&key, "ratelimit/resource");
 		strbuf_addstr(&key, gh__server_type_label[params->server_type]);
 
-		trace2_data_string("gvfs-helper", NULL, key.buf, val.buf);
+		trace2_data_string(TR2_CAT, NULL, key.buf, val.buf);
 	}
 
 	else if (!strcmp(key.buf, "X-RateLimit-Delay")) {
@@ -2086,7 +2615,7 @@ static size_t parse_resp_hdr(char *buffer, size_t size, size_t nitems,
 
 		git_parse_ulong(val.buf, &tarpit_delay_ms);
 
-		trace2_data_intmax("gvfs-helper", NULL, key.buf, tarpit_delay_ms);
+		trace2_data_intmax(TR2_CAT, NULL, key.buf, tarpit_delay_ms);
 	}
 
 	else if (!strcmp(key.buf, "X-RateLimit-Limit")) {
@@ -2184,7 +2713,7 @@ static void do_throttle_spin(struct gh__request_params *params,
 	strbuf_addstr(&region, tr2_label);
 	strbuf_addstr(&region, gh__server_type_label[params->server_type]);
 
-	trace2_region_enter("gvfs-helper", region.buf, NULL);
+	trace2_region_enter(TR2_CAT, region.buf, NULL);
 
 	if (gh__cmd_opts.show_progress)
 		progress = start_progress(progress_msg, duration);
@@ -2200,7 +2729,7 @@ static void do_throttle_spin(struct gh__request_params *params,
 	display_progress(progress, duration);
 	stop_progress(&progress);
 
-	trace2_region_leave("gvfs-helper", region.buf, NULL);
+	trace2_region_leave(TR2_CAT, region.buf, NULL);
 
 	strbuf_release(&region);
 }
@@ -2362,11 +2891,7 @@ static void do_req(const char *url_base,
 		if (params->tempfile)
 			delete_tempfile(&params->tempfile);
 
-		if (params->b_is_post)
-			create_tempfile_for_packfile(params, status);
-
-		create_tempfile_for_loose(params, status);
-
+		my_create_tempfile(status, 1, NULL, &params->tempfile, NULL, NULL);
 		if (!params->tempfile || status->ec != GH__ERROR_CODE__OK)
 			return;
 	} else {
@@ -2613,6 +3138,7 @@ static void do__http_get__gvfs_config(struct gh__response_status *status,
 	/* cache-servers do not handle gvfs/config REST calls */
 	params.b_permit_cache_server_if_defined = 0;
 	params.buffer = config_data;
+	params.objects_mode = GH__OBJECTS_MODE__NONE;
 
 	params.object_count = 1; /* a bit of a lie */
 
@@ -2677,6 +3203,7 @@ static void do__http_get__gvfs_object(struct gh__response_status *status,
 	params.b_is_post = 0;
 	params.b_write_to_file = 1;
 	params.b_permit_cache_server_if_defined = 1;
+	params.objects_mode = GH__OBJECTS_MODE__GET;
 
 	params.object_count = 1;
 
@@ -2733,6 +3260,7 @@ static void do__http_post__gvfs_objects(struct gh__response_status *status,
 	params.b_is_post = 1;
 	params.b_write_to_file = 1;
 	params.b_permit_cache_server_if_defined = 1;
+	params.objects_mode = GH__OBJECTS_MODE__POST;
 
 	params.post_payload = &jw_req.json;
 
@@ -2769,6 +3297,126 @@ static void do__http_post__gvfs_objects(struct gh__response_status *status,
 	jw_release(&jw_req);
 }
 
+struct find_last_data {
+	timestamp_t timestamp;
+	int nr_files;
+};
+
+static void cb_find_last(const char *full_path, size_t full_path_len,
+			 const char *file_path, void *void_data)
+{
+	struct find_last_data *data = void_data;
+	const char *val;
+	timestamp_t t;
+
+	if (!skip_prefix(file_path, "prefetch-", &val))
+		return;
+	if (!ends_with(val, ".pack"))
+		return;
+
+	data->nr_files++;
+
+	/*
+	 * We expect prefetch packfiles named like:
+	 *
+	 *     prefetch-<timestamp>-<checksum>.pack
+	 */
+	t = strtol(val, NULL, 10);
+
+	data->timestamp = MY_MAX(t, data->timestamp);
+}
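Note how cb_find_last() leans on strtol() stopping at the first non-digit:
after skip_prefix() has eaten "prefetch-", the leading <timestamp> term parses
straight out of the filename and the "-<checksum>.pack" tail is ignored.  A
tiny illustration with a hypothetical filename:

    /* hypothetical filename from <odb>/pack/, after "prefetch-" is skipped */
    const char *val = "1657563600-d3adc0de.pack";
    timestamp_t t = strtol(val, NULL, 10);   /* == 1657563600, stops at '-' */

(cb_keep_files() uses the same trick for the .keep files above.)
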
+
+/*
+ * Find the server timestamp on the last prefetch packfile that
+ * we have in the ODB.
+ *
+ * TODO I'm going to assume that all prefetch packs are created
+ * TODO equal and take the one with the largest t value.
+ * TODO
+ * TODO Or should we look for one marked with .keep ?
+ *
+ * TODO Alternatively, should we maybe get the 2nd largest?
+ * TODO (Or maybe subtract an hour delta from the largest?)
+ * TODO
+ * TODO Since each cache-server maintains its own set of prefetch
+ * TODO packs (such that 2 requests may hit 2 different
+ * TODO load-balanced servers and get different answers (with or
+ * TODO without clock-skew issues)), is it possible for us to miss
+ * TODO the absolute fringe of new commits and trees?
+ * TODO
+ * TODO That is, since the cache-server generates hourly prefetch
+ * TODO packs, we could do a prefetch and be up-to-date, but then
+ * TODO do the main fetch and hit a different cache/main server
+ * TODO and be behind by as much as an hour and have to demand-
+ * TODO load the commits/trees.
+ *
+ * TODO Alternatively, should we compare the last timestamp found
+ * TODO with "now" and silently do nothing if within an epsilon?
+ */
+static void find_last_prefetch_timestamp(timestamp_t *last)
+{
+	struct find_last_data data;
+
+	memset(&data, 0, sizeof(data));
+
+	for_each_file_in_pack_dir(gh__global.buf_odb_path.buf, cb_find_last, &data);
+
+	*last = data.timestamp;
+}
+
+/*
+ * Call "gvfs/prefetch[?lastPackTimestamp=<seconds_since_epoch>]" REST API to
+ * fetch a series of packfiles and write them to the ODB.
+ *
+ * Return a list of packfile names.
+ */
+static void do__http_get__gvfs_prefetch(struct gh__response_status *status,
+					timestamp_t seconds_since_epoch,
+					struct string_list *result_list)
+{
+	struct gh__request_params params = GH__REQUEST_PARAMS_INIT;
+	struct strbuf component_url = STRBUF_INIT;
+
+	gh__response_status__zero(status);
+
+	strbuf_addstr(&component_url, "gvfs/prefetch");
+
+	if (!seconds_since_epoch)
+		find_last_prefetch_timestamp(&seconds_since_epoch);
+	if (seconds_since_epoch)
+		strbuf_addf(&component_url, "?lastPackTimestamp=%"PRItime,
+			    seconds_since_epoch);
+
+	params.b_is_post = 0;
+	params.b_write_to_file = 1;
+	params.b_permit_cache_server_if_defined = 1;
+	params.objects_mode = GH__OBJECTS_MODE__PREFETCH;
+
+	params.object_count = -1;
+
+	params.result_list = result_list;
+
+	params.headers = http_copy_default_headers();
+	params.headers = curl_slist_append(params.headers,
+					   "X-TFS-FedAuthRedirect: Suppress");
+	params.headers = curl_slist_append(params.headers,
+					   "Pragma: no-cache");
+	params.headers = curl_slist_append(params.headers,
					   "Accept: application/x-gvfs-timestamped-packfiles-indexes");
+
+	if (gh__cmd_opts.show_progress)
+		strbuf_addf(&params.progress_base_phase3_msg,
+			    "Prefetch %"PRItime" (%s)",
+			    seconds_since_epoch,
+			    show_date(seconds_since_epoch, 0,
+				      DATE_MODE(ISO8601)));
+
+	do_req__with_fallback(component_url.buf, &params, status);
+
+	gh__request_params__release(&params);
+	strbuf_release(&component_url);
+}
+
 /*
  * Drive one or more HTTP GET requests to fetch the objects
  * in the given OIDSET.  These are received into loose objects.
@@ -3069,7 +3717,83 @@ static enum gh__error_code do_sub_cmd__post(int argc, const char **argv)
 }
 
 /*
- * Handle the 'objects.get' and 'objects.post' verbs in "server mode".
+ * Interpret the given string as a timestamp and compute an absolute
+ * UTC-seconds-since-epoch value (without a TZ offset).
+ *
+ * Note that the gvfs/prefetch API only accepts seconds since epoch,
+ * so that is all we really need here.
+ * But there is a tradition of
+ * various Git commands allowing a variety of formats for args like
+ * this.  For example, see the `--date` arg in `git commit`.  We allow
+ * these other forms mainly for testing purposes.
+ */
+static int my_parse_since(const char *since, timestamp_t *p_timestamp)
+{
+	int offset = 0;
+	int errors = 0;
+	unsigned long t;
+
+	if (!parse_date_basic(since, p_timestamp, &offset))
+		return 0;
+
+	t = approxidate_careful(since, &errors);
+	if (!errors) {
+		*p_timestamp = t;
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Ask the server for all available packfiles -or- all available since
+ * the given timestamp.
+ */
+static enum gh__error_code do_sub_cmd__prefetch(int argc, const char **argv)
+{
+	static const char *since_str;
+	static struct option prefetch_options[] = {
+		OPT_STRING(0, "since", &since_str, N_("since"), N_("seconds since epoch")),
+		OPT_END(),
+	};
+
+	struct gh__response_status status = GH__RESPONSE_STATUS_INIT;
+	struct string_list result_list = STRING_LIST_INIT_DUP;
+	enum gh__error_code ec = GH__ERROR_CODE__OK;
+	timestamp_t seconds_since_epoch = 0;
+	int k;
+
+	trace2_cmd_mode("prefetch");
+
+	if (argc > 1 && !strcmp(argv[1], "-h"))
+		usage_with_options(prefetch_usage, prefetch_options);
+
+	argc = parse_options(argc, argv, NULL, prefetch_options, prefetch_usage, 0);
+	if (since_str && *since_str) {
+		if (my_parse_since(since_str, &seconds_since_epoch))
+			die("could not parse 'since' field");
+	}
+
+	finish_init(1);
+
+	do__http_get__gvfs_prefetch(&status, seconds_since_epoch, &result_list);
+
+	ec = status.ec;
+
+	for (k = 0; k < result_list.nr; k++)
+		printf("%s\n", result_list.items[k].string);
+
+	if (ec != GH__ERROR_CODE__OK)
+		error("prefetch: %s", status.error_message.buf);
+
+	gh__response_status__release(&status);
+	string_list_clear(&result_list, 0);
+
+	return ec;
+}
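Because my_parse_since() falls back to approxidate_careful(), the new
subcommand accepts several equivalent spellings of the cutoff; for example
(timestamps illustrative):

    $ git gvfs-helper prefetch
    $ git gvfs-helper prefetch --since=1657563600
    $ git gvfs-helper prefetch --since="2022-07-11 18:20:00"
    $ git gvfs-helper prefetch --since=3.days.ago

On success it prints one `packfile <name>` line per pack installed into the
shared-cache ODB.
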
+
+/*
+ * Ask the server for all available packfiles -or- all available since
+ * the given timestamp.
+ */
+static enum gh__error_code do_sub_cmd__prefetch(int argc, const char **argv)
+{
+	static const char *since_str;
+	static struct option prefetch_options[] = {
+		OPT_STRING(0, "since", &since_str, N_("since"), N_("seconds since epoch")),
+		OPT_END(),
+	};
+
+	struct gh__response_status status = GH__RESPONSE_STATUS_INIT;
+	struct string_list result_list = STRING_LIST_INIT_DUP;
+	enum gh__error_code ec = GH__ERROR_CODE__OK;
+	timestamp_t seconds_since_epoch = 0;
+	int k;
+
+	trace2_cmd_mode("prefetch");
+
+	if (argc > 1 && !strcmp(argv[1], "-h"))
+		usage_with_options(prefetch_usage, prefetch_options);
+
+	argc = parse_options(argc, argv, NULL, prefetch_options, prefetch_usage, 0);
+	if (since_str && *since_str) {
+		if (my_parse_since(since_str, &seconds_since_epoch))
+			die("could not parse 'since' field");
+	}
+
+	finish_init(1);
+
+	do__http_get__gvfs_prefetch(&status, seconds_since_epoch, &result_list);
+
+	ec = status.ec;
+
+	for (k = 0; k < result_list.nr; k++)
+		printf("%s\n", result_list.items[k].string);
+
+	if (ec != GH__ERROR_CODE__OK)
+		error("prefetch: %s", status.error_message.buf);
+
+	gh__response_status__release(&status);
+	string_list_clear(&result_list, 0);
+
+	return ec;
+}
+
+/*
+ * Handle the 'objects.get', 'objects.post', and 'objects.prefetch'
+ * verbs in "server mode".
  *
  * Only call error() and set ec for hard errors where we cannot
  * communicate correctly with the foreground client process.  Pass any
@@ -3089,45 +3813,73 @@ static enum gh__error_code do_server_subprocess__objects(const char *verb_line)
 	int k;
 	enum gh__objects_mode objects_mode;
 	unsigned long nr_oid_total = 0;
+	timestamp_t seconds_since_epoch = 0;
 
 	if (!strcmp(verb_line, "objects.get"))
 		objects_mode = GH__OBJECTS_MODE__GET;
 	else if (!strcmp(verb_line, "objects.post"))
 		objects_mode = GH__OBJECTS_MODE__POST;
+	else if (!strcmp(verb_line, "objects.prefetch"))
+		objects_mode = GH__OBJECTS_MODE__PREFETCH;
 	else {
 		error("server: unexpected objects-mode verb '%s'", verb_line);
 		ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX;
 		goto cleanup;
 	}
 
-	while (1) {
-		len = packet_read_line_gently(0, NULL, &line);
-		if (len < 0 || !line)
-			break;
+	switch (objects_mode) {
+	case GH__OBJECTS_MODE__GET:
+	case GH__OBJECTS_MODE__POST:
+		while (1) {
+			len = packet_read_line_gently(0, NULL, &line);
+			if (len < 0 || !line)
+				break;
+
+			if (get_oid_hex(line, &oid)) {
+				error("server: invalid oid syntax '%s'", line);
+				ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX;
+				goto cleanup;
+			}
 
-		if (get_oid_hex(line, &oid)) {
-			error("server: invalid oid syntax '%s'", line);
-			ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX;
+			if (!oidset_insert(&oids, &oid))
+				nr_oid_total++;
+		}
+
+		if (!nr_oid_total) {
+			/* if zero objects requested, trivial OK. */
+			if (packet_write_fmt_gently(1, "ok\n")) {
+				error("server: cannot write 'get' result to client");
+				ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX;
+			} else
+				ec = GH__ERROR_CODE__OK;
 			goto cleanup;
 		}
 
-		if (!oidset_insert(&oids, &oid))
-			nr_oid_total++;
-	}
+		if (objects_mode == GH__OBJECTS_MODE__GET)
+			do__http_get__fetch_oidset(&status, &oids,
+						   nr_oid_total, &result_list);
+		else
+			do__http_post__fetch_oidset(&status, &oids,
+						   nr_oid_total, &result_list);
+		break;
 
-	if (!nr_oid_total) {
-		if (packet_write_fmt_gently(1, "ok\n")) {
-			error("server: cannot write 'get' result to client");
-			ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX;
-		} else
-			ec = GH__ERROR_CODE__OK;
-		goto cleanup;
-	}
+	case GH__OBJECTS_MODE__PREFETCH:
+		/* get optional timestamp line */
+		while (1) {
+			len = packet_read_line_gently(0, NULL, &line);
+			if (len < 0 || !line)
+				break;
 
-	if (objects_mode == GH__OBJECTS_MODE__GET)
-		do__http_get__fetch_oidset(&status, &oids, nr_oid_total, &result_list);
-	else
-		do__http_post__fetch_oidset(&status, &oids, nr_oid_total, &result_list);
+			seconds_since_epoch = strtoul(line, NULL, 10);
+		}
+
+		do__http_get__gvfs_prefetch(&status, seconds_since_epoch,
+					    &result_list);
+		break;
+
+	default:
+		BUG("unexpected object_mode in switch '%d'", objects_mode);
+	}
 
 	/*
 	 * Write pathname of the ODB where we wrote all of the objects
@@ -3351,12 +4103,16 @@ static enum gh__error_code do_sub_cmd(int argc, const char **argv)
 	if (!strcmp(argv[0], "config"))
 		return do_sub_cmd__config(argc, argv);
 
+	if (!strcmp(argv[0], "prefetch"))
+		return do_sub_cmd__prefetch(argc, argv);
+
+	/*
+	 * Server mode is for talking with git.exe via the "gh_client__" API
+	 * using packet-line format.
+	 */
 	if (!strcmp(argv[0], "server"))
 		return do_sub_cmd__server(argc, argv);
 
-	// TODO have "test" mode that could be used to drive
-	// TODO unit testing.
-
 	return GH__ERROR_CODE__USAGE;
 }
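For reference, the request stream that do_server_subprocess__objects() reads above is ordinary pkt-line framing: an `objects.prefetch` verb line, an optional timestamp line, and a terminating flush-pkt. A minimal standalone sketch that emits such a request byte-for-byte (the timestamp is just an example value):

    #include <stdio.h>
    #include <string.h>

    /* pkt-line framing: 4 hex digits giving the total length
     * (payload plus the 4-byte length prefix itself), then the payload. */
    static void pkt_line(FILE *out, const char *s)
    {
        fprintf(out, "%04x%s", (unsigned)(strlen(s) + 4), s);
    }

    int main(void)
    {
        pkt_line(stdout, "objects.prefetch\n");

        /* optional: only ask for packs newer than this timestamp */
        pkt_line(stdout, "1200000000\n");

        fputs("0000", stdout); /* flush-pkt terminates the request */
        return 0;
    }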
diff --git a/gvfs.h b/gvfs.h
index a8e58a6ebc88b8..99c5205aa043d7 100644
--- a/gvfs.h
+++ b/gvfs.h
@@ -28,6 +28,7 @@
 #define GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK (1 << 4)
 
 #define GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS (1 << 6)
+#define GVFS_PREFETCH_DURING_FETCH (1 << 7)
 
 void gvfs_load_config_value(const char *value);
 int gvfs_config_is_set(int mask);
diff --git a/remote-curl.c b/remote-curl.c
index 4adcf25ed6eeb5..fe917254c84084 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -1195,6 +1195,9 @@ static int fetch_git(struct discovery *heads,
 	struct strvec args = STRVEC_INIT;
 	struct strbuf rpc_result = STRBUF_INIT;
 
+	if (core_use_gvfs_helper)
+		return 0;
+
 	strvec_pushl(&args, "fetch-pack", "--stateless-rpc",
 		     "--stdin", "--lock-pack", NULL);
 	if (options.followtags)
diff --git a/t/helper/test-gvfs-protocol.c b/t/helper/test-gvfs-protocol.c
index ba54965e00ffa7..98a94234fbb8b4 100644
--- a/t/helper/test-gvfs-protocol.c
+++ b/t/helper/test-gvfs-protocol.c
@@ -11,7 +11,9 @@
 #include "strbuf.h"
 #include "string-list.h"
 #include "trace2.h"
+#include "copy.h"
 #include "object.h"
+#include "object-file.h"
 #include "object-store.h"
 #include "replace-object.h"
 #include "repository.h"
@@ -22,6 +24,7 @@
 #include "date.h"
 #include "wrapper.h"
 #include "git-zlib.h"
+#include "packfile.h"
 
 #define TR2_CAT "test-gvfs-protocol"
 
@@ -551,9 +554,6 @@ static enum worker_result send_loose_object(const struct object_id *oid,
 		return send_http_error(1, 404, "Not Found", -1, WR_MAYHEM);
 	}
 
-	trace2_printf("%s: OBJECT type=%d len=%ld '%.40s'", TR2_CAT,
-		      type, size, (const char *)content);
-
 	/*
 	 * We are blending several somewhat independent concepts here:
 	 *
@@ -872,7 +872,6 @@ static enum worker_result get_packfile_from_oids(
 		goto done;
 	}
 
-	trace2_printf("%s: pack-objects returned %d bytes", TR2_CAT, buf_packfile->len);
 	wr = WR_OK;
 
 done:
@@ -1020,6 +1019,305 @@ static enum worker_result do__gvfs_objects__post(struct req *req)
 	return wr;
 }
 
+/*
+ * bswap.h only defines big endian functions.
+ * The GVFS Protocol defines fields in little endian.
+ */
+static inline uint64_t my_get_le64(uint64_t le_val)
+{
+#if GIT_BYTE_ORDER == GIT_LITTLE_ENDIAN
+	return le_val;
+#else
+	return default_bswap64(le_val);
+#endif
+}
+
+static inline uint16_t my_get_le16(uint16_t le_val)
+{
+#if GIT_BYTE_ORDER == GIT_LITTLE_ENDIAN
+	return le_val;
+#else
+	return default_bswap16(le_val);
+#endif
+}
+
+/*
+ * GVFS Protocol headers for the multipack format.
+ * All integer values are little-endian on the wire.
+ *
+ * Note: technically, the protocol defines the `ph` fields as signed, but
+ * that makes a mess of the bswap routines and we're not going to overflow
+ * them for a very long time.
+ */
+
+static unsigned char v1_h[6] = { 'G', 'P', 'R', 'E', ' ', 0x01 };
+
+struct ph {
+	uint64_t timestamp;
+	uint64_t len_pack;
+	uint64_t len_idx;
+};
+
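Taken together, `v1_h` and `struct ph` define the entire multipack response body: the 6-byte version header, a little-endian 16-bit packfile count, and then, for each packfile, a `ph` header followed by the raw .pack bytes and, when present, the raw .idx bytes. A sketch of a conforming reader under those assumptions (error handling trimmed, stream assumed seekable):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static uint64_t rd_le64(FILE *f)
    {
        unsigned char b[8];
        uint64_t v = 0;
        int i;

        if (fread(b, 1, 8, f) != 8)
            return 0;
        for (i = 7; i >= 0; i--) /* little-endian on the wire */
            v = (v << 8) | b[i];
        return v;
    }

    static int read_multipack(FILE *f)
    {
        unsigned char hdr[6];
        unsigned char np_b[2];
        uint16_t np, k;

        if (fread(hdr, 1, 6, f) != 6 || memcmp(hdr, "GPRE \x01", 6))
            return -1; /* not a v1 multipack stream */

        if (fread(np_b, 1, 2, f) != 2)
            return -1;
        np = (uint16_t)(np_b[0] | (np_b[1] << 8));

        for (k = 0; k < np; k++) {
            uint64_t timestamp = rd_le64(f);
            uint64_t len_pack = rd_le64(f);
            uint64_t len_idx = rd_le64(f);

            printf("pack t=%" PRIu64 " (%" PRIu64 " bytes)\n",
                   timestamp, len_pack);

            /* consume the packfile bytes */
            fseek(f, (long)len_pack, SEEK_CUR);

            /* UINT64_MAX means "no idx sent"; compute it locally */
            if (len_idx != UINT64_MAX)
                fseek(f, (long)len_idx, SEEK_CUR);
        }
        return 0;
    }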
+ */ +struct ct_pack_item { + struct ph ph; + struct strbuf path_pack; + struct strbuf path_idx; +}; + +static void ct_pack_item__free(struct ct_pack_item *item) +{ + if (!item) + return; + strbuf_release(&item->path_pack); + strbuf_release(&item->path_idx); + free(item); +} + +struct ct_pack_data { + struct ct_pack_item **items; + size_t nr, alloc; +}; + +static void ct_pack_data__release(struct ct_pack_data *data) +{ + int k; + + if (!data) + return; + + for (k = 0; k < data->nr; k++) + ct_pack_item__free(data->items[k]); + + FREE_AND_NULL(data->items); + data->nr = 0; + data->alloc = 0; +} + +static void cb_ct_pack(const char *full_path, size_t full_path_len, + const char *file_path, void *void_data) +{ + struct ct_pack_data *data = void_data; + struct ct_pack_item *item = NULL; + struct stat st; + const char *v; + + /* + * We only want "ct-.pack" files. The test script creates + * cached commits-and-trees packfiles with this prefix to avoid + * confusion with prefetch packfiles received by gvfs-helper. + */ + if (!ends_with(file_path, ".pack")) + return; + if (!skip_prefix(file_path, "ct-", &v)) + return; + + item = (struct ct_pack_item *)xcalloc(1, sizeof(*item)); + strbuf_init(&item->path_pack, 0); + strbuf_addstr(&item->path_pack, full_path); + + strbuf_init(&item->path_idx, 0); + strbuf_addstr(&item->path_idx, full_path); + strbuf_strip_suffix(&item->path_idx, ".pack"); + strbuf_addstr(&item->path_idx, ".idx"); + + item->ph.timestamp = (uint64_t)strtoul(v, NULL, 10); + + lstat(item->path_pack.buf, &st); + item->ph.len_pack = (uint64_t)st.st_size; + + if (string_list_has_string(&mayhem_list, "no_prefetch_idx")) + item->ph.len_idx = maximum_unsigned_value_of_type(uint64_t); + else if (lstat(item->path_idx.buf, &st) < 0) + item->ph.len_idx = maximum_unsigned_value_of_type(uint64_t); + else + item->ph.len_idx = (uint64_t)st.st_size; + + ALLOC_GROW(data->items, data->nr + 1, data->alloc); + data->items[data->nr++] = item; +} + +/* + * Sort by increasing EPOCH time. + */ +static int ct_pack_sort_compare(const void *_a, const void *_b) +{ + const struct ct_pack_item *a = *(const struct ct_pack_item **)_a; + const struct ct_pack_item *b = *(const struct ct_pack_item **)_b; + return (a->ph.timestamp < b->ph.timestamp) ? -1 : (a->ph.timestamp != b->ph.timestamp); +} + +static enum worker_result send_ct_item(const struct ct_pack_item *item) +{ + struct ph ph_le; + int fd_pack = -1; + int fd_idx = -1; + enum worker_result wr = WR_OK; + + /* send per-packfile header. all fields are little-endian on the wire. 
+static enum worker_result send_ct_item(const struct ct_pack_item *item)
+{
+	struct ph ph_le;
+	int fd_pack = -1;
+	int fd_idx = -1;
+	enum worker_result wr = WR_OK;
+
+	/* send per-packfile header; all fields are little-endian on the wire */
+	ph_le.timestamp = my_get_le64(item->ph.timestamp);
+	ph_le.len_pack = my_get_le64(item->ph.len_pack);
+	ph_le.len_idx = my_get_le64(item->ph.len_idx);
+
+	if (write_in_full(1, &ph_le, sizeof(ph_le)) < 0) {
+		logerror("unable to write ph_le");
+		wr = WR_IO_ERROR;
+		goto done;
+	}
+
+	trace2_printf("%s: sending prefetch pack '%s'", TR2_CAT, item->path_pack.buf);
+
+	fd_pack = git_open_cloexec(item->path_pack.buf, O_RDONLY);
+	if (fd_pack == -1 || copy_fd(fd_pack, 1)) {
+		logerror("could not send packfile");
+		wr = WR_IO_ERROR;
+		goto done;
+	}
+
+	if (item->ph.len_idx != maximum_unsigned_value_of_type(uint64_t)) {
+		trace2_printf("%s: sending prefetch idx '%s'", TR2_CAT, item->path_idx.buf);
+
+		fd_idx = git_open_cloexec(item->path_idx.buf, O_RDONLY);
+		if (fd_idx == -1 || copy_fd(fd_idx, 1)) {
+			logerror("could not send idx");
+			wr = WR_IO_ERROR;
+			goto done;
+		}
+	}
+
+done:
+	if (fd_pack != -1)
+		close(fd_pack);
+	if (fd_idx != -1)
+		close(fd_idx);
+	return wr;
+}
+
+/*
+ * The GVFS Protocol defines the lastTimeStamp parameter as the value
+ * of the last prefetch pack that the client has.  Therefore, we only
+ * want to send newer ones.
+ */
+static int want_ct_pack(const struct ct_pack_item *item, timestamp_t last_timestamp)
+{
+	return item->ph.timestamp > last_timestamp;
+}
+
+static enum worker_result send_multipack(struct ct_pack_data *data,
+					 timestamp_t last_timestamp)
+{
+	struct strbuf response_header = STRBUF_INIT;
+	struct strbuf uuid = STRBUF_INIT;
+	enum worker_result wr;
+	size_t content_len = 0;
+	unsigned short np = 0;
+	unsigned short np_le;
+	int k;
+
+	/*
+	 * Precompute the content-length so that we don't have to deal with
+	 * chunking it.
+	 */
+	content_len += sizeof(v1_h) + sizeof(np);
+	for (k = 0; k < data->nr; k++) {
+		struct ct_pack_item *item = data->items[k];
+
+		if (!want_ct_pack(item, last_timestamp))
+			continue;
+
+		np++;
+		content_len += sizeof(struct ph);
+		content_len += item->ph.len_pack;
+		if (item->ph.len_idx != maximum_unsigned_value_of_type(uint64_t))
+			content_len += item->ph.len_idx;
+	}
+
+	strbuf_addstr(&response_header, "HTTP/1.1 200 OK\r\n");
+	strbuf_addstr(&response_header, "Cache-Control: private\r\n");
+	strbuf_addstr(&response_header,
+		      "Content-Type: application/x-gvfs-timestamped-packfiles-indexes\r\n");
+	strbuf_addf(  &response_header, "Content-Length: %d\r\n", (int)content_len);
+	strbuf_addf(  &response_header, "Server: test-gvfs-protocol/%s\r\n", git_version_string);
+	strbuf_addf(  &response_header, "Date: %s\r\n", show_date(time(NULL), 0, DATE_MODE(RFC2822)));
+	gen_fake_uuid(&uuid);
+	strbuf_addf(  &response_header, "X-VSS-E2EID: %s\r\n", uuid.buf);
+	strbuf_addstr(&response_header, "\r\n");
+
+	if (write_in_full(1, response_header.buf, response_header.len) < 0) {
+		logerror("unable to write response header");
+		wr = WR_IO_ERROR;
+		goto done;
+	}
+
+	/* send protocol version header */
+	if (write_in_full(1, v1_h, sizeof(v1_h)) < 0) {
+		logerror("unable to write v1_h");
+		wr = WR_IO_ERROR;
+		goto done;
+	}
+
+	/* send number of packfiles */
+	np_le = my_get_le16(np);
+	if (write_in_full(1, &np_le, sizeof(np_le)) < 0) {
+		logerror("unable to write np");
+		wr = WR_IO_ERROR;
+		goto done;
+	}
+
+	for (k = 0; k < data->nr; k++) {
+		if (!want_ct_pack(data->items[k], last_timestamp))
+			continue;
+
+		wr = send_ct_item(data->items[k]);
+		if (wr != WR_OK)
+			goto done;
+	}
+
+	wr = WR_OK;
+
+done:
+	strbuf_release(&uuid);
+	strbuf_release(&response_header);
+
+	return wr;
+}
+
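The Content-Length precomputation in send_multipack() above, extracted into a standalone helper for clarity. The per-pack header size assumes `struct ph` has no padding (three uint64_t fields, 24 bytes), and UINT64_MAX is the "no idx" sentinel; the skip condition mirrors want_ct_pack()'s strictly-newer-than filter:

    #include <stddef.h>
    #include <stdint.h>

    struct ph_sketch { uint64_t timestamp, len_pack, len_idx; };

    static size_t multipack_content_length(const struct ph_sketch *ph,
                                           size_t nr, uint64_t since)
    {
        size_t len = 6 + 2; /* "GPRE \x01" header + uint16 pack count */
        size_t k;

        for (k = 0; k < nr; k++) {
            if (ph[k].timestamp <= since) /* want_ct_pack() */
                continue;
            len += sizeof(struct ph_sketch); /* 24-byte per-pack header */
            len += ph[k].len_pack;
            if (ph[k].len_idx != UINT64_MAX)
                len += ph[k].len_idx;
        }
        return len;
    }

Precomputing the exact length lets the test server reply with a fixed Content-Length instead of chunked transfer encoding, which keeps the client side simple.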
+static enum worker_result do__gvfs_prefetch__get(struct req *req)
+{
+	struct ct_pack_data data;
+	timestamp_t last_timestamp = 0;
+	enum worker_result wr;
+
+	memset(&data, 0, sizeof(data));
+
+	if (req->quest_args.len) {
+		const char *key = strstr(req->quest_args.buf, "lastPackTimestamp=");
+		if (key) {
+			const char *val;
+			if (skip_prefix(key, "lastPackTimestamp=", &val)) {
+				last_timestamp = strtol(val, NULL, 10);
+			}
+		}
+	}
+	trace2_printf("%s: prefetch/since %"PRItime, TR2_CAT, last_timestamp);
+
+	for_each_file_in_pack_dir(get_object_directory(), cb_ct_pack, &data);
+	QSORT(data.items, data.nr, ct_pack_sort_compare);
+
+	wr = send_multipack(&data, last_timestamp);
+
+	ct_pack_data__release(&data);
+
+	return wr;
+}
+
 /*
  * Read the HTTP request up to the start of the optional message-body.
  * We do this byte-by-byte because we have keep-alive turned on and
@@ -1174,6 +1472,11 @@ static enum worker_result req__read(struct req *req, int fd)
 	 * We let our caller read/chunk it in as appropriate.
 	 */
 done:
+
+#if 0
+	/*
+	 * This is useful for debugging the request, but very noisy.
+	 */
 	if (trace2_is_enabled()) {
 		struct string_list_item *item;
 		trace2_printf("%s: %s", TR2_CAT, req->start_line.buf);
@@ -1188,6 +1491,7 @@ static enum worker_result req__read(struct req *req, int fd)
 		for_each_string_list_item(item, &req->header_list)
 			trace2_printf("%s: Hdrs: %s", TR2_CAT, item->string);
 	}
+#endif
 
 	strbuf_release(&h);
 
@@ -1238,6 +1542,12 @@ static enum worker_result dispatch(struct req *req)
 		return do__gvfs_config__get(req);
 	}
 
+	if (!strcmp(req->gvfs_api.buf, "gvfs/prefetch")) {
+
+		if (!strcmp(method, "GET"))
+			return do__gvfs_prefetch__get(req);
+	}
+
 	return send_http_error(1, 501, "Not Implemented", -1,
 			       WR_OK | WR_HANGUP);
 }
diff --git a/t/t5799-gvfs-helper.sh b/t/t5799-gvfs-helper.sh
index 3cb1459ea32476..5c79654a63f0fa 100755
--- a/t/t5799-gvfs-helper.sh
+++ b/t/t5799-gvfs-helper.sh
@@ -24,8 +24,8 @@ test_set_port GIT_TEST_GVFS_PROTOCOL_PORT
 # actually use it).  We are only testing explicit object
 # fetching using gvfs-helper.exe in isolation.
 #
-REPO_SRC="$PWD"/repo_src
-REPO_T1="$PWD"/repo_t1
+REPO_SRC="$(pwd)"/repo_src
+REPO_T1="$(pwd)"/repo_t1
 
 # Setup some loopback URLs where test-gvfs-protocol.exe will be
 # listening.  We will spawn it directly inside the repo_src directory,
@@ -44,22 +44,22 @@ HOST_PORT=127.0.0.1:$GIT_TEST_GVFS_PROTOCOL_PORT
 ORIGIN_URL=http://$HOST_PORT/servertype/origin
 CACHE_URL=http://$HOST_PORT/servertype/cache
 
-SHARED_CACHE_T1="$PWD"/shared_cache_t1
+SHARED_CACHE_T1="$(pwd)"/shared_cache_t1
 
 # The pid-file is created by test-gvfs-protocol.exe when it starts.
 # The server will shut down if/when we delete it.  (This is a little
 # easier than killing it by PID.)
 #
-PID_FILE="$PWD"/pid-file.pid
-SERVER_LOG="$PWD"/OUT.server.log
+PID_FILE="$(pwd)"/pid-file.pid
+SERVER_LOG="$(pwd)"/OUT.server.log
 
 PATH="$GIT_BUILD_DIR/t/helper/:$PATH" && export PATH
 
-OIDS_FILE="$PWD"/oid_list.txt
-OIDS_CT_FILE="$PWD"/oid_ct_list.txt
-OIDS_BLOBS_FILE="$PWD"/oids_blobs_file.txt
-OID_ONE_BLOB_FILE="$PWD"/oid_one_blob_file.txt
-OID_ONE_COMMIT_FILE="$PWD"/oid_one_commit_file.txt
+OIDS_FILE="$(pwd)"/oid_list.txt
+OIDS_CT_FILE="$(pwd)"/oid_ct_list.txt
+OIDS_BLOBS_FILE="$(pwd)"/oids_blobs_file.txt
+OID_ONE_BLOB_FILE="$(pwd)"/oid_one_blob_file.txt
+OID_ONE_COMMIT_FILE="$(pwd)"/oid_one_commit_file.txt
 
 # Get a list of available OIDs in repo_src so that we can try to fetch
 # them and so that we don't have to hard-code a list of known OIDs.
@@ -108,6 +108,30 @@ get_one_commit_oid () {
 	return 0
 }
 
+# Create a commits-and-trees packfile for use with "prefetch"
+# using the given range of commits.
+#
+create_commits_and_trees_packfile () {
+	if test $# -eq 2
+	then
+		epoch=$1
+		revs=$2
+	else
+		echo "create_commits_and_trees_packfile: Need 2 args"
+		return 1
+	fi
+
+	pack_file="$REPO_SRC"/.git/objects/pack/ct-$epoch.pack
+	idx_file="$REPO_SRC"/.git/objects/pack/ct-$epoch.idx
+
+	git -C "$REPO_SRC" pack-objects --stdout --revs --filter=blob:none \
+		>"$pack_file" <<-EOF
+		$revs
+	EOF
+	git -C "$REPO_SRC" index-pack -o "$idx_file" "$pack_file"
+	return 0
+}
+
 test_expect_success 'setup repos' '
 	test_create_repo "$REPO_SRC" &&
 	git -C "$REPO_SRC" branch -M main &&
@@ -115,9 +139,16 @@ test_expect_success 'setup repos' '
 	# test_commit_bulk() does magic to create a packfile containing
 	# the new commits.
 	#
+	# We create branches in repo_src, but also remember the branch OIDs
+	# in files so that we can refer to them in repo_t1, which will not
+	# have the commits locally (because we do not clone or fetch).
+	#
 	test_commit_bulk -C "$REPO_SRC" --filename="batch_a.%s.t" 9 &&
+	git -C "$REPO_SRC" branch B1 &&
 	git -C "$REPO_SRC" rev-parse refs/heads/main >m1.branch &&
 	#
 	test_commit_bulk -C "$REPO_SRC" --filename="batch_b.%s.t" 9 &&
+	git -C "$REPO_SRC" branch B2 &&
 	git -C "$REPO_SRC" rev-parse refs/heads/main >m2.branch &&
 	#
 	# test_commit() creates commits, trees, tags, and blobs and leave
@@ -134,8 +165,16 @@ test_expect_success 'setup repos' '
 	test_commit -C "$REPO_SRC" file7.txt &&
 	test_commit -C "$REPO_SRC" file8.txt &&
 	test_commit -C "$REPO_SRC" file9.txt &&
+	git -C "$REPO_SRC" branch B3 &&
 	git -C "$REPO_SRC" rev-parse refs/heads/main >m3.branch &&
 	#
+	# Create some commits-and-trees-only packfiles for testing prefetch.
+	# Set arbitrary EPOCH times to make it easier to test fetch-since.
+	#
+	create_commits_and_trees_packfile 1000000000 B1 &&
+	create_commits_and_trees_packfile 1100000000 B1..B2 &&
+	create_commits_and_trees_packfile 1200000000 B2..B3 &&
+	#
 	# gvfs-helper.exe writes downloaded objects to a shared-cache directory
 	# rather than the ODB inside the .git directory.
 	#
@@ -160,10 +199,10 @@ test_expect_success 'setup repos' '
 	EOF
 	cat <<-EOF >creds.sh &&
 	#!/bin/sh
-	cat "$PWD"/creds.txt
+	cat "$(pwd)"/creds.txt
 	EOF
 	chmod 755 creds.sh &&
-	git -C "$REPO_T1" config --local credential.helper "!f() { cat \"$PWD\"/creds.txt; }; f" &&
+	git -C "$REPO_T1" config --local credential.helper "!f() { cat \"$(pwd)\"/creds.txt; }; f" &&
 	#
 	# Create some test data sets.
 	#
@@ -333,6 +372,10 @@ verify_objects_in_shared_cache () {
 	return 0
 }
 
+# gvfs-helper prints a "packfile <name>" message for each received
+# packfile to stdout.  Verify that we received the expected number
+# of packfiles.
+#
 verify_received_packfile_count () {
 	if test $# -eq 1
 	then
@@ -351,6 +394,43 @@
 	return 0
 }
 
+# Verify that we have exactly 1 prefetch .keep file.
+# Optionally, verify that it has the given timestamp.
+#
+verify_prefetch_keeps () {
+	count=$(( $(ls -1 "$SHARED_CACHE_T1"/pack/prefetch-*.keep | wc -l) ))
+	if test $count -ne 1
+	then
+		echo "verify_prefetch_keeps: found $count keep files, expected 1."
+		return 1
+	fi
+
+	if test $# -eq 1
+	then
+		count=$(( $(ls -1 "$SHARED_CACHE_T1"/pack/prefetch-$1-*.keep | wc -l) ))
+		if test $count -ne 1
+		then
+			echo "verify_prefetch_keeps: did not find expected keep file."
+			return 1
+		fi
+	fi
+
+	return 0
+}
+
+# Verify that the number of vfs- packfiles present in the shared-cache
+# matches our expectations.
+#
+verify_vfs_packfile_count () {
+	count=$(( $(ls -1 "$SHARED_CACHE_T1"/pack/vfs-*.pack | wc -l) ))
+	if test $count -ne $1
+	then
+		echo "verify_vfs_packfile_count: expected $1; actual $count"
+		return 1
+	fi
+	return 0
+}
+
 per_test_cleanup () {
 	stop_gvfs_protocol_server
 
@@ -554,8 +634,8 @@ test_expect_success 'basic: POST-request a single blob' '
 # Request a single commit via POST.  Per the GVFS Protocol, the server
 # should implicitly send us a packfile containing the commit and the
 # trees it references.  Confirm that properly handled the receipt of
-# the packfile.  (Here, we are testing that asking for a single object
-# yields a packfile rather than a loose object.)
+# the packfile.  (Here, we are testing that asking for a single commit
+# via POST yields a packfile rather than a loose object.)
 #
 # We DO NOT verify that the packfile contains commits/trees and no blobs
 # because our test helper doesn't implement the filtering.
@@ -587,6 +667,110 @@ test_expect_success 'basic: POST-request a single commit' '
 	verify_connection_count 1
 '
 
+test_expect_success 'basic: PREFETCH w/o arg gets all' '
+	test_when_finished "per_test_cleanup" &&
+	start_gvfs_protocol_server &&
+
+	# Without a "since" argument, we get all "ct-*.pack" packfiles
+	# since the EPOCH, because we do not have any prefetch packs
+	# locally yet.
+	#
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		prefetch >OUT.output &&
+
+	# gvfs-helper prints a "packfile <name>" message for each received
+	# packfile.
+	#
+	verify_received_packfile_count 3 &&
+	verify_prefetch_keeps 1200000000 &&
+
+	stop_gvfs_protocol_server &&
+	verify_connection_count 1
+'
+
+test_expect_success 'basic: PREFETCH w/ arg' '
+	test_when_finished "per_test_cleanup" &&
+	start_gvfs_protocol_server &&
+
+	# Ask for cached packfiles NEWER THAN the given time.
+	#
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		prefetch --since="1000000000" >OUT.output &&
+
+	# gvfs-helper prints a "packfile <name>" message for each received
+	# packfile.
+	#
+	verify_received_packfile_count 2 &&
+	verify_prefetch_keeps 1200000000 &&
+
+	stop_gvfs_protocol_server &&
+	verify_connection_count 1
'
+
+test_expect_success 'basic: PREFETCH mayhem no_prefetch_idx' '
+	test_when_finished "per_test_cleanup" &&
+	start_gvfs_protocol_server_with_mayhem no_prefetch_idx &&
+
+	# Request prefetch packs, but tell the server to not send any
+	# idx files and force gvfs-helper to compute them.
+	#
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		prefetch --since="1000000000" >OUT.output &&
+
+	# gvfs-helper prints a "packfile <name>" message for each received
+	# packfile.
+	#
+	verify_received_packfile_count 2 &&
+	verify_prefetch_keeps 1200000000 &&
+
+	stop_gvfs_protocol_server &&
+	verify_connection_count 1
+'
+
+test_expect_success 'basic: PREFETCH up-to-date' '
+	test_when_finished "per_test_cleanup" &&
+	start_gvfs_protocol_server &&
+
+	# Ask for cached packfiles NEWER THAN the given time.
+	#
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		prefetch --since="1000000000" >OUT.output &&
+
+	# gvfs-helper prints a "packfile <name>" message for each received
+	# packfile.
+	#
+	verify_received_packfile_count 2 &&
+	verify_prefetch_keeps 1200000000 &&
+
+	# Ask again for any packfiles newer than what we have cached locally.
+	#
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		prefetch >OUT.output &&
+
+	# gvfs-helper prints a "packfile <name>" message for each received
+	# packfile.
+	#
+	verify_received_packfile_count 0 &&
+	verify_prefetch_keeps 1200000000 &&
+
+	stop_gvfs_protocol_server &&
+	verify_connection_count 2
+'
+
 #################################################################
 # Tests to see how gvfs-helper responds to network problems.
 #
@@ -960,7 +1144,7 @@ test_expect_success 'HTTP GET Auth on Cache Server' '
 # magically fetched whenever required.
 #################################################################
 
-test_expect_success 'integration: explicit commit/trees, implicit blobs: log file' '
+test_expect_success 'integration: explicit commit/trees, implicit blobs: diff 2 commits' '
 	test_when_finished "per_test_cleanup" &&
 	start_gvfs_protocol_server &&
 
@@ -979,14 +1163,14 @@ test_expect_success 'integration: explicit commit/trees, implicit blobs: log fil
 	#
 	test_must_fail \
 		git -C "$REPO_T1" -c core.useGVFSHelper=false \
-			log $(cat m3.brach) -- file9.txt \
+			diff $(cat m1.branch)..$(cat m3.branch) \
 			>OUT.output 2>OUT.stderr &&
 
 	# Turn on gvfs-helper and retry.  This should implicitly fetch
 	# any needed blobs.
 	#
 	git -C "$REPO_T1" -c core.useGVFSHelper=true \
-		log $(cat m3.branch) -- file9.txt \
+		diff $(cat m1.branch)..$(cat m3.branch) \
 		>OUT.output 2>OUT.stderr &&
 
 	# Verify that gvfs-helper wrote the fetched the blobs to the
@@ -994,57 +1178,116 @@ test_expect_success 'integration: explicit commit/trees, implicit blobs: log fil
 	# turned off should succeed.
 	#
 	git -C "$REPO_T1" -c core.useGVFSHelper=false \
-		log $(cat m3.branch) -- file9.txt \
+		diff $(cat m1.branch)..$(cat m3.branch) \
 		>OUT.output 2>OUT.stderr
 '
 
-test_expect_success 'integration: explicit commit/trees, implicit blobs: diff 2 commits' '
+test_expect_success 'integration: fully implicit: diff 2 commits' '
 	test_when_finished "per_test_cleanup" &&
 	start_gvfs_protocol_server &&
 
-	# We have a very empty repo.  Seed it with all of the commits
-	# and trees.  The purpose of this test is to demand-load the
-	# needed blobs only, so we prefetch the commits and trees.
+	# Implicitly demand-load everything without any pre-seeding.
 	#
+	git -C "$REPO_T1" -c core.useGVFSHelper=true \
+		diff $(cat m1.branch)..$(cat m3.branch) \
+		>OUT.output 2>OUT.stderr
+'
+
+#################################################################
+# Duplicate packfile tests.
+#
+# If we request a fixed set of blobs, we should get a unique packfile
+# of the form "vfs-<checksum>.{pack,idx}".  If we request that same set
+# again, the server should create and send the exact same packfile.
+# True web servers might build the custom packfile in random order,
+# but our test web server should give us consistent results.
+#
+# Verify that we can handle the duplicate pack and idx file properly.
+#################################################################
+
+test_expect_success 'duplicate: vfs- packfile' '
+	test_when_finished "per_test_cleanup" &&
+	start_gvfs_protocol_server &&
+
 	git -C "$REPO_T1" gvfs-helper \
 		--cache-server=disable \
 		--remote=origin \
-		get \
-		<"$OIDS_CT_FILE" >OUT.output &&
-
-	# Confirm that we do not have the blobs locally.
-	# With gvfs-helper turned off, we should fail.
-	#
-	test_must_fail \
-		git -C "$REPO_T1" -c core.useGVFSHelper=false \
-			diff $(cat m1.branch)..$(cat m3.branch) \
-			>OUT.output 2>OUT.stderr &&
+		--no-progress \
+		post \
+		<"$OIDS_BLOBS_FILE" >OUT.output 2>OUT.stderr &&
+	verify_received_packfile_count 1 &&
+	verify_vfs_packfile_count 1 &&
 
-	# Turn on gvfs-helper and retry.  This should implicitly fetch
-	# any needed blobs.
+	# Re-fetch the same packfile.  We do not care if it replaces the
+	# first one or if it silently fails to overwrite the existing
+	# one.  We just confirm that afterwards we only have 1 packfile.
 	#
-	git -C "$REPO_T1" -c core.useGVFSHelper=true \
-		diff $(cat m1.branch)..$(cat m3.branch) \
-		>OUT.output 2>OUT.stderr &&
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		post \
+		<"$OIDS_BLOBS_FILE" >OUT.output 2>OUT.stderr &&
+	verify_received_packfile_count 1 &&
+	verify_vfs_packfile_count 1 &&
 
-	# Verify that gvfs-helper wrote the fetched the blobs to the
-	# local ODB, such that a second attempt with gvfs-helper
-	# turned off should succeed.
-	#
-	git -C "$REPO_T1" -c core.useGVFSHelper=false \
-		diff $(cat m1.branch)..$(cat m3.branch) \
-		>OUT.output 2>OUT.stderr
+	stop_gvfs_protocol_server
 '
 
+# Return the absolute pathname of the first received packfile.
+#
+first_received_packfile_pathname () {
+	fn=$(sed -n '/^packfile/p' <OUT.output \
+		| head -1 \
+		| sed -n 's/^packfile \(.*\)/\1/p')
+	echo "$SHARED_CACHE_T1"/pack/"$fn"
+	return 0
+}
+
+test_expect_success 'duplicate and busy: vfs- packfile' '
+	test_when_finished "per_test_cleanup" &&
+	start_gvfs_protocol_server &&
+
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		post \
+		<"$OIDS_BLOBS_FILE" \
+		>OUT.output \
+		2>OUT.stderr &&
+	verify_received_packfile_count 1 &&
+	verify_vfs_packfile_count 1 &&
+
+	# Re-fetch the same packfile, but hold the existing packfile
+	# open for writing on an obscure (and randomly-chosen) file
+	# descriptor.
+	#
+	# This should cause the replacement-install to fail (at least
+	# on Windows) with an EBUSY or EPERM or something.
+	#
+	# Verify that that error is eaten.  We do not care if the
+	# replacement is retried or if gvfs-helper simply discards the
+	# second instance.  We just confirm that afterwards we only
+	# have 1 packfile on disk and that the command "lies" and reports
+	# that it created the existing packfile.  (We want the lie because
+	# in normal usage, gh-client has already built the packed-git list
+	# in memory and is using gvfs-helper to fetch missing objects;
+	# gh-client does not care who does the fetch, but it needs to
+	# update its packed-git list and restart the object lookup.)
+	#
+	PACK=$(first_received_packfile_pathname) &&
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		post \
+		<"$OIDS_BLOBS_FILE" \
+		>OUT.output \
+		2>OUT.stderr \
+		9>>"$PACK" &&
+	verify_received_packfile_count 1 &&
+	verify_vfs_packfile_count 1 &&
+
+	stop_gvfs_protocol_server
+'
 
 #################################################################