From 58237c08cb2e707d7a9af6dfc8c612100f65f2a6 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 11 Nov 2019 15:09:31 -0500 Subject: [PATCH 01/10] gvfs-helper: add prefetch support Teach gvfs-helper to support "/gvfs/prefetch" REST API. This includes a new `gvfs-helper prefetch --since=` command line option. And a new `objects.prefetch` verb in `gvfs-helper server` mode. If `since` argument is omitted, `gvfs-helper` will search the local shared-cache for the most recent prefetch packfile and start from there. The is usually a seconds-since-epoch, but may also be a "friendly" date -- such as "midnight", "yesterday" and etc. using the existing date selection mechanism. Add `gh_client__prefetch()` API to allow `git.exe` to easily call prefetch (and using the same long-running process as immediate and queued object fetches). Expanded t5799 unit tests to include prefetch tests. Test setup now also builds some commits-and-trees packfiles for testing purposes with well-known timestamps. Expanded t/helper/test-gvfs-protocol.exe to support "/gvfs/prefetch" REST API. Massive refactor of existing packfile handling in gvfs-helper.c to reuse more code between "/gvfs/objects POST" and "/gvfs/prefetch". With this we now properly name packfiles with the checksum SHA1 rather than a date string. Refactor also addresses some of the confusing tempfile setup and install_ code processing (introduced to handle the ambiguity of how POST works with commit objects). Update 2023-05-22 (v2.41.0): add '--no-rev-index' to 'index-pack' to avoid writing the extra (unused) file. Signed-off-by: Jeff Hostetler --- gvfs-helper-client.c | 129 +++- gvfs-helper-client.h | 18 + gvfs-helper.c | 1178 +++++++++++++++++++++++++-------- t/helper/test-gvfs-protocol.c | 318 ++++++++- t/t5799-gvfs-helper.sh | 204 ++++-- 5 files changed, 1508 insertions(+), 339 deletions(-) diff --git a/gvfs-helper-client.c b/gvfs-helper-client.c index 8e5040e6c3932d..c44def7d912e8e 100644 --- a/gvfs-helper-client.c +++ b/gvfs-helper-client.c @@ -26,13 +26,14 @@ static struct hashmap gh_server__subprocess_map; static struct object_directory *gh_client__chosen_odb; /* - * The "objects" capability has 2 verbs: "get" and "post". + * The "objects" capability has verbs: "get" and "post" and "prefetch". */ #define CAP_OBJECTS (1u<<1) #define CAP_OBJECTS_NAME "objects" #define CAP_OBJECTS__VERB_GET1_NAME "get" #define CAP_OBJECTS__VERB_POST_NAME "post" +#define CAP_OBJECTS__VERB_PREFETCH_NAME "prefetch" static int gh_client__start_fn(struct subprocess_entry *subprocess) { @@ -131,6 +132,44 @@ static int gh_client__send__objects_get(struct child_process *process, return 0; } +/* + * Send a request to gvfs-helper to prefetch packfiles from either the + * cache-server or the main Git server using "/gvfs/prefetch". + * + * objects.prefetch LF + * [ LF] + * + */ +static int gh_client__send__objects_prefetch(struct child_process *process, + timestamp_t seconds_since_epoch) +{ + int err; + + /* + * We assume that all of the packet_ routines call error() + * so that we don't have to. + */ + + err = packet_write_fmt_gently( + process->in, + (CAP_OBJECTS_NAME "." CAP_OBJECTS__VERB_PREFETCH_NAME "\n")); + if (err) + return err; + + if (seconds_since_epoch) { + err = packet_write_fmt_gently(process->in, "%" PRItime "\n", + seconds_since_epoch); + if (err) + return err; + } + + err = packet_flush_gently(process->in); + if (err) + return err; + + return 0; +} + /* * Update the loose object cache to include the newly created * object. 
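For illustration, a complete round-trip using the verb added above might look like the following transcript (the timestamp and packfile name are hypothetical; the response line formats are documented in the gvfs-helper.c comments below):

    git> objects.prefetch
    git> 1557330000
    git> 0000

    git< odb <directory>
    git< packfile prefetch-1557330000-<checksum>.pack
    git< ok
    git< 0000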
@@ -178,7 +217,7 @@ static void gh_client__update_packed_git(const char *line) } /* - * Both CAP_OBJECTS verbs return the same format response: + * CAP_OBJECTS verbs return the same format response: * * * * @@ -218,6 +257,8 @@ static int gh_client__objects__receive_response( const char *v1; char *line; int len; + int nr_loose = 0; + int nr_packfile = 0; int err = 0; while (1) { @@ -236,13 +277,13 @@ static int gh_client__objects__receive_response( else if (starts_with(line, "packfile")) { gh_client__update_packed_git(line); ghc |= GHC__CREATED__PACKFILE; - *p_nr_packfile += 1; + nr_packfile++; } else if (starts_with(line, "loose")) { gh_client__update_loose_cache(line); ghc |= GHC__CREATED__LOOSE; - *p_nr_loose += 1; + nr_loose++; } else if (starts_with(line, "ok")) @@ -256,6 +297,8 @@ static int gh_client__objects__receive_response( } *p_ghc = ghc; + *p_nr_loose = nr_loose; + *p_nr_packfile = nr_packfile; return err; } @@ -312,7 +355,7 @@ static struct gh_server__process *gh_client__find_long_running_process( /* * Find an existing long-running process with the above command * line -or- create a new long-running process for this and - * subsequent 'get' requests. + * subsequent requests. */ if (!gh_server__subprocess_map_initialized) { gh_server__subprocess_map_initialized = 1; @@ -349,10 +392,14 @@ static struct gh_server__process *gh_client__find_long_running_process( void gh_client__queue_oid(const struct object_id *oid) { - // TODO consider removing this trace2. it is useful for interactive - // TODO debugging, but may generate way too much noise for a data - // TODO event. - trace2_printf("gh_client__queue_oid: %s", oid_to_hex(oid)); + /* + * Keep this trace as a printf only, so that it goes to the + * perf log, but not the event log. It is useful for interactive + * debugging, but generates way too much (unuseful) noise for the + * database. + */ + if (trace2_is_enabled()) + trace2_printf("gh_client__queue_oid: %s", oid_to_hex(oid)); if (!oidset_insert(&gh_client__oidset_queued, oid)) gh_client__oidset_count++; @@ -433,10 +480,14 @@ int gh_client__get_immediate(const struct object_id *oid, int nr_packfile = 0; int err = 0; - // TODO consider removing this trace2. it is useful for interactive - // TODO debugging, but may generate way too much noise for a data - // TODO event. - trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid)); + /* + * Keep this trace as a printf only, so that it goes to the + * perf log, but not the event log. It is useful for interactive + * debugging, but generates way too much (unuseful) noise for the + * database. + */ + if (trace2_is_enabled()) + trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid)); entry = gh_client__find_long_running_process(CAP_OBJECTS); if (!entry) @@ -465,3 +516,55 @@ int gh_client__get_immediate(const struct object_id *oid, return err; } + +/* + * Ask gvfs-helper to prefetch commits-and-trees packfiles since a + * given timestamp. + * + * If seconds_since_epoch is zero, gvfs-helper will scan the ODB for + * the last received prefetch and ask for ones newer than that. 
+ */ +int gh_client__prefetch(timestamp_t seconds_since_epoch, + int *nr_packfiles_received) +{ + struct gh_server__process *entry; + struct child_process *process; + enum gh_client__created ghc; + int nr_loose = 0; + int nr_packfile = 0; + int err = 0; + + entry = gh_client__find_long_running_process(CAP_OBJECTS); + if (!entry) + return -1; + + trace2_region_enter("gh-client", "objects/prefetch", the_repository); + trace2_data_intmax("gh-client", the_repository, "prefetch/since", + seconds_since_epoch); + + process = &entry->subprocess.process; + + sigchain_push(SIGPIPE, SIG_IGN); + + err = gh_client__send__objects_prefetch(process, seconds_since_epoch); + if (!err) + err = gh_client__objects__receive_response( + process, &ghc, &nr_loose, &nr_packfile); + + sigchain_pop(SIGPIPE); + + if (err) { + subprocess_stop(&gh_server__subprocess_map, + (struct subprocess_entry *)entry); + FREE_AND_NULL(entry); + } + + trace2_data_intmax("gh-client", the_repository, + "prefetch/packfile_count", nr_packfile); + trace2_region_leave("gh-client", "objects/prefetch", the_repository); + + if (nr_packfiles_received) + *nr_packfiles_received = nr_packfile; + + return err; +} diff --git a/gvfs-helper-client.h b/gvfs-helper-client.h index c1e38fad75f841..7692534ecda54c 100644 --- a/gvfs-helper-client.h +++ b/gvfs-helper-client.h @@ -66,4 +66,22 @@ void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr); */ int gh_client__drain_queue(enum gh_client__created *p_ghc); +/* + * Ask `gvfs-helper server` to fetch any "prefetch packs" + * available on the server more recent than the requested time. + * + * If seconds_since_epoch is zero, gvfs-helper will scan the ODB for + * the last received prefetch and ask for ones newer than that. + * + * A long-running background process is used to subsequent requests + * (either prefetch or regular immediate/queued requests) more efficient. + * + * One or more packfiles will be created in the shared-cache ODB. + * + * Returns 0 on success, -1 on error. Optionally also returns the + * number of prefetch packs received. + */ +int gh_client__prefetch(timestamp_t seconds_since_epoch, + int *nr_packfiles_received); + #endif /* GVFS_HELPER_CLIENT_H */ diff --git a/gvfs-helper.c b/gvfs-helper.c index a776f95a42fbb6..ca37be28e4dd79 100644 --- a/gvfs-helper.c +++ b/gvfs-helper.c @@ -22,7 +22,7 @@ // // error := verify cache-server and abort if not well-known. // -// trust := do not verify cache-server. just use it. +// trust := do not verify cache-server. just use it, if set. // // disable := disable the cache-server and always use the main // Git server. @@ -87,6 +87,24 @@ // Number of retries after transient network errors. // Set to zero to disable such retries. // +// prefetch +// +// Use "/gvfs/prefetch" REST API to fetch 1 or more commits-and-trees +// prefetch packs from the server. +// +// : +// +// --since= // defaults to "0" +// +// Time in seconds since the epoch. If omitted or +// zero, the timestamp from the newest prefetch +// packfile found in the shared-cache ODB is used. +// (This is based upon the packfile name, not the +// mtime.) +// +// The GVFS Protocol defines this value as a way to +// request cached packfiles NEWER THAN this timestamp. +// // server // // Interactive/sub-process mode. Listen for a series of commands @@ -116,20 +134,36 @@ // // Each object will be created as a loose object in the ODB. // +// Create 1 or more loose objects in the shared-cache ODB. 
+// (The pathname of the selected ODB is reported at the +// beginning of the response; this should match the pathname +// given on the command line). +// +// git> objects.get +// git> +// git> +// git> ... +// git> +// git> 0000 +// +// git< odb +// git< loose +// git< loose +// git< ... +// git< loose +// git< ok | partial | error +// git< 0000 +// // Interactive verb: objects.post // // Fetch 1 or more objects, in bulk, using one or more // "/gvfs/objects" POST requests. // -// For both verbs, if a cache-server is configured, try it first. -// Optionally fallback to the main Git server. -// // Create 1 or more loose objects and/or packfiles in the -// shared-cache ODB. (The pathname of the selected ODB is -// reported at the beginning of the response; this should -// match the pathname given on the command line). +// shared-cache ODB. A POST is allowed to respond with +// either loose or packed objects. // -// git> objects.get | objects.post +// git> objects.post // git> // git> // git> ... @@ -139,11 +173,31 @@ // git< odb // git< loose | packfile // git< loose | packfile -// gid< ... +// git< ... // git< loose | packfile // git< ok | partial | error // git< 0000 // +// Interactive verb: object.prefetch +// +// Fetch 1 or more prefetch packs using a "/gvfs/prefetch" +// request. +// +// git> objects.prefetch +// git> // optional +// git> 0000 +// +// git< odb +// git< packfile +// git< packfile +// git< ... +// git< packfile +// git< ok | error +// git< 0000 +// +// If a cache-server is configured, try it first. +// Optionally fallback to the main Git server. +// // [1] Documentation/technical/protocol-common.txt // [2] Documentation/technical/long-running-process-protocol.txt // [3] See GIT_TRACE_PACKET @@ -183,11 +237,17 @@ #include "abspath.h" #include "progress.h" #include "trace2.h" +#include "wrapper.h" +#include "packfile.h" +#include "date.h" + +#define TR2_CAT "gvfs-helper" static const char * const main_usage[] = { N_("git gvfs-helper [] config []"), N_("git gvfs-helper [] get []"), N_("git gvfs-helper [] post []"), + N_("git gvfs-helper [] prefetch []"), N_("git gvfs-helper [] server []"), NULL }; @@ -202,6 +262,11 @@ static const char *const objects_post_usage[] = { NULL }; +static const char *const prefetch_usage[] = { + N_("git gvfs-helper [] prefetch []"), + NULL +}; + static const char *const server_usage[] = { N_("git gvfs-helper [] server []"), NULL @@ -246,6 +311,7 @@ enum gh__error_code { GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE = 11, GH__ERROR_CODE__SUBPROCESS_SYNTAX = 12, GH__ERROR_CODE__INDEX_PACK_FAILED = 13, + GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH = 14, }; enum gh__cache_server_mode { @@ -310,6 +376,8 @@ static const char *gh__server_type_label[GH__SERVER_TYPE__NR] = { }; enum gh__objects_mode { + GH__OBJECTS_MODE__NONE = 0, + /* * Bulk fetch objects. * @@ -329,6 +397,12 @@ enum gh__objects_mode { * object treatment). */ GH__OBJECTS_MODE__GET, + + /* + * Fetch one or more pre-computed "prefetch packs" containing + * commits and trees. 
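+ *
+ * (This mode maps to the "/gvfs/prefetch" REST API and to the
+ * "objects.prefetch" verb in `server` mode.)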
+ */ + GH__OBJECTS_MODE__PREFETCH, }; struct gh__azure_throttle @@ -403,6 +477,7 @@ struct gh__request_params { int b_write_to_file; /* write to file=1 or strbuf=0 */ int b_permit_cache_server_if_defined; + enum gh__objects_mode objects_mode; enum gh__server_type server_type; int k_attempt; /* robust retry attempt */ @@ -417,15 +492,8 @@ struct gh__request_params { struct strbuf *buffer; /* for response content when strbuf */ struct strbuf tr2_label; /* for trace2 regions */ - struct strbuf loose_path; struct object_id loose_oid; - struct strbuf temp_path_pack; - struct strbuf temp_path_idx; - struct strbuf final_path_pack; - struct strbuf final_path_idx; - struct strbuf final_packfile_filename; - /* * Note that I am putting all of the progress-related instance data * inside the request-params in the hope that we can eventually @@ -465,13 +533,7 @@ struct gh__request_params { .tempfile = NULL, \ .buffer = NULL, \ .tr2_label = STRBUF_INIT, \ - .loose_path = STRBUF_INIT, \ .loose_oid = {{0}}, \ - .temp_path_pack = STRBUF_INIT, \ - .temp_path_idx = STRBUF_INIT, \ - .final_path_pack = STRBUF_INIT, \ - .final_path_idx = STRBUF_INIT, \ - .final_packfile_filename = STRBUF_INIT, \ .progress_state = GH__PROGRESS_STATE__START, \ .progress_base_phase2_msg = STRBUF_INIT, \ .progress_base_phase3_msg = STRBUF_INIT, \ @@ -496,12 +558,6 @@ static void gh__request_params__release(struct gh__request_params *params) params->buffer = NULL; /* we do not own this */ strbuf_release(¶ms->tr2_label); - strbuf_release(¶ms->loose_path); - strbuf_release(¶ms->temp_path_pack); - strbuf_release(¶ms->temp_path_idx); - strbuf_release(¶ms->final_path_pack); - strbuf_release(¶ms->final_path_idx); - strbuf_release(¶ms->final_packfile_filename); strbuf_release(¶ms->progress_base_phase2_msg); strbuf_release(¶ms->progress_base_phase3_msg); @@ -592,9 +648,7 @@ static void gh__response_status__zero(struct gh__response_status *s) s->azure = NULL; } -static void install_packfile(struct gh__request_params *params, - struct gh__response_status *status); -static void install_loose(struct gh__request_params *params, +static void install_result(struct gh__request_params *params, struct gh__response_status *status); /* @@ -631,7 +685,7 @@ static void log_e2eid(struct gh__request_params *params, strbuf_addstr(&key, "e2eid"); strbuf_addstr(&key, gh__server_type_label[params->server_type]); - trace2_data_string("gvfs-helper", NULL, key.buf, + trace2_data_string(TR2_CAT, NULL, key.buf, params->e2eid.buf); strbuf_release(&key); @@ -731,7 +785,7 @@ static void compute_retry_mode_from_http_response( status->retry = GH__RETRY_MODE__HTTP_429; status->ec = GH__ERROR_CODE__HTTP_429; - trace2_data_string("gvfs-helper", NULL, "error/http", + trace2_data_string(TR2_CAT, NULL, "error/http", status->error_message.buf); return; @@ -744,7 +798,7 @@ static void compute_retry_mode_from_http_response( status->retry = GH__RETRY_MODE__HTTP_503; status->ec = GH__ERROR_CODE__HTTP_503; - trace2_data_string("gvfs-helper", NULL, "error/http", + trace2_data_string(TR2_CAT, NULL, "error/http", status->error_message.buf); return; @@ -758,7 +812,7 @@ static void compute_retry_mode_from_http_response( status->retry = GH__RETRY_MODE__HARD_FAIL; status->ec = GH__ERROR_CODE__HTTP_OTHER; - trace2_data_string("gvfs-helper", NULL, "error/http", + trace2_data_string(TR2_CAT, NULL, "error/http", status->error_message.buf); return; } @@ -892,7 +946,7 @@ static void compute_retry_mode_from_curl_error( status->retry = GH__RETRY_MODE__HARD_FAIL; status->ec = 
GH__ERROR_CODE__CURL_ERROR; - trace2_data_string("gvfs-helper", NULL, "error/curl", + trace2_data_string(TR2_CAT, NULL, "error/curl", status->error_message.buf); return; @@ -902,7 +956,7 @@ static void compute_retry_mode_from_curl_error( status->retry = GH__RETRY_MODE__TRANSIENT; status->ec = GH__ERROR_CODE__CURL_ERROR; - trace2_data_string("gvfs-helper", NULL, "error/curl", + trace2_data_string(TR2_CAT, NULL, "error/curl", status->error_message.buf); return; } @@ -1096,7 +1150,7 @@ static void gh__run_one_slot(struct active_request_slot *slot, params->progress_state = GH__PROGRESS_STATE__START; strbuf_setlen(¶ms->e2eid, 0); - trace2_region_enter("gvfs-helper", key.buf, NULL); + trace2_region_enter(TR2_CAT, key.buf, NULL); if (!start_active_slot(slot)) { compute_retry_mode_from_curl_error(status, @@ -1120,7 +1174,7 @@ static void gh__run_one_slot(struct active_request_slot *slot, * (such as when we request a commit). */ strbuf_addstr(&key, "/nr_bytes"); - trace2_data_intmax("gvfs-helper", NULL, + trace2_data_intmax(TR2_CAT, NULL, key.buf, status->bytes_received); strbuf_setlen(&key, old_len); @@ -1130,16 +1184,10 @@ static void gh__run_one_slot(struct active_request_slot *slot, if (params->progress) stop_progress(¶ms->progress); - if (status->ec == GH__ERROR_CODE__OK && params->b_write_to_file) { - if (params->b_is_post && - !strcmp(status->content_type.buf, - "application/x-git-packfile")) - install_packfile(params, status); - else - install_loose(params, status); - } + if (status->ec == GH__ERROR_CODE__OK && params->b_write_to_file) + install_result(params, status); - trace2_region_leave("gvfs-helper", key.buf, NULL); + trace2_region_leave(TR2_CAT, key.buf, NULL); strbuf_release(&key); } @@ -1286,7 +1334,7 @@ static void lookup_main_url(void) */ gh__global.main_url = transport_anonymize_url(gh__global.remote->url[0]); - trace2_data_string("gvfs-helper", NULL, "remote/url", gh__global.main_url); + trace2_data_string(TR2_CAT, NULL, "remote/url", gh__global.main_url); } static void do__http_get__gvfs_config(struct gh__response_status *status, @@ -1313,10 +1361,23 @@ static void select_cache_server(void) gh__global.cache_server_url = NULL; if (gh__cmd_opts.cache_server_mode == GH__CACHE_SERVER_MODE__DISABLE) { - trace2_data_string("gvfs-helper", NULL, "cache/url", "disabled"); + trace2_data_string(TR2_CAT, NULL, "cache/url", "disabled"); return; } + if (!gvfs_cache_server_url || !*gvfs_cache_server_url) { + switch (gh__cmd_opts.cache_server_mode) { + default: + case GH__CACHE_SERVER_MODE__TRUST_WITHOUT_VERIFY: + case GH__CACHE_SERVER_MODE__VERIFY_DISABLE: + trace2_data_string(TR2_CAT, NULL, "cache/url", "unset"); + return; + + case GH__CACHE_SERVER_MODE__VERIFY_ERROR: + die("cache-server not set"); + } + } + /* * If the cache-server and main Git server have the same URL, we * can silently disable the cache-server (by NOT setting the field @@ -1324,14 +1385,14 @@ static void select_cache_server(void) */ if (!strcmp(gvfs_cache_server_url, gh__global.main_url)) { gh__cmd_opts.try_fallback = 0; - trace2_data_string("gvfs-helper", NULL, "cache/url", "same"); + trace2_data_string(TR2_CAT, NULL, "cache/url", "same"); return; } if (gh__cmd_opts.cache_server_mode == GH__CACHE_SERVER_MODE__TRUST_WITHOUT_VERIFY) { gh__global.cache_server_url = gvfs_cache_server_url; - trace2_data_string("gvfs-helper", NULL, "cache/url", + trace2_data_string(TR2_CAT, NULL, "cache/url", gvfs_cache_server_url); return; } @@ -1372,7 +1433,7 @@ static void select_cache_server(void) if (match) { gh__global.cache_server_url = 
gvfs_cache_server_url; - trace2_data_string("gvfs-helper", NULL, "cache/url", + trace2_data_string(TR2_CAT, NULL, "cache/url", gvfs_cache_server_url); } @@ -1396,7 +1457,7 @@ static void select_cache_server(void) else warning("could not verify cache-server '%s'", gvfs_cache_server_url); - trace2_data_string("gvfs-helper", NULL, "cache/url", + trace2_data_string(TR2_CAT, NULL, "cache/url", "disabled"); } @@ -1582,27 +1643,14 @@ static void select_odb(void) } /* - * Create a tempfile to stream the packfile into. - * - * We create a tempfile in the chosen ODB directory and let CURL - * automatically stream data to the file. If successful, we can - * later rename it to a proper .pack and run "git index-pack" on - * it to create the corresponding .idx file. - * - * TODO I would rather to just stream the packfile directly into - * TODO "git index-pack --stdin" (and save some I/O) because it - * TODO will automatically take care of the rename of both files - * TODO and any other cleanup. BUT INDEX-PACK WILL ONLY WRITE - * TODO TO THE PRIMARY ODB -- it will not write into the alternates - * TODO (this is considered bad form). So we would need to add - * TODO an option to index-pack to handle this. I don't want to - * TODO deal with this issue right now. - * - * TODO Consider using lockfile for this rather than naked tempfile. + * Create a unique tempfile or tempfile-pair inside the + * tempPacks directory. */ -static void create_tempfile_for_packfile( - struct gh__request_params *params, - struct gh__response_status *status) +static void my_create_tempfile( + struct gh__response_status *status, + int b_fdopen, + const char *suffix1, struct tempfile **t1, + const char *suffix2, struct tempfile **t2) { static unsigned int nth = 0; static struct timeval tv = {0}; @@ -1612,15 +1660,15 @@ static void create_tempfile_for_packfile( struct strbuf basename = STRBUF_INIT; struct strbuf buf = STRBUF_INIT; - int len_p; + int len_tp; enum scld_error scld; gh__response_status__zero(status); if (!nth) { /* - * Create a string to use in the name of all packfiles - * created by this process. + * Create a unique string to use in the name of all + * tempfiles created by this process. */ gettimeofday(&tv, NULL); secs = tv.tv_sec; @@ -1633,84 +1681,114 @@ static void create_tempfile_for_packfile( } /* - * Create a for this packfile using a series number , - * so that all of the chunks we download will group together. + * Create a for this instance/pair using a series + * number . */ - strbuf_addf(&basename, "vfs-%s-%04d", date, nth++); + strbuf_addf(&basename, "t-%s-%04d", date, nth++); + + if (!suffix1 || !*suffix1) + suffix1 = "temp"; /* - * We will stream the data into a managed tempfile() in: + * Create full pathname as: * - * "/pack/tempPacks/vfs--.temp" + * "/pack/tempPacks/." 
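+ *
+ * e.g., with the default "temp" suffix, something like
+ * "<odb>/pack/tempPacks/t-<date>-0000.temp" (where <date> is
+ * the stamp computed above).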
*/ strbuf_setlen(&buf, 0); strbuf_addbuf(&buf, &gh__global.buf_odb_path); strbuf_complete(&buf, '/'); - strbuf_addstr(&buf, "pack/"); - len_p = buf.len; - strbuf_addstr(&buf, "tempPacks/"); - strbuf_addbuf(&buf, &basename); - strbuf_addstr(&buf, ".temp"); + strbuf_addstr(&buf, "pack/tempPacks/"); + len_tp = buf.len; + strbuf_addf( &buf, "%s.%s", basename.buf, suffix1); scld = safe_create_leading_directories(buf.buf); if (scld != SCLD_OK && scld != SCLD_EXISTS) { strbuf_addf(&status->error_message, - "could not create directory for packfile: '%s'", + "could not create directory for tempfile: '%s'", buf.buf); status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; goto cleanup; } - params->tempfile = create_tempfile(buf.buf); - if (!params->tempfile) { + *t1 = create_tempfile(buf.buf); + if (!*t1) { strbuf_addf(&status->error_message, - "could not create tempfile for packfile: '%s'", + "could not create tempfile: '%s'", buf.buf); status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; goto cleanup; } - - fdopen_tempfile(params->tempfile, "w"); - - /* - * After the download is complete, we will need to steal the file - * from the tempfile() class (so that it doesn't magically delete - * it when we close the file handle) and then index it. - * - * We do this into the tempPacks directory to avoid contaminating - * the real pack directory until we know there is no corruption. - * - * "/pack/tempPacks/vfs--.temp.pack" - * "/pack/tempPacks/vfs--.temp.idx" - */ - strbuf_setlen(¶ms->temp_path_pack, 0); - strbuf_addf(¶ms->temp_path_pack, "%s.pack", buf.buf); - - strbuf_setlen(¶ms->temp_path_idx, 0); - strbuf_addf(¶ms->temp_path_idx, "%s.idx", buf.buf); + if (b_fdopen) + fdopen_tempfile(*t1, "w"); /* - * Later, if all goes well, we will install them as: + * Optionally create a peer tempfile with the same basename. + * (This is useful for prefetching .pack and .idx pairs.) * - * "/pack/vfs--.pack" - * "/pack/vfs--.idx" + * "/pack/tempPacks/." */ - strbuf_setlen(&buf, len_p); - strbuf_setlen(¶ms->final_path_pack, 0); - strbuf_addf(¶ms->final_path_pack, "%s%s.pack", - buf.buf, basename.buf); - strbuf_setlen(¶ms->final_path_idx, 0); - strbuf_addf(¶ms->final_path_idx, "%s%s.idx", - buf.buf, basename.buf); - strbuf_setlen(¶ms->final_packfile_filename, 0); - strbuf_addf(¶ms->final_packfile_filename, "%s.pack", - basename.buf); + if (suffix2 && *suffix2 && t2) { + strbuf_setlen(&buf, len_tp); + strbuf_addf( &buf, "%s.%s", basename.buf, suffix2); + + *t2 = create_tempfile(buf.buf); + if (!*t2) { + strbuf_addf(&status->error_message, + "could not create tempfile: '%s'", + buf.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; + goto cleanup; + } + if (b_fdopen) + fdopen_tempfile(*t2, "w"); + } cleanup: strbuf_release(&buf); strbuf_release(&basename); } +/* + * Create pathnames to the final location of the .pack and .idx + * files in the ODB. These are of the form: + * + * "/pack/-[-]." + * + * For example, for prefetch packs, will be the epoch + * timestamp and will be the packfile hash. 
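+ *
+ * For example (with hypothetical values):
+ *
+ *     "<odb>/pack/prefetch-1557330000-<hex-checksum>.pack"
+ *     "<odb>/pack/prefetch-1557330000-<hex-checksum>.idx"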
+ */ +static void create_final_packfile_pathnames( + const char *term_1, const char *term_2, const char *term_3, + struct strbuf *pack_path, struct strbuf *idx_path, + struct strbuf *pack_filename) +{ + struct strbuf base = STRBUF_INIT; + struct strbuf path = STRBUF_INIT; + + if (term_3 && *term_3) + strbuf_addf(&base, "%s-%s-%s", term_1, term_2, term_3); + else + strbuf_addf(&base, "%s-%s", term_1, term_2); + + strbuf_setlen(pack_filename, 0); + strbuf_addf( pack_filename, "%s.pack", base.buf); + + strbuf_addbuf(&path, &gh__global.buf_odb_path); + strbuf_complete(&path, '/'); + strbuf_addstr(&path, "pack/"); + + strbuf_setlen(pack_path, 0); + strbuf_addbuf(pack_path, &path); + strbuf_addf( pack_path, "%s.pack", base.buf); + + strbuf_setlen(idx_path, 0); + strbuf_addbuf(idx_path, &path); + strbuf_addf( idx_path, "%s.idx", base.buf); + + strbuf_release(&base); + strbuf_release(&path); +} + /* * Create a pathname to the loose object in the shared-cache ODB * with the given OID. Try to "mkdir -p" to ensure the parent @@ -1738,54 +1816,98 @@ static int create_loose_pathname_in_odb(struct strbuf *buf_path, return 0; } -/* - * Create a tempfile to stream a loose object into. - * - * We create a tempfile in the chosen ODB directory and let CURL - * automatically stream data to the file. - * - * We put it directly in the "/xx/" directory. - */ -static void create_tempfile_for_loose( - struct gh__request_params *params, - struct gh__response_status *status) +static void my_run_index_pack(struct gh__request_params *params, + struct gh__response_status *status, + const struct strbuf *temp_path_pack, + const struct strbuf *temp_path_idx, + struct strbuf *packfile_checksum) { - static int nth = 0; - struct strbuf buf_path = STRBUF_INIT; + struct child_process ip = CHILD_PROCESS_INIT; + struct strbuf ip_stdout = STRBUF_INIT; - gh__response_status__zero(status); + strvec_push(&ip.args, "git"); + strvec_push(&ip.args, "index-pack"); - if (create_loose_pathname_in_odb(&buf_path, ¶ms->loose_oid)) { - strbuf_addf(&status->error_message, - "cannot create directory for loose object '%s'", - buf_path.buf); - status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; - goto cleanup; + if (gh__cmd_opts.show_progress) { + strvec_push(&ip.args, "-v"); + ip.err = 0; + } else { + ip.err = -1; + ip.no_stderr = 1; } - /* Remember the full path of the final destination. */ - strbuf_setlen(¶ms->loose_path, 0); - strbuf_addbuf(¶ms->loose_path, &buf_path); + /* Skip generating the rev index, we don't need it. */ + strvec_push(&ip.args, "--no-rev-index"); - /* - * Build a unique tempfile pathname based upon it. We avoid - * using lockfiles to avoid issues with stale locks after - * crashes. - */ - strbuf_addf(&buf_path, ".%08u.%.06u.temp", getpid(), nth++); + strvec_pushl(&ip.args, "-o", temp_path_idx->buf, NULL); + strvec_push(&ip.args, temp_path_pack->buf); + ip.no_stdin = 1; + ip.out = -1; - params->tempfile = create_tempfile(buf_path.buf); - if (!params->tempfile) { - strbuf_addstr(&status->error_message, - "could not create tempfile for loose object"); - status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; + if (pipe_command(&ip, NULL, 0, &ip_stdout, 0, NULL, 0)) { + unlink(temp_path_pack->buf); + unlink(temp_path_idx->buf); + strbuf_addf(&status->error_message, + "index-pack failed on '%s'", + temp_path_pack->buf); + /* + * Lets assume that index-pack failed because the + * downloaded file is corrupt (truncated). + * + * Retry it as if the network had dropped. 
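+ *
+ * (For reference, the command built above is roughly
+ * "git index-pack [-v] --no-rev-index -o <tmp>.idx <tmp>.pack",
+ * so a truncated .pack is the most likely cause.)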
+ */ + status->retry = GH__RETRY_MODE__TRANSIENT; + status->ec = GH__ERROR_CODE__INDEX_PACK_FAILED; goto cleanup; } - fdopen_tempfile(params->tempfile, "w"); + if (packfile_checksum) { + /* + * stdout from index-pack should have the packfile hash. + * Extract it and use it in the final packfile name. + * + * TODO What kind of validation should we do on the + * TODO string and is there ever any other output besides + * TODO just the checksum ? + */ + strbuf_trim_trailing_newline(&ip_stdout); + + strbuf_addbuf(packfile_checksum, &ip_stdout); + } cleanup: - strbuf_release(&buf_path); + strbuf_release(&ip_stdout); + child_process_clear(&ip); +} + +static void my_finalize_packfile(struct gh__request_params *params, + struct gh__response_status *status, + const struct strbuf *temp_path_pack, + const struct strbuf *temp_path_idx, + struct strbuf *final_path_pack, + struct strbuf *final_path_idx, + struct strbuf *final_filename) +{ + if (finalize_object_file(temp_path_pack->buf, final_path_pack->buf) || + finalize_object_file(temp_path_idx->buf, final_path_idx->buf)) { + unlink(temp_path_pack->buf); + unlink(temp_path_idx->buf); + unlink(final_path_pack->buf); + unlink(final_path_idx->buf); + strbuf_addf(&status->error_message, + "could not install packfile '%s'", + final_path_pack->buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; + return; + } + + if (params->result_list) { + struct strbuf result_msg = STRBUF_INIT; + + strbuf_addf(&result_msg, "packfile %s", final_filename->buf); + string_list_append(params->result_list, result_msg.buf); + strbuf_release(&result_msg); + } } /* @@ -1795,93 +1917,337 @@ static void create_tempfile_for_loose( static void install_packfile(struct gh__request_params *params, struct gh__response_status *status) { - struct child_process ip = CHILD_PROCESS_INIT; + struct strbuf temp_path_pack = STRBUF_INIT; + struct strbuf temp_path_idx = STRBUF_INIT; + struct strbuf packfile_checksum = STRBUF_INIT; + struct strbuf final_path_pack = STRBUF_INIT; + struct strbuf final_path_idx = STRBUF_INIT; + struct strbuf final_filename = STRBUF_INIT; + + gh__response_status__zero(status); /* - * When we request more than 1 object, the server should always - * send us a packfile. + * After the download is complete, we will need to steal the file + * from the tempfile() class (so that it doesn't magically delete + * it when we close the file handle) and then index it. 
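+ *
+ * Roughly:
+ *
+ *     "<base>.temp"        (the download)
+ *     "<base>.temp.pack"   (after the rename below)
+ *     "<base>.temp.idx"    (written by index-pack)
+ *     "<odb>/pack/vfs-<checksum>.{pack,idx}"  (finalized)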
*/ - if (strcmp(status->content_type.buf, - "application/x-git-packfile")) { - strbuf_addf(&status->error_message, - "install_packfile: received unknown content-type '%s'", - status->content_type.buf); - status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; - goto cleanup; - } - - gh__response_status__zero(status); + strbuf_addf(&temp_path_pack, "%s.pack", + get_tempfile_path(params->tempfile)); + strbuf_addf(&temp_path_idx, "%s.idx", + get_tempfile_path(params->tempfile)); if (rename_tempfile(¶ms->tempfile, - params->temp_path_pack.buf) == -1) { + temp_path_pack.buf) == -1) { strbuf_addf(&status->error_message, "could not rename packfile to '%s'", - params->temp_path_pack.buf); + temp_path_pack.buf); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; goto cleanup; } - strvec_push(&ip.args, "index-pack"); - if (gh__cmd_opts.show_progress) - strvec_push(&ip.args, "-v"); - strvec_pushl(&ip.args, "-o", params->temp_path_idx.buf, NULL); - strvec_push(&ip.args, params->temp_path_pack.buf); - ip.git_cmd = 1; - ip.no_stdin = 1; - ip.no_stdout = 1; + my_run_index_pack(params, status, &temp_path_pack, &temp_path_idx, + &packfile_checksum); + if (status->ec != GH__ERROR_CODE__OK) + goto cleanup; + + create_final_packfile_pathnames("vfs", packfile_checksum.buf, NULL, + &final_path_pack, &final_path_idx, + &final_filename); + my_finalize_packfile(params, status, + &temp_path_pack, &temp_path_idx, + &final_path_pack, &final_path_idx, + &final_filename); + +cleanup: + strbuf_release(&temp_path_pack); + strbuf_release(&temp_path_idx); + strbuf_release(&packfile_checksum); + strbuf_release(&final_path_pack); + strbuf_release(&final_path_idx); + strbuf_release(&final_filename); +} + +/* + * bswap.h only defines big endian functions. + * The GVFS Protocol defines fields in little endian. + */ +static inline uint64_t my_get_le64(uint64_t le_val) +{ +#if GIT_BYTE_ORDER == GIT_LITTLE_ENDIAN + return le_val; +#else + return default_bswap64(le_val); +#endif +} + +#define MY_MIN(x,y) (((x) < (y)) ? (x) : (y)) +#define MY_MAX(x,y) (((x) > (y)) ? (x) : (y)) + +/* + * Copy the `nr_bytes_total` from `fd_in` to `fd_out`. + * + * This could be used to extract a single packfile from + * a multipart file, for example. + */ +static int my_copy_fd_len(int fd_in, int fd_out, ssize_t nr_bytes_total) +{ + char buffer[8192]; + + while (nr_bytes_total > 0) { + ssize_t len_to_read = MY_MIN(nr_bytes_total, sizeof(buffer)); + ssize_t nr_read = xread(fd_in, buffer, len_to_read); + + if (!nr_read) + break; + if (nr_read < 0) + return -1; + + if (write_in_full(fd_out, buffer, nr_read) < 0) + return -1; + + nr_bytes_total -= nr_read; + } + + return 0; +} + +/* + * Copy the `nr_bytes_total` from `fd_in` to `fd_out` AND save the + * final `tail_len` bytes in the given buffer. + * + * This could be used to extract a single packfile from + * a multipart file and read the final SHA into the buffer. + */ +static int my_copy_fd_len_tail(int fd_in, int fd_out, ssize_t nr_bytes_total, + unsigned char *buf_tail, ssize_t tail_len) +{ + memset(buf_tail, 0, tail_len); + + if (nr_bytes_total < tail_len) + return my_copy_fd_len(fd_in, fd_out, nr_bytes_total); + + if (my_copy_fd_len(fd_in, fd_out, (nr_bytes_total - tail_len)) < 0) + return -1; + + if (xread(fd_in, (char *)buf_tail, tail_len) != tail_len) + return -1; + + if (write_in_full(fd_out, buf_tail, tail_len) < 0) + return -1; + + return 0; +} + +/* + * See the protocol document for the per-packfile header. 
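+ *
+ * As this code assumes it, the multipack stream is laid out as:
+ *
+ *     "GPRE \x01"    6-byte signature + version (see install_prefetch)
+ *     <np>           2-byte little-endian packfile count
+ *
+ * followed by <np> instances of:
+ *
+ *     <timestamp>    8 bytes, little-endian
+ *     <pack_len>     8 bytes, little-endian
+ *     <idx_len>      8 bytes, little-endian (zero or all-ones
+ *                    means no .idx follows)
+ *     <pack data>    <pack_len> bytes
+ *     <idx data>     <idx_len> bytes, when present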
+ */ +struct ph { + uint64_t timestamp; + uint64_t pack_len; + uint64_t idx_len; +}; + +/* + * Extract the next packfile from the multipack. + */ +static void extract_packfile_from_multipack( + struct gh__request_params *params, + struct gh__response_status *status, + int fd_multipack, + unsigned short k) +{ + struct ph ph; + struct tempfile *tempfile_pack = NULL; + struct tempfile *tempfile_idx = NULL; + int result = -1; + int b_no_idx_in_multipack; + struct object_id packfile_checksum; + char hex_checksum[GIT_MAX_HEXSZ + 1]; + struct strbuf buf_timestamp = STRBUF_INIT; + struct strbuf temp_path_pack = STRBUF_INIT; + struct strbuf temp_path_idx = STRBUF_INIT; + struct strbuf final_path_pack = STRBUF_INIT; + struct strbuf final_path_idx = STRBUF_INIT; + struct strbuf final_filename = STRBUF_INIT; + + if (xread(fd_multipack, &ph, sizeof(ph)) != sizeof(ph)) { + strbuf_addf(&status->error_message, + "could not read header for packfile[%d] in multipack", + k); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; + goto done; + } + + ph.timestamp = my_get_le64(ph.timestamp); + ph.pack_len = my_get_le64(ph.pack_len); + ph.idx_len = my_get_le64(ph.idx_len); + + if (!ph.pack_len) { + strbuf_addf(&status->error_message, + "packfile[%d]: zero length packfile?", k); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; + goto done; + } + + b_no_idx_in_multipack = (ph.idx_len == maximum_unsigned_value_of_type(uint64_t) || + ph.idx_len == 0); + + if (b_no_idx_in_multipack) { + my_create_tempfile(status, 0, "pack", &tempfile_pack, NULL, NULL); + if (!tempfile_pack) + goto done; + } else { + /* create a pair of tempfiles with the same basename */ + my_create_tempfile(status, 0, "pack", &tempfile_pack, "idx", &tempfile_idx); + if (!tempfile_pack || !tempfile_idx) + goto done; + } /* - * Note that I DO NOT have a trace2 region around the - * index-pack process by itself. Currently, we are inside the - * trace2 region for running the request and that's fine. - * Later, if/when we stream the download directly to - * index-pack, it will be inside under the same region anyway. - * So, I'm not going to introduce it here. + * Copy the current packfile from the open stream and capture + * the checksum. + * + * TODO This assumes that the checksum is SHA1. Fix this if/when + * TODO Git converts to SHA256. */ - if (run_command(&ip)) { - unlink(params->temp_path_pack.buf); - unlink(params->temp_path_idx.buf); + result = my_copy_fd_len_tail(fd_multipack, + get_tempfile_fd(tempfile_pack), + ph.pack_len, + packfile_checksum.hash, + GIT_SHA1_RAWSZ); + packfile_checksum.algo = GIT_HASH_SHA1; + + if (result < 0){ strbuf_addf(&status->error_message, - "index-pack failed on '%s'", - params->temp_path_pack.buf); + "could not extract packfile[%d] from multipack", + k); + goto done; + } + strbuf_addstr(&temp_path_pack, get_tempfile_path(tempfile_pack)); + close_tempfile_gently(tempfile_pack); + + oid_to_hex_r(hex_checksum, &packfile_checksum); + + if (b_no_idx_in_multipack) { /* - * Lets assume that index-pack failed because the - * downloaded file is corrupt (truncated). - * - * Retry it as if the network had dropped. + * The server did not send the corresponding .idx, so + * we have to compute it ourselves. 
*/ - status->retry = GH__RETRY_MODE__TRANSIENT; - status->ec = GH__ERROR_CODE__INDEX_PACK_FAILED; + strbuf_addbuf(&temp_path_idx, &temp_path_pack); + strbuf_strip_suffix(&temp_path_idx, ".pack"); + strbuf_addstr(&temp_path_idx, ".idx"); + + my_run_index_pack(params, status, + &temp_path_pack, &temp_path_idx, + NULL); + if (status->ec != GH__ERROR_CODE__OK) + goto done; + + } else { + /* + * Server send the .idx immediately after the .pack in the + * data stream. I'm tempted to verify it, but that defeats + * the purpose of having it cached... + */ + if (my_copy_fd_len(fd_multipack, get_tempfile_fd(tempfile_idx), + ph.idx_len) < 0) { + strbuf_addf(&status->error_message, + "could not extract index[%d] in multipack", + k); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; + goto done; + } + + strbuf_addstr(&temp_path_idx, get_tempfile_path(tempfile_idx)); + close_tempfile_gently(tempfile_idx); + } + + strbuf_addf(&buf_timestamp, "%u", (unsigned int)ph.timestamp); + create_final_packfile_pathnames("prefetch", buf_timestamp.buf, hex_checksum, + &final_path_pack, &final_path_idx, + &final_filename); + + my_finalize_packfile(params, status, + &temp_path_pack, &temp_path_idx, + &final_path_pack, &final_path_idx, + &final_filename); + +done: + delete_tempfile(&tempfile_pack); + delete_tempfile(&tempfile_idx); + strbuf_release(&temp_path_pack); + strbuf_release(&temp_path_idx); + strbuf_release(&final_path_pack); + strbuf_release(&final_path_idx); + strbuf_release(&final_filename); +} + +/* + * Cut apart the received multipart response into individual packfiles + * and install each one. + */ +static void install_prefetch(struct gh__request_params *params, + struct gh__response_status *status) +{ + static unsigned char v1_h[6] = { 'G', 'P', 'R', 'E', ' ', 0x01 }; + + struct mh { + unsigned char h[6]; + unsigned char np[2]; + }; + + struct mh mh; + unsigned short np; + unsigned short k; + int fd = -1; + + struct strbuf temp_path_mp = STRBUF_INIT; + + /* + * Steal the multi-part file from the tempfile class. 
+ */ + strbuf_addf(&temp_path_mp, "%s.mp", get_tempfile_path(params->tempfile)); + if (rename_tempfile(¶ms->tempfile, temp_path_mp.buf) == -1) { + strbuf_addf(&status->error_message, + "could not rename prefetch tempfile to '%s'", + temp_path_mp.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; goto cleanup; } - if (finalize_object_file(params->temp_path_pack.buf, - params->final_path_pack.buf) || - finalize_object_file(params->temp_path_idx.buf, - params->final_path_idx.buf)) { - unlink(params->temp_path_pack.buf); - unlink(params->temp_path_idx.buf); - unlink(params->final_path_pack.buf); - unlink(params->final_path_idx.buf); + fd = git_open_cloexec(temp_path_mp.buf, O_RDONLY); + if (fd == -1) { strbuf_addf(&status->error_message, - "could not install packfile '%s'", - params->final_path_pack.buf); - status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; + "could not reopen prefetch tempfile '%s'", + temp_path_mp.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; goto cleanup; } + if ((xread(fd, &mh, sizeof(mh)) != sizeof(mh)) || + (memcmp(mh.h, &v1_h, sizeof(mh.h)))) { + strbuf_addstr(&status->error_message, + "invalid prefetch multipart header"); + goto cleanup; + } - if (params->result_list) { - struct strbuf result_msg = STRBUF_INIT; + np = (unsigned short)mh.np[0] + ((unsigned short)mh.np[1] << 8); + if (np) + trace2_data_intmax(TR2_CAT, NULL, + "prefetch/packfile_count", np); - strbuf_addf(&result_msg, "packfile %s", - params->final_packfile_filename.buf); - string_list_append(params->result_list, result_msg.buf); - strbuf_release(&result_msg); + for (k = 0; k < np; k++) { + extract_packfile_from_multipack(params, status, fd, k); + if (status->ec != GH__ERROR_CODE__OK) + break; } cleanup: - child_process_clear(&ip); + if (fd != -1) + close(fd); + + unlink(temp_path_mp.buf); + strbuf_release(&temp_path_mp); } /* @@ -1918,21 +2284,7 @@ static void install_loose(struct gh__request_params *params, struct gh__response_status *status) { struct strbuf tmp_path = STRBUF_INIT; - - /* - * We expect a loose object when we do a GET -or- when we - * do a POST with only 1 object. - * - * Note that this content type is singular, not plural. - */ - if (strcmp(status->content_type.buf, - "application/x-git-loose-object")) { - strbuf_addf(&status->error_message, - "install_loose: received unknown content-type '%s'", - status->content_type.buf); - status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; - return; - } + struct strbuf loose_path = STRBUF_INIT; gh__response_status__zero(status); @@ -1970,11 +2322,19 @@ static void install_loose(struct gh__request_params *params, * collision we have to assume something else is happening in * parallel and we lost the race. And that's OK. 
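 *
 * (A racing writer would produce a byte-identical file at the
 * same "<odb>/xx/..." loose path anyway, so losing is harmless.)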
*/ - if (finalize_object_file(tmp_path.buf, params->loose_path.buf)) { + if (create_loose_pathname_in_odb(&loose_path, ¶ms->loose_oid)) { + strbuf_addf(&status->error_message, + "cannot create directory for loose object '%s'", + loose_path.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE; + goto cleanup; + } + + if (finalize_object_file(tmp_path.buf, loose_path.buf)) { unlink(tmp_path.buf); strbuf_addf(&status->error_message, "could not install loose object '%s'", - params->loose_path.buf); + loose_path.buf); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE; goto cleanup; } @@ -1990,6 +2350,57 @@ static void install_loose(struct gh__request_params *params, cleanup: strbuf_release(&tmp_path); + strbuf_release(&loose_path); +} + +static void install_result(struct gh__request_params *params, + struct gh__response_status *status) +{ + if (params->objects_mode == GH__OBJECTS_MODE__PREFETCH) { + /* + * The "gvfs/prefetch" API is the only thing that sends + * these multi-part packfiles. According to the protocol + * documentation, they will have this x- content type. + * + * However, it appears that there is a BUG in the origin + * server causing it to sometimes send "text/html" instead. + * So, we silently handle both. + */ + if (!strcmp(status->content_type.buf, + "application/x-gvfs-timestamped-packfiles-indexes")) { + install_prefetch(params, status); + return; + } + + if (!strcmp(status->content_type.buf, "text/html")) { + install_prefetch(params, status); + return; + } + } + + if (!strcmp(status->content_type.buf, "application/x-git-packfile")) { + assert(params->b_is_post); + assert(params->objects_mode == GH__OBJECTS_MODE__POST); + + install_packfile(params, status); + return; + } + + if (!strcmp(status->content_type.buf, + "application/x-git-loose-object")) { + /* + * We get these for "gvfs/objects" GET and POST requests. + * + * Note that this content type is singular, not plural. + */ + install_loose(params, status); + return; + } + + strbuf_addf(&status->error_message, + "install_result: received unknown content-type '%s'", + status->content_type.buf); + status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; } /* @@ -2055,7 +2466,7 @@ static size_t parse_resp_hdr(char *buffer, size_t size, size_t nitems, * Other servers have similar sets of values, but I haven't * compared them in depth. 
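 *
 * For example (hypothetical value), a response carrying
 * "X-RateLimit-Delay: 1000" asks us to tarpit for one second.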
*/ - // trace2_printf("Throttle: %s %s", key.buf, val.buf); + // trace2_printf("%s: Throttle: %s %s", TR2_CAT, key.buf, val.buf); if (!strcmp(key.buf, "X-RateLimit-Resource")) { /* @@ -2066,7 +2477,7 @@ static size_t parse_resp_hdr(char *buffer, size_t size, size_t nitems, strbuf_addstr(&key, "ratelimit/resource"); strbuf_addstr(&key, gh__server_type_label[params->server_type]); - trace2_data_string("gvfs-helper", NULL, key.buf, val.buf); + trace2_data_string(TR2_CAT, NULL, key.buf, val.buf); } else if (!strcmp(key.buf, "X-RateLimit-Delay")) { @@ -2082,7 +2493,7 @@ static size_t parse_resp_hdr(char *buffer, size_t size, size_t nitems, git_parse_ulong(val.buf, &tarpit_delay_ms); - trace2_data_intmax("gvfs-helper", NULL, key.buf, tarpit_delay_ms); + trace2_data_intmax(TR2_CAT, NULL, key.buf, tarpit_delay_ms); } else if (!strcmp(key.buf, "X-RateLimit-Limit")) { @@ -2180,7 +2591,7 @@ static void do_throttle_spin(struct gh__request_params *params, strbuf_addstr(®ion, tr2_label); strbuf_addstr(®ion, gh__server_type_label[params->server_type]); - trace2_region_enter("gvfs-helper", region.buf, NULL); + trace2_region_enter(TR2_CAT, region.buf, NULL); if (gh__cmd_opts.show_progress) progress = start_progress(progress_msg, duration); @@ -2196,7 +2607,7 @@ static void do_throttle_spin(struct gh__request_params *params, display_progress(progress, duration); stop_progress(&progress); - trace2_region_leave("gvfs-helper", region.buf, NULL); + trace2_region_leave(TR2_CAT, region.buf, NULL); strbuf_release(®ion); } @@ -2358,11 +2769,7 @@ static void do_req(const char *url_base, if (params->tempfile) delete_tempfile(¶ms->tempfile); - if (params->b_is_post) - create_tempfile_for_packfile(params, status); - - create_tempfile_for_loose(params, status); - + my_create_tempfile(status, 1, NULL, ¶ms->tempfile, NULL, NULL); if (!params->tempfile || status->ec != GH__ERROR_CODE__OK) return; } else { @@ -2608,6 +3015,7 @@ static void do__http_get__gvfs_config(struct gh__response_status *status, /* cache-servers do not handle gvfs/config REST calls */ params.b_permit_cache_server_if_defined = 0; params.buffer = config_data; + params.objects_mode = GH__OBJECTS_MODE__NONE; params.object_count = 1; /* a bit of a lie */ @@ -2672,6 +3080,7 @@ static void do__http_get__gvfs_object(struct gh__response_status *status, params.b_is_post = 0; params.b_write_to_file = 1; params.b_permit_cache_server_if_defined = 1; + params.objects_mode = GH__OBJECTS_MODE__GET; params.object_count = 1; @@ -2728,6 +3137,7 @@ static void do__http_post__gvfs_objects(struct gh__response_status *status, params.b_is_post = 1; params.b_write_to_file = 1; params.b_permit_cache_server_if_defined = 1; + params.objects_mode = GH__OBJECTS_MODE__POST; params.post_payload = &jw_req.json; @@ -2764,6 +3174,126 @@ static void do__http_post__gvfs_objects(struct gh__response_status *status, jw_release(&jw_req); } +struct find_last_data { + timestamp_t timestamp; + int nr_files; +}; + +static void cb_find_last(const char *full_path, size_t full_path_len, + const char *file_path, void *void_data) +{ + struct find_last_data *data = void_data; + const char *val; + timestamp_t t; + + if (!skip_prefix(file_path, "prefetch-", &val)) + return; + if (!ends_with(val, ".pack")) + return; + + data->nr_files++; + + /* + * We expect prefetch packfiles named like: + * + * prefetch--.pack + */ + t = strtol(val, NULL, 10); + + data->timestamp = MY_MAX(t, data->timestamp); +} + +/* + * Find the server timestamp on the last prefetch packfile that + * we have in the ODB. 
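+ *
+ * For example, if the pack directory (hypothetically) contains
+ *
+ *     prefetch-1557330000-<sha>.pack
+ *     prefetch-1557333600-<sha>.pack
+ *
+ * we return 1557333600.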
+ * + * TODO I'm going to assume that all prefetch packs are created + * TODO equal and take the one with the largest t value. + * TODO + * TODO Or should we look for one marked with .keep ? + * + * TODO Alternatively, should we maybe get the 2nd largest? + * TODO (Or maybe subtract an hour delta from the largest?) + * TODO + * TODO Since each cache-server maintains its own set of prefetch + * TODO packs (such that 2 requests may hit 2 different + * TODO load-balanced servers and get different answers (with or + * TODO without clock-skew issues)), is it possible for us to miss + * TODO the absolute fringe of new commits and trees? + * TODO + * TODO That is, since the cache-server generates hourly prefetch + * TODO packs, we could do a prefetch and be up-to-date, but then + * TODO do the main fetch and hit a different cache/main server + * TODO and be behind by as much as an hour and have to demand- + * TODO load the commits/trees. + * + * TODO Alternatively, should we compare the last timestamp found + * TODO with "now" and silently do nothing if within an epsilon? + */ +static void find_last_prefetch_timestamp(timestamp_t *last) +{ + struct find_last_data data; + + memset(&data, 0, sizeof(data)); + + for_each_file_in_pack_dir(gh__global.buf_odb_path.buf, cb_find_last, &data); + + *last = data.timestamp; +} + +/* + * Call "gvfs/prefetch[?lastPackTimestamp=]" REST API to + * fetch a series of packfiles and write them to the ODB. + * + * Return a list of packfile names. + */ +static void do__http_get__gvfs_prefetch(struct gh__response_status *status, + timestamp_t seconds_since_epoch, + struct string_list *result_list) +{ + struct gh__request_params params = GH__REQUEST_PARAMS_INIT; + struct strbuf component_url = STRBUF_INIT; + + gh__response_status__zero(status); + + strbuf_addstr(&component_url, "gvfs/prefetch"); + + if (!seconds_since_epoch) + find_last_prefetch_timestamp(&seconds_since_epoch); + if (seconds_since_epoch) + strbuf_addf(&component_url, "?lastPackTimestamp=%"PRItime, + seconds_since_epoch); + + params.b_is_post = 0; + params.b_write_to_file = 1; + params.b_permit_cache_server_if_defined = 1; + params.objects_mode = GH__OBJECTS_MODE__PREFETCH; + + params.object_count = -1; + + params.result_list = result_list; + + params.headers = http_copy_default_headers(); + params.headers = curl_slist_append(params.headers, + "X-TFS-FedAuthRedirect: Suppress"); + params.headers = curl_slist_append(params.headers, + "Pragma: no-cache"); + params.headers = curl_slist_append(params.headers, + "Accept: application/x-gvfs-timestamped-packfiles-indexes"); + + if (gh__cmd_opts.show_progress) + strbuf_addf(¶ms.progress_base_phase3_msg, + "Prefetch %"PRItime" (%s)", + seconds_since_epoch, + show_date(seconds_since_epoch, 0, + DATE_MODE(ISO8601))); + + do_req__with_fallback(component_url.buf, ¶ms, status); + + gh__request_params__release(¶ms); + strbuf_release(&component_url); +} + /* * Drive one or more HTTP GET requests to fetch the objects * in the given OIDSET. These are received into loose objects. @@ -3064,7 +3594,83 @@ static enum gh__error_code do_sub_cmd__post(int argc, const char **argv) } /* - * Handle the 'objects.get' and 'objects.post' verbs in "server mode". + * Interpret the given string as a timestamp and compute an absolute + * UTC-seconds-since-epoch value (and without TZ). + * + * Note that the gvfs/prefetch API only accepts seconds since epoch, + * so that is all we really need here. 
But there is a tradition of + * various Git commands allowing a variety of formats for args like + * this. For example, see the `--date` arg in `git commit`. We allow + * these other forms mainly for testing purposes. + */ +static int my_parse_since(const char *since, timestamp_t *p_timestamp) +{ + int offset = 0; + int errors = 0; + unsigned long t; + + if (!parse_date_basic(since, p_timestamp, &offset)) + return 0; + + t = approxidate_careful(since, &errors); + if (!errors) { + *p_timestamp = t; + return 0; + } + + return -1; +} + +/* + * Ask the server for all available packfiles -or- all available since + * the given timestamp. + */ +static enum gh__error_code do_sub_cmd__prefetch(int argc, const char **argv) +{ + static const char *since_str; + static struct option prefetch_options[] = { + OPT_STRING(0, "since", &since_str, N_("since"), N_("seconds since epoch")), + OPT_END(), + }; + + struct gh__response_status status = GH__RESPONSE_STATUS_INIT; + struct string_list result_list = STRING_LIST_INIT_DUP; + enum gh__error_code ec = GH__ERROR_CODE__OK; + timestamp_t seconds_since_epoch = 0; + int k; + + trace2_cmd_mode("prefetch"); + + if (argc > 1 && !strcmp(argv[1], "-h")) + usage_with_options(prefetch_usage, prefetch_options); + + argc = parse_options(argc, argv, NULL, prefetch_options, prefetch_usage, 0); + if (since_str && *since_str) { + if (my_parse_since(since_str, &seconds_since_epoch)) + die("could not parse 'since' field"); + } + + finish_init(1); + + do__http_get__gvfs_prefetch(&status, seconds_since_epoch, &result_list); + + ec = status.ec; + + for (k = 0; k < result_list.nr; k++) + printf("%s\n", result_list.items[k].string); + + if (ec != GH__ERROR_CODE__OK) + error("prefetch: %s", status.error_message.buf); + + gh__response_status__release(&status); + string_list_clear(&result_list, 0); + + return ec; +} + +/* + * Handle the 'objects.get' and 'objects.post' and 'objects.prefetch' + * verbs in "server mode". * * Only call error() and set ec for hard errors where we cannot * communicate correctly with the foreground client process. Pass any @@ -3084,45 +3690,73 @@ static enum gh__error_code do_server_subprocess__objects(const char *verb_line) int k; enum gh__objects_mode objects_mode; unsigned long nr_oid_total = 0; + timestamp_t seconds_since_epoch = 0; if (!strcmp(verb_line, "objects.get")) objects_mode = GH__OBJECTS_MODE__GET; else if (!strcmp(verb_line, "objects.post")) objects_mode = GH__OBJECTS_MODE__POST; + else if (!strcmp(verb_line, "objects.prefetch")) + objects_mode = GH__OBJECTS_MODE__PREFETCH; else { error("server: unexpected objects-mode verb '%s'", verb_line); ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; goto cleanup; } - while (1) { - len = packet_read_line_gently(0, NULL, &line); - if (len < 0 || !line) - break; + switch (objects_mode) { + case GH__OBJECTS_MODE__GET: + case GH__OBJECTS_MODE__POST: + while (1) { + len = packet_read_line_gently(0, NULL, &line); + if (len < 0 || !line) + break; - if (get_oid_hex(line, &oid)) { - error("server: invalid oid syntax '%s'", line); - ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + if (get_oid_hex(line, &oid)) { + error("server: invalid oid syntax '%s'", line); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + + if (!oidset_insert(&oids, &oid)) + nr_oid_total++; + } + + if (!nr_oid_total) { + /* if zero objects requested, trivial OK. 
*/ + if (packet_write_fmt_gently(1, "ok\n")) { + error("server: cannot write 'get' result to client"); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + } else + ec = GH__ERROR_CODE__OK; goto cleanup; } - if (!oidset_insert(&oids, &oid)) - nr_oid_total++; - } + if (objects_mode == GH__OBJECTS_MODE__GET) + do__http_get__fetch_oidset(&status, &oids, + nr_oid_total, &result_list); + else + do__http_post__fetch_oidset(&status, &oids, + nr_oid_total, &result_list); + break; - if (!nr_oid_total) { - if (packet_write_fmt_gently(1, "ok\n")) { - error("server: cannot write 'get' result to client"); - ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; - } else - ec = GH__ERROR_CODE__OK; - goto cleanup; - } + case GH__OBJECTS_MODE__PREFETCH: + /* get optional timestamp line */ + while (1) { + len = packet_read_line_gently(0, NULL, &line); + if (len < 0 || !line) + break; - if (objects_mode == GH__OBJECTS_MODE__GET) - do__http_get__fetch_oidset(&status, &oids, nr_oid_total, &result_list); - else - do__http_post__fetch_oidset(&status, &oids, nr_oid_total, &result_list); + seconds_since_epoch = strtoul(line, NULL, 10); + } + + do__http_get__gvfs_prefetch(&status, seconds_since_epoch, + &result_list); + break; + + default: + BUG("unexpected object_mode in switch '%d'", objects_mode); + } /* * Write pathname of the ODB where we wrote all of the objects @@ -3346,12 +3980,16 @@ static enum gh__error_code do_sub_cmd(int argc, const char **argv) if (!strcmp(argv[0], "config")) return do_sub_cmd__config(argc, argv); + if (!strcmp(argv[0], "prefetch")) + return do_sub_cmd__prefetch(argc, argv); + + /* + * server mode is for talking with git.exe via the "gh_client_" API + * using packet-line format. + */ if (!strcmp(argv[0], "server")) return do_sub_cmd__server(argc, argv); - // TODO have "test" mode that could be used to drive - // TODO unit testing. - return GH__ERROR_CODE__USAGE; } diff --git a/t/helper/test-gvfs-protocol.c b/t/helper/test-gvfs-protocol.c index fa21c18960e47a..9632e3205a7aee 100644 --- a/t/helper/test-gvfs-protocol.c +++ b/t/helper/test-gvfs-protocol.c @@ -10,7 +10,9 @@ #include "strbuf.h" #include "string-list.h" #include "trace2.h" +#include "copy.h" #include "object.h" +#include "object-file.h" #include "object-store.h" #include "replace-object.h" #include "repository.h" @@ -21,6 +23,7 @@ #include "date.h" #include "wrapper.h" #include "git-zlib.h" +#include "packfile.h" #define TR2_CAT "test-gvfs-protocol" @@ -548,9 +551,6 @@ static enum worker_result send_loose_object(const struct object_id *oid, return send_http_error(1, 404, "Not Found", -1, WR_MAYHEM); } - trace2_printf("%s: OBJECT type=%d len=%ld '%.40s'", TR2_CAT, - type, size, (const char *)content); - /* * We are blending several somewhat independent concepts here: * @@ -863,7 +863,6 @@ static enum worker_result get_packfile_from_oids( goto done; } - trace2_printf("%s: pack-objects returned %d bytes", TR2_CAT, buf_packfile->len); wr = WR_OK; done: @@ -1011,6 +1010,305 @@ static enum worker_result do__gvfs_objects__post(struct req *req) return wr; } +/* + * bswap.h only defines big endian functions. + * The GVFS Protocol defines fields in little endian. 
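+ *
+ * For example, a timestamp of 2 arrives on the wire as the bytes
+ * { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }: a no-op on
+ * a little-endian host and a byte-swap on a big-endian host.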
+ */
+static inline uint64_t my_get_le64(uint64_t le_val)
+{
+#if GIT_BYTE_ORDER == GIT_LITTLE_ENDIAN
+	return le_val;
+#else
+	return default_bswap64(le_val);
+#endif
+}
+
+static inline uint16_t my_get_le16(uint16_t le_val)
+{
+#if GIT_BYTE_ORDER == GIT_LITTLE_ENDIAN
+	return le_val;
+#else
+	return default_bswap16(le_val);
+#endif
+}
+
+/*
+ * GVFS Protocol headers for the multipack format
+ * All integer values are little-endian on the wire.
+ *
+ * Note: technically, the protocol defines the `ph` fields as signed, but
+ * that makes a mess of the bswap routines and we're not going to overflow
+ * them for a very long time.
+ */
+
+static unsigned char v1_h[6] = { 'G', 'P', 'R', 'E', ' ', 0x01 };
+
+struct ph {
+	uint64_t timestamp;
+	uint64_t len_pack;
+	uint64_t len_idx;
+};
+
+/*
+ * Accumulate a list of commits-and-trees packfiles we have in the local ODB.
+ * The test script should have pre-created a set of "ct-<epoch>.pack" and .idx
+ * files for us. We serve these as is and DO NOT try to dynamically create
+ * new commits/trees packfiles (like the cache-server does). We are only
+ * testing if/whether gvfs-helper.exe can receive one or more packfiles and
+ * idx files over the protocol.
+ */
+struct ct_pack_item {
+	struct ph ph;
+	struct strbuf path_pack;
+	struct strbuf path_idx;
+};
+
+static void ct_pack_item__free(struct ct_pack_item *item)
+{
+	if (!item)
+		return;
+	strbuf_release(&item->path_pack);
+	strbuf_release(&item->path_idx);
+	free(item);
+}
+
+struct ct_pack_data {
+	struct ct_pack_item **items;
+	size_t nr, alloc;
+};
+
+static void ct_pack_data__release(struct ct_pack_data *data)
+{
+	int k;
+
+	if (!data)
+		return;
+
+	for (k = 0; k < data->nr; k++)
+		ct_pack_item__free(data->items[k]);
+
+	FREE_AND_NULL(data->items);
+	data->nr = 0;
+	data->alloc = 0;
+}
+
+static void cb_ct_pack(const char *full_path, size_t full_path_len,
+		       const char *file_path, void *void_data)
+{
+	struct ct_pack_data *data = void_data;
+	struct ct_pack_item *item = NULL;
+	struct stat st;
+	const char *v;
+
+	/*
+	 * We only want "ct-<epoch>.pack" files. The test script creates
+	 * cached commits-and-trees packfiles with this prefix to avoid
+	 * confusion with prefetch packfiles received by gvfs-helper.
+	 */
+	if (!ends_with(file_path, ".pack"))
+		return;
+	if (!skip_prefix(file_path, "ct-", &v))
+		return;
+
+	item = (struct ct_pack_item *)xcalloc(1, sizeof(*item));
+	strbuf_init(&item->path_pack, 0);
+	strbuf_addstr(&item->path_pack, full_path);
+
+	strbuf_init(&item->path_idx, 0);
+	strbuf_addstr(&item->path_idx, full_path);
+	strbuf_strip_suffix(&item->path_idx, ".pack");
+	strbuf_addstr(&item->path_idx, ".idx");
+
+	item->ph.timestamp = (uint64_t)strtoul(v, NULL, 10);
+
+	lstat(item->path_pack.buf, &st);
+	item->ph.len_pack = (uint64_t)st.st_size;
+
+	if (string_list_has_string(&mayhem_list, "no_prefetch_idx"))
+		item->ph.len_idx = maximum_unsigned_value_of_type(uint64_t);
+	else if (lstat(item->path_idx.buf, &st) < 0)
+		item->ph.len_idx = maximum_unsigned_value_of_type(uint64_t);
+	else
+		item->ph.len_idx = (uint64_t)st.st_size;
+
+	ALLOC_GROW(data->items, data->nr + 1, data->alloc);
+	data->items[data->nr++] = item;
+}
+
+/*
+ * Sort by increasing EPOCH time.
+ */
+static int ct_pack_sort_compare(const void *_a, const void *_b)
+{
+	const struct ct_pack_item *a = *(const struct ct_pack_item **)_a;
+	const struct ct_pack_item *b = *(const struct ct_pack_item **)_b;
+	return (a->ph.timestamp < b->ph.timestamp) ?
-1 : (a->ph.timestamp != b->ph.timestamp); +} + +static enum worker_result send_ct_item(const struct ct_pack_item *item) +{ + struct ph ph_le; + int fd_pack = -1; + int fd_idx = -1; + enum worker_result wr = WR_OK; + + /* send per-packfile header. all fields are little-endian on the wire. */ + ph_le.timestamp = my_get_le64(item->ph.timestamp); + ph_le.len_pack = my_get_le64(item->ph.len_pack); + ph_le.len_idx = my_get_le64(item->ph.len_idx); + + if (write_in_full(1, &ph_le, sizeof(ph_le)) < 0) { + logerror("unable to write ph_le"); + wr = WR_IO_ERROR; + goto done; + } + + trace2_printf("%s: sending prefetch pack '%s'", TR2_CAT, item->path_pack.buf); + + fd_pack = git_open_cloexec(item->path_pack.buf, O_RDONLY); + if (fd_pack == -1 || copy_fd(fd_pack, 1)) { + logerror("could not send packfile"); + wr = WR_IO_ERROR; + goto done; + } + + if (item->ph.len_idx != maximum_unsigned_value_of_type(uint64_t)) { + trace2_printf("%s: sending prefetch idx '%s'", TR2_CAT, item->path_idx.buf); + + fd_idx = git_open_cloexec(item->path_idx.buf, O_RDONLY); + if (fd_idx == -1 || copy_fd(fd_idx, 1)) { + logerror("could not send idx"); + wr = WR_IO_ERROR; + goto done; + } + } + +done: + if (fd_pack != -1) + close(fd_pack); + if (fd_idx != -1) + close(fd_idx); + return wr; +} + +/* + * The GVFS Protocol defines the lastTimeStamp parameter as the value + * of the last prefetch pack that the client has. Therefore, we only + * want to send newer ones. + */ +static int want_ct_pack(const struct ct_pack_item *item, timestamp_t last_timestamp) +{ + return item->ph.timestamp > last_timestamp; +} + +static enum worker_result send_multipack(struct ct_pack_data *data, + timestamp_t last_timestamp) +{ + struct strbuf response_header = STRBUF_INIT; + struct strbuf uuid = STRBUF_INIT; + enum worker_result wr; + size_t content_len = 0; + unsigned short np = 0; + unsigned short np_le; + int k; + + /* + * Precompute the content-length so that we don't have to deal with + * chunking it. 
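+ * The body layout is: the 6-byte version header, a little-endian
+ * 16-bit packfile count, and then, per packfile, a 24-byte 'ph'
+ * header followed by the raw .pack bytes and (if present) the raw
+ * .idx bytes.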
+ */
+	content_len += sizeof(v1_h) + sizeof(np);
+	for (k = 0; k < data->nr; k++) {
+		struct ct_pack_item *item = data->items[k];
+
+		if (!want_ct_pack(item, last_timestamp))
+			continue;
+
+		np++;
+		content_len += sizeof(struct ph);
+		content_len += item->ph.len_pack;
+		if (item->ph.len_idx != maximum_unsigned_value_of_type(uint64_t))
+			content_len += item->ph.len_idx;
+	}
+
+	strbuf_addstr(&response_header, "HTTP/1.1 200 OK\r\n");
+	strbuf_addstr(&response_header, "Cache-Control: private\r\n");
+	strbuf_addstr(&response_header,
+		      "Content-Type: application/x-gvfs-timestamped-packfiles-indexes\r\n");
+	strbuf_addf(&response_header, "Content-Length: %d\r\n", (int)content_len);
+	strbuf_addf(&response_header, "Server: test-gvfs-protocol/%s\r\n", git_version_string);
+	strbuf_addf(&response_header, "Date: %s\r\n", show_date(time(NULL), 0, DATE_MODE(RFC2822)));
+	gen_fake_uuid(&uuid);
+	strbuf_addf(&response_header, "X-VSS-E2EID: %s\r\n", uuid.buf);
+	strbuf_addstr(&response_header, "\r\n");
+
+	if (write_in_full(1, response_header.buf, response_header.len) < 0) {
+		logerror("unable to write response header");
+		wr = WR_IO_ERROR;
+		goto done;
+	}
+
+	/* send protocol version header */
+	if (write_in_full(1, v1_h, sizeof(v1_h)) < 0) {
+		logerror("unable to write v1_h");
+		wr = WR_IO_ERROR;
+		goto done;
+	}
+
+	/* send number of packfiles */
+	np_le = my_get_le16(np);
+	if (write_in_full(1, &np_le, sizeof(np_le)) < 0) {
+		logerror("unable to write np");
+		wr = WR_IO_ERROR;
+		goto done;
+	}
+
+	for (k = 0; k < data->nr; k++) {
+		if (!want_ct_pack(data->items[k], last_timestamp))
+			continue;
+
+		wr = send_ct_item(data->items[k]);
+		if (wr != WR_OK)
+			goto done;
+	}
+
+	wr = WR_OK;
+
+done:
+	strbuf_release(&uuid);
+	strbuf_release(&response_header);
+
+	return wr;
+}
+
+static enum worker_result do__gvfs_prefetch__get(struct req *req)
+{
+	struct ct_pack_data data;
+	timestamp_t last_timestamp = 0;
+	enum worker_result wr;
+
+	memset(&data, 0, sizeof(data));
+
+	if (req->quest_args.len) {
+		const char *key = strstr(req->quest_args.buf, "lastPackTimestamp=");
+		if (key) {
+			const char *val;
+			if (skip_prefix(key, "lastPackTimestamp=", &val)) {
+				last_timestamp = strtol(val, NULL, 10);
+			}
+		}
+	}
+	trace2_printf("%s: prefetch/since %"PRItime, TR2_CAT, last_timestamp);
+
+	for_each_file_in_pack_dir(get_object_directory(), cb_ct_pack, &data);
+	QSORT(data.items, data.nr, ct_pack_sort_compare);
+
+	wr = send_multipack(&data, last_timestamp);
+
+	ct_pack_data__release(&data);
+
+	return wr;
+}
+
 /*
  * Read the HTTP request up to the start of the optional message-body.
  * We do this byte-by-byte because we have keep-alive turned on and
@@ -1166,6 +1464,11 @@ static enum worker_result req__read(struct req *req, int fd)
 	 * We let our caller read/chunk it in as appropriate.
 	 */
 done:
+
+#if 0
+	/*
+	 * This is useful for debugging the request, but very noisy.
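+	 * Change the "#if 0" above to "#if 1" to re-enable it temporarily.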
+ */ if (trace2_is_enabled()) { struct string_list_item *item; trace2_printf("%s: %s", TR2_CAT, req->start_line.buf); @@ -1180,6 +1483,7 @@ static enum worker_result req__read(struct req *req, int fd) for_each_string_list_item(item, &req->header_list) trace2_printf("%s: Hdrs: %s", TR2_CAT, item->string); } +#endif return WR_OK; } @@ -1228,6 +1532,12 @@ static enum worker_result dispatch(struct req *req) return do__gvfs_config__get(req); } + if (!strcmp(req->gvfs_api.buf, "gvfs/prefetch")) { + + if (!strcmp(method, "GET")) + return do__gvfs_prefetch__get(req); + } + return send_http_error(1, 501, "Not Implemented", -1, WR_OK | WR_HANGUP); } diff --git a/t/t5799-gvfs-helper.sh b/t/t5799-gvfs-helper.sh index bf4174bd84ce00..ffb4eea95768ca 100755 --- a/t/t5799-gvfs-helper.sh +++ b/t/t5799-gvfs-helper.sh @@ -24,8 +24,8 @@ test_set_port GIT_TEST_GVFS_PROTOCOL_PORT # actually use it). We are only testing explicit object # fetching using gvfs-helper.exe in isolation. # -REPO_SRC="$PWD"/repo_src -REPO_T1="$PWD"/repo_t1 +REPO_SRC="$(pwd)"/repo_src +REPO_T1="$(pwd)"/repo_t1 # Setup some loopback URLs where test-gvfs-protocol.exe will be # listening. We will spawn it directly inside the repo_src directory, @@ -44,22 +44,22 @@ HOST_PORT=127.0.0.1:$GIT_TEST_GVFS_PROTOCOL_PORT ORIGIN_URL=http://$HOST_PORT/servertype/origin CACHE_URL=http://$HOST_PORT/servertype/cache -SHARED_CACHE_T1="$PWD"/shared_cache_t1 +SHARED_CACHE_T1="$(pwd)"/shared_cache_t1 # The pid-file is created by test-gvfs-protocol.exe when it starts. # The server will shut down if/when we delete it. (This is a little # easier than killing it by PID.) # -PID_FILE="$PWD"/pid-file.pid -SERVER_LOG="$PWD"/OUT.server.log +PID_FILE="$(pwd)"/pid-file.pid +SERVER_LOG="$(pwd)"/OUT.server.log PATH="$GIT_BUILD_DIR/t/helper/:$PATH" && export PATH -OIDS_FILE="$PWD"/oid_list.txt -OIDS_CT_FILE="$PWD"/oid_ct_list.txt -OIDS_BLOBS_FILE="$PWD"/oids_blobs_file.txt -OID_ONE_BLOB_FILE="$PWD"/oid_one_blob_file.txt -OID_ONE_COMMIT_FILE="$PWD"/oid_one_commit_file.txt +OIDS_FILE="$(pwd)"/oid_list.txt +OIDS_CT_FILE="$(pwd)"/oid_ct_list.txt +OIDS_BLOBS_FILE="$(pwd)"/oids_blobs_file.txt +OID_ONE_BLOB_FILE="$(pwd)"/oid_one_blob_file.txt +OID_ONE_COMMIT_FILE="$(pwd)"/oid_one_commit_file.txt # Get a list of available OIDs in repo_src so that we can try to fetch # them and so that we don't have to hard-code a list of known OIDs. @@ -108,6 +108,30 @@ get_one_commit_oid () { return 0 } +# Create a commits-and-trees packfile for use with "prefetch" +# using the given range of commits. +# +create_commits_and_trees_packfile () { + if test $# -eq 2 + then + epoch=$1 + revs=$2 + else + echo "create_commits_and_trees_packfile: Need 2 args" + return 1 + fi + + pack_file="$REPO_SRC"/.git/objects/pack/ct-$epoch.pack + idx_file="$REPO_SRC"/.git/objects/pack/ct-$epoch.idx + + git -C "$REPO_SRC" pack-objects --stdout --revs --filter=blob:none \ + >"$pack_file" <<-EOF + $revs + EOF + git -C "$REPO_SRC" index-pack -o "$idx_file" "$pack_file" + return 0 +} + test_expect_success 'setup repos' ' test_create_repo "$REPO_SRC" && git -C "$REPO_SRC" branch -M main && @@ -115,9 +139,16 @@ test_expect_success 'setup repos' ' # test_commit_bulk() does magic to create a packfile containing # the new commits. # + # We create branches in repo_src, but also remember the branch OIDs + # in files so that we can refer to them in repo_t1, which will not + # have the commits locally (because we do not clone or fetch). 
+	#
 	test_commit_bulk -C "$REPO_SRC" --filename="batch_a.%s.t" 9 &&
+	git -C "$REPO_SRC" branch B1 &&
 	cp "$REPO_SRC"/.git/refs/heads/main m1.branch &&
+	#
 	test_commit_bulk -C "$REPO_SRC" --filename="batch_b.%s.t" 9 &&
+	git -C "$REPO_SRC" branch B2 &&
 	cp "$REPO_SRC"/.git/refs/heads/main m2.branch &&
 	#
 	# test_commit() creates commits, trees, tags, and blobs and leave
@@ -134,8 +165,16 @@ test_expect_success 'setup repos' '
 	test_commit -C "$REPO_SRC" file7.txt &&
 	test_commit -C "$REPO_SRC" file8.txt &&
 	test_commit -C "$REPO_SRC" file9.txt &&
+	git -C "$REPO_SRC" branch B3 &&
 	cp "$REPO_SRC"/.git/refs/heads/main m3.branch &&
 	#
+	# Create some commits-and-trees-only packfiles for testing prefetch.
+	# Set arbitrary EPOCH times to make it easier to test fetch-since.
+	#
+	create_commits_and_trees_packfile 1000000000 B1 &&
+	create_commits_and_trees_packfile 1100000000 B1..B2 &&
+	create_commits_and_trees_packfile 1200000000 B2..B3 &&
+	#
 	# gvfs-helper.exe writes downloaded objects to a shared-cache directory
 	# rather than the ODB inside the .git directory.
 	#
@@ -160,10 +199,10 @@ test_expect_success 'setup repos' '
 	EOF
 	cat <<-EOF >creds.sh &&
 	#!/bin/sh
-	cat "$PWD"/creds.txt
+	cat "$(pwd)"/creds.txt
 	EOF
 	chmod 755 creds.sh &&
-	git -C "$REPO_T1" config --local credential.helper "!f() { cat \"$PWD\"/creds.txt; }; f" &&
+	git -C "$REPO_T1" config --local credential.helper "!f() { cat \"$(pwd)\"/creds.txt; }; f" &&
 	#
 	# Create some test data sets.
 	#
@@ -554,8 +593,8 @@ test_expect_success 'basic: POST-request a single blob' '
 # Request a single commit via POST. Per the GVFS Protocol, the server
 # should implicitly send us a packfile containing the commit and the
 # trees it references. Confirm that properly handled the receipt of
-# the packfile. (Here, we are testing that asking for a single object
-# yields a packfile rather than a loose object.)
+# the packfile. (Here, we are testing that asking for a single commit
+# via POST yields a packfile rather than a loose object.)
 #
 # We DO NOT verify that the packfile contains commits/trees and no blobs
 # because our test helper doesn't implement the filtering.
@@ -587,6 +626,105 @@ test_expect_success 'basic: POST-request a single commit' '
 	verify_connection_count 1
 '
 
+test_expect_success 'basic: PREFETCH w/o arg gets all' '
+	test_when_finished "per_test_cleanup" &&
+	start_gvfs_protocol_server &&
+
+	# Without a "since" argument, the server gives us all "ct-*.pack"
+	# since the EPOCH because we do not have any prefetch packs locally.
+	#
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		prefetch >OUT.output &&
+
+	# gvfs-helper prints a "packfile <path>" message for each received
+	# packfile.
+	#
+	verify_received_packfile_count 3 &&
+
+	stop_gvfs_protocol_server &&
+	verify_connection_count 1
+'
+
+test_expect_success 'basic: PREFETCH w/ arg' '
+	test_when_finished "per_test_cleanup" &&
+	start_gvfs_protocol_server &&
+
+	# Ask for cached packfiles NEWER THAN the given time.
+	#
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		prefetch --since="1000000000" >OUT.output &&
+
+	# gvfs-helper prints a "packfile <path>" message for each received
+	# packfile.
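+	# Only the packs at epochs 1100000000 and 1200000000 are strictly
+	# newer than the requested timestamp, so we expect exactly 2.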
+	#
+	verify_received_packfile_count 2 &&
+
+	stop_gvfs_protocol_server &&
+	verify_connection_count 1
+'
+
+test_expect_success 'basic: PREFETCH mayhem no_prefetch_idx' '
+	test_when_finished "per_test_cleanup" &&
+	start_gvfs_protocol_server_with_mayhem no_prefetch_idx &&
+
+	# Request prefetch packs, but tell the server not to send any
+	# idx files, forcing gvfs-helper to compute them.
+	#
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		prefetch --since="1000000000" >OUT.output &&
+
+	# gvfs-helper prints a "packfile <path>" message for each received
+	# packfile.
+	#
+	verify_received_packfile_count 2 &&
+
+	stop_gvfs_protocol_server &&
+	verify_connection_count 1
+'
+
+test_expect_success 'basic: PREFETCH up-to-date' '
+	test_when_finished "per_test_cleanup" &&
+	start_gvfs_protocol_server &&
+
+	# Ask for cached packfiles NEWER THAN the given time.
+	#
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		prefetch --since="1000000000" >OUT.output &&
+
+	# gvfs-helper prints a "packfile <path>" message for each received
+	# packfile.
+	#
+	verify_received_packfile_count 2 &&
+
+	# Ask again for any packfiles newer than what we have cached locally.
+	#
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		prefetch >OUT.output &&
+
+	# gvfs-helper prints a "packfile <path>" message for each received
+	# packfile.
+	#
+	verify_received_packfile_count 0 &&
+
+	stop_gvfs_protocol_server &&
+	verify_connection_count 2
+'
+
 #################################################################
 # Tests to see how gvfs-helper responds to network problems.
 #
@@ -960,44 +1098,6 @@ test_expect_success 'HTTP GET Auth on Cache Server' '
 # magically fetched whenever required.
 #################################################################
 
-test_expect_success 'integration: explicit commit/trees, implicit blobs: log file' '
-	test_when_finished "per_test_cleanup" &&
-	start_gvfs_protocol_server &&
-
-	# We have a very empty repo. Seed it with all of the commits
-	# and trees. The purpose of this test is to demand-load the
-	# needed blobs only, so we prefetch the commits and trees.
-	#
-	git -C "$REPO_T1" gvfs-helper \
-		--cache-server=disable \
-		--remote=origin \
-		get \
-		<"$OIDS_CT_FILE" >OUT.output &&
-
-	# Confirm that we do not have the blobs locally.
-	# With gvfs-helper turned off, we should fail.
-	#
-	test_must_fail \
-		git -C "$REPO_T1" -c core.useGVFSHelper=false \
-			log $(cat m3.brach) -- file9.txt \
-			>OUT.output 2>OUT.stderr &&
-
-	# Turn on gvfs-helper and retry. This should implicitly fetch
-	# any needed blobs.
-	#
-	git -C "$REPO_T1" -c core.useGVFSHelper=true \
-		log $(cat m3.branch) -- file9.txt \
-		>OUT.output 2>OUT.stderr &&
-
-	# Verify that gvfs-helper wrote the fetched the blobs to the
-	# local ODB, such that a second attempt with gvfs-helper
-	# turned off should succeed.
-	#
-	git -C "$REPO_T1" -c core.useGVFSHelper=false \
-		log $(cat m3.branch) -- file9.txt \
-		>OUT.output 2>OUT.stderr
-'
-
 test_expect_success 'integration: explicit commit/trees, implicit blobs: diff 2 commits' '
 	test_when_finished "per_test_cleanup" &&
 	start_gvfs_protocol_server &&

From f94b391a57fd185ebd02cbc21dc98bb9cbc7e918 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler
Date: Tue, 26 Nov 2019 14:13:57 -0500
Subject: [PATCH 02/10] gvfs-helper: add prefetch .keep file for last packfile

Signed-off-by: Jeff Hostetler
---
 gvfs-helper.c          | 83 ++++++++++++++++++++++++++++++++++++++++--
 t/t5799-gvfs-helper.sh | 29 +++++++++++++++
 2 files changed, 109 insertions(+), 3 deletions(-)

diff --git a/gvfs-helper.c b/gvfs-helper.c
index ca37be28e4dd79..c7ab63263c3b04 100644
--- a/gvfs-helper.c
+++ b/gvfs-helper.c
@@ -1882,6 +1882,7 @@ static void my_run_index_pack(struct gh__request_params *params,
 
 static void my_finalize_packfile(struct gh__request_params *params,
 				 struct gh__response_status *status,
+				 int b_keep,
 				 const struct strbuf *temp_path_pack,
 				 const struct strbuf *temp_path_idx,
 				 struct strbuf *final_path_pack,
@@ -1901,6 +1902,21 @@ static void my_finalize_packfile(struct gh__request_params *params,
 		return;
 	}
 
+	if (b_keep) {
+		struct strbuf keep = STRBUF_INIT;
+		int fd_keep;
+
+		strbuf_addbuf(&keep, final_path_pack);
+		strbuf_strip_suffix(&keep, ".pack");
+		strbuf_addstr(&keep, ".keep");
+
+		fd_keep = xopen(keep.buf, O_WRONLY | O_CREAT | O_TRUNC, 0666);
+		if (fd_keep >= 0)
+			close(fd_keep);
+
+		strbuf_release(&keep);
+	}
+
 	if (params->result_list) {
 		struct strbuf result_msg = STRBUF_INIT;
 
@@ -1953,7 +1969,7 @@ static void install_packfile(struct gh__request_params *params,
 	create_final_packfile_pathnames("vfs", packfile_checksum.buf, NULL,
 					&final_path_pack, &final_path_idx,
 					&final_filename);
-	my_finalize_packfile(params, status,
+	my_finalize_packfile(params, status, 0,
 			     &temp_path_pack, &temp_path_idx,
 			     &final_path_pack, &final_path_idx,
 			     &final_filename);
@@ -2049,6 +2065,12 @@ struct ph {
 
 /*
  * Extract the next packfile from the multipack.
+ * Install {.pack, .idx, .keep} set.
+ *
+ * Mark each successfully installed prefetch pack with a .keep file, in
+ * case we have errors decoding/indexing later packs within the received
+ * multipart file. (A later pass can delete the unnecessary .keep files
+ * from this and any previous invocations.)
  */
 static void extract_packfile_from_multipack(
 	struct gh__request_params *params,
@@ -2145,7 +2167,7 @@ static void extract_packfile_from_multipack(
 	} else {
 		/*
-		 * Server send the .idx immediately after the .pack in the
+		 * Server sent the .idx immediately after the .pack in the
 		 * data stream. I'm tempted to verify it, but that defeats
 		 * the purpose of having it cached...
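 		 * (Had the server omitted the .idx, we would have computed
 		 * one locally with index-pack instead.)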
 		 */
@@ -2167,7 +2189,7 @@ static void extract_packfile_from_multipack(
 					 &final_path_pack, &final_path_idx,
 					 &final_filename);
 
-	my_finalize_packfile(params, status,
+	my_finalize_packfile(params, status, 1,
 			     &temp_path_pack, &temp_path_idx,
 			     &final_path_pack, &final_path_idx,
 			     &final_filename);
@@ -2182,6 +2204,56 @@ static void extract_packfile_from_multipack(
 	strbuf_release(&final_filename);
 }
 
+struct keep_files_data {
+	timestamp_t max_timestamp;
+	int pos_of_max;
+	struct string_list *keep_files;
+};
+
+static void cb_keep_files(const char *full_path, size_t full_path_len,
+			  const char *file_path, void *void_data)
+{
+	struct keep_files_data *data = void_data;
+	const char *val;
+	timestamp_t t;
+
+	/*
+	 * We expect prefetch packfiles named like:
+	 *
+	 *     prefetch-<timestamp>-<checksum>.keep
+	 */
+	if (!skip_prefix(file_path, "prefetch-", &val))
+		return;
+	if (!ends_with(val, ".keep"))
+		return;
+
+	t = strtol(val, NULL, 10);
+	if (t > data->max_timestamp) {
+		data->pos_of_max = data->keep_files->nr;
+		data->max_timestamp = t;
+	}
+
+	string_list_append(data->keep_files, full_path);
+}
+
+static void delete_stale_keep_files(
+	struct gh__request_params *params,
+	struct gh__response_status *status)
+{
+	struct string_list keep_files = STRING_LIST_INIT_DUP;
+	struct keep_files_data data = { 0, 0, &keep_files };
+	int k;
+
+	for_each_file_in_pack_dir(gh__global.buf_odb_path.buf,
+				  cb_keep_files, &data);
+	for (k = 0; k < keep_files.nr; k++) {
+		if (k != data.pos_of_max)
+			unlink(keep_files.items[k].string);
+	}
+
+	string_list_clear(&keep_files, 0);
+}
+
 /*
  * Cut apart the received multipart response into individual packfiles
  * and install each one.
@@ -2200,6 +2272,7 @@ static void install_prefetch(struct gh__request_params *params,
 	unsigned short np;
 	unsigned short k;
 	int fd = -1;
+	int nr_installed = 0;
 
 	struct strbuf temp_path_mp = STRBUF_INIT;
 
@@ -2240,8 +2313,12 @@ static void install_prefetch(struct gh__request_params *params,
 		extract_packfile_from_multipack(params, status, fd, k);
 		if (status->ec != GH__ERROR_CODE__OK)
 			break;
+		nr_installed++;
 	}
 
+	if (nr_installed)
+		delete_stale_keep_files(params, status);
+
 cleanup:
 	if (fd != -1)
 		close(fd);
diff --git a/t/t5799-gvfs-helper.sh b/t/t5799-gvfs-helper.sh
index ffb4eea95768ca..aae93931ad64ab 100755
--- a/t/t5799-gvfs-helper.sh
+++ b/t/t5799-gvfs-helper.sh
@@ -390,6 +390,30 @@ verify_received_packfile_count () {
 	return 0
 }
 
+# Verify that we have exactly 1 prefetch .keep file.
+# Optionally, verify that it has the given timestamp.
+#
+verify_prefetch_keeps () {
+	count=$(( $(ls -1 "$SHARED_CACHE_T1"/pack/prefetch-*.keep | wc -l) ))
+	if test $count -ne 1
+	then
+		echo "verify_prefetch_keeps: found $count, expected 1."
+		return 1
+	fi
+
+	if test $# -eq 1
+	then
+		count=$(( $(ls -1 "$SHARED_CACHE_T1"/pack/prefetch-$1-*.keep | wc -l) ))
+		if test $count -ne 1
+		then
+			echo "verify_prefetch_keeps: did not find expected keep file."
+			return 1
+		fi
+	fi
+
+	return 0
+}
+
 per_test_cleanup () {
 	stop_gvfs_protocol_server
 
@@ -643,6 +667,7 @@ test_expect_success 'basic: PREFETCH w/o arg gets all' '
 	# packfile.
 	#
 	verify_received_packfile_count 3 &&
+	verify_prefetch_keeps 1200000000 &&
 
 	stop_gvfs_protocol_server &&
 	verify_connection_count 1
@@ -664,6 +689,7 @@ test_expect_success 'basic: PREFETCH w/ arg' '
 	# packfile.
 	#
 	verify_received_packfile_count 2 &&
+	verify_prefetch_keeps 1200000000 &&
 
 	stop_gvfs_protocol_server &&
 	verify_connection_count 1
@@ -686,6 +712,7 @@ test_expect_success 'basic: PREFETCH mayhem no_prefetch_idx' '
 	# packfile.
# verify_received_packfile_count 2 && + verify_prefetch_keeps 1200000000 && stop_gvfs_protocol_server && verify_connection_count 1 @@ -707,6 +734,7 @@ test_expect_success 'basic: PREFETCH up-to-date' ' # packfile. # verify_received_packfile_count 2 && + verify_prefetch_keeps 1200000000 && # Ask again for any packfiles newer than what we have cached locally. # @@ -720,6 +748,7 @@ test_expect_success 'basic: PREFETCH up-to-date' ' # packfile. # verify_received_packfile_count 0 && + verify_prefetch_keeps 1200000000 && stop_gvfs_protocol_server && verify_connection_count 2 From d654b3a78ed6b524a6a56e157e6e7fbcd7e0b3a1 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 16 Dec 2019 13:12:32 -0500 Subject: [PATCH 03/10] gvfs-helper: do one read in my_copy_fd_len_tail() Signed-off-by: Derrick Stolee --- gvfs-helper.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gvfs-helper.c b/gvfs-helper.c index c7ab63263c3b04..c5a00122779de1 100644 --- a/gvfs-helper.c +++ b/gvfs-helper.c @@ -2039,18 +2039,18 @@ static int my_copy_fd_len_tail(int fd_in, int fd_out, ssize_t nr_bytes_total, { memset(buf_tail, 0, tail_len); + if (my_copy_fd_len(fd_in, fd_out, nr_bytes_total) < 0) + return -1; + if (nr_bytes_total < tail_len) - return my_copy_fd_len(fd_in, fd_out, nr_bytes_total); + return 0; - if (my_copy_fd_len(fd_in, fd_out, (nr_bytes_total - tail_len)) < 0) - return -1; + /* Reset the position to read the tail */ + lseek(fd_in, -tail_len, SEEK_CUR); if (xread(fd_in, (char *)buf_tail, tail_len) != tail_len) return -1; - if (write_in_full(fd_out, buf_tail, tail_len) < 0) - return -1; - return 0; } From 2752a5417c5802e497d063b3186b83d4b5a96932 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 16 Dec 2019 16:26:33 -0500 Subject: [PATCH 04/10] gvfs-helper: move content-type warning for prefetch packs Signed-off-by: Derrick Stolee --- gvfs-helper.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/gvfs-helper.c b/gvfs-helper.c index c5a00122779de1..47f783526ba71d 100644 --- a/gvfs-helper.c +++ b/gvfs-helper.c @@ -2453,25 +2453,25 @@ static void install_result(struct gh__request_params *params, install_prefetch(params, status); return; } - } - - if (!strcmp(status->content_type.buf, "application/x-git-packfile")) { - assert(params->b_is_post); - assert(params->objects_mode == GH__OBJECTS_MODE__POST); + } else { + if (!strcmp(status->content_type.buf, "application/x-git-packfile")) { + assert(params->b_is_post); + assert(params->objects_mode == GH__OBJECTS_MODE__POST); - install_packfile(params, status); - return; - } + install_packfile(params, status); + return; + } - if (!strcmp(status->content_type.buf, - "application/x-git-loose-object")) { - /* - * We get these for "gvfs/objects" GET and POST requests. - * - * Note that this content type is singular, not plural. - */ - install_loose(params, status); - return; + if (!strcmp(status->content_type.buf, + "application/x-git-loose-object")) { + /* + * We get these for "gvfs/objects" GET and POST requests. + * + * Note that this content type is singular, not plural. + */ + install_loose(params, status); + return; + } } strbuf_addf(&status->error_message, From cd85c6e2a8c63b9b86bf8d1fee596ea86cf691d4 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 17 Dec 2019 07:25:40 -0500 Subject: [PATCH 05/10] fetch: use gvfs-helper prefetch under config The gvfs-helper allows us to download prefetch packs using a simple subprocess call. 
The gvfs-helper-client.h method will automatically compute the
timestamp if passed 0, and passing NULL for the number of downloaded
packs is valid.

Signed-off-by: Derrick Stolee
---
 Documentation/config/core.txt | 4 ++++
 builtin/fetch.c               | 5 +++++
 gvfs.h                        | 1 +
 3 files changed, 10 insertions(+)

diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt
index bbf738d72d9ab4..78005fde51a0ad 100644
--- a/Documentation/config/core.txt
+++ b/Documentation/config/core.txt
@@ -776,6 +776,10 @@ core.gvfs::
 		is first accessed and brought down to the client. Git.exe can't
 		currently tell the first access vs subsequent accesses so this
 		flag just blocks them from occurring at all.
+	GVFS_PREFETCH_DURING_FETCH::
+		Bit value 128
+		While performing a `git fetch` command, use the gvfs-helper to
+		perform a "prefetch" of commits and trees.
 --
 
 core.useGvfsHelper::
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 849a9be421d048..9639e375dbf195 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -19,6 +19,8 @@
 #include "string-list.h"
 #include "remote.h"
 #include "transport.h"
+#include "gvfs.h"
+#include "gvfs-helper-client.h"
 #include "run-command.h"
 #include "parse-options.h"
 #include "sigchain.h"
@@ -2358,6 +2360,9 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
 	}
 	string_list_remove_duplicates(&list, 0);
 
+	if (core_gvfs & GVFS_PREFETCH_DURING_FETCH)
+		gh_client__prefetch(0, NULL);
+
 	if (negotiate_only) {
 		struct oidset acked_commits = OIDSET_INIT;
 		struct oidset_iter iter;
diff --git a/gvfs.h b/gvfs.h
index a8e58a6ebc88b8..99c5205aa043d7 100644
--- a/gvfs.h
+++ b/gvfs.h
@@ -28,6 +28,7 @@
 #define GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK (1 << 4)
 
 #define GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS (1 << 6)
+#define GVFS_PREFETCH_DURING_FETCH (1 << 7)
 
 void gvfs_load_config_value(const char *value);
 int gvfs_config_is_set(int mask);

From 5e7b77e09f88a15503be4d3db7d969ab2caaa0aa Mon Sep 17 00:00:00 2001
From: Jeff Hostetler
Date: Wed, 18 Dec 2019 12:13:46 -0500
Subject: [PATCH 06/10] gvfs-helper: better support for concurrent packfile
 fetches

Teach gvfs-helper to better support the concurrent fetching of the
same packfile by multiple instances.

If 2 instances of gvfs-helper did a POST and requested the same set of
OIDs, they might receive the exact same packfile (same checksum SHA).
Both processes would then race to install their copy of the .pack and
.idx files into the ODB/pack directory.

This is not a problem on Unix (because of filesystem semantics). On
Windows, this can cause an EBUSY/EPERM problem for the loser while the
winner is holding a handle to the target files. (The existing packfile
code already handled the simple existence and/or replacement case.)

The solution presented here is to silently let the loser claim victory
IFF the .pack and .idx are already present in the ODB. (We can't check
this in advance because we don't know the packfile SHA checksum until
after we receive it and run index-pack.)

We avoid using a per-packfile lockfile (or a single lockfile for the
`vfs-` prefix) to avoid the usual issues with stale lockfiles.
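In other words, the failure path in my_finalize_packfile() reduces to
roughly this sketch (simplified; the actual code is in the diff below):

    if (finalize_object_file(tmp_pack, final_pack) ||
        finalize_object_file(tmp_idx, final_idx)) {
        unlink(tmp_pack);
        unlink(tmp_idx);

        if (file_exists(final_pack) && file_exists(final_idx))
            goto assume_ok; /* another instance won the race */

        /* otherwise, report the failed install */
    }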
Signed-off-by: Jeff Hostetler
---
 gvfs-helper.c          |  29 ++++++++++-
 t/t5799-gvfs-helper.sh | 114 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 141 insertions(+), 2 deletions(-)

diff --git a/gvfs-helper.c b/gvfs-helper.c
index 47f783526ba71d..5fcd427a6ab69c 100644
--- a/gvfs-helper.c
+++ b/gvfs-helper.c
@@ -1889,12 +1889,36 @@ static void my_finalize_packfile(struct gh__request_params *params,
 				 struct strbuf *final_path_idx,
 				 struct strbuf *final_filename)
 {
+	/*
+	 * Install the .pack and .idx into the ODB pack directory.
+	 *
+	 * We might be racing with other instances of gvfs-helper if
+	 * we, in parallel, both downloaded the exact same packfile
+	 * (with the same checksum SHA) and try to install it at the
+	 * same time. This might happen on Windows where the loser
+	 * can get an EBUSY or EPERM trying to move/rename the
+	 * tempfile into the pack dir, for example.
+	 *
+	 * So, we always install the .pack before the .idx for
+	 * consistency. And only if *WE* created the .pack and .idx
+	 * files, do we create the matching .keep (when requested).
+	 *
+	 * If we get an error and the target files already exist, we
+	 * silently eat the error. Note that finalize_object_file()
+	 * has already munged errno (and it has various creation
+	 * strategies), so we don't bother looking at it.
+	 */
 	if (finalize_object_file(temp_path_pack->buf, final_path_pack->buf) ||
 	    finalize_object_file(temp_path_idx->buf, final_path_idx->buf)) {
 		unlink(temp_path_pack->buf);
 		unlink(temp_path_idx->buf);
-		unlink(final_path_pack->buf);
-		unlink(final_path_idx->buf);
+
+		if (file_exists(final_path_pack->buf) &&
+		    file_exists(final_path_idx->buf)) {
+			trace2_printf("%s: assuming ok for %s", TR2_CAT, final_path_pack->buf);
+			goto assume_ok;
+		}
+
 		strbuf_addf(&status->error_message,
 			    "could not install packfile '%s'",
 			    final_path_pack->buf);
@@ -1917,6 +1941,7 @@ static void my_finalize_packfile(struct gh__request_params *params,
 		strbuf_release(&keep);
 	}
 
+assume_ok:
 	if (params->result_list) {
 		struct strbuf result_msg = STRBUF_INIT;
 
diff --git a/t/t5799-gvfs-helper.sh b/t/t5799-gvfs-helper.sh
index aae93931ad64ab..3a10b6b68ff87d 100755
--- a/t/t5799-gvfs-helper.sh
+++ b/t/t5799-gvfs-helper.sh
@@ -372,6 +372,10 @@ verify_objects_in_shared_cache () {
 	return 0
 }
 
+# gvfs-helper prints a "packfile <path>" message for each received
+# packfile to stdout. Verify that we received the expected number
+# of packfiles.
+#
 verify_received_packfile_count () {
 	if test $# -eq 1
 	then
@@ -414,6 +418,19 @@ verify_prefetch_keeps () {
 	return 0
 }
 
+# Verify that the number of vfs- packfiles present in the shared-cache
+# matches our expectations.
+#
+verify_vfs_packfile_count () {
+	count=$(( $(ls -1 "$SHARED_CACHE_T1"/pack/vfs-*.pack | wc -l) ))
+	if test $count -ne $1
+	then
+		echo "verify_vfs_packfile_count: expected $1; actual $count"
+		return 1
+	fi
+	return 0
+}
+
 per_test_cleanup () {
 	stop_gvfs_protocol_server
 
@@ -1176,6 +1193,103 @@ test_expect_success 'integration: fully implicit: diff 2 commits' '
 		>OUT.output 2>OUT.stderr
 '
 
+#################################################################
+# Duplicate packfile tests.
+#
+# If we request a fixed set of blobs, we should get a unique packfile
+# of the form "vfs-<checksum>.{pack,idx}". If we request that same set
+# again, the server should create and send the exact same packfile.
+# True web servers might build the custom packfile in random order,
+# but our test web server should give us consistent results.
+#
+# Verify that we can handle the duplicate pack and idx file properly.
+#################################################################

+test_expect_success 'duplicate: vfs- packfile' '
+	test_when_finished "per_test_cleanup" &&
+	start_gvfs_protocol_server &&
+
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		post \
+		<"$OIDS_BLOBS_FILE" >OUT.output 2>OUT.stderr &&
+	verify_received_packfile_count 1 &&
+	verify_vfs_packfile_count 1 &&
+
+	# Re-fetch the same packfile. We do not care if it replaces
+	# the first one or if it silently fails to overwrite the existing
+	# one. We just confirm that afterwards we only have 1 packfile.
+	#
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		post \
+		<"$OIDS_BLOBS_FILE" >OUT.output 2>OUT.stderr &&
+	verify_received_packfile_count 1 &&
+	verify_vfs_packfile_count 1 &&
+
+	stop_gvfs_protocol_server
+'
+
+# Return the absolute pathname of the first received packfile.
+#
+first_received_packfile_pathname () {
+	fn=$(sed -n '/^packfile/p' <OUT.output | head -1 | sed -n 's/^packfile //p')
+	echo "$SHARED_CACHE_T1"/pack/"$fn"
+	return 0
+}
+
+test_expect_success 'duplicate: busy vfs- packfile' '
+	test_when_finished "per_test_cleanup" &&
+	start_gvfs_protocol_server &&
+
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		post \
+		<"$OIDS_BLOBS_FILE" \
+		>OUT.output \
+		2>OUT.stderr &&
+	verify_received_packfile_count 1 &&
+	verify_vfs_packfile_count 1 &&
+
+	# Re-fetch the same packfile, but hold the existing packfile
+	# open for writing on an obscure (and randomly-chosen) file
+	# descriptor.
+	#
+	# This should cause the replacement-install to fail (at least
+	# on Windows) with an EBUSY or EPERM or something.
+	#
+	# Verify that that error is eaten. We do not care if the
+	# replacement is retried or if gvfs-helper simply discards the
+	# second instance. We just confirm that afterwards we only
+	# have 1 packfile on disk and that the command "lies" and reports
+	# that it created the existing packfile. (We want the lie because
+	# in normal usage, gh-client has already built the packed-git list
+	# in memory and is using gvfs-helper to fetch missing objects;
+	# gh-client does not care who does the fetch, but it needs to
+	# update its packed-git list and restart the object lookup.)
+	#
+	PACK=$(first_received_packfile_pathname) &&
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		post \
+		<"$OIDS_BLOBS_FILE" \
+		>OUT.output \
+		2>OUT.stderr \
+		9>>"$PACK" &&
+	verify_received_packfile_count 1 &&
+	verify_vfs_packfile_count 1 &&
+
+	stop_gvfs_protocol_server
+'
+
 #################################################################
 # Ensure that the SHA of the blob we received matches the SHA of
 # the blob we requested.

From e5ef243b651b383bad37ee75b8ad1e13e5116eca Mon Sep 17 00:00:00 2001
From: Derrick Stolee
Date: Mon, 3 Feb 2020 15:33:26 -0500
Subject: [PATCH 07/10] remote-curl: do not call fetch-pack when using
 gvfs-helper

When using the GVFS protocol, we should _never_ call "git fetch-pack"
to attempt downloading a pack-file via the regular Git protocol.

It appears that the mechanism that prevented this in the VFS for Git
world is due to the read-object hook populating the commits at the
new ref tips in a different way than the gvfs-helper does.

By acting as if the fetch-pack succeeds here in remote-curl, we
prevent a failed fetch.
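The refs advertised by the server are still updated as usual; any
objects that turn out to be missing locally are demand-fetched through
gvfs-helper when they are first needed.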
Signed-off-by: Derrick Stolee
---
 remote-curl.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/remote-curl.c b/remote-curl.c
index acf7b2bb40ac4e..9da0c97e976442 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -1175,6 +1175,9 @@ static int fetch_git(struct discovery *heads,
 	struct strvec args = STRVEC_INIT;
 	struct strbuf rpc_result = STRBUF_INIT;
 
+	if (core_use_gvfs_helper)
+		return 0;
+
 	strvec_pushl(&args, "fetch-pack", "--stateless-rpc",
 		     "--stdin", "--lock-pack", NULL);
 	if (options.followtags)

From 9879ae0b281db3794032429340ebe178f33e92ea Mon Sep 17 00:00:00 2001
From: Derrick Stolee
Date: Thu, 12 Mar 2020 12:48:49 +0000
Subject: [PATCH 08/10] fetch: reprepare packs before checking connectivity

Signed-off-by: Derrick Stolee
---
 builtin/fetch.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index 9639e375dbf195..1f591021e1b4a9 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1146,6 +1146,13 @@ static int store_updated_refs(struct display_state *display_state,
 		opt.exclude_hidden_refs_section = "fetch";
 
 	rm = ref_map;
+
+	/*
+	 * Before checking connectivity, be really sure we have the
+	 * latest pack-files loaded into memory.
+	 */
+	reprepare_packed_git(the_repository);
+
 	if (check_connected(iterate_ref_map, &rm, &opt)) {
 		rc = error(_("%s did not send all necessary objects\n"),
 			   display_state->url);

From 133865cfd069eb98d472ef9b5d9788e314801dbc Mon Sep 17 00:00:00 2001
From: Derrick Stolee
Date: Thu, 26 Dec 2019 10:20:24 -0500
Subject: [PATCH 09/10] gvfs-helper: retry when creating temp files

When we create temp files for downloading packs, we use a name
based on the current timestamp. There is no randomness in the
name, so we can have collisions in the same second.

Retry the temp pack names using a new "-<n>" suffix to the name
before the ".temp".
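For example (illustrative names only), if "vfs-1577374824.temp.pack"
is already in use, the code now retries with
"vfs-1577374824-1.temp.pack", "vfs-1577374824-2.temp.pack", and so on,
making up to five extra attempts before giving up.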
Signed-off-by: Derrick Stolee --- gvfs-helper.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gvfs-helper.c b/gvfs-helper.c index 5fcd427a6ab69c..0fe3fff130a9c1 100644 --- a/gvfs-helper.c +++ b/gvfs-helper.c @@ -1662,6 +1662,7 @@ static void my_create_tempfile( struct strbuf buf = STRBUF_INIT; int len_tp; enum scld_error scld; + int retries; gh__response_status__zero(status); @@ -1710,7 +1711,15 @@ static void my_create_tempfile( goto cleanup; } + retries = 0; *t1 = create_tempfile(buf.buf); + while (!*t1 && retries < 5) { + retries++; + strbuf_setlen(&buf, len_tp); + strbuf_addf(&buf, "%s-%d.%s", basename.buf, retries, suffix1); + *t1 = create_tempfile(buf.buf); + } + if (!*t1) { strbuf_addf(&status->error_message, "could not create tempfile: '%s'", @@ -1732,6 +1741,13 @@ static void my_create_tempfile( strbuf_addf( &buf, "%s.%s", basename.buf, suffix2); *t2 = create_tempfile(buf.buf); + while (!*t2 && retries < 5) { + retries++; + strbuf_setlen(&buf, len_tp); + strbuf_addf(&buf, "%s-%d.%s", basename.buf, retries, suffix2); + *t2 = create_tempfile(buf.buf); + } + if (!*t2) { strbuf_addf(&status->error_message, "could not create tempfile: '%s'", From ac98a9ef2b831a0c9f03f6ff56388be1e73b34db Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 3 Aug 2021 17:08:26 -0400 Subject: [PATCH 10/10] sparse: avoid warnings about known cURL issues in gvfs-helper.c `sparse` complains with an error message like this: gvfs-helper.c:2912:17: error: expression using sizeof on a function The culprit is this line: curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite); Similar lines exist in `http-push.c` and other files that are in upstream Git, and to avoid these bogus warnings, they are already exempted from `sparse`'s tender, loving care. We simply add `gvfs-helper.c` to that list. Signed-off-by: Derrick Stolee --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fef1041eadafaf..c0fc2e6583aaf9 100644 --- a/Makefile +++ b/Makefile @@ -2821,7 +2821,7 @@ gettext.sp gettext.s gettext.o: GIT-PREFIX gettext.sp gettext.s gettext.o: EXTRA_CPPFLAGS = \ -DGIT_LOCALE_PATH='"$(localedir_relative_SQ)"' -http-push.sp http.sp http-walker.sp remote-curl.sp imap-send.sp: SP_EXTRA_FLAGS += \ +http-push.sp http.sp http-walker.sp remote-curl.sp imap-send.sp gvfs-helper.sp: SP_EXTRA_FLAGS += \ -DCURL_DISABLE_TYPECHECK pack-revindex.sp: SP_EXTRA_FLAGS += -Wno-memcpy-max-count