diff --git a/gvfs-helper-client.c b/gvfs-helper-client.c index 02da53f0845a0c..e6fbb6ed1a43c3 100644 --- a/gvfs-helper-client.c +++ b/gvfs-helper-client.c @@ -24,13 +24,14 @@ static struct hashmap gh_server__subprocess_map; static struct object_directory *gh_client__chosen_odb; /* - * The "objects" capability has 2 verbs: "get" and "post". + * The "objects" capability has verbs: "get" and "post" and "prefetch". */ #define CAP_OBJECTS (1u<<1) #define CAP_OBJECTS_NAME "objects" #define CAP_OBJECTS__VERB_GET1_NAME "get" #define CAP_OBJECTS__VERB_POST_NAME "post" +#define CAP_OBJECTS__VERB_PREFETCH_NAME "prefetch" static int gh_client__start_fn(struct subprocess_entry *subprocess) { @@ -129,6 +130,44 @@ static int gh_client__send__objects_get(struct child_process *process, return 0; } +/* + * Send a request to gvfs-helper to prefetch packfiles from either the + * cache-server or the main Git server using "/gvfs/prefetch". + * + * objects.prefetch LF + * [ LF] + * + */ +static int gh_client__send__objects_prefetch(struct child_process *process, + timestamp_t seconds_since_epoch) +{ + int err; + + /* + * We assume that all of the packet_ routines call error() + * so that we don't have to. + */ + + err = packet_write_fmt_gently( + process->in, + (CAP_OBJECTS_NAME "." CAP_OBJECTS__VERB_PREFETCH_NAME "\n")); + if (err) + return err; + + if (seconds_since_epoch) { + err = packet_write_fmt_gently(process->in, "%" PRItime "\n", + seconds_since_epoch); + if (err) + return err; + } + + err = packet_flush_gently(process->in); + if (err) + return err; + + return 0; +} + /* * Update the loose object cache to include the newly created * object. @@ -176,7 +215,7 @@ static void gh_client__update_packed_git(const char *line) } /* - * Both CAP_OBJECTS verbs return the same format response: + * CAP_OBJECTS verbs return the same format response: * * * * @@ -216,6 +255,8 @@ static int gh_client__objects__receive_response( const char *v1; char *line; int len; + int nr_loose = 0; + int nr_packfile = 0; int err = 0; while (1) { @@ -234,13 +275,13 @@ static int gh_client__objects__receive_response( else if (starts_with(line, "packfile")) { gh_client__update_packed_git(line); ghc |= GHC__CREATED__PACKFILE; - *p_nr_packfile += 1; + nr_packfile++; } else if (starts_with(line, "loose")) { gh_client__update_loose_cache(line); ghc |= GHC__CREATED__LOOSE; - *p_nr_loose += 1; + nr_loose++; } else if (starts_with(line, "ok")) @@ -254,6 +295,8 @@ static int gh_client__objects__receive_response( } *p_ghc = ghc; + *p_nr_loose = nr_loose; + *p_nr_packfile = nr_packfile; return err; } @@ -310,7 +353,7 @@ static struct gh_server__process *gh_client__find_long_running_process( /* * Find an existing long-running process with the above command * line -or- create a new long-running process for this and - * subsequent 'get' requests. + * subsequent requests. */ if (!gh_server__subprocess_map_initialized) { gh_server__subprocess_map_initialized = 1; @@ -347,10 +390,14 @@ static struct gh_server__process *gh_client__find_long_running_process( void gh_client__queue_oid(const struct object_id *oid) { - // TODO consider removing this trace2. it is useful for interactive - // TODO debugging, but may generate way too much noise for a data - // TODO event. - trace2_printf("gh_client__queue_oid: %s", oid_to_hex(oid)); + /* + * Keep this trace as a printf only, so that it goes to the + * perf log, but not the event log. It is useful for interactive + * debugging, but generates way too much (unuseful) noise for the + * database. 
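+	 * (Guarding with trace2_is_enabled() also skips the oid_to_hex()
+	 * conversion entirely when tracing is off.)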
+	 */
+	if (trace2_is_enabled())
+		trace2_printf("gh_client__queue_oid: %s", oid_to_hex(oid));
 
 	if (!oidset_insert(&gh_client__oidset_queued, oid))
 		gh_client__oidset_count++;
@@ -431,10 +478,14 @@ int gh_client__get_immediate(const struct object_id *oid,
 	int nr_packfile = 0;
 	int err = 0;
 
-	// TODO consider removing this trace2. it is useful for interactive
-	// TODO debugging, but may generate way too much noise for a data
-	// TODO event.
-	trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid));
+	/*
+	 * Keep this trace as a printf only, so that it goes to the
+	 * perf log, but not the event log. It is useful for interactive
+	 * debugging, but generates way too much (unuseful) noise for the
+	 * database.
+	 */
+	if (trace2_is_enabled())
+		trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid));
 
 	entry = gh_client__find_long_running_process(CAP_OBJECTS);
 	if (!entry)
@@ -463,3 +514,55 @@ int gh_client__get_immediate(const struct object_id *oid,
 
 	return err;
 }
+
+/*
+ * Ask gvfs-helper to prefetch commits-and-trees packfiles since a
+ * given timestamp.
+ *
+ * If seconds_since_epoch is zero, gvfs-helper will scan the ODB for
+ * the last received prefetch and ask for ones newer than that.
+ */
+int gh_client__prefetch(timestamp_t seconds_since_epoch,
+			int *nr_packfiles_received)
+{
+	struct gh_server__process *entry;
+	struct child_process *process;
+	enum gh_client__created ghc;
+	int nr_loose = 0;
+	int nr_packfile = 0;
+	int err = 0;
+
+	entry = gh_client__find_long_running_process(CAP_OBJECTS);
+	if (!entry)
+		return -1;
+
+	trace2_region_enter("gh-client", "objects/prefetch", the_repository);
+	trace2_data_intmax("gh-client", the_repository, "prefetch/since",
+			   seconds_since_epoch);
+
+	process = &entry->subprocess.process;
+
+	sigchain_push(SIGPIPE, SIG_IGN);
+
+	err = gh_client__send__objects_prefetch(process, seconds_since_epoch);
+	if (!err)
+		err = gh_client__objects__receive_response(
+			process, &ghc, &nr_loose, &nr_packfile);
+
+	sigchain_pop(SIGPIPE);
+
+	if (err) {
+		subprocess_stop(&gh_server__subprocess_map,
+				(struct subprocess_entry *)entry);
+		FREE_AND_NULL(entry);
+	}
+
+	trace2_data_intmax("gh-client", the_repository,
+			   "prefetch/packfile_count", nr_packfile);
+	trace2_region_leave("gh-client", "objects/prefetch", the_repository);
+
+	if (nr_packfiles_received)
+		*nr_packfiles_received = nr_packfile;
+
+	return err;
+}
diff --git a/gvfs-helper-client.h b/gvfs-helper-client.h
index c1e38fad75f841..7692534ecda54c 100644
--- a/gvfs-helper-client.h
+++ b/gvfs-helper-client.h
@@ -66,4 +66,22 @@ void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr);
  */
 int gh_client__drain_queue(enum gh_client__created *p_ghc);
 
+/*
+ * Ask `gvfs-helper server` to fetch any "prefetch packs"
+ * available on the server more recent than the requested time.
+ *
+ * If seconds_since_epoch is zero, gvfs-helper will scan the ODB for
+ * the last received prefetch and ask for ones newer than that.
+ *
+ * A long-running background process is used to make subsequent
+ * requests (either prefetch or regular immediate/queued requests)
+ * more efficient.
+ *
+ * One or more packfiles will be created in the shared-cache ODB.
+ *
+ * Returns 0 on success, -1 on error. Optionally also returns the
+ * number of prefetch packs received.
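+ *
+ * A minimal usage sketch (illustrative only):
+ *
+ *     int nr_packs = 0;
+ *
+ *     if (gh_client__prefetch(0, &nr_packs))
+ *         warning("prefetch failed");
+ *     else
+ *         trace2_printf("prefetch: received %d packs", nr_packs);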
+ */ +int gh_client__prefetch(timestamp_t seconds_since_epoch, + int *nr_packfiles_received); + #endif /* GVFS_HELPER_CLIENT_H */ diff --git a/gvfs-helper.c b/gvfs-helper.c index 7026b614d27a96..37d5ddf674b4f7 100644 --- a/gvfs-helper.c +++ b/gvfs-helper.c @@ -22,7 +22,7 @@ // // error := verify cache-server and abort if not well-known. // -// trust := do not verify cache-server. just use it. +// trust := do not verify cache-server. just use it, if set. // // disable := disable the cache-server and always use the main // Git server. @@ -87,6 +87,24 @@ // Number of retries after transient network errors. // Set to zero to disable such retries. // +// prefetch +// +// Use "/gvfs/prefetch" REST API to fetch 1 or more commits-and-trees +// prefetch packs from the server. +// +// : +// +// --since= // defaults to "0" +// +// Time in seconds since the epoch. If omitted or +// zero, the timestamp from the newest prefetch +// packfile found in the shared-cache ODB is used. +// (This is based upon the packfile name, not the +// mtime.) +// +// The GVFS Protocol defines this value as a way to +// request cached packfiles NEWER THAN this timestamp. +// // server // // Interactive/sub-process mode. Listen for a series of commands @@ -116,20 +134,36 @@ // // Each object will be created as a loose object in the ODB. // +// Create 1 or more loose objects in the shared-cache ODB. +// (The pathname of the selected ODB is reported at the +// beginning of the response; this should match the pathname +// given on the command line). +// +// git> objects.get +// git> +// git> +// git> ... +// git> +// git> 0000 +// +// git< odb +// git< loose +// git< loose +// git< ... +// git< loose +// git< ok | partial | error +// git< 0000 +// // Interactive verb: objects.post // // Fetch 1 or more objects, in bulk, using one or more // "/gvfs/objects" POST requests. // -// For both verbs, if a cache-server is configured, try it first. -// Optionally fallback to the main Git server. -// // Create 1 or more loose objects and/or packfiles in the -// shared-cache ODB. (The pathname of the selected ODB is -// reported at the beginning of the response; this should -// match the pathname given on the command line). +// shared-cache ODB. A POST is allowed to respond with +// either loose or packed objects. // -// git> objects.get | objects.post +// git> objects.post // git> // git> // git> ... @@ -139,11 +173,31 @@ // git< odb // git< loose | packfile // git< loose | packfile -// gid< ... +// git< ... // git< loose | packfile // git< ok | partial | error // git< 0000 // +// Interactive verb: object.prefetch +// +// Fetch 1 or more prefetch packs using a "/gvfs/prefetch" +// request. +// +// git> objects.prefetch +// git> // optional +// git> 0000 +// +// git< odb +// git< packfile +// git< packfile +// git< ... +// git< packfile +// git< ok | error +// git< 0000 +// +// If a cache-server is configured, try it first. +// Optionally fallback to the main Git server. 
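+//
+// For example (illustrative; option spellings follow the usage
+// strings defined below):
+//
+//     git gvfs-helper prefetch --since=1577836800
+//
+// prints one "packfile <name>" line to stdout for each prefetch
+// pack received and installed into the shared-cache ODB.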
+// // [1] Documentation/technical/protocol-common.txt // [2] Documentation/technical/long-running-process-protocol.txt // [3] See GIT_TRACE_PACKET @@ -176,11 +230,16 @@ #include "oidset.h" #include "dir.h" #include "progress.h" +#include "packfile.h" +#include "date.h" + +#define TR2_CAT "gvfs-helper" static const char * const main_usage[] = { N_("git gvfs-helper [] config []"), N_("git gvfs-helper [] get []"), N_("git gvfs-helper [] post []"), + N_("git gvfs-helper [] prefetch []"), N_("git gvfs-helper [] server []"), NULL }; @@ -195,6 +254,11 @@ static const char *const objects_post_usage[] = { NULL }; +static const char *const prefetch_usage[] = { + N_("git gvfs-helper [] prefetch []"), + NULL +}; + static const char *const server_usage[] = { N_("git gvfs-helper [] server []"), NULL @@ -239,6 +303,7 @@ enum gh__error_code { GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE = 11, GH__ERROR_CODE__SUBPROCESS_SYNTAX = 12, GH__ERROR_CODE__INDEX_PACK_FAILED = 13, + GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH = 14, }; enum gh__cache_server_mode { @@ -303,6 +368,8 @@ static const char *gh__server_type_label[GH__SERVER_TYPE__NR] = { }; enum gh__objects_mode { + GH__OBJECTS_MODE__NONE = 0, + /* * Bulk fetch objects. * @@ -322,6 +389,12 @@ enum gh__objects_mode { * object treatment). */ GH__OBJECTS_MODE__GET, + + /* + * Fetch one or more pre-computed "prefetch packs" containing + * commits and trees. + */ + GH__OBJECTS_MODE__PREFETCH, }; struct gh__azure_throttle @@ -396,6 +469,7 @@ struct gh__request_params { int b_write_to_file; /* write to file=1 or strbuf=0 */ int b_permit_cache_server_if_defined; + enum gh__objects_mode objects_mode; enum gh__server_type server_type; int k_attempt; /* robust retry attempt */ @@ -410,15 +484,8 @@ struct gh__request_params { struct strbuf *buffer; /* for response content when strbuf */ struct strbuf tr2_label; /* for trace2 regions */ - struct strbuf loose_path; struct object_id loose_oid; - struct strbuf temp_path_pack; - struct strbuf temp_path_idx; - struct strbuf final_path_pack; - struct strbuf final_path_idx; - struct strbuf final_packfile_filename; - /* * Note that I am putting all of the progress-related instance data * inside the request-params in the hope that we can eventually @@ -458,13 +525,7 @@ struct gh__request_params { .tempfile = NULL, \ .buffer = NULL, \ .tr2_label = STRBUF_INIT, \ - .loose_path = STRBUF_INIT, \ .loose_oid = {{0}}, \ - .temp_path_pack = STRBUF_INIT, \ - .temp_path_idx = STRBUF_INIT, \ - .final_path_pack = STRBUF_INIT, \ - .final_path_idx = STRBUF_INIT, \ - .final_packfile_filename = STRBUF_INIT, \ .progress_state = GH__PROGRESS_STATE__START, \ .progress_base_phase2_msg = STRBUF_INIT, \ .progress_base_phase3_msg = STRBUF_INIT, \ @@ -489,12 +550,6 @@ static void gh__request_params__release(struct gh__request_params *params) params->buffer = NULL; /* we do not own this */ strbuf_release(¶ms->tr2_label); - strbuf_release(¶ms->loose_path); - strbuf_release(¶ms->temp_path_pack); - strbuf_release(¶ms->temp_path_idx); - strbuf_release(¶ms->final_path_pack); - strbuf_release(¶ms->final_path_idx); - strbuf_release(¶ms->final_packfile_filename); strbuf_release(¶ms->progress_base_phase2_msg); strbuf_release(¶ms->progress_base_phase3_msg); @@ -585,9 +640,7 @@ static void gh__response_status__zero(struct gh__response_status *s) s->azure = NULL; } -static void install_packfile(struct gh__request_params *params, - struct gh__response_status *status); -static void install_loose(struct gh__request_params *params, +static void install_result(struct 
gh__request_params *params, struct gh__response_status *status); /* @@ -624,7 +677,7 @@ static void log_e2eid(struct gh__request_params *params, strbuf_addstr(&key, "e2eid"); strbuf_addstr(&key, gh__server_type_label[params->server_type]); - trace2_data_string("gvfs-helper", NULL, key.buf, + trace2_data_string(TR2_CAT, NULL, key.buf, params->e2eid.buf); strbuf_release(&key); @@ -724,7 +777,7 @@ static void compute_retry_mode_from_http_response( status->retry = GH__RETRY_MODE__HTTP_429; status->ec = GH__ERROR_CODE__HTTP_429; - trace2_data_string("gvfs-helper", NULL, "error/http", + trace2_data_string(TR2_CAT, NULL, "error/http", status->error_message.buf); return; @@ -737,7 +790,7 @@ static void compute_retry_mode_from_http_response( status->retry = GH__RETRY_MODE__HTTP_503; status->ec = GH__ERROR_CODE__HTTP_503; - trace2_data_string("gvfs-helper", NULL, "error/http", + trace2_data_string(TR2_CAT, NULL, "error/http", status->error_message.buf); return; @@ -751,7 +804,7 @@ static void compute_retry_mode_from_http_response( status->retry = GH__RETRY_MODE__HARD_FAIL; status->ec = GH__ERROR_CODE__HTTP_OTHER; - trace2_data_string("gvfs-helper", NULL, "error/http", + trace2_data_string(TR2_CAT, NULL, "error/http", status->error_message.buf); return; } @@ -885,7 +938,7 @@ static void compute_retry_mode_from_curl_error( status->retry = GH__RETRY_MODE__HARD_FAIL; status->ec = GH__ERROR_CODE__CURL_ERROR; - trace2_data_string("gvfs-helper", NULL, "error/curl", + trace2_data_string(TR2_CAT, NULL, "error/curl", status->error_message.buf); return; @@ -895,7 +948,7 @@ static void compute_retry_mode_from_curl_error( status->retry = GH__RETRY_MODE__TRANSIENT; status->ec = GH__ERROR_CODE__CURL_ERROR; - trace2_data_string("gvfs-helper", NULL, "error/curl", + trace2_data_string(TR2_CAT, NULL, "error/curl", status->error_message.buf); return; } @@ -1089,7 +1142,7 @@ static void gh__run_one_slot(struct active_request_slot *slot, params->progress_state = GH__PROGRESS_STATE__START; strbuf_setlen(¶ms->e2eid, 0); - trace2_region_enter("gvfs-helper", key.buf, NULL); + trace2_region_enter(TR2_CAT, key.buf, NULL); if (!start_active_slot(slot)) { compute_retry_mode_from_curl_error(status, @@ -1113,7 +1166,7 @@ static void gh__run_one_slot(struct active_request_slot *slot, * (such as when we request a commit). 
*/ strbuf_addstr(&key, "/nr_bytes"); - trace2_data_intmax("gvfs-helper", NULL, + trace2_data_intmax(TR2_CAT, NULL, key.buf, status->bytes_received); strbuf_setlen(&key, old_len); @@ -1123,16 +1176,10 @@ static void gh__run_one_slot(struct active_request_slot *slot, if (params->progress) stop_progress(¶ms->progress); - if (status->ec == GH__ERROR_CODE__OK && params->b_write_to_file) { - if (params->b_is_post && - !strcmp(status->content_type.buf, - "application/x-git-packfile")) - install_packfile(params, status); - else - install_loose(params, status); - } + if (status->ec == GH__ERROR_CODE__OK && params->b_write_to_file) + install_result(params, status); - trace2_region_leave("gvfs-helper", key.buf, NULL); + trace2_region_leave(TR2_CAT, key.buf, NULL); strbuf_release(&key); } @@ -1279,7 +1326,7 @@ static void lookup_main_url(void) */ gh__global.main_url = transport_anonymize_url(gh__global.remote->url[0]); - trace2_data_string("gvfs-helper", NULL, "remote/url", gh__global.main_url); + trace2_data_string(TR2_CAT, NULL, "remote/url", gh__global.main_url); } static void do__http_get__gvfs_config(struct gh__response_status *status, @@ -1306,10 +1353,23 @@ static void select_cache_server(void) gh__global.cache_server_url = NULL; if (gh__cmd_opts.cache_server_mode == GH__CACHE_SERVER_MODE__DISABLE) { - trace2_data_string("gvfs-helper", NULL, "cache/url", "disabled"); + trace2_data_string(TR2_CAT, NULL, "cache/url", "disabled"); return; } + if (!gvfs_cache_server_url || !*gvfs_cache_server_url) { + switch (gh__cmd_opts.cache_server_mode) { + default: + case GH__CACHE_SERVER_MODE__TRUST_WITHOUT_VERIFY: + case GH__CACHE_SERVER_MODE__VERIFY_DISABLE: + trace2_data_string(TR2_CAT, NULL, "cache/url", "unset"); + return; + + case GH__CACHE_SERVER_MODE__VERIFY_ERROR: + die("cache-server not set"); + } + } + /* * If the cache-server and main Git server have the same URL, we * can silently disable the cache-server (by NOT setting the field @@ -1317,14 +1377,14 @@ static void select_cache_server(void) */ if (!strcmp(gvfs_cache_server_url, gh__global.main_url)) { gh__cmd_opts.try_fallback = 0; - trace2_data_string("gvfs-helper", NULL, "cache/url", "same"); + trace2_data_string(TR2_CAT, NULL, "cache/url", "same"); return; } if (gh__cmd_opts.cache_server_mode == GH__CACHE_SERVER_MODE__TRUST_WITHOUT_VERIFY) { gh__global.cache_server_url = gvfs_cache_server_url; - trace2_data_string("gvfs-helper", NULL, "cache/url", + trace2_data_string(TR2_CAT, NULL, "cache/url", gvfs_cache_server_url); return; } @@ -1365,7 +1425,7 @@ static void select_cache_server(void) if (match) { gh__global.cache_server_url = gvfs_cache_server_url; - trace2_data_string("gvfs-helper", NULL, "cache/url", + trace2_data_string(TR2_CAT, NULL, "cache/url", gvfs_cache_server_url); } @@ -1389,7 +1449,7 @@ static void select_cache_server(void) else warning("could not verify cache-server '%s'", gvfs_cache_server_url); - trace2_data_string("gvfs-helper", NULL, "cache/url", + trace2_data_string(TR2_CAT, NULL, "cache/url", "disabled"); } @@ -1575,27 +1635,14 @@ static void select_odb(void) } /* - * Create a tempfile to stream the packfile into. - * - * We create a tempfile in the chosen ODB directory and let CURL - * automatically stream data to the file. If successful, we can - * later rename it to a proper .pack and run "git index-pack" on - * it to create the corresponding .idx file. 
- * - * TODO I would rather to just stream the packfile directly into - * TODO "git index-pack --stdin" (and save some I/O) because it - * TODO will automatically take care of the rename of both files - * TODO and any other cleanup. BUT INDEX-PACK WILL ONLY WRITE - * TODO TO THE PRIMARY ODB -- it will not write into the alternates - * TODO (this is considered bad form). So we would need to add - * TODO an option to index-pack to handle this. I don't want to - * TODO deal with this issue right now. - * - * TODO Consider using lockfile for this rather than naked tempfile. + * Create a unique tempfile or tempfile-pair inside the + * tempPacks directory. */ -static void create_tempfile_for_packfile( - struct gh__request_params *params, - struct gh__response_status *status) +static void my_create_tempfile( + struct gh__response_status *status, + int b_fdopen, + const char *suffix1, struct tempfile **t1, + const char *suffix2, struct tempfile **t2) { static unsigned int nth = 0; static struct timeval tv = {0}; @@ -1605,15 +1652,15 @@ static void create_tempfile_for_packfile( struct strbuf basename = STRBUF_INIT; struct strbuf buf = STRBUF_INIT; - int len_p; + int len_tp; enum scld_error scld; gh__response_status__zero(status); if (!nth) { /* - * Create a string to use in the name of all packfiles - * created by this process. + * Create a unique string to use in the name of all + * tempfiles created by this process. */ gettimeofday(&tv, NULL); secs = tv.tv_sec; @@ -1626,84 +1673,114 @@ static void create_tempfile_for_packfile( } /* - * Create a for this packfile using a series number , - * so that all of the chunks we download will group together. + * Create a for this instance/pair using a series + * number . */ - strbuf_addf(&basename, "vfs-%s-%04d", date, nth++); + strbuf_addf(&basename, "t-%s-%04d", date, nth++); + + if (!suffix1 || !*suffix1) + suffix1 = "temp"; /* - * We will stream the data into a managed tempfile() in: + * Create full pathname as: * - * "/pack/tempPacks/vfs--.temp" + * "/pack/tempPacks/." */ strbuf_setlen(&buf, 0); strbuf_addbuf(&buf, &gh__global.buf_odb_path); strbuf_complete(&buf, '/'); - strbuf_addstr(&buf, "pack/"); - len_p = buf.len; - strbuf_addstr(&buf, "tempPacks/"); - strbuf_addbuf(&buf, &basename); - strbuf_addstr(&buf, ".temp"); + strbuf_addstr(&buf, "pack/tempPacks/"); + len_tp = buf.len; + strbuf_addf( &buf, "%s.%s", basename.buf, suffix1); scld = safe_create_leading_directories(buf.buf); if (scld != SCLD_OK && scld != SCLD_EXISTS) { strbuf_addf(&status->error_message, - "could not create directory for packfile: '%s'", + "could not create directory for tempfile: '%s'", buf.buf); status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; goto cleanup; } - params->tempfile = create_tempfile(buf.buf); - if (!params->tempfile) { + *t1 = create_tempfile(buf.buf); + if (!*t1) { strbuf_addf(&status->error_message, - "could not create tempfile for packfile: '%s'", + "could not create tempfile: '%s'", buf.buf); status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; goto cleanup; } - - fdopen_tempfile(params->tempfile, "w"); - - /* - * After the download is complete, we will need to steal the file - * from the tempfile() class (so that it doesn't magically delete - * it when we close the file handle) and then index it. - * - * We do this into the tempPacks directory to avoid contaminating - * the real pack directory until we know there is no corruption. 
- * - * "/pack/tempPacks/vfs--.temp.pack" - * "/pack/tempPacks/vfs--.temp.idx" - */ - strbuf_setlen(¶ms->temp_path_pack, 0); - strbuf_addf(¶ms->temp_path_pack, "%s.pack", buf.buf); - - strbuf_setlen(¶ms->temp_path_idx, 0); - strbuf_addf(¶ms->temp_path_idx, "%s.idx", buf.buf); + if (b_fdopen) + fdopen_tempfile(*t1, "w"); /* - * Later, if all goes well, we will install them as: + * Optionally create a peer tempfile with the same basename. + * (This is useful for prefetching .pack and .idx pairs.) * - * "/pack/vfs--.pack" - * "/pack/vfs--.idx" + * "/pack/tempPacks/." */ - strbuf_setlen(&buf, len_p); - strbuf_setlen(¶ms->final_path_pack, 0); - strbuf_addf(¶ms->final_path_pack, "%s%s.pack", - buf.buf, basename.buf); - strbuf_setlen(¶ms->final_path_idx, 0); - strbuf_addf(¶ms->final_path_idx, "%s%s.idx", - buf.buf, basename.buf); - strbuf_setlen(¶ms->final_packfile_filename, 0); - strbuf_addf(¶ms->final_packfile_filename, "%s.pack", - basename.buf); + if (suffix2 && *suffix2 && t2) { + strbuf_setlen(&buf, len_tp); + strbuf_addf( &buf, "%s.%s", basename.buf, suffix2); + + *t2 = create_tempfile(buf.buf); + if (!*t2) { + strbuf_addf(&status->error_message, + "could not create tempfile: '%s'", + buf.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; + goto cleanup; + } + if (b_fdopen) + fdopen_tempfile(*t2, "w"); + } cleanup: strbuf_release(&buf); strbuf_release(&basename); } +/* + * Create pathnames to the final location of the .pack and .idx + * files in the ODB. These are of the form: + * + * "/pack/-[-]." + * + * For example, for prefetch packs, will be the epoch + * timestamp and will be the packfile hash. + */ +static void create_final_packfile_pathnames( + const char *term_1, const char *term_2, const char *term_3, + struct strbuf *pack_path, struct strbuf *idx_path, + struct strbuf *pack_filename) +{ + struct strbuf base = STRBUF_INIT; + struct strbuf path = STRBUF_INIT; + + if (term_3 && *term_3) + strbuf_addf(&base, "%s-%s-%s", term_1, term_2, term_3); + else + strbuf_addf(&base, "%s-%s", term_1, term_2); + + strbuf_setlen(pack_filename, 0); + strbuf_addf( pack_filename, "%s.pack", base.buf); + + strbuf_addbuf(&path, &gh__global.buf_odb_path); + strbuf_complete(&path, '/'); + strbuf_addstr(&path, "pack/"); + + strbuf_setlen(pack_path, 0); + strbuf_addbuf(pack_path, &path); + strbuf_addf( pack_path, "%s.pack", base.buf); + + strbuf_setlen(idx_path, 0); + strbuf_addbuf(idx_path, &path); + strbuf_addf( idx_path, "%s.idx", base.buf); + + strbuf_release(&base); + strbuf_release(&path); +} + /* * Create a pathname to the loose object in the shared-cache ODB * with the given OID. Try to "mkdir -p" to ensure the parent @@ -1731,54 +1808,95 @@ static int create_loose_pathname_in_odb(struct strbuf *buf_path, return 0; } -/* - * Create a tempfile to stream a loose object into. - * - * We create a tempfile in the chosen ODB directory and let CURL - * automatically stream data to the file. - * - * We put it directly in the "/xx/" directory. 
- */ -static void create_tempfile_for_loose( - struct gh__request_params *params, - struct gh__response_status *status) +static void my_run_index_pack(struct gh__request_params *params, + struct gh__response_status *status, + const struct strbuf *temp_path_pack, + const struct strbuf *temp_path_idx, + struct strbuf *packfile_checksum) { - static int nth = 0; - struct strbuf buf_path = STRBUF_INIT; + struct child_process ip = CHILD_PROCESS_INIT; + struct strbuf ip_stdout = STRBUF_INIT; - gh__response_status__zero(status); + strvec_push(&ip.args, "git"); + strvec_push(&ip.args, "index-pack"); + + if (gh__cmd_opts.show_progress) { + strvec_push(&ip.args, "-v"); + ip.err = 0; + } else { + ip.err = -1; + ip.no_stderr = 1; + } - if (create_loose_pathname_in_odb(&buf_path, ¶ms->loose_oid)) { + strvec_pushl(&ip.args, "-o", temp_path_idx->buf, NULL); + strvec_push(&ip.args, temp_path_pack->buf); + ip.no_stdin = 1; + ip.out = -1; + + if (pipe_command(&ip, NULL, 0, &ip_stdout, 0, NULL, 0)) { + unlink(temp_path_pack->buf); + unlink(temp_path_idx->buf); strbuf_addf(&status->error_message, - "cannot create directory for loose object '%s'", - buf_path.buf); - status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; + "index-pack failed on '%s'", + temp_path_pack->buf); + /* + * Lets assume that index-pack failed because the + * downloaded file is corrupt (truncated). + * + * Retry it as if the network had dropped. + */ + status->retry = GH__RETRY_MODE__TRANSIENT; + status->ec = GH__ERROR_CODE__INDEX_PACK_FAILED; goto cleanup; } - /* Remember the full path of the final destination. */ - strbuf_setlen(¶ms->loose_path, 0); - strbuf_addbuf(¶ms->loose_path, &buf_path); + if (packfile_checksum) { + /* + * stdout from index-pack should have the packfile hash. + * Extract it and use it in the final packfile name. + * + * TODO What kind of validation should we do on the + * TODO string and is there ever any other output besides + * TODO just the checksum ? + */ + strbuf_trim_trailing_newline(&ip_stdout); - /* - * Build a unique tempfile pathname based upon it. We avoid - * using lockfiles to avoid issues with stale locks after - * crashes. 
- */ - strbuf_addf(&buf_path, ".%08u.%.06u.temp", getpid(), nth++); + strbuf_addbuf(packfile_checksum, &ip_stdout); + } - params->tempfile = create_tempfile(buf_path.buf); - if (!params->tempfile) { - strbuf_addstr(&status->error_message, - "could not create tempfile for loose object"); - status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; - goto cleanup; +cleanup: + strbuf_release(&ip_stdout); + child_process_clear(&ip); +} + +static void my_finalize_packfile(struct gh__request_params *params, + struct gh__response_status *status, + const struct strbuf *temp_path_pack, + const struct strbuf *temp_path_idx, + struct strbuf *final_path_pack, + struct strbuf *final_path_idx, + struct strbuf *final_filename) +{ + if (finalize_object_file(temp_path_pack->buf, final_path_pack->buf) || + finalize_object_file(temp_path_idx->buf, final_path_idx->buf)) { + unlink(temp_path_pack->buf); + unlink(temp_path_idx->buf); + unlink(final_path_pack->buf); + unlink(final_path_idx->buf); + strbuf_addf(&status->error_message, + "could not install packfile '%s'", + final_path_pack->buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; + return; } - fdopen_tempfile(params->tempfile, "w"); + if (params->result_list) { + struct strbuf result_msg = STRBUF_INIT; -cleanup: - strbuf_release(&buf_path); + strbuf_addf(&result_msg, "packfile %s", final_filename->buf); + string_list_append(params->result_list, result_msg.buf); + strbuf_release(&result_msg); + } } /* @@ -1788,93 +1906,337 @@ static void create_tempfile_for_loose( static void install_packfile(struct gh__request_params *params, struct gh__response_status *status) { - struct child_process ip = CHILD_PROCESS_INIT; + struct strbuf temp_path_pack = STRBUF_INIT; + struct strbuf temp_path_idx = STRBUF_INIT; + struct strbuf packfile_checksum = STRBUF_INIT; + struct strbuf final_path_pack = STRBUF_INIT; + struct strbuf final_path_idx = STRBUF_INIT; + struct strbuf final_filename = STRBUF_INIT; + + gh__response_status__zero(status); /* - * When we request more than 1 object, the server should always - * send us a packfile. + * After the download is complete, we will need to steal the file + * from the tempfile() class (so that it doesn't magically delete + * it when we close the file handle) and then index it. 
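+	 * The stolen file keeps its tempfile name; we rename it to a
+	 * ".pack" pathname, have index-pack write the matching ".idx"
+	 * beside it, and only then move both into the pack directory.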
*/ - if (strcmp(status->content_type.buf, - "application/x-git-packfile")) { - strbuf_addf(&status->error_message, - "install_packfile: received unknown content-type '%s'", - status->content_type.buf); - status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; - goto cleanup; - } - - gh__response_status__zero(status); + strbuf_addf(&temp_path_pack, "%s.pack", + get_tempfile_path(params->tempfile)); + strbuf_addf(&temp_path_idx, "%s.idx", + get_tempfile_path(params->tempfile)); if (rename_tempfile(¶ms->tempfile, - params->temp_path_pack.buf) == -1) { + temp_path_pack.buf) == -1) { strbuf_addf(&status->error_message, "could not rename packfile to '%s'", - params->temp_path_pack.buf); + temp_path_pack.buf); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; goto cleanup; } - strvec_push(&ip.args, "index-pack"); - if (gh__cmd_opts.show_progress) - strvec_push(&ip.args, "-v"); - strvec_pushl(&ip.args, "-o", params->temp_path_idx.buf, NULL); - strvec_push(&ip.args, params->temp_path_pack.buf); - ip.git_cmd = 1; - ip.no_stdin = 1; - ip.no_stdout = 1; + my_run_index_pack(params, status, &temp_path_pack, &temp_path_idx, + &packfile_checksum); + if (status->ec != GH__ERROR_CODE__OK) + goto cleanup; + + create_final_packfile_pathnames("vfs", packfile_checksum.buf, NULL, + &final_path_pack, &final_path_idx, + &final_filename); + my_finalize_packfile(params, status, + &temp_path_pack, &temp_path_idx, + &final_path_pack, &final_path_idx, + &final_filename); + +cleanup: + strbuf_release(&temp_path_pack); + strbuf_release(&temp_path_idx); + strbuf_release(&packfile_checksum); + strbuf_release(&final_path_pack); + strbuf_release(&final_path_idx); + strbuf_release(&final_filename); +} + +/* + * bswap.h only defines big endian functions. + * The GVFS Protocol defines fields in little endian. + */ +static inline uint64_t my_get_le64(uint64_t le_val) +{ +#if GIT_BYTE_ORDER == GIT_LITTLE_ENDIAN + return le_val; +#else + return default_bswap64(le_val); +#endif +} + +#define MY_MIN(x,y) (((x) < (y)) ? (x) : (y)) +#define MY_MAX(x,y) (((x) > (y)) ? (x) : (y)) + +/* + * Copy the `nr_bytes_total` from `fd_in` to `fd_out`. + * + * This could be used to extract a single packfile from + * a multipart file, for example. + */ +static int my_copy_fd_len(int fd_in, int fd_out, ssize_t nr_bytes_total) +{ + char buffer[8192]; + + while (nr_bytes_total > 0) { + ssize_t len_to_read = MY_MIN(nr_bytes_total, sizeof(buffer)); + ssize_t nr_read = xread(fd_in, buffer, len_to_read); + + if (!nr_read) + break; + if (nr_read < 0) + return -1; + + if (write_in_full(fd_out, buffer, nr_read) < 0) + return -1; + + nr_bytes_total -= nr_read; + } + + return 0; +} + +/* + * Copy the `nr_bytes_total` from `fd_in` to `fd_out` AND save the + * final `tail_len` bytes in the given buffer. + * + * This could be used to extract a single packfile from + * a multipart file and read the final SHA into the buffer. + */ +static int my_copy_fd_len_tail(int fd_in, int fd_out, ssize_t nr_bytes_total, + unsigned char *buf_tail, ssize_t tail_len) +{ + memset(buf_tail, 0, tail_len); + + if (nr_bytes_total < tail_len) + return my_copy_fd_len(fd_in, fd_out, nr_bytes_total); + + if (my_copy_fd_len(fd_in, fd_out, (nr_bytes_total - tail_len)) < 0) + return -1; + + if (xread(fd_in, (char *)buf_tail, tail_len) != tail_len) + return -1; + + if (write_in_full(fd_out, buf_tail, tail_len) < 0) + return -1; + + return 0; +} + +/* + * See the protocol document for the per-packfile header. 
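+ *
+ * All three fields are uint64 values, little-endian on the wire
+ * (see my_get_le64() above): the pack's epoch timestamp, the length
+ * of the .pack data, and the length of the .idx data. An idx_len of
+ * zero or all-ones means the server sent no .idx and the client must
+ * compute one itself with index-pack.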
+ */ +struct ph { + uint64_t timestamp; + uint64_t pack_len; + uint64_t idx_len; +}; + +/* + * Extract the next packfile from the multipack. + */ +static void extract_packfile_from_multipack( + struct gh__request_params *params, + struct gh__response_status *status, + int fd_multipack, + unsigned short k) +{ + struct ph ph; + struct tempfile *tempfile_pack = NULL; + struct tempfile *tempfile_idx = NULL; + int result = -1; + int b_no_idx_in_multipack; + struct object_id packfile_checksum; + char hex_checksum[GIT_MAX_HEXSZ + 1]; + struct strbuf buf_timestamp = STRBUF_INIT; + struct strbuf temp_path_pack = STRBUF_INIT; + struct strbuf temp_path_idx = STRBUF_INIT; + struct strbuf final_path_pack = STRBUF_INIT; + struct strbuf final_path_idx = STRBUF_INIT; + struct strbuf final_filename = STRBUF_INIT; + + if (xread(fd_multipack, &ph, sizeof(ph)) != sizeof(ph)) { + strbuf_addf(&status->error_message, + "could not read header for packfile[%d] in multipack", + k); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; + goto done; + } + + ph.timestamp = my_get_le64(ph.timestamp); + ph.pack_len = my_get_le64(ph.pack_len); + ph.idx_len = my_get_le64(ph.idx_len); + + if (!ph.pack_len) { + strbuf_addf(&status->error_message, + "packfile[%d]: zero length packfile?", k); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; + goto done; + } + + b_no_idx_in_multipack = (ph.idx_len == maximum_unsigned_value_of_type(uint64_t) || + ph.idx_len == 0); + + if (b_no_idx_in_multipack) { + my_create_tempfile(status, 0, "pack", &tempfile_pack, NULL, NULL); + if (!tempfile_pack) + goto done; + } else { + /* create a pair of tempfiles with the same basename */ + my_create_tempfile(status, 0, "pack", &tempfile_pack, "idx", &tempfile_idx); + if (!tempfile_pack || !tempfile_idx) + goto done; + } /* - * Note that I DO NOT have a trace2 region around the - * index-pack process by itself. Currently, we are inside the - * trace2 region for running the request and that's fine. - * Later, if/when we stream the download directly to - * index-pack, it will be inside under the same region anyway. - * So, I'm not going to introduce it here. + * Copy the current packfile from the open stream and capture + * the checksum. + * + * TODO This assumes that the checksum is SHA1. Fix this if/when + * TODO Git converts to SHA256. */ - if (run_command(&ip)) { - unlink(params->temp_path_pack.buf); - unlink(params->temp_path_idx.buf); + result = my_copy_fd_len_tail(fd_multipack, + get_tempfile_fd(tempfile_pack), + ph.pack_len, + packfile_checksum.hash, + GIT_SHA1_RAWSZ); + packfile_checksum.algo = GIT_HASH_SHA1; + + if (result < 0){ strbuf_addf(&status->error_message, - "index-pack failed on '%s'", - params->temp_path_pack.buf); + "could not extract packfile[%d] from multipack", + k); + goto done; + } + strbuf_addstr(&temp_path_pack, get_tempfile_path(tempfile_pack)); + close_tempfile_gently(tempfile_pack); + + oid_to_hex_r(hex_checksum, &packfile_checksum); + + if (b_no_idx_in_multipack) { /* - * Lets assume that index-pack failed because the - * downloaded file is corrupt (truncated). - * - * Retry it as if the network had dropped. + * The server did not send the corresponding .idx, so + * we have to compute it ourselves. 
+		 */
+		strbuf_addbuf(&temp_path_idx, &temp_path_pack);
+		strbuf_strip_suffix(&temp_path_idx, ".pack");
+		strbuf_addstr(&temp_path_idx, ".idx");
+
+		my_run_index_pack(params, status,
+				  &temp_path_pack, &temp_path_idx,
+				  NULL);
+		if (status->ec != GH__ERROR_CODE__OK)
+			goto done;
+
+	} else {
+		/*
+		 * The server sends the .idx immediately after the .pack in
+		 * the data stream. I'm tempted to verify it, but that
+		 * defeats the purpose of having it cached...
+		 */
+		if (my_copy_fd_len(fd_multipack, get_tempfile_fd(tempfile_idx),
+				   ph.idx_len) < 0) {
+			strbuf_addf(&status->error_message,
+				    "could not extract index[%d] in multipack",
+				    k);
+			status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH;
+			goto done;
+		}
+
+		strbuf_addstr(&temp_path_idx, get_tempfile_path(tempfile_idx));
+		close_tempfile_gently(tempfile_idx);
+	}
+
+	strbuf_addf(&buf_timestamp, "%u", (unsigned int)ph.timestamp);
+	create_final_packfile_pathnames("prefetch", buf_timestamp.buf, hex_checksum,
+					&final_path_pack, &final_path_idx,
+					&final_filename);
+
+	my_finalize_packfile(params, status,
+			     &temp_path_pack, &temp_path_idx,
+			     &final_path_pack, &final_path_idx,
+			     &final_filename);
+
+done:
+	delete_tempfile(&tempfile_pack);
+	delete_tempfile(&tempfile_idx);
+	strbuf_release(&temp_path_pack);
+	strbuf_release(&temp_path_idx);
+	strbuf_release(&final_path_pack);
+	strbuf_release(&final_path_idx);
+	strbuf_release(&final_filename);
+}
+
+/*
+ * Cut apart the received multipart response into individual packfiles
+ * and install each one.
+ */
+static void install_prefetch(struct gh__request_params *params,
+			     struct gh__response_status *status)
+{
+	static unsigned char v1_h[6] = { 'G', 'P', 'R', 'E', ' ', 0x01 };
+
+	struct mh {
+		unsigned char h[6];
+		unsigned char np[2];
+	};
+
+	struct mh mh;
+	unsigned short np;
+	unsigned short k;
+	int fd = -1;
+
+	struct strbuf temp_path_mp = STRBUF_INIT;
+
+	/*
+	 * Steal the multi-part file from the tempfile class.
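+	 * Renaming it detaches it from tempfile cleanup; we then reopen
+	 * the file read-only, carve out each packfile (and optional idx),
+	 * and unlink the multipart file when finished.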
+ */ + strbuf_addf(&temp_path_mp, "%s.mp", get_tempfile_path(params->tempfile)); + if (rename_tempfile(¶ms->tempfile, temp_path_mp.buf) == -1) { + strbuf_addf(&status->error_message, + "could not rename prefetch tempfile to '%s'", + temp_path_mp.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; goto cleanup; } - if (finalize_object_file(params->temp_path_pack.buf, - params->final_path_pack.buf) || - finalize_object_file(params->temp_path_idx.buf, - params->final_path_idx.buf)) { - unlink(params->temp_path_pack.buf); - unlink(params->temp_path_idx.buf); - unlink(params->final_path_pack.buf); - unlink(params->final_path_idx.buf); + fd = git_open_cloexec(temp_path_mp.buf, O_RDONLY); + if (fd == -1) { strbuf_addf(&status->error_message, - "could not install packfile '%s'", - params->final_path_pack.buf); - status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; + "could not reopen prefetch tempfile '%s'", + temp_path_mp.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; goto cleanup; } + if ((xread(fd, &mh, sizeof(mh)) != sizeof(mh)) || + (memcmp(mh.h, &v1_h, sizeof(mh.h)))) { + strbuf_addstr(&status->error_message, + "invalid prefetch multipart header"); + goto cleanup; + } - if (params->result_list) { - struct strbuf result_msg = STRBUF_INIT; + np = (unsigned short)mh.np[0] + ((unsigned short)mh.np[1] << 8); + if (np) + trace2_data_intmax(TR2_CAT, NULL, + "prefetch/packfile_count", np); - strbuf_addf(&result_msg, "packfile %s", - params->final_packfile_filename.buf); - string_list_append(params->result_list, result_msg.buf); - strbuf_release(&result_msg); + for (k = 0; k < np; k++) { + extract_packfile_from_multipack(params, status, fd, k); + if (status->ec != GH__ERROR_CODE__OK) + break; } cleanup: - child_process_clear(&ip); + if (fd != -1) + close(fd); + + unlink(temp_path_mp.buf); + strbuf_release(&temp_path_mp); } /* @@ -1911,21 +2273,7 @@ static void install_loose(struct gh__request_params *params, struct gh__response_status *status) { struct strbuf tmp_path = STRBUF_INIT; - - /* - * We expect a loose object when we do a GET -or- when we - * do a POST with only 1 object. - * - * Note that this content type is singular, not plural. - */ - if (strcmp(status->content_type.buf, - "application/x-git-loose-object")) { - strbuf_addf(&status->error_message, - "install_loose: received unknown content-type '%s'", - status->content_type.buf); - status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; - return; - } + struct strbuf loose_path = STRBUF_INIT; gh__response_status__zero(status); @@ -1963,11 +2311,19 @@ static void install_loose(struct gh__request_params *params, * collision we have to assume something else is happening in * parallel and we lost the race. And that's OK. 
*/ - if (finalize_object_file(tmp_path.buf, params->loose_path.buf)) { + if (create_loose_pathname_in_odb(&loose_path, ¶ms->loose_oid)) { + strbuf_addf(&status->error_message, + "cannot create directory for loose object '%s'", + loose_path.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE; + goto cleanup; + } + + if (finalize_object_file(tmp_path.buf, loose_path.buf)) { unlink(tmp_path.buf); strbuf_addf(&status->error_message, "could not install loose object '%s'", - params->loose_path.buf); + loose_path.buf); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE; goto cleanup; } @@ -1983,6 +2339,57 @@ static void install_loose(struct gh__request_params *params, cleanup: strbuf_release(&tmp_path); + strbuf_release(&loose_path); +} + +static void install_result(struct gh__request_params *params, + struct gh__response_status *status) +{ + if (params->objects_mode == GH__OBJECTS_MODE__PREFETCH) { + /* + * The "gvfs/prefetch" API is the only thing that sends + * these multi-part packfiles. According to the protocol + * documentation, they will have this x- content type. + * + * However, it appears that there is a BUG in the origin + * server causing it to sometimes send "text/html" instead. + * So, we silently handle both. + */ + if (!strcmp(status->content_type.buf, + "application/x-gvfs-timestamped-packfiles-indexes")) { + install_prefetch(params, status); + return; + } + + if (!strcmp(status->content_type.buf, "text/html")) { + install_prefetch(params, status); + return; + } + } + + if (!strcmp(status->content_type.buf, "application/x-git-packfile")) { + assert(params->b_is_post); + assert(params->objects_mode == GH__OBJECTS_MODE__POST); + + install_packfile(params, status); + return; + } + + if (!strcmp(status->content_type.buf, + "application/x-git-loose-object")) { + /* + * We get these for "gvfs/objects" GET and POST requests. + * + * Note that this content type is singular, not plural. + */ + install_loose(params, status); + return; + } + + strbuf_addf(&status->error_message, + "install_result: received unknown content-type '%s'", + status->content_type.buf); + status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; } /* @@ -2048,7 +2455,7 @@ static size_t parse_resp_hdr(char *buffer, size_t size, size_t nitems, * Other servers have similar sets of values, but I haven't * compared them in depth. 
*/ - // trace2_printf("Throttle: %s %s", key.buf, val.buf); + // trace2_printf("%s: Throttle: %s %s", TR2_CAT, key.buf, val.buf); if (!strcmp(key.buf, "X-RateLimit-Resource")) { /* @@ -2059,7 +2466,7 @@ static size_t parse_resp_hdr(char *buffer, size_t size, size_t nitems, strbuf_addstr(&key, "ratelimit/resource"); strbuf_addstr(&key, gh__server_type_label[params->server_type]); - trace2_data_string("gvfs-helper", NULL, key.buf, val.buf); + trace2_data_string(TR2_CAT, NULL, key.buf, val.buf); } else if (!strcmp(key.buf, "X-RateLimit-Delay")) { @@ -2075,7 +2482,7 @@ static size_t parse_resp_hdr(char *buffer, size_t size, size_t nitems, git_parse_ulong(val.buf, &tarpit_delay_ms); - trace2_data_intmax("gvfs-helper", NULL, key.buf, tarpit_delay_ms); + trace2_data_intmax(TR2_CAT, NULL, key.buf, tarpit_delay_ms); } else if (!strcmp(key.buf, "X-RateLimit-Limit")) { @@ -2173,7 +2580,7 @@ static void do_throttle_spin(struct gh__request_params *params, strbuf_addstr(®ion, tr2_label); strbuf_addstr(®ion, gh__server_type_label[params->server_type]); - trace2_region_enter("gvfs-helper", region.buf, NULL); + trace2_region_enter(TR2_CAT, region.buf, NULL); if (gh__cmd_opts.show_progress) progress = start_progress(progress_msg, duration); @@ -2189,7 +2596,7 @@ static void do_throttle_spin(struct gh__request_params *params, display_progress(progress, duration); stop_progress(&progress); - trace2_region_leave("gvfs-helper", region.buf, NULL); + trace2_region_leave(TR2_CAT, region.buf, NULL); strbuf_release(®ion); } @@ -2351,11 +2758,7 @@ static void do_req(const char *url_base, if (params->tempfile) delete_tempfile(¶ms->tempfile); - if (params->b_is_post) - create_tempfile_for_packfile(params, status); - - create_tempfile_for_loose(params, status); - + my_create_tempfile(status, 1, NULL, ¶ms->tempfile, NULL, NULL); if (!params->tempfile || status->ec != GH__ERROR_CODE__OK) return; } else { @@ -2601,6 +3004,7 @@ static void do__http_get__gvfs_config(struct gh__response_status *status, /* cache-servers do not handle gvfs/config REST calls */ params.b_permit_cache_server_if_defined = 0; params.buffer = config_data; + params.objects_mode = GH__OBJECTS_MODE__NONE; params.object_count = 1; /* a bit of a lie */ @@ -2665,6 +3069,7 @@ static void do__http_get__gvfs_object(struct gh__response_status *status, params.b_is_post = 0; params.b_write_to_file = 1; params.b_permit_cache_server_if_defined = 1; + params.objects_mode = GH__OBJECTS_MODE__GET; params.object_count = 1; @@ -2721,6 +3126,7 @@ static void do__http_post__gvfs_objects(struct gh__response_status *status, params.b_is_post = 1; params.b_write_to_file = 1; params.b_permit_cache_server_if_defined = 1; + params.objects_mode = GH__OBJECTS_MODE__POST; params.post_payload = &jw_req.json; @@ -2757,6 +3163,126 @@ static void do__http_post__gvfs_objects(struct gh__response_status *status, jw_release(&jw_req); } +struct find_last_data { + timestamp_t timestamp; + int nr_files; +}; + +static void cb_find_last(const char *full_path, size_t full_path_len, + const char *file_path, void *void_data) +{ + struct find_last_data *data = void_data; + const char *val; + timestamp_t t; + + if (!skip_prefix(file_path, "prefetch-", &val)) + return; + if (!ends_with(val, ".pack")) + return; + + data->nr_files++; + + /* + * We expect prefetch packfiles named like: + * + * prefetch--.pack + */ + t = strtol(val, NULL, 10); + + data->timestamp = MY_MAX(t, data->timestamp); +} + +/* + * Find the server timestamp on the last prefetch packfile that + * we have in the ODB. 
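+ *
+ * Prefetch packs installed by install_prefetch() are named
+ * "prefetch-<timestamp>-<checksum>.pack", so cb_find_last() can
+ * parse the server timestamp directly out of each filename.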
+ * + * TODO I'm going to assume that all prefetch packs are created + * TODO equal and take the one with the largest t value. + * TODO + * TODO Or should we look for one marked with .keep ? + * + * TODO Alternatively, should we maybe get the 2nd largest? + * TODO (Or maybe subtract an hour delta from the largest?) + * TODO + * TODO Since each cache-server maintains its own set of prefetch + * TODO packs (such that 2 requests may hit 2 different + * TODO load-balanced servers and get different answers (with or + * TODO without clock-skew issues)), is it possible for us to miss + * TODO the absolute fringe of new commits and trees? + * TODO + * TODO That is, since the cache-server generates hourly prefetch + * TODO packs, we could do a prefetch and be up-to-date, but then + * TODO do the main fetch and hit a different cache/main server + * TODO and be behind by as much as an hour and have to demand- + * TODO load the commits/trees. + * + * TODO Alternatively, should we compare the last timestamp found + * TODO with "now" and silently do nothing if within an epsilon? + */ +static void find_last_prefetch_timestamp(timestamp_t *last) +{ + struct find_last_data data; + + memset(&data, 0, sizeof(data)); + + for_each_file_in_pack_dir(gh__global.buf_odb_path.buf, cb_find_last, &data); + + *last = data.timestamp; +} + +/* + * Call "gvfs/prefetch[?lastPackTimestamp=]" REST API to + * fetch a series of packfiles and write them to the ODB. + * + * Return a list of packfile names. + */ +static void do__http_get__gvfs_prefetch(struct gh__response_status *status, + timestamp_t seconds_since_epoch, + struct string_list *result_list) +{ + struct gh__request_params params = GH__REQUEST_PARAMS_INIT; + struct strbuf component_url = STRBUF_INIT; + + gh__response_status__zero(status); + + strbuf_addstr(&component_url, "gvfs/prefetch"); + + if (!seconds_since_epoch) + find_last_prefetch_timestamp(&seconds_since_epoch); + if (seconds_since_epoch) + strbuf_addf(&component_url, "?lastPackTimestamp=%"PRItime, + seconds_since_epoch); + + params.b_is_post = 0; + params.b_write_to_file = 1; + params.b_permit_cache_server_if_defined = 1; + params.objects_mode = GH__OBJECTS_MODE__PREFETCH; + + params.object_count = -1; + + params.result_list = result_list; + + params.headers = http_copy_default_headers(); + params.headers = curl_slist_append(params.headers, + "X-TFS-FedAuthRedirect: Suppress"); + params.headers = curl_slist_append(params.headers, + "Pragma: no-cache"); + params.headers = curl_slist_append(params.headers, + "Accept: application/x-gvfs-timestamped-packfiles-indexes"); + + if (gh__cmd_opts.show_progress) + strbuf_addf(¶ms.progress_base_phase3_msg, + "Prefetch %"PRItime" (%s)", + seconds_since_epoch, + show_date(seconds_since_epoch, 0, + DATE_MODE(ISO8601))); + + do_req__with_fallback(component_url.buf, ¶ms, status); + + gh__request_params__release(¶ms); + strbuf_release(&component_url); +} + /* * Drive one or more HTTP GET requests to fetch the objects * in the given OIDSET. These are received into loose objects. @@ -3057,7 +3583,83 @@ static enum gh__error_code do_sub_cmd__post(int argc, const char **argv) } /* - * Handle the 'objects.get' and 'objects.post' verbs in "server mode". + * Interpret the given string as a timestamp and compute an absolute + * UTC-seconds-since-epoch value (and without TZ). + * + * Note that the gvfs/prefetch API only accepts seconds since epoch, + * so that is all we really need here. 
But there is a tradition of + * various Git commands allowing a variety of formats for args like + * this. For example, see the `--date` arg in `git commit`. We allow + * these other forms mainly for testing purposes. + */ +static int my_parse_since(const char *since, timestamp_t *p_timestamp) +{ + int offset = 0; + int errors = 0; + unsigned long t; + + if (!parse_date_basic(since, p_timestamp, &offset)) + return 0; + + t = approxidate_careful(since, &errors); + if (!errors) { + *p_timestamp = t; + return 0; + } + + return -1; +} + +/* + * Ask the server for all available packfiles -or- all available since + * the given timestamp. + */ +static enum gh__error_code do_sub_cmd__prefetch(int argc, const char **argv) +{ + static const char *since_str; + static struct option prefetch_options[] = { + OPT_STRING(0, "since", &since_str, N_("since"), N_("seconds since epoch")), + OPT_END(), + }; + + struct gh__response_status status = GH__RESPONSE_STATUS_INIT; + struct string_list result_list = STRING_LIST_INIT_DUP; + enum gh__error_code ec = GH__ERROR_CODE__OK; + timestamp_t seconds_since_epoch = 0; + int k; + + trace2_cmd_mode("prefetch"); + + if (argc > 1 && !strcmp(argv[1], "-h")) + usage_with_options(prefetch_usage, prefetch_options); + + argc = parse_options(argc, argv, NULL, prefetch_options, prefetch_usage, 0); + if (since_str && *since_str) { + if (my_parse_since(since_str, &seconds_since_epoch)) + die("could not parse 'since' field"); + } + + finish_init(1); + + do__http_get__gvfs_prefetch(&status, seconds_since_epoch, &result_list); + + ec = status.ec; + + for (k = 0; k < result_list.nr; k++) + printf("%s\n", result_list.items[k].string); + + if (ec != GH__ERROR_CODE__OK) + error("prefetch: %s", status.error_message.buf); + + gh__response_status__release(&status); + string_list_clear(&result_list, 0); + + return ec; +} + +/* + * Handle the 'objects.get' and 'objects.post' and 'objects.prefetch' + * verbs in "server mode". * * Only call error() and set ec for hard errors where we cannot * communicate correctly with the foreground client process. Pass any @@ -3077,45 +3679,73 @@ static enum gh__error_code do_server_subprocess__objects(const char *verb_line) int k; enum gh__objects_mode objects_mode; unsigned long nr_oid_total = 0; + timestamp_t seconds_since_epoch = 0; if (!strcmp(verb_line, "objects.get")) objects_mode = GH__OBJECTS_MODE__GET; else if (!strcmp(verb_line, "objects.post")) objects_mode = GH__OBJECTS_MODE__POST; + else if (!strcmp(verb_line, "objects.prefetch")) + objects_mode = GH__OBJECTS_MODE__PREFETCH; else { error("server: unexpected objects-mode verb '%s'", verb_line); ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; goto cleanup; } - while (1) { - len = packet_read_line_gently(0, NULL, &line); - if (len < 0 || !line) - break; + switch (objects_mode) { + case GH__OBJECTS_MODE__GET: + case GH__OBJECTS_MODE__POST: + while (1) { + len = packet_read_line_gently(0, NULL, &line); + if (len < 0 || !line) + break; - if (get_oid_hex(line, &oid)) { - error("server: invalid oid syntax '%s'", line); - ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + if (get_oid_hex(line, &oid)) { + error("server: invalid oid syntax '%s'", line); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + + if (!oidset_insert(&oids, &oid)) + nr_oid_total++; + } + + if (!nr_oid_total) { + /* if zero objects requested, trivial OK. 
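+			 * Reply "ok" (with no odb/loose/packfile result
+			 * lines) and skip the fetch entirely.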
*/ + if (packet_write_fmt_gently(1, "ok\n")) { + error("server: cannot write 'get' result to client"); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + } else + ec = GH__ERROR_CODE__OK; goto cleanup; } - if (!oidset_insert(&oids, &oid)) - nr_oid_total++; - } + if (objects_mode == GH__OBJECTS_MODE__GET) + do__http_get__fetch_oidset(&status, &oids, + nr_oid_total, &result_list); + else + do__http_post__fetch_oidset(&status, &oids, + nr_oid_total, &result_list); + break; - if (!nr_oid_total) { - if (packet_write_fmt_gently(1, "ok\n")) { - error("server: cannot write 'get' result to client"); - ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; - } else - ec = GH__ERROR_CODE__OK; - goto cleanup; - } + case GH__OBJECTS_MODE__PREFETCH: + /* get optional timestamp line */ + while (1) { + len = packet_read_line_gently(0, NULL, &line); + if (len < 0 || !line) + break; - if (objects_mode == GH__OBJECTS_MODE__GET) - do__http_get__fetch_oidset(&status, &oids, nr_oid_total, &result_list); - else - do__http_post__fetch_oidset(&status, &oids, nr_oid_total, &result_list); + seconds_since_epoch = strtoul(line, NULL, 10); + } + + do__http_get__gvfs_prefetch(&status, seconds_since_epoch, + &result_list); + break; + + default: + BUG("unexpected object_mode in switch '%d'", objects_mode); + } /* * Write pathname of the ODB where we wrote all of the objects @@ -3339,12 +3969,16 @@ static enum gh__error_code do_sub_cmd(int argc, const char **argv) if (!strcmp(argv[0], "config")) return do_sub_cmd__config(argc, argv); + if (!strcmp(argv[0], "prefetch")) + return do_sub_cmd__prefetch(argc, argv); + + /* + * server mode is for talking with git.exe via the "gh_client_" API + * using packet-line format. + */ if (!strcmp(argv[0], "server")) return do_sub_cmd__server(argc, argv); - // TODO have "test" mode that could be used to drive - // TODO unit testing. - return GH__ERROR_CODE__USAGE; } diff --git a/t/helper/test-gvfs-protocol.c b/t/helper/test-gvfs-protocol.c index b548d7552df171..97b3d363bf3fa5 100644 --- a/t/helper/test-gvfs-protocol.c +++ b/t/helper/test-gvfs-protocol.c @@ -14,6 +14,7 @@ #include "json-writer.h" #include "oidset.h" #include "date.h" +#include "packfile.h" #define TR2_CAT "test-gvfs-protocol" @@ -541,9 +542,6 @@ static enum worker_result send_loose_object(const struct object_id *oid, return send_http_error(1, 404, "Not Found", -1, WR_MAYHEM); } - trace2_printf("%s: OBJECT type=%d len=%ld '%.40s'", TR2_CAT, - type, size, (const char *)content); - /* * We are blending several somewhat independent concepts here: * @@ -856,7 +854,6 @@ static enum worker_result get_packfile_from_oids( goto done; } - trace2_printf("%s: pack-objects returned %d bytes", TR2_CAT, buf_packfile->len); wr = WR_OK; done: @@ -1004,6 +1001,305 @@ static enum worker_result do__gvfs_objects__post(struct req *req) return wr; } +/* + * bswap.h only defines big endian functions. + * The GVFS Protocol defines fields in little endian. + */ +static inline uint64_t my_get_le64(uint64_t le_val) +{ +#if GIT_BYTE_ORDER == GIT_LITTLE_ENDIAN + return le_val; +#else + return default_bswap64(le_val); +#endif +} + +static inline uint16_t my_get_le16(uint16_t le_val) +{ +#if GIT_BYTE_ORDER == GIT_LITTLE_ENDIAN + return le_val; +#else + return default_bswap16(le_val); +#endif +} + +/* + * GVFS Protocol headers for the multipack format + * All integer values are little-endian on the wire. 
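+ *
+ * A multipack response body is laid out as follows (sketch,
+ * reconstructed from send_multipack() and send_ct_item() below):
+ *
+ *     "GPRE " 0x01        6-byte signature + version (v1_h)
+ *     np                  uint16, number of packfiles
+ *     np times:
+ *         timestamp       uint64
+ *         len_pack        uint64
+ *         len_idx         uint64 (all-ones => no idx data follows)
+ *         pack data       len_pack bytes
+ *         idx data        len_idx bytes, if present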
diff --git a/t/helper/test-gvfs-protocol.c b/t/helper/test-gvfs-protocol.c
index b548d7552df171..97b3d363bf3fa5 100644
--- a/t/helper/test-gvfs-protocol.c
+++ b/t/helper/test-gvfs-protocol.c
@@ -14,6 +14,7 @@
 #include "json-writer.h"
 #include "oidset.h"
 #include "date.h"
+#include "packfile.h"
 
 #define TR2_CAT "test-gvfs-protocol"
 
@@ -541,9 +542,6 @@ static enum worker_result send_loose_object(const struct object_id *oid,
 		return send_http_error(1, 404, "Not Found", -1, WR_MAYHEM);
 	}
 
-	trace2_printf("%s: OBJECT type=%d len=%ld '%.40s'", TR2_CAT,
-		      type, size, (const char *)content);
-
 	/*
 	 * We are blending several somewhat independent concepts here:
 	 *
@@ -856,7 +854,6 @@ static enum worker_result get_packfile_from_oids(
 		goto done;
 	}
 
-	trace2_printf("%s: pack-objects returned %d bytes", TR2_CAT, buf_packfile->len);
 	wr = WR_OK;
 
 done:
@@ -1004,6 +1001,305 @@ static enum worker_result do__gvfs_objects__post(struct req *req)
 	return wr;
 }
 
+/*
+ * bswap.h only defines big endian functions.
+ * The GVFS Protocol defines fields in little endian.
+ */
+static inline uint64_t my_get_le64(uint64_t le_val)
+{
+#if GIT_BYTE_ORDER == GIT_LITTLE_ENDIAN
+	return le_val;
+#else
+	return default_bswap64(le_val);
+#endif
+}
+
+static inline uint16_t my_get_le16(uint16_t le_val)
+{
+#if GIT_BYTE_ORDER == GIT_LITTLE_ENDIAN
+	return le_val;
+#else
+	return default_bswap16(le_val);
+#endif
+}
+
+/*
+ * GVFS Protocol headers for the multipack format.
+ * All integer values are little-endian on the wire.
+ *
+ * Note: technically, the protocol defines the `ph` fields as signed, but
+ * that makes a mess of the bswap routines and we're not going to overflow
+ * them for a very long time.
+ */
+
+static unsigned char v1_h[6] = { 'G', 'P', 'R', 'E', ' ', 0x01 };
+
+struct ph {
+	uint64_t timestamp;
+	uint64_t len_pack;
+	uint64_t len_idx;
+};
+
+/*
+ * Accumulate a list of commits-and-trees packfiles we have in the local ODB.
+ * The test script should have pre-created a set of "ct-<epoch>.pack" and .idx
+ * files for us. We serve these as is and DO NOT try to dynamically create
+ * new commits/trees packfiles (like the cache-server does). We are only
+ * testing if/whether gvfs-helper.exe can receive one or more packfiles and
+ * idx files over the protocol.
+ */
+struct ct_pack_item {
+	struct ph ph;
+	struct strbuf path_pack;
+	struct strbuf path_idx;
+};
+
+static void ct_pack_item__free(struct ct_pack_item *item)
+{
+	if (!item)
+		return;
+	strbuf_release(&item->path_pack);
+	strbuf_release(&item->path_idx);
+	free(item);
+}
+
+struct ct_pack_data {
+	struct ct_pack_item **items;
+	size_t nr, alloc;
+};
+
+static void ct_pack_data__release(struct ct_pack_data *data)
+{
+	int k;
+
+	if (!data)
+		return;
+
+	for (k = 0; k < data->nr; k++)
+		ct_pack_item__free(data->items[k]);
+
+	FREE_AND_NULL(data->items);
+	data->nr = 0;
+	data->alloc = 0;
+}
+
+static void cb_ct_pack(const char *full_path, size_t full_path_len,
+		       const char *file_path, void *void_data)
+{
+	struct ct_pack_data *data = void_data;
+	struct ct_pack_item *item = NULL;
+	struct stat st;
+	const char *v;
+
+	/*
+	 * We only want "ct-<epoch>.pack" files. The test script creates
+	 * cached commits-and-trees packfiles with this prefix to avoid
+	 * confusion with prefetch packfiles received by gvfs-helper.
+	 */
+	if (!ends_with(file_path, ".pack"))
+		return;
+	if (!skip_prefix(file_path, "ct-", &v))
+		return;
+
+	item = (struct ct_pack_item *)xcalloc(1, sizeof(*item));
+	strbuf_init(&item->path_pack, 0);
+	strbuf_addstr(&item->path_pack, full_path);
+
+	strbuf_init(&item->path_idx, 0);
+	strbuf_addstr(&item->path_idx, full_path);
+	strbuf_strip_suffix(&item->path_idx, ".pack");
+	strbuf_addstr(&item->path_idx, ".idx");
+
+	item->ph.timestamp = (uint64_t)strtoul(v, NULL, 10);
+
+	lstat(item->path_pack.buf, &st);
+	item->ph.len_pack = (uint64_t)st.st_size;
+
+	if (string_list_has_string(&mayhem_list, "no_prefetch_idx"))
+		item->ph.len_idx = maximum_unsigned_value_of_type(uint64_t);
+	else if (lstat(item->path_idx.buf, &st) < 0)
+		item->ph.len_idx = maximum_unsigned_value_of_type(uint64_t);
+	else
+		item->ph.len_idx = (uint64_t)st.st_size;
+
+	ALLOC_GROW(data->items, data->nr + 1, data->alloc);
+	data->items[data->nr++] = item;
+}
+
+/*
+ * Sort by increasing EPOCH time.
+ */
+static int ct_pack_sort_compare(const void *_a, const void *_b)
+{
+	const struct ct_pack_item *a = *(const struct ct_pack_item **)_a;
+	const struct ct_pack_item *b = *(const struct ct_pack_item **)_b;
+	return (a->ph.timestamp < b->ph.timestamp) ? -1 : (a->ph.timestamp != b->ph.timestamp);
+}
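
Taken together, v1_h, the packfile count, and struct ph define the entire
multipack body that send_ct_item() and send_multipack() below emit. The
layout, with every multi-byte integer little-endian on the wire:

    offset  size      field
    0       6         v1_h ('G','P','R','E',' ',0x01)
    6       2         np: number of packfiles that follow (uint16)
    8       24        struct ph for the first pack (3 x uint64)
    32      len_pack  raw packfile bytes
    ...     len_idx   raw idx bytes (omitted when len_idx is the
                      0xffffffffffffffff "no idx" sentinel, as in the
                      no_prefetch_idx mayhem case)
            ...       ph/pack[/idx] repeated for each of the np packs

Hence the Content-Length precomputed in send_multipack() is 6 + 2 plus, for
each selected pack, 24 + len_pack (+ len_idx when an idx is present).
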
+static enum worker_result send_ct_item(const struct ct_pack_item *item)
+{
+	struct ph ph_le;
+	int fd_pack = -1;
+	int fd_idx = -1;
+	enum worker_result wr = WR_OK;
+
+	/* send per-packfile header. all fields are little-endian on the wire. */
+	ph_le.timestamp = my_get_le64(item->ph.timestamp);
+	ph_le.len_pack = my_get_le64(item->ph.len_pack);
+	ph_le.len_idx = my_get_le64(item->ph.len_idx);
+
+	if (write_in_full(1, &ph_le, sizeof(ph_le)) < 0) {
+		logerror("unable to write ph_le");
+		wr = WR_IO_ERROR;
+		goto done;
+	}
+
+	trace2_printf("%s: sending prefetch pack '%s'", TR2_CAT, item->path_pack.buf);
+
+	fd_pack = git_open_cloexec(item->path_pack.buf, O_RDONLY);
+	if (fd_pack == -1 || copy_fd(fd_pack, 1)) {
+		logerror("could not send packfile");
+		wr = WR_IO_ERROR;
+		goto done;
+	}
+
+	if (item->ph.len_idx != maximum_unsigned_value_of_type(uint64_t)) {
+		trace2_printf("%s: sending prefetch idx '%s'", TR2_CAT, item->path_idx.buf);
+
+		fd_idx = git_open_cloexec(item->path_idx.buf, O_RDONLY);
+		if (fd_idx == -1 || copy_fd(fd_idx, 1)) {
+			logerror("could not send idx");
+			wr = WR_IO_ERROR;
+			goto done;
+		}
+	}
+
+done:
+	if (fd_pack != -1)
+		close(fd_pack);
+	if (fd_idx != -1)
+		close(fd_idx);
+	return wr;
+}
+
+/*
+ * The GVFS Protocol defines the lastTimeStamp parameter as the value
+ * of the last prefetch pack that the client has. Therefore, we only
+ * want to send newer ones.
+ */
+static int want_ct_pack(const struct ct_pack_item *item, timestamp_t last_timestamp)
+{
+	return item->ph.timestamp > last_timestamp;
+}
+
+static enum worker_result send_multipack(struct ct_pack_data *data,
+					 timestamp_t last_timestamp)
+{
+	struct strbuf response_header = STRBUF_INIT;
+	struct strbuf uuid = STRBUF_INIT;
+	enum worker_result wr;
+	size_t content_len = 0;
+	unsigned short np = 0;
+	unsigned short np_le;
+	int k;
+
+	/*
+	 * Precompute the content-length so that we don't have to deal with
+	 * chunking it.
+	 */
+	content_len += sizeof(v1_h) + sizeof(np);
+	for (k = 0; k < data->nr; k++) {
+		struct ct_pack_item *item = data->items[k];
+
+		if (!want_ct_pack(item, last_timestamp))
+			continue;
+
+		np++;
+		content_len += sizeof(struct ph);
+		content_len += item->ph.len_pack;
+		if (item->ph.len_idx != maximum_unsigned_value_of_type(uint64_t))
+			content_len += item->ph.len_idx;
+	}
+
+	strbuf_addstr(&response_header, "HTTP/1.1 200 OK\r\n");
+	strbuf_addstr(&response_header, "Cache-Control: private\r\n");
+	strbuf_addstr(&response_header,
+		      "Content-Type: application/x-gvfs-timestamped-packfiles-indexes\r\n");
+	strbuf_addf(&response_header, "Content-Length: %d\r\n", (int)content_len);
+	strbuf_addf(&response_header, "Server: test-gvfs-protocol/%s\r\n", git_version_string);
+	strbuf_addf(&response_header, "Date: %s\r\n", show_date(time(NULL), 0, DATE_MODE(RFC2822)));
+	gen_fake_uuid(&uuid);
+	strbuf_addf(&response_header, "X-VSS-E2EID: %s\r\n", uuid.buf);
+	strbuf_addstr(&response_header, "\r\n");
+
+	if (write_in_full(1, response_header.buf, response_header.len) < 0) {
+		logerror("unable to write response header");
+		wr = WR_IO_ERROR;
+		goto done;
+	}
+
+	/* send protocol version header */
+	if (write_in_full(1, v1_h, sizeof(v1_h)) < 0) {
+		logerror("unable to write v1_h");
+		wr = WR_IO_ERROR;
+		goto done;
+	}
+
+	/* send number of packfiles */
+	np_le = my_get_le16(np);
+	if (write_in_full(1, &np_le, sizeof(np_le)) < 0) {
+		logerror("unable to write np");
+		wr = WR_IO_ERROR;
+		goto done;
+	}
+
+	for (k = 0; k < data->nr; k++) {
+		if (!want_ct_pack(data->items[k], last_timestamp))
+			continue;
+
+		wr = send_ct_item(data->items[k]);
+		if (wr != WR_OK)
+			goto done;
+	}
+
+	wr = WR_OK;
+
+done:
+	strbuf_release(&uuid);
+	strbuf_release(&response_header);
+
+	return wr;
+}
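
To make that layout concrete, here is a hypothetical client-side reader for
the stream send_multipack() produces. This is only a sketch (gvfs-helper's
real receive path is not part of this patch); skip_payload() and
read_multipack_stream() are invented names, and the sketch assumes it is
compiled inside this file so that v1_h, struct ph, the my_get_le*() helpers,
and Git's read_in_full() are available:

    /* Drain and discard exactly `len` payload bytes from `fd`. */
    static int skip_payload(int fd, uint64_t len)
    {
    	char buf[8192];

    	while (len) {
    		size_t n = len < sizeof(buf) ? (size_t)len : sizeof(buf);

    		if (read_in_full(fd, buf, n) != (ssize_t)n)
    			return -1;
    		len -= n;
    	}
    	return 0;
    }

    /* Hypothetical reader for the GPRE multipack stream (sketch only). */
    static int read_multipack_stream(int fd)
    {
    	unsigned char hdr[sizeof(v1_h)];
    	uint16_t np_le, np, k;

    	if (read_in_full(fd, hdr, sizeof(hdr)) != sizeof(hdr) ||
    	    memcmp(hdr, v1_h, sizeof(v1_h)))
    		return error("multipack: bad version header");

    	if (read_in_full(fd, &np_le, sizeof(np_le)) != sizeof(np_le))
    		return error("multipack: short read on pack count");
    	np = my_get_le16(np_le); /* wire (LE) to host order */

    	for (k = 0; k < np; k++) {
    		struct ph ph_le;
    		uint64_t len_pack, len_idx;

    		if (read_in_full(fd, &ph_le, sizeof(ph_le)) != sizeof(ph_le))
    			return error("multipack: short read on pack header");
    		len_pack = my_get_le64(ph_le.len_pack);
    		len_idx = my_get_le64(ph_le.len_idx);

    		/* a real client would stream these into temp files */
    		if (skip_payload(fd, len_pack))
    			return error("multipack: truncated packfile");

    		/* the sentinel means the server sent no idx for this pack */
    		if (len_idx != maximum_unsigned_value_of_type(uint64_t) &&
    		    skip_payload(fd, len_idx))
    			return error("multipack: truncated idx");
    	}
    	return 0;
    }

When the idx sentinel is present, a real client has to build the idx itself
(for example via index-pack); the no_prefetch_idx mayhem test later in this
patch exercises exactly that fallback.
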
+static enum worker_result do__gvfs_prefetch__get(struct req *req)
+{
+	struct ct_pack_data data;
+	timestamp_t last_timestamp = 0;
+	enum worker_result wr;
+
+	memset(&data, 0, sizeof(data));
+
+	if (req->quest_args.len) {
+		const char *key = strstr(req->quest_args.buf, "lastPackTimestamp=");
+		if (key) {
+			const char *val;
+			if (skip_prefix(key, "lastPackTimestamp=", &val)) {
+				last_timestamp = strtoumax(val, NULL, 10);
+			}
+		}
+	}
+	trace2_printf("%s: prefetch/since %"PRItime, TR2_CAT, last_timestamp);
+
+	for_each_file_in_pack_dir(get_object_directory(), cb_ct_pack, &data);
+	QSORT(data.items, data.nr, ct_pack_sort_compare);
+
+	wr = send_multipack(&data, last_timestamp);
+
+	ct_pack_data__release(&data);
+
+	return wr;
+}
+
 /*
  * Read the HTTP request up to the start of the optional message-body.
  * We do this byte-by-byte because we have keep-alive turned on and
@@ -1159,6 +1455,11 @@ static enum worker_result req__read(struct req *req, int fd)
 	 * We let our caller read/chunk it in as appropriate.
 	 */
 
 done:
+
+#if 0
+	/*
+	 * This is useful for debugging the request, but very noisy.
+	 */
 	if (trace2_is_enabled()) {
 		struct string_list_item *item;
 		trace2_printf("%s: %s", TR2_CAT, req->start_line.buf);
@@ -1173,6 +1474,7 @@ static enum worker_result req__read(struct req *req, int fd)
 		for_each_string_list_item(item, &req->header_list)
 			trace2_printf("%s: Hdrs: %s", TR2_CAT, item->string);
 	}
+#endif
 
 	return WR_OK;
 }
@@ -1221,6 +1523,12 @@ static enum worker_result dispatch(struct req *req)
 		return do__gvfs_config__get(req);
 	}
 
+	if (!strcmp(req->gvfs_api.buf, "gvfs/prefetch")) {
+
+		if (!strcmp(method, "GET"))
+			return do__gvfs_prefetch__get(req);
+	}
+
 	return send_http_error(1, 501, "Not Implemented", -1,
 			       WR_OK | WR_HANGUP);
 }
diff --git a/t/t5799-gvfs-helper.sh b/t/t5799-gvfs-helper.sh
index bf4174bd84ce00..ffb4eea95768ca 100755
--- a/t/t5799-gvfs-helper.sh
+++ b/t/t5799-gvfs-helper.sh
@@ -24,8 +24,8 @@ test_set_port GIT_TEST_GVFS_PROTOCOL_PORT
 # actually use it). We are only testing explicit object
 # fetching using gvfs-helper.exe in isolation.
 #
-REPO_SRC="$PWD"/repo_src
-REPO_T1="$PWD"/repo_t1
+REPO_SRC="$(pwd)"/repo_src
+REPO_T1="$(pwd)"/repo_t1
 
 # Setup some loopback URLs where test-gvfs-protocol.exe will be
 # listening. We will spawn it directly inside the repo_src directory,
@@ -44,22 +44,22 @@ HOST_PORT=127.0.0.1:$GIT_TEST_GVFS_PROTOCOL_PORT
 ORIGIN_URL=http://$HOST_PORT/servertype/origin
 CACHE_URL=http://$HOST_PORT/servertype/cache
 
-SHARED_CACHE_T1="$PWD"/shared_cache_t1
+SHARED_CACHE_T1="$(pwd)"/shared_cache_t1
 
 # The pid-file is created by test-gvfs-protocol.exe when it starts.
 # The server will shut down if/when we delete it. (This is a little
 # easier than killing it by PID.)
 #
-PID_FILE="$PWD"/pid-file.pid
-SERVER_LOG="$PWD"/OUT.server.log
+PID_FILE="$(pwd)"/pid-file.pid
+SERVER_LOG="$(pwd)"/OUT.server.log
 
 PATH="$GIT_BUILD_DIR/t/helper/:$PATH" && export PATH
 
-OIDS_FILE="$PWD"/oid_list.txt
-OIDS_CT_FILE="$PWD"/oid_ct_list.txt
-OIDS_BLOBS_FILE="$PWD"/oids_blobs_file.txt
-OID_ONE_BLOB_FILE="$PWD"/oid_one_blob_file.txt
-OID_ONE_COMMIT_FILE="$PWD"/oid_one_commit_file.txt
+OIDS_FILE="$(pwd)"/oid_list.txt
+OIDS_CT_FILE="$(pwd)"/oid_ct_list.txt
+OIDS_BLOBS_FILE="$(pwd)"/oids_blobs_file.txt
+OID_ONE_BLOB_FILE="$(pwd)"/oid_one_blob_file.txt
+OID_ONE_COMMIT_FILE="$(pwd)"/oid_one_commit_file.txt
 
 # Get a list of available OIDs in repo_src so that we can try to fetch
 # them and so that we don't have to hard-code a list of known OIDs.
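
Putting dispatch() together with the loopback URLs above, a prefetch exchange
against the test server looks roughly like this (headers abbreviated; the
/servertype/origin prefix reflects how the test URLs are routed here and the
bracketed values are placeholders):

    GET /servertype/origin/gvfs/prefetch?lastPackTimestamp=1000000000 HTTP/1.1
    Host: 127.0.0.1:<port>

    HTTP/1.1 200 OK
    Cache-Control: private
    Content-Type: application/x-gvfs-timestamped-packfiles-indexes
    Content-Length: <n>
    Server: test-gvfs-protocol/<version>
    X-VSS-E2EID: <uuid>

    <multipack body: v1_h, np, then ph + pack [+ idx] per pack>
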
@@ -108,6 +108,30 @@ get_one_commit_oid () { return 0 } +# Create a commits-and-trees packfile for use with "prefetch" +# using the given range of commits. +# +create_commits_and_trees_packfile () { + if test $# -eq 2 + then + epoch=$1 + revs=$2 + else + echo "create_commits_and_trees_packfile: Need 2 args" + return 1 + fi + + pack_file="$REPO_SRC"/.git/objects/pack/ct-$epoch.pack + idx_file="$REPO_SRC"/.git/objects/pack/ct-$epoch.idx + + git -C "$REPO_SRC" pack-objects --stdout --revs --filter=blob:none \ + >"$pack_file" <<-EOF + $revs + EOF + git -C "$REPO_SRC" index-pack -o "$idx_file" "$pack_file" + return 0 +} + test_expect_success 'setup repos' ' test_create_repo "$REPO_SRC" && git -C "$REPO_SRC" branch -M main && @@ -115,9 +139,16 @@ test_expect_success 'setup repos' ' # test_commit_bulk() does magic to create a packfile containing # the new commits. # + # We create branches in repo_src, but also remember the branch OIDs + # in files so that we can refer to them in repo_t1, which will not + # have the commits locally (because we do not clone or fetch). + # test_commit_bulk -C "$REPO_SRC" --filename="batch_a.%s.t" 9 && + git -C "$REPO_SRC" branch B1 && cp "$REPO_SRC"/.git/refs/heads/main m1.branch && + # test_commit_bulk -C "$REPO_SRC" --filename="batch_b.%s.t" 9 && + git -C "$REPO_SRC" branch B2 && cp "$REPO_SRC"/.git/refs/heads/main m2.branch && # # test_commit() creates commits, trees, tags, and blobs and leave @@ -134,8 +165,16 @@ test_expect_success 'setup repos' ' test_commit -C "$REPO_SRC" file7.txt && test_commit -C "$REPO_SRC" file8.txt && test_commit -C "$REPO_SRC" file9.txt && + git -C "$REPO_SRC" branch B3 && cp "$REPO_SRC"/.git/refs/heads/main m3.branch && # + # Create some commits-and-trees-only packfiles for testing prefetch. + # Set arbitrary EPOCH times to make it easier to test fetch-since. + # + create_commits_and_trees_packfile 1000000000 B1 && + create_commits_and_trees_packfile 1100000000 B1..B2 && + create_commits_and_trees_packfile 1200000000 B2..B3 && + # # gvfs-helper.exe writes downloaded objects to a shared-cache directory # rather than the ODB inside the .git directory. # @@ -160,10 +199,10 @@ test_expect_success 'setup repos' ' EOF cat <<-EOF >creds.sh && #!/bin/sh - cat "$PWD"/creds.txt + cat "$(pwd)"/creds.txt EOF chmod 755 creds.sh && - git -C "$REPO_T1" config --local credential.helper "!f() { cat \"$PWD\"/creds.txt; }; f" && + git -C "$REPO_T1" config --local credential.helper "!f() { cat \"$(pwd)\"/creds.txt; }; f" && # # Create some test data sets. # @@ -554,8 +593,8 @@ test_expect_success 'basic: POST-request a single blob' ' # Request a single commit via POST. Per the GVFS Protocol, the server # should implicitly send us a packfile containing the commit and the # trees it references. Confirm that properly handled the receipt of -# the packfile. (Here, we are testing that asking for a single object -# yields a packfile rather than a loose object.) +# the packfile. (Here, we are testing that asking for a single commit +# via POST yields a packfile rather than a loose object.) # # We DO NOT verify that the packfile contains commits/trees and no blobs # because our test helper doesn't implement the filtering. 
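
The three EPOCH values chosen above (1000000000, 1100000000, 1200000000) make
the expected counts in the prefetch tests that follow easy to verify:
want_ct_pack() in the test server keeps only packs strictly newer than the
requested timestamp, so a prefetch with no "since" argument (timestamp 0)
receives all 3 packs, --since="1000000000" receives 2, and a repeat request
that resumes from the newest pack already on disk (1200000000) receives 0.
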
@@ -587,6 +626,105 @@ test_expect_success 'basic: POST-request a single commit' '
+test_expect_success 'basic: PREFETCH w/o arg gets all' '
+	test_when_finished "per_test_cleanup" &&
+	start_gvfs_protocol_server &&
+
+	# Without a "since" argument, the server sends all of the "ct-*.pack"
+	# files since the EPOCH, because we do not have any prefetch packs locally.
+	#
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		prefetch >OUT.output &&
+
+	# gvfs-helper prints a "packfile <name>" message for each received
+	# packfile.
+	#
+	verify_received_packfile_count 3 &&
+
+	stop_gvfs_protocol_server &&
+	verify_connection_count 1
+'
+
+test_expect_success 'basic: PREFETCH w/ arg' '
+	test_when_finished "per_test_cleanup" &&
+	start_gvfs_protocol_server &&
+
+	# Ask for cached packfiles NEWER THAN the given time.
+	#
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		prefetch --since="1000000000" >OUT.output &&
+
+	# gvfs-helper prints a "packfile <name>" message for each received
+	# packfile.
+	#
+	verify_received_packfile_count 2 &&
+
+	stop_gvfs_protocol_server &&
+	verify_connection_count 1
+'
+
+test_expect_success 'basic: PREFETCH mayhem no_prefetch_idx' '
+	test_when_finished "per_test_cleanup" &&
+	start_gvfs_protocol_server_with_mayhem no_prefetch_idx &&
+
+	# Request prefetch packs, but tell the server not to send any
+	# idx files and force gvfs-helper to compute them.
+	#
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		prefetch --since="1000000000" >OUT.output &&
+
+	# gvfs-helper prints a "packfile <name>" message for each received
+	# packfile.
+	#
+	verify_received_packfile_count 2 &&
+
+	stop_gvfs_protocol_server &&
+	verify_connection_count 1
+'
+
+test_expect_success 'basic: PREFETCH up-to-date' '
+	test_when_finished "per_test_cleanup" &&
+	start_gvfs_protocol_server &&
+
+	# Ask for cached packfiles NEWER THAN the given time.
+	#
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		prefetch --since="1000000000" >OUT.output &&
+
+	# gvfs-helper prints a "packfile <name>" message for each received
+	# packfile.
+	#
+	verify_received_packfile_count 2 &&
+
+	# Ask again for any packfiles newer than what we have cached locally.
+	#
+	git -C "$REPO_T1" gvfs-helper \
+		--cache-server=disable \
+		--remote=origin \
+		--no-progress \
+		prefetch >OUT.output &&
+
+	# gvfs-helper prints a "packfile <name>" message for each received
+	# packfile.
+	#
+	verify_received_packfile_count 0 &&
+
+	stop_gvfs_protocol_server &&
+	verify_connection_count 2
+'
+
 #################################################################
 # Tests to see how gvfs-helper responds to network problems.
 #
@@ -960,44 +1098,6 @@ test_expect_success 'HTTP GET Auth on Cache Server' '
 # magically fetched whenever required.
 #################################################################
 
-test_expect_success 'integration: explicit commit/trees, implicit blobs: log file' '
-	test_when_finished "per_test_cleanup" &&
-	start_gvfs_protocol_server &&
-
-	# We have a very empty repo. Seed it with all of the commits
-	# and trees. The purpose of this test is to demand-load the
-	# needed blobs only, so we prefetch the commits and trees.
-	#
-	git -C "$REPO_T1" gvfs-helper \
-		--cache-server=disable \
-		--remote=origin \
-		get \
-		<"$OIDS_CT_FILE" >OUT.output &&
-
-	# Confirm that we do not have the blobs locally.
- # With gvfs-helper turned off, we should fail. - # - test_must_fail \ - git -C "$REPO_T1" -c core.useGVFSHelper=false \ - log $(cat m3.brach) -- file9.txt \ - >OUT.output 2>OUT.stderr && - - # Turn on gvfs-helper and retry. This should implicitly fetch - # any needed blobs. - # - git -C "$REPO_T1" -c core.useGVFSHelper=true \ - log $(cat m3.branch) -- file9.txt \ - >OUT.output 2>OUT.stderr && - - # Verify that gvfs-helper wrote the fetched the blobs to the - # local ODB, such that a second attempt with gvfs-helper - # turned off should succeed. - # - git -C "$REPO_T1" -c core.useGVFSHelper=false \ - log $(cat m3.branch) -- file9.txt \ - >OUT.output 2>OUT.stderr -' - test_expect_success 'integration: explicit commit/trees, implicit blobs: diff 2 commits' ' test_when_finished "per_test_cleanup" && start_gvfs_protocol_server &&