Skip to content

Commit

Permalink
gvfs-helper: add prefetch support
Browse files Browse the repository at this point in the history
Teach gvfs-helper to support "/gvfs/prefetch" REST API.
This includes a new `gvfs-helper prefetch --since=<t>` command line option.
And a new `objects.prefetch` verb in `gvfs-helper server` mode.

If `since` argument is omitted, `gvfs-helper` will search the local
shared-cache for the most recent prefetch packfile and start from
there.

The <t> is usually a seconds-since-epoch, but may also be a "friendly"
date -- such as "midnight", "yesterday" and etc. using the existing
date selection mechanism.

Add `gh_client__prefetch()` API to allow `git.exe` to easily call
prefetch (and using the same long-running process as immediate and
queued object fetches).

Expanded t5799 unit tests to include prefetch tests.  Test setup now
also builds some commits-and-trees packfiles for testing purposes with
well-known timestamps.

Expanded t/helper/test-gvfs-protocol.exe to support "/gvfs/prefetch"
REST API.

Massive refactor of existing packfile handling in gvfs-helper.c to
reuse more code between "/gvfs/objects POST" and "/gvfs/prefetch".
With this we now properly name packfiles with the checksum SHA1
rather than a date string.

Refactor also addresses some of the confusing tempfile setup and
install_<result> code processing (introduced to handle the ambiguity
of how POST works with commit objects).

Update 2023-05-22 (v2.41.0): add '--no-rev-index' to 'index-pack' to avoid
writing the extra (unused) file.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
  • Loading branch information
jeffhostetler authored and vdye committed Jul 19, 2023
1 parent ae0852f commit 58237c0
Show file tree
Hide file tree
Showing 5 changed files with 1,508 additions and 339 deletions.
129 changes: 116 additions & 13 deletions gvfs-helper-client.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,14 @@ static struct hashmap gh_server__subprocess_map;
static struct object_directory *gh_client__chosen_odb;

/*
* The "objects" capability has 2 verbs: "get" and "post".
* The "objects" capability has verbs: "get" and "post" and "prefetch".
*/
#define CAP_OBJECTS (1u<<1)
#define CAP_OBJECTS_NAME "objects"

#define CAP_OBJECTS__VERB_GET1_NAME "get"
#define CAP_OBJECTS__VERB_POST_NAME "post"
#define CAP_OBJECTS__VERB_PREFETCH_NAME "prefetch"

static int gh_client__start_fn(struct subprocess_entry *subprocess)
{
Expand Down Expand Up @@ -131,6 +132,44 @@ static int gh_client__send__objects_get(struct child_process *process,
return 0;
}

/*
* Send a request to gvfs-helper to prefetch packfiles from either the
* cache-server or the main Git server using "/gvfs/prefetch".
*
* objects.prefetch LF
* [<seconds-since_epoch> LF]
* <flush>
*/
static int gh_client__send__objects_prefetch(struct child_process *process,
timestamp_t seconds_since_epoch)
{
int err;

/*
* We assume that all of the packet_ routines call error()
* so that we don't have to.
*/

err = packet_write_fmt_gently(
process->in,
(CAP_OBJECTS_NAME "." CAP_OBJECTS__VERB_PREFETCH_NAME "\n"));
if (err)
return err;

if (seconds_since_epoch) {
err = packet_write_fmt_gently(process->in, "%" PRItime "\n",
seconds_since_epoch);
if (err)
return err;
}

err = packet_flush_gently(process->in);
if (err)
return err;

return 0;
}

/*
* Update the loose object cache to include the newly created
* object.
Expand Down Expand Up @@ -178,7 +217,7 @@ static void gh_client__update_packed_git(const char *line)
}

/*
* Both CAP_OBJECTS verbs return the same format response:
* CAP_OBJECTS verbs return the same format response:
*
* <odb>
* <data>*
Expand Down Expand Up @@ -218,6 +257,8 @@ static int gh_client__objects__receive_response(
const char *v1;
char *line;
int len;
int nr_loose = 0;
int nr_packfile = 0;
int err = 0;

while (1) {
Expand All @@ -236,13 +277,13 @@ static int gh_client__objects__receive_response(
else if (starts_with(line, "packfile")) {
gh_client__update_packed_git(line);
ghc |= GHC__CREATED__PACKFILE;
*p_nr_packfile += 1;
nr_packfile++;
}

else if (starts_with(line, "loose")) {
gh_client__update_loose_cache(line);
ghc |= GHC__CREATED__LOOSE;
*p_nr_loose += 1;
nr_loose++;
}

else if (starts_with(line, "ok"))
Expand All @@ -256,6 +297,8 @@ static int gh_client__objects__receive_response(
}

*p_ghc = ghc;
*p_nr_loose = nr_loose;
*p_nr_packfile = nr_packfile;

return err;
}
Expand Down Expand Up @@ -312,7 +355,7 @@ static struct gh_server__process *gh_client__find_long_running_process(
/*
* Find an existing long-running process with the above command
* line -or- create a new long-running process for this and
* subsequent 'get' requests.
* subsequent requests.
*/
if (!gh_server__subprocess_map_initialized) {
gh_server__subprocess_map_initialized = 1;
Expand Down Expand Up @@ -349,10 +392,14 @@ static struct gh_server__process *gh_client__find_long_running_process(

void gh_client__queue_oid(const struct object_id *oid)
{
// TODO consider removing this trace2. it is useful for interactive
// TODO debugging, but may generate way too much noise for a data
// TODO event.
trace2_printf("gh_client__queue_oid: %s", oid_to_hex(oid));
/*
* Keep this trace as a printf only, so that it goes to the
* perf log, but not the event log. It is useful for interactive
* debugging, but generates way too much (unuseful) noise for the
* database.
*/
if (trace2_is_enabled())
trace2_printf("gh_client__queue_oid: %s", oid_to_hex(oid));

if (!oidset_insert(&gh_client__oidset_queued, oid))
gh_client__oidset_count++;
Expand Down Expand Up @@ -433,10 +480,14 @@ int gh_client__get_immediate(const struct object_id *oid,
int nr_packfile = 0;
int err = 0;

// TODO consider removing this trace2. it is useful for interactive
// TODO debugging, but may generate way too much noise for a data
// TODO event.
trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid));
/*
* Keep this trace as a printf only, so that it goes to the
* perf log, but not the event log. It is useful for interactive
* debugging, but generates way too much (unuseful) noise for the
* database.
*/
if (trace2_is_enabled())
trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid));

entry = gh_client__find_long_running_process(CAP_OBJECTS);
if (!entry)
Expand Down Expand Up @@ -465,3 +516,55 @@ int gh_client__get_immediate(const struct object_id *oid,

return err;
}

/*
* Ask gvfs-helper to prefetch commits-and-trees packfiles since a
* given timestamp.
*
* If seconds_since_epoch is zero, gvfs-helper will scan the ODB for
* the last received prefetch and ask for ones newer than that.
*/
int gh_client__prefetch(timestamp_t seconds_since_epoch,
int *nr_packfiles_received)
{
struct gh_server__process *entry;
struct child_process *process;
enum gh_client__created ghc;
int nr_loose = 0;
int nr_packfile = 0;
int err = 0;

entry = gh_client__find_long_running_process(CAP_OBJECTS);
if (!entry)
return -1;

trace2_region_enter("gh-client", "objects/prefetch", the_repository);
trace2_data_intmax("gh-client", the_repository, "prefetch/since",
seconds_since_epoch);

process = &entry->subprocess.process;

sigchain_push(SIGPIPE, SIG_IGN);

err = gh_client__send__objects_prefetch(process, seconds_since_epoch);
if (!err)
err = gh_client__objects__receive_response(
process, &ghc, &nr_loose, &nr_packfile);

sigchain_pop(SIGPIPE);

if (err) {
subprocess_stop(&gh_server__subprocess_map,
(struct subprocess_entry *)entry);
FREE_AND_NULL(entry);
}

trace2_data_intmax("gh-client", the_repository,
"prefetch/packfile_count", nr_packfile);
trace2_region_leave("gh-client", "objects/prefetch", the_repository);

if (nr_packfiles_received)
*nr_packfiles_received = nr_packfile;

return err;
}
18 changes: 18 additions & 0 deletions gvfs-helper-client.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,22 @@ void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr);
*/
int gh_client__drain_queue(enum gh_client__created *p_ghc);

/*
* Ask `gvfs-helper server` to fetch any "prefetch packs"
* available on the server more recent than the requested time.
*
* If seconds_since_epoch is zero, gvfs-helper will scan the ODB for
* the last received prefetch and ask for ones newer than that.
*
* A long-running background process is used to subsequent requests
* (either prefetch or regular immediate/queued requests) more efficient.
*
* One or more packfiles will be created in the shared-cache ODB.
*
* Returns 0 on success, -1 on error. Optionally also returns the
* number of prefetch packs received.
*/
int gh_client__prefetch(timestamp_t seconds_since_epoch,
int *nr_packfiles_received);

#endif /* GVFS_HELPER_CLIENT_H */
Loading

0 comments on commit 58237c0

Please sign in to comment.