Skip to content

Commit

Permalink
gvfs-helper: expose gvfs/objects GET and POST semantics
Browse files Browse the repository at this point in the history
Expose the differences in the semantics of GET and POST for
the "gvfs/objects" API:

    HTTP GET: fetches a single loose object over the network.
              When a commit object is requested, it just returns
	      the single object.

    HTTP POST: fetches a batch of objects over the network.
               When the oid-set contains a commit object, all
	       referenced trees are also included in the response.

gvfs-helper is updated to take "get" and "post" command line options.
The gvfs-helper "server" mode is updated to take "objects.get" and
"objects.post" verbs.

For convenience, the "get" option and the "objects.get" verb
do allow more than one object to be requested.  gvfs-helper will
automatically issue a series of (single object) HTTP GET requests
and create a series of loose objects.

The "post" option and the "objects.post" verb will perform bulk
object fetching using the batch-size chunking.  The response to an
individual HTTP POST request containing more than one object will
be stored as a packfile.  An HTTP POST for a single object will
create a loose object.

This commit also contains some refactoring to eliminate the
assumption that POST is always associated with packfiles.

In gvfs-helper-client.c, gh_client__get_immediate() now uses the
"objects.get" verb and ignores any currently queued objects.

In gvfs-helper-client.c, the OIDSET built by gh_client__queue_oid()
is only processed when gh_client__drain_queue() is called.  The queue
is processed using the "objects.post" verb.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
  • Loading branch information
jeffhostetler authored and dscho committed Aug 11, 2023
1 parent 0e03263 commit 076a696
Show file tree
Hide file tree
Showing 2 changed files with 607 additions and 331 deletions.
226 changes: 150 additions & 76 deletions gvfs-helper-client.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

static struct oidset gh_client__oidset_queued = OIDSET_INIT;
static unsigned long gh_client__oidset_count;
static int gh_client__includes_immediate;

struct gh_server__process {
struct subprocess_entry subprocess; /* must be first */
Expand All @@ -26,13 +25,20 @@ static int gh_server__subprocess_map_initialized;
static struct hashmap gh_server__subprocess_map;
static struct object_directory *gh_client__chosen_odb;

#define CAP_GET (1u<<1)
/*
* The "objects" capability has 2 verbs: "get" and "post".
*/
#define CAP_OBJECTS (1u<<1)
#define CAP_OBJECTS_NAME "objects"

#define CAP_OBJECTS__VERB_GET1_NAME "get"
#define CAP_OBJECTS__VERB_POST_NAME "post"

static int gh_client__start_fn(struct subprocess_entry *subprocess)
{
static int versions[] = {1, 0};
static struct subprocess_capability capabilities[] = {
{ "get", CAP_GET },
{ CAP_OBJECTS_NAME, CAP_OBJECTS },
{ NULL, 0 }
};

Expand All @@ -44,14 +50,16 @@ static int gh_client__start_fn(struct subprocess_entry *subprocess)
}

/*
* Send:
* Send the queued OIDs in the OIDSET to gvfs-helper for it to
* fetch from the cache-server or main Git server using "/gvfs/objects"
* POST semantics.
*
* get LF
* objects.post LF
* (<hex-oid> LF)*
* <flush>
*
*/
static int gh_client__get__send_command(struct child_process *process)
static int gh_client__send__objects_post(struct child_process *process)
{
struct oidset_iter iter;
struct object_id *oid;
Expand All @@ -62,7 +70,9 @@ static int gh_client__get__send_command(struct child_process *process)
* so that we don't have to.
*/

err = packet_write_fmt_gently(process->in, "get\n");
err = packet_write_fmt_gently(
process->in,
(CAP_OBJECTS_NAME "." CAP_OBJECTS__VERB_POST_NAME "\n"));
if (err)
return err;

Expand All @@ -81,6 +91,46 @@ static int gh_client__get__send_command(struct child_process *process)
return 0;
}

/*
 * Ask gvfs-helper to fetch exactly one object from the cache-server
 * or main Git server using "/gvfs/objects" GET semantics.
 *
 * Only the OID passed in is sent; the queued OIDSET is not consulted.
 *
 * Wire format written to the helper's input:
 *
 *     objects.get LF
 *     <hex-oid> LF
 *     <flush>
 *
 * Returns 0 when all three writes succeed; otherwise returns the
 * non-zero result of the first packet_ call that failed.
 */
static int gh_client__send__objects_get(struct child_process *process,
					const struct object_id *oid)
{
	int rc;

	/*
	 * No error() calls here: the packet_ routines are assumed to
	 * report their own failures.
	 */

	/* Verb line: "<capability>.<verb>". */
	rc = packet_write_fmt_gently(
		process->in,
		(CAP_OBJECTS_NAME "." CAP_OBJECTS__VERB_GET1_NAME "\n"));
	if (rc)
		return rc;

	/* The single requested object, as a hex OID. */
	rc = packet_write_fmt_gently(process->in, "%s\n",
				     oid_to_hex(oid));
	if (rc)
		return rc;

	/* The flush packet terminates the request. */
	return packet_flush_gently(process->in);
}

/*
* Update the loose object cache to include the newly created
* object.
Expand Down Expand Up @@ -128,7 +178,7 @@ static void gh_client__update_packed_git(const char *line)
}

/*
* We expect:
* Both CAP_OBJECTS verbs return the same format response:
*
* <odb>
* <data>*
Expand Down Expand Up @@ -159,7 +209,7 @@ static void gh_client__update_packed_git(const char *line)
* grouped with a queued request for a blob. The tree-walk *might* be
* able to continue and let the 404 blob be handled later.
*/
static int gh_client__get__receive_response(
static int gh_client__objects__receive_response(
struct child_process *process,
enum gh_client__created *p_ghc,
int *p_nr_loose, int *p_nr_packfile)
Expand Down Expand Up @@ -238,17 +288,12 @@ static void gh_client__choose_odb(void)
}
}

static int gh_client__get(enum gh_client__created *p_ghc)
static struct gh_server__process *gh_client__find_long_running_process(
unsigned int cap_needed)
{
struct gh_server__process *entry;
struct child_process *process;
struct strvec argv = STRVEC_INIT;
struct strbuf quoted = STRBUF_INIT;
int nr_loose = 0;
int nr_packfile = 0;
int err = 0;

trace2_region_enter("gh-client", "get", the_repository);

gh_client__choose_odb();

Expand All @@ -264,6 +309,11 @@ static int gh_client__get(enum gh_client__created *p_ghc)

sq_quote_argv_pretty(&quoted, argv.v);

/*
* Find an existing long-running process with the above command
* line -or- create a new long-running process for this and
* subsequent 'get' requests.
*/
if (!gh_server__subprocess_map_initialized) {
gh_server__subprocess_map_initialized = 1;
hashmap_init(&gh_server__subprocess_map,
Expand All @@ -277,70 +327,24 @@ static int gh_client__get(enum gh_client__created *p_ghc)
entry = xmalloc(sizeof(*entry));
entry->supported_capabilities = 0;

err = subprocess_start_strvec(
&gh_server__subprocess_map, &entry->subprocess, 1,
&argv, gh_client__start_fn);
if (err) {
free(entry);
goto leave_region;
}
if (subprocess_start_strvec(&gh_server__subprocess_map,
&entry->subprocess, 1,
&argv, gh_client__start_fn))
FREE_AND_NULL(entry);
}

process = &entry->subprocess.process;

if (!(CAP_GET & entry->supported_capabilities)) {
error("gvfs-helper: does not support GET");
subprocess_stop(&gh_server__subprocess_map,
(struct subprocess_entry *)entry);
free(entry);
err = -1;
goto leave_region;
}

sigchain_push(SIGPIPE, SIG_IGN);

err = gh_client__get__send_command(process);
if (!err)
err = gh_client__get__receive_response(process, p_ghc,
&nr_loose, &nr_packfile);

sigchain_pop(SIGPIPE);

if (err) {
if (entry &&
(entry->supported_capabilities & cap_needed) != cap_needed) {
error("gvfs-helper: does not support needed capabilities");
subprocess_stop(&gh_server__subprocess_map,
(struct subprocess_entry *)entry);
free(entry);
FREE_AND_NULL(entry);
}

leave_region:
strvec_clear(&argv);
strbuf_release(&quoted);

trace2_data_intmax("gh-client", the_repository,
"get/immediate", gh_client__includes_immediate);

trace2_data_intmax("gh-client", the_repository,
"get/nr_objects", gh_client__oidset_count);

if (nr_loose)
trace2_data_intmax("gh-client", the_repository,
"get/nr_loose", nr_loose);

if (nr_packfile)
trace2_data_intmax("gh-client", the_repository,
"get/nr_packfile", nr_packfile);

if (err)
trace2_data_intmax("gh-client", the_repository,
"get/error", err);

trace2_region_leave("gh-client", "get", the_repository);

oidset_clear(&gh_client__oidset_queued);
gh_client__oidset_count = 0;
gh_client__includes_immediate = 0;

return err;
return entry;
}

void gh_client__queue_oid(const struct object_id *oid)
Expand All @@ -367,27 +371,97 @@ void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr)
gh_client__queue_oid(&oids[k]);
}

/*
* Bulk fetch all of the queued OIDs in the OIDSET.
*/
int gh_client__drain_queue(enum gh_client__created *p_ghc)
{
struct gh_server__process *entry;
struct child_process *process;
int nr_loose = 0;
int nr_packfile = 0;
int err = 0;

*p_ghc = GHC__CREATED__NOTHING;

if (!gh_client__oidset_count)
return 0;

return gh_client__get(p_ghc);
entry = gh_client__find_long_running_process(CAP_OBJECTS);
if (!entry)
return -1;

trace2_region_enter("gh-client", "objects/post", the_repository);

process = &entry->subprocess.process;

sigchain_push(SIGPIPE, SIG_IGN);

err = gh_client__send__objects_post(process);
if (!err)
err = gh_client__objects__receive_response(
process, p_ghc, &nr_loose, &nr_packfile);

sigchain_pop(SIGPIPE);

if (err) {
subprocess_stop(&gh_server__subprocess_map,
(struct subprocess_entry *)entry);
FREE_AND_NULL(entry);
}

trace2_data_intmax("gh-client", the_repository,
"objects/post/nr_objects", gh_client__oidset_count);
trace2_region_leave("gh-client", "objects/post", the_repository);

oidset_clear(&gh_client__oidset_queued);
gh_client__oidset_count = 0;

return err;
}

/*
* Get exactly 1 object immediately.
* Ignore any queued objects.
*/
int gh_client__get_immediate(const struct object_id *oid,
enum gh_client__created *p_ghc)
{
gh_client__includes_immediate = 1;
struct gh_server__process *entry;
struct child_process *process;
int nr_loose = 0;
int nr_packfile = 0;
int err = 0;

// TODO consider removing this trace2. it is useful for interactive
// TODO debugging, but may generate way too much noise for a data
// TODO event.
trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid));

if (!oidset_insert(&gh_client__oidset_queued, oid))
gh_client__oidset_count++;
entry = gh_client__find_long_running_process(CAP_OBJECTS);
if (!entry)
return -1;

trace2_region_enter("gh-client", "objects/get", the_repository);

return gh_client__drain_queue(p_ghc);
process = &entry->subprocess.process;

sigchain_push(SIGPIPE, SIG_IGN);

err = gh_client__send__objects_get(process, oid);
if (!err)
err = gh_client__objects__receive_response(
process, p_ghc, &nr_loose, &nr_packfile);

sigchain_pop(SIGPIPE);

if (err) {
subprocess_stop(&gh_server__subprocess_map,
(struct subprocess_entry *)entry);
FREE_AND_NULL(entry);
}

trace2_region_leave("gh-client", "objects/get", the_repository);

return err;
}
Loading

0 comments on commit 076a696

Please sign in to comment.