Merge branch 'scalar-with-gvfs'

Prepare `scalar` to use the GVFS protocol instead of partial clone (required to support Azure Repos). Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
microsoft · Jun 22, 2022 · 04a5a93 · 04a5a93
2 parents 384e7da + 0dede5e
commit 04a5a93
Show file tree

Hide file tree

Showing 14 changed files with 1,055 additions and 53 deletions.
diff --git a/Documentation/scalar.txt b/Documentation/scalar.txt
@@ -8,14 +8,17 @@ scalar - an opinionated repository management tool
 SYNOPSIS
 --------
 [verse]
-scalar clone [--single-branch] [--branch <main-branch>] [--full-clone] <url> [<enlistment>]
+scalar clone [--single-branch] [--branch <main-branch>] [--full-clone]
+	[--local-cache-path <path>] [--cache-server-url <url>]
+	<url> [<enlistment>]
 scalar list
 scalar register [<enlistment>]
 scalar unregister [<enlistment>]
 scalar run ( all | config | commit-graph | fetch | loose-objects | pack-files ) [<enlistment>]
 scalar reconfigure [ --all | <enlistment> ]
 scalar diagnose [<enlistment>]
 scalar delete <enlistment>
+scalar cache-server ( --get | --set <url> | --list [<remote>] ) [<enlistment>]
 
 DESCRIPTION
 -----------
@@ -85,6 +88,17 @@ cloning. If the HEAD at the remote did not point at any branch when
 	A sparse-checkout is initialized by default. This behavior can be
 	turned off via `--full-clone`.
 
+--local-cache-path <path>::
+    Override the path to the local cache root directory; pre-fetched objects
+    are stored in a repository-dependent subdirectory of that path.
++
+The default is `<drive>:\.scalarCache` on Windows (on the same drive as the
+clone), and `~/.scalarCache` on macOS.
+
+--cache-server-url <url>::
+    Retrieve missing objects from the specified remote, which is expected to
+    understand the GVFS protocol.
+
 List
 ~~~~
 
@@ -158,6 +172,27 @@ delete <enlistment>::
 	This subcommand lets you delete an existing Scalar enlistment from your
 	local file system, unregistering the repository.
 
+Cache-server
+~~~~~~~~~~~~
+
+cache-server ( --get | --set <url> | --list [<remote>] ) [<enlistment>]::
+    This command lets you query or set the GVFS-enabled cache server used
+    to fetch missing objects.
+
+--get::
+    This is the default command mode: query the currently-configured cache
+    server URL, if any.
+
+--list::
+    Access the `gvfs/info` endpoint of the specified remote (default:
+    `origin`) to figure out which cache servers are available, if any.
++
+In contrast to the `--get` command mode (which only accesses the local
+repository), this command mode triggers a request via the network that
+potentially requires authentication. If authentication is required, the
+configured credential helper is employed (see linkgit:git-credential[1]
+for details).
+
 SEE ALSO
 --------
 linkgit:git-clone[1], linkgit:git-maintenance[1].

diff --git a/Documentation/scalar/getting-started.md b/Documentation/scalar/getting-started.md
@@ -18,8 +18,9 @@ Creating a new Scalar clone
 ---------------------------------------------------
 
 The `clone` verb creates a local enlistment of a remote repository using the
-partial clone feature available e.g. on GitHub.
-
+partial clone feature available e.g. on GitHub, or using the
+[GVFS protocol](https://github.com/microsoft/VFSForGit/blob/HEAD/Protocol.md),
+which is supported e.g. by Azure Repos.
 
 ```
 scalar clone [options] <url> [<dir>]
@@ -68,11 +69,26 @@ in `<path>`.
 These options allow a user to customize their initial enlistment.
 
 * `--full-clone`: If specified, do not initialize the sparse-checkout feature.
-  All files will be present in your `src` directory. This uses a Git partial
-  clone: blobs are downloaded on demand.
+  All files will be present in your `src` directory. This behaves very similarly
+  to a Git partial clone in that blobs are downloaded on demand. However, it
+  will use the GVFS protocol to download all Git objects.
+
+* `--cache-server-url=<url>`: If specified, set the intended cache server to
+  the specified `<url>`. All object queries will use the GVFS protocol to this
+  `<url>` instead of the origin remote. If the remote supplies a list of
+  cache servers via the `<url>/gvfs/config` endpoint, then the `clone` command
+  will select a nearby cache server from that list.
 
 * `--branch=<ref>`: Specify the branch to checkout after clone.
 
+* `--local-cache-path=<path>`: Use this option to override the path for the
+  local Scalar cache. If not specified, then Scalar will select a default
+  path to share objects with your other enlistments. On Windows, this path
+  is a subdirectory of `<Volume>:\.scalarCache\`. On Mac, this path is a
+  subdirectory of `~/.scalarCache/`. The default cache path is recommended so
+  multiple enlistments of the same remote repository share objects on the
+  same device.
+
 ### Advanced Options
 
 The options below are not intended for use by a typical user. These are

diff --git a/Documentation/scalar/index.md b/Documentation/scalar/index.md
@@ -28,10 +28,14 @@ these features for that repo (except partial clone) and start running suggested
 maintenance in the background using
 [the `git maintenance` feature](https://git-scm.com/docs/git-maintenance).
 
-Repos cloned with the `scalar clone` command use partial clone to significantly
-reduce the amount of data required to get started using a repository. By
-delaying all blob downloads until they are required, Scalar allows you to work
-with very large repositories quickly.
+Repos cloned with the `scalar clone` command use partial clone or the
+[GVFS protocol](https://github.com/microsoft/VFSForGit/blob/HEAD/Protocol.md)
+to significantly reduce the amount of data required to get started
+using a repository. By delaying all blob downloads until they are required,
+Scalar allows you to work with very large repositories quickly. The GVFS
+protocol allows a network of _cache servers_ to serve objects with lower
+latency and higher throughput. The cache servers also reduce load on the
+central server.
 
 Documentation
 -------------
@@ -42,7 +46,7 @@ Documentation
 
 * [Troubleshooting](troubleshooting.md):
   Collect diagnostic information or update custom settings. Includes
-  `scalar diagnose`.
+  `scalar diagnose` and `scalar cache-server`.
 
 * [The Philosophy of Scalar](philosophy.md): Why does Scalar work the way
   it does, and how do we make decisions about its future?

diff --git a/Documentation/scalar/philosophy.md b/Documentation/scalar/philosophy.md
@@ -13,22 +13,27 @@ Scalar only to configure those new settings. In particular, we ported
 features like background maintenance to Git to make Scalar simpler and
 make Git more powerful.
 
-Services such as GitHub support partial clone , a standard adopted by the Git
-project to download only part of the Git objects when cloning, and fetching
-further objects on demand. If your hosting service supports partial clone, then
-we absolutely recommend it as a way to greatly speed up your clone and fetch
-times and to reduce how much disk space your Git repository requires. Scalar
-will help with this!
+Scalar ships inside [a custom version of Git][microsoft-git], but we are
+working to make it available in other forks of Git. The only feature
+that is not intended to ever reach the standard Git client is Scalar's use
+of [the GVFS Protocol][gvfs-protocol], which is essentially an older
+version of [Git's partial clone feature](https://github.blog/2020-12-21-get-up-to-speed-with-partial-clone-and-shallow-clone/)
+that was available first in Azure Repos. Services such as GitHub support
+only partial clone instead of the GVFS protocol because that is the
+standard adopted by the Git project. If your hosting service supports
+partial clone, then we absolutely recommend it as a way to greatly speed
+up your clone and fetch times and to reduce how much disk space your Git
+repository requires. Scalar will help with this!
 
-Most of the value of Scalar can be found in the core Git client. However, most
-of the advanced features that really optimize Git's performance are off by
-default for compatibility reasons. To really take advantage of Git's latest and
-greatest features, you either need to study the [`git config`
-documentation](https://git-scm.com/docs/git-config) and regularly read [the Git
-release notes](https://github.com/git/git/tree/master/Documentation/RelNotes).
+If you don't use the GVFS Protocol, then most of the value of Scalar can
+be found in the core Git client. However, most of the advanced features
+that really optimize Git's performance are off by default for compatibility
+reasons. To really take advantage of Git's latest and greatest features,
+you either need to study the [`git config` documentation](https://git-scm.com/docs/git-config)
+and regularly read [the Git release notes](https://github.com/git/git/tree/master/Documentation/RelNotes).
 Even if you do all that work and customize your Git settings on your machines,
-you likely will want to share those settings with other team members. Or, you
-can just use Scalar!
+you likely will want to share those settings with other team members.
+Or, you can just use Scalar!
 
 Using `scalar register` on an existing Git repository will give you these
 benefits:

diff --git a/Documentation/scalar/troubleshooting.md b/Documentation/scalar/troubleshooting.md
@@ -18,3 +18,23 @@ files for that repository. This includes:
 
 As the `diagnose` command completes, it provides the path of the resulting
 zip file. This zip can be attached to bug reports to make the analysis easier.
+
+Modifying Configuration Values
+------------------------------
+
+The Scalar-specific configuration is only available for repos using the
+GVFS protocol.
+
+### Cache Server URL
+
+When using an enlistment cloned with `scalar clone` and the GVFS protocol,
+you will have a value called the cache server URL. Cache servers are a feature
+of the GVFS protocol that provides low-latency access for on-demand object
+requests. The cache server URL is kept in the `gvfs.cache-server` setting in
+your local Git config file.
+
+Run `scalar cache-server --get` to see the current cache server.
+
+Run `scalar cache-server --list` to see the available cache server URLs.
+
+Run `scalar cache-server --set=<url>` to set your cache server to `<url>`.
diff --git a/Makefile b/Makefile
@@ -692,7 +692,7 @@ all:: $(FUZZ_OBJS)
 
 FUZZ_PROGRAMS += $(patsubst %.o,%,$(FUZZ_OBJS))
 
-SCALAR_OBJS := scalar.o
+SCALAR_OBJS := scalar.o json-parser.o
 
 PROGRAMS += scalar$(X)
 BINDIR_PROGRAMS_NEED_X += scalar

diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt
@@ -792,7 +792,7 @@ target_link_libraries(git-sh-i18n--envsubst common-main)
 add_executable(git-shell ${CMAKE_SOURCE_DIR}/shell.c)
 target_link_libraries(git-shell common-main)
 
-add_executable(scalar ${CMAKE_SOURCE_DIR}/scalar.c)
+add_executable(scalar ${CMAKE_SOURCE_DIR}/scalar.c ${CMAKE_SOURCE_DIR}/json-parser.c)
 target_link_libraries(scalar common-main)
 
 if(CURL_FOUND)

diff --git a/dir.c b/dir.c
@@ -3103,6 +3103,8 @@ static int cmp_icase(char a, char b)
 {
 	if (a == b)
 		return 0;
+	if (is_dir_sep(a))
+		return is_dir_sep(b) ? 0 : -1;
 	if (ignore_case)
 		return toupper(a) - toupper(b);
 	return a - b;

diff --git a/gvfs-helper.c b/gvfs-helper.c
@@ -202,6 +202,12 @@
 //            [2] Documentation/technical/long-running-process-protocol.txt
 //            [3] See GIT_TRACE_PACKET
 //
+//     endpoint
+//
+//            Fetch the given endpoint from the main Git server (specifying
+//            `gvfs/config` as endpoint is equivalent to the `config`
+//            command mentioned above).
+//
 //////////////////////////////////////////////////////////////////
 
 #include "cache.h"
@@ -3110,18 +3116,20 @@ static void do_req__with_fallback(const char *url_component,
  *
  * Return server's response buffer.  This is probably a raw JSON string.
  */
-static void do__http_get__gvfs_config(struct gh__response_status *status,
-				      struct strbuf *config_data)
+static void do__http_get__simple_endpoint(struct gh__response_status *status,
+					  struct strbuf *response,
+					  const char *endpoint,
+					  const char *tr2_label)
 {
 	struct gh__request_params params = GH__REQUEST_PARAMS_INIT;
 
-	strbuf_addstr(&params.tr2_label, "GET/config");
+	strbuf_addstr(&params.tr2_label, tr2_label);
 
 	params.b_is_post = 0;
 	params.b_write_to_file = 0;
 	/* cache-servers do not handle gvfs/config REST calls */
 	params.b_permit_cache_server_if_defined = 0;
-	params.buffer = config_data;
+	params.buffer = response;
 	params.objects_mode = GH__OBJECTS_MODE__NONE;
 
 	params.object_count = 1; /* a bit of a lie */
@@ -3143,15 +3151,22 @@ static void do__http_get__gvfs_config(struct gh__response_status *status,
 		 * see any need to report progress on the upload side of
 		 * the GET.  So just report progress on the download side.
 		 */
-		strbuf_addstr(&params.progress_base_phase3_msg,
-			      "Receiving gvfs/config");
+		strbuf_addf(&params.progress_base_phase3_msg,
+			    "Receiving %s", endpoint);
 	}
 
-	do_req__with_fallback("gvfs/config", &params, status);
+	do_req__with_fallback(endpoint, &params, status);
 
 	gh__request_params__release(&params);
 }
 
+/*
+ * Fetch the `gvfs/config` endpoint into `config_data`.
+ *
+ * Thin wrapper around do__http_get__simple_endpoint() that pins the
+ * endpoint to "gvfs/config" and the trace2 label to "GET/config"; the
+ * outcome of the request is reported through `status`.
+ */
+static void do__http_get__gvfs_config(struct gh__response_status *status,
+				      struct strbuf *config_data)
+{
+	static const char config_endpoint[] = "gvfs/config";
+	static const char config_tr2_label[] = "GET/config";
+
+	do__http_get__simple_endpoint(status, config_data,
+				      config_endpoint, config_tr2_label);
+}
+
 static void setup_gvfs_objects_progress(struct gh__request_params *params,
 					unsigned long num, unsigned long den)
 {
@@ -3596,6 +3611,35 @@ static enum gh__error_code do_sub_cmd__config(int argc, const char **argv)
 	return ec;
 }
 
+/*
+ * Handle the `endpoint <endpoint>` sub-command: issue a simple HTTP GET
+ * for the given endpoint and, on success, print the response body
+ * followed by a newline to stdout.
+ *
+ * Exactly one argument must follow the sub-command name (`argc == 2`);
+ * otherwise GH__ERROR_CODE__ERROR is returned before any request is made.
+ *
+ * Returns the error code recorded in the response status. On failure the
+ * status' error message is reported via error().
+ */
+static enum gh__error_code do_sub_cmd__endpoint(int argc, const char **argv)
+{
+	struct gh__response_status status = GH__RESPONSE_STATUS_INIT;
+	struct strbuf data = STRBUF_INIT;
+	enum gh__error_code ec = GH__ERROR_CODE__OK;
+	const char *endpoint;
+
+	if (argc != 2)
+		return GH__ERROR_CODE__ERROR;
+	endpoint = argv[1];
+
+	trace2_cmd_mode(endpoint);
+
+	finish_init(0);
+
+	/* The endpoint name doubles as the trace2 label for this request. */
+	do__http_get__simple_endpoint(&status, &data, endpoint, endpoint);
+	ec = status.ec;
+
+	if (ec == GH__ERROR_CODE__OK)
+		printf("%s\n", data.buf);
+	else
+		/* Label the failure with this sub-command, not `config`. */
+		error("endpoint: %s", status.error_message.buf);
+
+	gh__response_status__release(&status);
+	strbuf_release(&data);
+
+	return ec;
+}
+
 /*
  * Read a list of objects from stdin and fetch them as a series of
  * single object HTTP GET requests.
@@ -4087,6 +4131,9 @@ static enum gh__error_code do_sub_cmd(int argc, const char **argv)
 	if (!strcmp(argv[0], "config"))
 		return do_sub_cmd__config(argc, argv);
 
+	if (!strcmp(argv[0], "endpoint"))
+		return do_sub_cmd__endpoint(argc, argv);
+
 	if (!strcmp(argv[0], "prefetch"))
 		return do_sub_cmd__prefetch(argc, argv);