diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index ef3987dcae4d54..380ffb4d9a4b81 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -622,6 +622,48 @@ core.multiPackIndex:: single index. See link:technical/multi-pack-index.html[the multi-pack-index design document]. +core.gvfs:: + Enable the features needed for GVFS. This value can be set to true + to indicate all features should be turned on or the bit values listed + below can be used to turn on specific features. ++ +-- + GVFS_SKIP_SHA_ON_INDEX:: + Bit value 1 + Disables the calculation of the sha when writing the index + GVFS_MISSING_OK:: + Bit value 4 + Normally git write-tree ensures that the objects referenced by the + directory exist in the object database. This option disables this check. + GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT:: + Bit value 8 + When marking entries to remove from the index and the working + directory this option will take into account what the + skip-worktree bit was set to so that if the entry has the + skip-worktree bit set it will not be removed from the working + directory. This will allow virtualized working directories to + detect the change to HEAD and use the new commit tree to show + the files that are in the working directory. + GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK:: + Bit value 16 + While performing a fetch with a virtual file system we know + that there will be missing objects and we don't want to download + them just because of the reachability of the commits. We also + don't want to download a pack file with commits, trees, and blobs + since these will be downloaded on demand. This flag will skip the + checks on the reachability of objects during a fetch as well as + the upload pack so that extraneous objects don't get downloaded. 
+ GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS:: + Bit value 64 + With a virtual file system we only know the file size before any + CRLF or smudge/clean filters processing is done on the client. + To prevent file corruption due to truncation or expansion with + garbage at the end, these filters must not run when the file + is first accessed and brought down to the client. Git.exe can't + currently tell the first access vs subsequent accesses so this + flag just blocks them from occurring at all. +-- + core.sparseCheckout:: Enable "sparse checkout" feature. See linkgit:git-sparse-checkout[1] for more information. diff --git a/Documentation/technical/read-object-protocol.txt b/Documentation/technical/read-object-protocol.txt new file mode 100644 index 00000000000000..a893b46e7c28a9 --- /dev/null +++ b/Documentation/technical/read-object-protocol.txt @@ -0,0 +1,102 @@ +Read Object Process +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The read-object process enables Git to read all missing blobs with a +single process invocation for the entire life of a single Git command. +This is achieved by using a packet format (pkt-line, see technical/ +protocol-common.txt) based protocol over standard input and standard +output as follows. All packets, except for the "*CONTENT" packets and +the "0000" flush packet, are considered text and therefore are +terminated by a LF. + +Git starts the process when it encounters the first missing object that +needs to be retrieved. After the process is started, Git sends a welcome +message ("git-read-object-client"), a list of supported protocol version +numbers, and a flush packet. Git expects to read a welcome response +message ("git-read-object-server"), exactly one protocol version number +from the previously sent list, and a flush packet. All further +communication will be based on the selected version. + +The remaining protocol description below documents "version=1". 
Please +note that "version=42" in the example below does not exist and is only +there to illustrate how the protocol would look with more than one +version. + +After the version negotiation Git sends a list of all capabilities that +it supports and a flush packet. Git expects to read a list of desired +capabilities, which must be a subset of the supported capabilities list, +and a flush packet as response: +------------------------ +packet: git> git-read-object-client +packet: git> version=1 +packet: git> version=42 +packet: git> 0000 +packet: git< git-read-object-server +packet: git< version=1 +packet: git< 0000 +packet: git> capability=get +packet: git> capability=have +packet: git> capability=put +packet: git> capability=not-yet-invented +packet: git> 0000 +packet: git< capability=get +packet: git< 0000 +------------------------ +The only supported capability in version 1 is "get". + +Afterwards Git sends a list of "key=value" pairs terminated with a flush +packet. The list will contain at least the command (based on the +supported capabilities) and the sha1 of the object to retrieve. Please +note that the process must not send any response before it has received the +final flush packet. + +When the process receives the "get" command, it should make the requested +object available in the git object store and then return success. Git will +then check the object store again and this time find it and proceed. +------------------------ +packet: git> command=get +packet: git> sha1=0a214a649e1b3d5011e14a3dc227753f2bd2be05 +packet: git> 0000 +------------------------ + +The process is expected to respond with a list of "key=value" pairs +terminated with a flush packet. If the process does not experience +problems then the list must contain a "success" status. 
+------------------------ +packet: git< status=success +packet: git< 0000 +------------------------ + +In case the process cannot or does not want to process the content, it +is expected to respond with an "error" status. +------------------------ +packet: git< status=error +packet: git< 0000 +------------------------ + +In case the process cannot or does not want to process the content as +well as any future content for the lifetime of the Git process, then it +is expected to respond with an "abort" status at any point in the +protocol. +------------------------ +packet: git< status=abort +packet: git< 0000 +------------------------ + +Git neither stops nor restarts the process in case the "error"/"abort" +status is set. + +If the process dies during the communication or does not adhere to the +protocol then Git will stop the process and restart it with the next +object that needs to be processed. + +After the read-object process has processed an object it is expected to +wait for the next "key=value" list containing a command. Git will close +the command pipe on exit. The process is expected to detect EOF and exit +gracefully on its own. Git will wait until the process has stopped. + +A long running read-object process demo implementation can be found in +`contrib/long-running-read-object/example.pl` located in the Git core +repository. If you develop your own long running process then the +`GIT_TRACE_PACKET` environment variable can be very helpful for +debugging (see linkgit:git[1]). 
diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index 0ebd699cf0f16d..5e9882b36fc195 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.29.0-rc1 +DEF_VER=v2.29.0.vfs.0.0 LF=' ' @@ -12,7 +12,7 @@ if test -f version then VN=$(cat version) || VN="$DEF_VER" elif test -d ${GIT_DIR:-.git} -o -f .git && - VN=$(git describe --match "v[0-9]*" HEAD 2>/dev/null) && + VN=$(git describe --match "v[0-9]*vfs*" HEAD 2>/dev/null) && case "$VN" in *$LF*) (exit 1) ;; v[0-9]*) diff --git a/cache-tree.c b/cache-tree.c index a537a806c16e03..9a59dafa26a07d 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -6,6 +6,7 @@ #include "object-store.h" #include "replace-object.h" #include "promisor-remote.h" +#include "gvfs.h" #ifndef DEBUG_CACHE_TREE #define DEBUG_CACHE_TREE 0 @@ -244,7 +245,8 @@ static int update_one(struct cache_tree *it, int flags) { struct strbuf buffer; - int missing_ok = flags & WRITE_TREE_MISSING_OK; + int missing_ok = gvfs_config_is_set(GVFS_MISSING_OK) ? 
+ WRITE_TREE_MISSING_OK : (flags & WRITE_TREE_MISSING_OK); int dryrun = flags & WRITE_TREE_DRY_RUN; int repair = flags & WRITE_TREE_REPAIR; int to_invalidate = 0; diff --git a/cache.h b/cache.h index 3e979666c86524..5010acf0dfee9d 100644 --- a/cache.h +++ b/cache.h @@ -955,6 +955,7 @@ extern char *git_replace_ref_base; extern int fsync_object_files; extern int core_preload_index; +extern int core_gvfs; extern int precomposed_unicode; extern int protect_hfs; extern int protect_ntfs; @@ -983,6 +984,8 @@ int use_optional_locks(void); extern char comment_line_char; extern int auto_comment_line_char; +extern int core_virtualize_objects; + enum log_refs_config { LOG_REFS_UNSET = -1, LOG_REFS_NONE = 0, diff --git a/config.c b/config.c index bf87d02603d25a..bd7fcc512e4faa 100644 --- a/config.c +++ b/config.c @@ -20,6 +20,7 @@ #include "dir.h" #include "color.h" #include "refs.h" +#include "gvfs.h" struct config_source { struct config_source *prev; @@ -1374,6 +1375,11 @@ static int git_default_core_config(const char *var, const char *value, void *cb) return 0; } + if (!strcmp(var, "core.gvfs")) { + gvfs_load_config_value(value); + return 0; + } + if (!strcmp(var, "core.sparsecheckout")) { core_apply_sparse_checkout = git_config_bool(var, value); return 0; @@ -1404,6 +1410,11 @@ static int git_default_core_config(const char *var, const char *value, void *cb) return 0; } + if (!strcmp(var, "core.virtualizeobjects")) { + core_virtualize_objects = git_config_bool(var, value); + return 0; + } + /* Add other config variables here and to Documentation/config.txt. 
*/ return platform_core_config(var, value, cb); } diff --git a/connected.c b/connected.c index b18299fdf0e522..a9168bcca65bea 100644 --- a/connected.c +++ b/connected.c @@ -6,6 +6,7 @@ #include "transport.h" #include "packfile.h" #include "promisor-remote.h" +#include "gvfs.h" /* * If we feed all the commits we want to verify to this command @@ -30,6 +31,26 @@ int check_connected(oid_iterate_fn fn, void *cb_data, struct transport *transport; size_t base_len; + /* + * Running a virtual file system there will be objects that are + * missing locally and we don't want to download a bunch of + * commits, trees, and blobs just to make sure everything is + * reachable locally so this option will skip reachability + * checks below that use rev-list. This will stop the check + * before uploadpack runs to determine if there is anything to + * fetch. Returning zero for the first check will also prevent the + * uploadpack from happening. It will also skip the check after + * the fetch is finished to make sure all the objects were + * downloaded in the pack file. This will allow the fetch to + * run and get all the latest tip commit ids for all the branches + * in the fetch but not pull down commits, trees, or blobs via + * upload pack. + */ + if (gvfs_config_is_set(GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK)) + return 0; + if (core_virtualize_objects) + return 0; + if (!opt) opt = &defaults; transport = opt->transport; diff --git a/contrib/long-running-read-object/example.pl b/contrib/long-running-read-object/example.pl new file mode 100644 index 00000000000000..b8f37f836a813c --- /dev/null +++ b/contrib/long-running-read-object/example.pl @@ -0,0 +1,114 @@ +#!/usr/bin/perl +# +# Example implementation for the Git read-object protocol version 1 +# See Documentation/technical/read-object-protocol.txt +# +# Allows you to test the ability for blobs to be pulled from a host git repo +# "on demand." 
Called when git needs a blob it couldn't find locally due to +# a lazy clone that only cloned the commits and trees. +# +# A lazy clone can be simulated via the following commands from the host repo +# you wish to create a lazy clone of: +# +# cd /host_repo +# git rev-parse HEAD +# git init /guest_repo +# git cat-file --batch-check --batch-all-objects | grep -v 'blob' | +# cut -d' ' -f1 | git pack-objects /guest_repo/.git/objects/pack/noblobs +# cd /guest_repo +# git config core.virtualizeobjects true +# git reset --hard +# +# Please note, this sample is a minimal skeleton. No proper error handling +# was implemented. +# + +use strict; +use warnings; + +# +# Point $DIR to the folder where your host git repo is located so we can pull +# missing objects from it +# +my $DIR = "/host_repo/.git/"; + +sub packet_bin_read { + my $buffer; + my $bytes_read = read STDIN, $buffer, 4; + if ( $bytes_read == 0 ) { + + # EOF - Git stopped talking to us! + exit(); + } + elsif ( $bytes_read != 4 ) { + die "invalid packet: '$buffer'"; + } + my $pkt_size = hex($buffer); + if ( $pkt_size == 0 ) { + return ( 1, "" ); + } + elsif ( $pkt_size > 4 ) { + my $content_size = $pkt_size - 4; + $bytes_read = read STDIN, $buffer, $content_size; + if ( $bytes_read != $content_size ) { + die "invalid packet ($content_size bytes expected; $bytes_read bytes read)"; + } + return ( 0, $buffer ); + } + else { + die "invalid packet size: $pkt_size"; + } +} + +sub packet_txt_read { + my ( $res, $buf ) = packet_bin_read(); + unless ( $buf =~ s/\n$// ) { + die "A non-binary line MUST be terminated by an LF."; + } + return ( $res, $buf ); +} + +sub packet_bin_write { + my $buf = shift; + print STDOUT sprintf( "%04x", length($buf) + 4 ); + print STDOUT $buf; + STDOUT->flush(); +} + +sub packet_txt_write { + packet_bin_write( $_[0] . 
"\n" ); +} + +sub packet_flush { + print STDOUT sprintf( "%04x", 0 ); + STDOUT->flush(); +} + +( packet_txt_read() eq ( 0, "git-read-object-client" ) ) || die "bad initialize"; +( packet_txt_read() eq ( 0, "version=1" ) ) || die "bad version"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad version end"; + +packet_txt_write("git-read-object-server"); +packet_txt_write("version=1"); +packet_flush(); + +( packet_txt_read() eq ( 0, "capability=get" ) ) || die "bad capability"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad capability end"; + +packet_txt_write("capability=get"); +packet_flush(); + +while (1) { + my ($command) = packet_txt_read() =~ /^command=([^=]+)$/; + + if ( $command eq "get" ) { + my ($sha1) = packet_txt_read() =~ /^sha1=([0-9a-f]{40})$/; + packet_bin_read(); + + system ('git --git-dir="' . $DIR . '" cat-file blob ' . $sha1 . ' | git -c core.virtualizeobjects=false hash-object -w --stdin >/dev/null 2>&1'); + packet_txt_write(($?) ? "status=error" : "status=success"); + packet_flush(); + } else { + die "bad command '$command'"; + } +} diff --git a/convert.c b/convert.c index ee360c2f07ced0..f23cd679dc85d4 100644 --- a/convert.c +++ b/convert.c @@ -9,6 +9,7 @@ #include "sub-process.h" #include "utf8.h" #include "ll-merge.h" +#include "gvfs.h" /* * convert.c - convert a file when checking it out and checking it in. @@ -559,6 +560,9 @@ static int crlf_to_git(const struct index_state *istate, if (!buf) return 1; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("CRLF conversions not supported when running under GVFS"); + /* only grow if not in place */ if (strbuf_avail(buf) + buf->len < len) strbuf_grow(buf, len - buf->len); @@ -598,6 +602,9 @@ static int crlf_to_worktree(const char *src, size_t len, if (!will_convert_lf_to_crlf(&stats, crlf_action)) return 0; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("CRLF conversions not supported when running under GVFS"); + /* are we "faking" in place editing ? 
*/ if (src == buf->buf) to_free = strbuf_detach(buf, NULL); @@ -709,6 +716,9 @@ static int apply_single_file_filter(const char *path, const char *src, size_t le struct async async; struct filter_params params; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("Filter \"%s\" not supported when running under GVFS", cmd); + memset(&async, 0, sizeof(async)); async.proc = filter_buffer_or_fd; async.data = &params; @@ -1113,6 +1123,9 @@ static int ident_to_git(const char *src, size_t len, if (!buf) return 1; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("ident conversions not supported when running under GVFS"); + /* only grow if not in place */ if (strbuf_avail(buf) + buf->len < len) strbuf_grow(buf, len - buf->len); @@ -1160,6 +1173,9 @@ static int ident_to_worktree(const char *src, size_t len, if (!cnt) return 0; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("ident conversions not supported when running under GVFS"); + /* are we "faking" in place editing ? */ if (src == buf->buf) to_free = strbuf_detach(buf, NULL); @@ -1614,6 +1630,9 @@ static int lf_to_crlf_filter_fn(struct stream_filter *filter, size_t count, o = 0; struct lf_to_crlf_filter *lf_to_crlf = (struct lf_to_crlf_filter *)filter; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("CRLF conversions not supported when running under GVFS"); + /* * We may be holding onto the CR to see if it is followed by a * LF, in which case we would need to go to the main loop. 
@@ -1858,6 +1877,9 @@ static int ident_filter_fn(struct stream_filter *filter, struct ident_filter *ident = (struct ident_filter *)filter; static const char head[] = "$Id"; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("ident conversions not supported when running under GVFS"); + if (!input) { /* drain upon eof */ switch (ident->state) { diff --git a/environment.c b/environment.c index bb518c61cd259c..80fb6df4971d44 100644 --- a/environment.c +++ b/environment.c @@ -69,9 +69,11 @@ char *notes_ref_name; int grafts_replace_parents = 1; int core_apply_sparse_checkout; int core_sparse_checkout_cone; +int core_gvfs; int merge_log_config = -1; int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ unsigned long pack_size_limit_cfg; +int core_virtualize_objects; enum log_refs_config log_all_ref_updates = LOG_REFS_UNSET; #ifndef PROTECT_HFS_DEFAULT diff --git a/gvfs.h b/gvfs.h new file mode 100644 index 00000000000000..2d6de575bf4a65 --- /dev/null +++ b/gvfs.h @@ -0,0 +1,50 @@ +#ifndef GVFS_H +#define GVFS_H + +#include "cache.h" +#include "config.h" + +/* + * This file is for the specific settings and methods + * used for GVFS functionality + */ + + +/* + * The list of bits in the core_gvfs setting + */ +#define GVFS_SKIP_SHA_ON_INDEX (1 << 0) +#define GVFS_MISSING_OK (1 << 2) +#define GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT (1 << 3) +#define GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK (1 << 4) +#define GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS (1 << 6) + +static inline int gvfs_config_is_set(int mask) { + return (core_gvfs & mask) == mask; +} + +static inline int gvfs_config_is_set_any(void) { + return core_gvfs != 0; /* a boolean "true" setting is stored as -1, so test for non-zero */ +} + +static inline void gvfs_load_config_value(const char *value) { + int is_bool = 0; + + if (value) + core_gvfs = git_config_bool_or_int("core.gvfs", value, &is_bool); + else + git_config_get_bool_or_int("core.gvfs", &is_bool, &core_gvfs); + + /* Turn on all bits if a bool was set in the settings */ + if (is_bool && 
core_gvfs) + core_gvfs = -1; +} + + +static inline int gvfs_config_load_and_is_set(int mask) { + gvfs_load_config_value(0); + return gvfs_config_is_set(mask); +} + + +#endif /* GVFS_H */ diff --git a/read-cache.c b/read-cache.c index 99d1c5b84b3e39..6d8b6e5aaa9d0a 100644 --- a/read-cache.c +++ b/read-cache.c @@ -25,6 +25,7 @@ #include "fsmonitor.h" #include "thread-utils.h" #include "progress.h" +#include "gvfs.h" /* Mask for the name length in ce_flags in the on-disk index */ @@ -2468,7 +2469,9 @@ static int ce_write_flush(git_hash_ctx *context, int fd) { unsigned int buffered = write_buffer_len; if (buffered) { - the_hash_algo->update_fn(context, write_buffer, buffered); + if (!gvfs_config_is_set(GVFS_SKIP_SHA_ON_INDEX)) + the_hash_algo->update_fn(context, write_buffer, + buffered); if (write_in_full(fd, write_buffer, buffered) < 0) return -1; write_buffer_len = 0; @@ -2517,7 +2520,8 @@ static int ce_flush(git_hash_ctx *context, int fd, unsigned char *hash) if (left) { write_buffer_len = 0; - the_hash_algo->update_fn(context, write_buffer, left); + if (!gvfs_config_is_set(GVFS_SKIP_SHA_ON_INDEX)) + the_hash_algo->update_fn(context, write_buffer, left); } /* Flush first if not enough space for hash signature */ @@ -2528,7 +2532,8 @@ static int ce_flush(git_hash_ctx *context, int fd, unsigned char *hash) } /* Append the hash signature at the end */ - the_hash_algo->final_fn(write_buffer + left, context); + if (!gvfs_config_is_set(GVFS_SKIP_SHA_ON_INDEX)) + the_hash_algo->final_fn(write_buffer + left, context); hashcpy(hash, write_buffer + left); left += the_hash_algo->rawsz; return (write_in_full(fd, write_buffer, left) < 0) ? 
-1 : 0; diff --git a/run-command.c b/run-command.c index 2ee59acdc8c828..afcda96f1d0ffa 100644 --- a/run-command.c +++ b/run-command.c @@ -1343,7 +1343,8 @@ const char *find_hook(const char *name) return path.buf; } -int run_hook_ve(const char *const *env, const char *name, va_list args) +int run_hook_strvec(const char *const *env, const char *name, + struct strvec *argv) { struct child_process hook = CHILD_PROCESS_INIT; const char *p; @@ -1353,8 +1354,7 @@ int run_hook_ve(const char *const *env, const char *name, va_list args) return 0; strvec_push(&hook.args, p); - while ((p = va_arg(args, const char *))) - strvec_push(&hook.args, p); + strvec_pushv(&hook.args, argv->v); hook.env = env; hook.no_stdin = 1; hook.stdout_to_stderr = 1; @@ -1363,6 +1363,20 @@ int run_hook_ve(const char *const *env, const char *name, va_list args) return run_command(&hook); } +int run_hook_ve(const char *const *env, const char *name, va_list args) +{ + struct strvec argv = STRVEC_INIT; + const char *p; + int ret; + + while ((p = va_arg(args, const char *))) + strvec_push(&argv, p); + + ret = run_hook_strvec(env, name, &argv); + strvec_clear(&argv); + return ret; +} + int run_hook_le(const char *const *env, const char *name, ...) 
{ va_list args; diff --git a/run-command.h b/run-command.h index 6472b38bde448c..d6d6b850fdd339 100644 --- a/run-command.h +++ b/run-command.h @@ -217,6 +217,8 @@ const char *find_hook(const char *name); LAST_ARG_MUST_BE_NULL int run_hook_le(const char *const *env, const char *name, ...); int run_hook_ve(const char *const *env, const char *name, va_list args); +int run_hook_strvec(const char *const *env, const char *name, + struct strvec *argv); /* * Trigger an auto-gc diff --git a/sha1-file.c b/sha1-file.c index dd65bd5c681513..eb749414a0ea2d 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -32,6 +32,9 @@ #include "packfile.h" #include "object-store.h" #include "promisor-remote.h" +#include "sigchain.h" +#include "sub-process.h" +#include "pkt-line.h" /* The maximum size for an object header. */ #define MAX_HEADER_LEN 32 @@ -878,6 +881,115 @@ void prepare_alt_odb(struct repository *r) r->objects->loaded_alternates = 1; } +#define CAP_GET (1u<<0) + +static int subprocess_map_initialized; +static struct hashmap subprocess_map; + +struct read_object_process { + struct subprocess_entry subprocess; + unsigned int supported_capabilities; +}; + +static int start_read_object_fn(struct subprocess_entry *subprocess) +{ + struct read_object_process *entry = (struct read_object_process *)subprocess; + static int versions[] = {1, 0}; + static struct subprocess_capability capabilities[] = { + { "get", CAP_GET }, + { NULL, 0 } + }; + + return subprocess_handshake(subprocess, "git-read-object", versions, + NULL, capabilities, + &entry->supported_capabilities); +} + +static int read_object_process(const struct object_id *oid) +{ + int err; + struct read_object_process *entry; + struct child_process *process; + struct strbuf status = STRBUF_INIT; + const char *cmd = find_hook("read-object"); + uint64_t start; + + start = getnanotime(); + + if (!subprocess_map_initialized) { + subprocess_map_initialized = 1; + hashmap_init(&subprocess_map, (hashmap_cmp_fn)cmd2process_cmp, + NULL, 0); + 
entry = NULL; + } else { + entry = (struct read_object_process *) subprocess_find_entry(&subprocess_map, cmd); + } + + if (!entry) { + entry = xmalloc(sizeof(*entry)); + entry->supported_capabilities = 0; + + if (subprocess_start(&subprocess_map, &entry->subprocess, cmd, + start_read_object_fn)) { + free(entry); + return -1; + } + } + process = &entry->subprocess.process; + + if (!(CAP_GET & entry->supported_capabilities)) + return -1; + + sigchain_push(SIGPIPE, SIG_IGN); + + err = packet_write_fmt_gently(process->in, "command=get\n"); + if (err) + goto done; + + err = packet_write_fmt_gently(process->in, "sha1=%s\n", oid_to_hex(oid)); + if (err) + goto done; + + err = packet_flush_gently(process->in); + if (err) + goto done; + + err = subprocess_read_status(process->out, &status); + err = err ? err : strcmp(status.buf, "success"); + +done: + sigchain_pop(SIGPIPE); + + if (err || errno == EPIPE) { + err = err ? err : errno; + if (!strcmp(status.buf, "error")) { + /* The process signaled a problem with the file. */ + } + else if (!strcmp(status.buf, "abort")) { + /* + * The process signaled a permanent problem. Don't try to read + * objects with the same command for the lifetime of the current + * Git process. + */ + entry->supported_capabilities &= ~CAP_GET; + } + else { + /* + * Something went wrong with the read-object process. + * Force shutdown and restart if needed. + */ + error("external process '%s' failed", cmd); + subprocess_stop(&subprocess_map, + (struct subprocess_entry *)entry); + free(entry); + } + } + + trace_performance_since(start, "read_object_process"); + + return err; +} + /* Returns 1 if we have successfully freshened the file, 0 otherwise. 
*/ static int freshen_file(const char *fn) { @@ -926,10 +1038,23 @@ static int check_and_freshen_nonlocal(const struct object_id *oid, int freshen) return 0; } -static int check_and_freshen(const struct object_id *oid, int freshen) +static int check_and_freshen(const struct object_id *oid, int freshen, + int skip_virtualized_objects) { - return check_and_freshen_local(oid, freshen) || + int ret; + int tried_hook = 0; + +retry: + ret = check_and_freshen_local(oid, freshen) || check_and_freshen_nonlocal(oid, freshen); + if (!ret && core_virtualize_objects && !skip_virtualized_objects && + !tried_hook) { + tried_hook = 1; + if (!read_object_process(oid)) + goto retry; + } + + return ret; } int has_loose_object_nonlocal(const struct object_id *oid) @@ -939,7 +1064,7 @@ int has_loose_object_nonlocal(const struct object_id *oid) static int has_loose_object(const struct object_id *oid) { - return check_and_freshen(oid, 0); + return check_and_freshen(oid, 0, 0); } static void mmap_limit_check(size_t length) @@ -1466,6 +1591,7 @@ static int do_oid_object_info_extended(struct repository *r, int rtype; const struct object_id *real = oid; int already_retried = 0; + int tried_hook = 0; if (flags & OBJECT_INFO_LOOKUP_REPLACE) @@ -1477,6 +1603,7 @@ static int do_oid_object_info_extended(struct repository *r, if (!oi) oi = &blank_oi; +retry: co = find_cached_object(real); if (co) { if (oi->typep) @@ -1511,6 +1638,11 @@ static int do_oid_object_info_extended(struct repository *r, reprepare_packed_git(r); if (find_pack_entry(r, real, &e)) break; + if (core_virtualize_objects && !tried_hook) { + tried_hook = 1; + if (!read_object_process(oid)) + goto retry; + } } /* Check if it is a missing object */ @@ -1910,9 +2042,10 @@ static int write_loose_object(const struct object_id *oid, char *hdr, return finalize_object_file(tmp_file.buf, filename.buf); } -static int freshen_loose_object(const struct object_id *oid) +static int freshen_loose_object(const struct object_id *oid, + int 
skip_virtualized_objects) { - return check_and_freshen(oid, 1); + return check_and_freshen(oid, 1, skip_virtualized_objects); } static int freshen_packed_object(const struct object_id *oid) @@ -1939,7 +2072,7 @@ int write_object_file(const void *buf, unsigned long len, const char *type, */ write_object_file_prepare(the_hash_algo, buf, len, type, oid, hdr, &hdrlen); - if (freshen_packed_object(oid) || freshen_loose_object(oid)) + if (freshen_packed_object(oid) || freshen_loose_object(oid, 1)) return 0; return write_loose_object(oid, hdr, hdrlen, buf, len, 0); } @@ -1959,7 +2092,7 @@ int hash_object_file_literally(const void *buf, unsigned long len, if (!(flags & HASH_WRITE_OBJECT)) goto cleanup; - if (freshen_packed_object(oid) || freshen_loose_object(oid)) + if (freshen_packed_object(oid) || freshen_loose_object(oid, 1)) goto cleanup; status = write_loose_object(oid, header, hdrlen, buf, len, 0); diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh index 923281af93981d..92873f02b89c14 100755 --- a/t/t0000-basic.sh +++ b/t/t0000-basic.sh @@ -1186,6 +1186,11 @@ test_expect_success 'writing this tree with --missing-ok' ' git write-tree --missing-ok ' +test_expect_success 'writing this tree with missing ok config value' ' + git config core.gvfs 4 && + git write-tree +' + ################################################################ test_expect_success 'git read-tree followed by write-tree should be idempotent' ' diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh index 5508e0bf6fbbb3..75e5d77435b4f8 100755 --- a/t/t0021-conversion.sh +++ b/t/t0021-conversion.sh @@ -314,6 +314,43 @@ test_expect_success "filter: smudge empty file" ' test_cmp expected filtered-empty-in-repo ' +test_expect_success "filter: clean filters blocked when under GVFS" ' + test_config filter.empty-in-repo.clean "cat >/dev/null" && + test_config filter.empty-in-repo.smudge "echo smudged && cat" && + test_config core.gvfs 64 && + + echo dead data walking >empty-in-repo && + test_must_fail git 
add empty-in-repo +' + +test_expect_success "filter: smudge filters blocked when under GVFS" ' + test_config filter.empty-in-repo.clean "cat >/dev/null" && + test_config filter.empty-in-repo.smudge "echo smudged && cat" && + test_config core.gvfs 64 && + + test_must_fail git checkout +' + +test_expect_success "ident blocked on add when under GVFS" ' + test_config core.gvfs 64 && + test_config core.autocrlf false && + + echo "*.i ident" >.gitattributes && + echo "\$Id\$" > ident.i && + + test_must_fail git add ident.i +' + +test_expect_success "ident blocked when under GVFS" ' + git add ident.i && + + git commit -m "added ident.i" && + test_config core.gvfs 64 && + rm ident.i && + + test_must_fail git checkout -- ident.i +' + test_expect_success 'disable filter with empty override' ' test_config_global filter.disable.smudge false && test_config_global filter.disable.clean false && diff --git a/t/t0027-auto-crlf.sh b/t/t0027-auto-crlf.sh index 9fcd56fab37314..4d43e15bb96727 100755 --- a/t/t0027-auto-crlf.sh +++ b/t/t0027-auto-crlf.sh @@ -333,6 +333,18 @@ checkout_files () { " } +test_expect_success 'crlf conversions blocked when under GVFS' ' + git checkout -b gvfs && + test_commit initial && + rm initial.t && + test_config core.gvfs 64 && + test_config core.autocrlf true && + test_must_fail git read-tree --reset -u HEAD && + + git config core.autocrlf false && + git read-tree --reset -u HEAD +' + # Test control characters # NUL SOH CR EOF==^Z test_expect_success 'ls-files --eol -o Text/Binary' ' diff --git a/t/t0410/read-object b/t/t0410/read-object new file mode 100755 index 00000000000000..02c799837f4057 --- /dev/null +++ b/t/t0410/read-object @@ -0,0 +1,118 @@ +#!/usr/bin/perl +# +# Example implementation for the Git read-object protocol version 1 +# See Documentation/technical/read-object-protocol.txt +# +# Allows you to test the ability for blobs to be pulled from a host git repo +# "on demand." 
Called when git needs a blob it couldn't find locally due to +# a lazy clone that only cloned the commits and trees. +# +# A lazy clone can be simulated via the following commands from the host repo +# you wish to create a lazy clone of: +# +# cd /host_repo +# git rev-parse HEAD +# git init /guest_repo +# git cat-file --batch-check --batch-all-objects | grep -v 'blob' | +# cut -d' ' -f1 | git pack-objects /guest_repo/.git/objects/pack/noblobs +# cd /guest_repo +# git config core.virtualizeobjects true +# git reset --hard +# +# Please note, this sample is a minimal skeleton. No proper error handling +# was implemented. +# + +use strict; +use warnings; + +# +# Point $DIR to the folder where your host git repo is located so we can pull +# missing objects from it +# +my $DIR = "../.git/"; + +sub packet_bin_read { + my $buffer; + my $bytes_read = read STDIN, $buffer, 4; + if ( $bytes_read == 0 ) { + + # EOF - Git stopped talking to us! + exit(); + } + elsif ( $bytes_read != 4 ) { + die "invalid packet: '$buffer'"; + } + my $pkt_size = hex($buffer); + if ( $pkt_size == 0 ) { + return ( 1, "" ); + } + elsif ( $pkt_size > 4 ) { + my $content_size = $pkt_size - 4; + $bytes_read = read STDIN, $buffer, $content_size; + if ( $bytes_read != $content_size ) { + die "invalid packet ($content_size bytes expected; $bytes_read bytes read)"; + } + return ( 0, $buffer ); + } + else { + die "invalid packet size: $pkt_size"; + } +} + +sub packet_txt_read { + my ( $res, $buf ) = packet_bin_read(); + unless ( $buf =~ s/\n$// ) { + die "A non-binary line MUST be terminated by an LF."; + } + return ( $res, $buf ); +} + +sub packet_bin_write { + my $buf = shift; + print STDOUT sprintf( "%04x", length($buf) + 4 ); + print STDOUT $buf; + STDOUT->flush(); +} + +sub packet_txt_write { + packet_bin_write( $_[0] . 
"\n" ); +} + +sub packet_flush { + print STDOUT sprintf( "%04x", 0 ); + STDOUT->flush(); +} + +( packet_txt_read() eq ( 0, "git-read-object-client" ) ) || die "bad initialize"; +( packet_txt_read() eq ( 0, "version=1" ) ) || die "bad version"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad version end"; + +packet_txt_write("git-read-object-server"); +packet_txt_write("version=1"); +packet_flush(); + +( packet_txt_read() eq ( 0, "capability=get" ) ) || die "bad capability"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad capability end"; + +packet_txt_write("capability=get"); +packet_flush(); + +while (1) { + my ($command) = packet_txt_read() =~ /^command=([^=]+)$/; + + if ( $command eq "get" ) { + my ($sha1) = packet_txt_read() =~ /^sha1=([0-9a-f]{40,64})$/; + packet_bin_read(); + + system ('git --git-dir="' . $DIR . '" cat-file blob ' . $sha1 . ' | git -c core.virtualizeobjects=false hash-object -w --stdin >/dev/null 2>&1'); + packet_txt_write(($?) ? "status=error" : "status=success"); + packet_flush(); + + open my $log, '>>.git/read-object-hook.log'; + print $log "Read object $sha1, exit code $?\n"; + close $log; + } else { + die "bad command '$command'"; + } +} diff --git a/t/t0411-read-object.sh b/t/t0411-read-object.sh new file mode 100755 index 00000000000000..af97ec5b50f356 --- /dev/null +++ b/t/t0411-read-object.sh @@ -0,0 +1,34 @@ +#!/bin/sh + +test_description='tests for long running read-object process' + +. 
./test-lib.sh + +test_expect_success 'setup host repo with a root commit' ' + test_commit zero && + hash1=$(git ls-tree HEAD | grep zero.t | cut -f1 | cut -d\ -f3) +' + +test_expect_success 'blobs can be retrieved from the host repo' ' + git init guest-repo && + (cd guest-repo && + mkdir -p .git/hooks && + cp $TEST_DIRECTORY/t0410/read-object .git/hooks/ && + git config core.virtualizeobjects true && + git cat-file blob "$hash1") +' + +test_expect_success 'invalid blobs generate errors' ' + (cd guest-repo && + test_must_fail git cat-file blob "invalid") +' + +test_expect_success 'read-object-hook is bypassed when writing objects' ' + (cd guest-repo && + echo hello >hello.txt && + git add hello.txt && + hash="$(git rev-parse --verify :hello.txt)" && + ! grep "$hash" .git/read-object-hook.log) +' + +test_done diff --git a/t/t1016-read-tree-skip-sha-on-read.sh b/t/t1016-read-tree-skip-sha-on-read.sh new file mode 100755 index 00000000000000..5b76a80a0020dc --- /dev/null +++ b/t/t1016-read-tree-skip-sha-on-read.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +test_description='check that read-tree works with core.gvfs config value' + +. ./test-lib.sh +. 
"$TEST_DIRECTORY"/lib-read-tree.sh + +test_expect_success setup ' + echo one >a && + git add a && + git commit -m initial +' +test_expect_success 'read-tree without core.gvfs' ' + read_tree_u_must_succeed -m -u HEAD +' + +test_expect_success 'read-tree with core.gvfs set to 1' ' + git config core.gvfs 1 && + read_tree_u_must_succeed -m -u HEAD +' + +test_done diff --git a/t/t1090-sparse-checkout-scope.sh b/t/t1090-sparse-checkout-scope.sh index e7dd921be5f956..d267e21db83720 100755 --- a/t/t1090-sparse-checkout-scope.sh +++ b/t/t1090-sparse-checkout-scope.sh @@ -81,6 +81,23 @@ test_expect_success 'in partial clone, sparse checkout only fetches needed blobs test_cmp expect actual ' +test_expect_success 'checkout does not delete items outside the sparse checkout file' ' + git checkout master && + git config core.gvfs 8 && + git checkout -b outside && + echo "new file1" >d && + git add d && + git commit -m "branch initial" && + echo "new file1" >e && + git add e && + git commit -m "skipped worktree" && + git update-index --skip-worktree e && + echo "/d" >.git/info/sparse-checkout && + git checkout HEAD^ && + test_path_is_file d && + test_path_is_file e +' + test_expect_success MINGW 'no unnecessary opendir() with fscache' ' git clone . fscache-test && ( diff --git a/t/t5583-vfs.sh b/t/t5583-vfs.sh new file mode 100755 index 00000000000000..8a703cbb640387 --- /dev/null +++ b/t/t5583-vfs.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +test_description='fetch using the flag to skip reachability and upload pack' + +. ./test-lib.sh + + +test_expect_success setup ' + echo inital >a && + git add a && + git commit -m initial && + git clone . one +' + +test_expect_success "fetch test" ' + cd one && + git config core.gvfs 16 && + rm -rf .git/objects/* && + git -C ..
cat-file commit HEAD | git hash-object -w --stdin -t commit && + git fetch && + test_must_fail git rev-parse --verify HEAD^{tree} +' + +test_done \ No newline at end of file diff --git a/t/t7108-reset-stdin.sh b/t/t7108-reset-stdin.sh index b7cbcbf869296c..db5483b8f10052 100755 --- a/t/t7108-reset-stdin.sh +++ b/t/t7108-reset-stdin.sh @@ -29,4 +29,13 @@ test_expect_success '--stdin requires --mixed' ' git reset --mixed --stdin list && + git reset --stdin ce_flags & CE_CONFLICTED) && verify_uptodate(old, o)) return -1; + + /* + * When marking entries to remove from the index and the working + * directory this option will take into account what the + * skip-worktree bit was set to so that if the entry has the + * skip-worktree bit set it will not be removed from the working + * directory. This will allow virtualized working directories to + * detect the change to HEAD and use the new commit tree to show + * the files that are in the working directory. + * + * old is the cache_entry that will have the skip-worktree bit set + * which will need to be preserved when the CE_REMOVE entry is added + */ + if (gvfs_config_is_set(GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT) && + old && + old->ce_flags & CE_SKIP_WORKTREE) { + add_entry(o, old, CE_REMOVE, 0); + invalidate_ce_path(old, o); + return 1; + } + add_entry(o, ce, CE_REMOVE, 0); invalidate_ce_path(ce, o); return 1;