diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index b6740b750f03aa..d88cfebe0d7164 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -728,6 +728,48 @@ core.multiPackIndex:: single index. See linkgit:git-multi-pack-index[1] for more information. Defaults to true. +core.gvfs:: + Enable the features needed for GVFS. This value can be set to true + to indicate all features should be turned on or the bit values listed + below can be used to turn on specific features. ++ +-- + GVFS_SKIP_SHA_ON_INDEX:: + Bit value 1 + Disables the calculation of the sha when writing the index + GVFS_MISSING_OK:: + Bit value 4 + Normally git write-tree ensures that the objects referenced by the + directory exist in the object database. This option disables this check. + GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT:: + Bit value 8 + When marking entries to remove from the index and the working + directory this option will take into account what the + skip-worktree bit was set to so that if the entry has the + skip-worktree bit set it will not be removed from the working + directory. This will allow virtualized working directories to + detect the change to HEAD and use the new commit tree to show + the files that are in the working directory. + GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK:: + Bit value 16 + While performing a fetch with a virtual file system we know + that there will be missing objects and we don't want to download + them just because of the reachability of the commits. We also + don't want to download a pack file with commits, trees, and blobs + since these will be downloaded on demand. This flag will skip the + checks on the reachability of objects during a fetch as well as + the upload pack so that extraneous objects don't get downloaded. + GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS:: + Bit value 64 + With a virtual file system we only know the file size before any + CRLF or smudge/clean filters processing is done on the client. + To prevent file corruption due to truncation or expansion with + garbage at the end, these filters must not run when the file + is first accessed and brought down to the client. Git.exe can't + currently tell the first access vs subsequent accesses so this + flag just blocks them from occurring at all. +-- + core.sparseCheckout:: Enable "sparse checkout" feature. See linkgit:git-sparse-checkout[1] for more information. diff --git a/Documentation/technical/read-object-protocol.txt b/Documentation/technical/read-object-protocol.txt new file mode 100644 index 00000000000000..a893b46e7c28a9 --- /dev/null +++ b/Documentation/technical/read-object-protocol.txt @@ -0,0 +1,102 @@ +Read Object Process +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The read-object process enables Git to read all missing blobs with a +single process invocation for the entire life of a single Git command. +This is achieved by using a packet format (pkt-line, see technical/ +protocol-common.txt) based protocol over standard input and standard +output as follows. All packets, except for the "*CONTENT" packets and +the "0000" flush packet, are considered text and therefore are +terminated by a LF. + +Git starts the process when it encounters the first missing object that +needs to be retrieved. After the process is started, Git sends a welcome +message ("git-read-object-client"), a list of supported protocol version +numbers, and a flush packet. Git expects to read a welcome response +message ("git-read-object-server"), exactly one protocol version number +from the previously sent list, and a flush packet. All further +communication will be based on the selected version. + +The remaining protocol description below documents "version=1". Please +note that "version=42" in the example below does not exist and is only +there to illustrate how the protocol would look with more than one +version. + +After the version negotiation Git sends a list of all capabilities that +it supports and a flush packet. Git expects to read a list of desired +capabilities, which must be a subset of the supported capabilities list, +and a flush packet as response: +------------------------ +packet: git> git-read-object-client +packet: git> version=1 +packet: git> version=42 +packet: git> 0000 +packet: git< git-read-object-server +packet: git< version=1 +packet: git< 0000 +packet: git> capability=get +packet: git> capability=have +packet: git> capability=put +packet: git> capability=not-yet-invented +packet: git> 0000 +packet: git< capability=get +packet: git< 0000 +------------------------ +The only supported capability in version 1 is "get". + +Afterwards Git sends a list of "key=value" pairs terminated with a flush +packet. The list will contain at least the command (based on the +supported capabilities) and the sha1 of the object to retrieve. Please +note, that the process must not send any response before it received the +final flush packet. + +When the process receives the "get" command, it should make the requested +object available in the git object store and then return success. Git will +then check the object store again and this time find it and proceed. +------------------------ +packet: git> command=get +packet: git> sha1=0a214a649e1b3d5011e14a3dc227753f2bd2be05 +packet: git> 0000 +------------------------ + +The process is expected to respond with a list of "key=value" pairs +terminated with a flush packet. If the process does not experience +problems then the list must contain a "success" status. +------------------------ +packet: git< status=success +packet: git< 0000 +------------------------ + +In case the process cannot or does not want to process the content, it +is expected to respond with an "error" status. +------------------------ +packet: git< status=error +packet: git< 0000 +------------------------ + +In case the process cannot or does not want to process the content as +well as any future content for the lifetime of the Git process, then it +is expected to respond with an "abort" status at any point in the +protocol. +------------------------ +packet: git< status=abort +packet: git< 0000 +------------------------ + +Git neither stops nor restarts the process in case the "error"/"abort" +status is set. + +If the process dies during the communication or does not adhere to the +protocol then Git will stop the process and restart it with the next +object that needs to be processed. + +After the read-object process has processed an object it is expected to +wait for the next "key=value" list containing a command. Git will close +the command pipe on exit. The process is expected to detect EOF and exit +gracefully on its own. Git will wait until the process has stopped. + +A long running read-object process demo implementation can be found in +`contrib/long-running-read-object/example.pl` located in the Git core +repository. If you develop your own long running process then the +`GIT_TRACE_PACKET` environment variables can be very helpful for +debugging (see linkgit:git[1]). diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index 2c8dae398f661b..88af5a9152e093 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.42.0 +DEF_VER=v2.42.0.vfs.0.0 LF=' ' @@ -12,10 +12,15 @@ if test -f version then VN=$(cat version) || VN="$DEF_VER" elif test -d ${GIT_DIR:-.git} -o -f .git && - VN=$(git describe --match "v[0-9]*" HEAD 2>/dev/null) && + VN=$(git describe --match "v[0-9]*vfs*" HEAD 2>/dev/null) && case "$VN" in *$LF*) (exit 1) ;; v[0-9]*) + if test "${VN%%.vfs.*}" != "${DEF_VER%%.vfs.*}" + then + echo "Found version $VN, which is not based on $DEF_VER" >&2 + exit 1 + fi git update-index -q --refresh test -z "$(git diff-index --name-only HEAD --)" || VN="$VN-dirty" ;; diff --git a/cache-tree.c b/cache-tree.c index 641427ed410af3..4ac7e8df8ae83b 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -1,6 +1,7 @@ #include "git-compat-util.h" #include "environment.h" #include "hex.h" +#include "gvfs.h" #include "lockfile.h" #include "tree.h" #include "tree-walk.h" @@ -258,7 +259,8 @@ static int update_one(struct cache_tree *it, int flags) { struct strbuf buffer; - int missing_ok = flags & WRITE_TREE_MISSING_OK; + int missing_ok = gvfs_config_is_set(GVFS_MISSING_OK) ? + WRITE_TREE_MISSING_OK : (flags & WRITE_TREE_MISSING_OK); int dryrun = flags & WRITE_TREE_DRY_RUN; int repair = flags & WRITE_TREE_REPAIR; int to_invalidate = 0; diff --git a/commit.c b/commit.c index b3223478bc2a3a..7bb48ce61bc626 100644 --- a/commit.c +++ b/commit.c @@ -1,4 +1,5 @@ #include "git-compat-util.h" +#include "gvfs.h" #include "tag.h" #include "commit.h" #include "commit-graph.h" @@ -560,13 +561,17 @@ int repo_parse_commit_internal(struct repository *r, .sizep = &size, .contentp = &buffer, }; + int ret; /* * Git does not support partial clones that exclude commits, so set * OBJECT_INFO_SKIP_FETCH_OBJECT to fail fast when an object is missing. */ int flags = OBJECT_INFO_LOOKUP_REPLACE | OBJECT_INFO_SKIP_FETCH_OBJECT | - OBJECT_INFO_DIE_IF_CORRUPT; - int ret; + OBJECT_INFO_DIE_IF_CORRUPT; + + /* But the GVFS Protocol _does_ support missing commits! */ + if (gvfs_config_is_set(GVFS_MISSING_OK)) + flags ^= OBJECT_INFO_SKIP_FETCH_OBJECT; if (!item) return -1; diff --git a/config.c b/config.c index d5b2a9b5099687..cc6402e6f29beb 100644 --- a/config.c +++ b/config.c @@ -9,6 +9,7 @@ #include "abspath.h" #include "advice.h" #include "date.h" +#include "gvfs.h" #include "branch.h" #include "config.h" #include "convert.h" @@ -1776,6 +1777,11 @@ int git_default_core_config(const char *var, const char *value, return 0; } + if (!strcmp(var, "core.gvfs")) { + gvfs_load_config_value(value); + return 0; + } + if (!strcmp(var, "core.sparsecheckout")) { core_apply_sparse_checkout = git_config_bool(var, value); return 0; @@ -1801,6 +1807,11 @@ int git_default_core_config(const char *var, const char *value, return 0; } + if (!strcmp(var, "core.virtualizeobjects")) { + core_virtualize_objects = git_config_bool(var, value); + return 0; + } + /* Add other config variables here and to Documentation/config.txt. */ return platform_core_config(var, value, ctx, cb); } diff --git a/connected.c b/connected.c index 8f89376dbcf30c..12b9f49cdfa801 100644 --- a/connected.c +++ b/connected.c @@ -1,6 +1,8 @@ #include "git-compat-util.h" +#include "environment.h" #include "gettext.h" #include "hex.h" +#include "gvfs.h" #include "object-store-ll.h" #include "run-command.h" #include "sigchain.h" @@ -32,6 +34,26 @@ int check_connected(oid_iterate_fn fn, void *cb_data, struct transport *transport; size_t base_len; + /* + * Running a virtual file system there will be objects that are + * missing locally and we don't want to download a bunch of + * commits, trees, and blobs just to make sure everything is + * reachable locally so this option will skip reachablility + * checks below that use rev-list. This will stop the check + * before uploadpack runs to determine if there is anything to + * fetch. Returning zero for the first check will also prevent the + * uploadpack from happening. It will also skip the check after + * the fetch is finished to make sure all the objects where + * downloaded in the pack file. This will allow the fetch to + * run and get all the latest tip commit ids for all the branches + * in the fetch but not pull down commits, trees, or blobs via + * upload pack. + */ + if (gvfs_config_is_set(GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK)) + return 0; + if (core_virtualize_objects) + return 0; + if (!opt) opt = &defaults; transport = opt->transport; diff --git a/contrib/long-running-read-object/example.pl b/contrib/long-running-read-object/example.pl new file mode 100644 index 00000000000000..b8f37f836a813c --- /dev/null +++ b/contrib/long-running-read-object/example.pl @@ -0,0 +1,114 @@ +#!/usr/bin/perl +# +# Example implementation for the Git read-object protocol version 1 +# See Documentation/technical/read-object-protocol.txt +# +# Allows you to test the ability for blobs to be pulled from a host git repo +# "on demand." Called when git needs a blob it couldn't find locally due to +# a lazy clone that only cloned the commits and trees. +# +# A lazy clone can be simulated via the following commands from the host repo +# you wish to create a lazy clone of: +# +# cd /host_repo +# git rev-parse HEAD +# git init /guest_repo +# git cat-file --batch-check --batch-all-objects | grep -v 'blob' | +# cut -d' ' -f1 | git pack-objects /guest_repo/.git/objects/pack/noblobs +# cd /guest_repo +# git config core.virtualizeobjects true +# git reset --hard +# +# Please note, this sample is a minimal skeleton. No proper error handling +# was implemented. +# + +use strict; +use warnings; + +# +# Point $DIR to the folder where your host git repo is located so we can pull +# missing objects from it +# +my $DIR = "/host_repo/.git/"; + +sub packet_bin_read { + my $buffer; + my $bytes_read = read STDIN, $buffer, 4; + if ( $bytes_read == 0 ) { + + # EOF - Git stopped talking to us! + exit(); + } + elsif ( $bytes_read != 4 ) { + die "invalid packet: '$buffer'"; + } + my $pkt_size = hex($buffer); + if ( $pkt_size == 0 ) { + return ( 1, "" ); + } + elsif ( $pkt_size > 4 ) { + my $content_size = $pkt_size - 4; + $bytes_read = read STDIN, $buffer, $content_size; + if ( $bytes_read != $content_size ) { + die "invalid packet ($content_size bytes expected; $bytes_read bytes read)"; + } + return ( 0, $buffer ); + } + else { + die "invalid packet size: $pkt_size"; + } +} + +sub packet_txt_read { + my ( $res, $buf ) = packet_bin_read(); + unless ( $buf =~ s/\n$// ) { + die "A non-binary line MUST be terminated by an LF."; + } + return ( $res, $buf ); +} + +sub packet_bin_write { + my $buf = shift; + print STDOUT sprintf( "%04x", length($buf) + 4 ); + print STDOUT $buf; + STDOUT->flush(); +} + +sub packet_txt_write { + packet_bin_write( $_[0] . "\n" ); +} + +sub packet_flush { + print STDOUT sprintf( "%04x", 0 ); + STDOUT->flush(); +} + +( packet_txt_read() eq ( 0, "git-read-object-client" ) ) || die "bad initialize"; +( packet_txt_read() eq ( 0, "version=1" ) ) || die "bad version"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad version end"; + +packet_txt_write("git-read-object-server"); +packet_txt_write("version=1"); +packet_flush(); + +( packet_txt_read() eq ( 0, "capability=get" ) ) || die "bad capability"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad capability end"; + +packet_txt_write("capability=get"); +packet_flush(); + +while (1) { + my ($command) = packet_txt_read() =~ /^command=([^=]+)$/; + + if ( $command eq "get" ) { + my ($sha1) = packet_txt_read() =~ /^sha1=([0-9a-f]{40})$/; + packet_bin_read(); + + system ('git --git-dir="' . $DIR . '" cat-file blob ' . $sha1 . ' | git -c core.virtualizeobjects=false hash-object -w --stdin >/dev/null 2>&1'); + packet_txt_write(($?) ? "status=error" : "status=success"); + packet_flush(); + } else { + die "bad command '$command'"; + } +} diff --git a/convert.c b/convert.c index a8870baff36a4a..e3b6c52011efab 100644 --- a/convert.c +++ b/convert.c @@ -1,5 +1,6 @@ #include "git-compat-util.h" #include "advice.h" +#include "gvfs.h" #include "config.h" #include "convert.h" #include "copy.h" @@ -555,6 +556,9 @@ static int crlf_to_git(struct index_state *istate, if (!buf) return 1; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("CRLF conversions not supported when running under GVFS"); + /* only grow if not in place */ if (strbuf_avail(buf) + buf->len < len) strbuf_grow(buf, len - buf->len); @@ -594,6 +598,9 @@ static int crlf_to_worktree(const char *src, size_t len, struct strbuf *buf, if (!will_convert_lf_to_crlf(&stats, crlf_action)) return 0; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("CRLF conversions not supported when running under GVFS"); + /* are we "faking" in place editing ? */ if (src == buf->buf) to_free = strbuf_detach(buf, NULL); @@ -703,6 +710,9 @@ static int apply_single_file_filter(const char *path, const char *src, size_t le struct async async; struct filter_params params; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("Filter \"%s\" not supported when running under GVFS", cmd); + memset(&async, 0, sizeof(async)); async.proc = filter_buffer_or_fd; async.data = ¶ms; @@ -1116,6 +1126,9 @@ static int ident_to_git(const char *src, size_t len, if (!buf) return 1; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("ident conversions not supported when running under GVFS"); + /* only grow if not in place */ if (strbuf_avail(buf) + buf->len < len) strbuf_grow(buf, len - buf->len); @@ -1163,6 +1176,9 @@ static int ident_to_worktree(const char *src, size_t len, if (!cnt) return 0; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("ident conversions not supported when running under GVFS"); + /* are we "faking" in place editing ? */ if (src == buf->buf) to_free = strbuf_detach(buf, NULL); @@ -1612,6 +1628,9 @@ static int lf_to_crlf_filter_fn(struct stream_filter *filter, size_t count, o = 0; struct lf_to_crlf_filter *lf_to_crlf = (struct lf_to_crlf_filter *)filter; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("CRLF conversions not supported when running under GVFS"); + /* * We may be holding onto the CR to see if it is followed by a * LF, in which case we would need to go to the main loop. @@ -1856,6 +1875,9 @@ static int ident_filter_fn(struct stream_filter *filter, struct ident_filter *ident = (struct ident_filter *)filter; static const char head[] = "$Id"; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("ident conversions not supported when running under GVFS"); + if (!input) { /* drain upon eof */ switch (ident->state) { diff --git a/environment.c b/environment.c index f98d76f08047f1..e3cbae74f31875 100644 --- a/environment.c +++ b/environment.c @@ -77,9 +77,11 @@ int grafts_keep_true_parents; int core_apply_sparse_checkout; int core_sparse_checkout_cone; int sparse_expect_files_outside_of_patterns; +int core_gvfs; int merge_log_config = -1; int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ unsigned long pack_size_limit_cfg; +int core_virtualize_objects; enum log_refs_config log_all_ref_updates = LOG_REFS_UNSET; #ifndef PROTECT_HFS_DEFAULT diff --git a/environment.h b/environment.h index c5377473c68339..9373b7e2f22113 100644 --- a/environment.h +++ b/environment.h @@ -11,6 +11,8 @@ struct strvec; extern char comment_line_char; extern int auto_comment_line_char; +extern int core_virtualize_objects; + /* * Wrapper of getenv() that returns a strdup value. This value is kept * in argv to be freed later. @@ -144,6 +146,7 @@ int get_shared_repository(void); void reset_shared_repository(void); extern int core_preload_index; +extern int core_gvfs; extern int precomposed_unicode; extern int protect_hfs; extern int protect_ntfs; diff --git a/gvfs.h b/gvfs.h new file mode 100644 index 00000000000000..bfa82d29e02739 --- /dev/null +++ b/gvfs.h @@ -0,0 +1,50 @@ +#ifndef GVFS_H +#define GVFS_H + +#include "environment.h" +#include "config.h" + +/* + * This file is for the specific settings and methods + * used for GVFS functionality + */ + + +/* + * The list of bits in the core_gvfs setting + */ +#define GVFS_SKIP_SHA_ON_INDEX (1 << 0) +#define GVFS_MISSING_OK (1 << 2) +#define GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT (1 << 3) +#define GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK (1 << 4) +#define GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS (1 << 6) + +static inline int gvfs_config_is_set(int mask) { + return (core_gvfs & mask) == mask; +} + +static inline int gvfs_config_is_set_any(void) { + return core_gvfs > 0; +} + +static inline void gvfs_load_config_value(const char *value) { + int is_bool = 0; + + if (value) + core_gvfs = git_config_bool_or_int("core.gvfs", value, &is_bool); + else + git_config_get_bool_or_int("core.gvfs", &is_bool, &core_gvfs); + + /* Turn on all bits if a bool was set in the settings */ + if (is_bool && core_gvfs) + core_gvfs = -1; +} + + +static inline int gvfs_config_load_and_is_set(int mask) { + gvfs_load_config_value(0); + return gvfs_config_is_set(mask); +} + + +#endif /* GVFS_H */ diff --git a/object-file.c b/object-file.c index 4df61c0ce6b33f..0cd04c296b4a6b 100644 --- a/object-file.c +++ b/object-file.c @@ -43,6 +43,11 @@ #include "setup.h" #include "submodule.h" #include "fsck.h" +#include "trace.h" +#include "hook.h" +#include "sigchain.h" +#include "sub-process.h" +#include "pkt-line.h" /* The maximum size for an object header. */ #define MAX_HEADER_LEN 32 @@ -959,6 +964,115 @@ int has_alt_odb(struct repository *r) return !!r->objects->odb->next; } +#define CAP_GET (1u<<0) + +static int subprocess_map_initialized; +static struct hashmap subprocess_map; + +struct read_object_process { + struct subprocess_entry subprocess; + unsigned int supported_capabilities; +}; + +static int start_read_object_fn(struct subprocess_entry *subprocess) +{ + struct read_object_process *entry = (struct read_object_process *)subprocess; + static int versions[] = {1, 0}; + static struct subprocess_capability capabilities[] = { + { "get", CAP_GET }, + { NULL, 0 } + }; + + return subprocess_handshake(subprocess, "git-read-object", versions, + NULL, capabilities, + &entry->supported_capabilities); +} + +static int read_object_process(const struct object_id *oid) +{ + int err; + struct read_object_process *entry; + struct child_process *process; + struct strbuf status = STRBUF_INIT; + const char *cmd = find_hook("read-object"); + uint64_t start; + + start = getnanotime(); + + if (!subprocess_map_initialized) { + subprocess_map_initialized = 1; + hashmap_init(&subprocess_map, (hashmap_cmp_fn)cmd2process_cmp, + NULL, 0); + entry = NULL; + } else { + entry = (struct read_object_process *) subprocess_find_entry(&subprocess_map, cmd); + } + + if (!entry) { + entry = xmalloc(sizeof(*entry)); + entry->supported_capabilities = 0; + + if (subprocess_start(&subprocess_map, &entry->subprocess, cmd, + start_read_object_fn)) { + free(entry); + return -1; + } + } + process = &entry->subprocess.process; + + if (!(CAP_GET & entry->supported_capabilities)) + return -1; + + sigchain_push(SIGPIPE, SIG_IGN); + + err = packet_write_fmt_gently(process->in, "command=get\n"); + if (err) + goto done; + + err = packet_write_fmt_gently(process->in, "sha1=%s\n", oid_to_hex(oid)); + if (err) + goto done; + + err = packet_flush_gently(process->in); + if (err) + goto done; + + err = subprocess_read_status(process->out, &status); + err = err ? err : strcmp(status.buf, "success"); + +done: + sigchain_pop(SIGPIPE); + + if (err || errno == EPIPE) { + err = err ? err : errno; + if (!strcmp(status.buf, "error")) { + /* The process signaled a problem with the file. */ + } + else if (!strcmp(status.buf, "abort")) { + /* + * The process signaled a permanent problem. Don't try to read + * objects with the same command for the lifetime of the current + * Git process. + */ + entry->supported_capabilities &= ~CAP_GET; + } + else { + /* + * Something went wrong with the read-object process. + * Force shutdown and restart if needed. + */ + error("external process '%s' failed", cmd); + subprocess_stop(&subprocess_map, + (struct subprocess_entry *)entry); + free(entry); + } + } + + trace_performance_since(start, "read_object_process"); + + return err; +} + /* Returns 1 if we have successfully freshened the file, 0 otherwise. */ static int freshen_file(const char *fn) { @@ -1007,10 +1121,23 @@ static int check_and_freshen_nonlocal(const struct object_id *oid, int freshen) return 0; } -static int check_and_freshen(const struct object_id *oid, int freshen) +static int check_and_freshen(const struct object_id *oid, int freshen, + int skip_virtualized_objects) { - return check_and_freshen_local(oid, freshen) || + int ret; + int tried_hook = 0; + +retry: + ret = check_and_freshen_local(oid, freshen) || check_and_freshen_nonlocal(oid, freshen); + if (!ret && core_virtualize_objects && !skip_virtualized_objects && + !tried_hook) { + tried_hook = 1; + if (!read_object_process(oid)) + goto retry; + } + + return ret; } int has_loose_object_nonlocal(const struct object_id *oid) @@ -1020,7 +1147,7 @@ int has_loose_object_nonlocal(const struct object_id *oid) int has_loose_object(const struct object_id *oid) { - return check_and_freshen(oid, 0); + return check_and_freshen(oid, 0, 0); } static void mmap_limit_check(size_t length) @@ -1564,6 +1691,7 @@ static int do_oid_object_info_extended(struct repository *r, int rtype; const struct object_id *real = oid; int already_retried = 0; + int tried_hook = 0; if (flags & OBJECT_INFO_LOOKUP_REPLACE) @@ -1575,6 +1703,7 @@ static int do_oid_object_info_extended(struct repository *r, if (!oi) oi = &blank_oi; +retry: co = find_cached_object(real); if (co) { if (oi->typep) @@ -1606,6 +1735,11 @@ static int do_oid_object_info_extended(struct repository *r, reprepare_packed_git(r); if (find_pack_entry(r, real, &e)) break; + if (core_virtualize_objects && !tried_hook) { + tried_hook = 1; + if (!read_object_process(oid)) + goto retry; + } } /* @@ -2085,9 +2219,10 @@ static int write_loose_object(const struct object_id *oid, char *hdr, return finalize_object_file(tmp_file.buf, filename.buf); } -static int freshen_loose_object(const struct object_id *oid) +static int freshen_loose_object(const struct object_id *oid, + int skip_virtualized_objects) { - return check_and_freshen(oid, 1); + return check_and_freshen(oid, 1, skip_virtualized_objects); } static int freshen_packed_object(const struct object_id *oid) @@ -2181,7 +2316,7 @@ int stream_loose_object(struct input_stream *in_stream, size_t len, die(_("deflateEnd on stream object failed (%d)"), ret); close_loose_object(fd, tmp_file.buf); - if (freshen_packed_object(oid) || freshen_loose_object(oid)) { + if (freshen_packed_object(oid) || freshen_loose_object(oid, 1)) { unlink_or_warn(tmp_file.buf); goto cleanup; } @@ -2221,7 +2356,7 @@ int write_object_file_flags(const void *buf, size_t len, */ write_object_file_prepare(the_hash_algo, buf, len, type, oid, hdr, &hdrlen); - if (freshen_packed_object(oid) || freshen_loose_object(oid)) + if (freshen_packed_object(oid) || freshen_loose_object(oid, 1)) return 0; return write_loose_object(oid, hdr, hdrlen, buf, len, 0, flags); } @@ -2242,7 +2377,7 @@ int write_object_file_literally(const void *buf, size_t len, if (!(flags & HASH_WRITE_OBJECT)) goto cleanup; - if (freshen_packed_object(oid) || freshen_loose_object(oid)) + if (freshen_packed_object(oid) || freshen_loose_object(oid, 1)) goto cleanup; status = write_loose_object(oid, header, hdrlen, buf, len, 0, 0); diff --git a/repo-settings.c b/repo-settings.c index 9f711c87753c1b..fb76ee4c69ca7e 100644 --- a/repo-settings.c +++ b/repo-settings.c @@ -4,6 +4,7 @@ #include "midx.h" #include "fsmonitor-ipc.h" #include "fsmonitor-settings.h" +#include "gvfs.h" static void repo_cfg_bool(struct repository *r, const char *key, int *dest, int def) @@ -94,6 +95,13 @@ void prepare_repo_settings(struct repository *r) r->settings.pack_use_bitmap_boundary_traversal); repo_cfg_bool(r, "core.usereplacerefs", &r->settings.read_replace_refs, 1); + /* + * For historical compatibility reasons, enable index.skipHash based + * on a bit in core.gvfs. + */ + if (gvfs_config_is_set(GVFS_SKIP_SHA_ON_INDEX)) + r->settings.index_skip_hash = 1; + /* * The GIT_TEST_MULTI_PACK_INDEX variable is special in that * either it *or* the config sets diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh index 6e300be2ac55af..69143159331ddd 100755 --- a/t/t0000-basic.sh +++ b/t/t0000-basic.sh @@ -1106,6 +1106,11 @@ test_expect_success 'writing this tree with --missing-ok' ' git write-tree --missing-ok ' +test_expect_success 'writing this tree with missing ok config value' ' + git config core.gvfs 4 && + git write-tree +' + ################################################################ test_expect_success 'git read-tree followed by write-tree should be idempotent' ' diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh index e190c44fe818b9..afd77331040403 100755 --- a/t/t0021-conversion.sh +++ b/t/t0021-conversion.sh @@ -335,6 +335,43 @@ test_expect_success "filter: smudge empty file" ' test_cmp expected filtered-empty-in-repo ' +test_expect_success "filter: clean filters blocked when under GVFS" ' + test_config filter.empty-in-repo.clean "cat >/dev/null" && + test_config filter.empty-in-repo.smudge "echo smudged && cat" && + test_config core.gvfs 64 && + + echo dead data walking >empty-in-repo && + test_must_fail git add empty-in-repo +' + +test_expect_success "filter: smudge filters blocked when under GVFS" ' + test_config filter.empty-in-repo.clean "cat >/dev/null" && + test_config filter.empty-in-repo.smudge "echo smudged && cat" && + test_config core.gvfs 64 && + + test_must_fail git checkout +' + +test_expect_success "ident blocked on add when under GVFS" ' + test_config core.gvfs 64 && + test_config core.autocrlf false && + + echo "*.i ident" >.gitattributes && + echo "\$Id\$" > ident.i && + + test_must_fail git add ident.i +' + +test_expect_success "ident blocked when under GVFS" ' + git add ident.i && + + git commit -m "added ident.i" && + test_config core.gvfs 64 && + rm ident.i && + + test_must_fail git checkout -- ident.i +' + test_expect_success 'disable filter with empty override' ' test_config_global filter.disable.smudge false && test_config_global filter.disable.clean false && diff --git a/t/t0027-auto-crlf.sh b/t/t0027-auto-crlf.sh index 2f57c8669cb5af..c42bf106e6f4ea 100755 --- a/t/t0027-auto-crlf.sh +++ b/t/t0027-auto-crlf.sh @@ -344,6 +344,18 @@ checkout_files () { " } +test_expect_success 'crlf conversions blocked when under GVFS' ' + git checkout -b gvfs && + test_commit initial && + rm initial.t && + test_config core.gvfs 64 && + test_config core.autocrlf true && + test_must_fail git read-tree --reset -u HEAD && + + git config core.autocrlf false && + git read-tree --reset -u HEAD +' + # Test control characters # NUL SOH CR EOF==^Z test_expect_success 'ls-files --eol -o Text/Binary' ' diff --git a/t/t0410/read-object b/t/t0410/read-object new file mode 100755 index 00000000000000..02c799837f4057 --- /dev/null +++ b/t/t0410/read-object @@ -0,0 +1,118 @@ +#!/usr/bin/perl +# +# Example implementation for the Git read-object protocol version 1 +# See Documentation/technical/read-object-protocol.txt +# +# Allows you to test the ability for blobs to be pulled from a host git repo +# "on demand." Called when git needs a blob it couldn't find locally due to +# a lazy clone that only cloned the commits and trees. +# +# A lazy clone can be simulated via the following commands from the host repo +# you wish to create a lazy clone of: +# +# cd /host_repo +# git rev-parse HEAD +# git init /guest_repo +# git cat-file --batch-check --batch-all-objects | grep -v 'blob' | +# cut -d' ' -f1 | git pack-objects /guest_repo/.git/objects/pack/noblobs +# cd /guest_repo +# git config core.virtualizeobjects true +# git reset --hard +# +# Please note, this sample is a minimal skeleton. No proper error handling +# was implemented. +# + +use strict; +use warnings; + +# +# Point $DIR to the folder where your host git repo is located so we can pull +# missing objects from it +# +my $DIR = "../.git/"; + +sub packet_bin_read { + my $buffer; + my $bytes_read = read STDIN, $buffer, 4; + if ( $bytes_read == 0 ) { + + # EOF - Git stopped talking to us! + exit(); + } + elsif ( $bytes_read != 4 ) { + die "invalid packet: '$buffer'"; + } + my $pkt_size = hex($buffer); + if ( $pkt_size == 0 ) { + return ( 1, "" ); + } + elsif ( $pkt_size > 4 ) { + my $content_size = $pkt_size - 4; + $bytes_read = read STDIN, $buffer, $content_size; + if ( $bytes_read != $content_size ) { + die "invalid packet ($content_size bytes expected; $bytes_read bytes read)"; + } + return ( 0, $buffer ); + } + else { + die "invalid packet size: $pkt_size"; + } +} + +sub packet_txt_read { + my ( $res, $buf ) = packet_bin_read(); + unless ( $buf =~ s/\n$// ) { + die "A non-binary line MUST be terminated by an LF."; + } + return ( $res, $buf ); +} + +sub packet_bin_write { + my $buf = shift; + print STDOUT sprintf( "%04x", length($buf) + 4 ); + print STDOUT $buf; + STDOUT->flush(); +} + +sub packet_txt_write { + packet_bin_write( $_[0] . "\n" ); +} + +sub packet_flush { + print STDOUT sprintf( "%04x", 0 ); + STDOUT->flush(); +} + +( packet_txt_read() eq ( 0, "git-read-object-client" ) ) || die "bad initialize"; +( packet_txt_read() eq ( 0, "version=1" ) ) || die "bad version"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad version end"; + +packet_txt_write("git-read-object-server"); +packet_txt_write("version=1"); +packet_flush(); + +( packet_txt_read() eq ( 0, "capability=get" ) ) || die "bad capability"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad capability end"; + +packet_txt_write("capability=get"); +packet_flush(); + +while (1) { + my ($command) = packet_txt_read() =~ /^command=([^=]+)$/; + + if ( $command eq "get" ) { + my ($sha1) = packet_txt_read() =~ /^sha1=([0-9a-f]{40,64})$/; + packet_bin_read(); + + system ('git --git-dir="' . $DIR . '" cat-file blob ' . $sha1 . ' | git -c core.virtualizeobjects=false hash-object -w --stdin >/dev/null 2>&1'); + packet_txt_write(($?) ? "status=error" : "status=success"); + packet_flush(); + + open my $log, '>>.git/read-object-hook.log'; + print $log "Read object $sha1, exit code $?\n"; + close $log; + } else { + die "bad command '$command'"; + } +} diff --git a/t/t0411-read-object.sh b/t/t0411-read-object.sh new file mode 100755 index 00000000000000..0cee1963cf091e --- /dev/null +++ b/t/t0411-read-object.sh @@ -0,0 +1,37 @@ +#!/bin/sh + +test_description='tests for long running read-object process' + +. ./test-lib.sh + +test_expect_success 'setup host repo with a root commit' ' + test_commit zero && + hash1=$(git ls-tree HEAD | grep zero.t | cut -f1 | cut -d\ -f3) +' + +test_expect_success 'blobs can be retrieved from the host repo' ' + git init guest-repo && + (cd guest-repo && + mkdir -p .git/hooks && + sed "1s|/usr/bin/perl|$PERL_PATH|" \ + <$TEST_DIRECTORY/t0410/read-object \ + >.git/hooks/read-object && + chmod +x .git/hooks/read-object && + git config core.virtualizeobjects true && + git cat-file blob "$hash1") +' + +test_expect_success 'invalid blobs generate errors' ' + (cd guest-repo && + test_must_fail git cat-file blob "invalid") +' + +test_expect_success 'read-object-hook is bypassed when writing objects' ' + (cd guest-repo && + echo hello >hello.txt && + git add hello.txt && + hash="$(git rev-parse --verify :hello.txt)" && + ! grep "$hash" .git/read-object-hook.log) +' + +test_done diff --git a/t/t1016-read-tree-skip-sha-on-read.sh b/t/t1016-read-tree-skip-sha-on-read.sh new file mode 100755 index 00000000000000..5b76a80a0020dc --- /dev/null +++ b/t/t1016-read-tree-skip-sha-on-read.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +test_description='check that read-tree works with core.gvfs config value' + +. ./test-lib.sh +. "$TEST_DIRECTORY"/lib-read-tree.sh + +test_expect_success setup ' + echo one >a && + git add a && + git commit -m initial +' +test_expect_success 'read-tree without core.gvsf' ' + read_tree_u_must_succeed -m -u HEAD +' + +test_expect_success 'read-tree with core.gvfs set to 1' ' + git config core.gvfs 1 && + read_tree_u_must_succeed -m -u HEAD +' + +test_done diff --git a/t/t1090-sparse-checkout-scope.sh b/t/t1090-sparse-checkout-scope.sh index 529844e2862c74..effa20aab7bea7 100755 --- a/t/t1090-sparse-checkout-scope.sh +++ b/t/t1090-sparse-checkout-scope.sh @@ -106,6 +106,26 @@ test_expect_success 'in partial clone, sparse checkout only fetches needed blobs test_cmp expect actual ' +test_expect_success 'checkout does not delete items outside the sparse checkout file' ' + # The "sparse.expectfilesoutsideofpatterns" config will prevent the + # SKIP_WORKTREE flag from being dropped on files present on-disk. + test_config sparse.expectfilesoutsideofpatterns true && + + test_config core.gvfs 8 && + git checkout -b outside && + echo "new file1" >d && + git add --sparse d && + git commit -m "branch initial" && + echo "new file1" >e && + git add --sparse e && + git commit -m "skipped worktree" && + git update-index --skip-worktree e && + echo "/d" >.git/info/sparse-checkout && + git checkout HEAD^ && + test_path_is_file d && + test_path_is_file e +' + test_expect_success MINGW 'no unnecessary opendir() with fscache' ' git clone . fscache-test && ( diff --git a/t/t5584-vfs.sh b/t/t5584-vfs.sh new file mode 100755 index 00000000000000..8a703cbb640387 --- /dev/null +++ b/t/t5584-vfs.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +test_description='fetch using the flag to skip reachability and upload pack' + +. ./test-lib.sh + + +test_expect_success setup ' + echo inital >a && + git add a && + git commit -m initial && + git clone . one +' + +test_expect_success "fetch test" ' + cd one && + git config core.gvfs 16 && + rm -rf .git/objects/* && + git -C .. cat-file commit HEAD | git hash-object -w --stdin -t commit && + git fetch && + test_must_fail git rev-parse --verify HEAD^{tree} +' + +test_done \ No newline at end of file diff --git a/t/t7108-reset-stdin.sh b/t/t7108-reset-stdin.sh index b7cbcbf869296c..db5483b8f10052 100755 --- a/t/t7108-reset-stdin.sh +++ b/t/t7108-reset-stdin.sh @@ -29,4 +29,13 @@ test_expect_success '--stdin requires --mixed' ' git reset --mixed --stdin list && + git reset --stdin ce_flags & CE_CONFLICTED) && verify_uptodate(old, o)) return -1; + + /* + * When marking entries to remove from the index and the working + * directory this option will take into account what the + * skip-worktree bit was set to so that if the entry has the + * skip-worktree bit set it will not be removed from the working + * directory. This will allow virtualized working directories to + * detect the change to HEAD and use the new commit tree to show + * the files that are in the working directory. + * + * old is the cache_entry that will have the skip-worktree bit set + * which will need to be preserved when the CE_REMOVE entry is added + */ + if (gvfs_config_is_set(GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT) && + old && + old->ce_flags & CE_SKIP_WORKTREE) { + add_entry(o, old, CE_REMOVE, 0); + invalidate_ce_path(old, o); + return 1; + } + add_entry(o, ce, CE_REMOVE, 0); invalidate_ce_path(ce, o); return 1;