From 112fd81ed87523c2d15180ada40e62aaed6a7f8a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 5 Aug 2019 14:46:50 -0400 Subject: [PATCH 1/4] sparse-checkout: basics of builtin Implements init, add, and list. Save 'remove' for later. Signed-off-by: Derrick Stolee --- .gitignore | 1 + Documentation/git-sparse-checkout.txt | 120 +++++++++++ Makefile | 2 + builtin.h | 1 + builtin/sparse-checkout.c | 300 ++++++++++++++++++++++++++ dir.c | 124 +++++++++++ dir.h | 30 +++ git.c | 1 + sparse-checkout.c | 37 ++++ sparse-checkout.h | 43 ++++ t/t1091-sparse-checkout-builtin.sh | 128 +++++++++++ 11 files changed, 787 insertions(+) create mode 100644 Documentation/git-sparse-checkout.txt create mode 100644 builtin/sparse-checkout.c create mode 100644 sparse-checkout.c create mode 100644 sparse-checkout.h create mode 100755 t/t1091-sparse-checkout-builtin.sh diff --git a/.gitignore b/.gitignore index 2374f77a1aae58..079e694710b79c 100644 --- a/.gitignore +++ b/.gitignore @@ -159,6 +159,7 @@ /git-show-branch /git-show-index /git-show-ref +/git-sparse-checkout /git-stage /git-stash /git-status diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt new file mode 100644 index 00000000000000..45da29a5aac063 --- /dev/null +++ b/Documentation/git-sparse-checkout.txt @@ -0,0 +1,120 @@ +git-sparse-checkout(1) +======================= + +NAME +---- +git-sparse-checkout - Initialize and modify the sparse-checkout +configuration, which reduces the checkout to a set of directories +given by a list of prefixes. + + +SYNOPSIS +-------- +[verse] +'git sparse-checkout [init|add|list]' + + +DESCRIPTION +----------- + +Initialize and modify the sparse-checkout configuration, which reduces +the checkout to a set of directories given by a list of prefixes. + + +COMMANDS +-------- +'init':: + Enable the `core.sparseCheckout` setting and clear all folders not + included by the sparse-checkout file. 
Typically, no sparse-checkout + file will exist when this command is run, so all directories in the + working directory will be removed. + +'add':: + Given a list of paths over stdin, add those paths to the + sparse-checkout file. Refresh the index and working directory to + place the necessary files on disk. + +'list':: + Provide a list of the contents in the sparse-checkout file. + + +SPARSE CHECKOUT +---------------- + +The sparse-checkout feature provides a simple way to reduce the "cone" of files +in the working directory which are populated by Git. The sparse-checkout file +specifies a list of directories from the working directory root, and the +paths included are as follows: + +1. Any path that is contained in a folder listed in the sparse-checkout file. + For example, if `A/B/C` is in the sparse-checkout file, then `A/B/C/D/e.txt` + will exist in the working directory. + +2. Any path whose immediate parent folder is an ancestor of a folder listed in + the sparse-checkout file. For example, all files in the root directory are + included -- even if the sparse-checkout file is empty. As another example, + if `A/B/C` is in the sparse-checkout file, then `A/foo.txt` and `A/B/bar.c` + would be included. Note that `A/F/xyz.h` would not be included, as its + immediate parent (`A/F`) is not a prefix of `A/B/C`. + +Note that the pattern matching used by this command is very restricted, unlike +the general patterns otherwise allowed in the sparse-checkout file. Both styles +use the skip-worktree bits in the index file to interact with other features in +Git, but otherwise are incompatible. Writing general patterns that include files +according to the rules above is difficult, and the pattern matching they require +leads to quadratic growth: for N patterns and M index entries, we must perform +O(N * M) pattern matches. + +Conversely, the restricted patterns do not allow arbitrary negative patterns or +file-name based patterns. 
If you want to exclude all files ending in ".exe" +you could include the line `!*.exe` in the sparse-checkout file. This is not +available when using the restricted patterns written by this command. + +To use the sparse-checkout feature, you must enable the `core.partialCheckout` +config setting. This setting will override the `core.sparseCheckout` setting, +so any values in the sparse-checkout file will be ignored. + +To initialize an existing repo to use the sparse-checkout feature, run +`git sparse-checkout init`. This will enable `core.partialCheckout`, remove +all directories in the root of the working directory, and then update the +working directory to contain the folders that may already exist in the +sparse-checkout file. In the usual case, the sparse-checkout file will be +empty and you will only see files in the working directory root. + +To add folders to the sparse-checkout file, run the `add` subcommand, and +provide the list over standard input: + +``` +$ git sparse-checkout add +A/B/C +Docs +tests +^D +``` + +Since these folders do not exist in your working directory, you can use +`git ls-tree HEAD -- <path>` to help discover folders that exist in your +repo. + +After adding the folders to the sparse-checkout file, Git will update the index +and run `git reset --hard` to place the files on disk. Due to the use of +`git reset --hard`, the command will halt with an error before doing any work +if you do not have a clean `git status`. + +If you wish to reduce your working directory, you can use the +`git sparse-checkout remove` subcommand. It takes a list of folders from +standard input, removes them from the sparse-checkout file and deletes them from +your working directory, then runs `git reset --hard` to ensure the index is +up to date. + +To check which folders are included in the sparse-checkout file, run the +`git sparse-checkout list` subcommand. 
+ +SEE ALSO +-------- + +linkgit:git-read-tree[1] + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Makefile b/Makefile index 8a7e2353520ddd..e34cf5569714c2 100644 --- a/Makefile +++ b/Makefile @@ -984,6 +984,7 @@ LIB_OBJS += sha1-name.o LIB_OBJS += shallow.o LIB_OBJS += sideband.o LIB_OBJS += sigchain.o +LIB_OBJS += sparse-checkout.o LIB_OBJS += split-index.o LIB_OBJS += strbuf.o LIB_OBJS += streaming.o @@ -1127,6 +1128,7 @@ BUILTIN_OBJS += builtin/shortlog.o BUILTIN_OBJS += builtin/show-branch.o BUILTIN_OBJS += builtin/show-index.o BUILTIN_OBJS += builtin/show-ref.o +BUILTIN_OBJS += builtin/sparse-checkout.o BUILTIN_OBJS += builtin/stash.o BUILTIN_OBJS += builtin/stripspace.o BUILTIN_OBJS += builtin/submodule--helper.o diff --git a/builtin.h b/builtin.h index ec7e0954c4c8a1..d517068faaafa9 100644 --- a/builtin.h +++ b/builtin.h @@ -223,6 +223,7 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix); int cmd_show(int argc, const char **argv, const char *prefix); int cmd_show_branch(int argc, const char **argv, const char *prefix); int cmd_show_index(int argc, const char **argv, const char *prefix); +int cmd_sparse_checkout(int argc, const char **argv, const char *prefix); int cmd_status(int argc, const char **argv, const char *prefix); int cmd_stash(int argc, const char **argv, const char *prefix); int cmd_stripspace(int argc, const char **argv, const char *prefix); diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c new file mode 100644 index 00000000000000..5def8cca282a69 --- /dev/null +++ b/builtin/sparse-checkout.c @@ -0,0 +1,300 @@ +#include "builtin.h" +#include "config.h" +#include "dir.h" +#include "parse-options.h" +#include "pathspec.h" +#include "repository.h" +#include "run-command.h" +#include "sparse-checkout.h" +#include "strbuf.h" +#include "string-list.h" + +static char const * const builtin_sparse_checkout_usage[] = { + N_("git sparse-checkout [init|add|list]"), + NULL +}; + +struct opts_sparse_checkout { + 
const char *subcommand; + int read_stdin; +} opts; + +static char *get_sparse_checkout_filename(void) +{ + return git_pathdup("info/sparse-checkout"); +} + +static int check_clean_status(void) +{ +/* + struct strbuf sb = STRBUF_INIT; + + if (repo_index_has_changes(the_repository, NULL, &sb)) { + error(_("You have local changes that could be overwritten by a reset:\n %s"), + sb.buf); + return 1; + } +*/ + return 0; +} + +static int sc_read_tree(void) +{ + struct argv_array argv = ARGV_ARRAY_INIT; + int result = 0; + argv_array_pushl(&argv, "read-tree", "-m", "-u", "HEAD", NULL); + + if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) { + error(_("failed to update index with new sparse-checkout paths")); + result = 1; + } + + argv_array_clear(&argv); + return result; +} + +static int sc_enable_config(void) +{ + struct argv_array argv = ARGV_ARRAY_INIT; + int result = 0; + argv_array_pushl(&argv, "config", "--add", "core.sparseCheckout", "true", NULL); + + if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) { + error(_("failed to enable core.sparseCheckout")); + result = 1; + } + + argv_array_clear(&argv); + return result; +} + +static int delete_directory(const struct object_id *oid, struct strbuf *base, + const char *pathname, unsigned mode, int stage, void *context) +{ + struct strbuf dirname = STRBUF_INIT; + struct stat sb; + + strbuf_addstr(&dirname, the_repository->worktree); + strbuf_addch(&dirname, '/'); + strbuf_addstr(&dirname, pathname); + + if (stat(dirname.buf, &sb) || !(sb.st_mode & S_IFDIR)) + return 0; + + if (remove_dir_recursively(&dirname, 0)) + warning(_("failed to remove directory '%s'"), + dirname.buf); + + strbuf_release(&dirname); + return 0; +} + +static int sparse_checkout_init(int argc, const char **argv) +{ + struct tree *t; + struct object_id oid; + static struct pathspec pathspec; + struct exclude_list el; + char *sparse_filename; + FILE *fp; + + if (check_clean_status()) + return 1; + + if (sc_enable_config()) + return 1; + + sparse_filename = 
get_sparse_checkout_filename(); + + if (!get_sparse_checkout_data(sparse_filename, &el)) { + /* Check for existing data */ + goto reset; + } + + /* initial mode: all blobs at root */ + fp = fopen(sparse_filename, "w"); + fprintf(fp, "/*\n!/*/*\n"); + fclose(fp); + + /* remove all directories in the root, if tracked by Git */ + if (get_oid("HEAD", &oid)) { + /* assume we are in a fresh repo */ + fprintf(stderr, "NO HEAD FOUND!\n"); + free(sparse_filename); + return 0; + } + + t = parse_tree_indirect(&oid); + + parse_pathspec(&pathspec, PATHSPEC_ALL_MAGIC & + ~(PATHSPEC_FROMTOP | PATHSPEC_LITERAL), + PATHSPEC_PREFER_CWD, + "", NULL); + + if (read_tree_recursive(the_repository, t, "", 0, 0, &pathspec, + delete_directory, NULL)) + return 1; + +reset: + free(sparse_filename); + return sc_read_tree(); +} + +static int sparse_checkout_add(int argc, const char **argv) +{ + struct strbuf line = STRBUF_INIT; + FILE *fp; + struct exclude_list el; + char *sparse_filename; + struct hashmap_iter iter; + struct exclude_entry *entry; + struct string_list sl = STRING_LIST_INIT_DUP; + int i; + + if (check_clean_status()) + return 1; + + memset(&el, 0, sizeof(el)); + sparse_filename = get_sparse_checkout_filename(); + get_sparse_checkout_data(sparse_filename, &el); + + if (!excludes_are_strict(&el)) + die(_("The sparse-checkout file has incompatible patterns. 
It may have been edited manually.")); + + strbuf_init(&line, PATH_MAX); + + for (;;) { + if (strbuf_getline(&line, stdin)) { + if (feof(stdin)) + break; + if (!ferror(stdin)) + die("BUG: fgets returned NULL, not EOF, not error!"); + if (errno != EINTR) + die_errno("fgets"); + clearerr(stdin); + continue; + } + + if (line.len) + insert_recursive_pattern(&el, &line); + } + + fp = fopen(sparse_filename, "w"); + + hashmap_iter_init(&el.parent_hashmap, &iter); + while ((entry = hashmap_iter_next(&iter))) { + char *pattern = xstrdup(entry->pattern); + char *converted = pattern; + if (pattern[0] == '/') + converted++; + if (pattern[entry->patternlen - 1] == '/') + pattern[entry->patternlen - 1] = 0; + string_list_insert(&sl, converted); + free(pattern); + } + + string_list_sort(&sl); + string_list_remove_duplicates(&sl, 0); + + for (i = 0; i < sl.nr; i++) { + char *pattern = sl.items[i].string; + + if (!strcmp(pattern, "")) + fprintf(fp, "/*\n!/*/*\n"); + else + fprintf(fp, "/%s/*\n!/%s/*/*\n", pattern, pattern); + } + + string_list_clear(&sl, 0); + + hashmap_iter_init(&el.recursive_hashmap, &iter); + while ((entry = hashmap_iter_next(&iter))) { + char *pattern = xstrdup(entry->pattern); + char *converted = pattern; + if (pattern[0] == '/') + converted++; + if (pattern[entry->patternlen - 1] == '/') + pattern[entry->patternlen - 1] = 0; + string_list_insert(&sl, converted); + free(pattern); + } + + string_list_sort(&sl); + string_list_remove_duplicates(&sl, 0); + + for (i = 0; i < sl.nr; i++) { + char *pattern = sl.items[i].string; + fprintf(fp, "/%s/*\n", pattern); + } + + fclose(fp); + + return sc_read_tree(); +} + +static int sparse_checkout_list(int argc, const char **argv) +{ + struct exclude_list el; + char *sparse_filename; + int i; + + memset(&el, 0, sizeof(el)); + + sparse_filename = get_sparse_checkout_filename(); + get_sparse_checkout_data(sparse_filename, &el); + free(sparse_filename); + + if (!el.use_restricted_patterns) + die(_("your sparse-checkout file 
does not use restricted patterns")); + + for (i = 0; i < el.nr; i++) { + struct exclude *x = el.excludes[i]; + char *truncate; + + if (x->flags & EXC_FLAG_NEGATIVE) + continue; + + if (x->patternlen < 2) + die(_("your sparse-checkout file contains an empty pattern")); + + truncate = xstrdup(x->pattern); + truncate[x->patternlen - 1] = 0; + printf("%s", truncate); + + if (is_recursive_pattern(&el, truncate)) + printf("*"); + printf("\n"); + free(truncate); + } + + return 0; +} + +int cmd_sparse_checkout(int argc, const char **argv, const char *prefix) +{ + static struct option builtin_sparse_checkout_options[] = { + OPT_END(), + }; + + if (argc == 2 && !strcmp(argv[1], "-h")) + usage_with_options(builtin_sparse_checkout_usage, + builtin_sparse_checkout_options); + + git_config(git_default_config, NULL); + argc = parse_options(argc, argv, prefix, + builtin_sparse_checkout_options, + builtin_sparse_checkout_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + + if (argc > 0) { + if (!strcmp(argv[0], "init")) + return sparse_checkout_init(argc, argv); + if (!strcmp(argv[0], "add")) + return sparse_checkout_add(argc, argv); + if (!strcmp(argv[0], "list")) + return sparse_checkout_list(argc, argv); + } + + usage_with_options(builtin_sparse_checkout_usage, + builtin_sparse_checkout_options); +} diff --git a/dir.c b/dir.c index ba4a51c296efca..69983eaf1cc64d 100644 --- a/dir.c +++ b/dir.c @@ -599,6 +599,124 @@ void parse_exclude_pattern(const char **pattern, *patternlen = len; } +static int el_hashmap_cmp(const void *unused_cmp_data, + const void *a, const void *b, const void *key) +{ + const struct exclude_entry *ee1 = a; + const struct exclude_entry *ee2 = b; + + return strncmp(ee1->pattern, ee2->pattern, ee1->patternlen); +} + +void insert_recursive_pattern(struct exclude_list *el, struct strbuf *path) +{ + struct exclude_entry *e = xmalloc(sizeof(struct exclude_entry)); + e->patternlen = path->len; + e->pattern = strbuf_detach(path, NULL); + hashmap_entry_init(e, 
memhash(e->pattern, e->patternlen)); + + hashmap_add(&el->recursive_hashmap, e); + + while (e->patternlen) { + char *slash = strrchr(e->pattern, '/'); + char *oldpattern = e->pattern; + size_t newlen; + + if (!slash) + break; + + newlen = slash - e->pattern; + e = xmalloc(sizeof(struct exclude_entry)); + e->patternlen = newlen; + e->pattern = xstrndup(oldpattern, newlen); + hashmap_entry_init(e, memhash(e->pattern, e->patternlen)); + + if (!hashmap_get(&el->parent_hashmap, e, NULL)) + hashmap_add(&el->parent_hashmap, e); + } +} + +static void add_exclude_to_hashsets(struct exclude_list *el, struct exclude *x) +{ + struct exclude_entry *e; + char *truncated; + char *data = NULL; + + if (!el->use_restricted_patterns) + return; + + if (x->patternlen >= 4 && + !strcmp(x->pattern + x->patternlen - 4, "/*/*")) { + if (!(x->flags & EXC_FLAG_NEGATIVE)) { + /* Not a restricted pattern. */ + el->use_restricted_patterns = 0; + warning(_("unrecognized pattern: '%s'"), x->pattern); + goto clear_hashmaps; + } + + truncated = xstrdup(x->pattern); + truncated[x->patternlen - 3] = 0; + + e = xmalloc(sizeof(struct exclude_entry)); + e->pattern = truncated; + e->patternlen = x->patternlen - 3; + hashmap_entry_init(e, memhash(e->pattern, e->patternlen)); + + if (!hashmap_get(&el->recursive_hashmap, e, NULL)) { + /* We did not see the "parent" included */ + warning(_("unrecognized negative pattern: '%s'"), x->pattern); + free(truncated); + goto clear_hashmaps; + } + + hashmap_add(&el->parent_hashmap, e); + hashmap_remove(&el->recursive_hashmap, e, &data); + free(data); + return; + } + + if (x->patternlen >= 2 && + !strcmp(x->pattern + x->patternlen - 2, "/*")) { + if (x->flags & EXC_FLAG_NEGATIVE) + goto clear_hashmaps; + + e = xmalloc(sizeof(struct exclude_entry)); + + truncated = xstrdup(x->pattern); + truncated[x->patternlen - 1] = 0; + e->pattern = truncated; + e->patternlen = x->patternlen - 1; + hashmap_entry_init(e, memhash(e->pattern, e->patternlen)); + + 
hashmap_add(&el->recursive_hashmap, e); + + if (hashmap_get(&el->parent_hashmap, e, NULL)) { + /* we already included this at the parent level */ + warning(_("your sparse-checkout file may have issues: pattern '%s' is repeated"), + x->pattern); + hashmap_remove(&el->parent_hashmap, e, &data); + free(data); + } + return; + } + +clear_hashmaps: + hashmap_free(&el->parent_hashmap, 1); + hashmap_free(&el->recursive_hashmap, 1); + el->use_restricted_patterns = 0; +} + +int is_recursive_pattern(struct exclude_list *el, char *path) +{ + struct exclude_entry e; + + e.pattern = path; + e.patternlen = strlen(path); + hashmap_entry_init(&e, memhash(e.pattern, e.patternlen)); + + return !!hashmap_get(&el->recursive_hashmap, &e, NULL); +} + void add_exclude(const char *string, const char *base, int baselen, struct exclude_list *el, int srcpos) { @@ -623,6 +741,8 @@ void add_exclude(const char *string, const char *base, ALLOC_GROW(el->excludes, el->nr + 1, el->alloc); el->excludes[el->nr++] = x; x->el = el; + + add_exclude_to_hashsets(el, x); } static int read_skip_worktree_file_from_index(const struct index_state *istate, @@ -848,6 +968,10 @@ static int add_excludes_from_buffer(char *buf, size_t size, int i, lineno = 1; char *entry; + el->use_restricted_patterns = 1; + hashmap_init(&el->recursive_hashmap, el_hashmap_cmp, NULL, 0); + hashmap_init(&el->parent_hashmap, el_hashmap_cmp, NULL, 0); + el->filebuf = buf; if (skip_utf8_bom(&buf, size)) diff --git a/dir.h b/dir.h index 680079bbe3241f..a86fccb2310f26 100644 --- a/dir.h +++ b/dir.h @@ -5,6 +5,7 @@ #include "cache.h" #include "strbuf.h" +#include "hashmap.h" struct dir_entry { unsigned int len; @@ -37,6 +38,12 @@ struct exclude { int srcpos; }; +struct exclude_entry { + struct hashmap_entry ent; + char *pattern; + size_t patternlen; +}; + /* * Each excludes file will be parsed into a fresh exclude_list which * is appended to the relevant exclude_list_group (either EXC_DIRS or @@ -55,6 +62,25 @@ struct exclude_list { const 
char *src; struct exclude **excludes; + + /* + * While scanning the excludes, we attempt to match the patterns + * with a more restricted set that allows us to use hashsets for + * matching logic, which is faster than the linear lookup in the + * excludes array above. If non-zero, that check succeeded. + */ + unsigned use_restricted_patterns; + + /* + * Stores paths where everything starting with those paths + * is included. + */ + struct hashmap recursive_hashmap; + + /* + * Used to check single-level parents of blobs. + */ + struct hashmap parent_hashmap; }; /* @@ -380,4 +406,8 @@ void connect_work_tree_and_git_dir(const char *work_tree, void relocate_gitdir(const char *path, const char *old_git_dir, const char *new_git_dir); + +void insert_recursive_pattern(struct exclude_list *el, struct strbuf *path); +int is_recursive_pattern(struct exclude_list *el, char *path); + #endif diff --git a/git.c b/git.c index 1bf9c94550d9c2..eccdbf339dd84b 100644 --- a/git.c +++ b/git.c @@ -575,6 +575,7 @@ static struct cmd_struct commands[] = { { "show-branch", cmd_show_branch, RUN_SETUP }, { "show-index", cmd_show_index }, { "show-ref", cmd_show_ref, RUN_SETUP }, + { "sparse-checkout", cmd_sparse_checkout, RUN_SETUP | NEED_WORK_TREE }, { "stage", cmd_add, RUN_SETUP | NEED_WORK_TREE }, /* * NEEDSWORK: Until the builtin stash is thoroughly robust and no diff --git a/sparse-checkout.c b/sparse-checkout.c new file mode 100644 index 00000000000000..453f43a6092b01 --- /dev/null +++ b/sparse-checkout.c @@ -0,0 +1,37 @@ +#include "cache.h" +#include "config.h" +#include "dir.h" +#include "hashmap.h" +#include "repository.h" +#include "run-command.h" +#include "sparse-checkout.h" +#include "strbuf.h" + +static int core_sparse_checkout = -1; + +int use_sparse_checkout(struct repository *r) +{ + if (core_sparse_checkout >= 0) + return core_sparse_checkout; + + if (repo_config_get_bool(r, "core.sparsecheckout", &core_sparse_checkout)) + core_sparse_checkout = 0; + + return 
core_sparse_checkout; +} + +int get_sparse_checkout_data(char *sparse_filename, + struct exclude_list *el) +{ + el->use_restricted_patterns = 1; + + if (add_excludes_from_file_to_list(sparse_filename, "", 0, el, NULL) < 0) + return -1; + return 0; +} + +int excludes_are_strict(struct exclude_list *el) +{ + /* TODO: actually implement! */ + return 1; +} \ No newline at end of file diff --git a/sparse-checkout.h b/sparse-checkout.h new file mode 100644 index 00000000000000..94d777eb7a1864 --- /dev/null +++ b/sparse-checkout.h @@ -0,0 +1,43 @@ +#ifndef SPARSE_CHECKOUT_H +#define SPARSE_CHECKOUT_H + +struct exclude_list; +struct index_state; +struct repository; +struct strbuf; + +int use_sparse_checkout(struct repository *r); +int get_sparse_checkout_data(char *sparse_filename, + struct exclude_list *ed); + +/* + * Given an exclude_list, scan the list to discover if + * the patterns match the "fast cone" patterns. That is, + * we expect to see a set of patterns such as + * + * Type 1: + * + * /[*] + * !/[*]/[*] + * + * and Type 2: + * + * /[*] + * + * (Ignore brackets around asterisks. They exist to avoid + * build breaks.) + * + * The Type 1 pattern pairs say "I want all files in + * directory, and none of the subdirectories". The + * Type 2 pattern says "I want every file in this directory, + * recursively through the subdirectories". These patterns + * appear in an ordered list, and if is an ancestor + * of , then a Type 1 pattern for should appear + * before either pattern for . + * + * excludes_are_strict() returns 1 exactly when all patterns + * match those type above, including the order restriction. + */ +int excludes_are_strict(struct exclude_list *el); + +#endif diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh new file mode 100755 index 00000000000000..8f24864988e907 --- /dev/null +++ b/t/t1091-sparse-checkout-builtin.sh @@ -0,0 +1,128 @@ +#!/bin/sh + +test_description='sparse checkout builtin tests' + +. 
./test-lib.sh + +test_expect_success 'setup' ' + echo "initial" >a && + mkdir folder1 folder2 deep && + mkdir deep/deeper1 deep/deeper2 && + mkdir deep/deeper1/deepest && + cp a folder1 && + cp a folder2 && + cp a deep && + cp a deep/deeper1 && + cp a deep/deeper2 && + cp a deep/deeper1/deepest && + git add . && + git commit -m "initial commit" && + mkdir test-output +' + +test_expect_success 'git sparse-checkout list (empty)' ' + git sparse-checkout list >test-output/list && + test_line_count = 0 test-output/list +' + +test_expect_success 'git sparse-checkout init' ' + git sparse-checkout init && + cat >test-output/expect <<-EOF && + /* + !/*/* + EOF + test_cmp test-output/expect .git/info/sparse-checkout && + git config --list >test-output/config && + test_i18ngrep "core.sparsecheckout=true" test-output/config && + ls >test-output/dir && + cat >test-output/expect <<-EOF && + a + test-output + EOF + test_cmp test-output/expect test-output/dir +' + +test_expect_success 'git sparse-checkout list after init' ' + git sparse-checkout list >test-output/actual && + echo "/" >test-output/expect && + test_cmp test-output/expect test-output/actual +' + +test_expect_success 'git sparse-checkout add' ' + git sparse-checkout add test-output/actual && + test_cmp test-output/expect test-output/actual && + git sparse-checkout add <<-EOF && + folder1 + deep/deeper1 + EOF + cat >test-output/expect <<-EOF && + /* + !/*/* + /deep/* + !/deep/*/* + /deep/deeper1/* + /folder1/* + EOF + test_cmp test-output/expect .git/info/sparse-checkout && + ls >test-output/dir && + cat >test-output/expect <<-EOF && + a + deep + folder1 + test-output + EOF + test_cmp test-output/expect test-output/dir && + ls deep >test-output/dir && + cat >test-output/expect <<-EOF && + a + deeper1 + EOF + test_cmp test-output/expect test-output/dir +' + +test_expect_success 'git sparse-checkout list after add' ' + git sparse-checkout list >test-output/actual && + cat >test-output/expect <<-EOF && + / + /deep/ + 
/deep/deeper1/* + /folder1/* + EOF + test_cmp test-output/expect test-output/actual +' + +test_expect_success 'git sparse-checkout add more' ' + git sparse-checkout add <<-EOF && + folder1 + deep/deeper2 + EOF + cat >test-output/expect <<-EOF && + /* + !/*/* + /deep/* + !/deep/*/* + /deep/deeper1/* + /deep/deeper2/* + /folder1/* + EOF + test_cmp test-output/expect .git/info/sparse-checkout && + ls >test-output/dir && + cat >test-output/expect <<-EOF && + a + deep + folder1 + test-output + EOF + test_cmp test-output/expect test-output/dir && + ls deep >test-output/dir && + cat >test-output/expect <<-EOF && + a + deeper1 + deeper2 + EOF + test_cmp test-output/expect test-output/dir +' + + +test_done \ No newline at end of file From 5013c283ae57cf7f53ae673fd31dc2c69c3f71b2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 15 Aug 2019 11:22:01 -0400 Subject: [PATCH 2/4] sparse-checkout: fast restricted pattern matching Signed-off-by: Derrick Stolee --- dir.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 75 insertions(+), 12 deletions(-) diff --git a/dir.c b/dir.c index 69983eaf1cc64d..7e035bf76f1970 100644 --- a/dir.c +++ b/dir.c @@ -655,11 +655,11 @@ static void add_exclude_to_hashsets(struct exclude_list *el, struct exclude *x) } truncated = xstrdup(x->pattern); - truncated[x->patternlen - 3] = 0; + truncated[x->patternlen - 4] = 0; e = xmalloc(sizeof(struct exclude_entry)); e->pattern = truncated; - e->patternlen = x->patternlen - 3; + e->patternlen = x->patternlen - 4; hashmap_entry_init(e, memhash(e->pattern, e->patternlen)); if (!hashmap_get(&el->recursive_hashmap, e, NULL)) { @@ -683,9 +683,9 @@ static void add_exclude_to_hashsets(struct exclude_list *el, struct exclude *x) e = xmalloc(sizeof(struct exclude_entry)); truncated = xstrdup(x->pattern); - truncated[x->patternlen - 1] = 0; + truncated[x->patternlen - 2] = 0; e->pattern = truncated; - e->patternlen = x->patternlen - 1; + e->patternlen = x->patternlen - 2; 
hashmap_entry_init(e, memhash(e->pattern, e->patternlen)); hashmap_add(&el->recursive_hashmap, e); @@ -708,13 +708,21 @@ static void add_exclude_to_hashsets(struct exclude_list *el, struct exclude *x) int is_recursive_pattern(struct exclude_list *el, char *path) { + struct strbuf formatted_path = STRBUF_INIT; struct exclude_entry e; + int result; - e.pattern = path; - e.patternlen = strlen(path); + strbuf_addstr(&formatted_path, path); + if (formatted_path.len && formatted_path.buf[formatted_path.len - 1] == '/') + strbuf_setlen(&formatted_path, formatted_path.len - 1); + e.pattern = formatted_path.buf; + e.patternlen = formatted_path.len; hashmap_entry_init(&e, memhash(e.pattern, e.patternlen)); - return !!hashmap_get(&el->recursive_hashmap, &e, NULL); + result = !!hashmap_get(&el->recursive_hashmap, &e, NULL); + + strbuf_release(&formatted_path); + return result; } void add_exclude(const char *string, const char *base, @@ -1192,6 +1200,18 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname, return exc; } +static int hashmap_contains_path(struct hashmap *map, + struct strbuf *pattern) +{ + struct exclude_entry e; + + /* Check straight mapping */ + e.pattern = pattern->buf; + e.patternlen = pattern->len; + hashmap_entry_init(&e, memhash(e.pattern, e.patternlen)); + return !!hashmap_get(map, &e, NULL); +} + /* * Scan the list and let the last match determine the fate. * Return 1 for exclude, 0 for include and -1 for undecided. @@ -1201,11 +1221,54 @@ int is_excluded_from_list(const char *pathname, struct exclude_list *el, struct index_state *istate) { struct exclude *exclude; - exclude = last_exclude_matching_from_list(pathname, pathlen, basename, - dtype, el, istate); - if (exclude) - return exclude->flags & EXC_FLAG_NEGATIVE ? 
0 : 1; - return -1; /* undecided */ + struct strbuf parent_pathname = STRBUF_INIT; + int excluded = 0; + const char *slash_pos; + + if (!el->use_restricted_patterns) { + exclude = last_exclude_matching_from_list(pathname, pathlen, basename, + dtype, el, istate); + + if (exclude) + return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; + + return -1; /* undecided */ + } + + strbuf_addch(&parent_pathname, '/'); + strbuf_add(&parent_pathname, pathname, pathlen); + slash_pos = strrchr(parent_pathname.buf, '/'); + + if (slash_pos == parent_pathname.buf) { + /* include every file in root */ + excluded = 1; + goto done; + } + + strbuf_setlen(&parent_pathname, slash_pos - parent_pathname.buf); + + if (hashmap_contains_path(&el->parent_hashmap, &parent_pathname)) { + excluded = 1; + goto done; + } + + while (parent_pathname.len) { + if (hashmap_contains_path(&el->recursive_hashmap, + &parent_pathname)) { + excluded = -1; + goto done; + } + + slash_pos = strrchr(parent_pathname.buf, '/'); + if (slash_pos == parent_pathname.buf) + break; + + strbuf_setlen(&parent_pathname, slash_pos - parent_pathname.buf); + } + +done: + strbuf_release(&parent_pathname); + return excluded; } static struct exclude *last_exclude_matching_from_lists(struct dir_struct *dir, From bba19eeb55f76c4b4313cfb5af1cfe73a1996cb6 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 16 Aug 2019 13:45:09 -0400 Subject: [PATCH 3/4] fixup! 
sparse-checkout: basics of builtin --- builtin/sparse-checkout.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 5def8cca282a69..9a4ac7f3f81592 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -24,20 +24,6 @@ static char *get_sparse_checkout_filename(void) return git_pathdup("info/sparse-checkout"); } -static int check_clean_status(void) -{ -/* - struct strbuf sb = STRBUF_INIT; - - if (repo_index_has_changes(the_repository, NULL, &sb)) { - error(_("You have local changes that could be overwritten by a reset:\n %s"), - sb.buf); - return 1; - } -*/ - return 0; -} - static int sc_read_tree(void) { struct argv_array argv = ARGV_ARRAY_INIT; @@ -98,9 +84,6 @@ static int sparse_checkout_init(int argc, const char **argv) char *sparse_filename; FILE *fp; - if (check_clean_status()) - return 1; - if (sc_enable_config()) return 1; @@ -151,9 +134,6 @@ static int sparse_checkout_add(int argc, const char **argv) struct string_list sl = STRING_LIST_INIT_DUP; int i; - if (check_clean_status()) - return 1; - memset(&el, 0, sizeof(el)); sparse_filename = get_sparse_checkout_filename(); get_sparse_checkout_data(sparse_filename, &el); From 83927a1619f4b6da3afd1aabe8080bd2875070ee Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 16 Aug 2019 14:36:05 -0400 Subject: [PATCH 4/4] sparse-checkout: create third config mode 'cone' Signed-off-by: Derrick Stolee --- Makefile | 1 - builtin/sparse-checkout.c | 21 +++++++-------- cache.h | 9 ++++++- config.c | 11 ++++++-- dir.c | 3 ++- environment.c | 2 +- sparse-checkout.c | 37 ------------------------- sparse-checkout.h | 43 ------------------------------ t/t1091-sparse-checkout-builtin.sh | 2 +- 9 files changed, 30 insertions(+), 99 deletions(-) delete mode 100644 sparse-checkout.c delete mode 100644 sparse-checkout.h diff --git a/Makefile b/Makefile index e34cf5569714c2..c373b575c9dab5 100644 --- a/Makefile +++ b/Makefile @@ 
-984,7 +984,6 @@ LIB_OBJS += sha1-name.o LIB_OBJS += shallow.o LIB_OBJS += sideband.o LIB_OBJS += sigchain.o -LIB_OBJS += sparse-checkout.o LIB_OBJS += split-index.o LIB_OBJS += strbuf.o LIB_OBJS += streaming.o diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 9a4ac7f3f81592..5413e69be4e272 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -5,7 +5,6 @@ #include "pathspec.h" #include "repository.h" #include "run-command.h" -#include "sparse-checkout.h" #include "strbuf.h" #include "string-list.h" @@ -43,7 +42,7 @@ static int sc_enable_config(void) { struct argv_array argv = ARGV_ARRAY_INIT; int result = 0; - argv_array_pushl(&argv, "config", "--add", "core.sparseCheckout", "true", NULL); + argv_array_pushl(&argv, "config", "--add", "core.sparseCheckout", "cone", NULL); if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) { error(_("failed to enable core.sparseCheckout")); @@ -80,7 +79,6 @@ static int sparse_checkout_init(int argc, const char **argv) struct tree *t; struct object_id oid; static struct pathspec pathspec; - struct exclude_list el; char *sparse_filename; FILE *fp; @@ -89,11 +87,6 @@ static int sparse_checkout_init(int argc, const char **argv) sparse_filename = get_sparse_checkout_filename(); - if (!get_sparse_checkout_data(sparse_filename, &el)) { - /* Check for existing data */ - goto reset; - } - /* initial mode: all blobs at root */ fp = fopen(sparse_filename, "w"); fprintf(fp, "/*\n!/*/*\n"); @@ -118,7 +111,6 @@ static int sparse_checkout_init(int argc, const char **argv) delete_directory, NULL)) return 1; -reset: free(sparse_filename); return sc_read_tree(); } @@ -136,9 +128,12 @@ static int sparse_checkout_add(int argc, const char **argv) memset(&el, 0, sizeof(el)); sparse_filename = get_sparse_checkout_filename(); - get_sparse_checkout_data(sparse_filename, &el); - if (!excludes_are_strict(&el)) + if (add_excludes_from_file_to_list(sparse_filename, "", 0, &el, NULL) < 0) + return 0; + 
free(sparse_filename); + + if (!el.use_restricted_patterns) die(_("The sparse-checkout file has incompatible patterns. It may have been edited manually.")); strbuf_init(&line, PATH_MAX); @@ -221,7 +216,9 @@ static int sparse_checkout_list(int argc, const char **argv) memset(&el, 0, sizeof(el)); sparse_filename = get_sparse_checkout_filename(); - get_sparse_checkout_data(sparse_filename, &el); + + if (add_excludes_from_file_to_list(sparse_filename, "", 0, &el, NULL) < 0) + return 0; free(sparse_filename); if (!el.use_restricted_patterns) diff --git a/cache.h b/cache.h index b4bb2e2c11adff..02b97d83bb2005 100644 --- a/cache.h +++ b/cache.h @@ -889,7 +889,14 @@ extern char *git_replace_ref_base; extern int fsync_object_files; extern int core_preload_index; -extern int core_apply_sparse_checkout; + +enum sparse_checkout_options { + SPARSE_CHECKOUT_NONE = 0, + SPARSE_CHECKOUT_FULL = 1, + SPARSE_CHECKOUT_CONE = 2 +}; + +extern enum sparse_checkout_options core_apply_sparse_checkout; extern int precomposed_unicode; extern int protect_hfs; extern int protect_ntfs; diff --git a/config.c b/config.c index 296a6d9cc4110b..173292baf36677 100644 --- a/config.c +++ b/config.c @@ -1325,8 +1325,15 @@ static int git_default_core_config(const char *var, const char *value, void *cb) } if (!strcmp(var, "core.sparsecheckout")) { - core_apply_sparse_checkout = git_config_bool(var, value); - return 0; + int result = git_parse_maybe_bool(value); + + if (result < 0) { + core_apply_sparse_checkout = SPARSE_CHECKOUT_NONE; + + if (!strcasecmp(value, "cone")) + core_apply_sparse_checkout = SPARSE_CHECKOUT_CONE; + } else + core_apply_sparse_checkout = result; } if (!strcmp(var, "core.precomposeunicode")) { diff --git a/dir.c b/dir.c index 7e035bf76f1970..6093076b0f0d1d 100644 --- a/dir.c +++ b/dir.c @@ -976,7 +976,8 @@ static int add_excludes_from_buffer(char *buf, size_t size, int i, lineno = 1; char *entry; - el->use_restricted_patterns = 1; +fprintf(stderr, "core_apply_sparse_checkout = 
%d\n", core_apply_sparse_checkout); + el->use_restricted_patterns = core_apply_sparse_checkout == SPARSE_CHECKOUT_CONE ? 1 : 0; hashmap_init(&el->recursive_hashmap, el_hashmap_cmp, NULL, 0); hashmap_init(&el->parent_hashmap, el_hashmap_cmp, NULL, 0); diff --git a/environment.c b/environment.c index 89af47cb850490..36267a4da388e4 100644 --- a/environment.c +++ b/environment.c @@ -68,7 +68,7 @@ enum push_default_type push_default = PUSH_DEFAULT_UNSPECIFIED; enum object_creation_mode object_creation_mode = OBJECT_CREATION_MODE; char *notes_ref_name; int grafts_replace_parents = 1; -int core_apply_sparse_checkout; +enum sparse_checkout_options core_apply_sparse_checkout; int merge_log_config = -1; int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ unsigned long pack_size_limit_cfg; diff --git a/sparse-checkout.c b/sparse-checkout.c deleted file mode 100644 index 453f43a6092b01..00000000000000 --- a/sparse-checkout.c +++ /dev/null @@ -1,37 +0,0 @@ -#include "cache.h" -#include "config.h" -#include "dir.h" -#include "hashmap.h" -#include "repository.h" -#include "run-command.h" -#include "sparse-checkout.h" -#include "strbuf.h" - -static int core_sparse_checkout = -1; - -int use_sparse_checkout(struct repository *r) -{ - if (core_sparse_checkout >= 0) - return core_sparse_checkout; - - if (repo_config_get_bool(r, "core.sparsecheckout", &core_sparse_checkout)) - core_sparse_checkout = 0; - - return core_sparse_checkout; -} - -int get_sparse_checkout_data(char *sparse_filename, - struct exclude_list *el) -{ - el->use_restricted_patterns = 1; - - if (add_excludes_from_file_to_list(sparse_filename, "", 0, el, NULL) < 0) - return -1; - return 0; -} - -int excludes_are_strict(struct exclude_list *el) -{ - /* TODO: actually implement! 
*/ - return 1; -} \ No newline at end of file diff --git a/sparse-checkout.h b/sparse-checkout.h deleted file mode 100644 index 94d777eb7a1864..00000000000000 --- a/sparse-checkout.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef SPARSE_CHECKOUT_H -#define SPARSE_CHECKOUT_H - -struct exclude_list; -struct index_state; -struct repository; -struct strbuf; - -int use_sparse_checkout(struct repository *r); -int get_sparse_checkout_data(char *sparse_filename, - struct exclude_list *ed); - -/* - * Given an exclude_list, scan the list to discover if - * the patterns match the "fast cone" patterns. That is, - * we expect to see a set of patterns such as - * - * Type 1: - * - * /[*] - * !/[*]/[*] - * - * and Type 2: - * - * /[*] - * - * (Ignore brackets around asterisks. They exist to avoid - * build breaks.) - * - * The Type 1 pattern pairs say "I want all files in - * directory, and none of the subdirectories". The - * Type 2 pattern says "I want every file in this directory, - * recursively through the subdirectories". These patterns - * appear in an ordered list, and if is an ancestor - * of , then a Type 1 pattern for should appear - * before either pattern for . - * - * excludes_are_strict() returns 1 exactly when all patterns - * match those type above, including the order restriction. - */ -int excludes_are_strict(struct exclude_list *el); - -#endif diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 8f24864988e907..e249a8bee4cef6 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -33,7 +33,7 @@ test_expect_success 'git sparse-checkout init' ' EOF test_cmp test-output/expect .git/info/sparse-checkout && git config --list >test-output/config && - test_i18ngrep "core.sparsecheckout=true" test-output/config && + test_i18ngrep "core.sparsecheckout=cone" test-output/config && ls >test-output/dir && cat >test-output/expect <<-EOF && a