From 1dc28c9b88dbb807240b905c3dc93504bb6dc907 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 24 Jan 2017 15:12:13 -0500 Subject: [PATCH 1/3] fscache: add key for GIT_TRACE_FSCACHE Signed-off-by: Jeff Hostetler Signed-off-by: Johannes Schindelin --- compat/win32/fscache.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 890cef3d318c95..92d4768a711f8a 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -4,11 +4,13 @@ #include "fscache.h" #include "../../dir.h" #include "../../abspath.h" +#include "../../trace.h" static int initialized; static volatile long enabled; static struct hashmap map; static CRITICAL_SECTION mutex; +static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); /* * An entry in the file system cache. Used for both entire directory listings @@ -207,6 +209,8 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) if (h == INVALID_HANDLE_VALUE) { err = GetLastError(); errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + trace_printf_key(&trace_fscache, "fscache: error(%d) '%s'\n", + errno, dir->dirent.d_name); return NULL; } @@ -392,6 +396,7 @@ int fscache_enable(int enable) fscache_clear(); LeaveCriticalSection(&mutex); } + trace_printf_key(&trace_fscache, "fscache: enable(%d)\n", enable); return result; } From 16cd1497ca7cc3b44e2f275f52f4bb5c5e70d6f6 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 13 Dec 2016 14:05:32 -0500 Subject: [PATCH 2/3] fscache: remember not-found directories Teach FSCACHE to remember "not found" directories. This is a performance optimization. FSCACHE is a performance optimization available for Windows. It intercepts Posix-style lstat() calls into an in-memory directory using FindFirst/FindNext. 
It improves performance on Windows by catching the first lstat() call in a directory, using FindFirst/FindNext to read the list of files (and attribute data) for the entire directory into the cache, and short-cutting subsequent lstat() calls in the same directory. This gives a major performance boost on Windows. However, it does not remember "not found" directories. When STATUS runs and there are missing directories, the lstat() interception fails to find the parent directory and simply returns ENOENT for the file -- it does not remember that the FindFirst on the directory failed. Thus subsequent lstat() calls in the same directory each re-attempt the FindFirst. This completely defeats any performance gains. This can be seen by doing a sparse-checkout on a large repo and then doing a read-tree to reset the skip-worktree bits and then running status. This change reduced status times for my very large repo by 60%. Signed-off-by: Jeff Hostetler Signed-off-by: Johannes Schindelin --- compat/win32/fscache.c | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 92d4768a711f8a..0fbe4f3318add0 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -181,7 +181,8 @@ static struct fsentry *fseentry_create_entry(struct fsentry *list, * Dir should not contain trailing '/'. Use an empty string for the current * directory (not "."!). 
*/ -static struct fsentry *fsentry_create_list(const struct fsentry *dir) +static struct fsentry *fsentry_create_list(const struct fsentry *dir, + int *dir_not_found) { wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ WIN32_FIND_DATAW fdata; @@ -190,6 +191,8 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) struct fsentry *list, **phead; DWORD err; + *dir_not_found = 0; + /* convert name to UTF-16 and check length < MAX_PATH */ if ((wlen = xutftowcsn(pattern, dir->dirent.d_name, MAX_PATH, dir->len)) < 0) { @@ -208,6 +211,7 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) h = FindFirstFileW(pattern, &fdata); if (h == INVALID_HANDLE_VALUE) { err = GetLastError(); + *dir_not_found = 1; /* or empty directory */ errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); trace_printf_key(&trace_fscache, "fscache: error(%d) '%s'\n", errno, dir->dirent.d_name); @@ -216,6 +220,8 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) /* allocate object to hold directory listing */ list = fsentry_alloc(NULL, dir->dirent.d_name, dir->len); + list->st_mode = S_IFDIR; + list->dirent.d_type = DT_DIR; /* walk directory and build linked list of fsentry structures */ phead = &list->next; @@ -300,12 +306,16 @@ static struct fsentry *fscache_get_wait(struct fsentry *key) static struct fsentry *fscache_get(struct fsentry *key) { struct fsentry *fse, *future, *waiter; + int dir_not_found; EnterCriticalSection(&mutex); /* check if entry is in cache */ fse = fscache_get_wait(key); if (fse) { - fsentry_addref(fse); + if (fse->st_mode) + fsentry_addref(fse); + else + fse = NULL; /* non-existing directory */ LeaveCriticalSection(&mutex); return fse; } @@ -314,7 +324,10 @@ static struct fsentry *fscache_get(struct fsentry *key) fse = fscache_get_wait(key->list); if (fse) { LeaveCriticalSection(&mutex); - /* dir entry without file entry -> file doesn't exist */ + /* + * dir entry without file entry, or dir does not + 
* exist -> file doesn't exist + */ errno = ENOENT; return NULL; } @@ -328,7 +341,7 @@ static struct fsentry *fscache_get(struct fsentry *key) /* create the directory listing (outside mutex!) */ LeaveCriticalSection(&mutex); - fse = fsentry_create_list(future); + fse = fsentry_create_list(future, &dir_not_found); EnterCriticalSection(&mutex); /* remove future entry and signal waiting threads */ @@ -342,6 +355,18 @@ static struct fsentry *fscache_get(struct fsentry *key) /* leave on error (errno set by fsentry_create_list) */ if (!fse) { + if (dir_not_found && key->list) { + /* + * Record that the directory does not exist (or is + * empty, which for all practical matters is the same + * thing as far as fscache is concerned). + */ + fse = fsentry_alloc(key->list->list, + key->list->dirent.d_name, + key->list->len); + fse->st_mode = 0; + hashmap_add(&map, &fse->ent); + } LeaveCriticalSection(&mutex); return NULL; } @@ -353,6 +378,9 @@ static struct fsentry *fscache_get(struct fsentry *key) if (key->list) fse = hashmap_get_entry(&map, key, ent, NULL); + if (fse && !fse->st_mode) + fse = NULL; /* non-existing directory */ + /* return entry or ENOENT */ if (fse) fsentry_addref(fse); From d416e8af103a9514c0882c6c1a7931bb56675bfb Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 25 Jan 2017 18:39:16 +0100 Subject: [PATCH 3/3] fscache: add a test for the dir-not-found optimization Signed-off-by: Johannes Schindelin --- t/t1090-sparse-checkout-scope.sh | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/t/t1090-sparse-checkout-scope.sh b/t/t1090-sparse-checkout-scope.sh index 3a14218b245d4c..529844e2862c74 100755 --- a/t/t1090-sparse-checkout-scope.sh +++ b/t/t1090-sparse-checkout-scope.sh @@ -106,4 +106,24 @@ test_expect_success 'in partial clone, sparse checkout only fetches needed blobs test_cmp expect actual ' +test_expect_success MINGW 'no unnecessary opendir() with fscache' ' + git clone . 
fscache-test && + ( + cd fscache-test && + git config core.fscache 1 && + echo "/excluded/*" >.git/info/sparse-checkout && + for f in $(test_seq 10) + do + sha1=$(echo $f | git hash-object -w --stdin) && + git update-index --add \ + --cacheinfo 100644,$sha1,excluded/$f || exit 1 + done && + test_tick && + git commit -m excluded && + GIT_TRACE_FSCACHE=1 git status >out 2>err && + grep excluded err >grep.out && + test_line_count = 1 grep.out + ) +' + test_done