From 6fd0cc83e4a6b9133dab0aa52cad7af653dcd778 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 24 Jan 2017 15:12:13 -0500 Subject: [PATCH 1/3] fscache: add key for GIT_TRACE_FSCACHE Signed-off-by: Jeff Hostetler Signed-off-by: Johannes Schindelin --- compat/win32/fscache.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 63d522f49a1699..abefd3bb4fae2b 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -7,6 +7,7 @@ static int initialized; static volatile long enabled; static struct hashmap map; static CRITICAL_SECTION mutex; +static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); /* * An entry in the file system cache. Used for both entire directory listings @@ -190,6 +191,8 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) if (h == INVALID_HANDLE_VALUE) { err = GetLastError(); errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + trace_printf_key(&trace_fscache, "fscache: error(%d) '%.*s'\n", + errno, dir->len, dir->name); return NULL; } @@ -391,6 +394,7 @@ int fscache_enable(int enable) fscache_clear(); LeaveCriticalSection(&mutex); } + trace_printf_key(&trace_fscache, "fscache: enable(%d)\n", enable); return result; } From f22f12561d2af2963e4dacb94f7c69e57804fb1c Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 13 Dec 2016 14:05:32 -0500 Subject: [PATCH 2/3] fscache: remember not-found directories Teach FSCACHE to remember "not found" directories. This is a performance optimization. FSCACHE is a performance optimization available for Windows. It intercepts Posix-style lstat() calls into an in-memory directory using FindFirst/FindNext. It improves performance on Windows by catching the first lstat() call in a directory, using FindFirst/ FindNext to read the list of files (and attribute data) for the entire directory into the cache, and short-cut subsequent lstat() calls in the same directory. This gives a major performance boost on Windows. However, it does not remember "not found" directories. When STATUS runs and there are missing directories, the lstat() interception fails to find the parent directory and simply return ENOENT for the file -- it does not remember that the FindFirst on the directory failed. Thus subsequent lstat() calls in the same directory, each re-attempt the FindFirst. This completely defeats any performance gains. This can be seen by doing a sparse-checkout on a large repo and then doing a read-tree to reset the skip-worktree bits and then running status. This change reduced status times for my very large repo by 60%. Signed-off-by: Jeff Hostetler Signed-off-by: Johannes Schindelin --- compat/win32/fscache.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index abefd3bb4fae2b..e0f0681bb392d6 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -163,7 +163,8 @@ static struct fsentry *fseentry_create_entry(struct fsentry *list, * Dir should not contain trailing '/'. Use an empty string for the current * directory (not "."!). */ -static struct fsentry *fsentry_create_list(const struct fsentry *dir) +static struct fsentry *fsentry_create_list(const struct fsentry *dir, + int *dir_not_found) { wchar_t pattern[MAX_LONG_PATH + 2]; /* + 2 for "\*" */ WIN32_FIND_DATAW fdata; @@ -172,6 +173,8 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) struct fsentry *list, **phead; DWORD err; + *dir_not_found = 0; + /* convert name to UTF-16 and check length */ if ((wlen = xutftowcs_path_ex(pattern, dir->name, MAX_LONG_PATH, dir->len, MAX_PATH - 2, core_long_paths)) < 0) @@ -190,6 +193,7 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) h = FindFirstFileW(pattern, &fdata); if (h == INVALID_HANDLE_VALUE) { err = GetLastError(); + *dir_not_found = 1; /* or empty directory */ errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); trace_printf_key(&trace_fscache, "fscache: error(%d) '%.*s'\n", errno, dir->len, dir->name); @@ -198,6 +202,7 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) /* allocate object to hold directory listing */ list = fsentry_alloc(NULL, dir->name, dir->len); + list->st_mode = S_IFDIR; /* walk directory and build linked list of fsentry structures */ phead = &list->next; @@ -298,12 +303,16 @@ static struct fsentry *fscache_get_wait(struct fsentry *key) static struct fsentry *fscache_get(struct fsentry *key) { struct fsentry *fse, *future, *waiter; + int dir_not_found; EnterCriticalSection(&mutex); /* check if entry is in cache */ fse = fscache_get_wait(key); if (fse) { - fsentry_addref(fse); + if (fse->st_mode) + fsentry_addref(fse); + else + fse = NULL; /* non-existing directory */ LeaveCriticalSection(&mutex); return fse; } @@ -312,7 +321,10 @@ static struct fsentry *fscache_get(struct fsentry *key) fse = fscache_get_wait(key->list); if (fse) { LeaveCriticalSection(&mutex); - /* dir entry without file entry -> file doesn't exist */ + /* + * dir entry without file entry, or dir does not + * exist -> file doesn't exist + */ errno = ENOENT; return NULL; } @@ -326,7 +338,7 @@ static struct fsentry *fscache_get(struct fsentry *key) /* create the directory listing (outside mutex!) */ LeaveCriticalSection(&mutex); - fse = fsentry_create_list(future); + fse = fsentry_create_list(future, &dir_not_found); EnterCriticalSection(&mutex); /* remove future entry and signal waiting threads */ @@ -340,6 +352,17 @@ static struct fsentry *fscache_get(struct fsentry *key) /* leave on error (errno set by fsentry_create_list) */ if (!fse) { + if (dir_not_found && key->list) { + /* + * Record that the directory does not exist (or is + * empty, which for all practical matters is the same + * thing as far as fscache is concerned). + */ + fse = fsentry_alloc(key->list->list, + key->list->name, key->list->len); + fse->st_mode = 0; + hashmap_add(&map, fse); + } LeaveCriticalSection(&mutex); return NULL; } @@ -351,6 +374,9 @@ static struct fsentry *fscache_get(struct fsentry *key) if (key->list) fse = hashmap_get(&map, key, NULL); + if (fse && !fse->st_mode) + fse = NULL; /* non-existing directory */ + /* return entry or ENOENT */ if (fse) fsentry_addref(fse); From c38245e1b8a26add15526e0924fdb0980bc7635e Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 25 Jan 2017 18:39:16 +0100 Subject: [PATCH 3/3] fscache: add a test for the dir-not-found optimization Signed-off-by: Johannes Schindelin --- t/t1090-sparse-checkout-scope.sh | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/t/t1090-sparse-checkout-scope.sh b/t/t1090-sparse-checkout-scope.sh index 1f61eb3e88e30c..33a69a9c71b9bf 100755 --- a/t/t1090-sparse-checkout-scope.sh +++ b/t/t1090-sparse-checkout-scope.sh @@ -49,4 +49,24 @@ test_expect_success 'return to full checkout of master' ' test "$(cat b)" = "modified" ' +test_expect_success 'no unnecessary opendir() with fscache' ' + git clone . fscache-test && + ( + cd fscache-test && + git config core.fscache 1 && + echo "/excluded/*" >.git/info/sparse-checkout && + for f in $(test_seq 10) + do + sha1=$(echo $f | git hash-object -w --stdin) && + git update-index --add \ + --cacheinfo 100644,$sha1,excluded/$f || break + done && + test_tick && + git commit -m excluded && + GIT_TRACE_FSCACHE=1 git status >out 2>err && + grep excluded err >grep.out && + test_line_count = 1 grep.out + ) +' + test_done