Skip to content

Commit

Permalink
fscache: teach fscache to use NtQueryDirectoryFile
Browse files Browse the repository at this point in the history
Using FindFirstFileExW() requires the OS to allocate a 64K buffer for each
directory and then free it when we call FindClose().  Update fscache to call
the underlying kernel API NtQueryDirectoryFile so that we can do the buffer
management ourselves.  That allows us to allocate a single buffer for the
lifetime of the cache and reuse it for each directory.

This change improves performance of 'git status' by 18% in a repo with ~200K
files and 30k folders.

Documentation for NtQueryDirectoryFile can be found at:

https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/content/ntifs/nf-ntifs-ntquerydirectoryfile
https://docs.microsoft.com/en-us/windows/desktop/FileIO/file-attribute-constants
https://docs.microsoft.com/en-us/windows/desktop/fileio/reparse-point-tags

To determine if the specified directory is a symbolic link, inspect the
FileAttributes member to see if the FILE_ATTRIBUTE_REPARSE_POINT flag is
set. If so, EaSize will contain the reparse tag (this is a so far
undocumented feature, but confirmed by the NTFS developers). To
determine if the reparse point is a symbolic link (and not some other
form of reparse point), test whether the tag value equals the value
IO_REPARSE_TAG_SYMLINK.

Signed-off-by: Ben Peart <benpeart@microsoft.com>
  • Loading branch information
benpeart authored and dscho committed Dec 9, 2018
1 parent 5033536 commit 842a845
Show file tree
Hide file tree
Showing 2 changed files with 218 additions and 35 deletions.
122 changes: 87 additions & 35 deletions compat/win32/fscache.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "fscache.h"
#include "config.h"
#include "../../mem-pool.h"
#include "ntifs.h"

static volatile long initialized;
static DWORD dwTlsIndex;
Expand All @@ -23,6 +24,7 @@ struct fscache {
unsigned int opendir_requests;
unsigned int fscache_requests;
unsigned int fscache_misses;
WCHAR buffer[64 * 1024];
};
static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE);

Expand Down Expand Up @@ -145,16 +147,30 @@ static void fsentry_release(struct fsentry *fse)
InterlockedDecrement(&(fse->refcnt));
}

static int xwcstoutfn(char *utf, int utflen, const wchar_t *wcs, int wcslen)
{
if (!wcs || !utf || utflen < 1) {
errno = EINVAL;
return -1;
}
utflen = WideCharToMultiByte(CP_UTF8, 0, wcs, wcslen, utf, utflen, NULL, NULL);
if (utflen)
return utflen;
errno = ERANGE;
return -1;
}

/*
* Allocate and initialize an fsentry from a WIN32_FIND_DATA structure.
* Allocate and initialize an fsentry from a FILE_FULL_DIR_INFORMATION structure.
*/
static struct fsentry *fseentry_create_entry(struct fscache *cache, struct fsentry *list,
const WIN32_FIND_DATAW *fdata)
PFILE_FULL_DIR_INFORMATION fdata)
{
char buf[MAX_PATH * 3];
int len;
struct fsentry *fse;
len = xwcstoutf(buf, fdata->cFileName, ARRAY_SIZE(buf));

len = xwcstoutfn(buf, ARRAY_SIZE(buf), fdata->FileName, fdata->FileNameLength / sizeof(wchar_t));

fse = fsentry_alloc(cache, list, buf, len);

Expand All @@ -167,7 +183,8 @@ static struct fsentry *fseentry_create_entry(struct fscache *cache, struct fsent
* Let's work around this by detecting that situation and
* telling Git that these are *not* symbolic links.
*/
if (fdata->dwReserved0 == IO_REPARSE_TAG_SYMLINK &&
if (fdata->FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
fdata->EaSize == IO_REPARSE_TAG_SYMLINK &&
sizeof(buf) > (list ? list->len + 1 : 0) + fse->len + 1 &&
is_inside_windows_container()) {
size_t off = 0;
Expand All @@ -180,13 +197,13 @@ static struct fsentry *fseentry_create_entry(struct fscache *cache, struct fsent
buf[off + fse->len] = '\0';
}

fse->st_mode = file_attr_to_st_mode(fdata->dwFileAttributes,
fdata->dwReserved0, buf);
fse->st_mode = file_attr_to_st_mode(fdata->FileAttributes,
fdata->EaSize, buf);
fse->st_size = S_ISLNK(fse->st_mode) ? MAX_LONG_PATH :
fdata->nFileSizeLow | (((off_t) fdata->nFileSizeHigh) << 32);
filetime_to_timespec(&(fdata->ftLastAccessTime), &(fse->st_atim));
filetime_to_timespec(&(fdata->ftLastWriteTime), &(fse->st_mtim));
filetime_to_timespec(&(fdata->ftCreationTime), &(fse->st_ctim));
fdata->EndOfFile.LowPart | (((off_t)fdata->EndOfFile.HighPart) << 32);
filetime_to_timespec((FILETIME *)&(fdata->LastAccessTime), &(fse->st_atim));
filetime_to_timespec((FILETIME *)&(fdata->LastWriteTime), &(fse->st_mtim));
filetime_to_timespec((FILETIME *)&(fdata->CreationTime), &(fse->st_ctim));

return fse;
}
Expand All @@ -199,8 +216,10 @@ static struct fsentry *fseentry_create_entry(struct fscache *cache, struct fsent
static struct fsentry *fsentry_create_list(struct fscache *cache, const struct fsentry *dir,
int *dir_not_found)
{
wchar_t pattern[MAX_LONG_PATH + 2]; /* + 2 for "\*" */
WIN32_FIND_DATAW fdata;
wchar_t pattern[MAX_LONG_PATH];
NTSTATUS status;
IO_STATUS_BLOCK iosb;
PFILE_FULL_DIR_INFORMATION di;
HANDLE h;
int wlen;
struct fsentry *list, **phead;
Expand All @@ -213,18 +232,18 @@ static struct fsentry *fsentry_create_list(struct fscache *cache, const struct f
dir->len, MAX_PATH - 2, core_long_paths)) < 0)
return NULL;

/*
* append optional '\' and wildcard '*'. Note: we need to use '\' as
* Windows doesn't translate '/' to '\' for "\\?\"-prefixed paths.
*/
if (wlen)
pattern[wlen++] = '\\';
pattern[wlen++] = '*';
pattern[wlen] = 0;

/* open find handle */
h = FindFirstFileExW(pattern, FindExInfoBasic, &fdata, FindExSearchNameMatch,
NULL, FIND_FIRST_EX_LARGE_FETCH);
/* handle CWD */
if (!wlen) {
wlen = GetCurrentDirectoryW(ARRAY_SIZE(pattern), pattern);
if (!wlen || wlen >= ARRAY_SIZE(pattern)) {
errno = wlen ? ENAMETOOLONG : err_win_to_posix(GetLastError());
return NULL;
}
}

h = CreateFileW(pattern, FILE_LIST_DIRECTORY,
FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL);
if (h == INVALID_HANDLE_VALUE) {
err = GetLastError();
*dir_not_found = 1; /* or empty directory */
Expand All @@ -240,22 +259,55 @@ static struct fsentry *fsentry_create_list(struct fscache *cache, const struct f

/* walk directory and build linked list of fsentry structures */
phead = &list->next;
do {
*phead = fseentry_create_entry(cache, list, &fdata);
status = NtQueryDirectoryFile(h, NULL, 0, 0, &iosb, cache->buffer,
sizeof(cache->buffer), FileFullDirectoryInformation, FALSE, NULL, FALSE);
if (!NT_SUCCESS(status)) {
/*
* NtQueryDirectoryFile returns STATUS_INVALID_PARAMETER when
* asked to enumerate an invalid directory (ie it is a file
* instead of a directory). Verify that is the actual cause
* of the error.
*/
if (status == STATUS_INVALID_PARAMETER) {
DWORD attributes = GetFileAttributesW(pattern);
if (!(attributes & FILE_ATTRIBUTE_DIRECTORY))
status = ERROR_DIRECTORY;
}
goto Error;
}
di = (PFILE_FULL_DIR_INFORMATION)(cache->buffer);
for (;;) {

*phead = fseentry_create_entry(cache, list, di);
phead = &(*phead)->next;
} while (FindNextFileW(h, &fdata));

/* remember result of last FindNextFile, then close find handle */
err = GetLastError();
FindClose(h);
/* If there is no offset in the entry, the buffer has been exhausted. */
if (di->NextEntryOffset == 0) {
status = NtQueryDirectoryFile(h, NULL, 0, 0, &iosb, cache->buffer,
sizeof(cache->buffer), FileFullDirectoryInformation, FALSE, NULL, FALSE);
if (!NT_SUCCESS(status)) {
if (status == STATUS_NO_MORE_FILES)
break;
goto Error;
}

di = (PFILE_FULL_DIR_INFORMATION)(cache->buffer);
continue;
}

/* Advance to the next entry. */
di = (PFILE_FULL_DIR_INFORMATION)(((PUCHAR)di) + di->NextEntryOffset);
}

/* return the list if we've got all the files */
if (err == ERROR_NO_MORE_FILES)
return list;
CloseHandle(h);
return list;

/* otherwise release the list and return error */
Error:
errno = (status == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(status);
trace_printf_key(&trace_fscache, "fscache: error(%d) unable to query directory contents '%.*s'\n",
errno, dir->len, dir->name);
CloseHandle(h);
fsentry_release(list);
errno = err_win_to_posix(err);
return NULL;
}

Expand Down
131 changes: 131 additions & 0 deletions compat/win32/ntifs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
#ifndef _NTIFS_
#define _NTIFS_

/*
* Copy necessary structures and definitions out of the Windows DDK
* to enable calling NtQueryDirectoryFile()
*/

typedef _Return_type_success_(return >= 0) LONG NTSTATUS;
#define NT_SUCCESS(Status) (((NTSTATUS)(Status)) >= 0)

typedef struct _UNICODE_STRING {
USHORT Length;
USHORT MaximumLength;
#ifdef MIDL_PASS
[size_is(MaximumLength / 2), length_is((Length) / 2)] USHORT * Buffer;
#else // MIDL_PASS
_Field_size_bytes_part_(MaximumLength, Length) PWCH Buffer;
#endif // MIDL_PASS
} UNICODE_STRING;
typedef UNICODE_STRING *PUNICODE_STRING;
typedef const UNICODE_STRING *PCUNICODE_STRING;

typedef enum _FILE_INFORMATION_CLASS {
FileDirectoryInformation = 1,
FileFullDirectoryInformation,
FileBothDirectoryInformation,
FileBasicInformation,
FileStandardInformation,
FileInternalInformation,
FileEaInformation,
FileAccessInformation,
FileNameInformation,
FileRenameInformation,
FileLinkInformation,
FileNamesInformation,
FileDispositionInformation,
FilePositionInformation,
FileFullEaInformation,
FileModeInformation,
FileAlignmentInformation,
FileAllInformation,
FileAllocationInformation,
FileEndOfFileInformation,
FileAlternateNameInformation,
FileStreamInformation,
FilePipeInformation,
FilePipeLocalInformation,
FilePipeRemoteInformation,
FileMailslotQueryInformation,
FileMailslotSetInformation,
FileCompressionInformation,
FileObjectIdInformation,
FileCompletionInformation,
FileMoveClusterInformation,
FileQuotaInformation,
FileReparsePointInformation,
FileNetworkOpenInformation,
FileAttributeTagInformation,
FileTrackingInformation,
FileIdBothDirectoryInformation,
FileIdFullDirectoryInformation,
FileValidDataLengthInformation,
FileShortNameInformation,
FileIoCompletionNotificationInformation,
FileIoStatusBlockRangeInformation,
FileIoPriorityHintInformation,
FileSfioReserveInformation,
FileSfioVolumeInformation,
FileHardLinkInformation,
FileProcessIdsUsingFileInformation,
FileNormalizedNameInformation,
FileNetworkPhysicalNameInformation,
FileIdGlobalTxDirectoryInformation,
FileIsRemoteDeviceInformation,
FileAttributeCacheInformation,
FileNumaNodeInformation,
FileStandardLinkInformation,
FileRemoteProtocolInformation,
FileMaximumInformation
} FILE_INFORMATION_CLASS, *PFILE_INFORMATION_CLASS;

typedef struct _FILE_FULL_DIR_INFORMATION {
ULONG NextEntryOffset;
ULONG FileIndex;
LARGE_INTEGER CreationTime;
LARGE_INTEGER LastAccessTime;
LARGE_INTEGER LastWriteTime;
LARGE_INTEGER ChangeTime;
LARGE_INTEGER EndOfFile;
LARGE_INTEGER AllocationSize;
ULONG FileAttributes;
ULONG FileNameLength;
ULONG EaSize;
WCHAR FileName[1];
} FILE_FULL_DIR_INFORMATION, *PFILE_FULL_DIR_INFORMATION;

typedef struct _IO_STATUS_BLOCK {
union {
NTSTATUS Status;
PVOID Pointer;
} DUMMYUNIONNAME;
ULONG_PTR Information;
} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK;

typedef VOID
(NTAPI *PIO_APC_ROUTINE)(
IN PVOID ApcContext,
IN PIO_STATUS_BLOCK IoStatusBlock,
IN ULONG Reserved);

NTSYSCALLAPI
NTSTATUS
NTAPI
NtQueryDirectoryFile(
_In_ HANDLE FileHandle,
_In_opt_ HANDLE Event,
_In_opt_ PIO_APC_ROUTINE ApcRoutine,
_In_opt_ PVOID ApcContext,
_Out_ PIO_STATUS_BLOCK IoStatusBlock,
_Out_writes_bytes_(Length) PVOID FileInformation,
_In_ ULONG Length,
_In_ FILE_INFORMATION_CLASS FileInformationClass,
_In_ BOOLEAN ReturnSingleEntry,
_In_opt_ PUNICODE_STRING FileName,
_In_ BOOLEAN RestartScan
);

#define STATUS_NO_MORE_FILES ((NTSTATUS)0x80000006L)

#endif

0 comments on commit 842a845

Please sign in to comment.