Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor File.ReadAllBytes* improvements #61519

Merged
merged 9 commits into from
Nov 17, 2021
15 changes: 15 additions & 0 deletions src/libraries/System.IO.FileSystem/tests/File/ReadWriteAllBytes.cs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,21 @@ public void ReadFileOver2GB()
Assert.Throws<IOException>(() => File.ReadAllBytes(path));
}

[Fact]
[OuterLoop]
[ActiveIssue("https://github.com/dotnet/runtime/issues/45954", TestPlatforms.Browser)]
public void ReadFileOverMaxArrayLength()
{
string path = GetTestFilePath();
using (FileStream fs = File.Create(path))
{
fs.SetLength(Array.MaxLength + 1L);
}

// File is too large for ReadAllBytes at once
Assert.Throws<IOException>(() => File.ReadAllBytes(path));
adamsitnik marked this conversation as resolved.
Show resolved Hide resolved
}

[Fact]
public void Overwrite()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,21 @@ public Task ReadFileOver2GBAsync()
return Assert.ThrowsAsync<IOException>(async () => await File.ReadAllBytesAsync(path));
}

[Fact]
[OuterLoop]
[ActiveIssue("https://github.com/dotnet/runtime/issues/45954", TestPlatforms.Browser)]
public Task MaxArrayLengthAsync()
adamsitnik marked this conversation as resolved.
Show resolved Hide resolved
{
string path = GetTestFilePath();
using (FileStream fs = File.Create(path))
{
fs.SetLength(Array.MaxLength + 1L);
}

// File is too large for ReadAllBytes at once
return Assert.ThrowsAsync<IOException>(async () => await File.ReadAllBytesAsync(path));
adamsitnik marked this conversation as resolved.
Show resolved Hide resolved
}

[Fact]
public async Task OverwriteAsync()
{
Expand Down
73 changes: 35 additions & 38 deletions src/libraries/System.Private.CoreLib/src/System/IO/File.cs
Original file line number Diff line number Diff line change
Expand Up @@ -251,27 +251,33 @@ public static void WriteAllText(string path, string? contents, Encoding encoding

public static byte[] ReadAllBytes(string path)
{
// bufferSize == 1 used to avoid unnecessary buffer in FileStream
using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1, FileOptions.SequentialScan))
// SequentialScan is a perf hint that requires extra sys-call on non-Windows OSes.
FileOptions options = OperatingSystem.IsWindows() ? FileOptions.SequentialScan : FileOptions.None;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: maybe turn this into a static property, like PlatformSequentialScanOption.

using (SafeFileHandle sfh = OpenHandle(path, FileMode.Open, FileAccess.Read, FileShare.Read, options))
{
long fileLength = 0;
if (fs.CanSeek && (fileLength = fs.Length) > int.MaxValue)
if (sfh.CanSeek && (fileLength = RandomAccess.GetFileLength(sfh)) > Array.MaxLength)
{
throw new IOException(SR.IO_FileTooLong2GB);
}

#if DEBUG
fileLength = 0; // improve the test coverage for ReadAllBytesUnknownLength
#endif
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we instead just ensure we have tests that read files that return 0 for the length?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we instead just ensure we have tests that read files that return 0 for the length?

We have some tests, and you are the person who authored most of them: dotnet/corefx#28388

The problem is that they don't guarantee that all possible code paths are going to be executed (using stack-allocated array, renting an array from the pool, returning it and renting a larger array etc) as they deal with a virtual file system that seems to be exposing files only for reading.

We could use pipes (#58434 introduced some tests for that ), but it would require more work and handling all edge cases.

I know that setting fileLength to 0 with #if DEBUG is ugly, but it gives us what we need with a very little effort.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but it gives us what we need with a very little effort.

Does it? It means, for example, that Debug.Asserts in the normal non-zero code paths are now rendered useless.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It means, for example, that Debug.Asserts in the normal non-zero code paths are now rendered useless.

Which asserts do you mean? I can't see any in the code path for regular files that report length properly:

private static async Task<byte[]> InternalReadAllBytesAsync(SafeFileHandle sfh, int count, CancellationToken cancellationToken)
{
using (sfh)
{
int index = 0;
byte[] bytes = new byte[count];
do
{
int n = await RandomAccess.ReadAtOffsetAsync(sfh, bytes.AsMemory(index), index, cancellationToken).ConfigureAwait(false);
if (n == 0)
{
ThrowHelper.ThrowEndOfFileException();
}
index += n;
} while (index < count);
return bytes;
}
}

int index = 0;
int count = (int)fileLength;
byte[] bytes = new byte[count];
while (count > 0)
{
int n = RandomAccess.ReadAtOffset(sfh, bytes.AsSpan(index, count), index);
if (n == 0)
{
ThrowHelper.ThrowEndOfFileException();
}
index += n;
count -= n;
}
return bytes;

Copy link
Member

@stephentoub stephentoub Nov 17, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which asserts do you mean?

Any on any code paths used from here all the way down through the runtime. The change says that the 99.9% code path is never executed in debug.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any on any code paths used from here all the way down through the runtime.

These paths (namely RandomAccess.ReadAtOffsetAsync and RandomAccess.ReadAtOffset) have an excellent test code coverage (directly as RandomAccess tests and indirectly via FileStream tests) so I am not worried about it.

I do understand that it's not a clear win and a a trade off.


if (fileLength == 0)
{
// Some file systems (e.g. procfs on Linux) return 0 for length even when there's content; also there is non-seekable file stream.
// Some file systems (e.g. procfs on Linux) return 0 for length even when there's content; also there are non-seekable files.
// Thus we need to assume 0 doesn't mean empty.
return ReadAllBytesUnknownLength(fs);
return ReadAllBytesUnknownLength(sfh);
}

int index = 0;
int count = (int)fileLength;
byte[] bytes = new byte[count];
while (count > 0)
{
int n = fs.Read(bytes, index, count);
int n = RandomAccess.ReadAtOffset(sfh, bytes.AsSpan(index, count), index);
if (n == 0)
{
ThrowHelper.ThrowEndOfFileException();
Expand Down Expand Up @@ -519,44 +525,35 @@ private static async Task<string> InternalReadAllTextAsync(string path, Encoding
return Task.FromCanceled<byte[]>(cancellationToken);
}

var fs = new FileStream(
path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1, // bufferSize == 1 used to avoid unnecessary buffer in FileStream
FileOptions.Asynchronous | FileOptions.SequentialScan);
// SequentialScan is a perf hint that requires extra sys-call on non-Windows OSes.
FileOptions options = (OperatingSystem.IsWindows() ? FileOptions.SequentialScan : FileOptions.None) | FileOptions.Asynchronous;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe:

Suggested change
// SequentialScan is a perf hint that requires extra sys-call on non-Windows OSes.
FileOptions options = (OperatingSystem.IsWindows() ? FileOptions.SequentialScan : FileOptions.None) | FileOptions.Asynchronous;
FileOptions options = FileOptions.Asynchronous;
if (OperatingSystem.IsWindows())
{
// SequentialScan is a perf hint that requires extra sys-call on non-Windows OSes.
options |= FileOptions.SequentialScan;
}

?

Or as this is corelib, it could also be e.g.

Suggested change
// SequentialScan is a perf hint that requires extra sys-call on non-Windows OSes.
FileOptions options = (OperatingSystem.IsWindows() ? FileOptions.SequentialScan : FileOptions.None) | FileOptions.Asynchronous;
const FileOptions Options =
#if TARGET_WINDOWS
FileOptions.Asynchronous | FileOptions.SequentialScan;
#else
// SequentialScan is a perf hint that requires extra sys-call on non-Windows OSes.
FileOptions.Asynchronous;
#endif

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not convinced, as the proposals require too much space. I've moved FileOptions.Asynchronous to the left, hope it helps:

- FileOptions options = (OperatingSystem.IsWindows() ? FileOptions.SequentialScan : FileOptions.None) | FileOptions.Asynchronous;
+ FileOptions options = FileOptions.Asynchronous | (OperatingSystem.IsWindows() ? FileOptions.SequentialScan : FileOptions.None);

SafeFileHandle sfh = OpenHandle(path, FileMode.Open, FileAccess.Read, FileShare.Read, options);

bool returningInternalTask = false;
try
long fileLength = 0L;
if (sfh.CanSeek && (fileLength = RandomAccess.GetFileLength(sfh)) > Array.MaxLength)
{
long fileLength = 0L;
if (fs.CanSeek && (fileLength = fs.Length) > int.MaxValue)
{
var e = new IOException(SR.IO_FileTooLong2GB);
ExceptionDispatchInfo.SetCurrentStackTrace(e);
return Task.FromException<byte[]>(e);
}

returningInternalTask = true;
return fileLength > 0 ?
InternalReadAllBytesAsync(fs, (int)fileLength, cancellationToken) :
InternalReadAllBytesUnknownLengthAsync(fs, cancellationToken);
}
finally
{
if (!returningInternalTask)
{
fs.Dispose();
}
sfh.Dispose();
return Task.FromException<byte[]>(ExceptionDispatchInfo.SetCurrentStackTrace(new IOException(SR.IO_FileTooLong2GB)));
}

#if DEBUG
fileLength = 0; // improve the test coverage for InternalReadAllBytesUnknownLengthAsync
#endif
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same question/comment as earlier.


return fileLength > 0 ?
InternalReadAllBytesAsync(sfh, (int)fileLength, cancellationToken) :
InternalReadAllBytesUnknownLengthAsync(sfh, cancellationToken);
}

private static async Task<byte[]> InternalReadAllBytesAsync(FileStream fs, int count, CancellationToken cancellationToken)
private static async Task<byte[]> InternalReadAllBytesAsync(SafeFileHandle sfh, int count, CancellationToken cancellationToken)
{
using (fs)
using (sfh)
{
int index = 0;
byte[] bytes = new byte[count];
do
{
int n = await fs.ReadAsync(new Memory<byte>(bytes, index, count - index), cancellationToken).ConfigureAwait(false);
int n = await RandomAccess.ReadAtOffsetAsync(sfh, bytes.AsMemory(index), index, cancellationToken).ConfigureAwait(false);
if (n == 0)
{
ThrowHelper.ThrowEndOfFileException();
Expand All @@ -569,7 +566,7 @@ private static async Task<byte[]> InternalReadAllBytesAsync(FileStream fs, int c
}
}

private static async Task<byte[]> InternalReadAllBytesUnknownLengthAsync(FileStream fs, CancellationToken cancellationToken)
private static async Task<byte[]> InternalReadAllBytesUnknownLengthAsync(SafeFileHandle sfh, CancellationToken cancellationToken)
{
byte[] rentedArray = ArrayPool<byte>.Shared.Rent(512);
try
Expand All @@ -595,7 +592,7 @@ private static async Task<byte[]> InternalReadAllBytesUnknownLengthAsync(FileStr
}

Debug.Assert(bytesRead < rentedArray.Length);
int n = await fs.ReadAsync(rentedArray.AsMemory(bytesRead), cancellationToken).ConfigureAwait(false);
int n = await RandomAccess.ReadAtOffsetAsync(sfh, rentedArray.AsMemory(bytesRead), bytesRead, cancellationToken).ConfigureAwait(false);
if (n == 0)
{
return rentedArray.AsSpan(0, bytesRead).ToArray();
Expand All @@ -605,7 +602,7 @@ private static async Task<byte[]> InternalReadAllBytesUnknownLengthAsync(FileStr
}
finally
{
fs.Dispose();
sfh.Dispose();
ArrayPool<byte>.Shared.Return(rentedArray);
}
}
Expand Down Expand Up @@ -775,7 +772,7 @@ private static void Validate(string path, Encoding encoding)
throw new ArgumentException(SR.Argument_EmptyPath, nameof(path));
}

private static byte[] ReadAllBytesUnknownLength(FileStream fs)
private static byte[] ReadAllBytesUnknownLength(SafeFileHandle sfh)
{
byte[]? rentedArray = null;
Span<byte> buffer = stackalloc byte[512];
Expand Down Expand Up @@ -803,7 +800,7 @@ private static byte[] ReadAllBytesUnknownLength(FileStream fs)
}

Debug.Assert(bytesRead < buffer.Length);
int n = fs.Read(buffer.Slice(bytesRead));
int n = RandomAccess.ReadAtOffset(sfh, buffer.Slice(bytesRead), bytesRead);
if (n == 0)
{
return buffer.Slice(0, bytesRead).ToArray();
Expand Down