Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.
/ corefx Public archive

Fix File.ReadAllBytes{Async} for virtual file system files #28388

Merged
merged 1 commit into from
Mar 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 96 additions & 13 deletions src/System.IO.FileSystem/src/System/IO/File.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ namespace System.IO
// routines such as Delete, etc.
public static class File
{
private const int MaxByteArrayLength = 0x7FFFFFC7;
private static Encoding s_UTF8NoBOM;

internal const int DefaultBufferSize = 4096;
Expand Down Expand Up @@ -324,7 +325,15 @@ public static byte[] ReadAllBytes(string path)
{
long fileLength = fs.Length;
if (fileLength > int.MaxValue)
{
throw new IOException(SR.IO_FileTooLong2GB);
}
else if (fileLength == 0)
{
// Some file systems (e.g. procfs on Linux) return 0 for length even when there's content.
// Thus we need to assume 0 doesn't mean empty.
return ReadAllBytesUnknownLength(fs);
}

int index = 0;
int count = (int)fileLength;
Expand All @@ -341,6 +350,50 @@ public static byte[] ReadAllBytes(string path)
}
}

private static byte[] ReadAllBytesUnknownLength(FileStream fs)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I missed something but it doesn't seem to throw new IOException(SR.IO_FileTooLong2GB) in case of newLength > int.MaxValue like it is done in ReadAllBytes for the case where length is known. And it seems like DefaultArrayPool.Rent doesn't throw either

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this stops when Rent throws out-of-memory which will happen when you ask it to allocate more than MaxByteArrayLength (or when there is no more memory).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't see such limit in DefaultArrayPool:


So it will throw only on some runtime limit or out of memory which seem to be inconsistent with known length case

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MaxByteArrayLength is the max array length supported by .NET.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Thanks! That's what I actually didn't realize

{
byte[] rentedArray = null;
Span<byte> buffer = stackalloc byte[512];
try
{
int bytesRead = 0;
while (true)
{
if (bytesRead == buffer.Length)
{
uint newLength = (uint)buffer.Length * 2;
if (newLength > MaxByteArrayLength)
{
newLength = (uint)Math.Max(MaxByteArrayLength, buffer.Length + 1);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't there be signed integer overflow in buffer.Length + 1 at some point? We don't seem to have any hard limit for this whole loop. Maybe you actually meant Math.Min here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For buffer.Length + 1 > MaxByteArrayLength case, Rent will end up throwing (I think).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do I understand it correctly - when buffer.Length becomes MaxByteArrayLength on all further iterations it grows by one byte?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tmds That's the thing - DefaultArrayPool.Rent implementation doesn't seem have any limits (or maybe I missed one)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From what I see this is how it behaves:
On each iteration size doubles;
Then it tries MaxByteArrayLength;
And on the next iteration (buffer.Length + 1), Rent throws.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I'm looking into DefaultArrayPool.Rent implementation and in case requested length is greater than supported by pool it just creates an array of requested length:

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So newermind. I didn't realize that MaxArrayLength is the actual maximum supported by .NET

}

byte[] tmp = ArrayPool<byte>.Shared.Rent((int)newLength);
buffer.CopyTo(tmp);
if (rentedArray != null)
{
ArrayPool<byte>.Shared.Return(rentedArray);
}
buffer = rentedArray = tmp;
}

Debug.Assert(bytesRead < buffer.Length);
int n = fs.Read(buffer.Slice(bytesRead));
if (n == 0)
{
return buffer.Slice(0, bytesRead).ToArray();
}
bytesRead += n;
}
}
finally
{
if (rentedArray != null)
{
ArrayPool<byte>.Shared.Return(rentedArray);
}
}
}

public static void WriteAllBytes(string path, byte[] bytes)
{
if (path == null)
Expand Down Expand Up @@ -709,31 +762,23 @@ private static async Task<string> InternalReadAllTextAsync(string path, Encoding
return Task.FromCanceled<byte[]>(cancellationToken);
}

FileStream fs = new FileStream(
path, FileMode.Open, FileAccess.Read, FileShare.Read, DefaultBufferSize,
var fs = new FileStream(
path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1, // bufferSize == 1 used to avoid unnecessary buffer in FileStream
FileOptions.Asynchronous | FileOptions.SequentialScan);

bool returningInternalTask = false;
try
{
long fileLength = fs.Length;
if (cancellationToken.IsCancellationRequested)
{
return Task.FromCanceled<byte[]>(cancellationToken);
}

if (fileLength > int.MaxValue)
{
return Task.FromException<byte[]>(new IOException(SR.IO_FileTooLong2GB));
}

if (fileLength == 0)
{
return Task.FromResult(Array.Empty<byte>());
}

returningInternalTask = true;
return InternalReadAllBytesAsync(fs, (int)fileLength, cancellationToken);
return fileLength > 0 ?
InternalReadAllBytesAsync(fs, (int)fileLength, cancellationToken) :
InternalReadAllBytesUnknownLengthAsync(fs, cancellationToken);
}
finally
{
Expand Down Expand Up @@ -765,6 +810,44 @@ private static async Task<byte[]> InternalReadAllBytesAsync(FileStream fs, int c
}
}

private static async Task<byte[]> InternalReadAllBytesUnknownLengthAsync(FileStream fs, CancellationToken cancellationToken)
{
byte[] rentedArray = ArrayPool<byte>.Shared.Rent(512);
try
{
int bytesRead = 0;
while (true)
{
if (bytesRead == rentedArray.Length)
{
uint newLength = (uint)rentedArray.Length * 2;
if (newLength > MaxByteArrayLength)
{
newLength = (uint)Math.Max(MaxByteArrayLength, rentedArray.Length + 1);
}

byte[] tmp = ArrayPool<byte>.Shared.Rent((int)newLength);
Buffer.BlockCopy(rentedArray, 0, tmp, 0, bytesRead);
ArrayPool<byte>.Shared.Return(rentedArray);
rentedArray = tmp;
}

Debug.Assert(bytesRead < rentedArray.Length);
int n = await fs.ReadAsync(rentedArray.AsMemory(bytesRead), cancellationToken).ConfigureAwait(false);
if (n == 0)
{
return rentedArray.AsSpan().Slice(0, bytesRead).ToArray();
}
bytesRead += n;
}
}
finally
{
fs.Dispose();
ArrayPool<byte>.Shared.Return(rentedArray);
}
}

public static Task WriteAllBytesAsync(string path, byte[] bytes, CancellationToken cancellationToken = default(CancellationToken))
{
if (path == null)
Expand Down
53 changes: 53 additions & 0 deletions src/System.IO.FileSystem/tests/File/ReadWriteAllBytes.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;
using Xunit;

namespace System.IO.Tests
Expand Down Expand Up @@ -120,5 +121,57 @@ public void WriteToReadOnlyFile()
File.SetAttributes(path, FileAttributes.Normal);
}
}

[Fact]
public void EmptyFile_ReturnsEmptyArray()
{
string path = GetTestFilePath();
File.Create(path).Dispose();
Assert.Equal(0, File.ReadAllBytes(path).Length);
}

[Theory]
[PlatformSpecific(TestPlatforms.Linux)]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar test for OSX maybe?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you know of any file paths that present this behavior on macOS?

[InlineData("/proc/cmdline")]
[InlineData("/proc/version")]
[InlineData("/proc/filesystems")]
public void ProcFs_EqualsReadAllText(string path)
{
byte[] bytes = null;
string text = null;

const int NumTries = 3; // some of these could theoretically change between reads, so allow retries just in case
for (int i = 1; i <= NumTries; i++)
{
try
{
bytes = File.ReadAllBytes(path);
text = File.ReadAllText(path);
Assert.Equal(text, Encoding.UTF8.GetString(bytes));
}
catch when (i < NumTries) { }
}
}

[Theory]
[PlatformSpecific(TestPlatforms.Linux)]
public void ReadAllBytes_ProcFs_Uptime_ContainsTwoNumbers()
{
string text = Encoding.UTF8.GetString(File.ReadAllBytes("/proc/uptime"));
string[] parts = text.Split(new [] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
Assert.Equal(2, parts.Length);
Assert.True(double.TryParse(parts[0].Trim(), out _));
Assert.True(double.TryParse(parts[1].Trim(), out _));
}

[Theory]
[PlatformSpecific(TestPlatforms.Linux)]
[InlineData("/proc/meminfo")]
[InlineData("/proc/stat")]
[InlineData("/proc/cpuinfo")]
public void ProcFs_NotEmpty(string path)
{
Assert.InRange(File.ReadAllBytes(path).Length, 1, int.MaxValue);
}
}
}
52 changes: 52 additions & 0 deletions src/System.IO.FileSystem/tests/File/ReadWriteAllBytesAsync.cs
Original file line number Diff line number Diff line change
Expand Up @@ -134,5 +134,57 @@ public async Task WriteToReadOnlyFileAsync()
File.SetAttributes(path, FileAttributes.Normal);
}
}

[Fact]
public async Task EmptyFile_ReturnsEmptyArray()
{
string path = GetTestFilePath();
File.Create(path).Dispose();
Assert.Equal(0, (await File.ReadAllBytesAsync(path)).Length);
}

[Theory]
[PlatformSpecific(TestPlatforms.Linux)]
[InlineData("/proc/cmdline")]
[InlineData("/proc/version")]
[InlineData("/proc/filesystems")]
public async Task ProcFs_EqualsReadAllText(string path)
{
byte[] bytes = null;
string text = null;

const int NumTries = 3; // some of these could theoretically change between reads, so allow retries just in case
for (int i = 1; i <= NumTries; i++)
{
try
{
bytes = await File.ReadAllBytesAsync(path);
text = await File.ReadAllTextAsync(path);
Assert.Equal(text, Encoding.UTF8.GetString(bytes));
}
catch when (i < NumTries) { }
}
}

[Theory]
[PlatformSpecific(TestPlatforms.Linux)]
public async Task ReadAllBytes_ProcFs_Uptime_ContainsTwoNumbers()
{
string text = Encoding.UTF8.GetString(await File.ReadAllBytesAsync("/proc/uptime"));
string[] parts = text.Split(' ', StringSplitOptions.RemoveEmptyEntries);
Assert.Equal(2, parts.Length);
Assert.True(double.TryParse(parts[0].Trim(), out _));
Assert.True(double.TryParse(parts[1].Trim(), out _));
}

[Theory]
[PlatformSpecific(TestPlatforms.Linux)]
[InlineData("/proc/meminfo")]
[InlineData("/proc/stat")]
[InlineData("/proc/cpuinfo")]
public async Task ProcFs_NotEmpty(string path)
{
Assert.InRange((await File.ReadAllBytesAsync(path)).Length, 1, int.MaxValue);
}
}
}