-
Notifications
You must be signed in to change notification settings - Fork 4.9k
Fix File.ReadAllBytes{Async} for virtual file system files #28388
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -15,6 +15,7 @@ namespace System.IO | |||
// routines such as Delete, etc. | ||||
public static class File | ||||
{ | ||||
private const int MaxByteArrayLength = 0x7FFFFFC7; | ||||
private static Encoding s_UTF8NoBOM; | ||||
|
||||
internal const int DefaultBufferSize = 4096; | ||||
|
@@ -324,7 +325,15 @@ public static byte[] ReadAllBytes(string path) | |||
{ | ||||
long fileLength = fs.Length; | ||||
if (fileLength > int.MaxValue) | ||||
{ | ||||
throw new IOException(SR.IO_FileTooLong2GB); | ||||
} | ||||
else if (fileLength == 0) | ||||
{ | ||||
// Some file systems (e.g. procfs on Linux) return 0 for length even when there's content. | ||||
// Thus we need to assume 0 doesn't mean empty. | ||||
return ReadAllBytesUnknownLength(fs); | ||||
} | ||||
|
||||
int index = 0; | ||||
int count = (int)fileLength; | ||||
|
@@ -341,6 +350,50 @@ public static byte[] ReadAllBytes(string path) | |||
} | ||||
} | ||||
|
||||
private static byte[] ReadAllBytesUnknownLength(FileStream fs) | ||||
{ | ||||
byte[] rentedArray = null; | ||||
Span<byte> buffer = stackalloc byte[512]; | ||||
try | ||||
{ | ||||
int bytesRead = 0; | ||||
while (true) | ||||
{ | ||||
if (bytesRead == buffer.Length) | ||||
{ | ||||
uint newLength = (uint)buffer.Length * 2; | ||||
if (newLength > MaxByteArrayLength) | ||||
{ | ||||
newLength = (uint)Math.Max(MaxByteArrayLength, buffer.Length + 1); | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't there be signed integer overflow in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do I understand it correctly - when buffer.Length becomes MaxByteArrayLength on all further iterations it grows by one byte? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @tmds That's the thing - DefaultArrayPool.Rent implementation doesn't seem have any limits (or maybe I missed one) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From what I see this is how it behaves: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So I'm looking into DefaultArrayPool.Rent implementation and in case requested length is greater than supported by pool it just creates an array of requested length:
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So never mind. I didn't realize that MaxByteArrayLength is the actual maximum array length supported by .NET |
||||
} | ||||
|
||||
byte[] tmp = ArrayPool<byte>.Shared.Rent((int)newLength); | ||||
buffer.CopyTo(tmp); | ||||
if (rentedArray != null) | ||||
{ | ||||
ArrayPool<byte>.Shared.Return(rentedArray); | ||||
} | ||||
buffer = rentedArray = tmp; | ||||
} | ||||
|
||||
Debug.Assert(bytesRead < buffer.Length); | ||||
int n = fs.Read(buffer.Slice(bytesRead)); | ||||
if (n == 0) | ||||
{ | ||||
return buffer.Slice(0, bytesRead).ToArray(); | ||||
} | ||||
bytesRead += n; | ||||
} | ||||
} | ||||
finally | ||||
{ | ||||
if (rentedArray != null) | ||||
{ | ||||
ArrayPool<byte>.Shared.Return(rentedArray); | ||||
} | ||||
} | ||||
} | ||||
|
||||
public static void WriteAllBytes(string path, byte[] bytes) | ||||
{ | ||||
if (path == null) | ||||
|
@@ -709,31 +762,23 @@ private static async Task<string> InternalReadAllTextAsync(string path, Encoding | |||
return Task.FromCanceled<byte[]>(cancellationToken); | ||||
} | ||||
|
||||
FileStream fs = new FileStream( | ||||
path, FileMode.Open, FileAccess.Read, FileShare.Read, DefaultBufferSize, | ||||
var fs = new FileStream( | ||||
path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1, // bufferSize == 1 used to avoid unnecessary buffer in FileStream | ||||
FileOptions.Asynchronous | FileOptions.SequentialScan); | ||||
|
||||
bool returningInternalTask = false; | ||||
try | ||||
{ | ||||
long fileLength = fs.Length; | ||||
if (cancellationToken.IsCancellationRequested) | ||||
{ | ||||
return Task.FromCanceled<byte[]>(cancellationToken); | ||||
} | ||||
|
||||
if (fileLength > int.MaxValue) | ||||
{ | ||||
return Task.FromException<byte[]>(new IOException(SR.IO_FileTooLong2GB)); | ||||
} | ||||
|
||||
if (fileLength == 0) | ||||
{ | ||||
return Task.FromResult(Array.Empty<byte>()); | ||||
} | ||||
|
||||
returningInternalTask = true; | ||||
return InternalReadAllBytesAsync(fs, (int)fileLength, cancellationToken); | ||||
return fileLength > 0 ? | ||||
InternalReadAllBytesAsync(fs, (int)fileLength, cancellationToken) : | ||||
InternalReadAllBytesUnknownLengthAsync(fs, cancellationToken); | ||||
} | ||||
finally | ||||
{ | ||||
|
@@ -765,6 +810,44 @@ private static async Task<byte[]> InternalReadAllBytesAsync(FileStream fs, int c | |||
} | ||||
} | ||||
|
||||
/// <summary>
/// Asynchronously reads <paramref name="fs"/> to its end when the reported length cannot be trusted,
/// accumulating the data in a pooled buffer that is enlarged whenever it fills up.
/// </summary>
/// <param name="fs">The stream to drain. It is disposed by this method on every exit path.</param>
/// <param name="cancellationToken">Token passed to each asynchronous read.</param>
/// <returns>A new array holding exactly the bytes that were read.</returns>
private static async Task<byte[]> InternalReadAllBytesUnknownLengthAsync(FileStream fs, CancellationToken cancellationToken)
{
    byte[] pooled = ArrayPool<byte>.Shared.Rent(512);
    try
    {
        int total = 0;
        int chunk;
        do
        {
            if (total == pooled.Length)
            {
                // The buffer is full: double it, clamping to MaxByteArrayLength. Once the buffer
                // has reached that size, request one byte more than the current length instead.
                uint grownLength = (uint)pooled.Length * 2;
                if (grownLength > MaxByteArrayLength)
                {
                    grownLength = (uint)Math.Max(MaxByteArrayLength, pooled.Length + 1);
                }

                byte[] larger = ArrayPool<byte>.Shared.Rent((int)grownLength);
                Buffer.BlockCopy(pooled, 0, larger, 0, total);
                ArrayPool<byte>.Shared.Return(pooled);
                pooled = larger;
            }

            Debug.Assert(total < pooled.Length);
            chunk = await fs.ReadAsync(pooled.AsMemory(total), cancellationToken).ConfigureAwait(false);
            total += chunk;
        }
        while (chunk != 0); // a zero-byte read marks the end of the stream

        // Copy out only the filled prefix; the pooled buffer itself goes back to the pool below.
        return pooled.AsSpan(0, total).ToArray();
    }
    finally
    {
        fs.Dispose();
        ArrayPool<byte>.Shared.Return(pooled);
    }
}
|
||||
public static Task WriteAllBytesAsync(string path, byte[] bytes, CancellationToken cancellationToken = default(CancellationToken)) | ||||
{ | ||||
if (path == null) | ||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ | |
|
||
using System.Runtime.InteropServices; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
using Xunit; | ||
|
||
namespace System.IO.Tests | ||
|
@@ -120,5 +121,57 @@ public void WriteToReadOnlyFile() | |
File.SetAttributes(path, FileAttributes.Normal); | ||
} | ||
} | ||
|
||
// Reading a file that was created and closed without writing anything yields a zero-length array.
[Fact]
public void EmptyFile_ReturnsEmptyArray()
{
    string emptyFilePath = GetTestFilePath();
    using (File.Create(emptyFilePath))
    {
        // Intentionally empty: the file is created and closed with no content.
    }

    byte[] contents = File.ReadAllBytes(emptyFilePath);
    Assert.Equal(0, contents.Length);
}
|
||
[Theory] | ||
[PlatformSpecific(TestPlatforms.Linux)] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar test for OSX maybe? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you know of any file paths that present this behavior on macOS? |
||
[InlineData("/proc/cmdline")] | ||
[InlineData("/proc/version")] | ||
[InlineData("/proc/filesystems")] | ||
public void ProcFs_EqualsReadAllText(string path)
{
    // Verifies that File.ReadAllBytes and File.ReadAllText agree on the content of the given file
    // once the bytes are decoded as UTF-8.
    const int NumTries = 3; // some of these could theoretically change between reads, so allow retries just in case
    for (int i = 1; i <= NumTries; i++)
    {
        try
        {
            byte[] bytes = File.ReadAllBytes(path);
            string text = File.ReadAllText(path);
            Assert.Equal(text, Encoding.UTF8.GetString(bytes));

            // Stop after the first matching attempt. Without this the loop always ran every
            // attempt, so only the final one decided the outcome and the retries were pointless.
            return;
        }
        catch when (i < NumTries)
        {
            // Swallow the failure and retry; on the last attempt the filter is false,
            // so the exception propagates and fails the test.
        }
    }
}
|
||
// Reads /proc/uptime through File.ReadAllBytes and checks that the decoded text consists of
// exactly two space-separated values that both parse as doubles.
// This is a [Fact], not a [Theory]: the method takes no parameters and has no data attributes,
// and a theory without data is reported by xUnit as a failure instead of being run.
[Fact]
[PlatformSpecific(TestPlatforms.Linux)]
public void ReadAllBytes_ProcFs_Uptime_ContainsTwoNumbers()
{
    string text = Encoding.UTF8.GetString(File.ReadAllBytes("/proc/uptime"));
    string[] parts = text.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    Assert.Equal(2, parts.Length);

    // Trim removes any whitespace left around each piece after splitting on spaces
    // (for example a trailing line break) before parsing.
    Assert.True(double.TryParse(parts[0].Trim(), out _));
    Assert.True(double.TryParse(parts[1].Trim(), out _));
}
|
||
// Each listed /proc path must produce at least one byte when read with File.ReadAllBytes.
[Theory]
[PlatformSpecific(TestPlatforms.Linux)]
[InlineData("/proc/meminfo")]
[InlineData("/proc/stat")]
[InlineData("/proc/cpuinfo")]
public void ProcFs_NotEmpty(string path)
{
    byte[] contents = File.ReadAllBytes(path);
    int byteCount = contents.Length;

    Assert.InRange(byteCount, 1, int.MaxValue);
}
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe I missed something, but it doesn't seem to `throw new IOException(SR.IO_FileTooLong2GB)` in case of `newLength > int.MaxValue`, like it is done in `ReadAllBytes` for the case where the length is known. And it seems like `DefaultArrayPool.Rent` doesn't throw either.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this stops when Rent throws out-of-memory which will happen when you ask it to allocate more than MaxByteArrayLength (or when there is no more memory).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can't see such a limit in `DefaultArrayPool`: corefx/src/System.Buffers/src/System/Buffers/DefaultArrayPool.cs, line 108 in bffef76.
So it will throw only on some runtime limit or on out of memory, which seems to be inconsistent with the known-length case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`MaxByteArrayLength` is the max array length supported by .NET.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see. Thanks! That's what I actually didn't realize