Skip to content

Commit

Permalink
Preallocate list capacity when deserializing (to prevent later resizing)
Browse files Browse the repository at this point in the history
  • Loading branch information
Arithmomaniac authored and aloneguid committed Jan 5, 2024
1 parent 50ccd64 commit 643307d
Showing 1 changed file with 21 additions and 6 deletions.
27 changes: 21 additions & 6 deletions src/Parquet/Serialization/ParquetSerializer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,9 @@ public static async Task<IList<T>> DeserializeAsync<T>(Stream source,

Assembler<T> asm = GetAssembler<T>();

var result = new List<T>();
using ParquetReader reader = await ParquetReader.CreateAsync(source, options, cancellationToken: cancellationToken);

List<T> result = GetList<T>(reader.Metadata?.RowGroups[rowGroupIndex].NumRows);
await DeserializeRowGroupAsync(reader, rowGroupIndex, asm, result, cancellationToken);

return result;
Expand All @@ -200,9 +201,11 @@ public static async Task<IList<T>> DeserializeAsync<T>(Stream source,

Assembler<T> asm = GetAssembler<T>();

var result = new List<T>();

using ParquetReader reader = await ParquetReader.CreateAsync(source, options, cancellationToken: cancellationToken);

long? requestedCapacity = reader.Metadata?.RowGroups.Sum(x => x.NumRows);
List<T> result = GetList<T>(requestedCapacity);

for(int rgi = 0; rgi < reader.RowGroupCount; rgi++) {

await DeserializeRowGroupAsync(reader, rgi, asm, result, cancellationToken);
Expand Down Expand Up @@ -253,9 +256,11 @@ public static async IAsyncEnumerable<T> DeserializeAllAsync<T>(Stream source,

Assembler<T> asm = GetAssembler<T>();

var result = new List<T>();

using ParquetReader reader = await ParquetReader.CreateAsync(source, options, cancellationToken: cancellationToken);

long? requestedCapacity = reader.Metadata?.RowGroups.Max(x => x.NumRows);
List<T> result = GetList<T>(requestedCapacity);

for(int rgi = 0; rgi < reader.RowGroupCount; rgi++) {

await DeserializeRowGroupAsync(reader, rgi, asm, result, cancellationToken);
Expand Down Expand Up @@ -284,7 +289,7 @@ public static async Task<IList<T>> DeserializeAsync<T>(ParquetRowGroupReader row

Assembler<T> asm = GetAssembler<T>();

var result = new List<T>();
List<T> result = GetList<T>(rowGroupReader.RowGroup.NumRows);

await DeserializeRowGroupAsync(rowGroupReader, schema, asm, result, cancellationToken);

Expand Down Expand Up @@ -354,5 +359,15 @@ private static async Task DeserializeRowGroupAsync<T>(ParquetRowGroupReader rg,
}
}
}

/// <summary>
/// Creates an empty list, pre-sized to the requested capacity when that capacity is usable.
/// </summary>
/// <typeparam name="T">Element type of the list.</typeparam>
/// <param name="requestedCapacity">
/// Desired initial capacity, or <c>null</c> when it is unknown. Callers in this file pass
/// row counts taken from row group metadata, so the value is not guaranteed to be sane.
/// </param>
/// <returns>
/// A new, empty list. Its capacity is the requested one, clamped to the largest array length
/// the runtime accepts; a missing or non-positive request yields a default-capacity list.
/// </returns>
private static List<T> GetList<T>(long? requestedCapacity) {
    // Largest array length accepted on every .NET runtime for any element type.
    // Asking List<T> for int.MaxValue elements always throws OutOfMemoryException
    // ("array dimensions exceeded supported range"), so clamp below that limit.
    const int maxPreallocatedCapacity = 0X7FEFFFFF;

    // Unknown, zero, or negative (e.g. corrupt metadata): capacity is only a hint,
    // so fall back to a growable default list instead of letting List<T> throw.
    if(requestedCapacity == null || requestedCapacity.Value <= 0)
        return new List<T>();

    long capacity = requestedCapacity.Value;
    int clamped = capacity >= maxPreallocatedCapacity
        ? maxPreallocatedCapacity
        : (int)capacity;

    return new List<T>(clamped);
}
}
}

0 comments on commit 643307d

Please sign in to comment.