Skip to content

Commit

Permalink
Reduce memory and CPU costs due to SegmentedList usage (#75661)
Browse files Browse the repository at this point in the history
* Reduce memory and CPU costs due to SegmentedList usage

Currently, the SegmentedList class suffers from two inefficiencies:

1) Upon growth, it doubles the SegmentedArray size. This is necessary for normal List-like collections to get amortized constant-time growth, but isn't necessary (or desirable) for segmented data structures.
2) Upon growth, it reallocates and copies over the existing pages.

Instead, if we only allocate the modified/new pages and the array holding the pages, we can save significant CPU and allocation costs.
  • Loading branch information
ToddGrun authored Nov 1, 2024
1 parent 9bb57bf commit 6217b5c
Show file tree
Hide file tree
Showing 5 changed files with 275 additions and 14 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using Microsoft.CodeAnalysis.Collections;
using Xunit;

namespace Microsoft.CodeAnalysis.UnitTests.Collections
{
/// <summary>
/// Contains tests that ensure the correctness of the capacity-related behavior of
/// <see cref="SegmentedList{T}"/>: the <c>Capacity</c> setter and <c>EnsureCapacity</c>.
/// </summary>
public abstract partial class SegmentedList_Generic_Tests<T> : IList_Generic_Tests<T>
    where T : notnull
{
    /// <summary>
    /// Theory data for <see cref="Capacity_ReusesSegments"/>. Each row is
    /// <c>{ initialCapacity, segmentCountToAdd }</c>, with initial capacities below, at, and above a single
    /// segment, and between one and three whole segments to add.
    /// </summary>
    public static IEnumerable<object[]> TestLengthsAndSegmentCounts
    {
        get
        {
            var segmentSize = SegmentedArray<object>.TestAccessor.SegmentSize;

            for (var segmentsToAdd = 1; segmentsToAdd < 4; segmentsToAdd++)
            {
                yield return new object[] { 1, segmentsToAdd };
                yield return new object[] { 10, segmentsToAdd };
                yield return new object[] { 100, segmentsToAdd };
                yield return new object[] { segmentSize / 2, segmentsToAdd };
                yield return new object[] { segmentSize, segmentsToAdd };
                yield return new object[] { segmentSize * 2, segmentsToAdd };
                yield return new object[] { 100000, segmentsToAdd };
            }
        }
    }

    /// <summary>
    /// Fills the list to its initial capacity and verifies that setting <c>Capacity</c> to one less than
    /// that value throws <see cref="ArgumentOutOfRangeException"/>.
    /// </summary>
    [Theory]
    [MemberData(nameof(ValidCollectionSizes))]
    public void Capacity_ArgumentValidity(int initialCapacity)
    {
        var list = new SegmentedList<T>(initialCapacity);

        for (var i = 0; i < initialCapacity; i++)
            list.Add(CreateT(i));

        Assert.Throws<ArgumentOutOfRangeException>(() => list.Capacity = initialCapacity - 1);
    }

    /// <summary>
    /// Verifies that assigning <c>Capacity</c> results in exactly the requested capacity.
    /// </summary>
    [Theory]
    [InlineData(0, 1)]
    [InlineData(0, 10)]
    [InlineData(4, 6)]
    [InlineData(4, 10)]
    [InlineData(4, 100_000)]
    public void Capacity_MatchesSizeRequested(int initialCapacity, int requestedCapacity)
    {
        var list = new SegmentedList<T>(initialCapacity);

        list.Capacity = requestedCapacity;

        Assert.Equal(requestedCapacity, list.Capacity);
    }

    /// <summary>
    /// Grows the capacity by a whole number of segments and verifies that the segments preceding the last
    /// original segment are carried over by reference rather than copied.
    /// </summary>
    [Theory]
    [MemberData(nameof(TestLengthsAndSegmentCounts))]
    public void Capacity_ReusesSegments(int initialCapacity, int segmentCountToAdd)
    {
        var segmentSize = SegmentedArray<object>.TestAccessor.SegmentSize;
        var elementCountToAdd = segmentCountToAdd * segmentSize;

        var segmented = new SegmentedList<object>(initialCapacity);

        var oldSegments = SegmentedCollectionsMarshal.AsSegments(segmented.GetTestAccessor().Items);
        var oldSegmentCount = oldSegments.Length;

        segmented.Capacity = initialCapacity + elementCountToAdd;

        var resizedSegments = SegmentedCollectionsMarshal.AsSegments(segmented.GetTestAccessor().Items);
        var resizedSegmentCount = resizedSegments.Length;

        Assert.Equal(oldSegmentCount + segmentCountToAdd, resizedSegmentCount);

        // Every segment before the last original one must be the very same array instance.
        for (var i = 0; i < oldSegmentCount - 1; i++)
            Assert.Same(oldSegments[i], resizedSegments[i]);

        // From the last original segment up to (not including) the last new segment, each segment
        // must have been brought up to the full segment size.
        for (var i = oldSegmentCount - 1; i < resizedSegmentCount - 1; i++)
            Assert.Equal(segmentSize, resizedSegments[i].Length);

        // Whole segments were added, so the new trailing segment is a different array with the same
        // length as the original trailing segment.
        Assert.NotSame(oldSegments[oldSegmentCount - 1], resizedSegments[resizedSegmentCount - 1]);
        Assert.Equal(oldSegments[oldSegmentCount - 1].Length, resizedSegments[resizedSegmentCount - 1].Length);
    }

    /// <summary>
    /// Grows the capacity by a small amount and verifies that the list still consists of a single segment
    /// whose length equals the new capacity.
    /// </summary>
    [Theory]
    [CombinatorialData]
    public void Capacity_InOnlySingleSegment(
        [CombinatorialValues(1, 2, 10, 100)] int initialCapacity,
        [CombinatorialValues(1, 2, 10, 100)] int addItemCount)
    {
        var segmented = new SegmentedList<object>(initialCapacity);

        var oldSegments = SegmentedCollectionsMarshal.AsSegments(segmented.GetTestAccessor().Items);

        segmented.Capacity = initialCapacity + addItemCount;

        var resizedSegments = SegmentedCollectionsMarshal.AsSegments(segmented.GetTestAccessor().Items);

        Assert.Equal(1, oldSegments.Length);
        Assert.Equal(1, resizedSegments.Length);

        // NOTE(review): AsSegments appears to hand back the list's backing segment array itself. If the
        // Capacity setter resizes segments in place when the segment count is unchanged, both locals
        // refer to the same outer array and this check is trivially true - confirm the intent.
        Assert.Same(oldSegments[0], resizedSegments[0]);
        Assert.Equal(segmented.Capacity, resizedSegments[0].Length);
    }

    /// <summary>
    /// Verifies the capacity that results from <c>EnsureCapacity</c> for small requests; the expected
    /// capacity (which may exceed the requested one) is supplied by the inline data.
    /// </summary>
    [Theory]
    [InlineData(0, 1, 4)]
    [InlineData(0, 10, 10)]
    [InlineData(4, 6, 8)]
    [InlineData(4, 10, 10)]
    public void EnsureCapacity_ResizesAppropriately(int initialCapacity, int requestedCapacity, int expectedCapacity)
    {
        var list = new SegmentedList<T>(initialCapacity);

        list.EnsureCapacity(requestedCapacity);

        Assert.Equal(expectedCapacity, list.Capacity);
    }

    /// <summary>
    /// Starts from a list spanning one or more full segments and verifies that an <c>EnsureCapacity</c>
    /// request of slightly more than twice the current capacity yields exactly the requested capacity.
    /// </summary>
    [Theory]
    [InlineData(1)]
    [InlineData(2)]
    [InlineData(4)]
    public void EnsureCapacity_MatchesSizeWithLargeCapacityRequest(int segmentCount)
    {
        var elementCount = segmentCount * SegmentedArray<T>.TestAccessor.SegmentSize;
        var list = new SegmentedList<T>(elementCount);

        Assert.Equal(elementCount, list.Capacity);

        var requestedCapacity = 2 * elementCount + 10;
        list.EnsureCapacity(requestedCapacity);
        Assert.Equal(requestedCapacity, list.Capacity);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,8 @@ internal static class PrivateMarshal
/// <inheritdoc cref="SegmentedCollectionsMarshal.AsSegments{T}(SegmentedArray{T})"/>
public static T[][] AsSegments(SegmentedArray<T> array)
{
    // Returns the backing jagged array directly; no copy is made here.
    return array._items;
}

/// <inheritdoc cref="SegmentedCollectionsMarshal.AsSegmentedArray{T}(int, T[][])"/>
public static SegmentedArray<T> AsSegmentedArray(int length, T[][] segments)
{
    // Wraps the supplied segments via the (length, segments) constructor.
    return new SegmentedArray<T>(length, segments);
}
}
}
19 changes: 19 additions & 0 deletions src/Dependencies/Collections/SegmentedCollectionsMarshal.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,25 @@ internal static class SegmentedCollectionsMarshal
public static T[][] AsSegments<T>(SegmentedArray<T> array)
{
    // Forwards to the nested marshal type on SegmentedArray<T>.
    return SegmentedArray<T>.PrivateMarshal.AsSegments(array);
}

/// <summary>
/// Gets a <see cref="SegmentedArray{T}"/> value wrapping the input T[][].
/// </summary>
/// <typeparam name="T">The type of elements in the input.</typeparam>
/// <param name="length">The combined length of the input arrays.</param>
/// <param name="segments">The input array to wrap in the returned <see cref="SegmentedArray{T}"/> value.</param>
/// <returns>A <see cref="SegmentedArray{T}"/> value wrapping <paramref name="segments"/>.</returns>
/// <remarks>
/// <para>
/// When using this method, callers should take extra care to ensure that they're the sole owners of the input
/// array, and that it won't be modified once the returned <see cref="SegmentedArray{T}"/> value starts
/// being used. Doing so might cause undefined behavior in code paths which don't expect the contents of a given
/// <see cref="SegmentedArray{T}"/> value to change outside their control.
/// </para>
/// <para>
/// This method itself validates only that <paramref name="segments"/> is not <see langword="null"/>;
/// <paramref name="length"/> is forwarded as supplied.
/// </para>
/// </remarks>
/// <exception cref="System.ArgumentNullException">Thrown when <paramref name="segments"/> is <see langword="null"/>.</exception>
public static SegmentedArray<T> AsSegmentedArray<T>(int length, T[][] segments)
{
    // Enforce the documented contract explicitly instead of relying on downstream code to fail.
    if (segments is null)
        throw new System.ArgumentNullException(nameof(segments));

    return SegmentedArray<T>.PrivateMarshal.AsSegmentedArray(length, segments);
}

/// <summary>
/// Gets either a ref to a <typeparamref name="TValue"/> in the <see cref="SegmentedDictionary{TKey, TValue}"/> or a
/// ref null if it does not exist in the <paramref name="dictionary"/>.
Expand Down
48 changes: 34 additions & 14 deletions src/Dependencies/Collections/SegmentedList`1.cs
Original file line number Diff line number Diff line change
Expand Up @@ -131,21 +131,41 @@ public int Capacity
ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.value, ExceptionResource.ArgumentOutOfRange_SmallCapacity);
}

if (value != _items.Length)
if (value == _items.Length)
return;

if (value <= 0)
{
if (value > 0)
{
var newItems = new SegmentedArray<T>(value);
if (_size > 0)
{
SegmentedArray.Copy(_items, newItems, _size);
}
_items = newItems;
}
else
{
_items = s_emptyArray;
}
_items = s_emptyArray;
return;
}

if (_items.Length == 0)
{
// No data from existing array to reuse, just create a new one.
_items = new SegmentedArray<T>(value);
}
else
{
// Rather than creating a copy of _items, instead reuse as much of its data as possible.
var segments = SegmentedCollectionsMarshal.AsSegments(_items);

var oldSegmentCount = segments.Length;
var newSegmentCount = (value + SegmentedArrayHelper.GetSegmentSize<T>() - 1) >> SegmentedArrayHelper.GetSegmentShift<T>();

// Grow the array of segments, if necessary
Array.Resize(ref segments, newSegmentCount);

// Resize all segments to full segment size from the last old segment to the next to last
// new segment.
for (var i = oldSegmentCount - 1; i < newSegmentCount - 1; i++)
Array.Resize(ref segments[i], SegmentedArrayHelper.GetSegmentSize<T>());

// Resize the last segment
var lastSegmentSize = value - ((newSegmentCount - 1) << SegmentedArrayHelper.GetSegmentShift<T>());
Array.Resize(ref segments[newSegmentCount - 1], lastSegmentSize);

_items = SegmentedCollectionsMarshal.AsSegmentedArray(value, segments);
}
}
}
Expand Down
79 changes: 79 additions & 0 deletions src/Tools/IdeCoreBenchmarks/SegmentedListBenchmarks_Add.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using BenchmarkDotNet.Attributes;
using Microsoft.CodeAnalysis.Collections;

namespace IdeCoreBenchmarks
{
/// <summary>
/// Benchmarks appending <see cref="Count"/> items to a freshly created <see cref="SegmentedList{T}"/>
/// for element types of different sizes.
/// </summary>
[MemoryDiagnoser]
public class SegmentedListBenchmarks_Add
{
    /// <summary>
    /// Number of items appended to the list in each benchmark invocation.
    /// </summary>
    [Params(1_000, 10_000, 100_000, 1_000_000)]
    public int Count { get; set; }

    [Benchmark]
    public void AddIntToList()
    {
        AddToList(1);
    }

    [Benchmark]
    public void AddObjectToList()
    {
        AddToList(new object());
    }

    [Benchmark]
    public void AddLargeStructToList()
    {
        AddToList(new LargeStruct());
    }

    [Benchmark]
    public void AddEnormousStructToList()
    {
        AddToList(new EnormousStruct());
    }

    /// <summary>
    /// Creates an empty list and appends <paramref name="item"/> to it <see cref="Count"/> times.
    /// </summary>
    private void AddToList<T>(T item)
    {
        var list = new SegmentedList<T>();

        // Count is read once up front, so the number of appends is fixed for the whole run.
        for (var remaining = Count; remaining > 0; remaining--)
            list.Add(item);
    }

    /// <summary>
    /// Value type made of twenty <see cref="int"/> auto-properties.
    /// </summary>
    private struct LargeStruct
    {
        public int i1 { get; set; }
        public int i2 { get; set; }
        public int i3 { get; set; }
        public int i4 { get; set; }
        public int i5 { get; set; }
        public int i6 { get; set; }
        public int i7 { get; set; }
        public int i8 { get; set; }
        public int i9 { get; set; }
        public int i10 { get; set; }
        public int i11 { get; set; }
        public int i12 { get; set; }
        public int i13 { get; set; }
        public int i14 { get; set; }
        public int i15 { get; set; }
        public int i16 { get; set; }
        public int i17 { get; set; }
        public int i18 { get; set; }
        public int i19 { get; set; }
        public int i20 { get; set; }
    }

    /// <summary>
    /// Value type made of ten <see cref="LargeStruct"/> auto-properties.
    /// </summary>
    private struct EnormousStruct
    {
        public LargeStruct s1 { get; set; }
        public LargeStruct s2 { get; set; }
        public LargeStruct s3 { get; set; }
        public LargeStruct s4 { get; set; }
        public LargeStruct s5 { get; set; }
        public LargeStruct s6 { get; set; }
        public LargeStruct s7 { get; set; }
        public LargeStruct s8 { get; set; }
        public LargeStruct s9 { get; set; }
        public LargeStruct s10 { get; set; }
    }
}
}

0 comments on commit 6217b5c

Please sign in to comment.