From ad2271f6fb2fe7ff6028f87e34d9d5fad6d80f36 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Thu, 11 Oct 2018 18:42:44 -0500 Subject: [PATCH] Implement VBuffer master plan WIP #1 --- src/Microsoft.ML.Core/Data/MetadataUtils.cs | 20 +- src/Microsoft.ML.Core/Data/VBuffer.cs | 219 +++++--- src/Microsoft.ML.Core/Utilities/MathUtils.cs | 54 +- src/Microsoft.ML.Core/Utilities/Utils.cs | 64 ++- .../Utilities/VBufferUtils.cs | 531 ++++++++++-------- src/Microsoft.ML.Data/Data/BufferBuilder.cs | 76 +-- .../DataLoadSave/Binary/Codecs.cs | 20 +- .../DataLoadSave/Text/TextLoaderParser.cs | 19 +- .../DataLoadSave/Text/TextSaver.cs | 18 +- .../DataView/CompositeSchema.cs | 2 +- .../Depricated/Vector/VBufferMathUtils.cs | 166 +++--- .../Depricated/Vector/VectorUtils.cs | 145 +++-- .../Transforms/DropSlotsTransform.cs | 3 +- .../Utilities/SlotDropper.cs | 38 +- .../TreeEnsemble/RegressionTree.cs | 19 +- .../KMeansPlusPlusTrainer.cs | 37 +- .../KMeansPredictor.cs | 2 +- .../Optimizer/DifferentiableFunction.cs | 4 +- .../Optimizer/OptimizationMonitor.cs | 2 +- .../Optimizer/Optimizer.cs | 2 +- .../Optimizer/SgdOptimizer.cs | 2 +- .../Standard/LinearClassificationTrainer.cs | 11 +- .../LogisticRegression/LbfgsPredictorBase.cs | 10 +- .../MulticlassLogisticRegression.cs | 23 +- .../Standard/SdcaMultiClass.cs | 10 +- 25 files changed, 816 insertions(+), 681 deletions(-) diff --git a/src/Microsoft.ML.Core/Data/MetadataUtils.cs b/src/Microsoft.ML.Core/Data/MetadataUtils.cs index 7ed3aecd9ee..e68b895a8f6 100644 --- a/src/Microsoft.ML.Core/Data/MetadataUtils.cs +++ b/src/Microsoft.ML.Core/Data/MetadataUtils.cs @@ -318,7 +318,10 @@ public static void GetSlotNames(RoleMappedSchema schema, RoleMappedSchema.Column IReadOnlyList list; if ((list = schema?.GetColumns(role)) == null || list.Count != 1 || !schema.Schema.HasSlotNames(list[0].Index, vectorSize)) - slotNames = new VBuffer>(vectorSize, 0, slotNames.Values, slotNames.Indices); + { + VBufferMutationContext.Create(ref slotNames, vectorSize, 0) + .Complete(ref slotNames); + } else schema.Schema.GetMetadata(Kinds.SlotNames, list[0].Index, ref slotNames); } @@ -447,21 +450,22 @@ public static bool TryGetCategoricalFeatureIndices(Schema schema, int colIndex, { int previousEndIndex = -1; isValid = true; - for (int i = 0; i < catIndices.Values.Length; i += 2) + var catIndicesValues = catIndices.GetValues(); + for (int i = 0; i < catIndicesValues.Length; i += 2) { - if (catIndices.Values[i] > catIndices.Values[i + 1] || - catIndices.Values[i] <= previousEndIndex || - catIndices.Values[i] >= columnSlotsCount || - catIndices.Values[i + 1] >= columnSlotsCount) + if (catIndicesValues[i] > catIndicesValues[i + 1] || + catIndicesValues[i] <= previousEndIndex || + catIndicesValues[i] >= columnSlotsCount || + catIndicesValues[i + 1] >= columnSlotsCount) { isValid = false; break; } - previousEndIndex = catIndices.Values[i + 1]; + previousEndIndex = catIndicesValues[i + 1]; } if (isValid) - categoricalFeatures = catIndices.Values.Select(val => val).ToArray(); + categoricalFeatures = catIndicesValues.ToArray(); } } diff --git a/src/Microsoft.ML.Core/Data/VBuffer.cs b/src/Microsoft.ML.Core/Data/VBuffer.cs index b867e8542e8..efcf3f88517 100644 --- a/src/Microsoft.ML.Core/Data/VBuffer.cs +++ b/src/Microsoft.ML.Core/Data/VBuffer.cs @@ -16,6 +16,9 @@ namespace Microsoft.ML.Runtime.Data /// public readonly struct VBuffer { + private readonly T[] _values; + private readonly int[] _indices; + /// /// The logical length of the buffer. /// @@ -27,17 +30,6 @@ public readonly struct VBuffer /// public readonly int Count; - /// - /// The values. Only the first Count of these are valid. - /// - public readonly T[] Values; - - /// - /// The indices. For a dense representation, this array is not used. For a sparse representation - /// it is parallel to values and specifies the logical indices for the corresponding values. - /// - public readonly int[] Indices; - /// /// The explicitly represented values. /// @@ -50,7 +42,8 @@ public readonly struct VBuffer public ReadOnlySpan GetIndices() => IsDense ? default : Indices.AsSpan(0, Count); /// - /// Equivalent to Count == Length. + /// Gets a value indicating whether every logical element is explicitly + /// represented in the buffer. /// public bool IsDense { @@ -72,8 +65,8 @@ public VBuffer(int length, T[] values, int[] indices = null) Length = length; Count = length; - Values = values; - Indices = indices; + _values = values; + _indices = indices; } /// @@ -104,8 +97,8 @@ public VBuffer(int length, int count, T[] values, int[] indices) Length = length; Count = count; - Values = values; - Indices = indices; + _values = values; + _indices = indices; } /// @@ -113,15 +106,13 @@ public VBuffer(int length, int count, T[] values, int[] indices) /// public void CopyToDense(ref VBuffer dst) { - var values = dst.Values; - if (Utils.Size(values) < Length) - values = new T[Length]; + var mutation = VBufferMutationContext.Create(ref dst, Length, Count); if (!IsDense) - CopyTo(values); + CopyTo(mutation.Values); else if (Length > 0) - Array.Copy(Values, values, Length); - dst = new VBuffer(Length, values, dst.Indices); + _values.AsSpan(0, Length).CopyTo(mutation.Values); + mutation.Complete(ref dst); } /// @@ -129,31 +120,24 @@ public void CopyToDense(ref VBuffer dst) /// public void CopyTo(ref VBuffer dst) { - var values = dst.Values; - var indices = dst.Indices; + var mutation = VBufferMutationContext.Create(ref dst, Length, Count); if (IsDense) { if (Length > 0) { - if (Utils.Size(values) < Length) - values = new T[Length]; - Array.Copy(Values, values, Length); + _values.AsSpan(0, Length).CopyTo(mutation.Values); } - dst = new VBuffer(Length, values, indices); + mutation.Complete(ref dst); Contracts.Assert(dst.IsDense); } else { if (Count > 0) { - if (Utils.Size(values) < Count) - values = new T[Count]; - if (Utils.Size(indices) < Count) - indices = new int[Count]; - Array.Copy(Values, values, Count); - Array.Copy(Indices, indices, Count); + _values.AsSpan(0, Count).CopyTo(mutation.Values); + _indices.AsSpan(0, Count).CopyTo(mutation.Indices); } - dst = new VBuffer(Length, Count, values, indices); + mutation.Complete(ref dst); } } @@ -164,17 +148,15 @@ public void CopyTo(ref VBuffer dst, int srcMin, int length) { Contracts.Check(0 <= srcMin && srcMin <= Length, "srcMin"); Contracts.Check(0 <= length && srcMin <= Length - length, "length"); - var values = dst.Values; - var indices = dst.Indices; + if (IsDense) { + var mutation = VBufferMutationContext.Create(ref dst, length, length); if (length > 0) { - if (Utils.Size(values) < length) - values = new T[length]; - Array.Copy(Values, srcMin, values, 0, length); + _values.AsSpan(srcMin, length).CopyTo(mutation.Values); } - dst = new VBuffer(length, values, indices); + mutation.Complete(ref dst); Contracts.Assert(dst.IsDense); } else @@ -182,29 +164,31 @@ public void CopyTo(ref VBuffer dst, int srcMin, int length) int copyCount = 0; if (Count > 0) { - int copyMin = Indices.FindIndexSorted(0, Count, srcMin); - int copyLim = Indices.FindIndexSorted(copyMin, Count, srcMin + length); + int copyMin = _indices.FindIndexSorted(0, Count, srcMin); + int copyLim = _indices.FindIndexSorted(copyMin, Count, srcMin + length); Contracts.Assert(copyMin <= copyLim); copyCount = copyLim - copyMin; + var mutation = VBufferMutationContext.Create(ref dst, length, copyCount); if (copyCount > 0) { - if (Utils.Size(values) < copyCount) - values = new T[copyCount]; - Array.Copy(Values, copyMin, values, 0, copyCount); + _values.AsSpan(copyMin, copyCount).CopyTo(mutation.Values); if (copyCount < length) { - if (Utils.Size(indices) < copyCount) - indices = new int[copyCount]; for (int i = 0; i < copyCount; ++i) - indices[i] = Indices[i + copyMin] - srcMin; + mutation.Indices[i] = _indices[i + copyMin] - srcMin; } } + mutation.Complete(ref dst); + } + else + { + var mutation = VBufferMutationContext.Create(ref dst, length, copyCount); + mutation.Complete(ref dst); } - dst = new VBuffer(length, copyCount, values, indices); } } - /// +/* /// /// Copy from this buffer to the given destination, making sure to explicitly include the /// first count indices in indicesInclude. Note that indicesInclude should be sorted /// with each index less than this.Length. Note that this can make the destination be @@ -376,43 +360,43 @@ public void CopyTo(ref VBuffer dst, int[] indicesInclude, int count) Contracts.Assert(size == ii || size == 0); dst = new VBuffer(Length, ii, values, indices); - } + }*/ /// /// Copy from this buffer to the given destination array. This "densifies". /// - public void CopyTo(T[] dst) + public void CopyTo(Span dst) { CopyTo(dst, 0); } - public void CopyTo(T[] dst, int ivDst, T defaultValue = default(T)) + public void CopyTo(Span dst, int ivDst, T defaultValue = default(T)) { - Contracts.CheckParam(0 <= ivDst && ivDst <= Utils.Size(dst) - Length, nameof(dst), "dst is not large enough"); + Contracts.CheckParam(0 <= ivDst && ivDst <= dst.Length - Length, nameof(dst), "dst is not large enough"); if (Length == 0) return; if (IsDense) { - Array.Copy(Values, 0, dst, ivDst, Length); + _values.AsSpan(0, Length).CopyTo(dst.Slice(ivDst)); return; } if (Count == 0) { - Array.Clear(dst, ivDst, Length); + dst.Slice(ivDst, Length).Clear(); return; } int iv = 0; for (int islot = 0; islot < Count; islot++) { - int slot = Indices[islot]; + int slot = _indices[islot]; Contracts.Assert(slot >= iv); while (iv < slot) dst[ivDst + iv++] = defaultValue; Contracts.Assert(iv == slot); - dst[ivDst + iv++] = Values[islot]; + dst[ivDst + iv++] = _values[islot]; } while (iv < Length) dst[ivDst + iv++] = defaultValue; @@ -425,24 +409,22 @@ public static void Copy(T[] src, int srcIndex, ref VBuffer dst, int length) { Contracts.CheckParam(0 <= length && length <= Utils.Size(src), nameof(length)); Contracts.CheckParam(0 <= srcIndex && srcIndex <= Utils.Size(src) - length, nameof(srcIndex)); - var values = dst.Values; + var mutation = VBufferMutationContext.Create(ref dst, length, length); if (length > 0) { - if (Utils.Size(values) < length) - values = new T[length]; - Array.Copy(src, srcIndex, values, 0, length); + src.AsSpan(srcIndex, length).CopyTo(mutation.Values); } - dst = new VBuffer(length, values, dst.Indices); + mutation.Complete(ref dst); } public IEnumerable> Items(bool all = false) { - return VBufferUtils.Items(Values, Indices, Length, Count, all); + return VBufferUtils.Items(_values, _indices, Length, Count, all); } public IEnumerable DenseValues() { - return VBufferUtils.DenseValues(Values, Indices, Length, Count); + return VBufferUtils.DenseValues(_values, _indices, Length, Count); } public void GetItemOrDefault(int slot, ref T dst) @@ -451,9 +433,9 @@ public void GetItemOrDefault(int slot, ref T dst) int index; if (IsDense) - dst = Values[slot]; - else if (Count > 0 && Indices.TryFindIndexSorted(0, Count, slot, out index)) - dst = Values[index]; + dst = _values[slot]; + else if (Count > 0 && _indices.TryFindIndexSorted(0, Count, slot, out index)) + dst = _values[index]; else dst = default(T); } @@ -464,10 +446,103 @@ public T GetItemOrDefault(int slot) int index; if (IsDense) - return Values[slot]; - if (Count > 0 && Indices.TryFindIndexSorted(0, Count, slot, out index)) - return Values[index]; + return _values[slot]; + if (Count > 0 && _indices.TryFindIndexSorted(0, Count, slot, out index)) + return _values[index]; return default(T); } + + internal VBufferMutationContext GetMutableContext( + int newLogicalLength, + int? valuesCount, + int? maxValuesCapacity, + bool keepOldOnResize, + out bool createdNewValues, + out bool createdNewIndices) + { + Contracts.CheckParam(newLogicalLength >= 0, nameof(newLogicalLength)); + Contracts.CheckParam(valuesCount == null || valuesCount.Value <= newLogicalLength, nameof(valuesCount)); + + valuesCount = valuesCount ?? newLogicalLength; + int maxCapacity = maxValuesCapacity ?? newLogicalLength; + + T[] values = _values; + Utils.EnsureSize(ref values, valuesCount.Value, maxCapacity, keepOldOnResize, out createdNewValues); + + int[] indices = _indices; + bool isDense = newLogicalLength == valuesCount.Value; + if (isDense) + { + createdNewIndices = false; + } + else + { + Utils.EnsureSize(ref indices, valuesCount.Value, maxCapacity, keepOldOnResize, out createdNewIndices); + } + + return new VBufferMutationContext(newLogicalLength, valuesCount.Value, values, indices); + } + } + + public static class VBufferMutationContext + { + public static VBufferMutationContext Create( + ref VBuffer destination, + int newLogicalLength, + int? valuesCount = null, + int? maxValuesCapacity = null, + bool keepOldOnResize = false) + { + return destination.GetMutableContext( + newLogicalLength, + valuesCount, + maxValuesCapacity, + keepOldOnResize, + out bool _, + out bool _); + } + + public static VBufferMutationContext Create( + ref VBuffer destination, + int newLogicalLength, + out bool createdNewValues, + out bool createdNewIndices, + int? valuesCount = null, + int? maxValuesCapacity = null, + bool keepOldOnResize = false) + { + return destination.GetMutableContext( + newLogicalLength, + valuesCount, + maxValuesCapacity, + keepOldOnResize, + out createdNewValues, + out createdNewIndices); + } + } + + public ref struct VBufferMutationContext + { + private readonly int _logicalLength; + private readonly T[] _values; + private readonly int[] _indices; + + public readonly Span Values; + public readonly Span Indices; + + internal VBufferMutationContext(int logicalLength, int physicalValuesCount, T[] values, int[] indices) + { + _logicalLength = logicalLength; + _values = values; + _indices = indices; + + Values = _values.AsSpan(0, physicalValuesCount); + Indices = _indices.AsSpan(0, physicalValuesCount); + } + + public void Complete(ref VBuffer destintation) + { + destintation = new VBuffer(_logicalLength, Values.Length, _values, _indices); + } } } \ No newline at end of file diff --git a/src/Microsoft.ML.Core/Utilities/MathUtils.cs b/src/Microsoft.ML.Core/Utilities/MathUtils.cs index fb68ee82d6c..6a3da193ff8 100644 --- a/src/Microsoft.ML.Core/Utilities/MathUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/MathUtils.cs @@ -132,40 +132,23 @@ public static Float Min(Float[] a) } /// - /// Finds the first index of the max element of the array. + /// Finds the first index of the max element of the span. /// NaNs are ignored. If all the elements to consider are NaNs, -1 is /// returned. The caller should distinguish in this case between two /// possibilities: /// 1) The number of the element to consider is zero. /// 2) All the elements to consider are NaNs. /// - /// an array - /// the first index of the max element - public static int ArgMax(Float[] a) - { - return ArgMax(a, Utils.Size(a)); - } - - /// - /// Finds the first index of the max element of the array. - /// NaNs are ignored. If all the elements to consider are NaNs, -1 is - /// returned. The caller should distinguish in this case between two - /// possibilities: - /// 1) The number of the element to consider is zero. - /// 2) All the elements to consider are NaNs. - /// - /// an array - /// number of the element in the array to consider + /// The span of floats. /// the first index of the max element - public static int ArgMax(Float[] a, int count) + public static int ArgMax(ReadOnlySpan a) { - Contracts.Assert(0 <= count && count <= Utils.Size(a)); - if (count == 0) + if (a.IsEmpty) return -1; int amax = -1; Float max = Float.NegativeInfinity; - for (int i = count - 1; i >= 0; i--) + for (int i = a.Length - 1; i >= 0; i--) { if (max <= a[i]) { @@ -178,40 +161,23 @@ public static int ArgMax(Float[] a, int count) } /// - /// Finds the first index of the minimum element of the array. + /// Finds the first index of the minimum element of the span. /// NaNs are ignored. If all the elements to consider are NaNs, -1 is /// returned. The caller should distinguish in this case between two /// possibilities: /// 1) The number of the element to consider is zero. /// 2) All the elements to consider are NaNs. /// - /// an array - /// the first index of the minimum element - public static int ArgMin(Float[] a) - { - return ArgMin(a, Utils.Size(a)); - } - - /// - /// Finds the first index of the minimum element of the array. - /// NaNs are ignored. If all the elements to consider are NaNs, -1 is - /// returned. The caller should distinguish in this case between two - /// possibilities: - /// 1) The number of the element to consider is zero. - /// 2) All the elements to consider are NaNs. - /// - /// an array - /// number of the element in the array to consider + /// The span of floats. /// the first index of the minimum element - public static int ArgMin(Float[] a, int count) + public static int ArgMin(ReadOnlySpan a) { - Contracts.Assert(0 <= count && count <= Utils.Size(a)); - if (count == 0) + if (a.IsEmpty) return -1; int amin = -1; Float min = Float.PositiveInfinity; - for (int i = count - 1; i >= 0; i--) + for (int i = a.Length - 1; i >= 0; i--) { if (min >= a[i]) { diff --git a/src/Microsoft.ML.Core/Utilities/Utils.cs b/src/Microsoft.ML.Core/Utilities/Utils.cs index 9a6ecb9b0b0..d666e6b486f 100644 --- a/src/Microsoft.ML.Core/Utilities/Utils.cs +++ b/src/Microsoft.ML.Core/Utilities/Utils.cs @@ -180,18 +180,6 @@ public static void Push(ref Stack stack, T item) stack.Push(item); } - /// - /// Assumes input is sorted and finds value using BinarySearch. - /// If value is not found, returns the logical index of 'value' in the sorted list i.e index of the first element greater than value. - /// In case of duplicates it returns the index of the first one. - /// It guarantees that items before the returned index are < value, while those at and after the returned index are >= value. - /// - public static int FindIndexSorted(this int[] input, int value) - { - Contracts.AssertValue(input); - return FindIndexSorted(input, 0, input.Length, value); - } - /// /// Assumes input is sorted and finds value using BinarySearch. /// If value is not found, returns the logical index of 'value' in the sorted list i.e index of the first element greater than value. @@ -239,6 +227,17 @@ public static bool TryFindIndexSorted(this int[] input, int min, int lim, int va return index < lim && input[index] == value; } + /// + /// Akin to FindIndexSorted, except stores the found index in the output + /// index parameter, and returns whether that index is a valid index + /// pointing to a value equal to the input parameter value. + /// + public static bool TryFindIndexSorted(ReadOnlySpan input, int min, int lim, int value, out int index) + { + index = FindIndexSorted(input, min, lim, value); + return index < lim && input[index] == value; + } + /// /// Assumes input is sorted and finds value using BinarySearch. /// If value is not found, returns the logical index of 'value' in the sorted list i.e index of the first element greater than value. @@ -465,9 +464,8 @@ public static int[] GetIdentityPermutation(int size) return res; } - public static void FillIdentity(int[] a, int lim) + public static void FillIdentity(Span a, int lim) { - Contracts.AssertValue(a); Contracts.Assert(0 <= lim & lim <= a.Length); for (int i = 0; i < lim; ++i) @@ -856,12 +854,19 @@ public static int EnsureSize(ref T[] array, int min, bool keepOld = true) /// /// The new size, that is no less than and no more that . public static int EnsureSize(ref T[] array, int min, int max, bool keepOld = true) + => EnsureSize(ref array, min, max, keepOld, out bool _); + + public static int EnsureSize(ref T[] array, int min, int max, bool keepOld, out bool resized) { Contracts.CheckParam(min <= max, nameof(max), "min must not exceed max"); // This code adapted from the private method EnsureCapacity code of List. int size = Utils.Size(array); if (size >= min) + { + resized = false; return size; + } + int newSize = size == 0 ? 4 : size * 2; // This constant taken from the internal code of system\array.cs of mscorlib. if ((uint)newSize > max) @@ -872,6 +877,8 @@ public static int EnsureSize(ref T[] array, int min, int max, bool keepOld = Array.Resize(ref array, newSize); else array = new T[newSize]; + + resized = true; return newSize; } @@ -1097,5 +1104,34 @@ public static string GetDescription(this Enum value) } return null; } + + public static int Count(this ReadOnlySpan source, Func predicate) + { + Contracts.CheckValue(predicate, nameof(predicate)); + + int result = 0; + for (int i = 0; i < source.Length; i++) + { + if (predicate(source[i])) + { + result++; + } + } + return result; + } + + public static bool All(this ReadOnlySpan source, Func predicate) + { + Contracts.CheckValue(predicate, nameof(predicate)); + + for (int i = 0; i < source.Length; i++) + { + if (!predicate(source[i])) + { + return false; + } + } + return true; + } } } diff --git a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs index 19a7819325f..03615246c27 100644 --- a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs @@ -175,7 +175,7 @@ public static void ForEachDefined(in VBuffer a, Action visitor) /// Applies the to each corresponding pair of elements /// where the item is emplicitly defined in the vector. By explicitly defined, /// we mean that for a given index i, both vectors have an entry in - /// corresponding to that index. + /// corresponding to that index. /// /// The first vector /// The second vector @@ -313,9 +313,11 @@ public static void ForEachEitherDefined(in VBuffer a, in VBuffer b, Act /// public static void Clear(ref VBuffer dst) { - if (dst.Count == 0) + int dstValuesCount = dst.GetValues().Length; + if (dstValuesCount == 0) return; - Array.Clear(dst.Values, 0, dst.Count); + var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dstValuesCount); + mutation.Values.Clear(); } // REVIEW: Look into removing slot in this and other manipulators, so that we @@ -343,15 +345,18 @@ public static void Apply(ref VBuffer dst, SlotValueManipulator manip) { Contracts.CheckValue(manip, nameof(manip)); + int dstValuesCount = dst.GetValues().Length; + var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dstValuesCount); if (dst.IsDense) { - for (int i = 0; i < dst.Length; i++) - manip(i, ref dst.Values[i]); + for (int i = 0; i < mutation.Values.Length; i++) + manip(i, ref mutation.Values[i]); } else { - for (int i = 0; i < dst.Count; i++) - manip(dst.Indices[i], ref dst.Values[i]); + var dstIndices = dst.GetIndices(); + for (int i = 0; i < mutation.Values.Length; i++) + manip(dstIndices[i], ref mutation.Values[i]); } } @@ -375,17 +380,19 @@ public static void ApplyAt(ref VBuffer dst, int slot, SlotValueManipulator Contracts.CheckValue(manip, nameof(manip)); Contracts.CheckValueOrNull(pred); + int dstValuesCount = dst.GetValues().Length; + var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dstValuesCount); if (dst.IsDense) { // The vector is dense, so we can just do a direct access. - manip(slot, ref dst.Values[slot]); + manip(slot, ref mutation.Values[slot]); return; } int idx = 0; - if (dst.Count > 0 && Utils.TryFindIndexSorted(dst.Indices, 0, dst.Count, slot, out idx)) + if (dstValuesCount > 0 && Utils.TryFindIndexSorted(mutation.Indices, 0, dstValuesCount, slot, out idx)) { // Vector is sparse, but the item exists so we can access it. - manip(slot, ref dst.Values[idx]); + manip(slot, ref mutation.Values[idx]); return; } // The vector is sparse and there is no corresponding item, yet. @@ -396,26 +403,24 @@ public static void ApplyAt(ref VBuffer dst, int slot, SlotValueManipulator if (pred(ref value)) return; // We have to insert this value, somehow. - int[] indices = dst.Indices; - T[] values = dst.Values; + // There is a modest special case where there is exactly one free slot // we are modifying in the sparse vector, in which case the vector becomes // dense. Then there is no need to do anything with indices. - bool needIndices = dst.Count + 1 < dst.Length; - if (needIndices) - Utils.EnsureSize(ref indices, dst.Count + 1, dst.Length - 1); - Utils.EnsureSize(ref values, dst.Count + 1, dst.Length); - if (idx != dst.Count) + bool needIndices = dstValuesCount + 1 < dst.Length; + mutation = VBufferMutationContext.Create(ref dst, dst.Length, dstValuesCount + 1); + if (idx != dstValuesCount) { // We have to do some sort of shift copy. + int sliceLength = dstValuesCount - idx; if (needIndices) - Array.Copy(indices, idx, indices, idx + 1, dst.Count - idx); - Array.Copy(values, idx, values, idx + 1, dst.Count - idx); + mutation.Indices.Slice(idx, sliceLength).CopyTo(mutation.Indices.Slice(idx + 1)); + mutation.Values.Slice(idx, sliceLength).CopyTo(mutation.Values.Slice(idx + 1)); } if (needIndices) - indices[idx] = slot; - values[idx] = value; - dst = new VBuffer(dst.Length, dst.Count + 1, values, indices); + mutation.Indices[idx] = slot; + mutation.Values[idx] = value; + mutation.Complete(ref dst); } /// @@ -425,37 +430,42 @@ public static void Densify(ref VBuffer dst) { if (dst.IsDense) return; - var indices = dst.Indices; - var values = dst.Values; - if (Utils.Size(values) >= dst.Length) + + var indices = dst.GetIndices(); + var values = dst.GetValues(); + var mutation = VBufferMutationContext.Create( + ref dst, + dst.Length, + out bool createdNewValues, out bool _); + + if (!createdNewValues) { // Densify in place. - for (int i = dst.Count; --i >= 0; ) + for (int i = values.Length; --i >= 0; ) { Contracts.Assert(i <= indices[i]); - values[indices[i]] = values[i]; + mutation.Values[indices[i]] = values[i]; } - if (dst.Count == 0) - Array.Clear(values, 0, dst.Length); + if (values.Length == 0) + mutation.Values.Clear(); else { int min = 0; - for (int ii = 0; ii < dst.Count; ++ii) + for (int ii = 0; ii < values.Length; ++ii) { - Array.Clear(values, min, indices[ii] - min); + mutation.Values.Slice(min, indices[ii] - min).Clear(); min = indices[ii] + 1; } - Array.Clear(values, min, dst.Length - min); + mutation.Values.Slice(min, dst.Length - min).Clear(); } } else { - T[] newValues = new T[dst.Length]; - for (int i = 0; i < dst.Count; ++i) - newValues[indices[i]] = values[i]; - values = newValues; + // createdNewValues is true, keepOldOnResize is false, so mutation.Values is already cleared + for (int i = 0; i < values.Length; ++i) + mutation.Values[indices[i]] = values[i]; } - dst = new VBuffer(dst.Length, values, indices); + mutation.Complete(ref dst); } /// @@ -465,7 +475,9 @@ public static void Densify(ref VBuffer dst) public static void DensifyFirst(ref VBuffer dst, int denseCount) { Contracts.Check(0 <= denseCount && denseCount <= dst.Length); - if (dst.IsDense || denseCount == 0 || (dst.Count >= denseCount && dst.Indices[denseCount - 1] == denseCount - 1)) + var dstValues = dst.GetValues(); + var dstIndices = dst.GetIndices(); + if (dst.IsDense || denseCount == 0 || (dstValues.Length >= denseCount && dstIndices[denseCount - 1] == denseCount - 1)) return; if (denseCount == dst.Length) { @@ -473,37 +485,36 @@ public static void DensifyFirst(ref VBuffer dst, int denseCount) return; } - // Densify the first BiasCount entries. - int[] indices = dst.Indices; - T[] values = dst.Values; - if (indices == null) + // Densify the first denseCount entries. + if (dstIndices.IsEmpty) { - Contracts.Assert(dst.Count == 0); - indices = Utils.GetIdentityPermutation(denseCount); - Utils.EnsureSize(ref values, denseCount, dst.Length, keepOld: false); - Array.Clear(values, 0, denseCount); - dst = new VBuffer(dst.Length, denseCount, values, indices); + // no previous values + var newIndicesMutation = VBufferMutationContext.Create(ref dst, dst.Length, denseCount); + Utils.FillIdentity(newIndicesMutation.Indices, denseCount); + newIndicesMutation.Values.Clear(); + newIndicesMutation.Complete(ref dst); return; } - int lim = Utils.FindIndexSorted(indices, 0, dst.Count, denseCount); + int lim = Utils.FindIndexSorted(dstIndices, 0, dstValues.Length, denseCount); Contracts.Assert(lim < denseCount); - int newLen = dst.Count + denseCount - lim; + int newLen = dstValues.Length + denseCount - lim; if (newLen == dst.Length) { Densify(ref dst); return; } - Utils.EnsureSize(ref values, newLen, dst.Length); - Utils.EnsureSize(ref indices, newLen, dst.Length); - Array.Copy(values, lim, values, denseCount, dst.Count - lim); - Array.Copy(indices, lim, indices, denseCount, dst.Count - lim); + + var mutation = VBufferMutationContext.Create(ref dst, dst.Length, newLen, keepOldOnResize: true); + int sliceLength = dstValues.Length - lim; + mutation.Values.Slice(lim, sliceLength).CopyTo(mutation.Values.Slice(denseCount)); + mutation.Indices.Slice(lim, sliceLength).CopyTo(mutation.Indices.Slice(denseCount)); int i = lim - 1; for (int ii = denseCount; --ii >= 0; ) { - values[ii] = i >= 0 && indices[i] == ii ? values[i--] : default(T); - indices[ii] = ii; + mutation.Values[ii] = i >= 0 && dstIndices[i] == ii ? dstValues[i--] : default(T); + mutation.Indices[ii] = ii; } - dst = new VBuffer(dst.Length, newLen, values, indices); + mutation.Complete(ref dst); } /// @@ -521,9 +532,10 @@ public static void CreateMaybeSparseCopy(in VBuffer src, ref VBuffer ds int sparseCount = 0; var sparseCountThreshold = (int)(src.Length * sparsityThreshold); + var srcValues = src.GetValues(); for (int i = 0; i < src.Length; i++) { - if (!isDefaultPredicate(in src.Values[i])) + if (!isDefaultPredicate(in srcValues[i])) sparseCount++; if (sparseCount > sparseCountThreshold) @@ -533,23 +545,17 @@ public static void CreateMaybeSparseCopy(in VBuffer src, ref VBuffer ds } } - var indices = dst.Indices; - var values = dst.Values; - + var mutation = VBufferMutationContext.Create(ref dst, src.Length, sparseCount); if (sparseCount > 0) { - if (Utils.Size(values) < sparseCount) - values = new T[sparseCount]; - if (Utils.Size(indices) < sparseCount) - indices = new int[sparseCount]; int j = 0; for (int i = 0; i < src.Length; i++) { - if (!isDefaultPredicate(in src.Values[i])) + if (!isDefaultPredicate(in srcValues[i])) { Contracts.Assert(j < sparseCount); - indices[j] = i; - values[j] = src.Values[i]; + mutation.Indices[j] = i; + mutation.Values[j] = srcValues[i]; j++; } } @@ -557,7 +563,7 @@ public static void CreateMaybeSparseCopy(in VBuffer src, ref VBuffer ds Contracts.Assert(j == sparseCount); } - dst = new VBuffer(src.Length, sparseCount, values, indices); + mutation.Complete(ref dst); } /// @@ -666,10 +672,10 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< // of the "outer" parameter. There are nine, top level cases. Each case is // considered in this order. - // 1. src.Count == 0. + // 1. srcValues.Length == 0. // 2. src.Dense. // 3. dst.Dense. - // 4. dst.Count == 0. + // 4. dstValues.Length == 0. // Beyond this point the cases can assume both src/dst are sparse non-empty vectors. // We then calculate the size of the resulting output array, then use that to fall @@ -687,20 +693,24 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< // Case 5 does not require special handling, because it falls through to other cases // that do the special handling for them. - if (src.Count == 0) + var srcValues = src.GetValues(); + var dstValues = dst.GetValues(); + var dstIndices = dst.GetIndices(); + var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dst.Count); + if (srcValues.Length == 0) { - // Major case 1, with src.Count == 0. + // Major case 1, with srcValues.Length == 0. if (!outer) return; if (dst.IsDense) { for (int i = 0; i < dst.Length; i++) - manip(i, default(TSrc), ref dst.Values[i]); + manip(i, default(TSrc), ref mutation.Values[i]); } else { - for (int i = 0; i < dst.Count; i++) - manip(dst.Indices[i], default(TSrc), ref dst.Values[i]); + for (int i = 0; i < dstValues.Length; i++) + manip(dstIndices[i], default(TSrc), ref mutation.Values[i]); } return; } @@ -711,33 +721,34 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< if (!dst.IsDense) Densify(ref dst); // Both are now dense. Both cases of outer are covered. - for (int i = 0; i < src.Length; i++) - manip(i, src.Values[i], ref dst.Values[i]); + for (int i = 0; i < srcValues.Length; i++) + manip(i, srcValues[i], ref mutation.Values[i]); return; } + var srcIndices = src.GetIndices(); if (dst.IsDense) { - // Major case 3, with dst.Dense. Note that !a.Dense. + // Major case 3, with dst.Dense. Note that !src.Dense. if (outer) { int sI = 0; - int sIndex = src.Indices[sI]; + int sIndex = srcIndices[sI]; for (int i = 0; i < dst.Length; ++i) { if (i == sIndex) { - manip(i, src.Values[sI], ref dst.Values[i]); - sIndex = ++sI == src.Count ? src.Length : src.Indices[sI]; + manip(i, srcValues[sI], ref mutation.Values[i]); + sIndex = ++sI == srcValues.Length ? src.Length : srcIndices[sI]; } else - manip(i, default(TSrc), ref dst.Values[i]); + manip(i, default(TSrc), ref mutation.Values[i]); } } else { for (int i = 0; i < src.Count; i++) - manip(src.Indices[i], src.Values[i], ref dst.Values[src.Indices[i]]); + manip(srcIndices[i], srcValues[i], ref mutation.Values[srcIndices[i]]); } return; } @@ -746,14 +757,14 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< { // Major case 4, with dst empty. Note that !src.Dense. // Neither is dense, and dst is empty. Both cases of outer are covered. - var values = dst.Values; - var indices = dst.Indices; - Utils.EnsureSize(ref values, src.Count, src.Length); - Array.Clear(values, 0, src.Count); - Utils.EnsureSize(ref indices, src.Count, src.Length); + mutation = VBufferMutationContext.Create(ref dst, + src.Length, + src.Count, + maxValuesCapacity: src.Length); + mutation.Values.Clear(); for (int i = 0; i < src.Count; i++) - manip(indices[i] = src.Indices[i], src.Values[i], ref values[i]); - dst = new VBuffer(src.Length, src.Count, values, indices); + manip(mutation.Indices[i] = srcIndices[i], srcValues[i], ref mutation.Values[i]); + mutation.Complete(ref dst); return; } @@ -763,15 +774,15 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< // Try to find each src index in dst indices, counting how many more we'll add. for (int sI = 0; sI < src.Count; sI++) { - int sIndex = src.Indices[sI]; - while (dI < dst.Count && dst.Indices[dI] < sIndex) + int sIndex = srcIndices[sI]; + while (dI < dst.Count && dstIndices[dI] < sIndex) dI++; if (dI == dst.Count) { newCount += src.Count - sI; break; } - if (dst.Indices[dI] == sIndex) + if (dstIndices[dI] == sIndex) dI++; else newCount++; @@ -804,14 +815,16 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< // proved to be inefficient so we go to the little bit of extra work // to handle it here. - var indices = dst.Indices; - var values = dst.Values; - Utils.EnsureSize(ref indices, newCount, dst.Length, keepOld: false); - Utils.EnsureSize(ref values, newCount, dst.Length, keepOld: false); + mutation = VBufferMutationContext.Create(ref dst, + src.Length, + newCount, + maxValuesCapacity: dst.Length); + var indices = mutation.Indices; + var values = mutation.Values; int sI = src.Count - 1; dI = dst.Count - 1; - int sIndex = src.Indices[sI]; - int dIndex = dst.Indices[dI]; + int sIndex = srcIndices[sI]; + int dIndex = dstIndices[dI]; // Go from the end, so that even if we're writing over dst's vectors in // place, we do not corrupt the data as we are reorganizing it. @@ -820,17 +833,17 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< if (sIndex < dIndex) { indices[i] = dIndex; - values[i] = dst.Values[dI]; + values[i] = dstValues[dI]; if (outer) manip(dIndex, default(TSrc), ref values[i]); - dIndex = --dI >= 0 ? dst.Indices[dI] : -1; + dIndex = --dI >= 0 ? dstIndices[dI] : -1; } else if (sIndex > dIndex) { indices[i] = sIndex; values[i] = default(TDst); - manip(sIndex, src.Values[sI], ref values[i]); - sIndex = --sI >= 0 ? src.Indices[sI] : -1; + manip(sIndex, srcValues[sI], ref values[i]); + sIndex = --sI >= 0 ? srcIndices[sI] : -1; } else { @@ -838,13 +851,13 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< Contracts.Assert(sIndex >= 0); Contracts.Assert(sIndex == dIndex); indices[i] = dIndex; - values[i] = dst.Values[dI]; - manip(sIndex, src.Values[sI], ref values[i]); - sIndex = --sI >= 0 ? src.Indices[sI] : -1; - dIndex = --dI >= 0 ? dst.Indices[dI] : -1; + values[i] = dstValues[dI]; + manip(sIndex, srcValues[sI], ref values[i]); + sIndex = --sI >= 0 ? srcIndices[sI] : -1; + dIndex = --dI >= 0 ? dstIndices[dI] : -1; } } - dst = new VBuffer(dst.Length, newCount, values, indices); + mutation.Complete(ref dst); return; } @@ -856,8 +869,8 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< Contracts.Assert(src.Count == dst.Count); for (int i = 0; i < src.Count; i++) { - Contracts.Assert(src.Indices[i] == dst.Indices[i]); - manip(src.Indices[i], src.Values[i], ref dst.Values[i]); + Contracts.Assert(srcIndices[i] == dstIndices[i]); + manip(srcIndices[i], srcValues[i], ref mutation.Values[i]); } return; } @@ -867,27 +880,27 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< if (outer) { int sI = 0; - int sIndex = src.Indices[sI]; + int sIndex = srcIndices[sI]; for (int i = 0; i < dst.Count; ++i) { - if (dst.Indices[i] == sIndex) + if (dstIndices[i] == sIndex) { - manip(sIndex, src.Values[sI], ref dst.Values[i]); - sIndex = ++sI == src.Count ? src.Length : src.Indices[sI]; + manip(sIndex, srcValues[sI], ref mutation.Values[i]); + sIndex = ++sI == src.Count ? src.Length : srcIndices[sI]; } else - manip(dst.Indices[i], default(TSrc), ref dst.Values[i]); + manip(dstIndices[i], default(TSrc), ref mutation.Values[i]); } } else { for (int sI = 0; sI < src.Count; sI++) { - int sIndex = src.Indices[sI]; - while (dst.Indices[dI] < sIndex) + int sIndex = srcIndices[sI]; + while (dstIndices[dI] < sIndex) dI++; - Contracts.Assert(dst.Indices[dI] == sIndex); - manip(sIndex, src.Values[sI], ref dst.Values[dI++]); + Contracts.Assert(dstIndices[dI] == sIndex); + manip(sIndex, srcValues[sI], ref mutation.Values[dI++]); } } return; @@ -899,23 +912,27 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< // First do a "quasi" densification of dst, by making the indices // of dst correspond to those in src. + mutation = VBufferMutationContext.Create(ref dst, newCount, dst.Count); int sI = 0; for (dI = 0; dI < dst.Count; ++dI) { - int bIndex = dst.Indices[dI]; - while (src.Indices[sI] < bIndex) + int bIndex = dstIndices[dI]; + while (srcIndices[sI] < bIndex) sI++; - Contracts.Assert(src.Indices[sI] == bIndex); - dst.Indices[dI] = sI++; + Contracts.Assert(srcIndices[sI] == bIndex); + mutation.Indices[dI] = sI++; } - dst = new VBuffer(newCount, dst.Count, dst.Values, dst.Indices); + mutation.Complete(ref dst); Densify(ref dst); - int[] indices = dst.Indices; - Utils.EnsureSize(ref indices, src.Count, src.Length, keepOld: false); - Array.Copy(src.Indices, indices, newCount); - dst = new VBuffer(src.Length, newCount, dst.Values, indices); + + mutation = VBufferMutationContext.Create(ref dst, + src.Length, + newCount, + maxValuesCapacity: src.Length); + srcIndices.CopyTo(mutation.Indices); for (sI = 0; sI < src.Count; sI++) - manip(src.Indices[sI], src.Values[sI], ref dst.Values[sI]); + manip(srcIndices[sI], srcValues[sI], ref mutation.Values[sI]); + mutation.Complete(ref dst); return; } @@ -932,64 +949,69 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf { Contracts.Check(src.Length == dst.Length, "Vectors must have the same dimensionality."); Contracts.CheckValue(manip, nameof(manip)); - Contracts.Assert(Utils.Size(src.Values) >= src.Count); - Contracts.Assert(Utils.Size(dst.Values) >= dst.Count); + int length = src.Length; + var srcValues = src.GetValues(); + var dstValues = dst.GetValues(); + if (dst.Count == 0) { if (src.Count == 0) - res = new VBuffer(length, 0, res.Values, res.Indices); + { + VBufferMutationContext.Create(ref res, length, 0) + .Complete(ref res); + } else if (src.IsDense) { Contracts.Assert(src.Count == src.Length); - TDst[] resValues = Utils.Size(res.Values) >= length ? res.Values : new TDst[length]; + var mutation = VBufferMutationContext.Create(ref res, length); for (int i = 0; i < length; i++) - manip(i, src.Values[i], default(TDst), ref resValues[i]); - res = new VBuffer(length, resValues, res.Indices); + manip(i, srcValues[i], default(TDst), ref mutation.Values[i]); + mutation.Complete(ref res); } else { // src is non-empty sparse. int count = src.Count; Contracts.Assert(0 < count && count < length); - int[] resIndices = Utils.Size(res.Indices) >= count ? res.Indices : new int[count]; - TDst[] resValues = Utils.Size(res.Values) >= count ? res.Values : new TDst[count]; - Array.Copy(src.Indices, resIndices, count); + var mutation = VBufferMutationContext.Create(ref res, length, count); + var srcIndices = src.GetIndices(); + srcIndices.CopyTo(mutation.Indices); for (int ii = 0; ii < count; ii++) { - int i = src.Indices[ii]; - resIndices[ii] = i; - manip(i, src.Values[ii], default(TDst), ref resValues[ii]); + int i = srcIndices[ii]; + mutation.Indices[ii] = i; + manip(i, srcValues[ii], default(TDst), ref mutation.Values[ii]); } - res = new VBuffer(length, count, resValues, resIndices); + mutation.Complete(ref res); } } else if (dst.IsDense) { - TDst[] resValues = Utils.Size(res.Values) >= length ? res.Values : new TDst[length]; + var mutation = VBufferMutationContext.Create(ref res, length); if (src.Count == 0) { if (outer) { // Apply manip to all slots, as all slots of dst are defined. for (int j = 0; j < length; j++) - manip(j, default(TSrc), dst.Values[j], ref resValues[j]); + manip(j, default(TSrc), dstValues[j], ref mutation.Values[j]); } else { // Copy only. No slot of src is defined. for (int j = 0; j < length; j++) - resValues[j] = dst.Values[j]; + mutation.Values[j] = dstValues[j]; } - res = new VBuffer(length, resValues, res.Indices); + mutation.Complete(ref res); } else if (src.IsDense) { Contracts.Assert(src.Count == src.Length); for (int i = 0; i < length; i++) - manip(i, src.Values[i], dst.Values[i], ref resValues[i]); - res = new VBuffer(length, resValues, res.Indices); + manip(i, srcValues[i], dstValues[i], ref mutation.Values[i]); + mutation.Complete(ref res); } else { @@ -998,7 +1020,8 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf Contracts.Assert(0 < count && count < length); int ii = 0; - int i = src.Indices[ii]; + var srcIndices = src.GetIndices(); + int i = srcIndices[ii]; if (outer) { // All slots of dst are defined. Always apply manip. @@ -1006,11 +1029,11 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf { if (j == i) { - manip(j, src.Values[ii], dst.Values[j], ref resValues[j]); - i = ++ii == count ? length : src.Indices[ii]; + manip(j, srcValues[ii], dstValues[j], ref mutation.Values[j]); + i = ++ii == count ? length : srcIndices[ii]; } else - manip(j, default(TSrc), dst.Values[j], ref resValues[j]); + manip(j, default(TSrc), dstValues[j], ref mutation.Values[j]); } } else @@ -1020,61 +1043,61 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf { if (j == i) { - manip(j, src.Values[ii], dst.Values[j], ref resValues[j]); - i = ++ii == count ? length : src.Indices[ii]; + manip(j, srcValues[ii], dstValues[j], ref mutation.Values[j]); + i = ++ii == count ? length : srcIndices[ii]; } else - resValues[j] = dst.Values[j]; + mutation.Values[j] = dstValues[j]; } } - res = new VBuffer(length, resValues, res.Indices); + mutation.Complete(ref res); } } else { // dst is non-empty sparse int dstCount = dst.Count; + var dstIndices = dst.GetIndices(); Contracts.Assert(dstCount > 0); if (src.Count == 0) { - int[] resIndices = Utils.Size(res.Indices) >= dstCount ? res.Indices : new int[dstCount]; - TDst[] resValues = Utils.Size(res.Values) >= dstCount ? res.Values : new TDst[dstCount]; + var mutation = VBufferMutationContext.Create(ref res, length, dstCount); if (outer) { for (int jj = 0; jj < dstCount; jj++) { - int j = dst.Indices[jj]; - resIndices[jj] = j; - manip(j, default(TSrc), dst.Values[jj], ref resValues[jj]); + int j = dstIndices[jj]; + mutation.Indices[jj] = j; + manip(j, default(TSrc), dstValues[jj], ref mutation.Values[jj]); } } else { for (int jj = 0; jj < dstCount; jj++) { - resIndices[jj] = dst.Indices[jj]; - resValues[jj] = dst.Values[jj]; + mutation.Indices[jj] = dstIndices[jj]; + mutation.Values[jj] = dstValues[jj]; } } - res = new VBuffer(length, dstCount, resValues, resIndices); + mutation.Complete(ref res); } else if (src.IsDense) { // res will be dense. - TDst[] resValues = Utils.Size(res.Values) >= length ? res.Values : new TDst[length]; + var mutation = VBufferMutationContext.Create(ref res, length); int jj = 0; - int j = dst.Indices[jj]; + int j = dstIndices[jj]; for (int i = 0; i < length; i++) { if (i == j) { - manip(i, src.Values[i], dst.Values[jj], ref resValues[i]); - j = ++jj == dstCount ? length : dst.Indices[jj]; + manip(i, srcValues[i], dstValues[jj], ref mutation.Values[i]); + j = ++jj == dstCount ? length : dstIndices[jj]; } else - manip(i, src.Values[i], default(TDst), ref resValues[i]); + manip(i, srcValues[i], default(TDst), ref mutation.Values[i]); } - res = new VBuffer(length, resValues, res.Indices); + mutation.Complete(ref res); } else { @@ -1083,17 +1106,18 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf // Find the count of result, which is the size of the union of the indices set of src and dst. int resCount = dstCount; + var srcIndices = src.GetIndices(); for (int ii = 0, jj = 0; ii < src.Count; ii++) { - int i = src.Indices[ii]; - while (jj < dst.Count && dst.Indices[jj] < i) + int i = srcIndices[ii]; + while (jj < dst.Count && dstIndices[jj] < i) jj++; if (jj == dst.Count) { resCount += src.Count - ii; break; } - if (dst.Indices[jj] == i) + if (dstIndices[jj] == i) jj++; else resCount++; @@ -1114,13 +1138,12 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf } else { - int[] resIndices = Utils.Size(res.Indices) >= resCount ? res.Indices : new int[resCount]; - TDst[] resValues = Utils.Size(res.Values) >= resCount ? res.Values : new TDst[resCount]; + var mutation = VBufferMutationContext.Create(ref res, length, resCount); int ii = 0; - int i = src.Indices[ii]; + int i = srcIndices[ii]; int jj = 0; - int j = dst.Indices[jj]; + int j = dstIndices[jj]; for (int kk = 0; kk < resCount; kk++) { @@ -1128,35 +1151,35 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf if (i == j) { // Slot (i == j) both defined in src and dst. Apply manip. - resIndices[kk] = i; - manip(i, src.Values[ii], dst.Values[jj], ref resValues[kk]); - i = ++ii == src.Count ? length : src.Indices[ii]; - j = ++jj == dstCount ? length : dst.Indices[jj]; + mutation.Indices[kk] = i; + manip(i, srcValues[ii], dstValues[jj], ref mutation.Values[kk]); + i = ++ii == src.Count ? length : srcIndices[ii]; + j = ++jj == dstCount ? length : dstIndices[jj]; } else if (i < j) { // Slot i defined only in src, but not in dst. Apply manip. - resIndices[kk] = i; - manip(i, src.Values[ii], default(TDst), ref resValues[kk]); - i = ++ii == src.Count ? length : src.Indices[ii]; + mutation.Indices[kk] = i; + manip(i, srcValues[ii], default(TDst), ref mutation.Values[kk]); + i = ++ii == src.Count ? length : srcIndices[ii]; } else { // Slot j defined only in dst, but not in src. Apply manip if outer. // Otherwise just copy. - resIndices[kk] = j; + mutation.Indices[kk] = j; // REVIEW: Should we move checking of outer outside the loop? if (outer) - manip(j, default(TSrc), dst.Values[jj], ref resValues[kk]); + manip(j, default(TSrc), dstValues[jj], ref mutation.Values[kk]); else - resValues[kk] = dst.Values[jj]; - j = ++jj == dstCount ? length : dst.Indices[jj]; + mutation.Values[kk] = dstValues[jj]; + j = ++jj == dstCount ? length : dstIndices[jj]; } } Contracts.Assert(ii == src.Count && jj == dstCount); Contracts.Assert(i == length && j == length); - res = new VBuffer(length, resCount, resValues, resIndices); + mutation.Complete(ref res); } } } @@ -1180,25 +1203,30 @@ public static void ApplyIntoEitherDefined(in VBuffer src, ref // equal lengths, but I don't care here. if (src.Count == 0) { - dst = new VBuffer(src.Length, src.Count, dst.Values, dst.Indices); + VBufferMutationContext.Create(ref dst, src.Length, 0) + .Complete(ref dst); return; } - int[] indices = dst.Indices; - TDst[] values = dst.Values; - Utils.EnsureSize(ref values, src.Count, src.Length, keepOld: false); + var mutation = VBufferMutationContext.Create(ref dst, + src.Length, + src.Count, + maxValuesCapacity: src.Length); + Span values = mutation.Values; + var srcValues = src.GetValues(); if (src.IsDense) { for (int i = 0; i < src.Length; ++i) - values[i] = func(i, src.Values[i]); + values[i] = func(i, srcValues[i]); } else { - Utils.EnsureSize(ref indices, src.Count, src.Length, keepOld: false); - Array.Copy(src.Indices, indices, src.Count); - for (int i = 0; i < src.Count; ++i) - values[i] = func(src.Indices[i], src.Values[i]); + Span indices = mutation.Indices; + var srcIndices = src.GetIndices(); + srcIndices.CopyTo(indices); + for (int i = 0; i < srcValues.Length; ++i) + values[i] = func(srcIndices[i], srcValues[i]); } - dst = new VBuffer(src.Length, src.Count, values, indices); + mutation.Complete(ref dst); } /// @@ -1225,54 +1253,62 @@ public static void ApplyInto(in VBuffer a, in VBuffer // 5. b's indices are a subset of a's. // 6. Neither a nor b's indices are a subset of the other. - if (a.Count == 0 && b.Count == 0) + var aValues = a.GetValues(); + var bValues = b.GetValues(); + if (aValues.Length == 0 && bValues.Length == 0) { // Case 1. Output will be empty. - dst = new VBuffer(a.Length, 0, dst.Values, dst.Indices); + VBufferMutationContext.Create(ref dst, a.Length, 0) + .Complete(ref dst); return; } int aI = 0; int bI = 0; - TDst[] values = dst.Values; + ReadOnlySpan aIndices; + ReadOnlySpan bIndices; + VBufferMutationContext mutation; if (a.IsDense || b.IsDense) { // Case 2. One of the two inputs is dense. The output will be dense. - Utils.EnsureSize(ref values, a.Length, a.Length, keepOld: false); - + mutation = VBufferMutationContext.Create(ref dst, a.Length); if (!a.IsDense) { // a is sparse, b is dense + aIndices = a.GetIndices(); for (int i = 0; i < b.Length; i++) { - TSrc1 aVal = (aI < a.Count && i == a.Indices[aI]) ? a.Values[aI++] : default(TSrc1); - values[i] = func(i, aVal, b.Values[i]); + TSrc1 aVal = (aI < a.Count && i == aIndices[aI]) ? aValues[aI++] : default(TSrc1); + mutation.Values[i] = func(i, aVal, bValues[i]); } } else if (!b.IsDense) { // b is sparse, a is dense + bIndices = b.GetIndices(); for (int i = 0; i < a.Length; i++) { - TSrc2 bVal = (bI < b.Count && i == b.Indices[bI]) ? b.Values[bI++] : default(TSrc2); - values[i] = func(i, a.Values[i], bVal); + TSrc2 bVal = (bI < b.Count && i == bIndices[bI]) ? bValues[bI++] : default(TSrc2); + mutation.Values[i] = func(i, aValues[i], bVal); } } else { // both dense for (int i = 0; i < a.Length; i++) - values[i] = func(i, a.Values[i], b.Values[i]); + mutation.Values[i] = func(i, aValues[i], bValues[i]); } - dst = new VBuffer(a.Length, values, dst.Indices); + mutation.Complete(ref dst); return; } // a, b both sparse. int newCount = 0; + aIndices = a.GetIndices(); + bIndices = b.GetIndices(); while (aI < a.Count && bI < b.Count) { - int aCompB = a.Indices[aI] - b.Indices[bI]; + int aCompB = aIndices[aI] - bIndices[bI]; if (aCompB <= 0) // a is no larger than b. aI++; if (aCompB >= 0) // b is no larger than a. @@ -1288,50 +1324,49 @@ public static void ApplyInto(in VBuffer a, in VBuffer // REVIEW: Worth optimizing the newCount == a.Length case? // Probably not... - int[] indices = dst.Indices; - Utils.EnsureSize(ref indices, newCount, a.Length, keepOld: false); - Utils.EnsureSize(ref values, newCount, a.Length, keepOld: false); + mutation = VBufferMutationContext.Create(ref dst, a.Length, newCount); + Span indices = mutation.Indices; if (newCount == b.Count) { if (newCount == a.Count) { // Case 3, a and b actually have the same indices! - Array.Copy(a.Indices, indices, a.Count); + aIndices.CopyTo(indices); for (aI = 0; aI < a.Count; aI++) { - Contracts.Assert(a.Indices[aI] == b.Indices[aI]); - values[aI] = func(a.Indices[aI], a.Values[aI], b.Values[aI]); + Contracts.Assert(aIndices[aI] == bIndices[aI]); + mutation.Values[aI] = func(aIndices[aI], aValues[aI], bValues[aI]); } } else { // Case 4, a's indices are a subset of b's. - Array.Copy(b.Indices, indices, b.Count); + bIndices.CopyTo(indices); aI = 0; for (bI = 0; aI < a.Count && bI < b.Count; bI++) { - Contracts.Assert(a.Indices[aI] >= b.Indices[bI]); - TSrc1 aVal = a.Indices[aI] == b.Indices[bI] ? a.Values[aI++] : default(TSrc1); - values[bI] = func(b.Indices[bI], aVal, b.Values[bI]); + Contracts.Assert(aIndices[aI] >= bIndices[bI]); + TSrc1 aVal = aIndices[aI] == bIndices[bI] ? aValues[aI++] : default(TSrc1); + mutation.Values[bI] = func(bIndices[bI], aVal, bValues[bI]); } for (; bI < b.Count; bI++) - values[bI] = func(b.Indices[bI], default(TSrc1), b.Values[bI]); + mutation.Values[bI] = func(bIndices[bI], default(TSrc1), bValues[bI]); } } else if (newCount == a.Count) { // Case 5, b's indices are a subset of a's. - Array.Copy(a.Indices, indices, a.Count); + aIndices.CopyTo(indices); bI = 0; for (aI = 0; bI < b.Count && aI < a.Count; aI++) { - Contracts.Assert(b.Indices[bI] >= a.Indices[aI]); - TSrc2 bVal = a.Indices[aI] == b.Indices[bI] ? b.Values[bI++] : default(TSrc2); - values[aI] = func(a.Indices[aI], a.Values[aI], bVal); + Contracts.Assert(bIndices[bI] >= aIndices[aI]); + TSrc2 bVal = aIndices[aI] == bIndices[bI] ? bValues[bI++] : default(TSrc2); + mutation.Values[aI] = func(aIndices[aI], aValues[aI], bVal); } for (; aI < a.Count; aI++) - values[aI] = func(a.Indices[aI], a.Values[aI], default(TSrc2)); + mutation.Values[aI] = func(aIndices[aI], aValues[aI], default(TSrc2)); } else { @@ -1341,47 +1376,47 @@ public static void ApplyInto(in VBuffer a, in VBuffer TSrc2 bVal = default(TSrc2); while (aI < a.Count && bI < b.Count) { - int aCompB = a.Indices[aI] - b.Indices[bI]; + int aCompB = aIndices[aI] - bIndices[bI]; int index = 0; if (aCompB < 0) { - index = a.Indices[aI]; - aVal = a.Values[aI++]; + index = aIndices[aI]; + aVal = aValues[aI++]; bVal = default(TSrc2); } else if (aCompB > 0) { - index = b.Indices[bI]; + index = bIndices[bI]; aVal = default(TSrc1); - bVal = b.Values[bI++]; + bVal = bValues[bI++]; } else { - index = a.Indices[aI]; - Contracts.Assert(index == b.Indices[bI]); - aVal = a.Values[aI++]; - bVal = b.Values[bI++]; + index = aIndices[aI]; + Contracts.Assert(index == bIndices[bI]); + aVal = aValues[aI++]; + bVal = bValues[bI++]; } - values[newI] = func(index, aVal, bVal); + mutation.Values[newI] = func(index, aVal, bVal); indices[newI++] = index; } for (; aI < a.Count; aI++) { - int index = a.Indices[aI]; - values[newI] = func(index, a.Values[aI], default(TSrc2)); + int index = aIndices[aI]; + mutation.Values[newI] = func(index, aValues[aI], default(TSrc2)); indices[newI++] = index; } for (; bI < b.Count; bI++) { - int index = b.Indices[bI]; - values[newI] = func(index, default(TSrc1), b.Values[bI]); + int index = bIndices[bI]; + mutation.Values[newI] = func(index, default(TSrc1), bValues[bI]); indices[newI++] = index; } } - dst = new VBuffer(a.Length, newCount, values, indices); + mutation.Complete(ref dst); } /// @@ -1390,14 +1425,16 @@ public static void ApplyInto(in VBuffer a, in VBuffer public static void Copy(List src, ref VBuffer dst, int length) { Contracts.CheckParam(0 <= length && length <= Utils.Size(src), nameof(length)); - var values = dst.Values; + var mutation = VBufferMutationContext.Create(ref dst, length); if (length > 0) { - if (Utils.Size(values) < length) - values = new T[length]; - src.CopyTo(values); + // List.CopyTo should have an overload for Span - https://github.com/dotnet/corefx/issues/33006 + for (int i = 0; i < length; i++) + { + mutation.Values[i] = src[i]; + } } - dst = new VBuffer(length, values, dst.Indices); + mutation.Complete(ref dst); } } } diff --git a/src/Microsoft.ML.Data/Data/BufferBuilder.cs b/src/Microsoft.ML.Data/Data/BufferBuilder.cs index 5020ae04187..a92cf7b4f8b 100644 --- a/src/Microsoft.ML.Data/Data/BufferBuilder.cs +++ b/src/Microsoft.ML.Data/Data/BufferBuilder.cs @@ -382,45 +382,6 @@ public bool TryGetFeature(int index, out T v) return false; } - private void GetResult(ref T[] values, ref int[] indices, out int count, out int length) - { - if (_count == 0) - { - count = 0; - length = _length; - return; - } - - if (!_dense) - { - if (!_sorted) - SortAndSumDups(); - if (!_dense && _count >= _length / 2) - MakeDense(); - } - - if (_dense) - { - if (Utils.Size(values) < _length) - values = new T[_length]; - Array.Copy(_values, values, _length); - count = _length; - length = _length; - } - else - { - Contracts.Assert(_count < _length); - if (Utils.Size(values) < _count) - values = new T[_count]; - if (Utils.Size(indices) < _count) - indices = new int[_count]; - Array.Copy(_values, values, _count); - Array.Copy(_indices, indices, _count); - count = _count; - length = _length; - } - } - public void Reset(int length, bool dense) { ResetImpl(length, dense); @@ -435,7 +396,7 @@ public void AddFeatures(int index, in VBuffer buffer) if (count == 0) return; - var values = buffer.Values; + var values = buffer.GetValues(); if (buffer.IsDense) { Contracts.Assert(count == buffer.Length); @@ -454,7 +415,7 @@ public void AddFeatures(int index, in VBuffer buffer) else { // REVIEW: Validate indices! - var indices = buffer.Indices; + var indices = buffer.GetIndices(); if (_dense) { for (int i = 0; i < count; i++) @@ -471,24 +432,35 @@ public void AddFeatures(int index, in VBuffer buffer) public void GetResult(ref VBuffer buffer) { - var values = buffer.Values; - var indices = buffer.Indices; - if (IsEmpty) { - buffer = new VBuffer(_length, 0, values, indices); + VBufferMutationContext.Create(ref buffer, _length, 0) + .Complete(ref buffer); return; } - int count; - int length; - GetResult(ref values, ref indices, out count, out length); - Contracts.Assert(0 <= count && count <= length); + if (!_dense) + { + if (!_sorted) + SortAndSumDups(); + if (!_dense && _count >= _length / 2) + MakeDense(); + } - if (count == length) - buffer = new VBuffer(length, values, indices); + if (_dense) + { + var mutation = VBufferMutationContext.Create(ref buffer, _length); + _values.AsSpan(0, _length).CopyTo(mutation.Values); + mutation.Complete(ref buffer); + } else - buffer = new VBuffer(length, count, values, indices); + { + Contracts.Assert(_count < _length); + var mutation = VBufferMutationContext.Create(ref buffer, _length, _count); + _values.AsSpan(0, _count).CopyTo(mutation.Values); + _indices.AsSpan(0, _count).CopyTo(mutation.Indices); + mutation.Complete(ref buffer); + } } } } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs index 920f4350fb4..e095162442a 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs @@ -1109,29 +1109,29 @@ public override void Get(ref VBuffer value) int length = FixedLength ? _size : _lengths[_vectorIndex]; int count = _counts[_vectorIndex]; - int[] indices = value.Indices; - T[] values = value.Values; if (count < 0) { // dense + var mutation = VBufferMutationContext.Create(ref value, length); if (length > 0) { - Utils.EnsureSize(ref values, length); - Array.Copy(_values, _valuesOffset, values, 0, length); + _values.AsSpan(_valuesOffset, length) + .CopyTo(mutation.Values); } - value = new VBuffer(length, values, indices); + mutation.Complete(ref value); } else { // sparse + var mutation = VBufferMutationContext.Create(ref value, length, count); if (count > 0) { - Utils.EnsureSize(ref values, count); - Utils.EnsureSize(ref indices, count); - Array.Copy(_values, _valuesOffset, values, 0, count); - Array.Copy(_indices, _indicesOffset, indices, 0, count); + _values.AsSpan(_valuesOffset, count) + .CopyTo(mutation.Values); + _indices.AsSpan(_indicesOffset, count) + .CopyTo(mutation.Indices); } - value = new VBuffer(length, count, values, indices); + mutation.Complete(ref value); } } } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs index b5e87296a84..6d6854dc790 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs @@ -393,28 +393,23 @@ public void Get(ref VBuffer dst) { AssertValid(); - var values = dst.Values; - var indices = dst.Indices; - if (_count == 0) { - dst = new VBuffer(_size, 0, values, indices); + VBufferMutationContext.Create(ref dst, _size, 0) + .Complete(ref dst); return; } - if (Utils.Size(values) < _count) - values = new TItem[_count]; - Array.Copy(_values, values, _count); + var mutation = VBufferMutationContext.Create(ref dst, _size, _count); + _values.AsSpan(0, _count).CopyTo(mutation.Values); if (_count == _size) { - dst = new VBuffer(_size, values, indices); + mutation.Complete(ref dst); return; } - if (Utils.Size(indices) < _count) - indices = new int[_count]; - Array.Copy(_indices, indices, _count); - dst = new VBuffer(_size, _count, values, indices); + _indices.AsSpan(0, _count).CopyTo(mutation.Indices); + mutation.Complete(ref dst); } } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs index 8e83f01ac1a..3987e0ce354 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs @@ -166,20 +166,22 @@ public VecValueWriter(IRowCursor cursor, VectorType type, int source, char sep) public override void WriteData(Action appendItem, out int length) { _getSrc(ref _src); + var srcValues = _src.GetValues(); if (_src.IsDense) { - for (int i = 0; i < _src.Length; i++) + for (int i = 0; i < srcValues.Length; i++) { - Conv(in _src.Values[i], ref Sb); + Conv(in srcValues[i], ref Sb); appendItem(Sb, i); } } else { - for (int i = 0; i < _src.Count; i++) + var srcIndices = _src.GetIndices(); + for (int i = 0; i < srcValues.Length; i++) { - Conv(in _src.Values[i], ref Sb); - appendItem(Sb, _src.Indices[i]); + Conv(in srcValues[i], ref Sb); + appendItem(Sb, srcIndices[i]); } } length = _src.Length; @@ -190,13 +192,15 @@ public override void WriteHeader(Action appendItem, out int length = _slotCount; if (_slotNames.Count == 0) return; + var slotNamesValues = _slotNames.GetValues(); + var slotNamesIndices = _slotNames.GetIndices(); for (int i = 0; i < _slotNames.Count; i++) { - var name = _slotNames.Values[i]; + var name = slotNamesValues[i]; if (name.IsEmpty) continue; MapText(in name, ref Sb); - int index = _slotNames.IsDense ? i : _slotNames.Indices[i]; + int index = _slotNames.IsDense ? i : slotNamesIndices[i]; appendItem(Sb, index); } } diff --git a/src/Microsoft.ML.Data/DataView/CompositeSchema.cs b/src/Microsoft.ML.Data/DataView/CompositeSchema.cs index 2a526f152a6..d61289b55c4 100644 --- a/src/Microsoft.ML.Data/DataView/CompositeSchema.cs +++ b/src/Microsoft.ML.Data/DataView/CompositeSchema.cs @@ -67,7 +67,7 @@ public void CheckColumnInRange(int col) public void GetColumnSource(int col, out int srcIndex, out int srcCol) { CheckColumnInRange(col); - if (!_cumulativeColCounts.TryFindIndexSorted(0, _cumulativeColCounts.Length, col, out srcIndex)) + if (!Utils.TryFindIndexSorted(_cumulativeColCounts, 0, _cumulativeColCounts.Length, col, out srcIndex)) srcIndex--; Contracts.Assert(0 <= srcIndex && srcIndex < _cumulativeColCounts.Length); srcCol = col - _cumulativeColCounts[srcIndex]; diff --git a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs index 1438cce6012..0a0fa3255eb 100644 --- a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs +++ b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs @@ -22,7 +22,7 @@ public static Float NormSquared(in VBuffer a) { if (a.Count == 0) return 0; - return CpuMathUtils.SumSq(a.Values.AsSpan(0, a.Count)); + return CpuMathUtils.SumSq(a.GetValues()); } /// @@ -50,7 +50,7 @@ public static Float L1Norm(in VBuffer a) { if (a.Count == 0) return 0; - return CpuMathUtils.SumAbs(a.Values.AsSpan(0, a.Count)); + return CpuMathUtils.SumAbs(a.GetValues()); } /// @@ -61,7 +61,7 @@ public static Float MaxNorm(in VBuffer a) { if (a.Count == 0) return 0; - return CpuMathUtils.MaxAbs(a.Values.AsSpan(0, a.Count)); + return CpuMathUtils.MaxAbs(a.GetValues()); } /// @@ -71,7 +71,7 @@ public static Float Sum(in VBuffer a) { if (a.Count == 0) return 0; - return CpuMathUtils.Sum(a.Values.AsSpan(0, a.Count)); + return CpuMathUtils.Sum(a.GetValues()); } /// @@ -83,10 +83,11 @@ public static void ScaleBy(ref VBuffer dst, Float c) { if (c == 1 || dst.Count == 0) return; + var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dst.Count); if (c != 0) - CpuMathUtils.Scale(c, dst.Values.AsSpan(0, dst.Count)); + CpuMathUtils.Scale(c, mutation.Values); else // Maintain density of dst. - Array.Clear(dst.Values, 0, dst.Count); + mutation.Values.Clear(); // REVIEW: Any benefit in sparsifying? } @@ -102,30 +103,31 @@ public static void ScaleBy(in VBuffer src, ref VBuffer dst, Float if (count == 0) { // dst is a zero vector. - dst = new VBuffer(length, 0, dst.Values, dst.Indices); + VBufferMutationContext.Create(ref dst, length, 0) + .Complete(ref dst); return; } - var dstValues = Utils.Size(dst.Values) >= count ? dst.Values : new Float[count]; if (src.IsDense) { // Maintain the density of src to dst in order to avoid slow down of L-BFGS. + var mutation = VBufferMutationContext.Create(ref dst, length); Contracts.Assert(length == count); if (c == 0) - Array.Clear(dstValues, 0, length); + mutation.Values.Clear(); else - CpuMathUtils.Scale(c, src.Values, dstValues, length); - dst = new VBuffer(length, dstValues, dst.Indices); + CpuMathUtils.Scale(c, src.GetValues(), mutation.Values, length); + mutation.Complete(ref dst); } else { - var dstIndices = Utils.Size(dst.Indices) >= count ? dst.Indices : new int[count]; - Array.Copy(src.Indices, dstIndices, count); + var mutation = VBufferMutationContext.Create(ref dst, length, count); + src.GetIndices().CopyTo(mutation.Indices); if (c == 0) - Array.Clear(dstValues, 0, count); + mutation.Values.Clear(); else - CpuMathUtils.Scale(c, src.Values, dstValues, count); - dst = new VBuffer(length, count, dstValues, dstIndices); + CpuMathUtils.Scale(c, src.GetValues(), mutation.Values, count); + mutation.Complete(ref dst); } } @@ -141,10 +143,11 @@ public static void Add(in VBuffer src, ref VBuffer dst) if (dst.IsDense) { + var mutation = VBufferMutationContext.Create(ref dst, dst.Length); if (src.IsDense) - CpuMathUtils.Add(src.Values, dst.Values, src.Length); + CpuMathUtils.Add(src.GetValues(), mutation.Values, src.Length); else - CpuMathUtils.Add(src.Values, src.Indices, dst.Values, src.Count); + CpuMathUtils.Add(src.GetValues(), src.GetIndices(), mutation.Values, src.Count); return; } // REVIEW: Should we use SSE for any of these possibilities? @@ -167,10 +170,11 @@ public static void AddMult(in VBuffer src, Float c, ref VBuffer ds if (dst.IsDense) { + var mutation = VBufferMutationContext.Create(ref dst, dst.Length); if (src.IsDense) - CpuMathUtils.AddScale(c, src.Values, dst.Values, src.Length); + CpuMathUtils.AddScale(c, src.GetValues(), mutation.Values, src.Length); else - CpuMathUtils.AddScale(c, src.Values, src.Indices, dst.Values, src.Count); + CpuMathUtils.AddScale(c, src.GetValues(), src.GetIndices(), mutation.Values, src.Count); return; } // REVIEW: Should we use SSE for any of these possibilities? @@ -196,9 +200,9 @@ public static void AddMult(in VBuffer src, Float c, ref VBuffer ds Contracts.Assert(length > 0); if (dst.IsDense && src.IsDense) { - Float[] resValues = Utils.Size(res.Values) >= length ? res.Values : new Float[length]; - CpuMathUtils.AddScaleCopy(c, src.Values, dst.Values, resValues, length); - res = new VBuffer(length, resValues, res.Indices); + var mutation = VBufferMutationContext.Create(ref res, length); + CpuMathUtils.AddScaleCopy(c, src.GetValues(), dst.GetValues(), mutation.Values, length); + mutation.Complete(ref res); return; } @@ -235,13 +239,17 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer if (src.Count == 0 || c == 0) return; + VBufferMutationContext mutation; + Span values; if (dst.IsDense) { // This is by far the most common case. + mutation = VBufferMutationContext.Create(ref dst, dst.Length); + values = mutation.Values.Slice(offset); if (src.IsDense) - CpuMathUtils.AddScale(c, src.Values, dst.Values.AsSpan(offset), src.Count); + CpuMathUtils.AddScale(c, src.GetValues(), values, src.Count); else - CpuMathUtils.AddScale(c, src.Values, src.Indices, dst.Values.AsSpan(offset), src.Count); + CpuMathUtils.AddScale(c, src.GetValues(), src.GetIndices(), values, src.Count); return; } // REVIEW: Perhaps implementing an ApplyInto with an offset would be more @@ -250,8 +258,9 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer // dst is sparse. I expect this will see limited practical use, since accumulants // are often better off going into a dense vector in all applications of interest to us. // Correspondingly, this implementation will be functional, but not optimized. - int dMin = dst.Count == 0 ? 0 : Utils.FindIndexSorted(dst.Indices, 0, dst.Count, offset); - int dLim = dst.Count == 0 ? 0 : Utils.FindIndexSorted(dst.Indices, dMin, dst.Count, offset + src.Length); + var dstIndices = dst.GetIndices(); + int dMin = dst.Count == 0 ? 0 : Utils.FindIndexSorted(dstIndices, 0, dst.Count, offset); + int dLim = dst.Count == 0 ? 0 : Utils.FindIndexSorted(dstIndices, dMin, dst.Count, offset + src.Length); Contracts.Assert(dMin - dLim <= src.Length); // First get the number of extra values that we will need to accomodate. int gapCount; @@ -260,9 +269,10 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer else { gapCount = src.Count; + var srcIndices = src.GetIndices(); for (int iS = 0, iD = dMin; iS < src.Count && iD < dLim; ) { - var comp = src.Indices[iS] - dst.Indices[iD] + offset; + var comp = srcIndices[iS] - dstIndices[iD] + offset; if (comp < 0) // dst index is larger. iS++; else if (comp > 0) // src index is larger. @@ -276,23 +286,29 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer } } // Extend dst so that it has room for this additional stuff. Shift things over as well. - var indices = dst.Indices; - var values = dst.Values; + mutation = VBufferMutationContext.Create(ref dst, + dst.Length, + dst.Count + gapCount, + keepOldOnResize: true); + var indices = mutation.Indices; + values = mutation.Values; if (gapCount > 0) { - Utils.EnsureSize(ref indices, dst.Count + gapCount, dst.Length); - Utils.EnsureSize(ref values, dst.Count + gapCount, dst.Length); // Shift things over, unless there's nothing to shift over, or no new elements are being introduced anyway. if (dst.Count != dLim) { Contracts.Assert(dLim < dst.Count); - Array.Copy(indices, dLim, indices, dLim + gapCount, dst.Count - dLim); - Array.Copy(values, dLim, values, dLim + gapCount, dst.Count - dLim); + indices.Slice(dLim, dst.Count - dLim) + .CopyTo(indices.Slice(dLim + gapCount)); + values.Slice(dLim, dst.Count - dLim) + .CopyTo(values.Slice(dLim + gapCount)); } } // Now, fill in the stuff in this "gap." Both of these implementations work // backwards from the end, since they can potentially be working in place if // the EnsureSize calls did not actually result in a new array. + var srcValues = src.GetValues(); + var dstValues = dst.GetValues(); if (src.IsDense) { // dst is sparse, src is dense. @@ -303,10 +319,10 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer Contracts.Assert(iDD == iS + dMin); // iDD and iD are the points in where we are writing and reading from. Contracts.Assert(iDD >= iD); - if (iD >= 0 && offset + iS == dst.Indices[iD]) // Collision. - values[iDD] = dst.Values[iD--] + c * src.Values[iS]; + if (iD >= 0 && offset + iS == dstIndices[iD]) // Collision. + values[iDD] = dstValues[iD--] + c * srcValues[iS]; else // Miss. - values[iDD] = c * src.Values[iS]; + values[iDD] = c * srcValues[iS]; indices[iDD] = offset + iS; } } @@ -315,8 +331,9 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer // Both dst and src are sparse. int iD = dLim - 1; int iS = src.Count - 1; - int sIndex = iS < 0 ? -1 : src.Indices[iS]; - int dIndex = iD < 0 ? -1 : dst.Indices[iD] - offset; + var srcIndices = src.GetIndices(); + int sIndex = iS < 0 ? -1 : srcIndices[iS]; + int dIndex = iD < 0 ? -1 : dstIndices[iD] - offset; for (int iDD = dLim + gapCount; --iDD >= dMin; ) { @@ -324,26 +341,26 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer int comp = sIndex - dIndex; if (comp == 0) // Collision on both. { - indices[iDD] = dst.Indices[iD]; - values[iDD] = dst.Values[iD--] + c * src.Values[iS--]; - sIndex = iS < 0 ? -1 : src.Indices[iS]; - dIndex = iD < 0 ? -1 : dst.Indices[iD] - offset; + indices[iDD] = dstIndices[iD]; + values[iDD] = dstValues[iD--] + c * srcValues[iS--]; + sIndex = iS < 0 ? -1 : srcIndices[iS]; + dIndex = iD < 0 ? -1 : dstIndices[iD] - offset; } else if (comp < 0) // Collision on dst. { - indices[iDD] = dst.Indices[iD]; - values[iDD] = dst.Values[iD--]; - dIndex = iD < 0 ? -1 : dst.Indices[iD] - offset; + indices[iDD] = dstIndices[iD]; + values[iDD] = dstValues[iD--]; + dIndex = iD < 0 ? -1 : dstIndices[iD] - offset; } else // Collision on src. { indices[iDD] = sIndex + offset; - values[iDD] = c * src.Values[iS--]; - sIndex = iS < 0 ? -1 : src.Indices[iS]; + values[iDD] = c * srcValues[iS--]; + sIndex = iS < 0 ? -1 : srcIndices[iS]; } } } - dst = new VBuffer(dst.Length, dst.Count + gapCount, values, indices); + mutation.Complete(ref dst); } /// @@ -365,15 +382,20 @@ public static void ScaleInto(in VBuffer src, Float c, ref VBuffer { if (src.Length > 0 && src.IsDense) { - var values = dst.Values; // Due to sparsity preservation from src, dst must be dense, in the same way. - Utils.EnsureSize(ref values, src.Length, src.Length, keepOld: false); - if (values == dst.Values) // We need to clear it. - Array.Clear(values, 0, src.Length); - dst = new VBuffer(src.Length, values, dst.Indices); + var mutation = VBufferMutationContext.Create(ref dst, + src.Length, + out bool createdNewValues, + out bool _); + if (!createdNewValues) // We need to clear it + mutation.Values.Clear(); + mutation.Complete(ref dst); } else - dst = new VBuffer(src.Length, 0, dst.Values, dst.Indices); + { + VBufferMutationContext.Create(ref dst, src.Length, 0) + .Complete(ref dst); + } } else if (c == -1) VBufferUtils.ApplyIntoEitherDefined(in src, ref dst, (i, v) => -v); @@ -388,30 +410,32 @@ public static int ArgMax(in VBuffer src) if (src.Count == 0) return 0; - int ind = MathUtils.ArgMax(src.Values, src.Count); + var srcValues = src.GetValues(); + int ind = MathUtils.ArgMax(srcValues); // ind < 0 iff all explicit values are NaN. Contracts.Assert(-1 <= ind && ind < src.Count); if (src.IsDense) return ind; + var srcIndices = src.GetIndices(); if (ind >= 0) { - Contracts.Assert(src.Indices[ind] >= ind); - if (src.Values[ind] > 0) - return src.Indices[ind]; + Contracts.Assert(srcIndices[ind] >= ind); + if (srcValues[ind] > 0) + return srcIndices[ind]; // This covers the case where there is an explicit zero, and zero is the max, // and the first explicit zero is before any implicit entries. - if (src.Values[ind] == 0 && src.Indices[ind] == ind) + if (srcValues[ind] == 0 && srcIndices[ind] == ind) return ind; } // All explicit values are non-positive or NaN, so return the first index not in src.Indices. ind = 0; - while (ind < src.Count && src.Indices[ind] == ind) + while (ind < src.Count && srcIndices[ind] == ind) ind++; Contracts.Assert(ind <= src.Count); - Contracts.Assert(ind == src.Count || ind < src.Indices[ind]); + Contracts.Assert(ind == src.Count || ind < srcIndices[ind]); return ind; } @@ -422,30 +446,32 @@ public static int ArgMin(in VBuffer src) if (src.Count == 0) return 0; - int ind = MathUtils.ArgMin(src.Values, src.Count); + var srcValues = src.GetValues(); + int ind = MathUtils.ArgMin(srcValues); // ind < 0 iff all explicit values are NaN. Contracts.Assert(-1 <= ind && ind < src.Count); if (src.IsDense) return ind; + var srcIndices = src.GetIndices(); if (ind >= 0) { - Contracts.Assert(src.Indices[ind] >= ind); - if (src.Values[ind] < 0) - return src.Indices[ind]; + Contracts.Assert(srcIndices[ind] >= ind); + if (srcValues[ind] < 0) + return srcIndices[ind]; // This covers the case where there is an explicit zero, and zero is the min, // and the first explicit zero is before any implicit entries. - if (src.Values[ind] == 0 && src.Indices[ind] == ind) + if (srcValues[ind] == 0 && srcIndices[ind] == ind) return ind; } - // All explicit values are non-negative or NaN, so return the first index not in src.Indices. + // All explicit values are non-negative or NaN, so return the first index not in srcIndices. ind = 0; - while (ind < src.Count && src.Indices[ind] == ind) + while (ind < src.Count && srcIndices[ind] == ind) ind++; Contracts.Assert(ind <= src.Count); - Contracts.Assert(ind == src.Count || ind < src.Indices[ind]); + Contracts.Assert(ind == src.Count || ind < srcIndices[ind]); return ind; } } diff --git a/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs b/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs index 79af700bcc9..84fe2e135f6 100644 --- a/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs +++ b/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs @@ -33,8 +33,8 @@ public static Float DotProduct(Float[] a, in VBuffer b) if (b.Count == 0) return 0; if (b.IsDense) - return CpuMathUtils.DotProductDense(a, b.Values, b.Length); - return CpuMathUtils.DotProductSparse(a, b.Values, b.Indices, b.Count); + return CpuMathUtils.DotProductDense(a, b.GetValues(), b.Length); + return CpuMathUtils.DotProductSparse(a, b.GetValues(), b.GetIndices(), b.Count); } public static Float DotProduct(in VBuffer a, in VBuffer b) @@ -47,13 +47,13 @@ public static Float DotProduct(in VBuffer a, in VBuffer b) if (a.IsDense) { if (b.IsDense) - return CpuMathUtils.DotProductDense(a.Values, b.Values, a.Length); - return CpuMathUtils.DotProductSparse(a.Values, b.Values, b.Indices, b.Count); + return CpuMathUtils.DotProductDense(a.GetValues(), b.GetValues(), a.Length); + return CpuMathUtils.DotProductSparse(a.GetValues(), b.GetValues(), b.GetIndices(), b.Count); } if (b.IsDense) - return CpuMathUtils.DotProductSparse(b.Values, a.Values, a.Indices, a.Count); - return DotProductSparse(a.Values, a.Indices, 0, a.Count, b.Values, b.Indices, 0, b.Count, 0); + return CpuMathUtils.DotProductSparse(b.GetValues(), a.GetValues(), a.GetIndices(), a.Count); + return DotProductSparse(a.GetValues(), a.GetIndices(), 0, a.Count, b.GetValues(), b.GetIndices(), 0, b.Count); } /// @@ -75,10 +75,12 @@ public static void SparsifyNormalize(ref VBuffer a, int top, int bottom, var bottomHeap = new Heap>((left, right) => right.Value > left.Value, bottom + 1); bool isDense = a.IsDense; + var aValues = a.GetValues(); + var aIndices = a.GetIndices(); for (int i = 0; i < a.Count; i++) { - int idx = isDense ? i : a.Indices[i]; - var value = a.Values[i]; + int idx = isDense ? i : aIndices[i]; + var value = aValues[i]; if (value < 0 && bottom > 0) { @@ -108,22 +110,21 @@ public static void SparsifyNormalize(ref VBuffer a, int top, int bottom, } var newCount = topHeap.Count + bottomHeap.Count; - var indices = a.Indices; - Utils.EnsureSize(ref indices, newCount); - Contracts.Assert(Utils.Size(a.Values) >= newCount); + var mutation = VBufferMutationContext.Create(ref a, a.Length, newCount); + var indices = mutation.Indices; int count = 0; while (topHeap.Count > 0) { var pair = topHeap.Pop(); indices[count] = pair.Key; - a.Values[count++] = pair.Value; + mutation.Values[count++] = pair.Value; } while (bottomHeap.Count > 0) { var pair = bottomHeap.Pop(); indices[count] = pair.Key; - a.Values[count++] = pair.Value; + mutation.Values[count++] = pair.Value; } Contracts.Assert(count == newCount); @@ -132,7 +133,7 @@ public static void SparsifyNormalize(ref VBuffer a, int top, int bottom, { for (var i = 0; i < newCount; i++) { - var value = a.Values[i]; + var value = mutation.Values[i]; var absValue = Math.Abs(value); if (absValue > absMax) absMax = absValue; @@ -142,13 +143,13 @@ public static void SparsifyNormalize(ref VBuffer a, int top, int bottom, { var ratio = 1 / absMax; for (var i = 0; i < newCount; i++) - a.Values[i] = ratio * a.Values[i]; + mutation.Values[i] = ratio * mutation.Values[i]; } } if (indices != null) Array.Sort(indices, a.Values, 0, newCount); - a = new VBuffer(a.Length, newCount, a.Values, indices); + mutation.Complete(ref a); } /// @@ -159,27 +160,24 @@ public static void MulElementWise(in VBuffer a, ref VBuffer dst) Contracts.Check(a.Length == dst.Length, "Vectors must have the same dimensionality."); if (a.IsDense && dst.IsDense) - CpuMathUtils.MulElementWise(a.Values, dst.Values, dst.Values, a.Length); + { + var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dst.Count); + CpuMathUtils.MulElementWise(a.GetValues(), dst.GetValues(), mutation.Values, a.Length); + } else VBufferUtils.ApplyWithEitherDefined(in a, ref dst, (int ind, Float v1, ref Float v2) => { v2 *= v1; }); } - private static Float L2DistSquaredSparse(Float[] valuesA, int[] indicesA, int countA, Float[] valuesB, int[] indicesB, int countB, int length) + private static Float L2DistSquaredSparse(ReadOnlySpan valuesA, ReadOnlySpan indicesA, ReadOnlySpan valuesB, ReadOnlySpan indicesB) { - Contracts.AssertValueOrNull(valuesA); - Contracts.AssertValueOrNull(indicesA); - Contracts.AssertValueOrNull(valuesB); - Contracts.AssertValueOrNull(indicesB); - Contracts.Assert(0 <= countA && countA <= Utils.Size(indicesA)); - Contracts.Assert(0 <= countB && countB <= Utils.Size(indicesB)); - Contracts.Assert(countA <= Utils.Size(valuesA)); - Contracts.Assert(countB <= Utils.Size(valuesB)); + Contracts.Assert(valuesA.Length == indicesA.Length); + Contracts.Assert(valuesB.Length == indicesB.Length); Float res = 0; int ia = 0; int ib = 0; - while (ia < countA && ib < countB) + while (ia < indicesA.Length && ib < indicesB.Length) { int diff = indicesA[ia] - indicesB[ib]; Float d; @@ -202,14 +200,14 @@ private static Float L2DistSquaredSparse(Float[] valuesA, int[] indicesA, int co res += d * d; } - while (ia < countA) + while (ia < indicesA.Length) { var d = valuesA[ia]; res += d * d; ia++; } - while (ib < countB) + while (ib < indicesB.Length) { var d = valuesB[ib]; res += d * d; @@ -219,30 +217,21 @@ private static Float L2DistSquaredSparse(Float[] valuesA, int[] indicesA, int co return res; } - private static Float L2DistSquaredHalfSparse(Float[] valuesA, int lengthA, Float[] valuesB, int[] indicesB, int countB) + private static Float L2DistSquaredHalfSparse(ReadOnlySpan valuesA, ReadOnlySpan valuesB, ReadOnlySpan indicesB) { - Contracts.AssertValueOrNull(valuesA); - Contracts.AssertValueOrNull(valuesB); - Contracts.AssertValueOrNull(indicesB); - Contracts.Assert(0 <= lengthA && lengthA <= Utils.Size(valuesA)); - Contracts.Assert(0 <= countB && countB <= Utils.Size(indicesB)); - Contracts.Assert(countB <= Utils.Size(valuesB)); - - var normA = CpuMathUtils.SumSq(valuesA.AsSpan(0, lengthA)); - if (countB == 0) + var normA = CpuMathUtils.SumSq(valuesA); + if (valuesB.Length == 0) return normA; - var normB = CpuMathUtils.SumSq(valuesB.AsSpan(0, countB)); - var dotP = CpuMathUtils.DotProductSparse(valuesA, valuesB, indicesB, countB); + var normB = CpuMathUtils.SumSq(valuesB); + var dotP = CpuMathUtils.DotProductSparse(valuesA, valuesB, indicesB, valuesB.Length); var res = normA + normB - 2 * dotP; return res < 0 ? 0 : res; } - private static Float L2DiffSquaredDense(Float[] valuesA, Float[] valuesB, int length) + private static Float L2DiffSquaredDense(ReadOnlySpan valuesA, ReadOnlySpan valuesB, int length) { - Contracts.AssertValueOrNull(valuesA); - Contracts.AssertValueOrNull(valuesB); - Contracts.Assert(0 <= length && length <= Utils.Size(valuesA)); - Contracts.Assert(0 <= length && length <= Utils.Size(valuesB)); + Contracts.Assert(0 <= length && length <= valuesA.Length); + Contracts.Assert(0 <= length && length <= valuesB.Length); if (length == 0) return 0; @@ -267,27 +256,31 @@ public static Float DotProductWithOffset(in VBuffer a, int offset, in VBu if (a.IsDense) { if (b.IsDense) - return CpuMathUtils.DotProductDense(a.Values.AsSpan(offset), b.Values, b.Length); - return CpuMathUtils.DotProductSparse(a.Values.AsSpan(offset), b.Values, b.Indices, b.Count); + return CpuMathUtils.DotProductDense(a.GetValues().Slice(offset), b.GetValues(), b.Length); + return CpuMathUtils.DotProductSparse(a.GetValues().Slice(offset), b.GetValues(), b.GetIndices(), b.Count); } else { Float result = 0; - int aMin = Utils.FindIndexSorted(a.Indices, 0, a.Count, offset); - int aLim = Utils.FindIndexSorted(a.Indices, 0, a.Count, offset + b.Length); + var aValues = a.GetValues(); + var aIndices = a.GetIndices(); + var bValues = b.GetValues(); + var bIndices = b.GetIndices(); + int aMin = Utils.FindIndexSorted(aIndices, 0, a.Count, offset); + int aLim = Utils.FindIndexSorted(aIndices, 0, a.Count, offset + b.Length); if (b.IsDense) { for (int iA = aMin; iA < aLim; ++iA) - result += a.Values[iA] * b.Values[a.Indices[iA] - offset]; + result += aValues[iA] * bValues[aIndices[iA] - offset]; return result; } for (int iA = aMin, iB = 0; iA < aLim && iB < b.Count; ) { - int aIndex = a.Indices[iA]; - int bIndex = b.Indices[iB]; + int aIndex = aIndices[iA]; + int bIndex = bIndices[iB]; int comp = (aIndex - offset) - bIndex; if (comp == 0) - result += a.Values[iA++] * b.Values[iB++]; + result += aValues[iA++] * bValues[iB++]; else if (comp < 0) iA++; else @@ -314,16 +307,16 @@ public static Float DotProductWithOffset(Float[] a, int offset, in VBuffer aValues, ReadOnlySpan aIndices, int ia, int iaLim, ReadOnlySpan bValues, ReadOnlySpan bIndices, int ib, int ibLim) { - Contracts.AssertValue(aValues); - Contracts.AssertValue(aIndices); - Contracts.AssertValue(bValues); - Contracts.AssertValue(bIndices); + Contracts.AssertNonEmpty(aValues); + Contracts.AssertNonEmpty(aIndices); + Contracts.AssertNonEmpty(bValues); + Contracts.AssertNonEmpty(bIndices); Contracts.Assert(0 <= ia && ia < iaLim && iaLim <= aIndices.Length); Contracts.Assert(0 <= ib && ib < ibLim && ibLim <= bIndices.Length); @@ -334,7 +327,7 @@ private static Float DotProductSparse(Float[] aValues, int[] aIndices, int ia, i for (; ; ) { - int d = aIndices[ia] - offset - bIndices[ib]; + int d = aIndices[ia] - bIndices[ib]; if (d == 0) { res += aValues[ia] * bValues[ib]; @@ -347,7 +340,7 @@ private static Float DotProductSparse(Float[] aValues, int[] aIndices, int ia, i { ia++; if (d < -thresh) - ia = Utils.FindIndexSorted(aIndices, ia, iaLim, bIndices[ib] + offset); + ia = Utils.FindIndexSorted(aIndices, ia, iaLim, bIndices[ib]); if (ia >= iaLim) break; } @@ -355,7 +348,7 @@ private static Float DotProductSparse(Float[] aValues, int[] aIndices, int ia, i { ib++; if (d > thresh) - ib = Utils.FindIndexSorted(bIndices, ib, ibLim, aIndices[ia] - offset); + ib = Utils.FindIndexSorted(bIndices, ib, ibLim, aIndices[ia]); if (ib >= ibLim) break; } @@ -401,12 +394,12 @@ public static Float L2DistSquared(in VBuffer a, in VBuffer b) if (a.IsDense) { if (b.IsDense) - return L2DiffSquaredDense(a.Values, b.Values, b.Length); - return L2DistSquaredHalfSparse(a.Values, a.Length, b.Values, b.Indices, b.Count); + return L2DiffSquaredDense(a.GetValues(), b.GetValues(), b.Length); + return L2DistSquaredHalfSparse(a.GetValues(), b.GetValues(), b.GetIndices()); } if (b.IsDense) - return L2DistSquaredHalfSparse(b.Values, b.Length, a.Values, a.Indices, a.Count); - return L2DistSquaredSparse(a.Values, a.Indices, a.Count, b.Values, b.Indices, b.Count, a.Length); + return L2DistSquaredHalfSparse(b.GetValues(), a.GetValues(), a.GetIndices()); + return L2DistSquaredSparse(a.GetValues(), a.GetIndices(), b.GetValues(), b.GetIndices()); } /// @@ -420,8 +413,8 @@ public static Float L2DistSquared(Float[] a, in VBuffer b) Contracts.CheckValue(a, nameof(a)); Contracts.Check(Utils.Size(a) == b.Length, "Vectors must have the same dimensionality."); if (b.IsDense) - return L2DiffSquaredDense(a, b.Values, b.Length); - return L2DistSquaredHalfSparse(a, a.Length, b.Values, b.Indices, b.Count); + return L2DiffSquaredDense(a, b.GetValues(), b.Length); + return L2DistSquaredHalfSparse(a.AsSpan(0, a.Length), b.GetValues(), b.GetIndices()); } /// @@ -451,12 +444,14 @@ public static void AddMult(in VBuffer src, Float[] dst, Float c) if (src.Count == 0 || c == 0) return; + var srcValues = src.GetValues(); if (src.IsDense) - CpuMathUtils.AddScale(c, src.Values, dst, src.Count); + CpuMathUtils.AddScale(c, srcValues, dst, src.Count); else { + var srcIndices = src.GetIndices(); for (int i = 0; i < src.Count; i++) - dst[src.Indices[i]] += c * src.Values[i]; + dst[srcIndices[i]] += c * srcValues[i]; } } @@ -477,15 +472,17 @@ public static void AddMultWithOffset(in VBuffer src, Float[] dst, int off if (src.Count == 0 || c == 0) return; + var srcValues = src.GetValues(); if (src.IsDense) { for (int i = 0; i < src.Length; i++) - dst[i + offset] += c * src.Values[i]; + dst[i + offset] += c * srcValues[i]; } else { + var srcIndices = src.GetIndices(); for (int i = 0; i < src.Count; i++) - dst[src.Indices[i] + offset] += c * src.Values[i]; + dst[srcIndices[i] + offset] += c * srcValues[i]; } } diff --git a/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs b/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs index d6447256189..ca28f78d9fb 100644 --- a/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs @@ -701,7 +701,8 @@ private ValueGetter> MakeVecTrivialGetter() // Delegates onto instance methods are more efficient than delegates onto static methods. private void VecTrivialGetter(ref VBuffer value) { - value = new VBuffer(1, 0, value.Values, value.Indices); + VBufferMutationContext.Create(ref value, 1, 0) + .Complete(ref value); } private Delegate MakeVecGetter(IRow input, int iinfo) diff --git a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs index 64b510a6559..029dc15d20d 100644 --- a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs +++ b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs @@ -104,11 +104,11 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) } int newLength = DstLength == 0 ? ComputeLength(src.Length) : DstLength; - var values = dst.Values; if (newLength == 0) { // All slots dropped. - dst = new VBuffer(1, 0, dst.Values, dst.Indices); + VBufferMutationContext.Create(ref dst, 1, 0) + .Complete(ref dst); return; } @@ -116,12 +116,11 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) // End of the trivial cases // At this point, we need to drop some slots and keep some slots. + VBufferMutationContext mutation; + var srcValues = src.GetValues(); if (src.IsDense) { - Contracts.Assert(Utils.Size(values) == Utils.Size(src.Values) || src.Values != dst.Values); - - if (Utils.Size(values) < newLength) - values = new TDst[newLength]; + mutation = VBufferMutationContext.Create(ref dst, newLength); int iDst = 0; int iSrc = 0; @@ -131,33 +130,29 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) while (iSrc < lim) { Contracts.Assert(iDst <= iSrc); - values[iDst++] = src.Values[iSrc++]; + mutation.Values[iDst++] = srcValues[iSrc++]; } iSrc = SlotsMax[i] + 1; } while (iSrc < src.Length) { Contracts.Assert(iDst <= iSrc); - values[iDst++] = src.Values[iSrc++]; + mutation.Values[iDst++] = srcValues[iSrc++]; } Contracts.Assert(iDst == newLength); - dst = new VBuffer(newLength, values, dst.Indices); + mutation.Complete(ref dst); return; } // Sparse case. // Approximate new count is min(#indices, newLength). var newCount = Math.Min(src.Count, newLength); - var indices = dst.Indices; + var indices = dst.GetIndices(); + var srcIndices = src.GetIndices(); Contracts.Assert(newCount <= src.Length); - Contracts.Assert(Utils.Size(values) == Utils.Size(src.Values) || src.Values != dst.Values); - Contracts.Assert(Utils.Size(indices) == Utils.Size(src.Indices) || src.Indices != dst.Indices); - if (Utils.Size(indices) < newCount) - indices = new int[newCount]; - if (Utils.Size(values) < newCount) - values = new TDst[newCount]; + mutation = VBufferMutationContext.Create(ref dst, newLength, newCount); int iiDst = 0; int iiSrc = 0; @@ -170,12 +165,12 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) while (iiSrc < src.Count) { // Copy (with offset) the elements before the current range. - var index = src.Indices[iiSrc]; + var index = srcIndices[iiSrc]; if (index < min) { Contracts.Assert(iiDst <= iiSrc); - indices[iiDst] = index - iOffset; - values[iiDst++] = src.Values[iiSrc++]; + mutation.Indices[iiDst] = index - iOffset; + mutation.Values[iiDst++] = srcValues[iiSrc++]; continue; } if (index <= max) @@ -211,7 +206,10 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) Contracts.Assert(index <= max); } - dst = new VBuffer(newLength, iiDst, values, indices); + mutation.Complete(ref dst); + // now change the ValuesCount to iiDst to be correct + VBufferMutationContext.Create(ref dst, newLength, iiDst) + .Complete(ref dst); } } } diff --git a/src/Microsoft.ML.FastTree/TreeEnsemble/RegressionTree.cs b/src/Microsoft.ML.FastTree/TreeEnsemble/RegressionTree.cs index 05d809f8b4d..50e965e1a3c 100644 --- a/src/Microsoft.ML.FastTree/TreeEnsemble/RegressionTree.cs +++ b/src/Microsoft.ML.FastTree/TreeEnsemble/RegressionTree.cs @@ -762,7 +762,7 @@ public int GetLeaf(in VBuffer feat) { // REVIEW: This really should validate feat.Length! if (feat.IsDense) - return GetLeafCore(feat.Values); + return GetLeafCore(feat.GetValues()); return GetLeafCore(feat.GetIndices(), feat.GetValues()); } @@ -778,7 +778,7 @@ private int GetLeafFrom(in VBuffer feat, int root) } if (feat.IsDense) - return GetLeafCore(feat.Values, root: root); + return GetLeafCore(feat.GetValues(), root: root); return GetLeafCore(feat.GetIndices(), feat.GetValues(), root: root); } @@ -796,8 +796,9 @@ public int GetLeaf(in VBuffer feat, ref List path) path.Clear(); if (feat.IsDense) - return GetLeafCore(feat.Values, path); + return GetLeafCore(feat.GetValues(), path); return GetLeafCore(feat.GetIndices(), feat.GetValues(), path); + } private Float GetFeatureValue(Float x, int node) @@ -816,9 +817,8 @@ private Float GetFeatureValue(Float x, int node) } } - private int GetLeafCore(Float[] nonBinnedInstance, List path = null, int root = 0) + private int GetLeafCore(ReadOnlySpan nonBinnedInstance, List path = null, int root = 0) { - Contracts.AssertValue(nonBinnedInstance); Contracts.Assert(path == null || path.Count == 0); Contracts.Assert(root >= 0); @@ -907,6 +907,7 @@ private int GetLeafCore(ReadOnlySpan featIndices, ReadOnlySpan featV if (NumLeaves == 1) return 0; + int count = featIndices.Length; int node = root; while (node >= 0) @@ -921,13 +922,13 @@ private int GetLeafCore(ReadOnlySpan featIndices, ReadOnlySpan featV //REVIEW: Consider experimenting with bitmap instead of doing log(n) binary search. int newNode = LteChild[node]; - int end = featIndices.FindIndexSorted(0, count, CategoricalSplitFeatureRanges[node][1]); - for (int i = featIndices.FindIndexSorted(0, count, CategoricalSplitFeatureRanges[node][0]); + int end = Utils.FindIndexSorted(featIndices, 0, count, CategoricalSplitFeatureRanges[node][1]); + for (int i = Utils.FindIndexSorted(featIndices, 0, count, CategoricalSplitFeatureRanges[node][0]); i < count && i <= end; ++i) { int index = featIndices[i]; - if (CategoricalSplitFeatures[node].TryFindIndexSorted(0, CategoricalSplitFeatures[node].Length, index, out int ii)) + if (Utils.TryFindIndexSorted(CategoricalSplitFeatures[node], 0, CategoricalSplitFeatures[node].Length, index, out int ii)) { Float val = GetFeatureValue(featValues[i], node); if (val > 0.0f) @@ -945,7 +946,7 @@ private int GetLeafCore(ReadOnlySpan featIndices, ReadOnlySpan featV Float val = 0; int ifeat = SplitFeatures[node]; - int ii = featIndices.FindIndexSorted(0, count, ifeat); + int ii = Utils.FindIndexSorted(featIndices, 0, count, ifeat); if (ii < count && featIndices[ii] == ifeat) val = featValues[ii]; val = GetFeatureValue(val, node); diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs index c325f87a4d0..5f3cdb7a8d4 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs @@ -648,7 +648,7 @@ private static void FindBestCluster(in VBuffer point, int pointRowIndex, if (pointRowIndex != -1) // if the space was available for cur in initializationState. { // pointNorm is necessary for using triangle inequality. - float pointNorm = VectorUtils.NormSquared(point); + float pointNorm = VectorUtils.NormSquared(in point); // We have cached distance information for this point. bestCluster = initializationState.GetBestCluster(pointRowIndex); float bestWeight = initializationState.GetBestWeight(pointRowIndex); @@ -781,6 +781,8 @@ public static void Initialize(IHost host, int numThreads, IChannel ch, FeatureFl // The final chosen points, to be approximately clustered to determine starting // centroids. VBuffer[] clusters = new VBuffer[totalSamples]; + VBuffer[] readOnlyClusters = null; + // L2s, kept for distance trick. float[] clustersL2s = new float[totalSamples]; @@ -852,6 +854,9 @@ public static void Initialize(IHost host, int numThreads, IChannel ch, FeatureFl clusterCount++; } ch.Assert(clusterCount - clusterPrevCount <= numSamplesPerRound); + + KMeansUtils.UpdateReadOnlyCache(clusters, ref readOnlyClusters); + logicalExternalRounds++; pCh.Checkpoint(logicalExternalRounds, numRounds + 2); } @@ -1309,9 +1314,11 @@ public static void Train(IHost host, int numThreads, IChannel ch, FeatureFloatVe Initialize(ch, cursorFactory, totalTrainingInstances, numThreads, k, dimensionality, accelMemBudgetInMb, out state, out workState, out reducedState); float[] centroidL2s = new float[k]; + VBuffer[] readOnlyCentroids = new VBuffer[centroids.Length]; + KMeansUtils.UpdateReadOnlyCache(centroids, ref readOnlyCentroids); for (int i = 0; i < k; i++) - centroidL2s[i] = VectorUtils.NormSquared(centroids[i]); + centroidL2s[i] = VectorUtils.NormSquared(in readOnlyCentroids[i]); using (var pch = host.StartProgressChannel("KMeansTrain")) { @@ -1338,7 +1345,7 @@ public static void Train(IHost host, int numThreads, IChannel ch, FeatureFloatVe ops[i] = new Action(() => { using (var cursor = set[chunkId]) - ProcessChunk(cursor, state, workState[chunkId], k, centroids, centroidL2s); + ProcessChunk(cursor, state, workState[chunkId], k, readOnlyCentroids, centroidL2s); }); } @@ -1350,7 +1357,7 @@ public static void Train(IHost host, int numThreads, IChannel ch, FeatureFloatVe else { using (var cursor = cursorFactory.Create()) - ProcessChunk(cursor, state, reducedState, k, centroids, centroidL2s); + ProcessChunk(cursor, state, reducedState, k, readOnlyCentroids, centroidL2s); } WorkChunkState.Reduce(workState, reducedState); @@ -1387,11 +1394,13 @@ public static void Train(IHost host, int numThreads, IChannel ch, FeatureFloatVe } #endif reducedState.UpdateClusters(centroids, centroidL2s, state.Delta, ref state.DeltaMax); + KMeansUtils.UpdateReadOnlyCache(centroids, ref readOnlyCentroids); + isConverged = reducedState.AverageScoreDelta < convergenceThreshold; state.Iteration++; if (state.Iteration % 100 == 0) - KMeansUtils.VerifyModelConsistency(centroids); + KMeansUtils.VerifyModelConsistency(readOnlyCentroids); } } } @@ -1788,5 +1797,23 @@ public static void VerifyModelConsistency(VBuffer[] centroids) foreach (var centroid in centroids) Contracts.Check(centroid.Items().Select(x => x.Value).All(FloatUtils.IsFinite), "Model training failed: non-finite coordinates are generated"); } + + /// + /// Checks that all coordinates of all centroids are finite, and throws otherwise + /// + public static void VerifyModelConsistency(VBuffer[] centroids) + { + for (int i = 0; i < centroids.Length; i++) + Contracts.Check(centroids[i].GetValues().All(FloatUtils.IsFinite), "Model training failed: non-finite coordinates are generated"); + } + + public static void UpdateReadOnlyCache(VBuffer[] source, ref VBuffer[] destination) + { + Utils.EnsureSize(ref destination, source.Length); + for (int i = 0; i < source.Length; i++) + { + destination[i] = source[i]; + } + } } } diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs b/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs index 326b549a989..a9fc5e0de72 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs @@ -162,7 +162,7 @@ private void Map(in VBuffer src, Float[] distances) { Host.Assert(Utils.Size(distances) >= _k); - Float instanceL2 = VectorUtils.NormSquared(src); + Float instanceL2 = VectorUtils.NormSquared(in src); for (int i = 0; i < _k; i++) { Float distance = Math.Max(0, diff --git a/src/Microsoft.ML.StandardLearners/Optimizer/DifferentiableFunction.cs b/src/Microsoft.ML.StandardLearners/Optimizer/DifferentiableFunction.cs index ee928e5cdaa..3c8ba567241 100644 --- a/src/Microsoft.ML.StandardLearners/Optimizer/DifferentiableFunction.cs +++ b/src/Microsoft.ML.StandardLearners/Optimizer/DifferentiableFunction.cs @@ -247,7 +247,7 @@ public static Float Test(DifferentiableFunction f, in VBuffer x, bool qui /// /// /// - public static void TestAllCoords(DifferentiableFunction f, ref VBuffer x) + public static void TestAllCoords(DifferentiableFunction f, in VBuffer x) { // REVIEW: Delete this method? VBuffer grad = default(VBuffer); @@ -286,7 +286,7 @@ public static void TestAllCoords(DifferentiableFunction f, ref VBuffer x) /// Function to test /// Point at which to test /// List of coordinates to test - public static void TestCoords(DifferentiableFunction f, ref VBuffer x, IList coords) + public static void TestCoords(DifferentiableFunction f, in VBuffer x, IList coords) { // REVIEW: Delete this method? VBuffer grad = default(VBuffer); diff --git a/src/Microsoft.ML.StandardLearners/Optimizer/OptimizationMonitor.cs b/src/Microsoft.ML.StandardLearners/Optimizer/OptimizationMonitor.cs index 7b231bb0277..705c9f8477d 100644 --- a/src/Microsoft.ML.StandardLearners/Optimizer/OptimizationMonitor.cs +++ b/src/Microsoft.ML.StandardLearners/Optimizer/OptimizationMonitor.cs @@ -85,7 +85,7 @@ private Float Check(Optimizer.OptimizerState state) { Console.Error.Write(_checkingMessage); Console.Error.Flush(); - var x = state.X; + VBuffer x = state.X; var lastDir = state.LastDir; Float checkResult = GradientTester.Test(state.Function, in x, ref lastDir, true, ref _newGrad, ref _newX); for (int i = 0; i < _checkingMessage.Length; i++) diff --git a/src/Microsoft.ML.StandardLearners/Optimizer/Optimizer.cs b/src/Microsoft.ML.StandardLearners/Optimizer/Optimizer.cs index 4b8c1e2cc40..e1a9accca7a 100644 --- a/src/Microsoft.ML.StandardLearners/Optimizer/Optimizer.cs +++ b/src/Microsoft.ML.StandardLearners/Optimizer/Optimizer.cs @@ -645,7 +645,7 @@ public void Minimize(DifferentiableFunction function, ref VBuffer initial double? improvement = null; double x; int end; - if (message != null && DoubleParser.TryParse(message.AsMemory().Span, out x, out end)) + if (message != null && DoubleParser.TryParse(message.AsSpan(), out x, out end)) improvement = x; pch.Checkpoint(state.Value, improvement, state.Iter); diff --git a/src/Microsoft.ML.StandardLearners/Optimizer/SgdOptimizer.cs b/src/Microsoft.ML.StandardLearners/Optimizer/SgdOptimizer.cs index 67fcf1c18b7..b814ee187e3 100644 --- a/src/Microsoft.ML.StandardLearners/Optimizer/SgdOptimizer.cs +++ b/src/Microsoft.ML.StandardLearners/Optimizer/SgdOptimizer.cs @@ -349,7 +349,7 @@ public void ChangeDir() /// Function to minimize /// Initial point /// Approximate minimum - public void Minimize(DifferentiableFunction function, ref VBuffer initial, ref VBuffer result) + public void Minimize(DifferentiableFunction function, in VBuffer initial, ref VBuffer result) { Contracts.Check(FloatUtils.IsFinite(initial.GetValues()), "The initial vector contains NaNs or infinite values."); LineFunc lineFunc = new LineFunc(function, in initial, UseCG); diff --git a/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs index 58056292309..b4e1ded8002 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs @@ -772,7 +772,7 @@ protected virtual void TrainWithoutLock(IProgressChannelProvider progress, Float while (cursor.MoveNext()) { long idx = getIndexFromId(cursor.Id); - var features = cursor.Features; + VBuffer features = cursor.Features; var label = cursor.Label; float invariant; if (invariants != null) @@ -830,9 +830,9 @@ protected virtual void TrainWithoutLock(IProgressChannelProvider progress, Float } if (features.IsDense) - CpuMathUtils.SdcaL1UpdateDense(primalUpdate, features.Count, features.Values, l1Threshold, l1IntermediateWeights[0].Values, weights[0].Values); + CpuMathUtils.SdcaL1UpdateDense(primalUpdate, features.Count, features.GetValues(), l1Threshold, l1IntermediateWeights[0].Values, weights[0].Values); else if (features.Count > 0) - CpuMathUtils.SdcaL1UpdateSparse(primalUpdate, features.Count, features.Values, features.Indices, l1Threshold, l1IntermediateWeights[0].Values, weights[0].Values); + CpuMathUtils.SdcaL1UpdateSparse(primalUpdate, features.Count, features.GetValues(), features.GetIndices(), l1Threshold, l1IntermediateWeights[0].Values, weights[0].Values); } break; @@ -919,6 +919,7 @@ protected virtual bool CheckConvergence( var lossSum = new CompensatedSum(); var dualLossSum = new CompensatedSum(); var biasTotal = biasReg[0] + biasUnreg[0]; + VBuffer firstWeights = weights[0]; using (var cursor = cursorFactory.Create()) { @@ -955,7 +956,7 @@ protected virtual bool CheckConvergence( var dualityGap = metrics[(int)MetricKind.DualityGap] = newLoss - newDualLoss; metrics[(int)MetricKind.BiasUnreg] = biasUnreg[0]; metrics[(int)MetricKind.BiasReg] = biasReg[0]; - metrics[(int)MetricKind.L1Sparsity] = Args.L1Threshold == 0 ? 1 : (Double)weights[0].Values.Count(w => w != 0) / weights.Length; + metrics[(int)MetricKind.L1Sparsity] = Args.L1Threshold == 0 ? 1 : (Double)firstWeights.GetValues().Count(w => w != 0) / weights.Length; bool converged = dualityGap / newLoss < Args.ConvergenceTolerance; @@ -964,7 +965,7 @@ protected virtual bool CheckConvergence( // Maintain a copy of weights and bias with best primal loss thus far. // This is some extra work and uses extra memory, but it seems worth doing it. // REVIEW: Sparsify bestWeights? - weights[0].CopyTo(ref bestWeights[0]); + firstWeights.CopyTo(ref bestWeights[0]); bestBiasReg[0] = biasReg[0]; bestBiasUnreg[0] = biasUnreg[0]; bestPrimalLoss = metrics[(int)MetricKind.Loss]; diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs index f17e617e29c..d451ac96746 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs @@ -595,11 +595,15 @@ protected virtual float DifferentiableFunction(in VBuffer x, ref VBuffer< Contracts.AssertValueOrNull(progress); float scaleFactor = 1 / (float)WeightSum; - VBuffer xDense = default(VBuffer); + VBuffer xDense = default; if (x.IsDense) xDense = x; else - x.CopyToDense(ref xDense); + { + VBuffer xDenseTemp = default; + x.CopyToDense(ref xDenseTemp); + xDense = xDenseTemp; + } IProgressChannel pch = progress != null ? progress.StartProgressChannel("Gradient") : null; float loss; @@ -613,7 +617,7 @@ protected virtual float DifferentiableFunction(in VBuffer x, ref VBuffer< if (L2Weight > 0) { Contracts.Assert(xDense.IsDense); - var values = xDense.Values; + var values = xDense.GetValues(); Double r = 0; for (int i = BiasCount; i < values.Length; i++) { diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs index 4a6a9e44137..0215ef73586 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs @@ -673,11 +673,12 @@ protected override void SaveCore(ModelSaveContext ctx) { if (fw.IsDense) { + var fwValues = fw.GetValues(); for (int i = 0; i < fw.Length; i++) { - if (fw.Values[i] != 0) + if (fwValues[i] != 0) { - ctx.Writer.Write(fw.Values[i]); + ctx.Writer.Write(fwValues[i]); count++; } } @@ -712,21 +713,11 @@ protected override void SaveCore(ModelSaveContext ctx) private static int NonZeroCount(in VBuffer vector) { int count = 0; - if (!vector.IsDense) - { - for (int i = 0; i < vector.Count; i++) - { - if (vector.Values[i] != 0) - count++; - } - } - else + var values = vector.GetValues(); + for (int i = 0; i < values.Length; i++) { - for (int i = 0; i < vector.Length; i++) - { - if (vector.Values[i] != 0) - count++; - } + if (values[i] != 0) + count++; } return count; } diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs index deaa204b034..71498518832 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs @@ -167,7 +167,7 @@ protected override void TrainWithoutLock(IProgressChannelProvider progress, Floa } else { - normSquared = VectorUtils.NormSquared(features); + normSquared = VectorUtils.NormSquared(in features); if (Args.BiasLearningRate == 0) normSquared += 1; @@ -241,9 +241,9 @@ protected override void TrainWithoutLock(IProgressChannelProvider progress, Floa } if (features.IsDense) - CpuMathUtils.SdcaL1UpdateDense(-primalUpdate, features.Count, features.Values, l1Threshold, l1IntermediateWeights[iClass].Values, weights[iClass].Values); + CpuMathUtils.SdcaL1UpdateDense(-primalUpdate, features.Count, features.GetValues(), l1Threshold, l1IntermediateWeights[iClass].Values, weights[iClass].Values); else if (features.Count > 0) - CpuMathUtils.SdcaL1UpdateSparse(-primalUpdate, features.Count, features.Values, features.Indices, l1Threshold, l1IntermediateWeights[iClass].Values, weights[iClass].Values); + CpuMathUtils.SdcaL1UpdateSparse(-primalUpdate, features.Count, features.GetValues(), features.GetIndices(), l1Threshold, l1IntermediateWeights[iClass].Values, weights[iClass].Values); } break; @@ -268,9 +268,9 @@ protected override void TrainWithoutLock(IProgressChannelProvider progress, Floa : 0; if (features.IsDense) - CpuMathUtils.SdcaL1UpdateDense(labelPrimalUpdate, features.Count, features.Values, l1Threshold, l1IntermediateWeights[label].Values, weights[label].Values); + CpuMathUtils.SdcaL1UpdateDense(labelPrimalUpdate, features.Count, features.GetValues(), l1Threshold, l1IntermediateWeights[label].Values, weights[label].Values); else if (features.Count > 0) - CpuMathUtils.SdcaL1UpdateSparse(labelPrimalUpdate, features.Count, features.Values, features.Indices, l1Threshold, l1IntermediateWeights[label].Values, weights[label].Values); + CpuMathUtils.SdcaL1UpdateSparse(labelPrimalUpdate, features.Count, features.GetValues(), features.GetIndices(), l1Threshold, l1IntermediateWeights[label].Values, weights[label].Values); } rowCount++;