Skip to content

Commit

Permalink
Add Apply<TResult> method to PrimitiveDataFrameColumn (dotnet#2807)
Browse files Browse the repository at this point in the history
* Add Apply method to PrimitiveDataFrameColumn and its container

* Add TestApply test

* Remove unused df variable in DataFrameTests

* Add xml doc comments to Apply method
  • Loading branch information
zHaytam authored and Prashanth Govindarajan committed Jan 13, 2020
1 parent 838350b commit 0fa210d
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 0 deletions.
34 changes: 34 additions & 0 deletions src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,40 @@ public void ApplyElementwise(Func<T?, long, T?> func)
}
}

/// <summary>
/// Applies <paramref name="func"/> to every element of this container and stores each
/// result, together with its validity bit, in <paramref name="resultContainer"/>.
/// </summary>
/// <remarks>
/// Slot <c>i</c> of buffer <c>b</c> in this container is written to slot <c>i</c> of buffer <c>b</c>
/// in <paramref name="resultContainer"/>.
/// NOTE(review): this assumes the result container already holds buffers laid out like
/// this container's (same count, at least the same lengths) - confirm callers guarantee it.
/// </remarks>
/// <typeparam name="TResult">Element type of the result container</typeparam>
/// <param name="func">
/// Invoked once per element. It receives null when IsValid reports the source slot as
/// not valid; a null return value clears the validity bit of the result slot.
/// </param>
/// <param name="resultContainer">Container that receives the computed values</param>
public void Apply<TResult>(Func<T?, TResult?> func, PrimitiveColumnContainer<TResult> resultContainer)
    where TResult : unmanaged
{
    for (int b = 0; b < Buffers.Count; b++)
    {
        // NOTE(review): the source buffers are only read below, yet they are still swapped
        // for their mutable counterparts exactly as before. Kept to preserve behavior;
        // consider reading through a read-only view instead.
        DataFrameBuffer<T> mutableBuffer = DataFrameBuffer<T>.GetMutableBuffer(Buffers[b]);
        Buffers[b] = mutableBuffer;
        Span<T> span = mutableBuffer.Span;
        DataFrameBuffer<byte> mutableNullBitMapBuffer = DataFrameBuffer<byte>.GetMutableBuffer(NullBitMapBuffers[b]);
        NullBitMapBuffers[b] = mutableNullBitMapBuffer;
        Span<byte> nullBitMapSpan = mutableNullBitMapBuffer.Span;

        // The result buffers are written to, so they must be mutable.
        DataFrameBuffer<TResult> resultMutableBuffer = DataFrameBuffer<TResult>.GetMutableBuffer(resultContainer.Buffers[b]);
        resultContainer.Buffers[b] = resultMutableBuffer;
        Span<TResult> resultSpan = resultMutableBuffer.Span;
        DataFrameBuffer<byte> resultMutableNullBitMapBuffer = DataFrameBuffer<byte>.GetMutableBuffer(resultContainer.NullBitMapBuffers[b]);
        resultContainer.NullBitMapBuffers[b] = resultMutableNullBitMapBuffer;
        Span<byte> resultNullBitMapSpan = resultMutableNullBitMapBuffer.Span;

        for (int i = 0; i < span.Length; i++)
        {
            bool isValid = IsValid(nullBitMapSpan, i);
            TResult? value = func(isValid ? span[i] : default(T?));
            resultSpan[i] = value.GetValueOrDefault();

            // The bitmap span being updated belongs to resultContainer, so the update is
            // routed through that instance rather than through this one.
            // NOTE(review): presumably SetValidityBit also maintains per-container
            // bookkeeping (e.g. a null count); if so this keeps it on the right container - confirm.
            resultContainer.SetValidityBit(resultNullBitMapSpan, i, value.HasValue);
        }
    }
}

// Faster to use when we already have a span since it avoids indexing
public bool IsValid(ReadOnlySpan<byte> bitMapBufferSpan, int index)
{
Expand Down
13 changes: 13 additions & 0 deletions src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,19 @@ public override Dictionary<TKey, ICollection<long>> GroupColumnValues<TKey>()

/// <summary>
/// Forwards <paramref name="func"/> to the ApplyElementwise method of the underlying column container.
/// </summary>
/// <param name="func">
/// Function taking a nullable value and a long (presumably the element's row index - confirm
/// against the container implementation) and returning a nullable value
/// </param>
public void ApplyElementwise(Func<T?, long, T?> func)
{
    _columnContainer.ApplyElementwise(func);
}

/// <summary>
/// Applies a function to all the values and collects the results in a new column
/// </summary>
/// <typeparam name="TResult">The new column's type</typeparam>
/// <param name="func">The function to apply; it is handed to the underlying column container, which invokes it</param>
/// <returns>A new PrimitiveDataFrameColumn, named "Result" and created with this column's length, containing the new values</returns>
/// <exception cref="ArgumentNullException"><paramref name="func"/> is null</exception>
public PrimitiveDataFrameColumn<TResult> Apply<TResult>(Func<T?, TResult?> func) where TResult : unmanaged
{
    // Fail fast with a descriptive exception instead of letting a null delegate surface
    // later inside the container (or go unnoticed when there is nothing to iterate).
    if (func == null)
    {
        throw new ArgumentNullException(nameof(func));
    }

    var resultColumn = new PrimitiveDataFrameColumn<TResult>("Result", Length);
    _columnContainer.Apply(func, resultColumn._columnContainer);
    return resultColumn;
}

/// <summary>
/// Clips values beyond the specified thresholds
/// </summary>
Expand Down
17 changes: 17 additions & 0 deletions tests/Microsoft.Data.Analysis.Tests/DataFrameTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1946,6 +1946,7 @@ public void TestMutationOnRows()
}
}
}

[Fact]
public void TestAppendRow()
{
Expand Down Expand Up @@ -2027,5 +2028,21 @@ public void TestAppendEmptyValue()
Assert.Equal(13, df.Rows.Count);
Assert.Equal(1, df.Columns[2].NullCount);
}

/// <summary>
/// Verifies that Apply produces a new column holding the mapped values and that the
/// values read back from the source column are unchanged.
/// </summary>
[Fact]
public void TestApply()
{
    int[] values = { 1, 2, 3, 4, 5 };
    var col = new PrimitiveDataFrameColumn<int>("Ints", values);

    PrimitiveDataFrameColumn<double> newCol = col.Apply(i => i + 0.5d);

    Assert.Equal(values.Length, newCol.Length);

    for (int i = 0; i < newCol.Length; i++)
    {
        // xUnit expects (expected, actual); keeping that order makes failure messages accurate.
        Assert.Equal(values[i], col[i]); // Make sure values didn't change
        Assert.Equal(values[i] + 0.5d, newCol[i]);
    }
}
}
}

0 comments on commit 0fa210d

Please sign in to comment.