Skip to content

Commit

Permalink
Add Apply to ArrowStringDataFrameColumn (dotnet#2889)
Browse files Browse the repository at this point in the history
  • Loading branch information
Prashanth Govindarajan authored Mar 23, 2020
1 parent 4072f96 commit a6c34d0
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 1 deletion.
19 changes: 19 additions & 0 deletions src/Microsoft.Data.Analysis/ArrowStringDataFrameColumn.cs
Original file line number Diff line number Diff line change
Expand Up @@ -601,5 +601,24 @@ public override PrimitiveDataFrameColumn<bool> ElementwiseNotEquals(DataFrameCol
{
return StringDataFrameColumn.ElementwiseNotEqualsImplementation(this, column);
}

/// <summary>
/// Applies a function to every value in this column and collects the results in a new column
/// </summary>
/// <param name="func">The function to apply. It is invoked once per row with the value returned by the indexer for that row</param>
/// <returns>A new <see cref="ArrowStringDataFrameColumn"/>, named after this column, containing the new string values</returns>
/// <exception cref="ArgumentNullException"><paramref name="func"/> is null</exception>
/// <remarks>This function converts from UTF-8 to UTF-16 strings, and re-encodes each result as UTF-8 before appending it</remarks>
public ArrowStringDataFrameColumn Apply(Func<string, string> func)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException
    // raised from inside the loop (or no failure at all on an empty column).
    if (func == null)
    {
        throw new ArgumentNullException(nameof(func));
    }

    ArrowStringDataFrameColumn ret = new ArrowStringDataFrameColumn(Name);
    Encoding encoding = Encoding.UTF8;
    for (long i = 0; i < Length; i++)
    {
        string funcResult = func(this[i]);
        // A null result is forwarded to Append as a default (empty) span rather than encoded bytes.
        ret.Append(funcResult != null ? encoding.GetBytes(funcResult) : default(ReadOnlySpan<byte>));
    }
    return ret;
}
}
}
35 changes: 34 additions & 1 deletion tests/Microsoft.Data.Analysis.Tests/DataFrameTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public static DataFrame MakeDataFrameWithTwoColumns(int length, bool withNulls =
return dataFrame;
}

public static DataFrameColumn CreateArrowStringColumn(int length, bool withNulls = true)
public static ArrowStringDataFrameColumn CreateArrowStringColumn(int length, bool withNulls = true)
{
byte[] dataMemory = new byte[length * 3];
byte[] nullMemory = new byte[BitUtility.ByteCount(length)];
Expand Down Expand Up @@ -2180,6 +2180,39 @@ public void TestBinaryOperationsOnExplodedNumericColumns()
Assert.False(reverseInPlace.ElementwiseEquals(reverse).All());
}

[Fact]
public void TestArrowStringApply()
{
    // Append a suffix to every non-null value; nulls must stay null.
    ArrowStringDataFrameColumn column = CreateArrowStringColumn(10);
    ArrowStringDataFrameColumn ret = column.Apply((string cur) => cur != null ? cur + "123" : null);

    for (long row = 0; row < column.Length; row++)
    {
        string original = column[row];
        if (original == null)
        {
            Assert.Null(ret[row]);
            continue;
        }
        Assert.Equal(original + "123", ret[row]);
    }
    Assert.Equal(1, ret.NullCount);

    // Test null counts: a function that always returns null yields an all-null column
    ret = column.Apply((string cur) => null);
    Assert.Equal(column.Length, ret.NullCount);
}

[Fact]
public void GetColumnTests()
{
Expand Down

0 comments on commit a6c34d0

Please sign in to comment.