Patch summary (free text placed before the first "diff --git" header):
  * ArrowStringDataFrameColumn.cs: adds Apply(Func<string, string>). It calls func
    on every element of the column (null elements are passed through to func) and
    appends either the UTF-8 bytes of the result or, when func returns null, a
    default ReadOnlySpan<byte> to a new column created with the same Name.
  * DataFrameTests.cs: narrows the declared return type of the CreateArrowStringColumn
    helper to ArrowStringDataFrameColumn and adds TestArrowStringApply, which checks
    the mapped values and the resulting NullCount.

diff --git a/src/Microsoft.Data.Analysis/ArrowStringDataFrameColumn.cs b/src/Microsoft.Data.Analysis/ArrowStringDataFrameColumn.cs
index 6b98285842..465064e634 100644
--- a/src/Microsoft.Data.Analysis/ArrowStringDataFrameColumn.cs
+++ b/src/Microsoft.Data.Analysis/ArrowStringDataFrameColumn.cs
@@ -601,5 +601,24 @@ public override PrimitiveDataFrameColumn<bool> ElementwiseNotEquals(DataFrameCol
         {
             return StringDataFrameColumn.ElementwiseNotEqualsImplementation(this, column);
         }
+
+        /// <summary>
+        /// Applies a function to all the values
+        /// </summary>
+        /// <param name="func">The function to apply</param>
+        /// <returns>A <see cref="ArrowStringDataFrameColumn"/> containing the new string values</returns>
+        /// <remarks>This function converts from UTF-8 to UTF-16 strings</remarks>
+        public ArrowStringDataFrameColumn Apply(Func<string, string> func)
+        {
+            ArrowStringDataFrameColumn ret = new ArrowStringDataFrameColumn(Name);
+            Encoding encoding = Encoding.UTF8;
+            for (long i = 0; i < Length; i++)
+            {
+                string cur = this[i];
+                string funcResult = func(cur);
+                ret.Append(funcResult != null ? encoding.GetBytes(funcResult) : default(ReadOnlySpan<byte>));
+            }
+            return ret;
+        }
     }
 }
diff --git a/tests/Microsoft.Data.Analysis.Tests/DataFrameTests.cs b/tests/Microsoft.Data.Analysis.Tests/DataFrameTests.cs
index 44636ffd72..5e540305a5 100644
--- a/tests/Microsoft.Data.Analysis.Tests/DataFrameTests.cs
+++ b/tests/Microsoft.Data.Analysis.Tests/DataFrameTests.cs
@@ -29,7 +29,7 @@ public static DataFrame MakeDataFrameWithTwoColumns(int length, bool withNulls =
             return dataFrame;
         }
 
-        public static DataFrameColumn CreateArrowStringColumn(int length, bool withNulls = true)
+        public static ArrowStringDataFrameColumn CreateArrowStringColumn(int length, bool withNulls = true)
         {
             byte[] dataMemory = new byte[length * 3];
             byte[] nullMemory = new byte[BitUtility.ByteCount(length)];
@@ -2180,6 +2180,39 @@ public void TestBinaryOperationsOnExplodedNumericColumns()
             Assert.False(reverseInPlace.ElementwiseEquals(reverse).All());
         }
 
+        [Fact]
+        public void TestArrowStringApply()
+        {
+            ArrowStringDataFrameColumn column = CreateArrowStringColumn(10);
+            ArrowStringDataFrameColumn ret = column.Apply((string cur) =>
+            {
+                if (cur != null)
+                {
+                    return cur + "123";
+                }
+                return null;
+            });
+            for (long i = 0; i < column.Length; i++)
+            {
+                if (column[i] != null)
+                {
+                    Assert.Equal(column[i] + "123", ret[i]);
+                }
+                else
+                {
+                    Assert.Null(ret[i]);
+                }
+            }
+            Assert.Equal(1, ret.NullCount);
+
+            // Test null counts
+            ret = column.Apply((string cur) =>
+            {
+                return null;
+            });
+            Assert.Equal(column.Length, ret.NullCount);
+        }
+
         [Fact]
         public void GetColumnTests()
         {