Skip to content

Commit

Permalink
apacheGH-36588: [C#] Support blank column names and enable more integ…
Browse files Browse the repository at this point in the history
…ration tests. (apache#39167)

### What changes are included in this PR?

Allows field names to be blank (but not null).
Enables schema metadata to be read from JSON integration tests.
Enables integration tests for cases that are now working.

### Are these changes tested?

Yes.

* Closes: apache#36588

Authored-by: Curt Hagenlocher <curt@hagenlocher.org>
Signed-off-by: Curt Hagenlocher <curt@hagenlocher.org>
  • Loading branch information
CurtHagenlocher authored and mapleFU committed Dec 13, 2023
1 parent a0060f7 commit dedee2d
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 22 deletions.
3 changes: 1 addition & 2 deletions csharp/src/Apache.Arrow/Field.Builder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,13 @@ public class Builder

public Builder()
{
_name = string.Empty;
_type = NullType.Default;
_nullable = true;
}

public Builder Name(string value)
{
if (string.IsNullOrWhiteSpace(value))
if (value == null)
{
throw new ArgumentNullException(nameof(value));
}
Expand Down
11 changes: 5 additions & 6 deletions csharp/src/Apache.Arrow/Field.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,24 +35,23 @@ public partial class Field

public Field(string name, IArrowType dataType, bool nullable,
IEnumerable<KeyValuePair<string, string>> metadata = default)
: this(name, dataType, nullable, false)
: this(name, dataType, nullable)
{
Metadata = metadata?.ToDictionary(kv => kv.Key, kv => kv.Value);

}

internal Field(string name, IArrowType dataType, bool nullable,
IReadOnlyDictionary<string, string> metadata, bool copyCollections, bool allowBlankName)
: this(name, dataType, nullable, allowBlankName)
IReadOnlyDictionary<string, string> metadata, bool copyCollections)
: this(name, dataType, nullable)
{
Debug.Assert(copyCollections == false, "This internal constructor is to not copy the collections.");

Metadata = metadata;
}

private Field(string name, IArrowType dataType, bool nullable, bool allowBlankName)
private Field(string name, IArrowType dataType, bool nullable)
{
if (name == null || (!allowBlankName && string.IsNullOrWhiteSpace(name)))
if (name == null)
{
throw new ArgumentNullException(nameof(name));
}
Expand Down
9 changes: 4 additions & 5 deletions csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ internal static Schema GetSchema(Flatbuf.Schema schema, ref DictionaryMemo dicti
for (int i = 0; i < schema.FieldsLength; i++)
{
Flatbuf.Field field = schema.Fields(i).GetValueOrDefault();
fields.Add(FieldFromFlatbuffer(field, ref dictionaryMemo, allowBlankName: false));
fields.Add(FieldFromFlatbuffer(field, ref dictionaryMemo));
}

Dictionary<string, string> metadata = schema.CustomMetadataLength > 0 ? new Dictionary<string, string>() : null;
Expand All @@ -73,14 +73,13 @@ internal static Schema GetSchema(Flatbuf.Schema schema, ref DictionaryMemo dicti
return new Schema(fields, metadata, copyCollections: false);
}

private static Field FieldFromFlatbuffer(Flatbuf.Field flatbufField, ref DictionaryMemo dictionaryMemo, bool allowBlankName)
private static Field FieldFromFlatbuffer(Flatbuf.Field flatbufField, ref DictionaryMemo dictionaryMemo)
{
bool allowBlankNameChild = flatbufField.ChildrenLength == 1 && flatbufField.TypeType == Flatbuf.Type.FixedSizeList;
Field[] childFields = flatbufField.ChildrenLength > 0 ? new Field[flatbufField.ChildrenLength] : null;
for (int i = 0; i < flatbufField.ChildrenLength; i++)
{
Flatbuf.Field? childFlatbufField = flatbufField.Children(i);
childFields[i] = FieldFromFlatbuffer(childFlatbufField.Value, ref dictionaryMemo, allowBlankNameChild);
childFields[i] = FieldFromFlatbuffer(childFlatbufField.Value, ref dictionaryMemo);
}

Flatbuf.DictionaryEncoding? dictionaryEncoding = flatbufField.Dictionary;
Expand All @@ -104,7 +103,7 @@ private static Field FieldFromFlatbuffer(Flatbuf.Field flatbufField, ref Diction
metadata[keyValue.Key] = keyValue.Value;
}

var arrowField = new Field(flatbufField.Name, type, flatbufField.Nullable, metadata, copyCollections: false, allowBlankName);
var arrowField = new Field(flatbufField.Name, type, flatbufField.Nullable, metadata, copyCollections: false);

if (dictionaryEncoding.HasValue)
{
Expand Down
48 changes: 44 additions & 4 deletions csharp/src/Apache.Arrow/Types/MapType.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Collections.Generic;

namespace Apache.Arrow.Types
{
public sealed class MapType : NestedType // MapType = ListType(StructType("key", "value"))
{
private const string EntriesKey = "entries";
private const string KeyKey = "key";
private const string ValueKey = "value";

public override ArrowTypeId TypeId => ArrowTypeId.Map;
public override string Name => "map";
public readonly bool KeySorted;
Expand All @@ -28,20 +33,23 @@ public sealed class MapType : NestedType // MapType = ListType(StructType("key",
public Field ValueField => KeyValueType.Fields[1];

public MapType(IArrowType key, IArrowType value, bool nullable = true, bool keySorted = false)
: this(new Field("key", key, false), new Field("value", value, nullable), keySorted)
: base(Entries(key, value, nullable))
{
KeySorted = keySorted;
}

public MapType(Field key, Field value, bool keySorted = false)
: this(new StructType(new List<Field>() { key, value }), keySorted)
: base(Entries(key, value))
{
KeySorted = keySorted;
}

public MapType(StructType entries, bool keySorted = false) : this(new Field("entries", entries, false), keySorted)
public MapType(StructType entries, bool keySorted = false) : base(Entries(entries))
{
KeySorted = keySorted;
}

public MapType(Field entries, bool keySorted = false) : base(entries)
public MapType(Field entries, bool keySorted = false) : base(Entries(entries))
{
KeySorted = keySorted;
}
Expand All @@ -54,5 +62,37 @@ public MapType UnsortedKey()

return new MapType(Fields[0], keySorted: false);
}

private static Field Entries(IArrowType key, IArrowType value, bool nullable) =>
new Field(EntriesKey, NewStruct(new Field(KeyKey, key, false), new Field(ValueKey, value, nullable)), false);

private static Field Entries(Field key, Field value) =>
new Field(EntriesKey, NewStruct(NamedField(KeyKey, key), NamedField(ValueKey, value)), false);

private static Field Entries(StructType entries)
{
return new Field(EntriesKey, Struct(entries), false);
}

private static StructType NewStruct(Field key, Field value) => new StructType(new[] { key, value });

private static StructType Struct(StructType entries)
{
Field key = NamedField(KeyKey, entries.Fields[0]);
Field value = NamedField(ValueKey, entries.Fields[1]);
return object.ReferenceEquals(key, entries.Fields[0]) && object.ReferenceEquals(value, entries.Fields[1]) ? entries : NewStruct(key, value);
}

private static Field Entries(Field entries)
{
StructType structType = (StructType)entries.DataType;
StructType adjustedStruct = Struct(structType);
return StringComparer.Ordinal.Equals(entries.Name, EntriesKey) && object.ReferenceEquals(structType, adjustedStruct) ? entries : new Field(EntriesKey, adjustedStruct, false);
}

private static Field NamedField(string name, Field field)
{
return StringComparer.Ordinal.Equals(name, field.Name) ? field : new Field(name, field.DataType, field.IsNullable, field.Metadata);
}
}
}
6 changes: 6 additions & 0 deletions csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@ private static Schema CreateSchema(JsonSchema jsonSchema, Dictionary<DictionaryT
{
builder.Field(f => CreateField(f, jsonSchema.Fields[i], dictionaryIndexes));
}

if (jsonSchema.Metadata != null)
{
builder.Metadata(jsonSchema.Metadata);
}

return builder.Build();
}

Expand Down
44 changes: 44 additions & 0 deletions csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -829,6 +829,50 @@ public unsafe void ExportArrayStream()
CArrowArrayStream.Free(cArrayStream);
}

[SkippableFact]
public unsafe void ImportRecordBatchFromBuffer()
{
using (Py.GIL())
{
dynamic pa = Py.Import("pyarrow");
dynamic batch = pa.record_batch(
new PyList(new PyObject[]
{
pa.array(new long?[] { 1, 2, 3, null, 5 }),
pa.array(new[] { "hello", "world", null, "foo", "bar" }),
pa.array(new[] { 0.0, 1.4, 2.5, 3.6, 4.7 }),
pa.array(new bool?[] { true, false, null, false, true }),
}),
new[] { null, "", "column", "column" });

dynamic sink = pa.BufferOutputStream();
dynamic writer = pa.ipc.new_stream(sink, batch.schema);
writer.write_batch(batch);
((IDisposable)writer).Dispose();

dynamic buf = sink.getvalue();
// buf.address, buf.size

IntPtr address = (IntPtr)(long)buf.address;
int size = buf.size;
byte[] buffer = new byte[size];
byte* ptr = (byte*)address.ToPointer();
for (int i = 0; i < size; i++)
{
buffer[i] = ptr[i];
}

using (ArrowStreamReader reader = new ArrowStreamReader(new ReadOnlyMemory<byte>(buffer)))
{
RecordBatch batch2 = reader.ReadNextRecordBatch();
Assert.Equal("None", batch2.Schema.FieldsList[0].Name);
Assert.Equal("", batch2.Schema.FieldsList[1].Name);
Assert.Equal("column", batch2.Schema.FieldsList[2].Name);
Assert.Equal("column", batch2.Schema.FieldsList[3].Name);
}
}
}

private static PyObject List(params int?[] values)
{
return new PyList(values.Select(i => i == null ? PyObject.None : new PyInt(i.Value)).ToArray());
Expand Down
6 changes: 1 addition & 5 deletions dev/archery/archery/integration/datagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -1812,7 +1812,6 @@ def _temp_path():
generate_map_case(),

generate_non_canonical_map_case()
.skip_tester('C#')
.skip_tester('Java') # TODO(ARROW-8715)
# Canonical map names are restored on import, so the schemas are unequal
.skip_format(SKIP_C_SCHEMA, 'C++'),
Expand All @@ -1827,11 +1826,9 @@ def _temp_path():

generate_unions_case(),

generate_custom_metadata_case()
.skip_tester('C#'),
generate_custom_metadata_case(),

generate_duplicate_fieldnames_case()
.skip_tester('C#')
.skip_tester('JS'),

generate_dictionary_case(),
Expand All @@ -1856,7 +1853,6 @@ def _temp_path():
.skip_tester('Rust'),

generate_extension_case()
.skip_tester('C#')
# TODO: ensure the extension is registered in the C++ entrypoint
.skip_format(SKIP_C_SCHEMA, 'C++')
.skip_format(SKIP_C_ARRAY, 'C++'),
Expand Down

0 comments on commit dedee2d

Please sign in to comment.