Skip to content

Commit

Permalink
DataFrame.LoadCsv throws an exception on projects targeting < netcore…
Browse files Browse the repository at this point in the history
…3.0 (dotnet#2797)

Fixed by passing an explicit encoding (UTF-8) and a default buffer size to the StreamReader constructor used by DataFrame.LoadCsv.

Also, get our tests running on .NET Framework.

Fixes dotnet#2783
  • Loading branch information
eerhardt authored Dec 5, 2019
1 parent 7cee9d9 commit e64cbad
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 16 deletions.
6 changes: 4 additions & 2 deletions src/Microsoft.Data.Analysis/DataFrame.IO.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;

namespace Microsoft.Data.Analysis
{

public partial class DataFrame
{
private const int DefaultStreamReaderBufferSize = 1024;

private static Type GuessKind(int col, List<string[]> read)
{
Type res = typeof(string);
Expand Down Expand Up @@ -205,7 +207,7 @@ public static DataFrame LoadCsv(Stream csvStream,
List<DataFrameColumn> columns;
long streamStart = csvStream.Position;
// First pass: schema and number of rows.
using (var streamReader = new StreamReader(csvStream, encoding: null, detectEncodingFromByteOrderMarks: true, bufferSize: -1, leaveOpen: true))
using (var streamReader = new StreamReader(csvStream, Encoding.UTF8, detectEncodingFromByteOrderMarks: true, DefaultStreamReaderBufferSize, leaveOpen: true))
{
string line = null;
if (dataTypes == null)
Expand Down
2 changes: 2 additions & 0 deletions tests/Microsoft.Data.Analysis.Tests/BufferTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ public void TestArrowStringColumnClone()
Assert.Null(clone[i]);
}

#if !NETFRAMEWORK // https://github.com/dotnet/corefxlab/issues/2796
[Fact]
public void TestPrimitiveColumnGetReadOnlyBuffers()
{
Expand Down Expand Up @@ -275,5 +276,6 @@ public void TestArrowStringColumnGetReadOnlyBuffers()
}
}
}
#endif //!NETFRAMEWORK
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ public void TestIDataView()
Assert.Equal((ushort)1, preview.ColumnView[10].Values[1]);

Assert.Equal("String", preview.ColumnView[11].Column.Name);
Assert.Equal("0".AsMemory(), preview.ColumnView[11].Values[0]);
Assert.Equal("1".AsMemory(), preview.ColumnView[11].Values[1]);
Assert.Equal("0".ToString(), preview.ColumnView[11].Values[0].ToString());
Assert.Equal("1".ToString(), preview.ColumnView[11].Values[1].ToString());

Assert.Equal("Char", preview.ColumnView[12].Column.Name);
Assert.Equal((ushort)65, preview.ColumnView[12].Values[0]);
Expand Down
23 changes: 12 additions & 11 deletions tests/Microsoft.Data.Analysis.Tests/DataFrameTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ public DataFrame SplitTrainTest(DataFrame input, float testRatio, out DataFrame
{
IEnumerable<int> randomIndices = Enumerable.Range(0, (int)input.Rows.Count);
IEnumerable<int> trainIndices = randomIndices.Take((int)(input.Rows.Count * testRatio));
IEnumerable<int> testIndices = randomIndices.TakeLast((int)(input.Rows.Count * (1 - testRatio)));
IEnumerable<int> testIndices = randomIndices.Skip((int)(input.Rows.Count * testRatio));
Test = input[testIndices];
return input[trainIndices];
}
Expand Down Expand Up @@ -1540,38 +1540,38 @@ public void TestPrefixAndSuffix()

DataFrame prefix = df.AddPrefix("Prefix_");
IEnumerable<DataViewSchema.Column> prefixNames = ((IDataView)prefix).Schema;
foreach ((DataViewSchema.Column First, DataViewSchema.Column Second) in columnNames.Zip(((IDataView)df).Schema))
foreach ((DataViewSchema.Column First, DataViewSchema.Column Second) in columnNames.Zip(((IDataView)df).Schema, (e1, e2) => (e1, e2)))
{
Assert.Equal(First.Name, Second.Name);
}
foreach ((DataViewSchema.Column First, DataViewSchema.Column Second) in prefixNames.Zip(columnNames))
foreach ((DataViewSchema.Column First, DataViewSchema.Column Second) in prefixNames.Zip(columnNames, (e1, e2) => (e1, e2)))
{
Assert.Equal(First.Name, "Prefix_" + Second.Name);
}

// Inplace
df.AddPrefix("Prefix_", true);
prefixNames = ((IDataView)df).Schema;
foreach ((DataViewSchema.Column First, DataViewSchema.Column Second) in columnNames.Zip(prefixNames))
foreach ((DataViewSchema.Column First, DataViewSchema.Column Second) in columnNames.Zip(prefixNames, (e1, e2) => (e1, e2)))
{
Assert.Equal("Prefix_" + First.Name, Second.Name);
}

DataFrame suffix = df.AddSuffix("_Suffix");
IEnumerable<DataViewSchema.Column> suffixNames = ((IDataView)suffix).Schema;
foreach ((DataViewSchema.Column First, DataViewSchema.Column Second) in ((IDataView)df).Schema.Zip(columnNames))
foreach ((DataViewSchema.Column First, DataViewSchema.Column Second) in ((IDataView)df).Schema.Zip(columnNames, (e1, e2) => (e1, e2)))
{
Assert.Equal(First.Name, "Prefix_" + Second.Name);
}
foreach ((DataViewSchema.Column First, DataViewSchema.Column Second) in columnNames.Zip(suffixNames))
foreach ((DataViewSchema.Column First, DataViewSchema.Column Second) in columnNames.Zip(suffixNames, (e1, e2) => (e1, e2)))
{
Assert.Equal("Prefix_" + First.Name + "_Suffix", Second.Name);
}

// InPlace
df.AddSuffix("_Suffix", true);
suffixNames = ((IDataView)df).Schema;
foreach ((DataViewSchema.Column First, DataViewSchema.Column Second) in columnNames.Zip(suffixNames))
foreach ((DataViewSchema.Column First, DataViewSchema.Column Second) in columnNames.Zip(suffixNames, (e1, e2) => (e1, e2)))
{
Assert.Equal("Prefix_" + First.Name + "_Suffix", Second.Name);
}
Expand Down Expand Up @@ -1942,19 +1942,20 @@ public void TestAppendRow()
Assert.Equal(2, df.Columns[0].NullCount);
Assert.Equal(3, df.Columns[1].NullCount);

df.Append(new List<KeyValuePair<string, object>> { KeyValuePair.Create("Column1", (object)5), KeyValuePair.Create("Column2", (object)false) });
df.Append(new Dictionary<string, object> { { "Column1", (object)5 } , { "Column2", false } });
Assert.Equal(14, df.Rows.Count);
Assert.Equal(2, df.Columns[0].NullCount);
Assert.Equal(3, df.Columns[1].NullCount);

df.Append(new List<KeyValuePair<string, object>> { KeyValuePair.Create("Column1", (object)5) });
df.Append(new Dictionary<string, object> { { "Column1", 5 } });
Assert.Equal(15, df.Rows.Count);

Assert.Equal(15, df["Column1"].Length);
Assert.Equal(15, df["Column2"].Length);
Assert.Equal(2, df.Columns[0].NullCount);
Assert.Equal(4, df.Columns[1].NullCount);

df.Append(new List<KeyValuePair<string, object>> { KeyValuePair.Create("Column2", (object)false) });
df.Append(new Dictionary<string, object> { { "Column2", false } });
Assert.Equal(16, df.Rows.Count);
Assert.Equal(16, df["Column1"].Length);
Assert.Equal(16, df["Column2"].Length);
Expand All @@ -1970,7 +1971,7 @@ public void TestAppendRow()

// DataFrame must remain usable even if Append throws
Assert.Throws<FormatException>(() => df.Append(new List<object> { 5, "str" }));
Assert.Throws<FormatException>(() => df.Append(new List<KeyValuePair<string, object>> { KeyValuePair.Create("Column2", (object)"str") }));
Assert.Throws<FormatException>(() => df.Append(new Dictionary<string, object> { { "Column2", "str" } }));
Assert.Throws<ArgumentException>(() => df.Append(new List<object> { 5, true, true }));

df.Append();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>netcoreapp3.0</TargetFramework>
<TargetFrameworks>net461;netcoreapp3.0</TargetFrameworks>
<TargetFrameworks Condition="'$(OS)' != 'Windows_NT'">netcoreapp3.0</TargetFrameworks>
</PropertyGroup>

<ItemGroup>
Expand Down

0 comments on commit e64cbad

Please sign in to comment.