Skip to content

Commit

Permalink
Add additional tests for ReadCsv (dotnet#2811)
Browse files: browse the repository at this point in the history
* Add additional tests for ReadCsv

* Update asserts

* Add empty row and skip test pending another fix

* Remove test for another issue
  • Loading branch information
jwood803 authored and Prashanth Govindarajan committed Jan 16, 2020
1 parent 0fa210d commit 430ac09
Showing 1 changed file with 138 additions and 0 deletions.
138 changes: 138 additions & 0 deletions tests/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -104,5 +104,143 @@ Stream GetStream(string streamData)
Assert.Equal("", nullRow[5]);
Assert.Null(nullRow[6]);
}

/// <summary>
/// Loads pipe-delimited text through <c>DataFrame.LoadCsv</c> and checks the resulting
/// row/column counts, the effect of <c>numberOfRowsToRead</c>, and the values produced
/// for a data row whose fields are all empty.
/// </summary>
[Fact]
public void TestReadCsvWithPipeSeparator()
{
    const char pipe = '|';

    string data = @"vendor_id|rate_code|passenger_count|trip_time_in_secs|trip_distance|payment_type|fare_amount
CMT|1|1|1271|3.8|CRD|17.5
CMT|1|1|474|1.5|CRD|8
CMT|1|1|637|1.4|CRD|8.5
||||||
CMT|1|1|181|0.6|CSH|4.5";

    // Every load gets its own stream over the same text.
    Stream ToStream(string text) => new MemoryStream(Encoding.Default.GetBytes(text));

    // Unrestricted load: five data rows (header excluded) across seven columns.
    var fullFrame = DataFrame.LoadCsv(ToStream(data), separator: pipe);
    Assert.Equal(5, fullFrame.Rows.Count);
    Assert.Equal(7, fullFrame.Columns.Count);
    Assert.Equal("CMT", fullFrame["vendor_id"][4]);

    // The fourth data row consists solely of separators: columns 0 and 5 are
    // expected to hold empty strings, every other column null.
    var emptyFieldsRow = fullFrame.Rows[3];
    Assert.Equal("", emptyFieldsRow[0]);
    Assert.Equal("", emptyFieldsRow[5]);
    foreach (int nullIndex in new[] { 1, 2, 3, 4, 6 })
    {
        Assert.Null(emptyFieldsRow[nullIndex]);
    }

    // Load capped at three rows.
    var cappedFrame = DataFrame.LoadCsv(ToStream(data), separator: pipe, numberOfRowsToRead: 3);
    Assert.Equal(3, cappedFrame.Rows.Count);
    Assert.Equal(7, cappedFrame.Columns.Count);
    Assert.Equal("CMT", cappedFrame["vendor_id"][2]);
}

/// <summary>
/// Loads semicolon-delimited text through <c>DataFrame.LoadCsv</c> and checks the resulting
/// row/column counts, the effect of <c>numberOfRowsToRead</c>, and the values produced
/// for a data row whose fields are all empty.
/// </summary>
[Fact]
public void TestReadCsvWithSemicolonSeparator()
{
    const char semicolon = ';';

    string data = @"vendor_id;rate_code;passenger_count;trip_time_in_secs;trip_distance;payment_type;fare_amount
CMT;1;1;1271;3.8;CRD;17.5
CMT;1;1;474;1.5;CRD;8
CMT;1;1;637;1.4;CRD;8.5
;;;;;;
CMT;1;1;181;0.6;CSH;4.5";

    // Every load gets its own stream over the same text.
    Stream ToStream(string text) => new MemoryStream(Encoding.Default.GetBytes(text));

    // Unrestricted load: five data rows (header excluded) across seven columns.
    var fullFrame = DataFrame.LoadCsv(ToStream(data), separator: semicolon);
    Assert.Equal(5, fullFrame.Rows.Count);
    Assert.Equal(7, fullFrame.Columns.Count);
    Assert.Equal("CMT", fullFrame["vendor_id"][4]);

    // The fourth data row consists solely of separators: columns 0 and 5 are
    // expected to hold empty strings, every other column null.
    var emptyFieldsRow = fullFrame.Rows[3];
    Assert.Equal("", emptyFieldsRow[0]);
    Assert.Equal("", emptyFieldsRow[5]);
    foreach (int nullIndex in new[] { 1, 2, 3, 4, 6 })
    {
        Assert.Null(emptyFieldsRow[nullIndex]);
    }

    // Load capped at three rows.
    var cappedFrame = DataFrame.LoadCsv(ToStream(data), separator: semicolon, numberOfRowsToRead: 3);
    Assert.Equal(3, cappedFrame.Rows.Count);
    Assert.Equal(7, cappedFrame.Columns.Count);
    Assert.Equal("CMT", cappedFrame["vendor_id"][2]);
}

/// <summary>
/// Loads comma-separated text whose header names eight columns while every data row
/// supplies seven values, and asserts that the loaded frame reports seven columns,
/// both for a full load and for a load limited by <c>numberOfRowsToRead</c>.
/// </summary>
[Fact]
public void TestReadCsvWithExtraColumnInHeader()
{
    string data = @"vendor_id,rate_code,passenger_count,trip_time_in_secs,trip_distance,payment_type,fare_amount,extra
CMT,1,1,1271,3.8,CRD,17.5
CMT,1,1,474,1.5,CRD,8
CMT,1,1,637,1.4,CRD,8.5
CMT,1,1,181,0.6,CSH,4.5";

    // Every load gets its own stream over the same text.
    Stream ToStream(string text) => new MemoryStream(Encoding.Default.GetBytes(text));

    // Unrestricted load: four data rows; the trailing "extra" header is not counted.
    var fullFrame = DataFrame.LoadCsv(ToStream(data));
    Assert.Equal(4, fullFrame.Rows.Count);
    Assert.Equal(7, fullFrame.Columns.Count);
    Assert.Equal("CMT", fullFrame["vendor_id"][3]);

    // Load capped at three rows.
    var cappedFrame = DataFrame.LoadCsv(ToStream(data), numberOfRowsToRead: 3);
    Assert.Equal(3, cappedFrame.Rows.Count);
    Assert.Equal(7, cappedFrame.Columns.Count);
    Assert.Equal("CMT", cappedFrame["vendor_id"][2]);
}

/// <summary>
/// Feeds <c>DataFrame.LoadCsv</c> comma-separated text in which every data row carries
/// one more value than the header declares, and asserts that the load throws
/// <see cref="IndexOutOfRangeException"/>.
/// </summary>
[Fact]
public void TestReadCsvWithExtraColumnInRow()
{
    string data = @"vendor_id,rate_code,passenger_count,trip_time_in_secs,trip_distance,payment_type,fare_amount
CMT,1,1,1271,3.8,CRD,17.5,0
CMT,1,1,474,1.5,CRD,8,0
CMT,1,1,637,1.4,CRD,8.5,0
CMT,1,1,181,0.6,CSH,4.5,0";

    Stream ToStream(string text) => new MemoryStream(Encoding.Default.GetBytes(text));

    // Seven header names versus eight values per row.
    Func<object> loadOversizedRows = () => DataFrame.LoadCsv(ToStream(data));

    Assert.Throws<IndexOutOfRangeException>(loadOversizedRows);
}

/// <summary>
/// Loads comma-separated text whose header names seven columns while every data row
/// supplies only six values, and asserts that the loaded frame reports six columns,
/// both for a full load and for a load limited by <c>numberOfRowsToRead</c>.
/// </summary>
[Fact]
public void TestReadCsvWithLessColumnsInRow()
{
    string data = @"vendor_id,rate_code,passenger_count,trip_time_in_secs,trip_distance,payment_type,fare_amount
CMT,1,1,1271,3.8,CRD
CMT,1,1,474,1.5,CRD
CMT,1,1,637,1.4,CRD
CMT,1,1,181,0.6,CSH";

    // Every load gets its own stream over the same text.
    Stream ToStream(string text) => new MemoryStream(Encoding.Default.GetBytes(text));

    // Unrestricted load: four data rows, six columns.
    var fullFrame = DataFrame.LoadCsv(ToStream(data));
    Assert.Equal(4, fullFrame.Rows.Count);
    Assert.Equal(6, fullFrame.Columns.Count);
    Assert.Equal("CMT", fullFrame["vendor_id"][3]);

    // Load capped at three rows.
    var cappedFrame = DataFrame.LoadCsv(ToStream(data), numberOfRowsToRead: 3);
    Assert.Equal(3, cappedFrame.Rows.Count);
    Assert.Equal(6, cappedFrame.Columns.Count);
    Assert.Equal("CMT", cappedFrame["vendor_id"][2]);
}
}
}

0 comments on commit 430ac09

Please sign in to comment.