Skip to content

Commit

Permalink
FIX-#2239: Improve testing for case
Browse files Browse the repository at this point in the history
Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
  • Loading branch information
devin-petersohn committed Oct 27, 2020
1 parent 0d426b6 commit d43fce0
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 12 deletions.
12 changes: 0 additions & 12 deletions modin/engines/base/io/text/csv_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,18 +180,6 @@ def _read(cls, filepath_or_buffer, **kwargs):
if index_col is None:
row_lengths = cls.materialize(index_ids)
new_index = pandas.RangeIndex(sum(row_lengths))
# pandas has a really weird edge case here.
# The edge case is as follows:
# If skiprows and names are specified, pandas assigns a row number based
# on the number of dtypes that match above.
# This number is not easy for us to compute and ensure matching behavior
# with pandas, so we will just read 1 line with pandas and grab the start
# value from that.
if skiprows > 1 and kwargs.get("names", None) is not None:
start = pandas.read_csv(
filepath_or_buffer, skiprows=skiprows, nrows=1, names=names
).index[0]
new_index = pandas.RangeIndex(start, start + new_index.stop)
else:
index_objs = cls.materialize(index_ids)
row_lengths = [len(o) for o in index_objs]
Expand Down
146 changes: 146 additions & 0 deletions modin/pandas/test/data/issue_2239.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
1585542839.000000, 1585542839.000000, 1585542839.000000
32.000000, 32.000000, 32.000000
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
-38,-14,51
-38,-13,51
-38,-14,51
-38,-14,50
-38,-13,51
-38,-14,50
-38,-14,51
-38,-13,51
9 changes: 9 additions & 0 deletions modin/pandas/test/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -1160,6 +1160,15 @@ def test_from_csv_skiprows(make_csv_file, nrows):
df_equals(modin_df, pandas_df)


@pytest.mark.parametrize("names", [list("XYZ"), None])
@pytest.mark.parametrize("skiprows", [1, 2, 3, 4, None])
def test_from_csv_skiprows_names(names, skiprows):
    """Compare ``pd.read_csv`` against ``pandas.read_csv`` for names/skiprows.

    Every parametrized combination of ``names`` and ``skiprows`` is used to
    read the ``issue_2239.csv`` fixture through both readers, and the two
    resulting frames are checked with ``df_equals``.
    """
    csv_path = "modin/pandas/test/data/issue_2239.csv"
    # Both readers receive exactly the same keyword arguments.
    read_options = {"names": names, "skiprows": skiprows}
    expected_df = pandas.read_csv(csv_path, **read_options)
    actual_df = pd.read_csv(csv_path, **read_options)
    df_equals(expected_df, actual_df)


@pytest.mark.parametrize(
"encoding", ["latin8", "ISO-8859-1", "latin1", "iso-8859-1", "cp1252", "utf8"]
)
Expand Down

0 comments on commit d43fce0

Please sign in to comment.