Skip to content

Commit

Permalink
TST: updated expected values for some tests
Browse files Browse the repository at this point in the history
* Updates:
  * Updated the expected values in several tests of the 'to_csv()' method
to account for the new default value of the 'line_terminator' argument
(os.linesep).

* Related Issue:
  * Issue pandas-dev#20353
  * PR pandas-dev#21406
  • Loading branch information
deflatSOCO committed Jun 26, 2018
1 parent 7e89b8d commit c5c5c11
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 74 deletions.
88 changes: 48 additions & 40 deletions pandas/tests/frame/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -842,10 +842,10 @@ def test_to_csv_unicodewriter_quoting(self):
encoding='utf-8')

result = buf.getvalue()
expected = ('"A","B"\n'
'1,"foo"\n'
'2,"bar"\n'
'3,"baz"\n')
expected = ('"A","B"' + os.linesep +
'1,"foo"' + os.linesep +
'2,"bar"' + os.linesep +
'3,"baz"' + os.linesep)

assert result == expected

Expand All @@ -857,7 +857,9 @@ def test_to_csv_quote_none(self):
df.to_csv(buf, quoting=csv.QUOTE_NONE,
encoding=encoding, index=False)
result = buf.getvalue()
expected = 'A\nhello\n{"hello"}\n'
expected = ('A' + os.linesep +
'hello' + os.linesep +
'{"hello"}' + os.linesep)
assert result == expected

def test_to_csv_index_no_leading_comma(self):
Expand All @@ -866,10 +868,10 @@ def test_to_csv_index_no_leading_comma(self):

buf = StringIO()
df.to_csv(buf, index_label=False)
expected = ('A,B\n'
'one,1,4\n'
'two,2,5\n'
'three,3,6\n')
expected = ('A,B' + os.linesep +
'one,1,4' + os.linesep +
'two,2,5' + os.linesep +
'three,3,6' + os.linesep)
assert buf.getvalue() == expected

def test_to_csv_line_terminators(self):
Expand Down Expand Up @@ -1072,11 +1074,9 @@ def test_to_csv_quoting(self):
'c_string': ['a', 'b,c'],
})

expected = """\
,c_bool,c_float,c_int,c_string
0,True,1.0,42.0,a
1,False,3.2,,"b,c"
"""
expected = (',c_bool,c_float,c_int,c_string' + os.linesep +
'0,True,1.0,42.0,a' + os.linesep +
'1,False,3.2,,"b,c"' + os.linesep)
result = df.to_csv()
assert result == expected

Expand All @@ -1086,21 +1086,19 @@ def test_to_csv_quoting(self):
result = df.to_csv(quoting=csv.QUOTE_MINIMAL)
assert result == expected

expected = """\
"","c_bool","c_float","c_int","c_string"
"0","True","1.0","42.0","a"
"1","False","3.2","","b,c"
"""
expected = ('"","c_bool","c_float","c_int","c_string"' + os.linesep +
'"0","True","1.0","42.0","a"' + os.linesep +
'"1","False","3.2","","b,c"' + os.linesep)

result = df.to_csv(quoting=csv.QUOTE_ALL)
assert result == expected

# see gh-12922, gh-13259: make sure changes to
# the formatters do not break this behaviour
expected = """\
"","c_bool","c_float","c_int","c_string"
0,True,1.0,42.0,"a"
1,False,3.2,"","b,c"
"""
expected = ('"","c_bool","c_float","c_int","c_string"' + os.linesep +
'0,True,1.0,42.0,"a"' + os.linesep +
'1,False,3.2,"","b,c"' + os.linesep)

result = df.to_csv(quoting=csv.QUOTE_NONNUMERIC)
assert result == expected

Expand All @@ -1111,27 +1109,24 @@ def test_to_csv_quoting(self):
quoting=csv.QUOTE_NONE,
escapechar=None)

expected = """\
,c_bool,c_float,c_int,c_string
0,True,1.0,42.0,a
1,False,3.2,,b!,c
"""
expected = (',c_bool,c_float,c_int,c_string' + os.linesep +
'0,True,1.0,42.0,a' + os.linesep +
'1,False,3.2,,b!,c' + os.linesep)
result = df.to_csv(quoting=csv.QUOTE_NONE,
escapechar='!')
assert result == expected

expected = """\
,c_bool,c_ffloat,c_int,c_string
0,True,1.0,42.0,a
1,False,3.2,,bf,c
"""
expected = (',c_bool,c_ffloat,c_int,c_string' + os.linesep +
'0,True,1.0,42.0,a' + os.linesep +
'1,False,3.2,,bf,c' + os.linesep)
result = df.to_csv(quoting=csv.QUOTE_NONE,
escapechar='f')
assert result == expected

# see gh-3503: quoting Windows line terminators
# presents with encoding?
text = 'a,b,c\n1,"test \r\n",3\n'
text = ('a,b,c' + os.linesep +
'1,"test \r\n",3' + os.linesep)
df = pd.read_csv(StringIO(text))
buf = StringIO()
df.to_csv(buf, encoding='utf-8', index=False)
Expand All @@ -1141,7 +1136,9 @@ def test_to_csv_quoting(self):
# with multi-indexes
df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]})
df = df.set_index(['a', 'b'])
expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n'
expected = ('"a","b","c"' + os.linesep +
'"1","3","5"' + os.linesep +
'"2","4","6"' + os.linesep)
assert df.to_csv(quoting=csv.QUOTE_ALL) == expected

def test_period_index_date_overflow(self):
Expand All @@ -1153,13 +1150,19 @@ def test_period_index_date_overflow(self):
df = pd.DataFrame([4, 5, 6], index=index)
result = df.to_csv()

expected = ',0\n1990-01-01,4\n2000-01-01,5\n3005-01-01,6\n'
expected = (',0' + os.linesep +
'1990-01-01,4' + os.linesep +
'2000-01-01,5' + os.linesep +
'3005-01-01,6' + os.linesep)
assert result == expected

date_format = "%m-%d-%Y"
result = df.to_csv(date_format=date_format)

expected = ',0\n01-01-1990,4\n01-01-2000,5\n01-01-3005,6\n'
expected = (',0' + os.linesep +
'01-01-1990,4' + os.linesep +
'01-01-2000,5' + os.linesep +
'01-01-3005,6' + os.linesep)
assert result == expected

# Overflow with pd.NaT
Expand All @@ -1169,7 +1172,10 @@ def test_period_index_date_overflow(self):
df = pd.DataFrame([4, 5, 6], index=index)
result = df.to_csv()

expected = ',0\n1990-01-01,4\n,5\n3005-01-01,6\n'
expected = (',0' + os.linesep +
'1990-01-01,4' + os.linesep +
',5' + os.linesep +
'3005-01-01,6' + os.linesep)
assert result == expected

def test_multi_index_header(self):
Expand All @@ -1182,5 +1188,7 @@ def test_multi_index_header(self):
header = ["a", "b", "c", "d"]
result = df.to_csv(header=header)

expected = ",a,b,c,d\n0,1,2,3,4\n1,5,6,7,8\n"
expected = (',a,b,c,d' + os.linesep +
'0,1,2,3,4' + os.linesep +
'1,5,6,7,8' + os.linesep)
assert result == expected
97 changes: 63 additions & 34 deletions pandas/tests/io/formats/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,29 +130,37 @@ def test_to_csv_escapechar(self):

def test_csv_to_string(self):
df = DataFrame({'col': [1, 2]})
expected = ',col\n0,1\n1,2\n'
expected = (',col' + os.linesep +
'0,1' + os.linesep +
'1,2' + os.linesep)
assert df.to_csv() == expected

def test_to_csv_decimal(self):
# GH 781
df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]})

expected_default = ',col1,col2,col3\n0,1,a,10.1\n'
expected_default = (',col1,col2,col3' + os.linesep +
'0,1,a,10.1' + os.linesep)
assert df.to_csv() == expected_default

expected_european_excel = ';col1;col2;col3\n0;1;a;10,1\n'
expected_european_excel = (';col1;col2;col3' + os.linesep +
'0;1;a;10,1' + os.linesep)
assert df.to_csv(decimal=',', sep=';') == expected_european_excel

expected_float_format_default = ',col1,col2,col3\n0,1,a,10.10\n'
expected_float_format_default = (',col1,col2,col3' + os.linesep +
'0,1,a,10.10' + os.linesep)
assert df.to_csv(float_format='%.2f') == expected_float_format_default

expected_float_format = ';col1;col2;col3\n0;1;a;10,10\n'
expected_float_format = (';col1;col2;col3' + os.linesep +
'0;1;a;10,10' + os.linesep)
assert df.to_csv(decimal=',', sep=';',
float_format='%.2f') == expected_float_format

# GH 11553: testing if decimal is taken into account for '0.0'
df = pd.DataFrame({'a': [0, 1.1], 'b': [2.2, 3.3], 'c': 1})
expected = 'a,b,c\n0^0,2^2,1\n1^1,3^3,1\n'
expected = ('a,b,c' + os.linesep +
'0^0,2^2,1' + os.linesep +
'1^1,3^3,1' + os.linesep)
assert df.to_csv(index=False, decimal='^') == expected

# same but for an index
Expand All @@ -165,7 +173,9 @@ def test_to_csv_float_format(self):
# testing if float_format is taken into account for the index
# GH 11553
df = pd.DataFrame({'a': [0, 1], 'b': [2.2, 3.3], 'c': 1})
expected = 'a,b,c\n0,2.20,1\n1,3.30,1\n'
expected = ('a,b,c' + os.linesep +
'0,2.20,1' + os.linesep +
'1,3.30,1' + os.linesep)
assert df.set_index('a').to_csv(float_format='%.2f') == expected

# same for a multi-index
Expand All @@ -176,19 +186,25 @@ def test_to_csv_na_rep(self):
# testing if NaN values are correctly represented in the index
# GH 11553
df = DataFrame({'a': [0, np.NaN], 'b': [0, 1], 'c': [2, 3]})
expected = "a,b,c\n0.0,0,2\n_,1,3\n"
expected = ('a,b,c' + os.linesep +
'0.0,0,2' + os.linesep +
'_,1,3' + os.linesep)
assert df.set_index('a').to_csv(na_rep='_') == expected
assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected

# now with an index containing only NaNs
df = DataFrame({'a': np.NaN, 'b': [0, 1], 'c': [2, 3]})
expected = "a,b,c\n_,0,2\n_,1,3\n"
expected = ('a,b,c' + os.linesep +
'_,0,2' + os.linesep +
'_,1,3' + os.linesep)
assert df.set_index('a').to_csv(na_rep='_') == expected
assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected

# check if na_rep parameter does not break anything when no NaN
df = DataFrame({'a': 0, 'b': [0, 1], 'c': [2, 3]})
expected = "a,b,c\n0,0,2\n0,1,3\n"
expected = ('a,b,c' + os.linesep +
'0,0,2' + os.linesep +
'0,1,3' + os.linesep)
assert df.set_index('a').to_csv(na_rep='_') == expected
assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected

Expand All @@ -199,33 +215,46 @@ def test_to_csv_date_format(self):
df_day = DataFrame({'A': pd.date_range('20130101', periods=5, freq='d')
})

expected_default_sec = (',A\n0,2013-01-01 00:00:00\n1,'
'2013-01-01 00:00:01\n2,2013-01-01 00:00:02'
'\n3,2013-01-01 00:00:03\n4,'
'2013-01-01 00:00:04\n')
expected_default_sec = (',A' + os.linesep +
'0,2013-01-01 00:00:00' + os.linesep +
'1,2013-01-01 00:00:01' + os.linesep +
'2,2013-01-01 00:00:02' + os.linesep +
'3,2013-01-01 00:00:03' + os.linesep +
'4,2013-01-01 00:00:04' + os.linesep)
assert df_sec.to_csv() == expected_default_sec

expected_ymdhms_day = (',A\n0,2013-01-01 00:00:00\n1,'
'2013-01-02 00:00:00\n2,2013-01-03 00:00:00'
'\n3,2013-01-04 00:00:00\n4,'
'2013-01-05 00:00:00\n')
expected_ymdhms_day = (',A' + os.linesep +
'0,2013-01-01 00:00:00' + os.linesep +
'1,2013-01-02 00:00:00' + os.linesep +
'2,2013-01-03 00:00:00' + os.linesep +
'3,2013-01-04 00:00:00' + os.linesep +
'4,2013-01-05 00:00:00' + os.linesep)
assert (df_day.to_csv(date_format='%Y-%m-%d %H:%M:%S') ==
expected_ymdhms_day)

expected_ymd_sec = (',A\n0,2013-01-01\n1,2013-01-01\n2,'
'2013-01-01\n3,2013-01-01\n4,2013-01-01\n')
expected_ymd_sec = (',A' + os.linesep +
'0,2013-01-01' + os.linesep +
'1,2013-01-01' + os.linesep +
'2,2013-01-01' + os.linesep +
'3,2013-01-01' + os.linesep +
'4,2013-01-01' + os.linesep)
assert df_sec.to_csv(date_format='%Y-%m-%d') == expected_ymd_sec

expected_default_day = (',A\n0,2013-01-01\n1,2013-01-02\n2,'
'2013-01-03\n3,2013-01-04\n4,2013-01-05\n')
expected_default_day = (',A' + os.linesep +
'0,2013-01-01' + os.linesep +
'1,2013-01-02' + os.linesep +
'2,2013-01-03' + os.linesep +
'3,2013-01-04' + os.linesep +
'4,2013-01-05' + os.linesep)
assert df_day.to_csv() == expected_default_day
assert df_day.to_csv(date_format='%Y-%m-%d') == expected_default_day

# testing if date_format parameter is taken into account for
# multi-indexed dataframes (GH 7791)
df_sec['B'] = 0
df_sec['C'] = 1
expected_ymd_sec = 'A,B,C\n2013-01-01,0,1\n'
expected_ymd_sec = ('A,B,C' + os.linesep +
'2013-01-01,0,1' + os.linesep)
df_sec_grouped = df_sec.groupby([pd.Grouper(key='A', freq='1h'), 'B'])
assert (df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d') ==
expected_ymd_sec)
Expand All @@ -234,28 +263,30 @@ def test_to_csv_multi_index(self):
# GH 6618
df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]))

exp = ",1\n,2\n0,1\n"
exp = (',1' + os.linesep +
',2' + os.linesep +
'0,1' + os.linesep)
assert df.to_csv() == exp

exp = "1\n2\n1\n"
exp = ('1' + os.linesep + '2' + os.linesep + '1' + os.linesep)
assert df.to_csv(index=False) == exp

df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]),
index=pd.MultiIndex.from_arrays([[1], [2]]))

exp = ",,1\n,,2\n1,2,1\n"
exp = (',,1' + os.linesep + ',,2' + os.linesep + '1,2,1' + os.linesep)
assert df.to_csv() == exp

exp = "1\n2\n1\n"
exp = ('1' + os.linesep + '2' + os.linesep + '1' + os.linesep)
assert df.to_csv(index=False) == exp

df = DataFrame(
[1], columns=pd.MultiIndex.from_arrays([['foo'], ['bar']]))

exp = ",foo\n,bar\n0,1\n"
exp = (',foo' + os.linesep + ',bar' + os.linesep + '0,1' + os.linesep)
assert df.to_csv() == exp

exp = "foo\nbar\n1\n"
exp = ('foo' + os.linesep + 'bar' + os.linesep + '1' + os.linesep)
assert df.to_csv(index=False) == exp

def test_to_csv_string_array_ascii(self):
Expand Down Expand Up @@ -381,11 +412,9 @@ def test_to_csv_stdout_file(self):
# GH 21561
df = pd.DataFrame([['foo', 'bar'], ['baz', 'qux']],
columns=['name_1', 'name_2'])
expected_ascii = '''\
,name_1,name_2
0,foo,bar
1,baz,qux
'''
expected_ascii = (',name_1,name_2' + os.linesep +
'0,foo,bar' + os.linesep +
'1,baz,qux' + os.linesep)
df.to_csv(sys.stdout, encoding='ascii')
output = sys.stdout.getvalue()
assert output == expected_ascii
Expand Down

0 comments on commit c5c5c11

Please sign in to comment.