Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN: Remove the engine parameter in CSVFormatter and to_csv #13419

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
9 changes: 9 additions & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,15 @@ Deprecations
- ``as_recarray`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13373`)
- top-level ``pd.ordered_merge()`` has been renamed to ``pd.merge_ordered()`` and the original name will be removed in a future version (:issue:`13358`)


.. _whatsnew_0190.prior_deprecations:

Removal of prior version deprecations/changes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

- ``DataFrame.to_csv()`` has dropped the ``engine`` parameter (:issue:`11274`, :issue:`13419`)


.. _whatsnew_0190.performance:

Performance Improvements
Expand Down
1 change: 0 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1342,7 +1342,6 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
cols=columns, header=header, index=index,
index_label=index_label, mode=mode,
chunksize=chunksize, quotechar=quotechar,
engine=kwds.get("engine"),
tupleize_cols=tupleize_cols,
date_format=date_format,
doublequote=doublequote,
Expand Down
133 changes: 5 additions & 128 deletions pandas/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@

import itertools
import csv
import warnings

common_docstring = """
Parameters
Expand Down Expand Up @@ -1326,15 +1325,10 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
float_format=None, cols=None, header=True, index=True,
index_label=None, mode='w', nanRep=None, encoding=None,
compression=None, quoting=None, line_terminator='\n',
chunksize=None, engine=None, tupleize_cols=False,
quotechar='"', date_format=None, doublequote=True,
escapechar=None, decimal='.'):

if engine is not None:
warnings.warn("'engine' keyword is deprecated and will be "
"removed in a future version", FutureWarning,
stacklevel=3)
self.engine = engine # remove for 0.18
chunksize=None, tupleize_cols=False, quotechar='"',
date_format=None, doublequote=True, escapechar=None,
decimal='.'):

self.obj = obj

if path_or_buf is None:
Expand Down Expand Up @@ -1369,11 +1363,6 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',

self.date_format = date_format

# GH3457
if not self.obj.columns.is_unique and engine == 'python':
raise NotImplementedError("columns.is_unique == False not "
"supported with engine='python'")

self.tupleize_cols = tupleize_cols
self.has_mi_columns = (isinstance(obj.columns, MultiIndex) and
not self.tupleize_cols)
Expand Down Expand Up @@ -1430,108 +1419,6 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
if not index:
self.nlevels = 0

# Original pure-Python implementation of df.to_csv, reachable only via the
# deprecated df.to_csv(engine='python') path (this changeset deletes it).
def _helper_csv(self, writer, na_rep=None, cols=None, header=True,
                index=True, index_label=None, float_format=None,
                date_format=None):
    """Write ``self.obj`` row by row through *writer* (a ``csv.writer``).

    Parameters
    ----------
    writer : csv.writer
        Target writer; one ``writerow`` call per output row.
    na_rep : str, optional
        Text substituted for missing values.
    cols : sequence, optional
        Column subset/order to write; defaults to ``self.columns``.
    header : bool or sequence, default True
        Whether to write a header row; a sequence is used as column
        aliases (must match ``len(cols)``).
    index : bool, default True
        Whether to write the index as the leading field(s) of each row.
    index_label : str, sequence or False, optional
        Label(s) for the index column(s); ``False`` suppresses them.
    float_format : str, optional
        ``%``-style format applied to float values.
    date_format : str, optional
        ``strftime`` format applied to datetime values.

    Raises
    ------
    ValueError
        If ``header`` is a sequence of aliases whose length differs
        from the number of columns written.
    """
    if cols is None:
        cols = self.columns

    # ``header`` may itself be a sequence of alias names for the columns
    has_aliases = isinstance(header, (tuple, list, np.ndarray, Index))
    if has_aliases or header:
        if index:
            # should write something for index label
            if index_label is not False:
                if index_label is None:
                    # derive the label(s) from the index itself
                    if isinstance(self.obj.index, MultiIndex):
                        index_label = []
                        for i, name in enumerate(self.obj.index.names):
                            if name is None:
                                name = ''
                            index_label.append(name)
                    else:
                        index_label = self.obj.index.name
                        if index_label is None:
                            index_label = ['']
                        else:
                            index_label = [index_label]
                elif not isinstance(index_label,
                                    (list, tuple, np.ndarray, Index)):
                    # given a string for a DF with Index
                    index_label = [index_label]

                encoded_labels = list(index_label)
            else:
                # index_label=False: emit the index fields with no label
                encoded_labels = []

            if has_aliases:
                if len(header) != len(cols):
                    raise ValueError(('Writing %d cols but got %d aliases'
                                      % (len(cols), len(header))))
                else:
                    write_cols = header
            else:
                write_cols = cols
            encoded_cols = list(write_cols)

            writer.writerow(encoded_labels + encoded_cols)
        else:
            # no index: the header row is just the column names/aliases
            encoded_cols = list(cols)
            writer.writerow(encoded_cols)

    if date_format is None:
        date_formatter = lambda x: Timestamp(x)._repr_base
    else:

        def strftime_with_nulls(x):
            # falls through (returns None) for null timestamps so the
            # csv writer emits an empty field
            x = Timestamp(x)
            if notnull(x):
                return x.strftime(date_format)

        date_formatter = lambda x: strftime_with_nulls(x)

    data_index = self.obj.index

    # PeriodIndex values are not datetime-like; convert before formatting
    if isinstance(self.obj.index, PeriodIndex):
        data_index = self.obj.index.to_timestamp()

    if isinstance(data_index, DatetimeIndex) and date_format is not None:
        data_index = Index([date_formatter(x) for x in data_index])

    # work on a copy so formatting never mutates the caller's frame
    values = self.obj.copy()
    values.index = data_index
    values.columns = values.columns.to_native_types(
        na_rep=na_rep, float_format=float_format, date_format=date_format,
        quoting=self.quoting)
    values = values[cols]

    # map column label -> underlying ndarray for fast per-cell access
    series = {}
    for k, v in compat.iteritems(values._series):
        series[k] = v._values

    nlevels = getattr(data_index, 'nlevels', 1)
    for j, idx in enumerate(data_index):
        row_fields = []
        if index:
            if nlevels == 1:
                row_fields = [idx]
            else:  # handle MultiIndex
                row_fields = list(idx)
        for i, col in enumerate(cols):
            val = series[col][j]
            if lib.checknull(val):
                val = na_rep

            if float_format is not None and com.is_float(val):
                val = float_format % val
            elif isinstance(val, (np.datetime64, Timestamp)):
                val = date_formatter(val)

            row_fields.append(val)

        writer.writerow(row_fields)

def save(self):
# create the writer & save
if hasattr(self.path_or_buf, 'write'):
Expand All @@ -1555,17 +1442,7 @@ def save(self):
else:
self.writer = csv.writer(f, **writer_kwargs)

if self.engine == 'python':
# to be removed in 0.13
self._helper_csv(self.writer, na_rep=self.na_rep,
float_format=self.float_format,
cols=self.cols, header=self.header,
index=self.index,
index_label=self.index_label,
date_format=self.date_format)

else:
self._save()
self._save()

finally:
if close:
Expand Down
6 changes: 0 additions & 6 deletions pandas/tests/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -3329,12 +3329,6 @@ def test_to_csv_date_format(self):
self.assertEqual(df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d'),
expected_ymd_sec)

# deprecation GH11274
def test_to_csv_engine_kw_deprecation(self):
    # Passing the deprecated ``engine`` keyword to to_csv must emit a
    # FutureWarning.  (Deleted in this changeset along with the keyword
    # itself, GH13419.)
    with tm.assert_produces_warning(FutureWarning):
        df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]})
        df.to_csv(engine='python')

def test_period(self):
# GH 12615
df = pd.DataFrame({'A': pd.period_range('2013-01',
Expand Down
109 changes: 42 additions & 67 deletions pandas/tests/frame/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pandas.compat import (lmap, range, lrange, StringIO, u)
from pandas.parser import CParserError
from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp,
date_range, read_csv, compat)
date_range, read_csv, compat, to_datetime)
import pandas as pd

from pandas.util.testing import (assert_almost_equal,
Expand Down Expand Up @@ -139,7 +139,7 @@ def test_to_csv_from_csv5(self):
self.tzframe.to_csv(path)
result = pd.read_csv(path, index_col=0, parse_dates=['A'])

converter = lambda c: pd.to_datetime(result[c]).dt.tz_localize(
converter = lambda c: to_datetime(result[c]).dt.tz_localize(
'UTC').dt.tz_convert(self.tzframe[c].dt.tz)
result['B'] = converter('B')
result['C'] = converter('C')
Expand All @@ -162,15 +162,6 @@ def test_to_csv_cols_reordering(self):

assert_frame_equal(df[cols], rs_c, check_names=False)

def test_to_csv_legacy_raises_on_dupe_cols(self):
    # GH3457: the legacy engine='python' writer cannot handle duplicate
    # column labels, so to_csv must raise NotImplementedError; the
    # FutureWarning comes from the deprecated ``engine`` keyword itself.
    # (Deleted in this changeset along with the keyword, GH13419.)
    df = mkdf(10, 3)
    df.columns = ['a', 'a', 'b']  # deliberately duplicated labels
    with ensure_clean() as path:
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            self.assertRaises(NotImplementedError,
                              df.to_csv, path, engine='python')

def test_to_csv_new_dupe_cols(self):
import pandas as pd

Expand Down Expand Up @@ -712,7 +703,6 @@ def test_to_csv_dups_cols(self):
cols.extend([0, 1, 2])
df.columns = cols

from pandas import to_datetime
with ensure_clean() as filename:
df.to_csv(filename)
result = read_csv(filename, index_col=0)
Expand Down Expand Up @@ -993,72 +983,57 @@ def test_to_csv_compression_value_error(self):
filename, compression="zip")

def test_to_csv_date_format(self):
from pandas import to_datetime
with ensure_clean('__tmp_to_csv_date_format__') as path:
for engine in [None, 'python']:
w = FutureWarning if engine == 'python' else None

dt_index = self.tsframe.index
datetime_frame = DataFrame(
{'A': dt_index, 'B': dt_index.shift(1)}, index=dt_index)

with tm.assert_produces_warning(w, check_stacklevel=False):
datetime_frame.to_csv(
path, date_format='%Y%m%d', engine=engine)

# Check that the data was put in the specified format
test = read_csv(path, index_col=0)

datetime_frame_int = datetime_frame.applymap(
lambda x: int(x.strftime('%Y%m%d')))
datetime_frame_int.index = datetime_frame_int.index.map(
lambda x: int(x.strftime('%Y%m%d')))
dt_index = self.tsframe.index
datetime_frame = DataFrame(
{'A': dt_index, 'B': dt_index.shift(1)}, index=dt_index)
datetime_frame.to_csv(path, date_format='%Y%m%d')

assert_frame_equal(test, datetime_frame_int)
# Check that the data was put in the specified format
test = read_csv(path, index_col=0)

with tm.assert_produces_warning(w, check_stacklevel=False):
datetime_frame.to_csv(
path, date_format='%Y-%m-%d', engine=engine)
datetime_frame_int = datetime_frame.applymap(
lambda x: int(x.strftime('%Y%m%d')))
datetime_frame_int.index = datetime_frame_int.index.map(
lambda x: int(x.strftime('%Y%m%d')))

# Check that the data was put in the specified format
test = read_csv(path, index_col=0)
datetime_frame_str = datetime_frame.applymap(
lambda x: x.strftime('%Y-%m-%d'))
datetime_frame_str.index = datetime_frame_str.index.map(
lambda x: x.strftime('%Y-%m-%d'))
assert_frame_equal(test, datetime_frame_int)

assert_frame_equal(test, datetime_frame_str)
datetime_frame.to_csv(path, date_format='%Y-%m-%d')

# Check that columns get converted
datetime_frame_columns = datetime_frame.T
# Check that the data was put in the specified format
test = read_csv(path, index_col=0)
datetime_frame_str = datetime_frame.applymap(
lambda x: x.strftime('%Y-%m-%d'))
datetime_frame_str.index = datetime_frame_str.index.map(
lambda x: x.strftime('%Y-%m-%d'))

with tm.assert_produces_warning(w, check_stacklevel=False):
datetime_frame_columns.to_csv(
path, date_format='%Y%m%d', engine=engine)
assert_frame_equal(test, datetime_frame_str)

test = read_csv(path, index_col=0)
# Check that columns get converted
datetime_frame_columns = datetime_frame.T
datetime_frame_columns.to_csv(path, date_format='%Y%m%d')

datetime_frame_columns = datetime_frame_columns.applymap(
lambda x: int(x.strftime('%Y%m%d')))
# Columns don't get converted to ints by read_csv
datetime_frame_columns.columns = (
datetime_frame_columns.columns
.map(lambda x: x.strftime('%Y%m%d')))
test = read_csv(path, index_col=0)

assert_frame_equal(test, datetime_frame_columns)
datetime_frame_columns = datetime_frame_columns.applymap(
lambda x: int(x.strftime('%Y%m%d')))
# Columns don't get converted to ints by read_csv
datetime_frame_columns.columns = (
datetime_frame_columns.columns
.map(lambda x: x.strftime('%Y%m%d')))

# test NaTs
nat_index = to_datetime(
['NaT'] * 10 + ['2000-01-01', '1/1/2000', '1-1-2000'])
nat_frame = DataFrame({'A': nat_index}, index=nat_index)
assert_frame_equal(test, datetime_frame_columns)

with tm.assert_produces_warning(w, check_stacklevel=False):
nat_frame.to_csv(
path, date_format='%Y-%m-%d', engine=engine)
# test NaTs
nat_index = to_datetime(
['NaT'] * 10 + ['2000-01-01', '1/1/2000', '1-1-2000'])
nat_frame = DataFrame({'A': nat_index}, index=nat_index)
nat_frame.to_csv(path, date_format='%Y-%m-%d')

test = read_csv(path, parse_dates=[0, 1], index_col=0)
test = read_csv(path, parse_dates=[0, 1], index_col=0)

assert_frame_equal(test, nat_frame)
assert_frame_equal(test, nat_frame)

def test_to_csv_with_dst_transitions(self):

Expand All @@ -1077,7 +1052,7 @@ def test_to_csv_with_dst_transitions(self):
# we have to reconvert the index as we
# don't parse the tz's
result = read_csv(path, index_col=0)
result.index = pd.to_datetime(result.index).tz_localize(
result.index = to_datetime(result.index).tz_localize(
'UTC').tz_convert('Europe/London')
assert_frame_equal(result, df)

Expand All @@ -1089,9 +1064,9 @@ def test_to_csv_with_dst_transitions(self):
with ensure_clean('csv_date_format_with_dst') as path:
df.to_csv(path, index=True)
result = read_csv(path, index_col=0)
result.index = pd.to_datetime(result.index).tz_localize(
result.index = to_datetime(result.index).tz_localize(
'UTC').tz_convert('Europe/Paris')
result['idx'] = pd.to_datetime(result['idx']).astype(
result['idx'] = to_datetime(result['idx']).astype(
'datetime64[ns, Europe/Paris]')
assert_frame_equal(result, df)

Expand Down