CLN: Remove the engine parameter in CSVFormatter and to_csv
closes #13419
xref #11274
gfyoung authored and jreback committed Jul 10, 2016
1 parent 2a96ab7 commit c989570
Showing 5 changed files with 56 additions and 202 deletions.
9 changes: 9 additions & 0 deletions doc/source/whatsnew/v0.19.0.txt
@@ -436,6 +436,15 @@ Deprecations
- top-level ``pd.ordered_merge()`` has been renamed to ``pd.merge_ordered()`` and the original name will be removed in a future version (:issue:`13358`)
- ``Timestamp.offset`` property (and named arg in the constructor), has been deprecated in favor of ``freq`` (:issue:`12160`)


.. _whatsnew_0190.prior_deprecations:

Removal of prior version deprecations/changes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

- ``DataFrame.to_csv()`` has dropped the ``engine`` parameter, which had been deprecated since 0.17.1 (:issue:`11274`, :issue:`13419`)


.. _whatsnew_0190.performance:

Performance Improvements
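For context on the whatsnew entry above, a minimal before/after sketch of the call site. The frame contents mirror the removed deprecation test; the output path is an illustrative assumption, not taken from the commit.

import pandas as pd

df = pd.DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]})

# From 0.17.1 through 0.18.x, passing the keyword emitted a FutureWarning
# and routed through a legacy pure-Python writer:
# df.to_csv('out.csv', engine='python')

# As of this change the keyword is gone; the default writer is always used.
df.to_csv('out.csv')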
1 change: 0 additions & 1 deletion pandas/core/frame.py
@@ -1342,7 +1342,6 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
cols=columns, header=header, index=index,
index_label=index_label, mode=mode,
chunksize=chunksize, quotechar=quotechar,
engine=kwds.get("engine"),
tupleize_cols=tupleize_cols,
date_format=date_format,
doublequote=doublequote,
133 changes: 5 additions & 128 deletions pandas/formats/format.py
@@ -30,7 +30,6 @@

import itertools
import csv
import warnings

common_docstring = """
Parameters
@@ -1326,15 +1325,10 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
float_format=None, cols=None, header=True, index=True,
index_label=None, mode='w', nanRep=None, encoding=None,
compression=None, quoting=None, line_terminator='\n',
chunksize=None, engine=None, tupleize_cols=False,
quotechar='"', date_format=None, doublequote=True,
escapechar=None, decimal='.'):

if engine is not None:
warnings.warn("'engine' keyword is deprecated and will be "
"removed in a future version", FutureWarning,
stacklevel=3)
self.engine = engine # remove for 0.18
chunksize=None, tupleize_cols=False, quotechar='"',
date_format=None, doublequote=True, escapechar=None,
decimal='.'):

self.obj = obj

if path_or_buf is None:
@@ -1369,11 +1363,6 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',

self.date_format = date_format

# GH3457
if not self.obj.columns.is_unique and engine == 'python':
raise NotImplementedError("columns.is_unique == False not "
"supported with engine='python'")

self.tupleize_cols = tupleize_cols
self.has_mi_columns = (isinstance(obj.columns, MultiIndex) and
not self.tupleize_cols)
@@ -1430,108 +1419,6 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
if not index:
self.nlevels = 0

# original python implem. of df.to_csv
# invoked by df.to_csv(engine=python)
def _helper_csv(self, writer, na_rep=None, cols=None, header=True,
index=True, index_label=None, float_format=None,
date_format=None):
if cols is None:
cols = self.columns

has_aliases = isinstance(header, (tuple, list, np.ndarray, Index))
if has_aliases or header:
if index:
# should write something for index label
if index_label is not False:
if index_label is None:
if isinstance(self.obj.index, MultiIndex):
index_label = []
for i, name in enumerate(self.obj.index.names):
if name is None:
name = ''
index_label.append(name)
else:
index_label = self.obj.index.name
if index_label is None:
index_label = ['']
else:
index_label = [index_label]
elif not isinstance(index_label,
(list, tuple, np.ndarray, Index)):
# given a string for a DF with Index
index_label = [index_label]

encoded_labels = list(index_label)
else:
encoded_labels = []

if has_aliases:
if len(header) != len(cols):
raise ValueError(('Writing %d cols but got %d aliases'
% (len(cols), len(header))))
else:
write_cols = header
else:
write_cols = cols
encoded_cols = list(write_cols)

writer.writerow(encoded_labels + encoded_cols)
else:
encoded_cols = list(cols)
writer.writerow(encoded_cols)

if date_format is None:
date_formatter = lambda x: Timestamp(x)._repr_base
else:

def strftime_with_nulls(x):
x = Timestamp(x)
if notnull(x):
return x.strftime(date_format)

date_formatter = lambda x: strftime_with_nulls(x)

data_index = self.obj.index

if isinstance(self.obj.index, PeriodIndex):
data_index = self.obj.index.to_timestamp()

if isinstance(data_index, DatetimeIndex) and date_format is not None:
data_index = Index([date_formatter(x) for x in data_index])

values = self.obj.copy()
values.index = data_index
values.columns = values.columns.to_native_types(
na_rep=na_rep, float_format=float_format, date_format=date_format,
quoting=self.quoting)
values = values[cols]

series = {}
for k, v in compat.iteritems(values._series):
series[k] = v._values

nlevels = getattr(data_index, 'nlevels', 1)
for j, idx in enumerate(data_index):
row_fields = []
if index:
if nlevels == 1:
row_fields = [idx]
else: # handle MultiIndex
row_fields = list(idx)
for i, col in enumerate(cols):
val = series[col][j]
if lib.checknull(val):
val = na_rep

if float_format is not None and com.is_float(val):
val = float_format % val
elif isinstance(val, (np.datetime64, Timestamp)):
val = date_formatter(val)

row_fields.append(val)

writer.writerow(row_fields)

def save(self):
# create the writer & save
if hasattr(self.path_or_buf, 'write'):
@@ -1555,17 +1442,7 @@ def save(self):
else:
self.writer = csv.writer(f, **writer_kwargs)

if self.engine == 'python':
# to be removed in 0.13
self._helper_csv(self.writer, na_rep=self.na_rep,
float_format=self.float_format,
cols=self.cols, header=self.header,
index=self.index,
index_label=self.index_label,
date_format=self.date_format)

else:
self._save()
self._save()

finally:
if close:
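To illustrate what the deleted ``_helper_csv`` path amounted to versus the single ``self._save()`` call that ``save()`` now always makes, here is a standalone sketch. ``write_rows_python`` and the file names are hypothetical; this is not the pandas implementation, only the shape of the removed row-by-row approach.

import csv

import pandas as pd

def write_rows_python(df, path):
    # Hypothetical stand-in for the removed _helper_csv: build the header,
    # then feed each row to csv.writer as a plain Python list.
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([''] + list(df.columns))
        for idx, row in zip(df.index, df.itertuples(index=False)):
            writer.writerow([idx] + list(row))

df = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y']})
write_rows_python(df, 'legacy_style.csv')  # roughly what engine='python' did
df.to_csv('fast_path.csv')                 # the only remaining path in save()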
6 changes: 0 additions & 6 deletions pandas/tests/formats/test_format.py
@@ -3329,12 +3329,6 @@ def test_to_csv_date_format(self):
self.assertEqual(df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d'),
expected_ymd_sec)

# deprecation GH11274
def test_to_csv_engine_kw_deprecation(self):
with tm.assert_produces_warning(FutureWarning):
df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]})
df.to_csv(engine='python')

def test_period(self):
# GH 12615
df = pd.DataFrame({'A': pd.period_range('2013-01',
109 changes: 42 additions & 67 deletions pandas/tests/frame/test_to_csv.py
@@ -10,7 +10,7 @@
from pandas.compat import (lmap, range, lrange, StringIO, u)
from pandas.parser import CParserError
from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp,
date_range, read_csv, compat)
date_range, read_csv, compat, to_datetime)
import pandas as pd

from pandas.util.testing import (assert_almost_equal,
@@ -139,7 +139,7 @@ def test_to_csv_from_csv5(self):
self.tzframe.to_csv(path)
result = pd.read_csv(path, index_col=0, parse_dates=['A'])

converter = lambda c: pd.to_datetime(result[c]).dt.tz_localize(
converter = lambda c: to_datetime(result[c]).dt.tz_localize(
'UTC').dt.tz_convert(self.tzframe[c].dt.tz)
result['B'] = converter('B')
result['C'] = converter('C')
@@ -162,15 +162,6 @@ def test_to_csv_cols_reordering(self):

assert_frame_equal(df[cols], rs_c, check_names=False)

def test_to_csv_legacy_raises_on_dupe_cols(self):
df = mkdf(10, 3)
df.columns = ['a', 'a', 'b']
with ensure_clean() as path:
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
self.assertRaises(NotImplementedError,
df.to_csv, path, engine='python')

def test_to_csv_new_dupe_cols(self):
import pandas as pd

@@ -712,7 +703,6 @@ def test_to_csv_dups_cols(self):
cols.extend([0, 1, 2])
df.columns = cols

from pandas import to_datetime
with ensure_clean() as filename:
df.to_csv(filename)
result = read_csv(filename, index_col=0)
@@ -993,72 +983,57 @@ def test_to_csv_compression_value_error(self):
filename, compression="zip")

def test_to_csv_date_format(self):
from pandas import to_datetime
with ensure_clean('__tmp_to_csv_date_format__') as path:
for engine in [None, 'python']:
w = FutureWarning if engine == 'python' else None

dt_index = self.tsframe.index
datetime_frame = DataFrame(
{'A': dt_index, 'B': dt_index.shift(1)}, index=dt_index)

with tm.assert_produces_warning(w, check_stacklevel=False):
datetime_frame.to_csv(
path, date_format='%Y%m%d', engine=engine)

# Check that the data was put in the specified format
test = read_csv(path, index_col=0)

datetime_frame_int = datetime_frame.applymap(
lambda x: int(x.strftime('%Y%m%d')))
datetime_frame_int.index = datetime_frame_int.index.map(
lambda x: int(x.strftime('%Y%m%d')))
dt_index = self.tsframe.index
datetime_frame = DataFrame(
{'A': dt_index, 'B': dt_index.shift(1)}, index=dt_index)
datetime_frame.to_csv(path, date_format='%Y%m%d')

assert_frame_equal(test, datetime_frame_int)
# Check that the data was put in the specified format
test = read_csv(path, index_col=0)

with tm.assert_produces_warning(w, check_stacklevel=False):
datetime_frame.to_csv(
path, date_format='%Y-%m-%d', engine=engine)
datetime_frame_int = datetime_frame.applymap(
lambda x: int(x.strftime('%Y%m%d')))
datetime_frame_int.index = datetime_frame_int.index.map(
lambda x: int(x.strftime('%Y%m%d')))

# Check that the data was put in the specified format
test = read_csv(path, index_col=0)
datetime_frame_str = datetime_frame.applymap(
lambda x: x.strftime('%Y-%m-%d'))
datetime_frame_str.index = datetime_frame_str.index.map(
lambda x: x.strftime('%Y-%m-%d'))
assert_frame_equal(test, datetime_frame_int)

assert_frame_equal(test, datetime_frame_str)
datetime_frame.to_csv(path, date_format='%Y-%m-%d')

# Check that columns get converted
datetime_frame_columns = datetime_frame.T
# Check that the data was put in the specified format
test = read_csv(path, index_col=0)
datetime_frame_str = datetime_frame.applymap(
lambda x: x.strftime('%Y-%m-%d'))
datetime_frame_str.index = datetime_frame_str.index.map(
lambda x: x.strftime('%Y-%m-%d'))

with tm.assert_produces_warning(w, check_stacklevel=False):
datetime_frame_columns.to_csv(
path, date_format='%Y%m%d', engine=engine)
assert_frame_equal(test, datetime_frame_str)

test = read_csv(path, index_col=0)
# Check that columns get converted
datetime_frame_columns = datetime_frame.T
datetime_frame_columns.to_csv(path, date_format='%Y%m%d')

datetime_frame_columns = datetime_frame_columns.applymap(
lambda x: int(x.strftime('%Y%m%d')))
# Columns don't get converted to ints by read_csv
datetime_frame_columns.columns = (
datetime_frame_columns.columns
.map(lambda x: x.strftime('%Y%m%d')))
test = read_csv(path, index_col=0)

assert_frame_equal(test, datetime_frame_columns)
datetime_frame_columns = datetime_frame_columns.applymap(
lambda x: int(x.strftime('%Y%m%d')))
# Columns don't get converted to ints by read_csv
datetime_frame_columns.columns = (
datetime_frame_columns.columns
.map(lambda x: x.strftime('%Y%m%d')))

# test NaTs
nat_index = to_datetime(
['NaT'] * 10 + ['2000-01-01', '1/1/2000', '1-1-2000'])
nat_frame = DataFrame({'A': nat_index}, index=nat_index)
assert_frame_equal(test, datetime_frame_columns)

with tm.assert_produces_warning(w, check_stacklevel=False):
nat_frame.to_csv(
path, date_format='%Y-%m-%d', engine=engine)
# test NaTs
nat_index = to_datetime(
['NaT'] * 10 + ['2000-01-01', '1/1/2000', '1-1-2000'])
nat_frame = DataFrame({'A': nat_index}, index=nat_index)
nat_frame.to_csv(path, date_format='%Y-%m-%d')

test = read_csv(path, parse_dates=[0, 1], index_col=0)
test = read_csv(path, parse_dates=[0, 1], index_col=0)

assert_frame_equal(test, nat_frame)
assert_frame_equal(test, nat_frame)

def test_to_csv_with_dst_transitions(self):

Expand All @@ -1077,7 +1052,7 @@ def test_to_csv_with_dst_transitions(self):
# we have to reconvert the index as we
# don't parse the tz's
result = read_csv(path, index_col=0)
result.index = pd.to_datetime(result.index).tz_localize(
result.index = to_datetime(result.index).tz_localize(
'UTC').tz_convert('Europe/London')
assert_frame_equal(result, df)

@@ -1089,9 +1064,9 @@ def test_to_csv_with_dst_transitions(self):
with ensure_clean('csv_date_format_with_dst') as path:
df.to_csv(path, index=True)
result = read_csv(path, index_col=0)
result.index = pd.to_datetime(result.index).tz_localize(
result.index = to_datetime(result.index).tz_localize(
'UTC').tz_convert('Europe/Paris')
result['idx'] = pd.to_datetime(result['idx']).astype(
result['idx'] = to_datetime(result['idx']).astype(
'datetime64[ns, Europe/Paris]')
assert_frame_equal(result, df)

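The simplified date-format tests above reduce to round-trips like the following. The dates and the file name are illustrative assumptions, not values from the test suite.

import pandas as pd

dt_index = pd.date_range('2000-01-03', periods=5, freq='D')
frame = pd.DataFrame({'A': dt_index, 'B': dt_index.shift(1)}, index=dt_index)

# Write with an explicit date format and read it back; no engine keyword.
frame.to_csv('dates.csv', date_format='%Y%m%d')
roundtrip = pd.read_csv('dates.csv', index_col=0)
print(roundtrip.head())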
