From ddd6cd2e367543ec3fc77a3cf0b34d8b7290e21e Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 10 Jun 2016 03:29:17 +0100 Subject: [PATCH] CLN: Remove the engine parameter in CSVFormatter and to_csv --- doc/source/whatsnew/v0.19.0.txt | 9 ++ pandas/core/frame.py | 1 - pandas/formats/format.py | 133 ++-------------------------- pandas/tests/formats/test_format.py | 6 -- pandas/tests/frame/test_to_csv.py | 109 +++++++++-------------- 5 files changed, 56 insertions(+), 202 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 657de7ec26efc..1d282e975d7d5 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -433,6 +433,15 @@ Deprecations - ``as_recarray`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13373`) - top-level ``pd.ordered_merge()`` has been renamed to ``pd.merge_ordered()`` and the original name will be removed in a future version (:issue:`13358`) + +.. _whatsnew_0190.prior_deprecations: + +Removal of prior version deprecations/changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- ``DataFrame.to_csv()`` has dropped the ``engine`` parameter (:issue:`11274`, :issue:`13419`) + + .. _whatsnew_0190.performance: Performance Improvements diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e804271d8afa9..356abc67b168a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1342,7 +1342,6 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, cols=columns, header=header, index=index, index_label=index_label, mode=mode, chunksize=chunksize, quotechar=quotechar, - engine=kwds.get("engine"), tupleize_cols=tupleize_cols, date_format=date_format, doublequote=doublequote, diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 0c6a15db4ccfe..cc46ed57aeff0 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -30,7 +30,6 @@ import itertools import csv -import warnings common_docstring = """ Parameters @@ -1326,15 +1325,10 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None, cols=None, header=True, index=True, index_label=None, mode='w', nanRep=None, encoding=None, compression=None, quoting=None, line_terminator='\n', - chunksize=None, engine=None, tupleize_cols=False, - quotechar='"', date_format=None, doublequote=True, - escapechar=None, decimal='.'): - - if engine is not None: - warnings.warn("'engine' keyword is deprecated and will be " - "removed in a future version", FutureWarning, - stacklevel=3) - self.engine = engine # remove for 0.18 + chunksize=None, tupleize_cols=False, quotechar='"', + date_format=None, doublequote=True, escapechar=None, + decimal='.'): + self.obj = obj if path_or_buf is None: @@ -1369,11 +1363,6 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', self.date_format = date_format - # GH3457 - if not self.obj.columns.is_unique and engine == 'python': - raise NotImplementedError("columns.is_unique == False not " - "supported with engine='python'") - self.tupleize_cols = tupleize_cols self.has_mi_columns = (isinstance(obj.columns, MultiIndex) and not self.tupleize_cols) @@ -1430,108 +1419,6 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', if not index: self.nlevels = 0 - # original python implem. 
of df.to_csv - # invoked by df.to_csv(engine=python) - def _helper_csv(self, writer, na_rep=None, cols=None, header=True, - index=True, index_label=None, float_format=None, - date_format=None): - if cols is None: - cols = self.columns - - has_aliases = isinstance(header, (tuple, list, np.ndarray, Index)) - if has_aliases or header: - if index: - # should write something for index label - if index_label is not False: - if index_label is None: - if isinstance(self.obj.index, MultiIndex): - index_label = [] - for i, name in enumerate(self.obj.index.names): - if name is None: - name = '' - index_label.append(name) - else: - index_label = self.obj.index.name - if index_label is None: - index_label = [''] - else: - index_label = [index_label] - elif not isinstance(index_label, - (list, tuple, np.ndarray, Index)): - # given a string for a DF with Index - index_label = [index_label] - - encoded_labels = list(index_label) - else: - encoded_labels = [] - - if has_aliases: - if len(header) != len(cols): - raise ValueError(('Writing %d cols but got %d aliases' - % (len(cols), len(header)))) - else: - write_cols = header - else: - write_cols = cols - encoded_cols = list(write_cols) - - writer.writerow(encoded_labels + encoded_cols) - else: - encoded_cols = list(cols) - writer.writerow(encoded_cols) - - if date_format is None: - date_formatter = lambda x: Timestamp(x)._repr_base - else: - - def strftime_with_nulls(x): - x = Timestamp(x) - if notnull(x): - return x.strftime(date_format) - - date_formatter = lambda x: strftime_with_nulls(x) - - data_index = self.obj.index - - if isinstance(self.obj.index, PeriodIndex): - data_index = self.obj.index.to_timestamp() - - if isinstance(data_index, DatetimeIndex) and date_format is not None: - data_index = Index([date_formatter(x) for x in data_index]) - - values = self.obj.copy() - values.index = data_index - values.columns = values.columns.to_native_types( - na_rep=na_rep, float_format=float_format, date_format=date_format, - quoting=self.quoting) - values = values[cols] - - series = {} - for k, v in compat.iteritems(values._series): - series[k] = v._values - - nlevels = getattr(data_index, 'nlevels', 1) - for j, idx in enumerate(data_index): - row_fields = [] - if index: - if nlevels == 1: - row_fields = [idx] - else: # handle MultiIndex - row_fields = list(idx) - for i, col in enumerate(cols): - val = series[col][j] - if lib.checknull(val): - val = na_rep - - if float_format is not None and com.is_float(val): - val = float_format % val - elif isinstance(val, (np.datetime64, Timestamp)): - val = date_formatter(val) - - row_fields.append(val) - - writer.writerow(row_fields) - def save(self): # create the writer & save if hasattr(self.path_or_buf, 'write'): @@ -1555,17 +1442,7 @@ def save(self): else: self.writer = csv.writer(f, **writer_kwargs) - if self.engine == 'python': - # to be removed in 0.13 - self._helper_csv(self.writer, na_rep=self.na_rep, - float_format=self.float_format, - cols=self.cols, header=self.header, - index=self.index, - index_label=self.index_label, - date_format=self.date_format) - - else: - self._save() + self._save() finally: if close: diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index c5e9c258b293a..7a282e7eb14ad 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -3329,12 +3329,6 @@ def test_to_csv_date_format(self): self.assertEqual(df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d'), expected_ymd_sec) - # deprecation GH11274 - def 
test_to_csv_engine_kw_deprecation(self): - with tm.assert_produces_warning(FutureWarning): - df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]}) - df.to_csv(engine='python') - def test_period(self): # GH 12615 df = pd.DataFrame({'A': pd.period_range('2013-01', diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index c23702ef46ad2..55c7ebb183ce5 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -10,7 +10,7 @@ from pandas.compat import (lmap, range, lrange, StringIO, u) from pandas.parser import CParserError from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp, - date_range, read_csv, compat) + date_range, read_csv, compat, to_datetime) import pandas as pd from pandas.util.testing import (assert_almost_equal, @@ -139,7 +139,7 @@ def test_to_csv_from_csv5(self): self.tzframe.to_csv(path) result = pd.read_csv(path, index_col=0, parse_dates=['A']) - converter = lambda c: pd.to_datetime(result[c]).dt.tz_localize( + converter = lambda c: to_datetime(result[c]).dt.tz_localize( 'UTC').dt.tz_convert(self.tzframe[c].dt.tz) result['B'] = converter('B') result['C'] = converter('C') @@ -162,15 +162,6 @@ def test_to_csv_cols_reordering(self): assert_frame_equal(df[cols], rs_c, check_names=False) - def test_to_csv_legacy_raises_on_dupe_cols(self): - df = mkdf(10, 3) - df.columns = ['a', 'a', 'b'] - with ensure_clean() as path: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - self.assertRaises(NotImplementedError, - df.to_csv, path, engine='python') - def test_to_csv_new_dupe_cols(self): import pandas as pd @@ -712,7 +703,6 @@ def test_to_csv_dups_cols(self): cols.extend([0, 1, 2]) df.columns = cols - from pandas import to_datetime with ensure_clean() as filename: df.to_csv(filename) result = read_csv(filename, index_col=0) @@ -993,72 +983,57 @@ def test_to_csv_compression_value_error(self): filename, compression="zip") def test_to_csv_date_format(self): - from pandas import to_datetime with ensure_clean('__tmp_to_csv_date_format__') as path: - for engine in [None, 'python']: - w = FutureWarning if engine == 'python' else None - - dt_index = self.tsframe.index - datetime_frame = DataFrame( - {'A': dt_index, 'B': dt_index.shift(1)}, index=dt_index) - - with tm.assert_produces_warning(w, check_stacklevel=False): - datetime_frame.to_csv( - path, date_format='%Y%m%d', engine=engine) - - # Check that the data was put in the specified format - test = read_csv(path, index_col=0) - - datetime_frame_int = datetime_frame.applymap( - lambda x: int(x.strftime('%Y%m%d'))) - datetime_frame_int.index = datetime_frame_int.index.map( - lambda x: int(x.strftime('%Y%m%d'))) + dt_index = self.tsframe.index + datetime_frame = DataFrame( + {'A': dt_index, 'B': dt_index.shift(1)}, index=dt_index) + datetime_frame.to_csv(path, date_format='%Y%m%d') - assert_frame_equal(test, datetime_frame_int) + # Check that the data was put in the specified format + test = read_csv(path, index_col=0) - with tm.assert_produces_warning(w, check_stacklevel=False): - datetime_frame.to_csv( - path, date_format='%Y-%m-%d', engine=engine) + datetime_frame_int = datetime_frame.applymap( + lambda x: int(x.strftime('%Y%m%d'))) + datetime_frame_int.index = datetime_frame_int.index.map( + lambda x: int(x.strftime('%Y%m%d'))) - # Check that the data was put in the specified format - test = read_csv(path, index_col=0) - datetime_frame_str = datetime_frame.applymap( - lambda x: x.strftime('%Y-%m-%d')) - datetime_frame_str.index = 
datetime_frame_str.index.map( - lambda x: x.strftime('%Y-%m-%d')) + assert_frame_equal(test, datetime_frame_int) - assert_frame_equal(test, datetime_frame_str) + datetime_frame.to_csv(path, date_format='%Y-%m-%d') - # Check that columns get converted - datetime_frame_columns = datetime_frame.T + # Check that the data was put in the specified format + test = read_csv(path, index_col=0) + datetime_frame_str = datetime_frame.applymap( + lambda x: x.strftime('%Y-%m-%d')) + datetime_frame_str.index = datetime_frame_str.index.map( + lambda x: x.strftime('%Y-%m-%d')) - with tm.assert_produces_warning(w, check_stacklevel=False): - datetime_frame_columns.to_csv( - path, date_format='%Y%m%d', engine=engine) + assert_frame_equal(test, datetime_frame_str) - test = read_csv(path, index_col=0) + # Check that columns get converted + datetime_frame_columns = datetime_frame.T + datetime_frame_columns.to_csv(path, date_format='%Y%m%d') - datetime_frame_columns = datetime_frame_columns.applymap( - lambda x: int(x.strftime('%Y%m%d'))) - # Columns don't get converted to ints by read_csv - datetime_frame_columns.columns = ( - datetime_frame_columns.columns - .map(lambda x: x.strftime('%Y%m%d'))) + test = read_csv(path, index_col=0) - assert_frame_equal(test, datetime_frame_columns) + datetime_frame_columns = datetime_frame_columns.applymap( + lambda x: int(x.strftime('%Y%m%d'))) + # Columns don't get converted to ints by read_csv + datetime_frame_columns.columns = ( + datetime_frame_columns.columns + .map(lambda x: x.strftime('%Y%m%d'))) - # test NaTs - nat_index = to_datetime( - ['NaT'] * 10 + ['2000-01-01', '1/1/2000', '1-1-2000']) - nat_frame = DataFrame({'A': nat_index}, index=nat_index) + assert_frame_equal(test, datetime_frame_columns) - with tm.assert_produces_warning(w, check_stacklevel=False): - nat_frame.to_csv( - path, date_format='%Y-%m-%d', engine=engine) + # test NaTs + nat_index = to_datetime( + ['NaT'] * 10 + ['2000-01-01', '1/1/2000', '1-1-2000']) + nat_frame = DataFrame({'A': nat_index}, index=nat_index) + nat_frame.to_csv(path, date_format='%Y-%m-%d') - test = read_csv(path, parse_dates=[0, 1], index_col=0) + test = read_csv(path, parse_dates=[0, 1], index_col=0) - assert_frame_equal(test, nat_frame) + assert_frame_equal(test, nat_frame) def test_to_csv_with_dst_transitions(self): @@ -1077,7 +1052,7 @@ def test_to_csv_with_dst_transitions(self): # we have to reconvert the index as we # don't parse the tz's result = read_csv(path, index_col=0) - result.index = pd.to_datetime(result.index).tz_localize( + result.index = to_datetime(result.index).tz_localize( 'UTC').tz_convert('Europe/London') assert_frame_equal(result, df) @@ -1089,9 +1064,9 @@ def test_to_csv_with_dst_transitions(self): with ensure_clean('csv_date_format_with_dst') as path: df.to_csv(path, index=True) result = read_csv(path, index_col=0) - result.index = pd.to_datetime(result.index).tz_localize( + result.index = to_datetime(result.index).tz_localize( 'UTC').tz_convert('Europe/Paris') - result['idx'] = pd.to_datetime(result['idx']).astype( + result['idx'] = to_datetime(result['idx']).astype( 'datetime64[ns, Europe/Paris]') assert_frame_equal(result, df)
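
Illustrative usage, not part of the patch: the change removes the long-deprecated Python fallback writer, so the only caller-visible effect is that ``engine`` is no longer an accepted keyword for ``DataFrame.to_csv``; the single remaining writer path in ``CSVFormatter._save()`` is always used. A minimal sketch of the before/after call, assuming a throwaway file name ``out.csv`` (the frame mirrors the one from the removed deprecation test):

    import pandas as pd

    df = pd.DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]})

    # Before this patch (deprecated since GH 11274, emitted a FutureWarning):
    #     df.to_csv('out.csv', engine='python')
    # After this patch the keyword is gone; just drop it from the call:
    df.to_csv('out.csv', date_format='%Y-%m-%d')

    # Round-trip to confirm the written output is unchanged for plain frames.
    print(pd.read_csv('out.csv', index_col=0))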