Skip to content

Commit

Permalink
API: Deprecate old Series.to_csv signature (#21896)
Browse files Browse the repository at this point in the history
closes #19715
  • Loading branch information
toobaz authored and jorisvandenbossche committed Aug 13, 2018
1 parent 188a4fc commit eb0ac54
Show file tree
Hide file tree
Showing 7 changed files with 230 additions and 182 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,7 @@ Deprecations
- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`)
- :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`)
- :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`)
- The signature of :meth:`Series.to_csv` has been aligned with that of :meth:`DataFrame.to_csv`: the name of the first argument is now ``path_or_buf``, the order of subsequent arguments has changed, and the default of the ``header`` argument will change from ``False`` to ``True`` (until then, a ``FutureWarning`` is raised when ``header`` is not passed explicitly). (:issue:`19715`)
- :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`)
- :func:`pandas.read_table` is deprecated. Instead, use :func:`pandas.read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`)

Expand Down
101 changes: 0 additions & 101 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1714,107 +1714,6 @@ def to_panel(self):

return self._constructor_expanddim(new_mgr)

def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
           columns=None, header=True, index=True, index_label=None,
           mode='w', encoding=None, compression='infer', quoting=None,
           quotechar='"', line_terminator='\n', chunksize=None,
           tupleize_cols=None, date_format=None, doublequote=True,
           escapechar=None, decimal='.'):
    r"""
    Write the DataFrame to a comma-separated values (csv) file.

    Parameters
    ----------
    path_or_buf : string or file handle, default None
        Destination path or file object. When None, the csv text is
        returned as a string instead.
    sep : character, default ','
        Delimiter placed between fields in the output.
    na_rep : string, default ''
        Text written in place of missing data.
    float_format : string, default None
        Format string applied to floating point numbers.
    columns : sequence, optional
        The columns to write.
    header : boolean or list of string, default True
        Whether to write the column names. A list of strings is taken
        as aliases for the column names.
    index : boolean, default True
        Whether to write the row names (index).
    index_label : string or sequence, or False, default None
        Label(s) for the index column(s). With None, the index names
        are used when both `header` and `index` are True. A sequence
        should be given when the DataFrame uses a MultiIndex. False
        omits the fields for the index names; use index_label=False
        for easier importing in R.
    mode : str, default 'w'
        Python write mode.
    encoding : string, optional
        Encoding of the output file; defaults to 'ascii' on Python 2
        and 'utf-8' on Python 3.
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None},
                  default 'infer'
        With 'infer' and a path-like `path_or_buf`, the compression is
        detected from the extensions '.gz', '.bz2', '.zip' or '.xz'
        (otherwise no compression).

        .. versionchanged:: 0.24.0
           'infer' option added and set to default
    quoting : optional constant from csv module
        Defaults to csv.QUOTE_MINIMAL. When a `float_format` is set,
        floats are converted to strings, so csv.QUOTE_NONNUMERIC will
        treat them as non-numeric.
    quotechar : string (length 1), default '"'
        Character used to quote fields.
    line_terminator : string, default ``'\n'``
        Newline character or character sequence used in the output
        file.
    chunksize : int or None
        Number of rows to write at a time.
    tupleize_cols : boolean, default False
        Write MultiIndex columns as a list of tuples (if True) or in
        the expanded format, where each MultiIndex column is a row in
        the CSV (if False).

        .. deprecated:: 0.21.0
           This argument will be removed and each row of the
           multi-index will always be written as a separate row in
           the CSV file.
    date_format : string, default None
        Format string applied to datetime objects.
    doublequote : boolean, default True
        Controls quoting of `quotechar` inside a field.
    escapechar : string (length 1), default None
        Character used to escape `sep` and `quotechar` when
        appropriate.
    decimal : string, default '.'
        Character recognized as decimal separator, e.g. ',' for
        European data.

    Returns
    -------
    string or None
        The csv text when `path_or_buf` is None, otherwise None.
    """
    if tupleize_cols is None:
        # Not supplied by the caller: fall back silently to the
        # expanded MultiIndex format.
        tupleize_cols = False
    else:
        # Any explicit value, True or False, counts as use of the
        # deprecated keyword.
        warnings.warn("The 'tupleize_cols' parameter is deprecated and "
                      "will be removed in a future version",
                      FutureWarning, stacklevel=2)

    from pandas.io.formats.csvs import CSVFormatter

    # Note that the public `columns` keyword is called `cols` by the
    # formatter.
    csv_options = dict(
        line_terminator=line_terminator,
        sep=sep,
        encoding=encoding,
        compression=compression,
        quoting=quoting,
        na_rep=na_rep,
        float_format=float_format,
        cols=columns,
        header=header,
        index=index,
        index_label=index_label,
        mode=mode,
        chunksize=chunksize,
        quotechar=quotechar,
        tupleize_cols=tupleize_cols,
        date_format=date_format,
        doublequote=doublequote,
        escapechar=escapechar,
        decimal=decimal,
    )
    writer = CSVFormatter(self, path_or_buf, **csv_options)
    writer.save()

    if path_or_buf is not None:
        return None
    # No destination was given: hand back what the formatter buffered.
    return writer.path_or_buf.getvalue()

@Appender(_shared_docs['to_excel'] % _shared_doc_kwargs)
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
float_format=None, columns=None, header=True, index=True,
Expand Down
109 changes: 109 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9271,6 +9271,115 @@ def first_valid_index(self):
def last_valid_index(self):
return self._find_valid_index('last')

def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
           columns=None, header=True, index=True, index_label=None,
           mode='w', encoding=None, compression='infer', quoting=None,
           quotechar='"', line_terminator='\n', chunksize=None,
           tupleize_cols=None, date_format=None, doublequote=True,
           escapechar=None, decimal='.'):
    r"""
    Write the object to a comma-separated values (csv) file.

    Parameters
    ----------
    path_or_buf : string or file handle, default None
        Destination path or file object. When None, the csv text is
        returned as a string instead.

        .. versionchanged:: 0.24.0
           Was previously named "path" for Series.
    sep : character, default ','
        Delimiter placed between fields in the output.
    na_rep : string, default ''
        Text written in place of missing data.
    float_format : string, default None
        Format string applied to floating point numbers.
    columns : sequence, optional
        The columns to write.
    header : boolean or list of string, default True
        Whether to write the column names. A list of strings is taken
        as aliases for the column names.

        .. versionchanged:: 0.24.0
           Previously defaulted to False for Series.
    index : boolean, default True
        Whether to write the row names (index).
    index_label : string or sequence, or False, default None
        Label(s) for the index column(s). With None, the index names
        are used when both `header` and `index` are True. A sequence
        should be given when the object uses a MultiIndex. False omits
        the fields for the index names; use index_label=False for
        easier importing in R.
    mode : str, default 'w'
        Python write mode.
    encoding : string, optional
        Encoding of the output file; defaults to 'ascii' on Python 2
        and 'utf-8' on Python 3.
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None},
                  default 'infer'
        With 'infer' and a path-like `path_or_buf`, the compression is
        detected from the extensions '.gz', '.bz2', '.zip' or '.xz'
        (otherwise no compression).

        .. versionchanged:: 0.24.0
           'infer' option added and set to default
    quoting : optional constant from csv module
        Defaults to csv.QUOTE_MINIMAL. When a `float_format` is set,
        floats are converted to strings, so csv.QUOTE_NONNUMERIC will
        treat them as non-numeric.
    quotechar : string (length 1), default '"'
        Character used to quote fields.
    line_terminator : string, default ``'\n'``
        Newline character or character sequence used in the output
        file.
    chunksize : int or None
        Number of rows to write at a time.
    tupleize_cols : boolean, default False
        Write MultiIndex columns as a list of tuples (if True) or in
        the expanded format, where each MultiIndex column is a row in
        the CSV (if False).

        .. deprecated:: 0.21.0
           This argument will be removed and each row of the
           multi-index will always be written as a separate row in
           the CSV file.
    date_format : string, default None
        Format string applied to datetime objects.
    doublequote : boolean, default True
        Controls quoting of `quotechar` inside a field.
    escapechar : string (length 1), default None
        Character used to escape `sep` and `quotechar` when
        appropriate.
    decimal : string, default '.'
        Character recognized as decimal separator, e.g. ',' for
        European data.

    Returns
    -------
    string or None
        The csv text when `path_or_buf` is None, otherwise None.

    Notes
    -----
    .. versionchanged:: 0.24.0
       The order of arguments for Series was changed.
    """
    # The formatter is always handed a frame; anything that is not
    # already a DataFrame is converted through `to_frame()` first.
    if isinstance(self, ABCDataFrame):
        frame = self
    else:
        frame = self.to_frame()

    if tupleize_cols is None:
        # Not supplied by the caller: fall back silently to the
        # expanded MultiIndex format.
        tupleize_cols = False
    else:
        # Any explicit value, True or False, counts as use of the
        # deprecated keyword.
        warnings.warn("The 'tupleize_cols' parameter is deprecated and "
                      "will be removed in a future version",
                      FutureWarning, stacklevel=2)

    from pandas.io.formats.csvs import CSVFormatter

    # Note that the public `columns` keyword is called `cols` by the
    # formatter.
    csv_options = dict(
        line_terminator=line_terminator,
        sep=sep,
        encoding=encoding,
        compression=compression,
        quoting=quoting,
        na_rep=na_rep,
        float_format=float_format,
        cols=columns,
        header=header,
        index=index,
        index_label=index_label,
        mode=mode,
        chunksize=chunksize,
        quotechar=quotechar,
        tupleize_cols=tupleize_cols,
        date_format=date_format,
        doublequote=doublequote,
        escapechar=escapechar,
        decimal=decimal,
    )
    writer = CSVFormatter(frame, path_or_buf, **csv_options)
    writer.save()

    if path_or_buf is not None:
        return None
    # No destination was given: hand back what the formatter buffered.
    return writer.path_or_buf.getvalue()


def _doc_parms(cls):
"""Return a tuple of the doc parms."""
Expand Down
110 changes: 57 additions & 53 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pandas.core.arrays import ExtensionArray
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_string_like,
is_bool,
is_integer, is_integer_dtype,
is_float_dtype,
Expand Down Expand Up @@ -3765,59 +3766,62 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,

return result

def to_csv(self, path=None, index=True, sep=",", na_rep='',
           float_format=None, header=False, index_label=None,
           mode='w', encoding=None, compression='infer', date_format=None,
           decimal='.'):
    """
    Write the Series to a comma-separated values (csv) file.

    The Series is wrapped in a DataFrame and the work is delegated to
    ``DataFrame.to_csv``.

    Parameters
    ----------
    path : string or file handle, default None
        Destination path or file object. When None, the csv text is
        returned as a string instead.
    index : boolean, default True
        Whether to write the row names (index).
    sep : character, default ","
        Delimiter placed between fields in the output.
    na_rep : string, default ''
        Text written in place of missing data.
    float_format : string, default None
        Format string applied to floating point numbers.
    header : boolean, default False
        Whether to write out the series name.
    index_label : string or sequence, default None
        Label(s) for the index column(s). With None, the index names
        are used when both `header` and `index` are True. A sequence
        should be given when a MultiIndex is used.
    mode : Python write mode, default 'w'
    encoding : string, optional
        Encoding to use if the contents are non-ascii, for python
        versions prior to 3.
    compression : None or string, default 'infer'
        Compression to use in the output file. Allowed values are
        None, 'gzip', 'bz2', 'zip', 'xz', and 'infer'. Only used when
        the first argument is a filename.

        .. versionchanged:: 0.24.0
           'infer' option added and set to default
    date_format : string, default None
        Format string applied to datetime objects.
    decimal : string, default '.'
        Character recognized as decimal separator, e.g. ',' for
        European data.

    Returns
    -------
    string or None
        The csv text when `path` is None, otherwise None.
    """
    from pandas.core.frame import DataFrame

    forwarded = dict(
        index=index,
        sep=sep,
        na_rep=na_rep,
        float_format=float_format,
        header=header,
        index_label=index_label,
        mode=mode,
        encoding=encoding,
        compression=compression,
        date_format=date_format,
        decimal=decimal,
    )
    output = DataFrame(self).to_csv(path, **forwarded)

    # The frame-level call is only passed through when no destination
    # was given; with a destination this method returns None.
    return output if path is None else None
@Appender(generic.NDFrame.to_csv.__doc__)
def to_csv(self, *args, **kwargs):
    # NOTE: deliberately no docstring here -- `Appender` builds this
    # method's `__doc__` from `NDFrame.to_csv`, and a local docstring
    # would be prepended to it.
    #
    # Compatibility shim between the old `Series.to_csv` signature and
    # the `DataFrame.to_csv` one. Positional arguments are mapped onto
    # parameter names (guessing which of the two orders the caller used),
    # merged into `kwargs`, and forwarded to the frame implementation.
    #
    # Raises
    # ------
    # TypeError
    #     If more positional arguments are passed than the selected
    #     signature has parameters.
    # ValueError
    #     If an argument is passed both by position and by name.

    # Parameter order of the new (DataFrame-aligned) signature.
    names = ["path_or_buf", "sep", "na_rep", "float_format", "columns",
             "header", "index", "index_label", "mode", "encoding",
             "compression", "quoting", "quotechar", "line_terminator",
             "chunksize", "tupleize_cols", "date_format", "doublequote",
             "escapechar", "decimal"]

    # Parameter order of the old Series-only signature (with the first
    # parameter already spelled under its new name).
    old_names = ["path_or_buf", "index", "sep", "na_rep", "float_format",
                 "header", "index_label", "mode", "encoding",
                 "compression", "date_format", "decimal"]

    if "path" in kwargs:
        warnings.warn("The signature of `Series.to_csv` was aligned "
                      "to that of `DataFrame.to_csv`, and argument "
                      "'path' will be renamed to 'path_or_buf'.",
                      FutureWarning, stacklevel=2)
        kwargs["path_or_buf"] = kwargs.pop("path")

    if len(args) > 1:
        # Either "index" (old signature) or "sep" (new signature) is being
        # passed as second argument (while the first is the same)
        maybe_sep = args[1]

        if not (is_string_like(maybe_sep) and len(maybe_sep) == 1):
            # old signature
            warnings.warn("The signature of `Series.to_csv` was aligned "
                          "to that of `DataFrame.to_csv`. Note that the "
                          "order of arguments changed, and the new one "
                          "has 'sep' in first place, for which \"{}\" is "
                          "not a valid value. The old order will cease to "
                          "be supported in a future version. Please refer "
                          "to the documentation for `DataFrame.to_csv` "
                          "when updating your function "
                          "calls.".format(maybe_sep),
                          FutureWarning, stacklevel=2)
            names = old_names

    # `zip` stops at the shorter input, so surplus positional arguments
    # used to be dropped without any error. Reject them explicitly, as a
    # regular (non-variadic) signature would.
    if len(args) > len(names):
        raise TypeError("to_csv() takes at most {} positional arguments "
                        "({} given)".format(len(names), len(args)))

    pos_args = dict(zip(names, args))

    for key, value in pos_args.items():
        if key in kwargs:
            raise ValueError("Argument given by name ('{}') and position "
                             "({})".format(key, names.index(key)))
        kwargs[key] = value

    # A missing `header` and an explicit `header=None` are treated alike:
    # both warn and fall back to the old default.
    if kwargs.get("header", None) is None:
        warnings.warn("The signature of `Series.to_csv` was aligned "
                      "to that of `DataFrame.to_csv`, and argument "
                      "'header' will change its default value from False "
                      "to True: please pass an explicit value to suppress "
                      "this warning.", FutureWarning,
                      stacklevel=2)
        kwargs["header"] = False  # Backwards compatibility.
    return self.to_frame().to_csv(**kwargs)

@Appender(generic._shared_docs['to_excel'] % _shared_doc_kwargs)
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
Expand Down
17 changes: 11 additions & 6 deletions pandas/tests/frame/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -893,22 +893,27 @@ def test_to_csv_line_terminators(self):

def test_to_csv_from_csv_categorical(self):
    # Writing categorical data to csv must produce the same text as
    # writing the equivalent "normal" Series/DataFrame.
    labels = ["a", "b", "b", "a", "a", "c", "c", "c"]
    s = Series(pd.Categorical(labels))
    s2 = Series(labels)

    # Series level: each buffer receives one write with the default
    # header and one with header=False.
    actual = StringIO()
    s.to_csv(actual)
    s.to_csv(actual, header=False)

    expected = StringIO()
    s2.to_csv(expected)
    s2.to_csv(expected, header=False)

    assert actual.getvalue() == expected.getvalue()

    # DataFrame level: same comparison with each Series wrapped in a
    # single-column frame.
    actual = StringIO()
    DataFrame({"s": s}).to_csv(actual)

    expected = StringIO()
    DataFrame({"s": s2}).to_csv(expected)

    assert actual.getvalue() == expected.getvalue()

def test_to_csv_path_is_none(self):
Expand Down
Loading

0 comments on commit eb0ac54

Please sign in to comment.