Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: Deprecate Series.to_csv signature #21896

Merged
merged 1 commit into from
Aug 13, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ Deprecations
- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`)
- :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`)
- :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`)
- The signature of :meth:`Series.to_csv` has been aligned with that of :meth:`DataFrame.to_csv`: the first argument is now named ``path_or_buf``, the order of the subsequent arguments has changed, and the ``header`` argument will default to ``True`` in a future version. (:issue:`19715`)
- :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`)
- :func:`pandas.read_table` is deprecated. Instead, use :func:`pandas.read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`)

Expand Down
101 changes: 0 additions & 101 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1714,107 +1714,6 @@ def to_panel(self):

return self._constructor_expanddim(new_mgr)

def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
           columns=None, header=True, index=True, index_label=None,
           mode='w', encoding=None, compression='infer', quoting=None,
           quotechar='"', line_terminator='\n', chunksize=None,
           tupleize_cols=None, date_format=None, doublequote=True,
           escapechar=None, decimal='.'):
    r"""Serialise the DataFrame as delimiter-separated (csv) text.

    Parameters
    ----------
    path_or_buf : string or file handle, default None
        Destination path or writable object. When None, the csv text is
        returned as a string instead.
    sep : character, default ','
        Delimiter placed between fields.
    na_rep : string, default ''
        Text written in place of missing data.
    float_format : string, default None
        Format string applied to floating point numbers.
    columns : sequence, optional
        Subset of columns to write.
    header : boolean or list of string, default True
        Whether to write the column names. A list of strings is taken as
        aliases for the column names.
    index : boolean, default True
        Whether to write the row names (index).
    index_label : string or sequence, or False, default None
        Label(s) for the index column(s). With None, and `header` and
        `index` both True, the index names are used. Pass a sequence when
        the DataFrame uses a MultiIndex. False suppresses the fields for
        the index names (easier importing in R).
    mode : str, default 'w'
        Python write mode.
    encoding : string, optional
        Encoding of the output file; defaults to 'ascii' on Python 2 and
        'utf-8' on Python 3.
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
        With 'infer' and a path-like `path_or_buf`, the compression is
        detected from the extensions '.gz', '.bz2', '.zip' or '.xz'
        (otherwise no compression).

        .. versionchanged:: 0.24.0
           'infer' option added and set to default
    quoting : optional constant from csv module
        Defaults to csv.QUOTE_MINIMAL. Once a `float_format` is set,
        floats are converted to strings, so csv.QUOTE_NONNUMERIC treats
        them as non-numeric.
    quotechar : string (length 1), default ``'"'``
        Character used to quote fields.
    line_terminator : string, default ``'\n'``
        Newline character or character sequence used in the output file.
    chunksize : int or None
        Number of rows to write at a time.
    tupleize_cols : boolean, default False
        Write MultiIndex columns as a list of tuples (if True) or in the
        expanded format, where each MultiIndex column is a row in the
        csv (if False).

        .. deprecated:: 0.21.0
           This argument will be removed and each row of the multi-index
           will always be written as a separate row in the csv file.
    date_format : string, default None
        Format string applied to datetime objects.
    doublequote : boolean, default True
        Controls quoting of `quotechar` inside a field.
    escapechar : string (length 1), default None
        Character used to escape `sep` and `quotechar` when appropriate.
    decimal : string, default '.'
        Character recognised as decimal separator, e.g. ',' for European
        data.

    Returns
    -------
    None or str
        The csv text when `path_or_buf` is None, otherwise None.
    """
    # Any explicit value (True or False) counts as use of the deprecated
    # keyword; only the None sentinel is silent.
    if tupleize_cols is None:
        tupleize_cols = False
    else:
        warnings.warn("The 'tupleize_cols' parameter is deprecated and "
                      "will be removed in a future version",
                      FutureWarning, stacklevel=2)

    from pandas.io.formats.csvs import CSVFormatter

    csv_options = dict(
        sep=sep, na_rep=na_rep, float_format=float_format,
        cols=columns, header=header, index=index,
        index_label=index_label, mode=mode, encoding=encoding,
        compression=compression, quoting=quoting, quotechar=quotechar,
        line_terminator=line_terminator, chunksize=chunksize,
        tupleize_cols=tupleize_cols, date_format=date_format,
        doublequote=doublequote, escapechar=escapechar, decimal=decimal,
    )
    writer = CSVFormatter(self, path_or_buf, **csv_options)
    writer.save()

    # Text is handed back only when the caller supplied no target.
    if path_or_buf is not None:
        return None
    return writer.path_or_buf.getvalue()

@Appender(_shared_docs['to_excel'] % _shared_doc_kwargs)
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
float_format=None, columns=None, header=True, index=True,
Expand Down
109 changes: 109 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9270,6 +9270,115 @@ def first_valid_index(self):
def last_valid_index(self):
    # Delegate to `_find_valid_index`, requesting the 'last' position.
    found = self._find_valid_index('last')
    return found

def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
           columns=None, header=True, index=True, index_label=None,
           mode='w', encoding=None, compression='infer', quoting=None,
           quotechar='"', line_terminator='\n', chunksize=None,
           tupleize_cols=None, date_format=None, doublequote=True,
           escapechar=None, decimal='.'):
    r"""Write object to a comma-separated values (csv) file.

    Parameters
    ----------
    path_or_buf : string or file handle, default None
        File path or object, if None is provided the result is returned as
        a string.

        .. versionchanged:: 0.24.0
           Was previously named "path" for Series.
    sep : character, default ','
        Field delimiter for the output file.
    na_rep : string, default ''
        Missing data representation.
    float_format : string, default None
        Format string for floating point numbers.
    columns : sequence, optional
        Columns to write.
    header : boolean or list of string, default True
        Write out the column names. If a list of strings is given it is
        assumed to be aliases for the column names.

        .. versionchanged:: 0.24.0
           Previously defaulted to False for Series.
    index : boolean, default True
        Write row names (index).
    index_label : string or sequence, or False, default None
        Column label for index column(s) if desired. If None is given, and
        `header` and `index` are True, then the index names are used. A
        sequence should be given if the object uses MultiIndex. If
        False do not print fields for index names. Use index_label=False
        for easier importing in R.
    mode : str, default 'w'
        Python write mode.
    encoding : string, optional
        A string representing the encoding to use in the output file,
        defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
        If 'infer' and `path_or_buf` is path-like, then detect compression
        from the following extensions: '.gz', '.bz2', '.zip' or '.xz'
        (otherwise no compression).

        .. versionchanged:: 0.24.0
           'infer' option added and set to default.
    quoting : optional constant from csv module
        Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
        then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
        will treat them as non-numeric.
    quotechar : string (length 1), default ``'"'``
        Character used to quote fields.
    line_terminator : string, default ``'\n'``
        The newline character or character sequence to use in the output
        file.
    chunksize : int or None
        Rows to write at a time.
    tupleize_cols : boolean, default False
        Write MultiIndex columns as a list of tuples (if True) or in
        the new, expanded format, where each MultiIndex column is a row
        in the CSV (if False).

        .. deprecated:: 0.21.0
           This argument will be removed and will always write each row
           of the multi-index as a separate row in the CSV file.
    date_format : string, default None
        Format string for datetime objects.
    doublequote : boolean, default True
        Control quoting of `quotechar` inside a field.
    escapechar : string (length 1), default None
        Character used to escape `sep` and `quotechar` when appropriate.
    decimal : string, default '.'
        Character recognized as decimal separator. E.g. use ',' for
        European data.

    Returns
    -------
    None or str
        The csv text when `path_or_buf` is None, otherwise None.

    .. versionchanged:: 0.24.0
       The order of arguments for Series was changed.
    """

    # The formatter is handed a frame: a DataFrame is used as-is, anything
    # else is converted through ``to_frame()``.
    df = self if isinstance(self, ABCDataFrame) else self.to_frame()

    # Any explicit value (True or False) counts as use of the deprecated
    # keyword; only the None sentinel is silent.
    if tupleize_cols is not None:
        warnings.warn("The 'tupleize_cols' parameter is deprecated and "
                      "will be removed in a future version",
                      FutureWarning, stacklevel=2)
    else:
        tupleize_cols = False

    from pandas.io.formats.csvs import CSVFormatter
    formatter = CSVFormatter(df, path_or_buf,
                             line_terminator=line_terminator, sep=sep,
                             encoding=encoding,
                             compression=compression, quoting=quoting,
                             na_rep=na_rep, float_format=float_format,
                             cols=columns, header=header, index=index,
                             index_label=index_label, mode=mode,
                             chunksize=chunksize, quotechar=quotechar,
                             tupleize_cols=tupleize_cols,
                             date_format=date_format,
                             doublequote=doublequote,
                             escapechar=escapechar, decimal=decimal)
    formatter.save()

    # Without a caller-supplied target, return the text held by the
    # formatter's own buffer.
    if path_or_buf is None:
        return formatter.path_or_buf.getvalue()


def _doc_parms(cls):
"""Return a tuple of the doc parms."""
Expand Down
110 changes: 57 additions & 53 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pandas.core.arrays import ExtensionArray
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_string_like,
is_bool,
is_integer, is_integer_dtype,
is_float_dtype,
Expand Down Expand Up @@ -3765,59 +3766,62 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,

return result

def to_csv(self, path=None, index=True, sep=",", na_rep='',
           float_format=None, header=False, index_label=None,
           mode='w', encoding=None, compression='infer', date_format=None,
           decimal='.'):
    """
    Serialise the Series as comma-separated values (csv) text.

    The Series is wrapped in a DataFrame and the work is delegated to
    ``DataFrame.to_csv``.

    Parameters
    ----------
    path : string or file handle, default None
        Destination path or writable object. When None, the csv text is
        returned as a string instead.
    index : boolean, default True
        Whether to write the row names (index).
    sep : character, default ","
        Delimiter placed between fields.
    na_rep : string, default ''
        Text written in place of missing data.
    float_format : string, default None
        Format string applied to floating point numbers.
    header : boolean, default False
        Whether to write out the series name.
    index_label : string or sequence, default None
        Label(s) for the index column(s). With None, and `header` and
        `index` both True, the index names are used. Pass a sequence when
        a MultiIndex is used.
    mode : Python write mode, default 'w'
    encoding : string, optional
        Encoding to use if the contents are non-ascii, for python versions
        prior to 3.
    compression : None or string, default 'infer'
        Compression to use in the output file. Allowed values are None,
        'gzip', 'bz2', 'zip', 'xz', and 'infer'. Only used when the first
        argument is a filename.

        .. versionchanged:: 0.24.0
           'infer' option added and set to default
    date_format : string, default None
        Format string applied to datetime objects.
    decimal : string, default '.'
        Character recognised as decimal separator, e.g. ',' for European
        data.

    Returns
    -------
    None or str
        The csv text when `path` is None, otherwise None.
    """
    from pandas.core.frame import DataFrame

    frame = DataFrame(self)
    options = dict(index=index, sep=sep, na_rep=na_rep,
                   float_format=float_format, header=header,
                   index_label=index_label, mode=mode,
                   encoding=encoding, compression=compression,
                   date_format=date_format, decimal=decimal)
    output = frame.to_csv(path, **options)

    # The frame writer only yields text when no target was given; with a
    # target this method returns None.
    return output if path is None else None
@Appender(generic.NDFrame.to_csv.__doc__)
def to_csv(self, *args, **kwargs):
    # Backwards-compatibility shim: accepts both the legacy Series.to_csv
    # positional order and the DataFrame.to_csv order, normalises
    # everything to keyword arguments and delegates to the frame writer.
    # Documented with comments on purpose: `Appender` builds `__doc__`
    # from this function's own docstring plus the shared text, so adding
    # a docstring here would change the rendered documentation.
    #
    # Raises TypeError when more positional arguments are given than the
    # selected signature has parameters, and ValueError when an argument
    # is given both by position and by name.

    # Positional order of the aligned (DataFrame-style) signature.
    names = ["path_or_buf", "sep", "na_rep", "float_format", "columns",
             "header", "index", "index_label", "mode", "encoding",
             "compression", "quoting", "quotechar", "line_terminator",
             "chunksize", "tupleize_cols", "date_format", "doublequote",
             "escapechar", "decimal"]

    # Positional order of the legacy Series signature.
    old_names = ["path_or_buf", "index", "sep", "na_rep", "float_format",
                 "header", "index_label", "mode", "encoding",
                 "compression", "date_format", "decimal"]

    too_many = "to_csv() takes at most {} positional arguments ({} given)"

    # Neither signature accepts more positionals than the aligned one, so
    # reject such calls up front instead of silently dropping the surplus.
    if len(args) > len(names):
        raise TypeError(too_many.format(len(names), len(args)))

    if "path" in kwargs:
        warnings.warn("The signature of `Series.to_csv` was aligned "
                      "to that of `DataFrame.to_csv`, and argument "
                      "'path' will be renamed to 'path_or_buf'.",
                      FutureWarning, stacklevel=2)
        kwargs["path_or_buf"] = kwargs.pop("path")

    if len(args) > 1:
        # Either "index" (old signature) or "sep" (new signature) is being
        # passed as second argument (while the first is the same)
        maybe_sep = args[1]

        if not (is_string_like(maybe_sep) and len(maybe_sep) == 1):
            # old signature
            warnings.warn("The signature of `Series.to_csv` was aligned "
                          "to that of `DataFrame.to_csv`. Note that the "
                          "order of arguments changed, and the new one "
                          "has 'sep' in first place, for which \"{}\" is "
                          "not a valid value. The old order will cease to "
                          "be supported in a future version. Please refer "
                          "to the documentation for `DataFrame.to_csv` "
                          "when updating your function "
                          "calls.".format(maybe_sep),
                          FutureWarning, stacklevel=2)
            names = old_names
            # The legacy signature is shorter; re-check against it.
            if len(args) > len(names):
                raise TypeError(too_many.format(len(names), len(args)))

    # zip stops at the shorter input, i.e. at the supplied positionals.
    pos_args = dict(zip(names, args))

    for key, value in pos_args.items():
        if key in kwargs:
            raise ValueError("Argument given by name ('{}') and position "
                             "({})".format(key, names.index(key)))
        kwargs[key] = value

    # A missing (or explicit None) header keeps the legacy behaviour of
    # omitting the header, but announces the upcoming default change.
    if kwargs.get("header", None) is None:
        warnings.warn("The signature of `Series.to_csv` was aligned "
                      "to that of `DataFrame.to_csv`, and argument "
                      "'header' will change its default value from False "
                      "to True: please pass an explicit value to suppress "
                      "this warning.", FutureWarning,
                      stacklevel=2)
        kwargs["header"] = False  # Backwards compatibility.
    return self.to_frame().to_csv(**kwargs)

@Appender(generic._shared_docs['to_excel'] % _shared_doc_kwargs)
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
Expand Down
17 changes: 11 additions & 6 deletions pandas/tests/frame/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -893,22 +893,27 @@ def test_to_csv_line_terminators(self):

def test_to_csv_from_csv_categorical(self):
    # Checks that a categorical Series / DataFrame produces exactly the
    # same csv text as the equivalent built from a plain list of strings.
    # NOTE(review): the comment, the `s`/`s2` definitions and the Series
    # `to_csv` calls each appear twice (single- vs double-quoted, with and
    # without `header=False`). This looks like both sides of a diff were
    # retained, so each buffer is written twice; confirm which variant is
    # intended before cleaning up.

    # CSV with categoricals should result in the same output as when one
    # would add a "normal" Series/DataFrame.
    s = Series(pd.Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']))
    s2 = Series(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])
    # CSV with categoricals should result in the same output
    # as when one would add a "normal" Series/DataFrame.
    s = Series(pd.Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]))
    s2 = Series(["a", "b", "b", "a", "a", "c", "c", "c"])
    res = StringIO()
    s.to_csv(res)

    s.to_csv(res, header=False)
    exp = StringIO()
    s2.to_csv(exp)

    s2.to_csv(exp, header=False)
    # Both buffers received the same sequence of writes, so their
    # contents are compared as a whole.
    assert res.getvalue() == exp.getvalue()

    # Same comparison at DataFrame level, with each Series as column "s".
    df = DataFrame({"s": s})
    df2 = DataFrame({"s": s2})

    res = StringIO()
    df.to_csv(res)

    exp = StringIO()
    df2.to_csv(exp)

    assert res.getvalue() == exp.getvalue()

def test_to_csv_path_is_none(self):
Expand Down
Loading