Skip to content

Commit

Permalink
API: Proof of concept for #19715 based on #21868
Browse files Browse the repository at this point in the history
closes #19715
  • Loading branch information
toobaz committed Jul 14, 2018
1 parent 365eac4 commit 7d655b2
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 156 deletions.
97 changes: 0 additions & 97 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1710,103 +1710,6 @@ def to_panel(self):

return self._constructor_expanddim(new_mgr)

def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
columns=None, header=True, index=True, index_label=None,
mode='w', encoding=None, compression=None, quoting=None,
quotechar='"', line_terminator='\n', chunksize=None,
tupleize_cols=None, date_format=None, doublequote=True,
escapechar=None, decimal='.'):
r"""Write DataFrame to a comma-separated values (csv) file
Parameters
----------
path_or_buf : string or file handle, default None
File path or object, if None is provided the result is returned as
a string.
sep : character, default ','
Field delimiter for the output file.
na_rep : string, default ''
Missing data representation
float_format : string, default None
Format string for floating point numbers
columns : sequence, optional
Columns to write
header : boolean or list of string, default True
Write out the column names. If a list of strings is given it is
assumed to be aliases for the column names
index : boolean, default True
Write row names (index)
index_label : string or sequence, or False, default None
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
sequence should be given if the DataFrame uses MultiIndex. If
False do not print fields for index names. Use index_label=False
for easier importing in R
mode : str
Python write mode, default 'w'
encoding : string, optional
A string representing the encoding to use in the output file,
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
compression : {'infer', 'gzip', 'bz2', 'xz', None}, default None
If 'infer' and `path_or_buf` is path-like, then detect compression
from the following extensions: '.gz', '.bz2' or '.xz'
(otherwise no compression).
line_terminator : string, default ``'\n'``
The newline character or character sequence to use in the output
file
quoting : optional constant from csv module
defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
will treat them as non-numeric
quotechar : string (length 1), default '\"'
character used to quote fields
doublequote : boolean, default True
Control quoting of `quotechar` inside a field
escapechar : string (length 1), default None
character used to escape `sep` and `quotechar` when appropriate
chunksize : int or None
rows to write at a time
tupleize_cols : boolean, default False
.. deprecated:: 0.21.0
This argument will be removed and will always write each row
of the multi-index as a separate row in the CSV file.
Write MultiIndex columns as a list of tuples (if True) or in
the new, expanded format, where each MultiIndex column is a row
in the CSV (if False).
date_format : string, default None
Format string for datetime objects
decimal: string, default '.'
Character recognized as decimal separator. E.g. use ',' for
European data
"""

if tupleize_cols is not None:
warnings.warn("The 'tupleize_cols' parameter is deprecated and "
"will be removed in a future version",
FutureWarning, stacklevel=2)
else:
tupleize_cols = False

from pandas.io.formats.csvs import CSVFormatter
formatter = CSVFormatter(self, path_or_buf,
line_terminator=line_terminator, sep=sep,
encoding=encoding,
compression=compression, quoting=quoting,
na_rep=na_rep, float_format=float_format,
cols=columns, header=header, index=index,
index_label=index_label, mode=mode,
chunksize=chunksize, quotechar=quotechar,
tupleize_cols=tupleize_cols,
date_format=date_format,
doublequote=doublequote,
escapechar=escapechar, decimal=decimal)
formatter.save()

if path_or_buf is None:
return formatter.path_or_buf.getvalue()

@Appender(_shared_docs['to_excel'] % _shared_doc_kwargs)
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
float_format=None, columns=None, header=True, index=True,
Expand Down
101 changes: 101 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9161,6 +9161,107 @@ def first_valid_index(self):
def last_valid_index(self):
return self._find_valid_index('last')

def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
columns=None, header=True, index=True, index_label=None,
mode='w', encoding=None, compression=None, quoting=None,
quotechar='"', line_terminator='\n', chunksize=None,
tupleize_cols=None, date_format=None, doublequote=True,
escapechar=None, decimal='.'):
r"""Write DataFrame to a comma-separated values (csv) file
Parameters
----------
path_or_buf : string or file handle, default None
File path or object, if None is provided the result is returned as
a string.
sep : character, default ','
Field delimiter for the output file.
na_rep : string, default ''
Missing data representation
float_format : string, default None
Format string for floating point numbers
columns : sequence, optional
Columns to write
header : boolean or list of string, default True
Write out the column names. If a list of strings is given it is
assumed to be aliases for the column names
index : boolean, default True
Write row names (index)
index_label : string or sequence, or False, default None
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
sequence should be given if the DataFrame uses MultiIndex. If
False do not print fields for index names. Use index_label=False
for easier importing in R
mode : str
Python write mode, default 'w'
encoding : string, optional
A string representing the encoding to use in the output file,
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
compression : {'infer', 'gzip', 'bz2', 'xz', None}, default None
If 'infer' and `path_or_buf` is path-like, then detect compression
from the following extensions: '.gz', '.bz2' or '.xz'
(otherwise no compression).
line_terminator : string, default ``'\n'``
The newline character or character sequence to use in the output
file
quoting : optional constant from csv module
defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
will treat them as non-numeric
quotechar : string (length 1), default '\"'
character used to quote fields
doublequote : boolean, default True
Control quoting of `quotechar` inside a field
escapechar : string (length 1), default None
character used to escape `sep` and `quotechar` when appropriate
chunksize : int or None
rows to write at a time
tupleize_cols : boolean, default False
.. deprecated:: 0.21.0
This argument will be removed and will always write each row
of the multi-index as a separate row in the CSV file.
Write MultiIndex columns as a list of tuples (if True) or in
the new, expanded format, where each MultiIndex column is a row
in the CSV (if False).
date_format : string, default None
Format string for datetime objects
decimal: string, default '.'
Character recognized as decimal separator. E.g. use ',' for
European data
"""

from pandas.core.frame import DataFrame

df = self if isinstance(self, DataFrame) else self.to_frame()

if tupleize_cols is not None:
warnings.warn("The 'tupleize_cols' parameter is deprecated and "
"will be removed in a future version",
FutureWarning, stacklevel=2)
else:
tupleize_cols = False

from pandas.io.formats.csvs import CSVFormatter
formatter = CSVFormatter(df, path_or_buf,
line_terminator=line_terminator, sep=sep,
encoding=encoding,
compression=compression, quoting=quoting,
na_rep=na_rep, float_format=float_format,
cols=columns, header=header, index=index,
index_label=index_label, mode=mode,
chunksize=chunksize, quotechar=quotechar,
tupleize_cols=tupleize_cols,
date_format=date_format,
doublequote=doublequote,
escapechar=escapechar, decimal=decimal)
formatter.save()

if path_or_buf is None:
return formatter.path_or_buf.getvalue()


def _doc_parms(cls):
"""Return a tuple of the doc parms."""
Expand Down
104 changes: 54 additions & 50 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pandas.core.arrays import ExtensionArray
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_string_like,
is_bool,
is_integer, is_integer_dtype,
is_float_dtype,
Expand Down Expand Up @@ -3760,56 +3761,59 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,

return result

def to_csv(self, path=None, index=True, sep=",", na_rep='',
float_format=None, header=False, index_label=None,
mode='w', encoding=None, compression=None, date_format=None,
decimal='.'):
"""
Write Series to a comma-separated values (csv) file
Parameters
----------
path : string or file handle, default None
File path or object, if None is provided the result is returned as
a string.
na_rep : string, default ''
Missing data representation
float_format : string, default None
Format string for floating point numbers
header : boolean, default False
Write out series name
index : boolean, default True
Write row names (index)
index_label : string or sequence, default None
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
sequence should be given if the DataFrame uses MultiIndex.
mode : Python write mode, default 'w'
sep : character, default ","
Field delimiter for the output file.
encoding : string, optional
a string representing the encoding to use if the contents are
non-ascii, for python versions prior to 3
compression : string, optional
A string representing the compression to use in the output file.
Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only
used when the first argument is a filename.
date_format: string, default None
Format string for datetime objects.
decimal: string, default '.'
Character recognized as decimal separator. E.g. use ',' for
European data
"""
from pandas.core.frame import DataFrame
df = DataFrame(self)
# result is only a string if no path provided, otherwise None
result = df.to_csv(path, index=index, sep=sep, na_rep=na_rep,
float_format=float_format, header=header,
index_label=index_label, mode=mode,
encoding=encoding, compression=compression,
date_format=date_format, decimal=decimal)
if path is None:
return result
def to_csv(self, *args, **kwargs):
"""
See DataFrame.to_csv()
# TODO
"""

names = ['path_or_buf', 'sep', 'na_rep', 'float_format', 'columns',
'header', 'index', 'index_label', 'mode', 'encoding',
'compression', 'quoting', 'quotechar', 'line_terminator',
'chunksize', 'tupleize_cols', 'date_format', 'doublequote',
'escapechar', 'decimal']

old_names = ['path_or_buf', 'index', 'sep', 'na_rep', 'float_format',
'header', 'index_label', 'mode', 'encoding',
'compression', 'date_format', 'decimal']

if 'path' in kwargs:
warnings.warn("Argument 'path' is now named 'path_or_buf'")
kwargs['path_or_buf'] = kwargs.pop('path')

if len(args) > 1:
# Either "index" (old signature) or "sep" (new signature) is being
# passed as second argument (while the first is the same)
maybe_sep = args[1]

if not (is_string_like(maybe_sep) and len(maybe_sep) == 1):
# old signature
warnings.warn("The signature of `Series.to_csv` was aligned "
"to that of `DataFrame.to_csv`. Note that the "
"order of arguments changed, and the new one "
"has 'sep' in first place, for which \"{}\" is "
"not a valid value. The old order will cease to "
"be supported in a future version. Please refer "
"to the documentation for `DataFrame.to_csv` "
"when updating your function "
"calls.".format(maybe_sep),
FutureWarning, stacklevel=2)
names = old_names

pos_args = dict(zip(names[:len(args)], args))

for key in pos_args:
if key in kwargs:
raise ValueError("Argument given by name ('{}') and position "
"({})".format(key, names.index(key)))
kwargs[key] = pos_args[key]

if kwargs.get('header', None) is None:
warnings.warn("Argument 'header' has changed default value to "
"True: please pass an explicit value to suppress "
"this warning")

return self.to_frame().to_csv(**kwargs)

@Appender(generic._shared_docs['to_excel'] % _shared_doc_kwargs)
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
Expand Down
Loading

0 comments on commit 7d655b2

Please sign in to comment.