Proof of concept for #19715 based on #21868
toobaz committed Jul 13, 2018
1 parent 365eac4 commit 1fa5123
Showing 4 changed files with 147 additions and 154 deletions.
97 changes: 0 additions & 97 deletions pandas/core/frame.py
@@ -1710,103 +1710,6 @@ def to_panel(self):

return self._constructor_expanddim(new_mgr)

def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
columns=None, header=True, index=True, index_label=None,
mode='w', encoding=None, compression=None, quoting=None,
quotechar='"', line_terminator='\n', chunksize=None,
tupleize_cols=None, date_format=None, doublequote=True,
escapechar=None, decimal='.'):
r"""Write DataFrame to a comma-separated values (csv) file
Parameters
----------
path_or_buf : string or file handle, default None
File path or object, if None is provided the result is returned as
a string.
sep : character, default ','
Field delimiter for the output file.
na_rep : string, default ''
Missing data representation
float_format : string, default None
Format string for floating point numbers
columns : sequence, optional
Columns to write
header : boolean or list of string, default True
Write out the column names. If a list of strings is given it is
assumed to be aliases for the column names
index : boolean, default True
Write row names (index)
index_label : string or sequence, or False, default None
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
sequence should be given if the DataFrame uses MultiIndex. If
False do not print fields for index names. Use index_label=False
for easier importing in R
mode : str
Python write mode, default 'w'
encoding : string, optional
A string representing the encoding to use in the output file,
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
compression : {'infer', 'gzip', 'bz2', 'xz', None}, default None
If 'infer' and `path_or_buf` is path-like, then detect compression
from the following extensions: '.gz', '.bz2' or '.xz'
(otherwise no compression).
line_terminator : string, default ``'\n'``
The newline character or character sequence to use in the output
file
quoting : optional constant from csv module
defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
will treat them as non-numeric
quotechar : string (length 1), default '\"'
character used to quote fields
doublequote : boolean, default True
Control quoting of `quotechar` inside a field
escapechar : string (length 1), default None
character used to escape `sep` and `quotechar` when appropriate
chunksize : int or None
rows to write at a time
tupleize_cols : boolean, default False
.. deprecated:: 0.21.0
This argument will be removed and will always write each row
of the multi-index as a separate row in the CSV file.
Write MultiIndex columns as a list of tuples (if True) or in
the new, expanded format, where each MultiIndex column is a row
in the CSV (if False).
date_format : string, default None
Format string for datetime objects
decimal: string, default '.'
Character recognized as decimal separator. E.g. use ',' for
European data
"""

if tupleize_cols is not None:
warnings.warn("The 'tupleize_cols' parameter is deprecated and "
"will be removed in a future version",
FutureWarning, stacklevel=2)
else:
tupleize_cols = False

from pandas.io.formats.csvs import CSVFormatter
formatter = CSVFormatter(self, path_or_buf,
line_terminator=line_terminator, sep=sep,
encoding=encoding,
compression=compression, quoting=quoting,
na_rep=na_rep, float_format=float_format,
cols=columns, header=header, index=index,
index_label=index_label, mode=mode,
chunksize=chunksize, quotechar=quotechar,
tupleize_cols=tupleize_cols,
date_format=date_format,
doublequote=doublequote,
escapechar=escapechar, decimal=decimal)
formatter.save()

if path_or_buf is None:
return formatter.path_or_buf.getvalue()

@Appender(_shared_docs['to_excel'] % _shared_doc_kwargs)
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
float_format=None, columns=None, header=True, index=True,
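The method removed here (and moved to pandas/core/generic.py below) returns the CSV text when no target is given and writes to the target otherwise. A minimal usage sketch of that behaviour; the frame contents and the buffer are illustrative, not part of the diff:

from io import StringIO

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3.5, 4.5]})

# No target: path_or_buf stays None and the CSV text is returned as a string.
csv_text = df.to_csv()
assert csv_text.splitlines()[0] == ",a,b"

# Explicit target: output goes to the handle (or path) and None is returned.
buf = StringIO()
result = df.to_csv(buf, index=False)
assert result is None
assert buf.getvalue().splitlines()[0] == "a,b"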
97 changes: 97 additions & 0 deletions pandas/core/generic.py
@@ -9161,6 +9161,103 @@ def first_valid_index(self):
def last_valid_index(self):
return self._find_valid_index('last')

def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
columns=None, header=True, index=True, index_label=None,
mode='w', encoding=None, compression=None, quoting=None,
quotechar='"', line_terminator='\n', chunksize=None,
tupleize_cols=None, date_format=None, doublequote=True,
escapechar=None, decimal='.'):
r"""Write DataFrame to a comma-separated values (csv) file
Parameters
----------
path_or_buf : string or file handle, default None
File path or object, if None is provided the result is returned as
a string.
sep : character, default ','
Field delimiter for the output file.
na_rep : string, default ''
Missing data representation
float_format : string, default None
Format string for floating point numbers
columns : sequence, optional
Columns to write
header : boolean or list of string, default True
Write out the column names. If a list of strings is given it is
assumed to be aliases for the column names
index : boolean, default True
Write row names (index)
index_label : string or sequence, or False, default None
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
sequence should be given if the DataFrame uses MultiIndex. If
False do not print fields for index names. Use index_label=False
for easier importing in R
mode : str
Python write mode, default 'w'
encoding : string, optional
A string representing the encoding to use in the output file,
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
compression : {'infer', 'gzip', 'bz2', 'xz', None}, default None
If 'infer' and `path_or_buf` is path-like, then detect compression
from the following extensions: '.gz', '.bz2' or '.xz'
(otherwise no compression).
line_terminator : string, default ``'\n'``
The newline character or character sequence to use in the output
file
quoting : optional constant from csv module
defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
will treat them as non-numeric
quotechar : string (length 1), default '\"'
character used to quote fields
doublequote : boolean, default True
Control quoting of `quotechar` inside a field
escapechar : string (length 1), default None
character used to escape `sep` and `quotechar` when appropriate
chunksize : int or None
rows to write at a time
tupleize_cols : boolean, default False
.. deprecated:: 0.21.0
This argument will be removed and will always write each row
of the multi-index as a separate row in the CSV file.
Write MultiIndex columns as a list of tuples (if True) or in
the new, expanded format, where each MultiIndex column is a row
in the CSV (if False).
date_format : string, default None
Format string for datetime objects
decimal: string, default '.'
Character recognized as decimal separator. E.g. use ',' for
European data
"""

if tupleize_cols is not None:
warnings.warn("The 'tupleize_cols' parameter is deprecated and "
"will be removed in a future version",
FutureWarning, stacklevel=2)
else:
tupleize_cols = False

from pandas.io.formats.csvs import CSVFormatter
formatter = CSVFormatter(self, path_or_buf,
line_terminator=line_terminator, sep=sep,
encoding=encoding,
compression=compression, quoting=quoting,
na_rep=na_rep, float_format=float_format,
cols=columns, header=header, index=index,
index_label=index_label, mode=mode,
chunksize=chunksize, quotechar=quotechar,
tupleize_cols=tupleize_cols,
date_format=date_format,
doublequote=doublequote,
escapechar=escapechar, decimal=decimal)
formatter.save()

if path_or_buf is None:
return formatter.path_or_buf.getvalue()


def _doc_parms(cls):
"""Return a tuple of the doc parms."""
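The implementation moved into generic.py keeps the tupleize_cols deprecation shim shown above: passing any explicit value (even the future default False) triggers a FutureWarning, while leaving the argument at None writes MultiIndex columns in the expanded format with no warning. A small sketch of that behaviour; the two-level column index is illustrative:

import warnings

import pandas as pd

df = pd.DataFrame([[1, 2]],
                  columns=pd.MultiIndex.from_tuples([("a", "x"), ("a", "y")]))

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    df.to_csv(tupleize_cols=False)  # explicit value -> FutureWarning
assert any(issubclass(w.category, FutureWarning) for w in caught)

# Default (None): no warning; each MultiIndex level becomes its own header row.
csv_text = df.to_csv()
assert csv_text.splitlines()[0] == ",a,a"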
89 changes: 41 additions & 48 deletions pandas/core/series.py
@@ -17,6 +17,7 @@
from pandas.core.arrays import ExtensionArray
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_string_dtype,
is_bool,
is_integer, is_integer_dtype,
is_float_dtype,
@@ -3760,56 +3761,48 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,

return result

def to_csv(self, path=None, index=True, sep=",", na_rep='',
float_format=None, header=False, index_label=None,
mode='w', encoding=None, compression=None, date_format=None,
decimal='.'):
def to_csv(self, *args, **kwargs):
"""
Write Series to a comma-separated values (csv) file

Parameters
----------
path : string or file handle, default None
File path or object, if None is provided the result is returned as
a string.
na_rep : string, default ''
Missing data representation
float_format : string, default None
Format string for floating point numbers
header : boolean, default False
Write out series name
index : boolean, default True
Write row names (index)
index_label : string or sequence, default None
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
sequence should be given if the DataFrame uses MultiIndex.
mode : Python write mode, default 'w'
sep : character, default ","
Field delimiter for the output file.
encoding : string, optional
a string representing the encoding to use if the contents are
non-ascii, for python versions prior to 3
compression : string, optional
A string representing the compression to use in the output file.
Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only
used when the first argument is a filename.
date_format: string, default None
Format string for datetime objects.
decimal: string, default '.'
Character recognized as decimal separator. E.g. use ',' for
European data
See DataFrame.to_csv()
# TODO
"""
from pandas.core.frame import DataFrame
df = DataFrame(self)
# result is only a string if no path provided, otherwise None
result = df.to_csv(path, index=index, sep=sep, na_rep=na_rep,
float_format=float_format, header=header,
index_label=index_label, mode=mode,
encoding=encoding, compression=compression,
date_format=date_format, decimal=decimal)
if path is None:
return result

names = ['path_or_buf', 'sep', 'na_rep', 'float_format', 'columns',
'header', 'index', 'index_label', 'mode', 'encoding',
'compression', 'quoting', 'quotechar', 'line_terminator',
'chunksize', 'tupleize_cols', 'date_format', 'doublequote',
'escapechar', 'decimal']

old_names = ['path_or_buf', 'index', 'sep', 'na_rep', 'float_format',
'header', 'index_label', 'mode', 'encoding',
'compression', 'date_format','decimal']

if 'path' in kwargs:
warnings.warn("Argument 'path' is now named 'path_or_buf'")
kwargs['path_or_buf'] = kwargs.pop('path')

if len(args) > 1:
# Either "index" (old signature) or "sep" (new signature) is being
# passed as second argument (while the first is the same)
maybe_sep = args[1]
if not (is_string_dtype(maybe_sep) and len(maybe_sep) == 1):
# old signature
names = old_names

pos_args = dict(zip(names[:len(args)], args))

for key in pos_args:
if key in kwargs:
raise ValueError("Argument {} was passed both as positional "
"and as keyword argument".format(key))
kwargs[key] = pos_args[key]

if kwargs.get('header', None) is None:
warnings.warn("Argument 'header' has changed default value to "
"True: please pass an explicit value to suppress "
"this warning")

return self.to_frame().to_csv(**kwargs)

@Appender(generic._shared_docs['to_excel'] % _shared_doc_kwargs)
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
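The new Series.to_csv above is only a compatibility shim around DataFrame.to_csv: keyword calls use the DataFrame-style parameter names, the legacy 'path' keyword is renamed with a warning, positional arguments are mapped onto the old names when the second one does not look like a single-character separator, an argument supplied both positionally and by keyword raises ValueError, and omitting 'header' warns that its default is now True. A hedged sketch of how calls would behave under this proof of concept; the buffers and values are illustrative:

from io import StringIO

import pandas as pd

s = pd.Series([1, 2, 3])

# DataFrame-style keyword call: unambiguous, no compatibility logic needed.
s.to_csv(StringIO(), sep=";", header=False)

# Legacy keyword: 'path' is renamed to 'path_or_buf', with a warning.
s.to_csv(path=StringIO(), header=False)

# Legacy positional call: the second positional argument is not a
# one-character string, so the old names apply and False is taken as 'index'.
s.to_csv(StringIO(), False, header=False)

# The same argument passed both positionally and by keyword is rejected.
try:
    s.to_csv(StringIO(), path_or_buf=StringIO())
except ValueError:
    pass

# Leaving 'header' unset warns that its default has changed to True.
s.to_csv(StringIO())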
18 changes: 9 additions & 9 deletions pandas/tests/series/test_io.py
@@ -37,7 +37,7 @@ def read_csv(self, path, **kwargs):
def test_from_csv_deprecation(self):
# see gh-17812
with ensure_clean() as path:
self.ts.to_csv(path)
self.ts.to_csv(path, header=False)

with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
@@ -48,7 +48,7 @@ def test_from_csv_deprecation(self):
def test_from_csv(self):

with ensure_clean() as path:
self.ts.to_csv(path)
self.ts.to_csv(path, header=False)
ts = self.read_csv(path)
assert_series_equal(self.ts, ts, check_names=False)

@@ -65,7 +65,7 @@ def test_from_csv(self):
ts_h = self.read_csv(path, header=0)
assert ts_h.name == "ts"

self.series.to_csv(path)
self.series.to_csv(path, header=False)
series = self.read_csv(path)
assert_series_equal(self.series, series, check_names=False)

@@ -92,21 +92,21 @@ def test_to_csv(self):
import io

with ensure_clean() as path:
self.ts.to_csv(path)
self.ts.to_csv(path, header=False)

with io.open(path, newline=None) as f:
lines = f.readlines()
assert (lines[1] != '\n')

self.ts.to_csv(path, index=False)
self.ts.to_csv(path, index=False, header=False)
arr = np.loadtxt(path)
assert_almost_equal(arr, self.ts.values)

def test_to_csv_unicode_index(self):
buf = StringIO()
s = Series([u("\u05d0"), "d2"], index=[u("\u05d0"), u("\u05d1")])

s.to_csv(buf, encoding="UTF-8")
s.to_csv(buf, encoding="UTF-8", header=False)
buf.seek(0)

s2 = self.read_csv(buf, index_col=0, encoding="UTF-8")
@@ -116,7 +116,7 @@ def test_to_csv_float_format(self):

with ensure_clean() as filename:
ser = Series([0.123456, 0.234567, 0.567567])
ser.to_csv(filename, float_format="%.2f")
ser.to_csv(filename, float_format="%.2f", header=False)

rs = self.read_csv(filename)
xp = Series([0.12, 0.23, 0.57])
@@ -128,14 +128,14 @@ def test_to_csv_list_entries(self):
split = s.str.split(r'\s+and\s+')

buf = StringIO()
split.to_csv(buf)
split.to_csv(buf, header=False)

def test_to_csv_path_is_none(self):
# GH 8215
# Series.to_csv() was returning None, inconsistent with
# DataFrame.to_csv() which returned string
s = Series([1, 2, 3])
csv_str = s.to_csv(path=None)
csv_str = s.to_csv(path_or_buf=None, header=False)
assert isinstance(csv_str, str)

@pytest.mark.parametrize('s,encoding', [
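The test changes above all follow from the new default: Series.to_csv now writes a header row, matching DataFrame.to_csv, so round trips that read the file back without a header must pass header=False explicitly (and omitting it would also trigger the default-change warning). A minimal illustration of the round trip, using a StringIO buffer in place of the temp files and read_csv wrapper used by the test suite:

from io import StringIO

import pandas as pd
from pandas.testing import assert_series_equal

ts = pd.Series([1.5, 2.5, 3.5], name="ts")

buf = StringIO()
ts.to_csv(buf, header=False)  # no "ts" header line in the output

buf.seek(0)
roundtrip = pd.read_csv(buf, header=None, index_col=0, squeeze=True)
assert_series_equal(roundtrip, ts, check_names=False)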
