Proof of concept for #19715 based on #21868
toobaz committed Jul 13, 2018
1 parent 365eac4 commit 1fa5123
Showing 4 changed files with 147 additions and 154 deletions.
97 changes: 0 additions & 97 deletions pandas/core/frame.py
@@ -1710,103 +1710,6 @@ def to_panel(self):

return self._constructor_expanddim(new_mgr)

def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
columns=None, header=True, index=True, index_label=None,
mode='w', encoding=None, compression=None, quoting=None,
quotechar='"', line_terminator='\n', chunksize=None,
tupleize_cols=None, date_format=None, doublequote=True,
escapechar=None, decimal='.'):
r"""Write DataFrame to a comma-separated values (csv) file
Parameters
----------
path_or_buf : string or file handle, default None
File path or object, if None is provided the result is returned as
a string.
sep : character, default ','
Field delimiter for the output file.
na_rep : string, default ''
Missing data representation
float_format : string, default None
Format string for floating point numbers
columns : sequence, optional
Columns to write
header : boolean or list of string, default True
Write out the column names. If a list of strings is given it is
assumed to be aliases for the column names
index : boolean, default True
Write row names (index)
index_label : string or sequence, or False, default None
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
sequence should be given if the DataFrame uses MultiIndex. If
False do not print fields for index names. Use index_label=False
for easier importing in R
mode : str
Python write mode, default 'w'
encoding : string, optional
A string representing the encoding to use in the output file,
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
compression : {'infer', 'gzip', 'bz2', 'xz', None}, default None
If 'infer' and `path_or_buf` is path-like, then detect compression
from the following extensions: '.gz', '.bz2' or '.xz'
(otherwise no compression).
line_terminator : string, default ``'\n'``
The newline character or character sequence to use in the output
file
quoting : optional constant from csv module
defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
will treat them as non-numeric
quotechar : string (length 1), default '\"'
character used to quote fields
doublequote : boolean, default True
Control quoting of `quotechar` inside a field
escapechar : string (length 1), default None
character used to escape `sep` and `quotechar` when appropriate
chunksize : int or None
rows to write at a time
tupleize_cols : boolean, default False
.. deprecated:: 0.21.0
This argument will be removed and will always write each row
of the multi-index as a separate row in the CSV file.
Write MultiIndex columns as a list of tuples (if True) or in
the new, expanded format, where each MultiIndex column is a row
in the CSV (if False).
date_format : string, default None
Format string for datetime objects
decimal: string, default '.'
Character recognized as decimal separator. E.g. use ',' for
European data
"""

if tupleize_cols is not None:
warnings.warn("The 'tupleize_cols' parameter is deprecated and "
"will be removed in a future version",
FutureWarning, stacklevel=2)
else:
tupleize_cols = False

from pandas.io.formats.csvs import CSVFormatter
formatter = CSVFormatter(self, path_or_buf,
line_terminator=line_terminator, sep=sep,
encoding=encoding,
compression=compression, quoting=quoting,
na_rep=na_rep, float_format=float_format,
cols=columns, header=header, index=index,
index_label=index_label, mode=mode,
chunksize=chunksize, quotechar=quotechar,
tupleize_cols=tupleize_cols,
date_format=date_format,
doublequote=doublequote,
escapechar=escapechar, decimal=decimal)
formatter.save()

if path_or_buf is None:
return formatter.path_or_buf.getvalue()

@Appender(_shared_docs['to_excel'] % _shared_doc_kwargs)
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
float_format=None, columns=None, header=True, index=True,
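The method removed here (and moved to pandas/core/generic.py below) returns the CSV text when no target is given and writes to the target otherwise. A minimal usage sketch of that behaviour; the frame contents and the buffer are illustrative, not part of the diff:

from io import StringIO

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3.5, 4.5]})

# No target: path_or_buf stays None and the CSV text is returned as a string.
csv_text = df.to_csv()
assert csv_text.splitlines()[0] == ",a,b"

# Explicit target: output goes to the handle (or path) and None is returned.
buf = StringIO()
result = df.to_csv(buf, index=False)
assert result is None
assert buf.getvalue().splitlines()[0] == "a,b"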
97 changes: 97 additions & 0 deletions pandas/core/generic.py
@@ -9161,6 +9161,103 @@ def first_valid_index(self):
def last_valid_index(self):
return self._find_valid_index('last')

def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
columns=None, header=True, index=True, index_label=None,
mode='w', encoding=None, compression=None, quoting=None,
quotechar='"', line_terminator='\n', chunksize=None,
tupleize_cols=None, date_format=None, doublequote=True,
escapechar=None, decimal='.'):
r"""Write DataFrame to a comma-separated values (csv) file
Parameters
----------
path_or_buf : string or file handle, default None
File path or object, if None is provided the result is returned as
a string.
sep : character, default ','
Field delimiter for the output file.
na_rep : string, default ''
Missing data representation
float_format : string, default None
Format string for floating point numbers
columns : sequence, optional
Columns to write
header : boolean or list of string, default True
Write out the column names. If a list of strings is given it is
assumed to be aliases for the column names
index : boolean, default True
Write row names (index)
index_label : string or sequence, or False, default None
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
sequence should be given if the DataFrame uses MultiIndex. If
False do not print fields for index names. Use index_label=False
for easier importing in R
mode : str
Python write mode, default 'w'
encoding : string, optional
A string representing the encoding to use in the output file,
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
compression : {'infer', 'gzip', 'bz2', 'xz', None}, default None
If 'infer' and `path_or_buf` is path-like, then detect compression
from the following extensions: '.gz', '.bz2' or '.xz'
(otherwise no compression).
line_terminator : string, default ``'\n'``
The newline character or character sequence to use in the output
file
quoting : optional constant from csv module
defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
will treat them as non-numeric
quotechar : string (length 1), default '\"'
character used to quote fields
doublequote : boolean, default True
Control quoting of `quotechar` inside a field
escapechar : string (length 1), default None
character used to escape `sep` and `quotechar` when appropriate
chunksize : int or None
rows to write at a time
tupleize_cols : boolean, default False
.. deprecated:: 0.21.0
This argument will be removed and will always write each row
of the multi-index as a separate row in the CSV file.
Write MultiIndex columns as a list of tuples (if True) or in
the new, expanded format, where each MultiIndex column is a row
in the CSV (if False).
date_format : string, default None
Format string for datetime objects
decimal: string, default '.'
Character recognized as decimal separator. E.g. use ',' for
European data
"""

if tupleize_cols is not None:
warnings.warn("The 'tupleize_cols' parameter is deprecated and "
"will be removed in a future version",
FutureWarning, stacklevel=2)
else:
tupleize_cols = False

from pandas.io.formats.csvs import CSVFormatter
formatter = CSVFormatter(self, path_or_buf,
line_terminator=line_terminator, sep=sep,
encoding=encoding,
compression=compression, quoting=quoting,
na_rep=na_rep, float_format=float_format,
cols=columns, header=header, index=index,
index_label=index_label, mode=mode,
chunksize=chunksize, quotechar=quotechar,
tupleize_cols=tupleize_cols,
date_format=date_format,
doublequote=doublequote,
escapechar=escapechar, decimal=decimal)
formatter.save()

if path_or_buf is None:
return formatter.path_or_buf.getvalue()


def _doc_parms(cls):
"""Return a tuple of the doc parms."""
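The implementation moved into generic.py keeps the tupleize_cols deprecation shim shown above: passing any explicit value (even the future default False) triggers a FutureWarning, while leaving the argument at None writes MultiIndex columns in the expanded format with no warning. A small sketch of that behaviour; the two-level column index is illustrative:

import warnings

import pandas as pd

df = pd.DataFrame([[1, 2]],
                  columns=pd.MultiIndex.from_tuples([("a", "x"), ("a", "y")]))

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    df.to_csv(tupleize_cols=False)  # explicit value -> FutureWarning
assert any(issubclass(w.category, FutureWarning) for w in caught)

# Default (None): no warning; each MultiIndex level becomes its own header row.
csv_text = df.to_csv()
assert csv_text.splitlines()[0] == ",a,a"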
89 changes: 41 additions & 48 deletions pandas/core/series.py
@@ -17,6 +17,7 @@
from pandas.core.arrays import ExtensionArray
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_string_dtype,
is_bool,
is_integer, is_integer_dtype,
is_float_dtype,
@@ -3760,56 +3761,48 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,

return result

def to_csv(self, path=None, index=True, sep=",", na_rep='',
float_format=None, header=False, index_label=None,
mode='w', encoding=None, compression=None, date_format=None,
decimal='.'):
def to_csv(self, *args, **kwargs):
"""
Write Series to a comma-separated values (csv) file

Parameters
----------
path : string or file handle, default None
File path or object, if None is provided the result is returned as
a string.
na_rep : string, default ''
Missing data representation
float_format : string, default None
Format string for floating point numbers
header : boolean, default False
Write out series name
index : boolean, default True
Write row names (index)
index_label : string or sequence, default None
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
sequence should be given if the DataFrame uses MultiIndex.
mode : Python write mode, default 'w'
sep : character, default ","
Field delimiter for the output file.
encoding : string, optional
a string representing the encoding to use if the contents are
non-ascii, for python versions prior to 3
compression : string, optional
A string representing the compression to use in the output file.
Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only
used when the first argument is a filename.
date_format: string, default None
Format string for datetime objects.
decimal: string, default '.'
Character recognized as decimal separator. E.g. use ',' for
European data
See DataFrame.to_csv()
# TODO
"""
from pandas.core.frame import DataFrame
df = DataFrame(self)
# result is only a string if no path provided, otherwise None
result = df.to_csv(path, index=index, sep=sep, na_rep=na_rep,
float_format=float_format, header=header,
index_label=index_label, mode=mode,
encoding=encoding, compression=compression,
date_format=date_format, decimal=decimal)
if path is None:
return result

names = ['path_or_buf', 'sep', 'na_rep', 'float_format', 'columns',
'header', 'index', 'index_label', 'mode', 'encoding',
'compression', 'quoting', 'quotechar', 'line_terminator',
'chunksize', 'tupleize_cols', 'date_format', 'doublequote',
'escapechar', 'decimal']

old_names = ['path_or_buf', 'index', 'sep', 'na_rep', 'float_format',
'header', 'index_label', 'mode', 'encoding',
'compression', 'date_format','decimal']

if 'path' in kwargs:
warnings.warn("Argument 'path' is now named 'path_or_buf'")
kwargs['path_or_buf'] = kwargs.pop('path')

if len(args) > 1:
# Either "index" (old signature) or "sep" (new signature) is being
# passed as second argument (while the first is the same)
maybe_sep = args[1]
if not (is_string_dtype(maybe_sep) and len(maybe_sep) == 1):
# old signature
names = old_names

pos_args = dict(zip(names[:len(args)], args))

for key in pos_args:
if key in kwargs:
raise ValueError("Argument {} was passed both as positional "
"and as keyword argument".format(key))
kwargs[key] = pos_args[key]

if kwargs.get('header', None) is None:
warnings.warn("Argument 'header' has changed default value to "
"True: please pass an explicit value to suppress "
"this warning")

return self.to_frame().to_csv(**kwargs)

@Appender(generic._shared_docs['to_excel'] % _shared_doc_kwargs)
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
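The new Series.to_csv above is only a compatibility shim around DataFrame.to_csv: keyword calls use the DataFrame-style parameter names, the legacy 'path' keyword is renamed with a warning, positional arguments are mapped onto the old names when the second one does not look like a single-character separator, an argument supplied both positionally and by keyword raises ValueError, and omitting 'header' warns that its default is now True. A hedged sketch of how calls would behave under this proof of concept; the buffers and values are illustrative:

from io import StringIO

import pandas as pd

s = pd.Series([1, 2, 3])

# DataFrame-style keyword call: unambiguous, no compatibility logic needed.
s.to_csv(StringIO(), sep=";", header=False)

# Legacy keyword: 'path' is renamed to 'path_or_buf', with a warning.
s.to_csv(path=StringIO(), header=False)

# Legacy positional call: the second positional argument is not a
# one-character string, so the old names apply and False is taken as 'index'.
s.to_csv(StringIO(), False, header=False)

# The same argument passed both positionally and by keyword is rejected.
try:
    s.to_csv(StringIO(), path_or_buf=StringIO())
except ValueError:
    pass

# Leaving 'header' unset warns that its default has changed to True.
s.to_csv(StringIO())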
18 changes: 9 additions & 9 deletions pandas/tests/series/test_io.py
@@ -37,7 +37,7 @@ def read_csv(self, path, **kwargs):
def test_from_csv_deprecation(self):
# see gh-17812
with ensure_clean() as path:
self.ts.to_csv(path)
self.ts.to_csv(path, header=False)

with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
@@ -48,7 +48,7 @@ def test_from_csv_deprecation(self):
def test_from_csv(self):

with ensure_clean() as path:
self.ts.to_csv(path)
self.ts.to_csv(path, header=False)
ts = self.read_csv(path)
assert_series_equal(self.ts, ts, check_names=False)

@@ -65,7 +65,7 @@ def test_from_csv(self):
ts_h = self.read_csv(path, header=0)
assert ts_h.name == "ts"

self.series.to_csv(path)
self.series.to_csv(path, header=False)
series = self.read_csv(path)
assert_series_equal(self.series, series, check_names=False)

@@ -92,21 +92,21 @@ def test_to_csv(self):
import io

with ensure_clean() as path:
self.ts.to_csv(path)
self.ts.to_csv(path, header=False)

with io.open(path, newline=None) as f:
lines = f.readlines()
assert (lines[1] != '\n')

self.ts.to_csv(path, index=False)
self.ts.to_csv(path, index=False, header=False)
arr = np.loadtxt(path)
assert_almost_equal(arr, self.ts.values)

def test_to_csv_unicode_index(self):
buf = StringIO()
s = Series([u("\u05d0"), "d2"], index=[u("\u05d0"), u("\u05d1")])

s.to_csv(buf, encoding="UTF-8")
s.to_csv(buf, encoding="UTF-8", header=False)
buf.seek(0)

s2 = self.read_csv(buf, index_col=0, encoding="UTF-8")
@@ -116,7 +116,7 @@ def test_to_csv_float_format(self):

with ensure_clean() as filename:
ser = Series([0.123456, 0.234567, 0.567567])
ser.to_csv(filename, float_format="%.2f")
ser.to_csv(filename, float_format="%.2f", header=False)

rs = self.read_csv(filename)
xp = Series([0.12, 0.23, 0.57])
@@ -128,14 +128,14 @@ def test_to_csv_list_entries(self):
split = s.str.split(r'\s+and\s+')

buf = StringIO()
split.to_csv(buf)
split.to_csv(buf, header=False)

def test_to_csv_path_is_none(self):
# GH 8215
# Series.to_csv() was returning None, inconsistent with
# DataFrame.to_csv() which returned string
s = Series([1, 2, 3])
csv_str = s.to_csv(path=None)
csv_str = s.to_csv(path_or_buf=None, header=False)
assert isinstance(csv_str, str)

@pytest.mark.parametrize('s,encoding', [
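The test changes above all follow from the new default: Series.to_csv now writes a header row, matching DataFrame.to_csv, so round trips that read the file back without a header must pass header=False explicitly (and omitting it would also trigger the default-change warning). A minimal illustration of the round trip, using a StringIO buffer in place of the temp files and read_csv wrapper used by the test suite:

from io import StringIO

import pandas as pd
from pandas.testing import assert_series_equal

ts = pd.Series([1.5, 2.5, 3.5], name="ts")

buf = StringIO()
ts.to_csv(buf, header=False)  # no "ts" header line in the output

buf.seek(0)
roundtrip = pd.read_csv(buf, header=None, index_col=0, squeeze=True)
assert_series_equal(roundtrip, ts, check_names=False)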
