From 517590738b70cd2af778cd142d0a0334cdd2827d Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Fri, 13 Jul 2018 20:13:34 +0200 Subject: [PATCH] API: Proof of concept for #19715 based on #21868 closes #19715 --- pandas/core/frame.py | 97 ---------------------------------- pandas/core/generic.py | 97 ++++++++++++++++++++++++++++++++++ pandas/core/series.py | 89 ++++++++++++++----------------- pandas/tests/series/test_io.py | 18 +++---- 4 files changed, 147 insertions(+), 154 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 63809443380101..f0aa00163c9027 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1710,103 +1710,6 @@ def to_panel(self): return self._constructor_expanddim(new_mgr) - def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, - columns=None, header=True, index=True, index_label=None, - mode='w', encoding=None, compression=None, quoting=None, - quotechar='"', line_terminator='\n', chunksize=None, - tupleize_cols=None, date_format=None, doublequote=True, - escapechar=None, decimal='.'): - r"""Write DataFrame to a comma-separated values (csv) file - - Parameters - ---------- - path_or_buf : string or file handle, default None - File path or object, if None is provided the result is returned as - a string. - sep : character, default ',' - Field delimiter for the output file. - na_rep : string, default '' - Missing data representation - float_format : string, default None - Format string for floating point numbers - columns : sequence, optional - Columns to write - header : boolean or list of string, default True - Write out the column names. If a list of strings is given it is - assumed to be aliases for the column names - index : boolean, default True - Write row names (index) - index_label : string or sequence, or False, default None - Column label for index column(s) if desired. If None is given, and - `header` and `index` are True, then the index names are used. A - sequence should be given if the DataFrame uses MultiIndex. If - False do not print fields for index names. Use index_label=False - for easier importing in R - mode : str - Python write mode, default 'w' - encoding : string, optional - A string representing the encoding to use in the output file, - defaults to 'ascii' on Python 2 and 'utf-8' on Python 3. - compression : {'infer', 'gzip', 'bz2', 'xz', None}, default None - If 'infer' and `path_or_buf` is path-like, then detect compression - from the following extensions: '.gz', '.bz2' or '.xz' - (otherwise no compression). - line_terminator : string, default ``'\n'`` - The newline character or character sequence to use in the output - file - quoting : optional constant from csv module - defaults to csv.QUOTE_MINIMAL. If you have set a `float_format` - then floats are converted to strings and thus csv.QUOTE_NONNUMERIC - will treat them as non-numeric - quotechar : string (length 1), default '\"' - character used to quote fields - doublequote : boolean, default True - Control quoting of `quotechar` inside a field - escapechar : string (length 1), default None - character used to escape `sep` and `quotechar` when appropriate - chunksize : int or None - rows to write at a time - tupleize_cols : boolean, default False - .. deprecated:: 0.21.0 - This argument will be removed and will always write each row - of the multi-index as a separate row in the CSV file. - - Write MultiIndex columns as a list of tuples (if True) or in - the new, expanded format, where each MultiIndex column is a row - in the CSV (if False). - date_format : string, default None - Format string for datetime objects - decimal: string, default '.' - Character recognized as decimal separator. E.g. use ',' for - European data - - """ - - if tupleize_cols is not None: - warnings.warn("The 'tupleize_cols' parameter is deprecated and " - "will be removed in a future version", - FutureWarning, stacklevel=2) - else: - tupleize_cols = False - - from pandas.io.formats.csvs import CSVFormatter - formatter = CSVFormatter(self, path_or_buf, - line_terminator=line_terminator, sep=sep, - encoding=encoding, - compression=compression, quoting=quoting, - na_rep=na_rep, float_format=float_format, - cols=columns, header=header, index=index, - index_label=index_label, mode=mode, - chunksize=chunksize, quotechar=quotechar, - tupleize_cols=tupleize_cols, - date_format=date_format, - doublequote=doublequote, - escapechar=escapechar, decimal=decimal) - formatter.save() - - if path_or_buf is None: - return formatter.path_or_buf.getvalue() - @Appender(_shared_docs['to_excel'] % _shared_doc_kwargs) def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, columns=None, header=True, index=True, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8da678e0adec08..be012262b854fe 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9161,6 +9161,103 @@ def first_valid_index(self): def last_valid_index(self): return self._find_valid_index('last') + def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, + columns=None, header=True, index=True, index_label=None, + mode='w', encoding=None, compression=None, quoting=None, + quotechar='"', line_terminator='\n', chunksize=None, + tupleize_cols=None, date_format=None, doublequote=True, + escapechar=None, decimal='.'): + r"""Write DataFrame to a comma-separated values (csv) file + + Parameters + ---------- + path_or_buf : string or file handle, default None + File path or object, if None is provided the result is returned as + a string. + sep : character, default ',' + Field delimiter for the output file. + na_rep : string, default '' + Missing data representation + float_format : string, default None + Format string for floating point numbers + columns : sequence, optional + Columns to write + header : boolean or list of string, default True + Write out the column names. If a list of strings is given it is + assumed to be aliases for the column names + index : boolean, default True + Write row names (index) + index_label : string or sequence, or False, default None + Column label for index column(s) if desired. If None is given, and + `header` and `index` are True, then the index names are used. A + sequence should be given if the DataFrame uses MultiIndex. If + False do not print fields for index names. Use index_label=False + for easier importing in R + mode : str + Python write mode, default 'w' + encoding : string, optional + A string representing the encoding to use in the output file, + defaults to 'ascii' on Python 2 and 'utf-8' on Python 3. + compression : {'infer', 'gzip', 'bz2', 'xz', None}, default None + If 'infer' and `path_or_buf` is path-like, then detect compression + from the following extensions: '.gz', '.bz2' or '.xz' + (otherwise no compression). + line_terminator : string, default ``'\n'`` + The newline character or character sequence to use in the output + file + quoting : optional constant from csv module + defaults to csv.QUOTE_MINIMAL. If you have set a `float_format` + then floats are converted to strings and thus csv.QUOTE_NONNUMERIC + will treat them as non-numeric + quotechar : string (length 1), default '\"' + character used to quote fields + doublequote : boolean, default True + Control quoting of `quotechar` inside a field + escapechar : string (length 1), default None + character used to escape `sep` and `quotechar` when appropriate + chunksize : int or None + rows to write at a time + tupleize_cols : boolean, default False + .. deprecated:: 0.21.0 + This argument will be removed and will always write each row + of the multi-index as a separate row in the CSV file. + + Write MultiIndex columns as a list of tuples (if True) or in + the new, expanded format, where each MultiIndex column is a row + in the CSV (if False). + date_format : string, default None + Format string for datetime objects + decimal: string, default '.' + Character recognized as decimal separator. E.g. use ',' for + European data + + """ + + if tupleize_cols is not None: + warnings.warn("The 'tupleize_cols' parameter is deprecated and " + "will be removed in a future version", + FutureWarning, stacklevel=2) + else: + tupleize_cols = False + + from pandas.io.formats.csvs import CSVFormatter + formatter = CSVFormatter(self, path_or_buf, + line_terminator=line_terminator, sep=sep, + encoding=encoding, + compression=compression, quoting=quoting, + na_rep=na_rep, float_format=float_format, + cols=columns, header=header, index=index, + index_label=index_label, mode=mode, + chunksize=chunksize, quotechar=quotechar, + tupleize_cols=tupleize_cols, + date_format=date_format, + doublequote=doublequote, + escapechar=escapechar, decimal=decimal) + formatter.save() + + if path_or_buf is None: + return formatter.path_or_buf.getvalue() + def _doc_parms(cls): """Return a tuple of the doc parms.""" diff --git a/pandas/core/series.py b/pandas/core/series.py index 0bdb9d9cc23a62..1eb2289b054be0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -17,6 +17,7 @@ from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.common import ( is_categorical_dtype, + is_string_dtype, is_bool, is_integer, is_integer_dtype, is_float_dtype, @@ -3760,56 +3761,48 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None, return result - def to_csv(self, path=None, index=True, sep=",", na_rep='', - float_format=None, header=False, index_label=None, - mode='w', encoding=None, compression=None, date_format=None, - decimal='.'): + def to_csv(self, *args, **kwargs): """ - Write Series to a comma-separated values (csv) file - - Parameters - ---------- - path : string or file handle, default None - File path or object, if None is provided the result is returned as - a string. - na_rep : string, default '' - Missing data representation - float_format : string, default None - Format string for floating point numbers - header : boolean, default False - Write out series name - index : boolean, default True - Write row names (index) - index_label : string or sequence, default None - Column label for index column(s) if desired. If None is given, and - `header` and `index` are True, then the index names are used. A - sequence should be given if the DataFrame uses MultiIndex. - mode : Python write mode, default 'w' - sep : character, default "," - Field delimiter for the output file. - encoding : string, optional - a string representing the encoding to use if the contents are - non-ascii, for python versions prior to 3 - compression : string, optional - A string representing the compression to use in the output file. - Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only - used when the first argument is a filename. - date_format: string, default None - Format string for datetime objects. - decimal: string, default '.' - Character recognized as decimal separator. E.g. use ',' for - European data + See DataFrame.to_csv() + # TODO """ - from pandas.core.frame import DataFrame - df = DataFrame(self) - # result is only a string if no path provided, otherwise None - result = df.to_csv(path, index=index, sep=sep, na_rep=na_rep, - float_format=float_format, header=header, - index_label=index_label, mode=mode, - encoding=encoding, compression=compression, - date_format=date_format, decimal=decimal) - if path is None: - return result + + names = ['path_or_buf', 'sep', 'na_rep', 'float_format', 'columns', + 'header', 'index', 'index_label', 'mode', 'encoding', + 'compression', 'quoting', 'quotechar', 'line_terminator', + 'chunksize', 'tupleize_cols', 'date_format', 'doublequote', + 'escapechar', 'decimal'] + + old_names = ['path_or_buf', 'index', 'sep', 'na_rep', 'float_format', + 'header', 'index_label', 'mode', 'encoding', + 'compression', 'date_format','decimal'] + + if 'path' in kwargs: + warnings.warn("Argument 'path' is now named 'path_or_buf'") + kwargs['path_or_buf'] = kwargs.pop('path') + + if len(args) > 1: + # Either "index" (old signature) or "sep" (new signature) is being + # passed as second argument (while the first is the same) + maybe_sep = args[1] + if not (is_string_dtype(maybe_sep) and len(maybe_sep) == 1): + # old signature + names = old_names + + pos_args = dict(zip(names[:len(args)], args)) + + for key in pos_args: + if key in kwargs: + raise ValueError("Argument given by name ('{}') and position " + "({})".format(key, names.index(key))) + kwargs[key] = pos_args[key] + + if kwargs.get('header', None) is None: + warnings.warn("Argument 'header' has changed default value to " + "True: please pass an explicit value to suppress " + "this warning") + + return self.to_frame().to_csv(**kwargs) @Appender(generic._shared_docs['to_excel'] % _shared_doc_kwargs) def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index 814d794d45c184..770072726a7b23 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -37,7 +37,7 @@ def read_csv(self, path, **kwargs): def test_from_csv_deprecation(self): # see gh-17812 with ensure_clean() as path: - self.ts.to_csv(path) + self.ts.to_csv(path, header=False) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -48,7 +48,7 @@ def test_from_csv_deprecation(self): def test_from_csv(self): with ensure_clean() as path: - self.ts.to_csv(path) + self.ts.to_csv(path, header=False) ts = self.read_csv(path) assert_series_equal(self.ts, ts, check_names=False) @@ -65,7 +65,7 @@ def test_from_csv(self): ts_h = self.read_csv(path, header=0) assert ts_h.name == "ts" - self.series.to_csv(path) + self.series.to_csv(path, header=False) series = self.read_csv(path) assert_series_equal(self.series, series, check_names=False) @@ -92,13 +92,13 @@ def test_to_csv(self): import io with ensure_clean() as path: - self.ts.to_csv(path) + self.ts.to_csv(path, header=False) with io.open(path, newline=None) as f: lines = f.readlines() assert (lines[1] != '\n') - self.ts.to_csv(path, index=False) + self.ts.to_csv(path, index=False, header=False) arr = np.loadtxt(path) assert_almost_equal(arr, self.ts.values) @@ -106,7 +106,7 @@ def test_to_csv_unicode_index(self): buf = StringIO() s = Series([u("\u05d0"), "d2"], index=[u("\u05d0"), u("\u05d1")]) - s.to_csv(buf, encoding="UTF-8") + s.to_csv(buf, encoding="UTF-8", header=False) buf.seek(0) s2 = self.read_csv(buf, index_col=0, encoding="UTF-8") @@ -116,7 +116,7 @@ def test_to_csv_float_format(self): with ensure_clean() as filename: ser = Series([0.123456, 0.234567, 0.567567]) - ser.to_csv(filename, float_format="%.2f") + ser.to_csv(filename, float_format="%.2f", header=False) rs = self.read_csv(filename) xp = Series([0.12, 0.23, 0.57]) @@ -128,14 +128,14 @@ def test_to_csv_list_entries(self): split = s.str.split(r'\s+and\s+') buf = StringIO() - split.to_csv(buf) + split.to_csv(buf, header=False) def test_to_csv_path_is_none(self): # GH 8215 # Series.to_csv() was returning None, inconsistent with # DataFrame.to_csv() which returned string s = Series([1, 2, 3]) - csv_str = s.to_csv(path=None) + csv_str = s.to_csv(path_or_buf=None, header=False) assert isinstance(csv_str, str) @pytest.mark.parametrize('s,encoding', [