From 241bde1bcd4326f15b630d6b1da20b076dbbc178 Mon Sep 17 00:00:00 2001 From: Brett Naul Date: Fri, 12 Oct 2018 15:23:56 -0700 Subject: [PATCH] Support writing CSV to GCS (#22704) --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/io/formats/csvs.py | 7 ++++--- pandas/tests/io/test_gcs.py | 15 +++++++++++++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index a05ef67a7238f..5d7f45b92b75d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -181,7 +181,7 @@ Other Enhancements - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`) - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with :class:`MultiIndex` (:issue:`21115`) - :meth:`Series.droplevel` and :meth:`DataFrame.droplevel` are now implemented (:issue:`20342`) -- Added support for reading from Google Cloud Storage via the ``gcsfs`` library (:issue:`19454`) +- Added support for reading from/writing to Google Cloud Storage via the ``gcsfs`` library (:issue:`19454`, :issue:`23094`) - :func:`to_gbq` and :func:`read_gbq` signature and documentation updated to reflect changes from the `Pandas-GBQ library version 0.6.0 <https://pandas-gbq.readthedocs.io/en/latest/changelog.html#changelog-0-6-0>`__. 
diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 9faac6cd09218..0344689183dbb 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -22,10 +22,9 @@ ABCMultiIndex, ABCPeriodIndex, ABCDatetimeIndex, ABCIndexClass) from pandas.io.common import ( - _expand_user, _get_handle, _infer_compression, - _stringify_path, + get_filepath_or_buffer, UnicodeWriter, ) @@ -45,7 +44,9 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', if path_or_buf is None: path_or_buf = StringIO() - self.path_or_buf = _expand_user(_stringify_path(path_or_buf)) + self.path_or_buf, _, _, _ = get_filepath_or_buffer( + path_or_buf, encoding=encoding, compression=compression, mode=mode + ) self.sep = sep self.na_rep = na_rep self.float_format = float_format diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index 251c93df0733d..efbd57dec9f1b 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -26,6 +26,21 @@ def test_read_csv_gcs(mock): assert_frame_equal(df1, df2) +@td.skip_if_no('gcsfs') +def test_to_csv_gcs(mock): + df1 = DataFrame({'int': [1, 3], 'float': [2.0, np.nan], 'str': ['t', 's'], + 'dt': date_range('2018-06-18', periods=2)}) + with mock.patch('gcsfs.GCSFileSystem') as MockFileSystem: + s = StringIO() + instance = MockFileSystem.return_value + instance.open.return_value = s + + df1.to_csv('gs://test/test.csv', index=True) + df2 = read_csv(StringIO(s.getvalue()), parse_dates=['dt'], index_col=0) + + assert_frame_equal(df1, df2) + + @td.skip_if_no('gcsfs') def test_gcs_get_filepath_or_buffer(mock): df1 = DataFrame({'int': [1, 3], 'float': [2.0, np.nan], 'str': ['t', 's'],