From f24b923099a543eeefb2e4894215bd356410d9d4 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 22 Apr 2013 16:52:30 -0700 Subject: [PATCH] BUG: respect passed chunksize in read_csv when using get_chunk function. close #3406 --- RELEASE.rst | 2 ++ pandas/io/parsers.py | 12 ++++++++---- pandas/io/tests/test_parsers.py | 15 ++++++++++++++- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 2008f42a5b3dc..e2f8db17a6682 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -311,6 +311,7 @@ pandas 0.11.0 to non-fast apply) (GH3380_) - Eliminated unicode errors on FreeBSD when using MPL GTK backend (GH3360_) - Period.strftime should return unicode strings always (GH3363_) + - Respect passed read_* chunksize in get_chunk function (GH3406_) .. _GH3294: https://github.com/pydata/pandas/issues/3294 .. _GH622: https://github.com/pydata/pandas/issues/622 @@ -425,6 +426,7 @@ pandas 0.11.0 .. _GH3308: https://github.com/pydata/pandas/issues/3308 .. _GH3311: https://github.com/pydata/pandas/issues/3311 .. _GH3380: https://github.com/pydata/pandas/issues/3380 +.. _GH3406: https://github.com/pydata/pandas/issues/3406 pandas 0.10.1 ============= diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 7a05529342d87..60798bacbc144 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -649,9 +649,10 @@ def read(self, nrows=None): def _create_index(self, col_dict, columns): pass - # backwards compatibility - get_chunk = read - + def get_chunk(self, size=None): + if size is None: + size = self.chunksize + return self.read(nrows=size) def _is_index_col(col): return col is not None and col is not False @@ -1285,7 +1286,10 @@ def read(self, rows=None): return index, columns, data # legacy - get_chunk = read + def get_chunk(self, size=None): + if size is None: + size = self.chunksize + return self.read(nrows=size) def _convert_data(self, data): # apply converters diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index d57b7f41b62fc..aa3fce3959860 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -456,7 +456,9 @@ def test_malformed(self): 2,3,4 """ try: - it = self.read_table(StringIO(data), sep=',', header=1, comment='#', iterator=True, chunksize=1, skiprows=[2]) + it = self.read_table(StringIO(data), sep=',', header=1, + comment='#', iterator=True, chunksize=1, + skiprows=[2]) df = it.read(1) it.read(2) self.assert_(False) @@ -876,6 +878,17 @@ def test_read_chunksize_named(self): tm.assert_frame_equal(chunks[1], df[2:4]) tm.assert_frame_equal(chunks[2], df[4:]) + def test_get_chunk_passed_chunksize(self): + data = """A,B,C +1,2,3 +4,5,6 +7,8,9 +1,2,3""" + result = self.read_csv(StringIO(data), chunksize=2) + + piece = result.get_chunk() + self.assertEqual(len(piece), 2) + def test_read_text_list(self): data = """A,B,C\nfoo,1,2,3\nbar,4,5,6""" as_list = [['A', 'B', 'C'], ['foo', '1', '2', '3'], ['bar',