BUG: respect passed chunksize in read_csv when using get_chunk function. close #3406
pandas-dev · Apr 22, 2013 · f24b923 · f24b923
1 parent 5adcceb
commit f24b923
Show file tree

Hide file tree

Showing 3 changed files with 24 additions and 5 deletions.
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -311,6 +311,7 @@ pandas 0.11.0
     to non-fast apply) (GH3380_)
   - Eliminated unicode errors on FreeBSD when using MPL GTK backend (GH3360_)
   - Period.strftime should return unicode strings always (GH3363_)
+  - Respect passed read_* chunksize in get_chunk function (GH3406_)
 
 .. _GH3294: https://github.com/pydata/pandas/issues/3294
 .. _GH622: https://github.com/pydata/pandas/issues/622
@@ -425,6 +426,7 @@ pandas 0.11.0
 .. _GH3308: https://github.com/pydata/pandas/issues/3308
 .. _GH3311: https://github.com/pydata/pandas/issues/3311
 .. _GH3380: https://github.com/pydata/pandas/issues/3380
+.. _GH3406: https://github.com/pydata/pandas/issues/3406
 
 pandas 0.10.1
 =============

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -649,9 +649,10 @@ def read(self, nrows=None):
     def _create_index(self, col_dict, columns):
         pass
 
-    # backwards compatibility
-    get_chunk = read
-
+    def get_chunk(self, size=None):
+        # an explicit size wins; otherwise fall back to self.chunksize
+        nrows = self.chunksize if size is None else size
+        return self.read(nrows=nrows)
 
 def _is_index_col(col):
     return col is not None and col is not False
@@ -1285,7 +1286,10 @@ def read(self, rows=None):
         return index, columns, data
 
     # legacy
-    get_chunk = read
+    def get_chunk(self, size=None):
+        if size is None:
+            size = self.chunksize  # no explicit size: use the stored chunksize
+        return self.read(rows=size)  # read() here takes rows, not nrows
 
     def _convert_data(self, data):
         # apply converters

diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -456,7 +456,9 @@ def test_malformed(self):
 2,3,4
 """
         try:
-            it = self.read_table(StringIO(data), sep=',', header=1, comment='#', iterator=True, chunksize=1, skiprows=[2])
+            it = self.read_table(StringIO(data), sep=',', header=1,
+                                 comment='#', iterator=True, chunksize=1,
+                                 skiprows=[2])
             df = it.read(1)
             it.read(2)
             self.assert_(False)
@@ -876,6 +878,17 @@ def test_read_chunksize_named(self):
         tm.assert_frame_equal(chunks[1], df[2:4])
         tm.assert_frame_equal(chunks[2], df[4:])
 
+    def test_get_chunk_passed_chunksize(self):
+        data = """A,B,C
+1,2,3
+4,5,6
+7,8,9
+1,2,3"""
+        reader = self.read_csv(StringIO(data), chunksize=2)
+        # get_chunk() without a size must honour the chunksize passed above
+        chunk = reader.get_chunk()
+        self.assertEqual(len(chunk), 2)
+
     def test_read_text_list(self):
         data = """A,B,C\nfoo,1,2,3\nbar,4,5,6"""
         as_list = [['A', 'B', 'C'], ['foo', '1', '2', '3'], ['bar',