Skip to content

Commit

Permalink
BUG: respect passed chunksize in read_csv when using get_chunk functi…
Browse files Browse the repository at this point in the history
…on. close #3406
  • Loading branch information
wesm committed Apr 22, 2013
1 parent 5adcceb commit f24b923
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 5 deletions.
2 changes: 2 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ pandas 0.11.0
to non-fast apply) (GH3380_)
- Eliminated unicode errors on FreeBSD when using MPL GTK backend (GH3360_)
- Period.strftime should return unicode strings always (GH3363_)
- Respect passed read_* chunksize in get_chunk function (GH3406_)

.. _GH3294: https://github.com/pydata/pandas/issues/3294
.. _GH622: https://github.com/pydata/pandas/issues/622
Expand Down Expand Up @@ -425,6 +426,7 @@ pandas 0.11.0
.. _GH3308: https://github.com/pydata/pandas/issues/3308
.. _GH3311: https://github.com/pydata/pandas/issues/3311
.. _GH3380: https://github.com/pydata/pandas/issues/3380
.. _GH3406: https://github.com/pydata/pandas/issues/3406

pandas 0.10.1
=============
Expand Down
12 changes: 8 additions & 4 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,9 +649,10 @@ def read(self, nrows=None):
def _create_index(self, col_dict, columns):
pass

# backwards compatibility
get_chunk = read

def get_chunk(self, size=None):
    """Read the next chunk of rows by delegating to ``self.read``.

    Parameters
    ----------
    size : int, optional
        Number of rows to request. When omitted (``None``), the value of
        ``self.chunksize`` is used instead.

    Returns
    -------
    Whatever ``self.read(nrows=...)`` returns.
    """
    # Only consult self.chunksize when the caller gave no explicit size.
    nrows = self.chunksize if size is None else size
    return self.read(nrows=nrows)

def _is_index_col(col):
return col is not None and col is not False
Expand Down Expand Up @@ -1285,7 +1286,10 @@ def read(self, rows=None):
return index, columns, data

# legacy
get_chunk = read
def get_chunk(self, size=None):
    """Read the next chunk of rows by delegating to ``self.read``.

    Parameters
    ----------
    size : int, optional
        Number of rows to request. When omitted (``None``), the value of
        ``self.chunksize`` is used instead.

    Returns
    -------
    Whatever ``self.read(rows=...)`` returns.
    """
    if size is None:
        size = self.chunksize
    # This class's read() names its row-count parameter ``rows`` (not
    # ``nrows``); passing ``nrows=`` raises TypeError for an unexpected
    # keyword argument.
    return self.read(rows=size)

def _convert_data(self, data):
# apply converters
Expand Down
15 changes: 14 additions & 1 deletion pandas/io/tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,9 @@ def test_malformed(self):
2,3,4
"""
try:
it = self.read_table(StringIO(data), sep=',', header=1, comment='#', iterator=True, chunksize=1, skiprows=[2])
it = self.read_table(StringIO(data), sep=',', header=1,
comment='#', iterator=True, chunksize=1,
skiprows=[2])
df = it.read(1)
it.read(2)
self.assert_(False)
Expand Down Expand Up @@ -876,6 +878,17 @@ def test_read_chunksize_named(self):
tm.assert_frame_equal(chunks[1], df[2:4])
tm.assert_frame_equal(chunks[2], df[4:])

def test_get_chunk_passed_chunksize(self):
data = """A,B,C
1,2,3
4,5,6
7,8,9
1,2,3"""
result = self.read_csv(StringIO(data), chunksize=2)

piece = result.get_chunk()
self.assertEqual(len(piece), 2)

def test_read_text_list(self):
data = """A,B,C\nfoo,1,2,3\nbar,4,5,6"""
as_list = [['A', 'B', 'C'], ['foo', '1', '2', '3'], ['bar',
Expand Down

0 comments on commit f24b923

Please sign in to comment.