Skip to content

Commit

Permalink
ENH: raise exc if find NA values when explicit integer dtype passed t…
Browse files Browse the repository at this point in the history
…o read_* functions. close #2631
  • Loading branch information
wesm committed Jan 20, 2013
1 parent dd439c9 commit 5da8df7
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 6 deletions.
3 changes: 3 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ pandas 0.10.1
- ``pivot_table`` aggfunc can be anything used in GroupBy.aggregate (GH2643_)
- Implement DataFrame merges in case where set cardinalities might overflow
64-bit integer (GH2690_)
- Raise exception in C file parser if an integer dtype is specified and the
column has NA values. (GH2631_)

**Bug fixes**

Expand Down Expand Up @@ -103,6 +105,7 @@ pandas 0.10.1
.. _GH2616: https://github.com/pydata/pandas/issues/2616
.. _GH2625: https://github.com/pydata/pandas/issues/2625
.. _GH2643: https://github.com/pydata/pandas/issues/2643
.. _GH2631: https://github.com/pydata/pandas/issues/2631
.. _GH2633: https://github.com/pydata/pandas/issues/2633
.. _GH2637: https://github.com/pydata/pandas/issues/2637
.. _GH2690: https://github.com/pydata/pandas/issues/2690
Expand Down
10 changes: 10 additions & 0 deletions pandas/io/tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2007,6 +2007,16 @@ def test_custom_lineterminator(self):
result = self.assertRaises(ValueError, read_csv, StringIO(data2),
lineterminator='~~')

def test_raise_on_passed_int_dtype_with_nas(self):
# GH #2631: read_csv is expected to raise when an explicit integer dtype is
# requested for a column that contains a missing value.
# The second data row below leaves the DOY field empty.
data = """YEAR, DOY, a
2001,106380451,10
2001,,11
2001,106380451,67"""
# NOTE(review): the header has a space after each comma; skipinitialspace=True
# presumably lets the ' DOY' header match the 'DOY' dtype key -- confirm.
self.assertRaises(Exception, read_csv, StringIO(data), sep=",",
skipinitialspace=True,
dtype={'DOY': np.int64})


class TestParseSQL(unittest.TestCase):

Expand Down
14 changes: 8 additions & 6 deletions pandas/src/parser.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -870,7 +870,7 @@ cdef class TextReader:
col_dtype = np.dtype(col_dtype).str

return self._convert_with_dtype(col_dtype, i, start, end,
na_filter, na_hashset)
na_filter, 1, na_hashset)

if i in self.noconvert:
return self._string_convert(i, start, end, na_filter, na_hashset)
Expand All @@ -879,10 +879,10 @@ cdef class TextReader:
for dt in dtype_cast_order:
try:
col_res, na_count = self._convert_with_dtype(
dt, i, start, end, na_filter, na_hashset)
dt, i, start, end, na_filter, 0, na_hashset)
except OverflowError:
col_res, na_count = self._convert_with_dtype(
'|O8', i, start, end, na_filter, na_hashset)
'|O8', i, start, end, na_filter, 0, na_hashset)

if col_res is not None:
break
Expand All @@ -891,14 +891,16 @@ cdef class TextReader:

cdef _convert_with_dtype(self, object dtype, Py_ssize_t i,
int start, int end,
bint na_filter, kh_str_t *na_hashset):
bint na_filter,
bint user_dtype,
kh_str_t *na_hashset):
cdef kh_str_t *true_set, *false_set

if dtype[1] == 'i' or dtype[1] == 'u':
result, na_count = _try_int64(self.parser, i, start, end,
na_filter, na_hashset)
# if na_count > 0:
# raise Exception('Integer column has NA values')
if user_dtype and na_count > 0:
raise Exception('Integer column has NA values')

if dtype[1:] != 'i8':
result = result.astype(dtype)
Expand Down

0 comments on commit 5da8df7

Please sign in to comment.