Skip to content

Commit

Permalink
BUG: raise OverflowError on integer values exceeding int64 precision …
Browse files Browse the repository at this point in the history
…in parsers. close #2247
  • Loading branch information
wesm committed Nov 28, 2012
1 parent 6b5be05 commit 8316a1e
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 3 deletions.
1 change: 1 addition & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ pandas 0.10.0
- Enable joins between MultiIndex and regular Index (#2024)
- Fix time zone metadata issue when unioning non-overlapping DatetimeIndex
objects (#2367)
- Raise/handle int64 overflows in parsers (#2247)

pandas 0.9.1
============
Expand Down
17 changes: 17 additions & 0 deletions pandas/io/tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
parse_date_time, parse_date_fields, parse_all_fields
)

from pandas._parser import OverflowError

def _skip_if_no_xlrd():
try:
import xlrd
Expand Down Expand Up @@ -1898,6 +1900,21 @@ def test_disable_bool_parsing(self):
result = read_csv(StringIO(data), dtype=object, na_filter=False)
self.assertEquals(result['B'][2], '')

def test_int64_overflow(self):
    """Integers too large for int64 fall back to object or raise."""
    # Every ID has 26 digits; even without the leading zeros the values
    # are far larger than the int64 maximum (9223372036854775807).
    data = """ID
00013007854817840016671868
00013007854817840016749251
00013007854817840016754630
00013007854817840016781876
00013007854817840017028824
00013007854817840017963235
00013007854817840018860166"""

    # With no dtype requested, the column must come back as object
    # rather than as an integer column.
    inferred = read_csv(StringIO(data))
    id_dtype = inferred['ID'].dtype
    self.assertTrue(id_dtype == object)

    # Explicitly requesting int64 cannot be satisfied, so the parser's
    # OverflowError has to reach the caller.
    self.assertRaises(OverflowError, read_csv, StringIO(data), dtype='i8')

class TestParseSQL(unittest.TestCase):

Expand Down
23 changes: 20 additions & 3 deletions pandas/src/parser.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ cdef extern from "parser/tokenizer.h":
EAT_COMMENT
FINISHED

enum: ERROR_OVERFLOW

ctypedef void* (*io_callback)(void *src, size_t nbytes, size_t *bytes_read,
int *status)
ctypedef int (*io_cleanup)(void *src)
Expand Down Expand Up @@ -840,9 +842,13 @@ cdef class TextReader:
else:
col_res = None
for dt in dtype_cast_order:
col_res, na_count = self._convert_with_dtype(dt, i, start,
end, na_filter,
na_hashset)
try:
col_res, na_count = self._convert_with_dtype(
dt, i, start, end, na_filter, na_hashset)
except OverflowError:
col_res, na_count = self._convert_with_dtype(
'|O8', i, start, end, na_filter, na_hashset)

if col_res is not None:
break

Expand Down Expand Up @@ -966,6 +972,11 @@ cdef class TextReader:
class CParserError(Exception):
    """Exception type defined by the parser module.

    NOTE(review): presumably raised for failures reported by the C
    tokenizer -- confirm against the raise sites elsewhere in this file.
    """


class OverflowError(ValueError):
    """Raised when an integer field does not fit into int64.

    The int64 conversion routine raises this, passing the offending word,
    when the tokenizer reports ERROR_OVERFLOW. It derives from ValueError,
    so handlers written as ``except ValueError`` catch it as well.

    NOTE(review): the name shadows the builtin ``OverflowError`` inside
    this module, and this class is not a subclass of the builtin.
    """


def _ensure_encoded(list lst):
cdef list result = []
for x in lst:
Expand Down Expand Up @@ -1251,6 +1262,7 @@ cdef _try_double(parser_t *parser, int col, int line_start, int line_end,

return result, na_count


cdef _try_int64(parser_t *parser, int col, int line_start, int line_end,
bint na_filter, kh_str_t *na_hashset):
cdef:
Expand Down Expand Up @@ -1283,13 +1295,18 @@ cdef _try_int64(parser_t *parser, int col, int line_start, int line_end,
data[i] = str_to_int64(word, INT64_MIN, INT64_MAX,
&error, parser.thousands)
if error != 0:
if error == ERROR_OVERFLOW:
raise OverflowError(word)

return None, None
else:
for i in range(lines):
word = COLITER_NEXT(it)
data[i] = str_to_int64(word, INT64_MIN, INT64_MAX,
&error, parser.thousands)
if error != 0:
if error == ERROR_OVERFLOW:
raise OverflowError(word)
return None, None

return result, na_count
Expand Down

0 comments on commit 8316a1e

Please sign in to comment.