BUG: don't segfault in tokenizer cleanup if initializing file reader failed. close #2474
pandas-dev · Dec 10, 2012 · dd7218c · dd7218c
1 parent 223a1fd
commit dd7218c
Show file tree

Hide file tree

Showing 3 changed files with 10 additions and 0 deletions.
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -1828,6 +1828,9 @@ def test_decompression(self):
 
             result = self.read_csv('__tmp__', compression='bz2')
             tm.assert_frame_equal(result, expected)
+
+            self.assertRaises(ValueError, self.read_csv,
+                              '__tmp__', compression='bz3')
         finally:
             try:
                 os.remove('__tmp__')

diff --git a/pandas/src/parser.pyx b/pandas/src/parser.pyx
@@ -463,6 +463,9 @@ cdef class TextReader:
             int status
             void *ptr
 
+        self.parser.cb_io = NULL
+        self.parser.cb_cleanup = NULL
+
         if isinstance(source, basestring) and self.compression:
             if self.compression == 'gzip':
                 import gzip

diff --git a/pandas/src/parser/tokenizer.c b/pandas/src/parser/tokenizer.c
@@ -179,6 +179,10 @@ int parser_clear_data_buffers(parser_t *self) {
 }
 
 int parser_cleanup(parser_t *self) {
+    if (self->cb_cleanup == NULL) {
+        return 0;
+    }
+
     if (self->cb_cleanup(self->source) < 0) {
         return -1;
     }