Skip to content

Commit

Permalink
BUG: proper type inference with list of lists passed to DataFrame con…
Browse files Browse the repository at this point in the history
…structor, from_records type-handling fixes, GH #484
  • Loading branch information
wesm committed Dec 13, 2011
1 parent 32c5fe4 commit 21bad0f
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 30 deletions.
2 changes: 2 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ pandas 0.6.1
matrices (GH #189)
- Add `margins` option to `pivot_table` for computing subgroup aggregates (GH
#114)
- Add `Series.from_csv` function (PR #482)

**Improvements to existing features**

Expand Down Expand Up @@ -129,6 +130,7 @@ Thanks
- Chang She
- Ted Square
- Chris Uga
- Dieter Vandenbussche

pandas 0.6.0
============
Expand Down
39 changes: 23 additions & 16 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,12 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
copy=copy)
elif isinstance(data, list):
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
copy=copy)
if isinstance(data[0], (list, tuple)):
data, columns = _list_to_sdict(data, columns)
mgr = self._init_dict(data, index, columns, dtype=dtype)
else:
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
copy=copy)
else:
raise PandasError('DataFrame constructor not properly called!')

Expand Down Expand Up @@ -528,20 +532,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
if isinstance(data, (np.ndarray, DataFrame, dict)):
columns, sdict = _rec_to_dict(data)
else:
if isinstance(data[0], tuple):
content = list(lib.to_object_array_tuples(data).T)
else:
# list of lists
content = list(lib.to_object_array(data).T)

if columns is None:
columns = range(len(content))
else:
assert(len(columns) == len(content))

sdict = dict((c, lib.maybe_convert_objects(vals))
for c, vals in zip(columns, content))
del content
sdict, columns = _list_to_sdict(data, columns)

if exclude is None:
exclude = set()
Expand Down Expand Up @@ -3547,6 +3538,22 @@ def _rec_to_dict(arr):

return columns, sdict

def _list_to_sdict(data, columns):
if isinstance(data[0], tuple):
content = list(lib.to_object_array_tuples(data).T)
else:
# list of lists
content = list(lib.to_object_array(data).T)

if columns is None:
columns = range(len(content))
else:
assert(len(columns) == len(content))

sdict = dict((c, lib.maybe_convert_objects(vals))
for c, vals in zip(columns, content))
return sdict, columns

def _homogenize(data, index, columns, dtype=None):
from pandas.core.series import _sanitize_array

Expand Down
26 changes: 14 additions & 12 deletions pandas/src/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values):
for i from 0 <= i < n:
val = values[i]

if cpython.PyFloat_Check(val):
if util.is_float_object(val):
floats[i] = val
seen_float = 1
elif val in na_values:
Expand Down Expand Up @@ -144,18 +144,18 @@ def maybe_convert_objects(ndarray[object] objects):
seen_null = 1
objects[i] = onan
floats[i] = fnan
elif cpython.PyBool_Check(val):
elif util.is_bool_object(val):
seen_bool = 1
bools[i] = val
elif is_integer_object(val):
elif util.is_integer_object(val):
seen_int = 1
floats[i] = <float64_t> val
if not seen_null:
ints[i] = val
elif cpython.PyFloat_Check(val):
elif util.is_float_object(val):
floats[i] = val
seen_float = 1
elif not (cpython.PyString_Check(val) or cpython.PyUnicode_Check(val)):
elif not util.is_string_object(val):
# this will convert Decimal objects
try:
floats[i] = float(val)
Expand All @@ -173,14 +173,16 @@ def maybe_convert_objects(ndarray[object] objects):
else:
if seen_object:
return objects
elif seen_int:
return ints
elif seen_float:
return floats
elif seen_bool:
return bools.view(np.bool_)
elif not seen_bool:
if seen_float:
return floats
elif seen_int:
return ints
else:
return objects
if not seen_float and not seen_int:
return bools.view(np.bool_)

return objects

convert_sql_column = maybe_convert_objects

Expand Down
2 changes: 1 addition & 1 deletion pandas/src/tseries.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ cdef double_t *get_double_ptr(ndarray arr):

return <double_t *> arr.data

from util cimport is_integer_object
cimport util

cdef extern from "math.h":
double sqrt(double x)
Expand Down
2 changes: 2 additions & 0 deletions pandas/src/util.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ cimport numpy as cnp
cdef extern from "numpy_helper.h":
inline int is_integer_object(object)
inline int is_float_object(object)
inline int is_bool_object(object)
inline int is_string_object(object)
inline int assign_value_1d (ndarray, Py_ssize_t, object) except -1

cpdef inline object get_value_at(ndarray arr, object loc):
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1222,6 +1222,13 @@ def test_constructor_more(self):
self.assertEqual(len(dm.columns), 2)
self.assert_(dm.values.dtype == np.float64)

def test_constructor_list_of_lists(self):
    # GH #484: a list-of-lists input should get per-column type inference
    rows = [[1, 'a'], [2, 'b']]
    frame = DataFrame(rows, columns=["num", "str"])

    # the numeric column is expected to come out as an integer dtype
    num_col = frame['num']
    self.assert_(com.is_integer_dtype(num_col))

    # the string column is expected to stay object dtype
    str_col = frame['str']
    self.assert_(str_col.dtype == np.object_)

def test_constructor_ragged(self):
data = {'A' : randn(10),
'B' : randn(8)}
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
tseries_depends = [srcpath(f, suffix='.pyx')
for f in tseries_depends]
else:
tseries_depends = None
tseries_depends = []

tseries_ext = Extension('pandas._tseries',
depends=tseries_depends + ['pandas/src/numpy_helper.h'],
Expand Down

0 comments on commit 21bad0f

Please sign in to comment.