diff --git a/RELEASE.rst b/RELEASE.rst index 6721205846cd6..3a4d45a26efc0 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -58,6 +58,7 @@ pandas 0.6.1 matrices (GH #189) - Add `margins` option to `pivot_table` for computing subgroup aggregates (GH #114) + - Add `Series.from_csv` function (PR #482) **Improvements to existing features** @@ -129,6 +130,7 @@ Thanks - Chang She - Ted Square - Chris Uga +- Dieter Vandenbussche pandas 0.6.0 ============ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d71ea7ec764b3..f9199a81134c6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -207,8 +207,12 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, mgr = self._init_ndarray(data, index, columns, dtype=dtype, copy=copy) elif isinstance(data, list): - mgr = self._init_ndarray(data, index, columns, dtype=dtype, - copy=copy) + if isinstance(data[0], (list, tuple)): + data, columns = _list_to_sdict(data, columns) + mgr = self._init_dict(data, index, columns, dtype=dtype) + else: + mgr = self._init_ndarray(data, index, columns, dtype=dtype, + copy=copy) else: raise PandasError('DataFrame constructor not properly called!') @@ -528,20 +532,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None, if isinstance(data, (np.ndarray, DataFrame, dict)): columns, sdict = _rec_to_dict(data) else: - if isinstance(data[0], tuple): - content = list(lib.to_object_array_tuples(data).T) - else: - # list of lists - content = list(lib.to_object_array(data).T) - - if columns is None: - columns = range(len(content)) - else: - assert(len(columns) == len(content)) - - sdict = dict((c, lib.maybe_convert_objects(vals)) - for c, vals in zip(columns, content)) - del content + sdict, columns = _list_to_sdict(data, columns) if exclude is None: exclude = set() @@ -3547,6 +3538,22 @@ def _rec_to_dict(arr): return columns, sdict +def _list_to_sdict(data, columns): + if isinstance(data[0], tuple): + content = list(lib.to_object_array_tuples(data).T) + else: + # list of lists + content = list(lib.to_object_array(data).T) + + if columns is None: + columns = range(len(content)) + else: + assert(len(columns) == len(content)) + + sdict = dict((c, lib.maybe_convert_objects(vals)) + for c, vals in zip(columns, content)) + return sdict, columns + def _homogenize(data, index, columns, dtype=None): from pandas.core.series import _sanitize_array diff --git a/pandas/src/parsing.pyx b/pandas/src/parsing.pyx index 1438308e24405..bf1fa458cbbaa 100644 --- a/pandas/src/parsing.pyx +++ b/pandas/src/parsing.pyx @@ -85,7 +85,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values): for i from 0 <= i < n: val = values[i] - if cpython.PyFloat_Check(val): + if util.is_float_object(val): floats[i] = val seen_float = 1 elif val in na_values: @@ -144,18 +144,18 @@ def maybe_convert_objects(ndarray[object] objects): seen_null = 1 objects[i] = onan floats[i] = fnan - elif cpython.PyBool_Check(val): + elif util.is_bool_object(val): seen_bool = 1 bools[i] = val - elif is_integer_object(val): + elif util.is_integer_object(val): seen_int = 1 floats[i] = val if not seen_null: ints[i] = val - elif cpython.PyFloat_Check(val): + elif util.is_float_object(val): floats[i] = val seen_float = 1 - elif not (cpython.PyString_Check(val) or cpython.PyUnicode_Check(val)): + elif not util.is_string_object(val): # this will convert Decimal objects try: floats[i] = float(val) @@ -173,14 +173,16 @@ def maybe_convert_objects(ndarray[object] objects): else: if seen_object: return objects - elif seen_int: - return ints - elif seen_float: - return floats - elif seen_bool: - return bools.view(np.bool_) + elif not seen_bool: + if seen_float: + return floats + elif seen_int: + return ints else: - return objects + if not seen_float and not seen_int: + return bools.view(np.bool_) + + return objects convert_sql_column = maybe_convert_objects diff --git a/pandas/src/tseries.pyx b/pandas/src/tseries.pyx index 91811dd9acf9e..2b563612903d6 100644 --- a/pandas/src/tseries.pyx +++ b/pandas/src/tseries.pyx @@ -49,7 +49,7 @@ cdef double_t *get_double_ptr(ndarray arr): return arr.data -from util cimport is_integer_object +cimport util cdef extern from "math.h": double sqrt(double x) diff --git a/pandas/src/util.pxd b/pandas/src/util.pxd index 404c996358a29..f3cd5102cc4d6 100644 --- a/pandas/src/util.pxd +++ b/pandas/src/util.pxd @@ -4,6 +4,8 @@ cimport numpy as cnp cdef extern from "numpy_helper.h": inline int is_integer_object(object) inline int is_float_object(object) + inline int is_bool_object(object) + inline int is_string_object(object) inline int assign_value_1d (ndarray, Py_ssize_t, object) except -1 cpdef inline object get_value_at(ndarray arr, object loc): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 7d680d4fdf94e..b071d44b727c3 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1222,6 +1222,13 @@ def test_constructor_more(self): self.assertEqual(len(dm.columns), 2) self.assert_(dm.values.dtype == np.float64) + def test_constructor_list_of_lists(self): + # GH #484 + l = [[1, 'a'], [2, 'b']] + df = DataFrame(data=l, columns=["num", "str"]) + self.assert_(com.is_integer_dtype(df['num'])) + self.assert_(df['str'].dtype == np.object_) + def test_constructor_ragged(self): data = {'A' : randn(10), 'B' : randn(8)} diff --git a/setup.py b/setup.py index 945961fce3b49..5049aa91ddfff 100755 --- a/setup.py +++ b/setup.py @@ -286,7 +286,7 @@ def srcpath(name=None, suffix='.pyx', subdir='src'): tseries_depends = [srcpath(f, suffix='.pyx') for f in tseries_depends] else: - tseries_depends = None + tseries_depends = [] tseries_ext = Extension('pandas._tseries', depends=tseries_depends + ['pandas/src/numpy_helper.h'],