Skip to content

Commit

Permalink
ENH: add get_value, set_value to Panel per #437, more perf tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Dec 5, 2011
1 parent bc10c86 commit 6e84ba1
Show file tree
Hide file tree
Showing 8 changed files with 116 additions and 31 deletions.
34 changes: 34 additions & 0 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,40 @@ def _get_values(self):

values = property(fget=_get_values)

#----------------------------------------------------------------------
# Getting and setting elements

def get_value(self, item, major, minor):
    """
    Fetch the scalar stored at one (item, major, minor) position

    Parameters
    ----------
    item : item label (panel item)
    major : major axis label (panel item row)
    minor : minor axis label (panel item column)

    Returns
    -------
    element : scalar value
    """
    # Two-step lookup: resolve the item through the item cache, then ask
    # that object for the (major, minor) cell.
    return self._get_item_cache(item).get_value(major, minor)

def set_value(self, item, major, minor, value):
    """
    Quickly set single value at (item, major, minor) location

    Parameters
    ----------
    item : item label (panel item)
    major : major axis label (panel item row)
    minor : minor axis label (panel item column)
    value : scalar value

    Returns
    -------
    Whatever the item's own ``set_value(major, minor, value)`` returns
    """
    # Same two-step lookup as get_value: resolve the item through the item
    # cache, then delegate the write to that object.
    frame = self._get_item_cache(item)
    return frame.set_value(major, minor, value)

def _box_item_values(self, key, values):
return DataFrame(values, index=self.major_axis, columns=self.minor_axis)

Expand Down
27 changes: 20 additions & 7 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import pandas.core.datetools as datetools
import pandas.core.nanops as nanops
import pandas._tseries as lib
import pandas._engines as _gin

__all__ = ['Series', 'TimeSeries']

Expand Down Expand Up @@ -253,12 +254,11 @@ def __getitem__(self, key):
return self._multilevel_index(key)
else:
hash(key)
values = self.values
try:
return values[self.index.get_loc(key)]
return self.get_value(key)
except KeyError, e1:
try:
return values[key]
return _gin.get_value_at(self, key)
except Exception, _:
pass
raise e1
Expand Down Expand Up @@ -305,12 +305,25 @@ def get(self, label, default=None):
y : scalar
"""
try:
return self.index._engine.get_value(self, label)
return self.get_value(label)
except KeyError:
return default
get_value = get

def put_value(self, label, value):
def get_value(self, label):
    """
    Quickly retrieve single value at passed index label

    Unlike ``get``, no default is substituted: a failed lookup surfaces as
    whatever the index engine raises (``get`` treats KeyError as "missing").

    Parameters
    ----------
    label : index label

    Returns
    -------
    value : scalar value
    """
    # Delegate straight to the index's engine, which is handed this object
    # and the label to look up.
    return self.index._engine.get_value(self, label)

def set_value(self, label, value):
"""
Quickly set single value at passed label
Expand All @@ -323,7 +336,7 @@ def put_value(self, label, value):
-------
element : scalar value
"""
self.index._engine.put_value(self, label, value)
self.index._engine.set_value(self, label, value)

def _multilevel_index(self, key):
values = self.values
Expand Down
40 changes: 26 additions & 14 deletions pandas/src/engines.pyx
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from numpy cimport ndarray
cimport numpy as cnp

cdef extern from "numpy_helper.h":
inline int is_integer_object(object)
cnp.import_array()

cimport util

cdef class IndexEngine:

Expand All @@ -15,9 +16,7 @@ cdef class IndexEngine:
void* data_ptr

loc = self.get_loc(key)
assert(is_integer_object(loc))
data_ptr = cnp.PyArray_GETPTR1(arr, loc)
return cnp.PyArray_GETITEM(arr, data_ptr)
return get_value_at(arr, loc)

cpdef set_value(self, ndarray arr, object key, object value):
'''
Expand All @@ -28,21 +27,34 @@ cdef class IndexEngine:
void* data_ptr

loc = self.get_loc(key)
assert(is_integer_object(loc))
data_ptr = cnp.PyArray_GETPTR1(arr, loc)
cnp.PyArray_SETITEM(arr, data_ptr, value)
set_value_at(arr, loc, value)

cpdef get_value_at(ndarray arr, object loc):
assert(is_integer_object(loc))
data_ptr = cnp.PyArray_GETPTR1(arr, loc)
cpdef inline object get_value_at(ndarray arr, object loc):
    '''
    Return the element of arr at integer position loc.

    A float loc that compares equal to its int() conversion is accepted and
    treated as that integer. Negative positions count from the end of the
    array. Positions outside the array raise IndexError instead of reading
    memory outside the buffer.
    '''
    cdef:
        Py_ssize_t i, sz
        void* data_ptr
    if util.is_float_object(loc):
        casted = int(loc)
        if casted == loc:
            loc = casted
    i = <Py_ssize_t> loc
    sz = cnp.PyArray_SIZE(arr)
    if i < 0:
        i += sz
    # PyArray_GETPTR1 does no range checking, so validate before using it
    if i < 0 or i >= sz:
        raise IndexError('index out of bounds')
    data_ptr = cnp.PyArray_GETPTR1(arr, i)
    return cnp.PyArray_GETITEM(arr, data_ptr)

cpdef set_value_at(ndarray arr, object loc, object value):
assert(is_integer_object(loc))
cpdef inline set_value_at(ndarray arr, object loc, object value):
    '''
    Store value into arr at integer position loc.

    A float loc that compares equal to its int() conversion is accepted and
    treated as that integer. Negative positions count from the end of the
    array. Positions outside the array raise IndexError instead of writing
    memory outside the buffer.
    '''
    cdef:
        Py_ssize_t i, sz
        void* data_ptr
    if util.is_float_object(loc):
        casted = int(loc)
        if casted == loc:
            loc = casted
    i = <Py_ssize_t> loc
    sz = cnp.PyArray_SIZE(arr)
    if i < 0:
        i += sz
    # PyArray_GETPTR1 does no range checking, so validate before using it
    if i < 0 or i >= sz:
        raise IndexError('index out of bounds')
    # Use the normalised position i, not the raw loc, so that negative
    # positions address the intended element.
    data_ptr = cnp.PyArray_GETPTR1(arr, i)
    cnp.PyArray_SETITEM(arr, data_ptr, value)


cdef class DictIndexEngine(IndexEngine):
'''
For accelerating low-level internal details of indexing
Expand Down
6 changes: 6 additions & 0 deletions pandas/src/numpy_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,9 @@ inline int
is_integer_object(PyObject* obj) {
return PyArray_IsIntegerScalar(obj);
}

/*
 * Return 1 when obj is a Python float or an instance of NumPy's floating
 * scalar type (PyFloatingArrType_Type), and 0 otherwise.
 */
inline int
is_float_object(PyObject* obj) {
    if (PyFloat_Check(obj)) {
        return 1;
    }
    return PyObject_TypeCheck(obj, &PyFloatingArrType_Type) ? 1 : 0;
}
4 changes: 2 additions & 2 deletions pandas/src/tseries.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@ cdef double_t *get_double_ptr(ndarray arr):

return <double_t *> arr.data

cdef extern from "numpy_helper.h":
inline int is_integer_object(object)
from util cimport is_integer_object

cdef extern from "math.h":
double sqrt(double x)
Expand Down Expand Up @@ -479,3 +478,4 @@ include "reindex.pyx"
include "generated.pyx"
include "parsing.pyx"
include "reduce.pyx"

14 changes: 9 additions & 5 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,16 +666,20 @@ def test_get_value(self):
for col in self.frame.columns:
result = self.frame.get_value(idx, col)
expected = self.frame[col][idx]
self.assertEqual(result, expected)

# partial w/ MultiIndex raise exception
index = MultiIndex.from_tuples([(0, 1), (0, 2), (1, 1), (1, 2)])
assert_almost_equal(result, expected)

def test_set_value(self):
for idx in self.frame.index:
for col in self.frame.columns:
self.frame.set_value(idx, col, 1)
self.assertEqual(self.frame[col][idx], 1)
assert_almost_equal(self.frame[col][idx], 1)

def test_get_set_value_no_partial_indexing(self):
    """get_value/set_value must raise KeyError for a partial MultiIndex key."""
    # partial w/ MultiIndex raise exception
    tuples = [(0, 1), (0, 2), (1, 1), (1, 2)]
    frame = DataFrame(index=MultiIndex.from_tuples(tuples),
                      columns=range(4))
    self.assertRaises(KeyError, frame.get_value, 0, 1)
    self.assertRaises(KeyError, frame.set_value, 0, 1, 0)

def test_single_element_ix_dont_upcast(self):
self.frame['E'] = 1
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,21 @@ def _check_view(self, indexer, comp):
self.assert_((obj.values == 0).all())
comp(cp.ix[indexer].reindex_like(obj), obj)

def test_get_value(self):
    """get_value must agree with chained item/minor/major indexing."""
    panel = self.panel
    for item_label in panel.items:
        # only every other major-axis label is exercised
        for major_label in panel.major_axis[::2]:
            for minor_label in panel.minor_axis:
                result = panel.get_value(item_label, major_label,
                                         minor_label)
                expected = panel[item_label][minor_label][major_label]
                assert_almost_equal(result, expected)

def test_set_value(self):
    """A value written with set_value must be readable via chained indexing."""
    panel = self.panel
    for item_label in panel.items:
        # only every other major-axis label is exercised
        for major_label in panel.major_axis[::2]:
            for minor_label in panel.minor_axis:
                panel.set_value(item_label, major_label, minor_label, 1.)
                stored = panel[item_label][minor_label][major_label]
                assert_almost_equal(stored, 1.)

class TestPanel(unittest.TestCase, PanelTests, CheckIndexing,
SafeForLongAndSparse,
SafeForSparse):
Expand Down
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
# Use an empty list, not None: the Extension(...) call below concatenates
# this with another list, and None + list raises TypeError.
tseries_depends = []

tseries_ext = Extension('pandas._tseries',
depends=tseries_depends,
depends=tseries_depends + ['pandas/src/numpy_helper.h'],
sources=[srcpath('tseries', suffix=suffix)],
include_dirs=[np.get_include()])

Expand All @@ -298,8 +298,9 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
include_dirs=[np.get_include()])

engines_ext = Extension('pandas._engines',
sources=[srcpath('engines', suffix=suffix)],
include_dirs=[np.get_include()])
depends=['pandas/src/numpy_helper.h'],
sources=[srcpath('engines', suffix=suffix)],
include_dirs=[np.get_include()])

sandbox_ext = Extension('pandas._sandbox',
sources=[srcpath('sandbox', suffix=suffix)],
Expand Down

0 comments on commit 6e84ba1

Please sign in to comment.