Skip to content

Commit

Permalink
ENH: refactoring and micro-optimizations to support #437, uncovered panel bug
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Dec 5, 2011
1 parent 44f689a commit 768af08
Show file tree
Hide file tree
Showing 10 changed files with 222 additions and 110 deletions.
89 changes: 22 additions & 67 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from pandas.core.common import (isnull, notnull, PandasError, _try_sort,
_default_index, _stringify, _maybe_upcast)
from pandas.core.daterange import DateRange
from pandas.core.generic import NDFrame
from pandas.core.generic import NDFrame, AxisProperty
from pandas.core.index import Index, MultiIndex, NULL_INDEX, _ensure_index
from pandas.core.indexing import _NDFrameIndexer, _maybe_droplevels
from pandas.core.internals import BlockManager, make_block, form_blocks
Expand Down Expand Up @@ -209,8 +209,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
else:
raise PandasError('DataFrame constructor not properly called!')

self._data = mgr
self._series_cache = {}
NDFrame.__init__(self, mgr)

def _init_dict(self, data, index, columns, dtype=None):
"""
Expand Down Expand Up @@ -790,28 +789,9 @@ def get_dtype_counts(self):
#----------------------------------------------------------------------
# properties for index and columns

def _get_columns(self):
return self._data.axes[0]

def _set_columns(self, value):
self._data.set_axis(0, value)
self._clear_caches()
columns = property(fset=_set_columns, fget=_get_columns)

def _get_index(self):
return self._data.axes[1]

def _set_index(self, value):
self._data.set_axis(1, value)
self._clear_caches()
index = property(fset=_set_index, fget=_get_index)

def _clear_caches(self):
self._series_cache.clear()

def _consolidate_inplace(self):
self._clear_caches()
NDFrame._consolidate_inplace(self)
columns = AxisProperty(0)
index = AxisProperty(1)

def as_matrix(self, columns=None):
"""
Expand Down Expand Up @@ -859,7 +839,9 @@ def __setstate__(self, state):
else: # pragma: no cover
# old pickling format, for compatibility
self._unpickle_matrix_compat(state)
self._series_cache = {}

# ordinarily created in NDFrame
self._item_cache = {}

# legacy pickle formats
def _unpickle_frame_compat(self, state): # pragma: no cover
Expand Down Expand Up @@ -919,13 +901,11 @@ def get_value(self, index, col):
-------
element : scalar value
"""
iloc = self.index.get_loc(index)
vals = self._getitem_single(col).values
result = vals[iloc]
assert(not lib.is_array(result)) # a little faster than isinstance
return result
series = self._get_item_cache(col)
engine = self.index._engine
return engine.get_value(series, index)

def put_value(self, index, col, value):
def set_value(self, index, col, value):
"""
Put single value at passed column and index
Expand All @@ -935,9 +915,9 @@ def put_value(self, index, col, value):
col : column label
value : scalar value
"""
iloc = self.index.get_loc(index)
vals = self._getitem_single(col).values
vals[iloc] = value
series = self._get_item_cache(col)
engine = self.index._engine
return engine.set_value(series, index, value)

def __getitem__(self, key):
# slice rows
Expand All @@ -956,7 +936,7 @@ def __getitem__(self, key):
elif isinstance(self.columns, MultiIndex):
return self._getitem_multilevel(key)
else:
return self._getitem_single(key)
return self._get_item_cache(key)

def _getitem_array(self, key):
if key.dtype == np.bool_:
Expand Down Expand Up @@ -996,17 +976,10 @@ def _getitem_multilevel(self, key):
columns=result_columns)
return result
else:
return self._getitem_single(key)
return self._get_item_cache(key)

def _getitem_single(self, key):
cache = self._series_cache
try:
return cache[key]
except:
values = self._data.get(key)
res = Series(values, index=self.index, name=key)
cache[key] = res
return res
def _box_item_values(self, key, values):
    """
    Wrap raw column values in a Series.

    The result is labelled with this frame's index and named after the
    requested column key.
    """
    row_labels = self.index
    return Series(values, index=row_labels, name=key)

def __getattr__(self, name):
"""After regular attribute access, try looking up the name of a column.
Expand Down Expand Up @@ -1072,12 +1045,7 @@ def _set_item(self, key, value):
"""
value = self._sanitize_column(value)
value = np.atleast_2d(value)
self._data.set(key, value)

try:
del self._series_cache[key]
except KeyError:
pass
NDFrame._set_item(self, key, value)

def _sanitize_column(self, value):
# Need to make sure new columns (which go into the BlockManager as new
Expand All @@ -1103,17 +1071,6 @@ def _sanitize_column(self, value):

return value

def __delitem__(self, key):
"""
Delete column from DataFrame
"""
self._data.delete(key)

try:
del self._series_cache[key]
except KeyError:
pass

def pop(self, item):
"""
Return column and drop from frame. Raise KeyError if not found.
Expand All @@ -1122,9 +1079,7 @@ def pop(self, item):
-------
column : Series
"""
result = self[item]
del self[item]
return result
return NDFrame.pop(self, item)

# to support old APIs
@property
Expand Down Expand Up @@ -1716,11 +1671,11 @@ def rename(self, index=None, columns=None, copy=True):

def _rename_index_inplace(self, mapper):
self._data = self._data.rename_axis(mapper, axis=1)
self._clear_caches()
self._clear_item_cache()

def _rename_columns_inplace(self, mapper):
self._data = self._data.rename_items(mapper, copydata=False)
self._clear_caches()
self._clear_item_cache()

#----------------------------------------------------------------------
# Arithmetic / combination related
Expand Down
55 changes: 53 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ def __get__(self, obj, type=None):
return data.axes[self.axis]

def __set__(self, obj, value):
data = getattr(obj, '_data')
data.set_axis(self.axis, value)
obj._set_axis(self.axis, value)

class PandasObject(Picklable):

Expand Down Expand Up @@ -226,6 +225,7 @@ def __init__(self, data, axes=None, copy=False, dtype=None):
data = data.astype(dtype)

self._data = data
self._item_cache = {}

def astype(self, dtype):
"""
Expand Down Expand Up @@ -260,10 +260,61 @@ def values(self):
def ndim(self):
return self._data.ndim

def _set_axis(self, axis, labels):
self._data.set_axis(axis, labels)
self._clear_item_cache()

def __getitem__(self, item):
return self._get_item_cache(item)

def _get_item_cache(self, item):
cache = self._item_cache
try:
return cache[item]
except Exception:
values = self._data.get(item)
res = self._box_item_values(item, values)
cache[item] = res
return res

def _box_item_values(self, key, values):
raise NotImplementedError

def _clear_item_cache(self):
self._item_cache.clear()

def _set_item(self, key, value):
self._data.set(key, value)

try:
del self._item_cache[key]
except KeyError:
pass

def __delitem__(self, key):
"""
Delete item
"""
self._data.delete(key)

try:
del self._item_cache[key]
except KeyError:
pass

def pop(self, item):
    """
    Return item and drop from frame. Raise KeyError if not found.

    The item is looked up first and deleted afterwards, so a failed
    lookup leaves the object untouched.
    """
    popped = self[item]
    del self[item]
    return popped

#----------------------------------------------------------------------
# Consolidation of internals

def _consolidate_inplace(self):
self._clear_item_cache()
self._data = self._data.consolidate()

def consolidate(self):
Expand Down
14 changes: 14 additions & 0 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,20 @@ def get_loc(self, key):
"""
return self._engine.get_loc(key)

def get_value(self, arr, key):
    """
    Fast lookup of the value stored in a 1-dimensional ndarray at the
    position of label ``key``. The work is delegated to the index engine.
    Only use this if you know what you're doing.
    """
    engine = self._engine
    return engine.get_value(arr, key)

def put_value(self, arr, key, value):
    """
    Fast assignment of ``value`` into a 1-dimensional ndarray at the
    position of label ``key``. The work is delegated to the index engine
    and nothing is returned. Only use this if you know what you're doing.
    """
    # NOTE(review): DataFrame.set_value calls ``engine.set_value`` while this
    # method calls ``engine.put_value`` -- confirm which name the engine
    # actually implements.
    engine = self._engine
    engine.put_value(arr, key, value)

def get_indexer(self, target, method=None):
"""
Compute indexer and mask for new index given the current index. The
Expand Down
19 changes: 7 additions & 12 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def __init__(self, data=None, items=None, major_axis=None, minor_axis=None,
else: # pragma: no cover
raise PandasError('Panel constructor not properly called!')

self._data = mgr
NDFrame.__init__(self, mgr)

def _init_dict(self, data, axes, dtype=None):
items, major, minor = axes
Expand Down Expand Up @@ -447,9 +447,8 @@ def _get_values(self):

values = property(fget=_get_values)

def __getitem__(self, key):
mat = self._data.get(key)
return DataFrame(mat, index=self.major_axis, columns=self.minor_axis)
def _box_item_values(self, key, values):
    """
    Wrap the raw values of one panel item in a DataFrame.

    Rows are labelled by the panel's major axis and columns by its minor
    axis; ``key`` is accepted for interface compatibility but not used.
    """
    row_labels = self.major_axis
    column_labels = self.minor_axis
    return DataFrame(values, index=row_labels, columns=column_labels)

def _slice(self, slobj, axis=0):
new_data = self._data.get_slice(slobj, axis=axis)
Expand All @@ -476,12 +475,9 @@ def __setitem__(self, key, value):
mat.fill(value)

mat = mat.reshape((1, N, K))
self._data.set(key, mat)
NDFrame._set_item(self, key, mat)

def __delitem__(self, key):
self._data.delete(key)

def pop(self, key):
def pop(self, item):
"""
Return item slice from panel and delete from panel
Expand All @@ -494,9 +490,7 @@ def pop(self, key):
-------
y : DataFrame
"""
result = self[key]
del self[key]
return result
return NDFrame.pop(self, item)

def __getstate__(self):
"Returned pickled representation of the panel"
Expand All @@ -510,6 +504,7 @@ def __setstate__(self, state):
self._unpickle_panel_compat(state)
else: # pragma: no cover
raise ValueError('unrecognized pickle')
self._item_cache = {}

def _unpickle_panel_compat(self, state): # pragma: no cover
"Unpickle the panel"
Expand Down
Loading

0 comments on commit 768af08

Please sign in to comment.