From f3fa93bb9701925916352a52f865d17be0f38f86 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 24 Jul 2016 13:50:26 -0400 Subject: [PATCH] CLN: Removed SparsePanel --- bench/bench_sparse.py | 92 ----- doc/source/sparse.rst | 21 +- doc/source/whatsnew/v0.19.0.txt | 2 + pandas/api/tests/test_api.py | 2 +- pandas/core/panel.py | 22 +- pandas/core/sparse.py | 1 - pandas/io/packers.py | 14 +- pandas/io/pytables.py | 37 +- pandas/io/tests/test_packers.py | 20 -- pandas/io/tests/test_pytables.py | 17 - pandas/sparse/api.py | 1 - pandas/sparse/panel.py | 563 ------------------------------ pandas/sparse/tests/test_panel.py | 279 --------------- pandas/stats/plm.py | 4 +- pandas/tests/test_panel.py | 54 +-- pandas/util/testing.py | 16 - 16 files changed, 33 insertions(+), 1112 deletions(-) delete mode 100644 bench/bench_sparse.py delete mode 100644 pandas/sparse/panel.py delete mode 100644 pandas/sparse/tests/test_panel.py diff --git a/bench/bench_sparse.py b/bench/bench_sparse.py deleted file mode 100644 index 0aa705118d970..0000000000000 --- a/bench/bench_sparse.py +++ /dev/null @@ -1,92 +0,0 @@ -import numpy as np - -from pandas import * -import pandas.core.sparse as spm -import pandas.compat as compat -reload(spm) -from pandas.core.sparse import * - -N = 10000. - -arr1 = np.arange(N) -index = Index(np.arange(N)) - -off = N // 10 -arr1[off: 2 * off] = np.NaN -arr1[4 * off: 5 * off] = np.NaN -arr1[8 * off: 9 * off] = np.NaN - -arr2 = np.arange(N) -arr2[3 * off // 2: 2 * off + off // 2] = np.NaN -arr2[8 * off + off // 2: 9 * off + off // 2] = np.NaN - -s1 = SparseSeries(arr1, index=index) -s2 = SparseSeries(arr2, index=index) - -is1 = SparseSeries(arr1, kind='integer', index=index) -is2 = SparseSeries(arr2, kind='integer', index=index) - -s1_dense = s1.to_dense() -s2_dense = s2.to_dense() - -if compat.is_platform_linux(): - pth = '/home/wesm/code/pandas/example' -else: - pth = '/Users/wesm/code/pandas/example' - -dm = DataFrame.load(pth) - -sdf = dm.to_sparse() - - -def new_data_like(sdf): - new_data = {} - for col, series in compat.iteritems(sdf): - new_data[col] = SparseSeries(np.random.randn(len(series.sp_values)), - index=sdf.index, - sparse_index=series.sp_index, - fill_value=series.fill_value) - - return SparseDataFrame(new_data) - -# data = {} -# for col, ser in dm.iteritems(): -# data[col] = SparseSeries(ser) - -dwp = Panel.fromDict({'foo': dm}) -# sdf = SparseDataFrame(data) - - -lp = stack_sparse_frame(sdf) - - -swp = SparsePanel({'A': sdf}) -swp = SparsePanel({'A': sdf, - 'B': sdf, - 'C': sdf, - 'D': sdf}) - -y = sdf -x = SparsePanel({'x1': sdf + new_data_like(sdf) / 10, - 'x2': sdf + new_data_like(sdf) / 10}) - -dense_y = sdf -dense_x = x.to_dense() - -# import hotshot, hotshot.stats -# prof = hotshot.Profile('test.prof') - -# benchtime, stones = prof.runcall(ols, y=y, x=x) - -# prof.close() - -# stats = hotshot.stats.load('test.prof') - -dense_model = ols(y=dense_y, x=dense_x) - -import pandas.stats.plm as plm -import pandas.stats.interface as face -reload(plm) -reload(face) - -# model = face.ols(y=y, x=x) diff --git a/doc/source/sparse.rst b/doc/source/sparse.rst index 257fb2909d42c..478f90576e168 100644 --- a/doc/source/sparse.rst +++ b/doc/source/sparse.rst @@ -15,13 +15,15 @@ Sparse data structures ********************** -We have implemented "sparse" versions of Series, DataFrame, and Panel. These -are not sparse in the typical "mostly 0". You can view these objects as being -"compressed" where any data matching a specific value (NaN/missing by default, -though any value can be chosen) is omitted. A special ``SparseIndex`` object -tracks where data has been "sparsified". This will make much more sense in an -example. All of the standard pandas data structures have a ``to_sparse`` -method: +.. note:: The ``SparsePanel`` class has been removed in 0.19.0 + +We have implemented "sparse" versions of Series and DataFrame (there used to be +one for Panel but was removed in 0.19.0). These are not sparse in the typical +"mostly 0". You can view these objects as being "compressed" where any data matching +a specific value (NaN/missing by default, though any value can be chosen) is omitted. +A special ``SparseIndex`` object tracks where data has been "sparsified". This will +make much more sense in an example. All of the standard pandas data structures have +a ``to_sparse`` method: .. ipython:: python @@ -77,9 +79,8 @@ distinct from the ``fill_value``: sparr = pd.SparseArray(arr) sparr -Like the indexed objects (SparseSeries, SparseDataFrame, SparsePanel), a -``SparseArray`` can be converted back to a regular ndarray by calling -``to_dense``: +Like the indexed objects (SparseSeries, SparseDataFrame), a ``SparseArray`` +can be converted back to a regular ndarray by calling ``to_dense``: .. ipython:: python diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index e340d04416fe6..375bbd79fd29b 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -330,6 +330,7 @@ API changes ~~~~~~~~~~~ +- ``Panel.to_sparse`` will raise a ``NotImplementedError`` exception when called (:issue:`13778`) - ``Index.reshape`` will raise a ``NotImplementedError`` exception when called (:issue:`12882`) - Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`) - ``eval``'s upcasting rules for ``float32`` types have been updated to be more consistent with NumPy's rules. New behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64. (:issue:`12388`) @@ -619,6 +620,7 @@ Deprecations Removal of prior version deprecations/changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +- The ``SparsePanel`` class has been removed (:issue:`13778`) - The ``pd.sandbox`` module has been removed in favor of the external library ``pandas-qt`` (:issue:`13670`) - The ``pandas.io.data`` and ``pandas.io.wb`` modules are removed in favor of the `pandas-datareader package `__ (:issue:`13724`). diff --git a/pandas/api/tests/test_api.py b/pandas/api/tests/test_api.py index 8143f925af3e0..fda81ee6c9045 100644 --- a/pandas/api/tests/test_api.py +++ b/pandas/api/tests/test_api.py @@ -57,7 +57,7 @@ class TestPDApi(Base, tm.TestCase): 'TimedeltaIndex', 'Timestamp'] # these are already deprecated; awaiting removal - deprecated_classes = ['SparsePanel', 'TimeSeries', 'WidePanel', + deprecated_classes = ['TimeSeries', 'WidePanel', 'SparseTimeSeries', 'Panel4D'] # these should be deperecated in the future diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 1d49ac5e2be86..b8cd9b90e7989 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -393,25 +393,15 @@ def _get_plane_axes(self, axis): fromDict = from_dict - def to_sparse(self, fill_value=None, kind='block'): + def to_sparse(self, *args, **kwargs): """ - Convert to SparsePanel - - Parameters - ---------- - fill_value : float, default NaN - kind : {'block', 'integer'} + NOT IMPLEMENTED: do not call this method, as sparsifying is not + supported for Panel objects and will raise an error. - Returns - ------- - y : SparseDataFrame + Convert to SparsePanel """ - from pandas.core.sparse import SparsePanel - frames = dict(self.iteritems()) - return SparsePanel(frames, items=self.items, - major_axis=self.major_axis, - minor_axis=self.minor_axis, default_kind=kind, - default_fill_value=fill_value) + raise NotImplementedError("sparsifying is not supported " + "for Panel objects") def to_excel(self, path, na_rep='', engine=None, **kwargs): """ diff --git a/pandas/core/sparse.py b/pandas/core/sparse.py index 701e6b1102b05..4fc329844d616 100644 --- a/pandas/core/sparse.py +++ b/pandas/core/sparse.py @@ -8,4 +8,3 @@ from pandas.sparse.series import SparseSeries from pandas.sparse.frame import SparseDataFrame -from pandas.sparse.panel import SparsePanel diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 94f390955dddd..1838d9175e597 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -56,7 +56,7 @@ Panel, RangeIndex, PeriodIndex, DatetimeIndex, NaT, Categorical) from pandas.tslib import NaTType -from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel +from pandas.sparse.api import SparseSeries, SparseDataFrame from pandas.sparse.array import BlockIndex, IntIndex from pandas.core.generic import NDFrame from pandas.core.common import PerformanceWarning @@ -447,18 +447,6 @@ def encode(obj): # d['data'] = dict([(name, ss) # for name, ss in compat.iteritems(obj)]) # return d - elif isinstance(obj, SparsePanel): - raise NotImplementedError( - 'msgpack sparse frame is not implemented' - ) - # d = {'typ': 'sparse_panel', - # 'klass': obj.__class__.__name__, - # 'items': obj.items} - # for f in ['default_fill_value', 'default_kind']: - # d[f] = getattr(obj, f, None) - # d['data'] = dict([(name, df) - # for name, df in compat.iteritems(obj)]) - # return d else: data = obj._data diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 038ca7ac7775b..7503b21160250 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -29,7 +29,7 @@ MultiIndex, Int64Index, isnull) from pandas.core import config from pandas.io.common import _stringify_path -from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel +from pandas.sparse.api import SparseSeries, SparseDataFrame from pandas.sparse.array import BlockIndex, IntIndex from pandas.tseries.api import PeriodIndex, DatetimeIndex from pandas.tseries.tdi import TimedeltaIndex @@ -169,7 +169,6 @@ class DuplicateWarning(Warning): SparseDataFrame: u('sparse_frame'), Panel: u('wide'), Panel4D: u('ndim'), - SparsePanel: u('sparse_panel') } # storer class map @@ -183,7 +182,6 @@ class DuplicateWarning(Warning): u('frame'): 'FrameFixed', u('sparse_frame'): 'SparseFrameFixed', u('wide'): 'PanelFixed', - u('sparse_panel'): 'SparsePanelFixed', } # table class map @@ -2777,39 +2775,6 @@ def write(self, obj, **kwargs): self.write_index('columns', obj.columns) -class SparsePanelFixed(SparseFixed): - pandas_kind = u('sparse_panel') - attributes = ['default_kind', 'default_fill_value'] - - def read(self, **kwargs): - kwargs = self.validate_read(kwargs) - items = self.read_index('items') - - sdict = {} - for name in items: - key = 'sparse_frame_%s' % name - s = SparseFrameFixed(self.parent, getattr(self.group, key)) - s.infer_axes() - sdict[name] = s.read() - return SparsePanel(sdict, items=items, default_kind=self.default_kind, - default_fill_value=self.default_fill_value) - - def write(self, obj, **kwargs): - super(SparsePanelFixed, self).write(obj, **kwargs) - self.attrs.default_fill_value = obj.default_fill_value - self.attrs.default_kind = obj.default_kind - self.write_index('items', obj.items) - - for name, sdf in obj.iteritems(): - key = 'sparse_frame_%s' % name - if key not in self.group._v_children: - node = self._handle.create_group(self.group, key) - else: - node = getattr(self.group, key) - s = SparseFrameFixed(self.parent, node) - s.write(sdf) - - class BlockManagerFixed(GenericFixed): attributes = ['ndim', 'nblocks'] is_shape_reversed = False diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index fe5972d35d5ec..cf61ad9a35935 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -542,26 +542,6 @@ def test_sparse_frame(self): self._check_roundtrip(ss3, tm.assert_frame_equal, check_frame_type=True) - def test_sparse_panel(self): - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - - items = ['x', 'y', 'z'] - p = Panel(dict((i, tm.makeDataFrame().ix[:2, :2]) for i in items)) - sp = p.to_sparse() - - self._check_roundtrip(sp, tm.assert_panel_equal, - check_panel_type=True) - - sp2 = p.to_sparse(kind='integer') - self._check_roundtrip(sp2, tm.assert_panel_equal, - check_panel_type=True) - - sp3 = p.to_sparse(fill_value=0) - self._check_roundtrip(sp3, tm.assert_panel_equal, - check_panel_type=True) - class TestCompression(TestPackers): """See https://github.com/pydata/pandas/pull/9783 diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 89d2f13f256fe..f95e764ad4da3 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -2688,23 +2688,6 @@ def test_sparse_frame(self): self._check_double_roundtrip(ss3, tm.assert_frame_equal, check_frame_type=True) - def test_sparse_panel(self): - - items = ['x', 'y', 'z'] - p = Panel(dict((i, tm.makeDataFrame().ix[:2, :2]) for i in items)) - sp = p.to_sparse() - - self._check_double_roundtrip(sp, assert_panel_equal, - check_panel_type=True) - - sp2 = p.to_sparse(kind='integer') - self._check_double_roundtrip(sp2, assert_panel_equal, - check_panel_type=True) - - sp3 = p.to_sparse(fill_value=0) - self._check_double_roundtrip(sp3, assert_panel_equal, - check_panel_type=True) - def test_float_index(self): # GH #454 diff --git a/pandas/sparse/api.py b/pandas/sparse/api.py index b4d874e6a1ab9..55841fbeffa2d 100644 --- a/pandas/sparse/api.py +++ b/pandas/sparse/api.py @@ -4,4 +4,3 @@ from pandas.sparse.list import SparseList from pandas.sparse.series import SparseSeries, SparseTimeSeries from pandas.sparse.frame import SparseDataFrame -from pandas.sparse.panel import SparsePanel diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py deleted file mode 100644 index 4370d040d8eaf..0000000000000 --- a/pandas/sparse/panel.py +++ /dev/null @@ -1,563 +0,0 @@ -""" -Data structures for sparse float data. Life is made simpler by dealing only -with float64 data -""" - -# pylint: disable=E1101,E1103,W0231 - -import warnings -from pandas.compat import lrange, zip -from pandas import compat -import numpy as np - -from pandas.types.common import is_list_like, is_scalar -from pandas.core.index import Index, MultiIndex, _ensure_index -from pandas.core.frame import DataFrame -from pandas.core.panel import Panel -from pandas.sparse.frame import SparseDataFrame -from pandas.util.decorators import deprecate - -import pandas.core.common as com -import pandas.core.ops as ops - - -class SparsePanelAxis(object): - def __init__(self, cache_field, frame_attr): - self.cache_field = cache_field - self.frame_attr = frame_attr - - def __get__(self, obj, type=None): - return getattr(obj, self.cache_field, None) - - def __set__(self, obj, value): - value = _ensure_index(value) - - if isinstance(value, MultiIndex): - raise NotImplementedError("value cannot be a MultiIndex") - - for v in compat.itervalues(obj._frames): - setattr(v, self.frame_attr, value) - - setattr(obj, self.cache_field, value) - - -class SparsePanel(Panel): - """ - Sparse version of Panel - - Parameters - ---------- - frames : dict of DataFrame objects - items : array-like - major_axis : array-like - minor_axis : array-like - default_kind : {'block', 'integer'}, default 'block' - Default sparse kind for converting Series to SparseSeries. Will not - override SparseSeries passed into constructor - default_fill_value : float - Default fill_value for converting Series to SparseSeries. Will not - override SparseSeries passed in - - Notes - ----- - """ - ndim = 3 - _typ = 'panel' - _subtyp = 'sparse_panel' - - def __init__(self, frames=None, items=None, major_axis=None, - minor_axis=None, default_fill_value=np.nan, - default_kind='block', copy=False): - - # deprecation #11157 - warnings.warn("SparsePanel is deprecated and will be removed in a " - "future version", FutureWarning, stacklevel=3) - - if frames is None: - frames = {} - - if isinstance(frames, np.ndarray): - new_frames = {} - for item, vals in zip(items, frames): - new_frames[item] = SparseDataFrame( - vals, index=major_axis, columns=minor_axis, - default_fill_value=default_fill_value, - default_kind=default_kind) - frames = new_frames - - if not isinstance(frames, dict): - raise TypeError('input must be a dict, a %r was passed' % - type(frames).__name__) - - self.default_fill_value = fill_value = default_fill_value - self.default_kind = kind = default_kind - - # pre-filter, if necessary - if items is None: - items = Index(sorted(frames.keys())) - items = _ensure_index(items) - - (clean_frames, major_axis, - minor_axis) = _convert_frames(frames, major_axis, minor_axis, - kind=kind, fill_value=fill_value) - - self._frames = clean_frames - - # do we want to fill missing ones? - for item in items: - if item not in clean_frames: - raise ValueError('column %r not found in data' % item) - - self._items = items - self.major_axis = major_axis - self.minor_axis = minor_axis - - def _consolidate_inplace(self): # pragma: no cover - # do nothing when DataFrame calls this method - pass - - def __array_wrap__(self, result): - return SparsePanel(result, items=self.items, - major_axis=self.major_axis, - minor_axis=self.minor_axis, - default_kind=self.default_kind, - default_fill_value=self.default_fill_value) - - @classmethod - def from_dict(cls, data): - """ - Analogous to Panel.from_dict - """ - return SparsePanel(data) - - def to_dense(self): - """ - Convert SparsePanel to (dense) Panel - - Returns - ------- - dense : Panel - """ - return Panel(self.values, self.items, self.major_axis, self.minor_axis) - - def as_matrix(self): - return self.values - - @property - def values(self): - # return dense values - return np.array([self._frames[item].values for item in self.items]) - - # need a special property for items to make the field assignable - - _items = None - - def _get_items(self): - return self._items - - def _set_items(self, new_items): - new_items = _ensure_index(new_items) - if isinstance(new_items, MultiIndex): - raise NotImplementedError("itemps cannot be a MultiIndex") - - # need to create new frames dict - - old_frame_dict = self._frames - old_items = self._items - self._frames = dict((new_k, old_frame_dict[old_k]) - for new_k, old_k in zip(new_items, old_items)) - self._items = new_items - - items = property(fget=_get_items, fset=_set_items) - - # DataFrame's index - major_axis = SparsePanelAxis('_major_axis', 'index') - - # DataFrame's columns / "items" - minor_axis = SparsePanelAxis('_minor_axis', 'columns') - - def _ixs(self, i, axis=0): - """ - for compat as we don't support Block Manager here - i : int, slice, or sequence of integers - axis : int - """ - - key = self._get_axis(axis)[i] - - # xs cannot handle a non-scalar key, so just reindex here - if is_list_like(key): - return self.reindex(**{self._get_axis_name(axis): key}) - - return self.xs(key, axis=axis) - - def _slice(self, slobj, axis=0, kind=None): - """ - for compat as we don't support Block Manager here - """ - axis = self._get_axis_name(axis) - index = self._get_axis(axis) - - return self.reindex(**{axis: index[slobj]}) - - def _get_item_cache(self, key): - return self._frames[key] - - def __setitem__(self, key, value): - if isinstance(value, DataFrame): - value = value.reindex(index=self.major_axis, - columns=self.minor_axis) - if not isinstance(value, SparseDataFrame): - value = value.to_sparse(fill_value=self.default_fill_value, - kind=self.default_kind) - else: - raise ValueError('only DataFrame objects can be set currently') - - self._frames[key] = value - - if key not in self.items: - self._items = Index(list(self.items) + [key]) - - def set_value(self, item, major, minor, value): - """ - Quickly set single value at (item, major, minor) location - - Parameters - ---------- - item : item label (panel item) - major : major axis label (panel item row) - minor : minor axis label (panel item column) - value : scalar - - Notes - ----- - This method *always* returns a new object. It is not particularly - efficient but is provided for API compatibility with Panel - - Returns - ------- - panel : SparsePanel - """ - dense = self.to_dense().set_value(item, major, minor, value) - return dense.to_sparse(kind=self.default_kind, - fill_value=self.default_fill_value) - - def __delitem__(self, key): - loc = self.items.get_loc(key) - indices = lrange(loc) + lrange(loc + 1, len(self.items)) - del self._frames[key] - self._items = self._items.take(indices) - - def __getstate__(self): - # pickling - from pandas.io.pickle import _pickle_array - return (self._frames, _pickle_array(self.items), - _pickle_array(self.major_axis), - _pickle_array(self.minor_axis), self.default_fill_value, - self.default_kind) - - def __setstate__(self, state): - frames, items, major, minor, fv, kind = state - - from pandas.io.pickle import _unpickle_array - self.default_fill_value = fv - self.default_kind = kind - self._items = _ensure_index(_unpickle_array(items)) - self._major_axis = _ensure_index(_unpickle_array(major)) - self._minor_axis = _ensure_index(_unpickle_array(minor)) - self._frames = frames - - def copy(self, deep=True): - """ - Make a copy of the sparse panel - - Returns - ------- - copy : SparsePanel - """ - - d = self._construct_axes_dict() - if deep: - new_data = dict((k, v.copy(deep=True)) - for k, v in compat.iteritems(self._frames)) - d = dict((k, v.copy(deep=True)) for k, v in compat.iteritems(d)) - else: - new_data = self._frames.copy() - d['default_fill_value'] = self.default_fill_value - d['default_kind'] = self.default_kind - - return SparsePanel(new_data, **d) - - def to_frame(self, filter_observations=True): - """ - Convert SparsePanel to (dense) DataFrame - - Returns - ------- - frame : DataFrame - """ - if not filter_observations: - raise TypeError('filter_observations=False not supported for ' - 'SparsePanel.to_long') - - I, N, K = self.shape - counts = np.zeros(N * K, dtype=int) - - d_values = {} - d_indexer = {} - - for item in self.items: - frame = self[item] - - values, major, minor = _stack_sparse_info(frame) - - # values are stacked column-major - indexer = minor * N + major - counts.put(indexer, counts.take(indexer) + 1) # cuteness - - d_values[item] = values - d_indexer[item] = indexer - - # have full set of observations for each item - mask = counts == I - - # for each item, take mask values at index locations for those sparse - # values, and use that to select values - values = np.column_stack([d_values[item][mask.take(d_indexer[item])] - for item in self.items]) - - inds, = mask.nonzero() - - # still column major - major_labels = inds % N - minor_labels = inds // N - - index = MultiIndex(levels=[self.major_axis, self.minor_axis], - labels=[major_labels, minor_labels], - verify_integrity=False) - - df = DataFrame(values, index=index, columns=self.items) - return df.sortlevel(level=0) - - to_long = deprecate('to_long', to_frame) - toLong = deprecate('toLong', to_frame) - - def reindex(self, major=None, items=None, minor=None, major_axis=None, - minor_axis=None, copy=False): - """ - Conform / reshape panel axis labels to new input labels - - Parameters - ---------- - major : array-like, default None - items : array-like, default None - minor : array-like, default None - copy : boolean, default False - Copy underlying SparseDataFrame objects - - Returns - ------- - reindexed : SparsePanel - """ - major = com._mut_exclusive(major=major, major_axis=major_axis) - minor = com._mut_exclusive(minor=minor, minor_axis=minor_axis) - - if com._all_none(items, major, minor): - raise ValueError('Must specify at least one axis') - - major = self.major_axis if major is None else major - minor = self.minor_axis if minor is None else minor - - if items is not None: - new_frames = {} - for item in items: - if item in self._frames: - new_frames[item] = self._frames[item] - else: - raise NotImplementedError('Reindexing with new items not ' - 'yet supported') - else: - new_frames = self._frames - - if copy: - new_frames = dict((k, v.copy()) - for k, v in compat.iteritems(new_frames)) - - return SparsePanel(new_frames, items=items, major_axis=major, - minor_axis=minor, - default_fill_value=self.default_fill_value, - default_kind=self.default_kind) - - def _combine(self, other, func, axis=0): - if isinstance(other, DataFrame): - return self._combineFrame(other, func, axis=axis) - elif isinstance(other, Panel): - return self._combinePanel(other, func) - elif is_scalar(other): - new_frames = dict((k, func(v, other)) - for k, v in self.iteritems()) - return self._new_like(new_frames) - - def _combineFrame(self, other, func, axis=0): - index, columns = self._get_plane_axes(axis) - axis = self._get_axis_number(axis) - - other = other.reindex(index=index, columns=columns) - - if axis == 0: - new_values = func(self.values, other.values) - elif axis == 1: - new_values = func(self.values.swapaxes(0, 1), other.values.T) - new_values = new_values.swapaxes(0, 1) - elif axis == 2: - new_values = func(self.values.swapaxes(0, 2), other.values) - new_values = new_values.swapaxes(0, 2) - - # TODO: make faster! - new_frames = {} - for item, item_slice in zip(self.items, new_values): - old_frame = self[item] - ofv = old_frame.default_fill_value - ok = old_frame.default_kind - new_frames[item] = SparseDataFrame(item_slice, - index=self.major_axis, - columns=self.minor_axis, - default_fill_value=ofv, - default_kind=ok) - - return self._new_like(new_frames) - - def _new_like(self, new_frames): - return SparsePanel(new_frames, self.items, self.major_axis, - self.minor_axis, - default_fill_value=self.default_fill_value, - default_kind=self.default_kind) - - def _combinePanel(self, other, func): - items = self.items.union(other.items) - major = self.major_axis.union(other.major_axis) - minor = self.minor_axis.union(other.minor_axis) - - # could check that everything's the same size, but forget it - - this = self.reindex(items=items, major=major, minor=minor) - other = other.reindex(items=items, major=major, minor=minor) - - new_frames = {} - for item in items: - new_frames[item] = func(this[item], other[item]) - - if not isinstance(other, SparsePanel): - new_default_fill = self.default_fill_value - else: - # maybe unnecessary - new_default_fill = func(self.default_fill_value, - other.default_fill_value) - - return SparsePanel(new_frames, items, major, minor, - default_fill_value=new_default_fill, - default_kind=self.default_kind) - - def major_xs(self, key): - """ - Return slice of panel along major axis - - Parameters - ---------- - key : object - Major axis label - - Returns - ------- - y : DataFrame - index -> minor axis, columns -> items - """ - slices = dict((k, v.xs(key)) for k, v in self.iteritems()) - return DataFrame(slices, index=self.minor_axis, columns=self.items) - - def minor_xs(self, key): - """ - Return slice of panel along minor axis - - Parameters - ---------- - key : object - Minor axis label - - Returns - ------- - y : SparseDataFrame - index -> major axis, columns -> items - """ - slices = dict((k, v[key]) for k, v in self.iteritems()) - return SparseDataFrame(slices, index=self.major_axis, - columns=self.items, - default_fill_value=self.default_fill_value, - default_kind=self.default_kind) - - # TODO: allow SparsePanel to work with flex arithmetic. - # pow and mod only work for scalars for now - def pow(self, val, *args, **kwargs): - """wrapper around `__pow__` (only works for scalar values)""" - return self.__pow__(val) - - def mod(self, val, *args, **kwargs): - """wrapper around `__mod__` (only works for scalar values""" - return self.__mod__(val) - -# Sparse objects opt out of numexpr -SparsePanel._add_aggregate_operations(use_numexpr=False) -ops.add_special_arithmetic_methods(SparsePanel, use_numexpr=False, ** - ops.panel_special_funcs) -SparseWidePanel = SparsePanel - - -def _convert_frames(frames, index, columns, fill_value=np.nan, kind='block'): - from pandas.core.panel import _get_combined_index - output = {} - for item, df in compat.iteritems(frames): - if not isinstance(df, SparseDataFrame): - df = SparseDataFrame(df, default_kind=kind, - default_fill_value=fill_value) - - output[item] = df - - if index is None: - all_indexes = [x.index for x in output.values()] - index = _get_combined_index(all_indexes) - if columns is None: - all_columns = [x.columns for x in output.values()] - columns = _get_combined_index(all_columns) - - index = _ensure_index(index) - columns = _ensure_index(columns) - - for item, df in compat.iteritems(output): - if not (df.index.equals(index) and df.columns.equals(columns)): - output[item] = df.reindex(index=index, columns=columns) - - return output, index, columns - - -def _stack_sparse_info(frame): - lengths = [s.sp_index.npoints for _, s in compat.iteritems(frame)] - - # this is pretty fast - minor_labels = np.repeat(np.arange(len(frame.columns)), lengths) - - inds_to_concat = [] - vals_to_concat = [] - for col in frame.columns: - series = frame[col] - - if not np.isnan(series.fill_value): - raise TypeError('This routine assumes NaN fill value') - - int_index = series.sp_index.to_int_index() - inds_to_concat.append(int_index.indices) - vals_to_concat.append(series.sp_values) - - major_labels = np.concatenate(inds_to_concat) - sparse_values = np.concatenate(vals_to_concat) - - return sparse_values, major_labels, minor_labels diff --git a/pandas/sparse/tests/test_panel.py b/pandas/sparse/tests/test_panel.py deleted file mode 100644 index 09d861fe0a9ac..0000000000000 --- a/pandas/sparse/tests/test_panel.py +++ /dev/null @@ -1,279 +0,0 @@ -# pylint: disable-msg=E1101,W0612 - -import nose # noqa -from numpy import nan -import pandas as pd - -from pandas import DataFrame, bdate_range, Panel -from pandas.core.index import Index -import pandas.util.testing as tm -from pandas.sparse.api import SparseSeries, SparsePanel -import pandas.tests.test_panel as test_panel - - -def panel_data1(): - index = bdate_range('1/1/2011', periods=8) - - return DataFrame({ - 'A': [nan, nan, nan, 0, 1, 2, 3, 4], - 'B': [0, 1, 2, 3, 4, nan, nan, nan], - 'C': [0, 1, 2, nan, nan, nan, 3, 4], - 'D': [nan, 0, 1, nan, 2, 3, 4, nan] - }, index=index) - - -def panel_data2(): - index = bdate_range('1/1/2011', periods=9) - - return DataFrame({ - 'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5], - 'B': [0, 1, 2, 3, 4, 5, nan, nan, nan], - 'C': [0, 1, 2, nan, nan, nan, 3, 4, 5], - 'D': [nan, 0, 1, nan, 2, 3, 4, 5, nan] - }, index=index) - - -def panel_data3(): - index = bdate_range('1/1/2011', periods=10).shift(-2) - - return DataFrame({ - 'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], - 'B': [0, 1, 2, 3, 4, 5, 6, nan, nan, nan], - 'C': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], - 'D': [nan, 0, 1, nan, 2, 3, 4, 5, 6, nan] - }, index=index) - - -class TestSparsePanel(tm.TestCase, test_panel.SafeForLongAndSparse, - test_panel.SafeForSparse): - _multiprocess_can_split_ = True - - def setUp(self): - self.data_dict = { - 'ItemA': panel_data1(), - 'ItemB': panel_data2(), - 'ItemC': panel_data3(), - 'ItemD': panel_data1(), - } - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.panel = SparsePanel(self.data_dict) - - @staticmethod - def _test_op(panel, op): - # arithmetic tests - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = op(panel, 1) - tm.assert_sp_frame_equal(result['ItemA'], op(panel['ItemA'], 1)) - - def test_constructor(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertRaises(ValueError, SparsePanel, self.data_dict, - items=['Item0', 'ItemA', 'ItemB']) - with tm.assertRaisesRegexp(TypeError, - "input must be a dict, a 'list' was " - "passed"): - SparsePanel(['a', 'b', 'c']) - - # deprecation GH11157 - def test_deprecation(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - SparsePanel() - - # GH 9272 - def test_constructor_empty(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - sp = SparsePanel() - self.assertEqual(len(sp.items), 0) - self.assertEqual(len(sp.major_axis), 0) - self.assertEqual(len(sp.minor_axis), 0) - - def test_from_dict(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - fd = SparsePanel.from_dict(self.data_dict) - tm.assert_sp_panel_equal(fd, self.panel) - - def test_pickle(self): - def _test_roundtrip(panel): - result = self.round_trip_pickle(panel) - tm.assertIsInstance(result.items, Index) - tm.assertIsInstance(result.major_axis, Index) - tm.assertIsInstance(result.minor_axis, Index) - tm.assert_sp_panel_equal(panel, result) - - _test_roundtrip(self.panel) - - def test_dense_to_sparse(self): - wp = Panel.from_dict(self.data_dict) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - dwp = wp.to_sparse() - tm.assertIsInstance(dwp['ItemA']['A'], SparseSeries) - - def test_to_dense(self): - dwp = self.panel.to_dense() - dwp2 = Panel.from_dict(self.data_dict) - tm.assert_panel_equal(dwp, dwp2) - - def test_to_frame(self): - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - - def _compare_with_dense(panel): - slp = panel.to_frame() - dlp = panel.to_dense().to_frame() - - self.assert_numpy_array_equal(slp.values, dlp.values) - self.assert_index_equal(slp.index, dlp.index, - check_names=False) - - _compare_with_dense(self.panel) - _compare_with_dense(self.panel.reindex(items=['ItemA'])) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - zero_panel = SparsePanel(self.data_dict, default_fill_value=0) - self.assertRaises(Exception, zero_panel.to_frame) - - self.assertRaises(Exception, self.panel.to_frame, - filter_observations=False) - - def test_long_to_wide_sparse(self): - pass - - def test_values(self): - pass - - def test_setitem(self): - self.panel['ItemE'] = self.panel['ItemC'] - self.panel['ItemF'] = self.panel['ItemC'].to_dense() - - tm.assert_sp_frame_equal(self.panel['ItemE'], self.panel['ItemC']) - tm.assert_sp_frame_equal(self.panel['ItemF'], self.panel['ItemC']) - - expected = pd.Index(['ItemA', 'ItemB', 'ItemC', - 'ItemD', 'ItemE', 'ItemF']) - tm.assert_index_equal(self.panel.items, expected) - - self.assertRaises(Exception, self.panel.__setitem__, 'item6', 1) - - def test_set_value(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - def _check_loc(item, major, minor, val=1.5): - res = self.panel.set_value(item, major, minor, val) - self.assertIsNot(res, self.panel) - self.assertEqual(res.get_value(item, major, minor), val) - - _check_loc('ItemA', self.panel.major_axis[4], - self.panel.minor_axis[3]) - _check_loc('ItemF', self.panel.major_axis[4], - self.panel.minor_axis[3]) - _check_loc('ItemF', 'foo', self.panel.minor_axis[3]) - _check_loc('ItemE', 'foo', 'bar') - - def test_delitem_pop(self): - del self.panel['ItemB'] - tm.assert_index_equal(self.panel.items, - pd.Index(['ItemA', 'ItemC', 'ItemD'])) - crackle = self.panel['ItemC'] - pop = self.panel.pop('ItemC') - self.assertIs(pop, crackle) - tm.assert_almost_equal(self.panel.items, pd.Index(['ItemA', 'ItemD'])) - - self.assertRaises(KeyError, self.panel.__delitem__, 'ItemC') - - def test_copy(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - cop = self.panel.copy() - tm.assert_sp_panel_equal(cop, self.panel) - - def test_reindex(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - - def _compare_with_dense(swp, items, major, minor): - swp_re = swp.reindex(items=items, major=major, minor=minor) - dwp_re = swp.to_dense().reindex(items=items, major=major, - minor=minor) - tm.assert_panel_equal(swp_re.to_dense(), dwp_re) - - _compare_with_dense(self.panel, self.panel.items[:2], - self.panel.major_axis[::2], - self.panel.minor_axis[::2]) - _compare_with_dense(self.panel, None, self.panel.major_axis[::2], - self.panel.minor_axis[::2]) - - self.assertRaises(ValueError, self.panel.reindex) - - # TODO: do something about this later... - self.assertRaises(Exception, self.panel.reindex, - items=['item0', 'ItemA', 'ItemB']) - - # test copying - cp = self.panel.reindex(self.panel.major_axis, copy=True) - cp['ItemA']['E'] = cp['ItemA']['A'] - self.assertNotIn('E', self.panel['ItemA']) - - def test_operators(self): - def _check_ops(panel): - def _dense_comp(op): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - dense = panel.to_dense() - sparse_result = op(panel) - dense_result = op(dense) - tm.assert_panel_equal(sparse_result.to_dense(), - dense_result) - - def _mixed_comp(op): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = op(panel, panel.to_dense()) - expected = op(panel.to_dense(), panel.to_dense()) - tm.assert_panel_equal(result, expected) - - op1 = lambda x: x + 2 - - _dense_comp(op1) - op2 = lambda x: x.add(x.reindex(major=x.major_axis[::2])) - _dense_comp(op2) - op3 = lambda x: x.subtract(x.mean(0), axis=0) - _dense_comp(op3) - op4 = lambda x: x.subtract(x.mean(1), axis=1) - _dense_comp(op4) - op5 = lambda x: x.subtract(x.mean(2), axis=2) - _dense_comp(op5) - - _mixed_comp(Panel.multiply) - _mixed_comp(Panel.subtract) - - # TODO: this case not yet supported! - # op6 = lambda x: x.add(x.to_frame()) - # _dense_comp(op6) - - _check_ops(self.panel) - - def test_major_xs(self): - def _dense_comp(sparse): - dense = sparse.to_dense() - - for idx in sparse.major_axis: - dslice = dense.major_xs(idx) - sslice = sparse.major_xs(idx) - tm.assert_frame_equal(dslice, sslice) - - _dense_comp(self.panel) - - def test_minor_xs(self): - def _dense_comp(sparse): - dense = sparse.to_dense() - - for idx in sparse.minor_axis: - dslice = dense.minor_xs(idx) - sslice = sparse.minor_xs(idx).to_dense() - tm.assert_frame_equal(dslice, sslice) - - _dense_comp(self.panel) - - -if __name__ == '__main__': - import nose # noqa - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/stats/plm.py b/pandas/stats/plm.py index baa30cde9344e..099c45d5ec60b 100644 --- a/pandas/stats/plm.py +++ b/pandas/stats/plm.py @@ -18,7 +18,6 @@ from pandas.core.frame import DataFrame from pandas.core.reshape import get_dummies from pandas.core.series import Series -from pandas.core.sparse import SparsePanel from pandas.stats.ols import OLS, MovingOLS import pandas.stats.common as com import pandas.stats.math as math @@ -137,8 +136,7 @@ def _filter_data(self): if isinstance(data, Panel): data = data.copy() - if not isinstance(data, SparsePanel): - data, cat_mapping = self._convert_x(data) + data, cat_mapping = self._convert_x(data) if not isinstance(data, Panel): data = Panel.from_dict(data, intersect=True) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 46eba1772c47a..a93f2ae5651b4 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -5,7 +5,6 @@ import operator import nose -from functools import wraps import numpy as np import pandas as pd @@ -20,37 +19,16 @@ from pandas.formats.printing import pprint_thing from pandas import compat from pandas.compat import range, lrange, StringIO, OrderedDict, signature -from pandas import SparsePanel from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal, - assert_produces_warning, ensure_clean, - assertRaisesRegexp, makeCustomDataframe as - mkdf, makeMixedDataFrame) + ensure_clean, assertRaisesRegexp, + makeCustomDataframe as mkdf, + makeMixedDataFrame) import pandas.core.panel as panelm import pandas.util.testing as tm -def ignore_sparse_panel_future_warning(func): - """ - decorator to ignore FutureWarning if we have a SparsePanel - - can be removed when SparsePanel is fully removed - """ - - @wraps(func) - def wrapper(self, *args, **kwargs): - - if isinstance(self.panel, SparsePanel): - with assert_produces_warning(FutureWarning, - check_stacklevel=False): - return func(self, *args, **kwargs) - else: - return func(self, *args, **kwargs) - - return wrapper - - class PanelTests(object): panel = None @@ -78,7 +56,6 @@ class SafeForLongAndSparse(object): def test_repr(self): repr(self.panel) - @ignore_sparse_panel_future_warning def test_copy_names(self): for attr in ('major_axis', 'minor_axis'): getattr(self.panel, attr).name = None @@ -261,7 +238,6 @@ def test_get_plane_axes(self): index, columns = self.panel._get_plane_axes('minor_axis') index, columns = self.panel._get_plane_axes(0) - @ignore_sparse_panel_future_warning def test_truncate(self): dates = self.panel.major_axis start, end = dates[1], dates[5] @@ -322,7 +298,6 @@ def test_iteritems(self): self.assertEqual(len(list(self.panel.iteritems())), len(self.panel.items)) - @ignore_sparse_panel_future_warning def test_combineFrame(self): def check_op(op, name): # items @@ -352,18 +327,9 @@ def check_op(op, name): assert_frame_equal(result.minor_xs(idx), op(self.panel.minor_xs(idx), xs)) - ops = ['add', 'sub', 'mul', 'truediv', 'floordiv'] + ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'pow', 'mod'] if not compat.PY3: ops.append('div') - # pow, mod not supported for SparsePanel as flex ops (for now) - if not isinstance(self.panel, SparsePanel): - ops.extend(['pow', 'mod']) - else: - idx = self.panel.minor_axis[1] - with assertRaisesRegexp(ValueError, "Simple arithmetic.*scalar"): - self.panel.pow(self.panel.minor_xs(idx), axis='minor') - with assertRaisesRegexp(ValueError, "Simple arithmetic.*scalar"): - self.panel.mod(self.panel.minor_xs(idx), axis='minor') for op in ops: try: @@ -378,12 +344,10 @@ def check_op(op, name): pprint_thing("Failing operation: %r" % 'div') raise - @ignore_sparse_panel_future_warning def test_combinePanel(self): result = self.panel.add(self.panel) self.assert_panel_equal(result, self.panel * 2) - @ignore_sparse_panel_future_warning def test_neg(self): self.assert_panel_equal(-self.panel, self.panel * -1) @@ -399,7 +363,6 @@ def test_raise_when_not_implemented(self): with self.assertRaises(NotImplementedError): getattr(p, op)(d, axis=0) - @ignore_sparse_panel_future_warning def test_select(self): p = self.panel @@ -431,7 +394,6 @@ def test_get_value(self): expected = self.panel[item][mnr][mjr] assert_almost_equal(result, expected) - @ignore_sparse_panel_future_warning def test_abs(self): result = self.panel.abs() @@ -1654,7 +1616,6 @@ def test_transpose_copy(self): panel.values[0, 1, 1] = np.nan self.assertTrue(notnull(result.values[1, 0, 1])) - @ignore_sparse_panel_future_warning def test_to_frame(self): # filtered filtered = self.panel.to_frame() @@ -2432,7 +2393,12 @@ def test_to_string(self): buf = StringIO() self.panel.to_string(buf) - @ignore_sparse_panel_future_warning + def test_to_sparse(self): + if isinstance(self.panel, Panel): + msg = 'sparsifying is not supported' + tm.assertRaisesRegexp(NotImplementedError, msg, + self.panel.to_sparse) + def test_truncate(self): dates = self.panel.index.levels[0] start, end = dates[1], dates[5] diff --git a/pandas/util/testing.py b/pandas/util/testing.py index a7c66e18aa604..e49d92e4ab202 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1452,22 +1452,6 @@ def assert_sp_frame_equal(left, right, exact_indices=True, assert (col in left) -def assert_sp_panel_equal(left, right, exact_indices=True): - assertIsInstance(left, pd.SparsePanel, '[SparsePanel]') - assertIsInstance(right, pd.SparsePanel, '[SparsePanel]') - - for item, frame in left.iteritems(): - assert (item in right) - # trade-off? - assert_sp_frame_equal(frame, right[item], exact_indices=exact_indices) - - assert_almost_equal(left.default_fill_value, right.default_fill_value) - assert (left.default_kind == right.default_kind) - - for item in right: - assert (item in left) - - def assert_sp_list_equal(left, right): assertIsInstance(left, pd.SparseList, '[SparseList]') assertIsInstance(right, pd.SparseList, '[SparseList]')