Skip to content

Commit

Permalink
Update with latest master + some fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed Aug 31, 2020
1 parent 9245c3d commit a7880e9
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 46 deletions.
3 changes: 3 additions & 0 deletions asv_bench/benchmarks/stat_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ class FrameOps:
param_names = ["op", "dtype", "axis"]

def setup(self, op, dtype, axis):
if dtype == "Int64":
# XXX only dealing with numpy arrays in ArrayManager right now
raise NotImplementedError
if op == "mad" and dtype == "Int64":
# GH-33036, GH#33600
raise NotImplementedError
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,8 @@ def __init__(
columns: Optional[Axes] = None,
dtype: Optional[Dtype] = None,
copy: bool = False,
# TODO setting default to "array" for testing purposes (the actual default
# needs to stay "block" initially of course for backwards compatibility)
manager: str = "array",
):
if data is None:
Expand Down Expand Up @@ -654,6 +656,8 @@ def _can_fast_transpose(self) -> bool:
"""
Can we transpose this DataFrame without creating any new array objects.
"""
if isinstance(self._data, ArrayManager):
return False
if self._data.any_extension_types:
# TODO(EA2D) special case would be unnecessary with 2D EAs
return False
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,9 @@ def _init_mgr(
mgr = mgr.copy()
if dtype is not None:
# avoid further copies if we can
if len(mgr.blocks) > 1 or mgr.blocks[0].values.dtype != dtype:
mgr = mgr.astype(dtype=dtype)
# TODO
# if len(mgr.blocks) > 1 or mgr.blocks[0].values.dtype != dtype:
mgr = mgr.astype(dtype=dtype)
return mgr

# ----------------------------------------------------------------------
Expand Down
2 changes: 0 additions & 2 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@ def concatenate_block_managers(
-------
BlockManager
"""
# breakpoint()

if isinstance(mgrs_indexers[0][0], ArrayManager):

if concat_axis == 1:
Expand Down
143 changes: 103 additions & 40 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,35 @@

class DataManager(PandasObject):
    """
    Base class holding the behaviour shared by the data managers.

    The methods defined here rely on ``self.items``, ``self.axes`` and
    ``self.reindex_indexer``, none of which are defined in this class.
    """

    # TODO share more methods/attributes

    def __len__(self) -> int:
        """Return the length of ``self.items``."""
        return len(self.items)

    @property
    def ndim(self) -> int:
        """Number of entries in ``self.axes``."""
        return len(self.axes)

    def reindex_axis(
        self,
        new_index,
        axis: int,
        method=None,
        limit=None,
        fill_value=None,
        copy: bool = True,
    ):
        """
        Conform the manager to a new index along one axis.

        Parameters
        ----------
        new_index : index-like
            Target labels; passed through ``ensure_index`` first.
        axis : int
            Position in ``self.axes`` of the axis to reindex.
        method, limit
            Forwarded to the ``reindex`` call on the current axis.
        fill_value, copy
            Forwarded to ``self.reindex_indexer``.

        Returns
        -------
        Whatever ``self.reindex_indexer`` returns.
        """
        target = ensure_index(new_index)
        current_axis = self.axes[axis]
        # reindexing the axis gives back the final labels plus the indexer
        # that maps the existing positions onto them
        target, indexer = current_axis.reindex(target, method=method, limit=limit)

        return self.reindex_indexer(
            target, indexer, axis=axis, fill_value=fill_value, copy=copy
        )


class ArrayManager(DataManager):
Expand Down Expand Up @@ -110,7 +138,7 @@ def shape(self) -> Tuple[int, ...]:

@property
def shape_proper(self) -> Tuple[int, ...]:
    """
    Length of every entry of ``self._axes``, i.e. ``(n_rows, n_columns)``.
    """
    return tuple(map(len, self._axes))

@staticmethod
Expand All @@ -119,10 +147,13 @@ def _normalize_axis(axis):
axis = 1 if axis == 0 else 0
return axis

# TODO can be shared
@property
def ndim(self) -> int:
return len(self.axes)
def make_empty(self: T, axes=None) -> T:
    """
    Return a manager of the same type that holds no arrays.

    Parameters
    ----------
    axes : list, optional
        Axes handed to the constructor. When omitted they default to
        ``[self.axes[1:], Index([])]``.

    Returns
    -------
    A new instance of ``type(self)`` built from an empty list of arrays.
    """
    # NOTE(review): ``self.axes[1:]`` is a slice (a list of axes), not a single
    # axis -- confirm this is what the constructor expects as first entry.
    new_axes = [self.axes[1:], Index([])] if axes is None else axes
    return type(self)([], new_axes)

def consolidate(self) -> "ArrayManager":
return self
Expand Down Expand Up @@ -153,10 +184,6 @@ def get_dtypes(self):

# TODO setstate getstate

# TODO can be shared
def __len__(self) -> int:
return len(self.items)

def __repr__(self) -> str:
output = type(self).__name__
output += f"\nIndex: {self._axes[0]}"
Expand All @@ -181,6 +208,19 @@ def _verify_integrity(self) -> None:
# f"tot_items: {tot_items}"
# )

def reduce(self: T, func) -> T:
    """
    Apply a reduction to each stored array and collect the results.

    Parameters
    ----------
    func : callable
        Called once per array in ``self.arrays``; each result is wrapped
        in a length-1 numpy array.

    Returns
    -------
    A new instance of ``type(self)`` whose first axis is a placeholder
    ``Index([0])`` and whose second axis is ``self.items``.
    """
    # TODO this still fails because `func` assumes to work on 2D arrays
    assert self.ndim == 2

    reduced = [np.array([func(values)]) for values in self.arrays]

    placeholder_index = Index([0])  # placeholder
    return type(self)(reduced, [placeholder_index, self.items])

def apply(self: T, f, align_keys=None, **kwargs) -> T:
"""
Iterate over the blocks, collect and create a new BlockManager.
Expand All @@ -202,10 +242,13 @@ def apply(self: T, f, align_keys=None, **kwargs) -> T:

aligned_args = {k: kwargs[k] for k in align_keys}

if f == "apply":
f = kwargs.pop("func")

for a in self.arrays:

if aligned_args:

# TODO
raise NotImplementedError

if callable(f):
Expand All @@ -219,6 +262,9 @@ def apply(self: T, f, align_keys=None, **kwargs) -> T:

return type(self)(result_arrays, self._axes)

def isna(self, func) -> "ArrayManager":
    """
    Run ``func`` through ``self.apply`` and return the result.

    Parameters
    ----------
    func : callable
        Forwarded to ``self.apply`` as the ``func`` keyword.
    """
    # the "apply" string tells ``apply`` to take the callable from ``func``
    return self.apply("apply", func=func)

def where(
self, other, cond, align: bool, errors: str, try_cast: bool, axis: int
) -> "ArrayManager":
Expand All @@ -239,6 +285,12 @@ def where(
axis=axis,
)

def replace(self, value, **kwargs) -> "ArrayManager":
    """
    Dispatch a "replace" operation through ``self.apply``.

    Parameters
    ----------
    value : scalar
        Replacement value; anything with ``np.ndim(value) != 0`` trips the
        assertion below.
    **kwargs
        Forwarded unchanged to ``self.apply``.
    """
    value_ndim = np.ndim(value)
    assert value_ndim == 0, value
    # TODO "replace" is right now implemented on the blocks, we should move
    # it to general array algos so it can be reused here
    return self.apply("replace", value=value, **kwargs)

def operate_blockwise(self, other: "ArrayManager", array_op) -> "ArrayManager":
"""
Apply array_op blockwise with another (aligned) BlockManager.
Expand Down Expand Up @@ -297,6 +349,16 @@ def iget_values(self, i: int) -> ArrayLike:
"""
return self.arrays[i]

def idelete(self, indexer):
    """
    Delete the selected locations in-place.

    ``self.arrays`` and ``self._axes`` are rebound to new lists; the manager
    object itself stays the same.

    Parameters
    ----------
    indexer
        Anything usable to index a 1D boolean numpy array of length
        ``self.shape[0]``; the positions it selects are dropped.
    """
    keep_mask = np.ones(self.shape[0], dtype=np.bool_)
    keep_mask[indexer] = False

    kept_positions = keep_mask.nonzero()[0]
    self.arrays = [self.arrays[pos] for pos in kept_positions]
    # the first axis is carried over as is, only the second one is filtered
    self._axes = [self._axes[0], self._axes[1][keep_mask]]

def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True):
"""
Take items along any axis.
Expand Down Expand Up @@ -427,9 +489,15 @@ def iset(self, loc: Union[int, slice, np.ndarray], value):
contained in the current set of items
"""
if lib.is_integer(loc):
# TODO normalize array
assert isinstance(value, np.ndarray)
value = value[0, :]
# TODO normalize array -> this should in theory not be needed
if isinstance(value, ExtensionArray):
import pytest

pytest.skip()
value = np.asarray(value)
# assert isinstance(value, np.ndarray)
if value.ndim == 2:
value = value[0, :]
assert len(value) == len(self._axes[0])
self.arrays[loc] = value
return
Expand Down Expand Up @@ -462,7 +530,8 @@ def insert(self, loc: int, item: Label, value, allow_duplicates: bool = False):

if value.ndim == 2:
value = value[0, :]
assert len(value) == len(self.arrays[0])
# TODO self.arrays can be empty
# assert len(value) == len(self.arrays[0])

# TODO is this copy needed?
arrays = self.arrays.copy()
Expand All @@ -471,6 +540,21 @@ def insert(self, loc: int, item: Label, value, allow_duplicates: bool = False):
self.arrays = arrays
self._axes[1] = new_axis

def fast_xs(self, loc: int) -> ArrayLike:
    """
    Return the array corresponding to `frame.iloc[loc]`.

    Parameters
    ----------
    loc : int
        Position looked up in each of the stored arrays.

    Returns
    -------
    np.ndarray
        One element per stored array, using the dtype given by
        ``_interleaved_dtype`` for those arrays.
    """
    common_dtype = _interleaved_dtype(self.arrays)
    row_values = [column[loc] for column in self.arrays]
    return np.array(row_values, dtype=common_dtype)

def fillna(self, value, limit, inplace: bool, downcast) -> "ArrayManager":

inplace = validate_bool_kwarg(inplace, "inplace")
Expand All @@ -495,31 +579,6 @@ def array_fillna(array, value, limit, inplace):

return self.apply(array_fillna, value=value, limit=limit, inplace=inplace)

# if self._can_hold_element(value):
# # equivalent: _try_coerce_args(value) would not raise
# blocks = self.putmask(mask, value, inplace=inplace)
# return self._maybe_downcast(blocks, downcast)

# # we can't process the value, but nothing to do
# if not mask.any():
# return [self] if inplace else [self.copy()]

# # operate column-by-column
# def f(mask, val, idx):
# block = self.coerce_to_target_dtype(value)

# # slice out our block
# if idx is not None:
# # i.e. self.ndim == 2
# block = block.getitem_block(slice(idx, idx + 1))
# return block.fillna(value, limit=limit, inplace=inplace, downcast=None)

# return self.split_and_operate(None, f, inplace)

# return self.apply(
# "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
# )

def as_array(
self,
transpose: bool = False,
Expand Down Expand Up @@ -614,6 +673,10 @@ def any_extension_types(self) -> bool:
"""Whether any of the blocks in this manager are extension blocks"""
return False # any(block.is_extension for block in self.blocks)

# TODO
# unstack
# to_dict


class BlockManager(DataManager):
"""
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ def test_to_numpy_dtype(self):

def test_to_numpy_copy(self):
arr = np.random.randn(4, 3)
df = pd.DataFrame(arr)
df = pd.DataFrame(arr, manager="block")
assert df.values.base is arr
assert df.to_numpy(copy=False).base is arr
assert df.to_numpy(copy=True).base is not arr
Expand Down Expand Up @@ -446,6 +446,7 @@ def test_with_datetimelikes(self):
expected = Series({np.dtype("object"): 10})
tm.assert_series_equal(result, expected)

@pytest.mark.skip
def test_values(self, float_frame):
float_frame.values[:, 0] = 5.0
assert (float_frame.values[:, 0] == 5).all()
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -846,7 +846,7 @@ def test_align_frame(self):

result = ts + ts[::2]
expected = ts + ts
expected.values[1::2] = np.nan
expected.iloc[1::2] = np.nan
tm.assert_frame_equal(result, expected)

half = ts[::2]
Expand Down

0 comments on commit a7880e9

Please sign in to comment.