Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REFACTOR-#2467: Convert internal base dataframe objects to ABC #2468

Merged
merged 2 commits into from
Nov 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions modin/engines/base/frame/axis_partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,12 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from abc import ABC
import pandas
from modin.data_management.utils import split_result_of_axis_func_pandas

NOT_IMPLMENTED_MESSAGE = "Must be implemented in child class"


class BaseFrameAxisPartition(object): # pragma: no cover
class BaseFrameAxisPartition(ABC): # pragma: no cover
"""An abstract class that represents the Parent class for any `ColumnPartition` or `RowPartition` class.

This class is intended to simplify the way that operations are performed.
Expand Down Expand Up @@ -73,7 +72,7 @@ def apply(
-------
A list of `BaseFramePartition` objects.
"""
raise NotImplementedError(NOT_IMPLMENTED_MESSAGE)
pass

def shuffle(self, func, lengths, **kwargs):
"""Shuffle the order of the data in this axis based on the `lengths`.
Expand All @@ -86,7 +85,7 @@ def shuffle(self, func, lengths, **kwargs):
-------
A list of RemotePartition objects split by `lengths`.
"""
raise NotImplementedError(NOT_IMPLMENTED_MESSAGE)
pass

# Child classes must have these in order to correctly subclass.
instance_type = None
Expand Down
62 changes: 35 additions & 27 deletions modin/engines/base/frame/partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,18 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

NOT_IMPLEMENTED_MESSAGE = "Must be implemented in child class"
from abc import ABC


class BaseFramePartition(object): # pragma: no cover
class BaseFramePartition(ABC): # pragma: no cover
"""An abstract class that holds the data and metadata for a single partition.

The methods required for implementing this abstract class are listed in
the section immediately following this.

The API exposed by the children of this object is used in
`BaseFrameManager`.
The public API exposed by the children of this object is used in `BaseFrameManager`.

Note: These objects are treated as immutable by `BaseFrameManager`
subclasses. There is no logic for updating in place.
"""

# Abstract methods and fields. These must be implemented in order to
# properly subclass this object. There are also some abstract classmethods
# to implement.
def get(self):
"""Return the object wrapped by this one to the original format.

Expand All @@ -41,7 +34,7 @@ def get(self):
-------
The object that was `put`.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

def apply(self, func, **kwargs):
"""Apply some callable function to the data in this partition.
Expand All @@ -50,26 +43,38 @@ def apply(self, func, **kwargs):
an important part of many implementations. As of right now, they
are not serialized.

Args:
func: The lambda to apply (may already be correctly formatted)
Args
----
func : callable
The function to apply.

Returns
-------
A new `BaseFramePartition` containing the object that has had `func`
applied to it.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

def add_to_apply_calls(self, func, **kwargs):
"""Add the function to the apply function call stack.

This function will be executed when apply is called. It will be executed
Note: This function will be executed when apply is called. It will be executed
in the order inserted; apply's func is executed last and its result is returned.

Args
----
func : callable
The function to apply.

Returns
-------
A new `BaseFramePartition` with the function added to the call queue.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

def drain_call_queue(self):
"""Execute all functionality stored in the call queue."""
pass

def to_pandas(self):
"""Convert the object stored in this partition to a Pandas DataFrame.
Expand All @@ -81,7 +86,7 @@ def to_pandas(self):
-------
A Pandas DataFrame.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

def to_numpy(self, **kwargs):
"""Convert the object stored in this partition to a NumPy array.
Expand All @@ -93,7 +98,7 @@ def to_numpy(self, **kwargs):
-------
A NumPy array.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

def mask(self, row_indices, col_indices):
"""Lazily create a mask that extracts the indices provided.
Expand All @@ -106,21 +111,22 @@ def mask(self, row_indices, col_indices):
-------
A `BaseFramePartition` object.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

@classmethod
def put(cls, obj):
"""Format a given object.

Parameters
----------
obj: An object.
obj: object
An object.

Returns
-------
A `BaseFramePartition` object.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

@classmethod
def preprocess_func(cls, func):
Expand All @@ -131,14 +137,16 @@ def preprocess_func(cls, func):
deploy a preprocessed function to multiple `BaseFramePartition`
objects.

Args:
func: The function to preprocess.
Args
----
func : callable
The function to preprocess.

Returns
-------
An object that can be accepted by `apply`.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

@classmethod
def length_extraction_fn(cls):
Expand All @@ -148,7 +156,7 @@ def length_extraction_fn(cls):
-------
A callable function.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

@classmethod
def width_extraction_fn(cls):
Expand All @@ -158,7 +166,7 @@ def width_extraction_fn(cls):
-------
A callable function.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

_length_cache = None
_width_cache = None
Expand Down Expand Up @@ -189,4 +197,4 @@ def empty(cls):
-------
An empty partition
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass
3 changes: 2 additions & 1 deletion modin/engines/base/frame/partition_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from abc import ABC
import numpy as np
import pandas

Expand All @@ -19,7 +20,7 @@
from pandas.api.types import union_categoricals


class BaseFrameManager(object):
class BaseFrameManager(ABC):
"""Partition class is the class to use for storing each partition. It must extend the `BaseFramePartition` class.

It is the base class for managing the dataframe data layout and operators.
Expand Down