Skip to content

Commit

Permalink
REFACTOR-#2467: Convert internal base dataframe objects to ABC (#2468)
Browse files: browse the repository at this point in the history
Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
  • Loading branch information
devin-petersohn authored Nov 26, 2020
1 parent e5556b5 commit 0aada32
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 33 deletions.
9 changes: 4 additions & 5 deletions modin/engines/base/frame/axis_partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,12 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from abc import ABC
import pandas
from modin.data_management.utils import split_result_of_axis_func_pandas

NOT_IMPLMENTED_MESSAGE = "Must be implemented in child class"


class BaseFrameAxisPartition(object): # pragma: no cover
class BaseFrameAxisPartition(ABC): # pragma: no cover
"""An abstract class that represents the Parent class for any `ColumnPartition` or `RowPartition` class.
This class is intended to simplify the way that operations are performed.
Expand Down Expand Up @@ -73,7 +72,7 @@ def apply(
-------
A list of `BaseFramePartition` objects.
"""
raise NotImplementedError(NOT_IMPLMENTED_MESSAGE)
pass

def shuffle(self, func, lengths, **kwargs):
"""Shuffle the order of the data in this axis based on the `lengths`.
Expand All @@ -86,7 +85,7 @@ def shuffle(self, func, lengths, **kwargs):
-------
A list of RemotePartition objects split by `lengths`.
"""
raise NotImplementedError(NOT_IMPLMENTED_MESSAGE)
pass

# Child classes must have these in order to correctly subclass.
instance_type = None
Expand Down
62 changes: 35 additions & 27 deletions modin/engines/base/frame/partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,18 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

NOT_IMPLEMENTED_MESSAGE = "Must be implemented in child class"
from abc import ABC


class BaseFramePartition(object): # pragma: no cover
class BaseFramePartition(ABC): # pragma: no cover
"""An abstract class that holds the data and metadata for a single partition.
The methods required for implementing this abstract class are listed in
the section immediately following this.
The API exposed by the children of this object is used in
`BaseFrameManager`.
The public API exposed by the children of this object is used in `BaseFrameManager`.
Note: These objects are treated as immutable by `BaseFrameManager`
subclasses. There is no logic for updating inplace.
"""

# Abstract methods and fields. These must be implemented in order to
# properly subclass this object. There are also some abstract classmethods
# to implement.
def get(self):
"""Return the object wrapped by this one to the original format.
Expand All @@ -41,7 +34,7 @@ def get(self):
-------
The object that was `put`.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

def apply(self, func, **kwargs):
"""Apply some callable function to the data in this partition.
Expand All @@ -50,26 +43,38 @@ def apply(self, func, **kwargs):
an important part of many implementations. As of right now, they
are not serialized.
Args:
func: The lambda to apply (may already be correctly formatted)
Args
----
func : callable
The function to apply.
Returns
-------
A new `BaseFramePartition` containing the object that has had `func`
applied to it.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

def add_to_apply_calls(self, func, **kwargs):
"""Add the function to the apply function call stack.
This function will be executed when apply is called. It will be executed
Note: This function will be executed when apply is called. It will be executed
in the order inserted; apply's func operates the last and return
Args
----
func : callable
The function to apply.
Returns
-------
A new `BaseFramePartition` with the function added to the call queue.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

def drain_call_queue(self):
"""Execute all functionality stored in the call queue.

The body shown here is only `pass`, so this base version performs no work
and implicitly returns None.
NOTE(review): presumably engine-specific partitions that queue functions
via `add_to_apply_calls` override this - confirm against the subclasses.
"""
pass

def to_pandas(self):
"""Convert the object stored in this partition to a Pandas DataFrame.
Expand All @@ -81,7 +86,7 @@ def to_pandas(self):
-------
A Pandas DataFrame.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

def to_numpy(self, **kwargs):
"""Convert the object stored in this partition to a NumPy array.
Expand All @@ -93,7 +98,7 @@ def to_numpy(self, **kwargs):
-------
A NumPy array.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

def mask(self, row_indices, col_indices):
"""Lazily create a mask that extracts the indices provided.
Expand All @@ -106,21 +111,22 @@ def mask(self, row_indices, col_indices):
-------
A `BaseFramePartition` object.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

@classmethod
def put(cls, obj):
"""Format a given object.
Parameters
----------
obj: An object.
obj: object
An object.
Returns
-------
A `BaseFramePartition` object.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

@classmethod
def preprocess_func(cls, func):
Expand All @@ -131,14 +137,16 @@ def preprocess_func(cls, func):
deploy a preprocessed function to multiple `BaseFramePartition`
objects.
Args:
func: The function to preprocess.
Args
----
func : callable
The function to preprocess.
Returns
-------
An object that can be accepted by `apply`.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

@classmethod
def length_extraction_fn(cls):
Expand All @@ -148,7 +156,7 @@ def length_extraction_fn(cls):
-------
A callable function.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

@classmethod
def width_extraction_fn(cls):
Expand All @@ -158,7 +166,7 @@ def width_extraction_fn(cls):
-------
A callable function.
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass

_length_cache = None
_width_cache = None
Expand Down Expand Up @@ -189,4 +197,4 @@ def empty(cls):
-------
An empty partition
"""
raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)
pass
3 changes: 2 additions & 1 deletion modin/engines/base/frame/partition_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from abc import ABC
import numpy as np
import pandas

Expand All @@ -19,7 +20,7 @@
from pandas.api.types import union_categoricals


class BaseFrameManager(object):
class BaseFrameManager(ABC):
"""Partition class is the class to use for storing each partition. It must extend the `BaseFramePartition` class.
It is the base class for managing the dataframe data layout and operators.
Expand Down

0 comments on commit 0aada32

Please sign in to comment.