Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REFACTOR-#1973: Refactor code in accordance with formatting style of new released version of black #1974

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions modin/backends/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,7 @@ def concat(self, axis, other, **kwargs):
# Data Management Methods
@abc.abstractmethod
def free(self):
"""In the future, this will hopefully trigger a cleanup of this object.
"""
"""In the future, this will hopefully trigger a cleanup of this object."""
# TODO create a way to clean up this object.
pass

Expand Down
6 changes: 4 additions & 2 deletions modin/backends/pandas/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,10 @@ def single_worker_read(cls, fname, **kwargs):
pandas_frame = cls.parse(fname, **kwargs)
if isinstance(pandas_frame, pandas.io.parsers.TextFileReader):
pd_read = pandas_frame.read
pandas_frame.read = lambda *args, **kwargs: cls.query_compiler_cls.from_pandas(
pd_read(*args, **kwargs), cls.frame_cls
pandas_frame.read = (
lambda *args, **kwargs: cls.query_compiler_cls.from_pandas(
pd_read(*args, **kwargs), cls.frame_cls
)
)
return pandas_frame
elif isinstance(pandas_frame, (OrderedDict, dict)):
Expand Down
31 changes: 23 additions & 8 deletions modin/backends/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def caller(df, *args, **kwargs):

class PandasQueryCompiler(BaseQueryCompiler):
"""This class implements the logic necessary for operating on partitions
with a Pandas backend. This logic is specific to Pandas."""
with a Pandas backend. This logic is specific to Pandas."""

def __init__(self, modin_frame):
self._modin_frame = modin_frame
Expand Down Expand Up @@ -272,8 +272,7 @@ def concat(self, axis, other, **kwargs):

# Data Management Methods
def free(self):
"""In the future, this will hopefully trigger a cleanup of this object.
"""
"""In the future, this will hopefully trigger a cleanup of this object."""
# TODO create a way to clean up this object.
return

Expand Down Expand Up @@ -1172,7 +1171,9 @@ def unique(self):
The unique values returned as a NumPy array.
"""
new_modin_frame = self._modin_frame._apply_full_axis(
0, lambda x: x.squeeze(axis=1).unique(), new_columns=self.columns,
0,
lambda x: x.squeeze(axis=1).unique(),
new_columns=self.columns,
)
return self.__constructor__(new_modin_frame)

Expand Down Expand Up @@ -1728,7 +1729,9 @@ def applyier(df, internal_indices, other=[], internal_other_indices=[]):
)
new_index = pandas.RangeIndex(len(self.index) * len(value_vars))
new_modin_frame = self._modin_frame.__constructor__(
new_parts, index=new_index, columns=id_vars + [var_name, value_name],
new_parts,
index=new_index,
columns=id_vars + [var_name, value_name],
)
result = self.__constructor__(new_modin_frame)
# this assignment needs to propagate correct indices into partitions
Expand Down Expand Up @@ -2267,11 +2270,19 @@ def sort_rows_by_column_values(self, columns, ascending=True, **kwargs):
broadcast_values2 = broadcast_values2.reset_index(drop=True)
# Index may contain duplicates
new_index1 = broadcast_values1.sort_values(
by=columns, axis=0, ascending=ascending, kind=kind, na_position=na_position,
by=columns,
axis=0,
ascending=ascending,
kind=kind,
na_position=na_position,
).index
# Index without duplicates
new_index2 = broadcast_values2.sort_values(
by=columns, axis=0, ascending=ascending, kind=kind, na_position=na_position,
by=columns,
axis=0,
ascending=ascending,
kind=kind,
na_position=na_position,
).index

result = self.reset_index(drop=True).reindex(0, new_index2)
Expand Down Expand Up @@ -2307,7 +2318,11 @@ def sort_columns_by_row_values(self, rows, ascending=True, **kwargs):
)
broadcast_values.columns = self.columns
new_columns = broadcast_values.sort_values(
by=rows, axis=1, ascending=ascending, kind=kind, na_position=na_position,
by=rows,
axis=1,
ascending=ascending,
kind=kind,
na_position=na_position,
).columns
return self.reindex(1, new_columns)

Expand Down
74 changes: 37 additions & 37 deletions modin/engines/base/frame/axis_partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,23 @@

class BaseFrameAxisPartition(object): # pragma: no cover
"""This abstract class represents the Parent class for any
`ColumnPartition` or `RowPartition` class. This class is intended to
simplify the way that operations are performed

Note 0: The procedures that use this class and its methods assume that
they have some global knowledge about the entire axis. This may
require the implementation to use concatenation or append on the
list of block partitions in this object.

Note 1: The `BaseFrameManager` object that controls these objects
(through the API exposed here) has an invariant that requires that
this object is never returned from a function. It assumes that
there will always be `BaseFramePartition` object stored and structures
itself accordingly.

The abstract methods that need to be implemented are `apply` and `shuffle`.
The children classes must also implement `instance_type` and `partition_type`
(see below).
`ColumnPartition` or `RowPartition` class. This class is intended to
simplify the way that operations are performed

Note 0: The procedures that use this class and its methods assume that
they have some global knowledge about the entire axis. This may
require the implementation to use concatenation or append on the
list of block partitions in this object.

Note 1: The `BaseFrameManager` object that controls these objects
(through the API exposed here) has an invariant that requires that
this object is never returned from a function. It assumes that
there will always be `BaseFramePartition` object stored and structures
itself accordingly.

The abstract methods that need to be implemented are `apply` and `shuffle`.
The children classes must also implement `instance_type` and `partition_type`
(see below).
"""

def apply(
Expand Down Expand Up @@ -99,15 +99,15 @@ def _wrap_partitions(self, partitions):

class PandasFrameAxisPartition(BaseFrameAxisPartition):
"""This abstract class is created to simplify and consolidate the code for
AxisPartitions that run pandas. Because much of the code is similar, this allows
us to reuse this code.
AxisPartitions that run pandas. Because much of the code is similar, this allows
us to reuse this code.

Subclasses must implement `list_of_blocks` which unwraps the `RemotePartition`
objects and creates something interpretable as a pandas DataFrame.
Subclasses must implement `list_of_blocks` which unwraps the `RemotePartition`
objects and creates something interpretable as a pandas DataFrame.

See `modin.engines.ray.pandas_on_ray.axis_partition.PandasOnRayFrameAxisPartition`
for an example on how to override/use this class when the implementation needs
to be augmented.
See `modin.engines.ray.pandas_on_ray.axis_partition.PandasOnRayFrameAxisPartition`
for an example on how to override/use this class when the implementation needs
to be augmented.
"""

def apply(
Expand Down Expand Up @@ -181,19 +181,19 @@ def deploy_axis_func(
):
"""Deploy a function along a full axis in Ray.

Args:
axis: The axis to perform the function along.
func: The function to perform.
num_splits: The number of splits to return
(see `split_result_of_axis_func_pandas`)
kwargs: A dictionary of keyword arguments.
maintain_partitioning: If True, keep the old partitioning if possible.
If False, create a new partition layout.
partitions: All partitions that make up the full axis (row or column)

Returns:
A list of Pandas DataFrames.
"""
Args:
axis: The axis to perform the function along.
func: The function to perform.
num_splits: The number of splits to return
(see `split_result_of_axis_func_pandas`)
kwargs: A dictionary of keyword arguments.
maintain_partitioning: If True, keep the old partitioning if possible.
If False, create a new partition layout.
partitions: All partitions that make up the full axis (row or column)

Returns:
A list of Pandas DataFrames.
"""
# Pop these off first because they aren't expected by the function.
manual_partition = kwargs.pop("manual_partition", False)
lengths = kwargs.pop("_lengths", None)
Expand Down
20 changes: 16 additions & 4 deletions modin/engines/base/frame/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,8 @@ def _validate_axis_equality(self, axis: int, force: bool = False):
self._set_axis(axis, new_axis, cache_only=not is_lenghts_matches)
else:
self._set_axis(
axis, self.axes[axis],
axis,
self.axes[axis],
)

def _validate_internal_indices(self, mode=None, **kwargs):
Expand Down Expand Up @@ -1176,7 +1177,12 @@ def filter_full_axis(self, axis, func):
)

def _apply_full_axis(
self, axis, func, new_index=None, new_columns=None, dtypes=None,
self,
axis,
func,
new_index=None,
new_columns=None,
dtypes=None,
):
"""
Perform a function across an entire axis.
Expand Down Expand Up @@ -1272,8 +1278,14 @@ def _apply_full_axis_select_indices(
# Get the indices for the axis being applied to (it is the opposite of axis
# being applied over)
dict_indices = self._get_dict_of_block_index(axis ^ 1, numeric_indices)
new_partitions = self._frame_mgr_cls.apply_func_to_select_indices_along_full_axis(
axis, self._partitions, func, dict_indices, keep_remaining=keep_remaining
new_partitions = (
self._frame_mgr_cls.apply_func_to_select_indices_along_full_axis(
axis,
self._partitions,
func,
dict_indices,
keep_remaining=keep_remaining,
)
)
# TODO Infer columns and index from `keep_remaining` and `apply_indices`
if new_index is None:
Expand Down
12 changes: 6 additions & 6 deletions modin/engines/base/frame/partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@

class BaseFramePartition(object): # pragma: no cover
"""This abstract class holds the data and metadata for a single partition.
The methods required for implementing this abstract class are listed in
the section immediately following this.
The methods required for implementing this abstract class are listed in
the section immediately following this.

The API exposed by the children of this object is used in
`BaseFrameManager`.
The API exposed by the children of this object is used in
`BaseFrameManager`.

Note: These objects are treated as immutable by `BaseFrameManager`
subclasses. There is no logic for updating inplace.
Note: These objects are treated as immutable by `BaseFrameManager`
subclasses. There is no logic for updating inplace.
"""

# Abstract methods and fields. These must be implemented in order to
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,12 @@ def build_query_compiler(cls, path, columns, **kwargs):
dtypes = cls.build_dtypes(partition_ids[-1], columns)
new_query_compiler = cls.query_compiler_cls(
cls.frame_cls(
remote_parts, index, columns, row_lens, column_widths, dtypes=dtypes,
remote_parts,
index,
columns,
row_lens,
column_widths,
dtypes=dtypes,
)
)
return new_query_compiler
8 changes: 4 additions & 4 deletions modin/engines/dask/pandas_on_dask/frame/axis_partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,17 @@ def deploy_func_between_two_axis_partitions(

class PandasOnDaskFrameColumnPartition(PandasOnDaskFrameAxisPartition):
"""The column partition implementation for Multiprocess. All of the implementation
for this class is in the parent class, and this class defines the axis
to perform the computation over.
for this class is in the parent class, and this class defines the axis
to perform the computation over.
"""

axis = 0


class PandasOnDaskFrameRowPartition(PandasOnDaskFrameAxisPartition):
"""The row partition implementation for Multiprocess. All of the implementation
for this class is in the parent class, and this class defines the axis
to perform the computation over.
for this class is in the parent class, and this class defines the axis
to perform the computation over.
"""

axis = 1
12 changes: 6 additions & 6 deletions modin/engines/dask/pandas_on_dask/frame/partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@ def apply_list_of_funcs(funcs, df):

class PandasOnDaskFramePartition(BaseFramePartition):
"""This abstract class holds the data and metadata for a single partition.
The methods required for implementing this abstract class are listed in
the section immediately following this.
The methods required for implementing this abstract class are listed in
the section immediately following this.

The API exposed by the children of this object is used in
`BaseFrameManager`.
The API exposed by the children of this object is used in
`BaseFrameManager`.

Note: These objects are treated as immutable by `BaseFrameManager`
subclasses. There is no logic for updating inplace.
Note: These objects are treated as immutable by `BaseFrameManager`
subclasses. There is no logic for updating inplace.
"""

def __init__(self, future, length=None, width=None, call_queue=None):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,17 @@ def __init__(self, list_of_blocks):

class PandasOnPythonFrameColumnPartition(PandasOnPythonFrameAxisPartition):
"""The column partition implementation for Ray. All of the implementation
for this class is in the parent class, and this class defines the axis
to perform the computation over.
for this class is in the parent class, and this class defines the axis
to perform the computation over.
"""

axis = 0


class PandasOnPythonFrameRowPartition(PandasOnPythonFrameAxisPartition):
"""The row partition implementation for Ray. All of the implementation
for this class is in the parent class, and this class defines the axis
to perform the computation over.
for this class is in the parent class, and this class defines the axis
to perform the computation over.
"""

axis = 1
12 changes: 6 additions & 6 deletions modin/engines/python/pandas_on_python/frame/partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@

class PandasOnPythonFramePartition(BaseFramePartition):
"""This abstract class holds the data and metadata for a single partition.
The methods required for implementing this abstract class are listed in
the section immediately following this.
The methods required for implementing this abstract class are listed in
the section immediately following this.

The API exposed by the children of this object is used in
`BaseFrameManager`.
The API exposed by the children of this object is used in
`BaseFrameManager`.

Note: These objects are treated as immutable by `BaseFrameManager`
subclasses. There is no logic for updating inplace.
Note: These objects are treated as immutable by `BaseFrameManager`
subclasses. There is no logic for updating inplace.
"""

def __init__(self, data, length=None, width=None, call_queue=None):
Expand Down
8 changes: 4 additions & 4 deletions modin/engines/ray/pandas_on_ray/frame/axis_partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,17 @@ def _wrap_partitions(self, partitions):

class PandasOnRayFrameColumnPartition(PandasOnRayFrameAxisPartition):
"""The column partition implementation for Ray. All of the implementation
for this class is in the parent class, and this class defines the axis
to perform the computation over.
for this class is in the parent class, and this class defines the axis
to perform the computation over.
"""

axis = 0


class PandasOnRayFrameRowPartition(PandasOnRayFrameAxisPartition):
"""The row partition implementation for Ray. All of the implementation
for this class is in the parent class, and this class defines the axis
to perform the computation over.
for this class is in the parent class, and this class defines the axis
to perform the computation over.
"""

axis = 1
Expand Down
4 changes: 3 additions & 1 deletion modin/experimental/cloud/rpyc_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,9 @@ def make_dataframe_groupby_wrapper(DataFrameGroupBy):
Look for details in make_dataframe_wrapper() and _deliveringWrapper().
"""
DeliveringDataFrameGroupBy = _deliveringWrapper(
DataFrameGroupBy, ["agg", "aggregate", "apply"], target_name="DataFrameGroupBy",
DataFrameGroupBy,
["agg", "aggregate", "apply"],
target_name="DataFrameGroupBy",
)
return DeliveringDataFrameGroupBy

Expand Down
2 changes: 1 addition & 1 deletion modin/experimental/engines/pandas_on_ray/io_exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def read_sql(
upper_bound=None,
max_sessions=None,
):
""" Read SQL query or database table into a DataFrame.
"""Read SQL query or database table into a DataFrame.

Args:
sql: string or SQLAlchemy Selectable (select or text object) SQL query to be executed or a table name.
Expand Down
Loading