modin-project · devin-petersohn · Aug 27, 2020 · Aug 27, 2020
@@ -88,8 +88,7 @@ def concat(self, axis, other, **kwargs):
     # Data Management Methods
     @abc.abstractmethod
     def free(self):
-        """In the future, this will hopefully trigger a cleanup of this object.
-        """
+        """In the future, this will hopefully trigger a cleanup of this object."""
         # TODO create a way to clean up this object.
         pass
 

@@ -73,8 +73,10 @@ def single_worker_read(cls, fname, **kwargs):
         pandas_frame = cls.parse(fname, **kwargs)
         if isinstance(pandas_frame, pandas.io.parsers.TextFileReader):
             pd_read = pandas_frame.read
-            pandas_frame.read = lambda *args, **kwargs: cls.query_compiler_cls.from_pandas(
-                pd_read(*args, **kwargs), cls.frame_cls
+            pandas_frame.read = (
+                lambda *args, **kwargs: cls.query_compiler_cls.from_pandas(
+                    pd_read(*args, **kwargs), cls.frame_cls
+                )
             )
             return pandas_frame
         elif isinstance(pandas_frame, (OrderedDict, dict)):

@@ -144,7 +144,7 @@ def caller(df, *args, **kwargs):
 
 class PandasQueryCompiler(BaseQueryCompiler):
     """This class implements the logic necessary for operating on partitions
-        with a Pandas backend. This logic is specific to Pandas."""
+    with a Pandas backend. This logic is specific to Pandas."""
 
     def __init__(self, modin_frame):
         self._modin_frame = modin_frame
@@ -272,8 +272,7 @@ def concat(self, axis, other, **kwargs):
 
     # Data Management Methods
     def free(self):
-        """In the future, this will hopefully trigger a cleanup of this object.
-        """
+        """In the future, this will hopefully trigger a cleanup of this object."""
         # TODO create a way to clean up this object.
         return
 
@@ -1172,7 +1171,9 @@ def unique(self):
             The unique values returned as a NumPy array.
         """
         new_modin_frame = self._modin_frame._apply_full_axis(
-            0, lambda x: x.squeeze(axis=1).unique(), new_columns=self.columns,
+            0,
+            lambda x: x.squeeze(axis=1).unique(),
+            new_columns=self.columns,
         )
         return self.__constructor__(new_modin_frame)
 
@@ -1728,7 +1729,9 @@ def applyier(df, internal_indices, other=[], internal_other_indices=[]):
         )
         new_index = pandas.RangeIndex(len(self.index) * len(value_vars))
         new_modin_frame = self._modin_frame.__constructor__(
-            new_parts, index=new_index, columns=id_vars + [var_name, value_name],
+            new_parts,
+            index=new_index,
+            columns=id_vars + [var_name, value_name],
         )
         result = self.__constructor__(new_modin_frame)
         # this assigment needs to propagate correct indices into partitions
@@ -2267,11 +2270,19 @@ def sort_rows_by_column_values(self, columns, ascending=True, **kwargs):
         broadcast_values2 = broadcast_values2.reset_index(drop=True)
         # Index may contain duplicates
         new_index1 = broadcast_values1.sort_values(
-            by=columns, axis=0, ascending=ascending, kind=kind, na_position=na_position,
+            by=columns,
+            axis=0,
+            ascending=ascending,
+            kind=kind,
+            na_position=na_position,
         ).index
         # Index without duplicates
         new_index2 = broadcast_values2.sort_values(
-            by=columns, axis=0, ascending=ascending, kind=kind, na_position=na_position,
+            by=columns,
+            axis=0,
+            ascending=ascending,
+            kind=kind,
+            na_position=na_position,
         ).index
 
         result = self.reset_index(drop=True).reindex(0, new_index2)
@@ -2307,7 +2318,11 @@ def sort_columns_by_row_values(self, rows, ascending=True, **kwargs):
         )
         broadcast_values.columns = self.columns
         new_columns = broadcast_values.sort_values(
-            by=rows, axis=1, ascending=ascending, kind=kind, na_position=na_position,
+            by=rows,
+            axis=1,
+            ascending=ascending,
+            kind=kind,
+            na_position=na_position,
         ).columns
         return self.reindex(1, new_columns)
 

@@ -19,23 +19,23 @@
 
 class BaseFrameAxisPartition(object):  # pragma: no cover
     """This abstract class represents the Parent class for any
-        `ColumnPartition` or `RowPartition` class. This class is intended to
-        simplify the way that operations are performed
-
-        Note 0: The procedures that use this class and its methods assume that
-            they have some global knowledge about the entire axis. This may
-            require the implementation to use concatenation or append on the
-            list of block partitions in this object.
-
-        Note 1: The `BaseFrameManager` object that controls these objects
-            (through the API exposed here) has an invariant that requires that
-            this object is never returned from a function. It assumes that
-            there will always be `BaseFramePartition` object stored and structures
-            itself accordingly.
-
-        The abstract methods that need implemented are `apply` and `shuffle`.
-        The children classes must also implement `instance_type` and `partition_type`
-        (see below).
+    `ColumnPartition` or `RowPartition` class. This class is intended to
+    simplify the way that operations are performed.
+
+    Note 0: The procedures that use this class and its methods assume that
+        they have some global knowledge about the entire axis. This may
+        require the implementation to use concatenation or append on the
+        list of block partitions in this object.
+
+    Note 1: The `BaseFrameManager` object that controls these objects
+        (through the API exposed here) has an invariant that requires that
+        this object is never returned from a function. It assumes that
+        there will always be `BaseFramePartition` object stored and structures
+        itself accordingly.
+
+    The abstract methods that need to be implemented are `apply` and `shuffle`.
+    The children classes must also implement `instance_type` and `partition_type`
+    (see below).
     """
 
     def apply(
@@ -99,15 +99,15 @@ def _wrap_partitions(self, partitions):
 
 class PandasFrameAxisPartition(BaseFrameAxisPartition):
     """This abstract class is created to simplify and consolidate the code for
-        AxisPartitions that run pandas. Because much of the code is similar, this allows
-        us to reuse this code.
+    AxisPartitions that run pandas. Because much of the code is similar, this allows
+    us to reuse this code.
 
-        Subclasses must implement `list_of_blocks` which unwraps the `RemotePartition`
-        objects and creates something interpretable as a pandas DataFrame.
+    Subclasses must implement `list_of_blocks` which unwraps the `RemotePartition`
+    objects and creates something interpretable as a pandas DataFrame.
 
-        See `modin.engines.ray.pandas_on_ray.axis_partition.PandasOnRayFrameAxisPartition`
-        for an example on how to override/use this class when the implementation needs
-        to be augmented.
+    See `modin.engines.ray.pandas_on_ray.axis_partition.PandasOnRayFrameAxisPartition`
+    for an example on how to override/use this class when the implementation needs
+    to be augmented.
     """
 
     def apply(
@@ -181,19 +181,19 @@ def deploy_axis_func(
     ):
         """Deploy a function along a full axis in Ray.
 
-            Args:
-                axis: The axis to perform the function along.
-                func: The function to perform.
-                num_splits: The number of splits to return
-                    (see `split_result_of_axis_func_pandas`)
-                kwargs: A dictionary of keyword arguments.
-                maintain_partitioning: If True, keep the old partitioning if possible.
-                    If False, create a new partition layout.
-                partitions: All partitions that make up the full axis (row or column)
-
-            Returns:
-                A list of Pandas DataFrames.
-            """
+        Args:
+            axis: The axis to perform the function along.
+            func: The function to perform.
+            num_splits: The number of splits to return
+                (see `split_result_of_axis_func_pandas`)
+            kwargs: A dictionary of keyword arguments.
+            maintain_partitioning: If True, keep the old partitioning if possible.
+                If False, create a new partition layout.
+            partitions: All partitions that make up the full axis (row or column)
+
+        Returns:
+            A list of Pandas DataFrames.
+        """
         # Pop these off first because they aren't expected by the function.
         manual_partition = kwargs.pop("manual_partition", False)
         lengths = kwargs.pop("_lengths", None)

@@ -288,7 +288,8 @@ def _validate_axis_equality(self, axis: int, force: bool = False):
                 self._set_axis(axis, new_axis, cache_only=not is_lenghts_matches)
             else:
                 self._set_axis(
-                    axis, self.axes[axis],
+                    axis,
+                    self.axes[axis],
                 )
 
     def _validate_internal_indices(self, mode=None, **kwargs):
@@ -1176,7 +1177,12 @@ def filter_full_axis(self, axis, func):
         )
 
     def _apply_full_axis(
-        self, axis, func, new_index=None, new_columns=None, dtypes=None,
+        self,
+        axis,
+        func,
+        new_index=None,
+        new_columns=None,
+        dtypes=None,
     ):
         """
         Perform a function across an entire axis.
@@ -1272,8 +1278,14 @@ def _apply_full_axis_select_indices(
         # Get the indices for the axis being applied to (it is the opposite of axis
         # being applied over)
         dict_indices = self._get_dict_of_block_index(axis ^ 1, numeric_indices)
-        new_partitions = self._frame_mgr_cls.apply_func_to_select_indices_along_full_axis(
-            axis, self._partitions, func, dict_indices, keep_remaining=keep_remaining
+        new_partitions = (
+            self._frame_mgr_cls.apply_func_to_select_indices_along_full_axis(
+                axis,
+                self._partitions,
+                func,
+                dict_indices,
+                keep_remaining=keep_remaining,
+            )
         )
         # TODO Infer columns and index from `keep_remaining` and `apply_indices`
         if new_index is None:

@@ -16,14 +16,14 @@
 
 class BaseFramePartition(object):  # pragma: no cover
     """This abstract class holds the data and metadata for a single partition.
-        The methods required for implementing this abstract class are listed in
-        the section immediately following this.
+    The methods required for implementing this abstract class are listed in
+    the section immediately following this.
 
-        The API exposed by the children of this object is used in
-        `BaseFrameManager`.
+    The API exposed by the children of this object is used in
+    `BaseFrameManager`.
 
-        Note: These objects are treated as immutable by `BaseFrameManager`
-        subclasses. There is no logic for updating inplace.
+    Note: These objects are treated as immutable by `BaseFrameManager`
+    subclasses. There is no logic for updating inplace.
     """
 
     # Abstract methods and fields. These must be implemented in order to

@@ -113,7 +113,12 @@ def build_query_compiler(cls, path, columns, **kwargs):
         dtypes = cls.build_dtypes(partition_ids[-1], columns)
         new_query_compiler = cls.query_compiler_cls(
             cls.frame_cls(
-                remote_parts, index, columns, row_lens, column_widths, dtypes=dtypes,
+                remote_parts,
+                index,
+                columns,
+                row_lens,
+                column_widths,
+                dtypes=dtypes,
             )
         )
         return new_query_compiler
@@ -79,17 +79,17 @@ def deploy_func_between_two_axis_partitions(
 
 class PandasOnDaskFrameColumnPartition(PandasOnDaskFrameAxisPartition):
     """The column partition implementation for Multiprocess. All of the implementation
-        for this class is in the parent class, and this class defines the axis
-        to perform the computation over.
+    for this class is in the parent class, and this class defines the axis
+    to perform the computation over.
     """
 
     axis = 0
 
 
 class PandasOnDaskFrameRowPartition(PandasOnDaskFrameAxisPartition):
     """The row partition implementation for Multiprocess. All of the implementation
-        for this class is in the parent class, and this class defines the axis
-        to perform the computation over.
+    for this class is in the parent class, and this class defines the axis
+    to perform the computation over.
     """
 
     axis = 1
@@ -30,14 +30,14 @@ def apply_list_of_funcs(funcs, df):
 
 class PandasOnDaskFramePartition(BaseFramePartition):
     """This abstract class holds the data and metadata for a single partition.
-        The methods required for implementing this abstract class are listed in
-        the section immediately following this.
+    The methods required for implementing this abstract class are listed in
+    the section immediately following this.
 
-        The API exposed by the children of this object is used in
-        `BaseFrameManager`.
+    The API exposed by the children of this object is used in
+    `BaseFrameManager`.
 
-        Note: These objects are treated as immutable by `BaseFrameManager`
-        subclasses. There is no logic for updating inplace.
+    Note: These objects are treated as immutable by `BaseFrameManager`
+    subclasses. There is no logic for updating inplace.
     """
 
     def __init__(self, future, length=None, width=None, call_queue=None):

@@ -30,17 +30,17 @@ def __init__(self, list_of_blocks):
 
 class PandasOnPythonFrameColumnPartition(PandasOnPythonFrameAxisPartition):
     """The column partition implementation for Ray. All of the implementation
-        for this class is in the parent class, and this class defines the axis
-        to perform the computation over.
+    for this class is in the parent class, and this class defines the axis
+    to perform the computation over.
     """
 
     axis = 0
 
 
 class PandasOnPythonFrameRowPartition(PandasOnPythonFrameAxisPartition):
     """The row partition implementation for Ray. All of the implementation
-        for this class is in the parent class, and this class defines the axis
-        to perform the computation over.
+    for this class is in the parent class, and this class defines the axis
+    to perform the computation over.
     """
 
     axis = 1
@@ -19,14 +19,14 @@
 
 class PandasOnPythonFramePartition(BaseFramePartition):
     """This abstract class holds the data and metadata for a single partition.
-        The methods required for implementing this abstract class are listed in
-        the section immediately following this.
+    The methods required for implementing this abstract class are listed in
+    the section immediately following this.
 
-        The API exposed by the children of this object is used in
-        `BaseFrameManager`.
+    The API exposed by the children of this object is used in
+    `BaseFrameManager`.
 
-        Note: These objects are treated as immutable by `BaseFrameManager`
-        subclasses. There is no logic for updating inplace.
+    Note: These objects are treated as immutable by `BaseFrameManager`
+    subclasses. There is no logic for updating inplace.
     """
 
     def __init__(self, data, length=None, width=None, call_queue=None):

@@ -72,17 +72,17 @@ def _wrap_partitions(self, partitions):
 
 class PandasOnRayFrameColumnPartition(PandasOnRayFrameAxisPartition):
     """The column partition implementation for Ray. All of the implementation
-        for this class is in the parent class, and this class defines the axis
-        to perform the computation over.
+    for this class is in the parent class, and this class defines the axis
+    to perform the computation over.
     """
 
     axis = 0
 
 
 class PandasOnRayFrameRowPartition(PandasOnRayFrameAxisPartition):
     """The row partition implementation for Ray. All of the implementation
-        for this class is in the parent class, and this class defines the axis
-        to perform the computation over.
+    for this class is in the parent class, and this class defines the axis
+    to perform the computation over.
     """
 
     axis = 1

@@ -614,7 +614,9 @@ def make_dataframe_groupby_wrapper(DataFrameGroupBy):
     Look for deatils in make_dataframe_wrapper() and _deliveringWrapper().
     """
     DeliveringDataFrameGroupBy = _deliveringWrapper(
-        DataFrameGroupBy, ["agg", "aggregate", "apply"], target_name="DataFrameGroupBy",
+        DataFrameGroupBy,
+        ["agg", "aggregate", "apply"],
+        target_name="DataFrameGroupBy",
     )
     return DeliveringDataFrameGroupBy
 

@@ -70,7 +70,7 @@ def read_sql(
         upper_bound=None,
         max_sessions=None,
     ):
-        """ Read SQL query or database table into a DataFrame.
+        """Read SQL query or database table into a DataFrame.
 
         Args:
             sql: string or SQLAlchemy Selectable (select or text object) SQL query to be executed or a table name.