[Data] Implement Operators for union() (#36242)

Implement the `LogicalOperator` and `PhysicalOperator` for `Dataset.union()`, and make `union()` lazy. This PR also introduces `Nary` and `NaryOperator` Logical/Physical Operators to support abstraction for `Union` and `Zip` operators. --------- Signed-off-by: Scott Lee <sjl@anyscale.com>
ray-project · Jun 19, 2023 · d207361 · d207361
1 parent 31ab8a7
commit d207361
Show file tree

Hide file tree

Showing 25 changed files with 461 additions and 83 deletions.
diff --git a/python/ray/data/_internal/execution/bulk_executor.py b/python/ray/data/_internal/execution/bulk_executor.py
@@ -62,7 +62,7 @@ def execute_recursive(op: PhysicalOperator) -> List[RefBundle]:
                 for i, ref_bundles in enumerate(inputs):
                     for r in ref_bundles:
                         op.add_input(r, input_index=i)
-                op.inputs_done()
+                op.all_inputs_done()
                 output = _naive_run_until_complete(op)
             finally:
                 op.shutdown()

diff --git a/python/ray/data/_internal/execution/interfaces.py b/python/ray/data/_internal/execution/interfaces.py
@@ -396,7 +396,15 @@ def add_input(self, refs: RefBundle, input_index: int) -> None:
         """
         raise NotImplementedError
 
-    def inputs_done(self) -> None:
+    def input_done(self, input_index: int) -> None:
+        """Called when the upstream operator at index `input_index` has completed().
+
+        After this is called, the executor guarantees that no more inputs will be added
+        via `add_input` for the given input index.
+        """
+        pass
+
+    def all_inputs_done(self) -> None:
         """Called when all upstream operators have completed().
 
         After this is called, the executor guarantees that no more inputs will be added

diff --git a/python/ray/data/_internal/execution/legacy_compat.py b/python/ray/data/_internal/execution/legacy_compat.py
@@ -14,10 +14,12 @@
     RefBundle,
     TaskContext,
 )
-from ray.data._internal.execution.operators.all_to_all_operator import AllToAllOperator
+from ray.data._internal.execution.operators.base_physical_operator import (
+    AllToAllOperator,
+)
 from ray.data._internal.execution.operators.input_data_buffer import InputDataBuffer
+from ray.data._internal.execution.operators.limit_operator import LimitOperator
 from ray.data._internal.execution.operators.map_operator import MapOperator
-from ray.data._internal.execution.operators.one_to_one_operator import LimitOperator
 from ray.data._internal.execution.util import make_callable_class_concurrent
 from ray.data._internal.lazy_block_list import LazyBlockList
 from ray.data._internal.logical.optimizers import get_execution_plan

diff --git a/python/ray/data/_internal/execution/operators/actor_pool_map_operator.py b/python/ray/data/_internal/execution/operators/actor_pool_map_operator.py
@@ -230,10 +230,10 @@ def notify_work_completed(
         # For either a completed task or ready worker, we try to dispatch queued tasks.
         self._dispatch_tasks()
 
-    def inputs_done(self):
+    def all_inputs_done(self):
         # Call base implementation to handle any leftover bundles. This may or may not
         # trigger task dispatch.
-        super().inputs_done()
+        super().all_inputs_done()
 
         # Mark inputs as done so future task dispatch will kill all inactive workers
         # once the bundle queue is exhausted.

diff --git a/...xecution/operators/all_to_all_operator.py → ...ution/operators/base_physical_operator.py b/...xecution/operators/all_to_all_operator.py → ...ution/operators/base_physical_operator.py
@@ -6,10 +6,34 @@
     RefBundle,
     TaskContext,
 )
+from ray.data._internal.logical.interfaces import LogicalOperator
 from ray.data._internal.progress_bar import ProgressBar
 from ray.data._internal.stats import StatsDict
 
 
+class OneToOneOperator(PhysicalOperator):
+    """An operator that has one input and one output dependency.
+
+    This operator serves as the base for map, filter, limit, etc.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        input_op: PhysicalOperator,
+    ):
+        """Create a OneToOneOperator.
+        Args:
+            input_op: Operator generating input data for this op.
+            name: The name of this operator.
+        """
+        super().__init__(name, [input_op])
+
+    @property
+    def input_dependency(self) -> PhysicalOperator:
+        return self.input_dependencies[0]
+
+
 class AllToAllOperator(PhysicalOperator):
     """A blocking operator that executes once its inputs are complete.
 
@@ -25,7 +49,6 @@ def __init__(
         name: str = "AllToAll",
     ):
         """Create an AllToAllOperator.
-
         Args:
             bulk_fn: The blocking transformation function to run. The inputs are the
                 list of input ref bundles, and the outputs are the output ref bundles
@@ -57,15 +80,15 @@ def add_input(self, refs: RefBundle, input_index: int) -> None:
         assert input_index == 0, input_index
         self._input_buffer.append(refs)
 
-    def inputs_done(self) -> None:
+    def all_inputs_done(self) -> None:
         ctx = TaskContext(
             task_idx=self._next_task_index,
             sub_progress_bar_dict=self._sub_progress_bar_dict,
         )
         self._output_buffer, self._stats = self._bulk_fn(self._input_buffer, ctx)
         self._next_task_index += 1
         self._input_buffer.clear()
-        super().inputs_done()
+        super().all_inputs_done()
 
     def has_next(self) -> bool:
         return len(self._output_buffer) > 0
@@ -102,3 +125,23 @@ def close_sub_progress_bars(self):
         if self._sub_progress_bar_dict is not None:
             for sub_bar in self._sub_progress_bar_dict.values():
                 sub_bar.close()
+
+
+class NAryOperator(PhysicalOperator):
+    """An operator that has multiple input dependencies and one output.
+
+    This operator serves as the base for union, zip, etc.
+    """
+
+    def __init__(
+        self,
+        *input_ops: LogicalOperator,
+    ):
+        """Create a OneToOneOperator.
+        Args:
+            input_op: Operator generating input data for this op.
+            name: The name of this operator.
+        """
+        input_names = ", ".join([op._name for op in input_ops])
+        op_name = f"{self.__class__.__name__}({input_names})"
+        super().__init__(op_name, list(input_ops))
diff --git a/...xecution/operators/one_to_one_operator.py → ...nal/execution/operators/limit_operator.py b/...xecution/operators/one_to_one_operator.py → ...nal/execution/operators/limit_operator.py
@@ -4,35 +4,15 @@
 
 import ray
 from ray.data._internal.execution.interfaces import PhysicalOperator, RefBundle
+from ray.data._internal.execution.operators.base_physical_operator import (
+    OneToOneOperator,
+)
 from ray.data._internal.remote_fn import cached_remote_fn
 from ray.data._internal.stats import StatsDict
 from ray.data.block import Block, BlockAccessor, BlockMetadata
 from ray.types import ObjectRef
 
 
-class OneToOneOperator(PhysicalOperator):
-    """An operator that has one input and one output dependency.
-    This operator serves as the base for map, filter, limit, etc.
-    """
-
-    def __init__(
-        self,
-        name: str,
-        input_op: PhysicalOperator,
-    ):
-        """Create a OneToOneOperator.
-
-        Args:
-            input_op: Operator generating input data for this op.
-            name: The name of this operator.
-        """
-        super().__init__(name, [input_op])
-
-    @property
-    def input_dependency(self) -> PhysicalOperator:
-        return self.input_dependencies[0]
-
-
 class LimitOperator(OneToOneOperator):
     """Physical operator for limit."""
 
@@ -49,7 +29,7 @@ def __init__(
         self._cur_output_bundles = 0
         super().__init__(self._name, input_op)
         if self._limit <= 0:
-            self.inputs_done()
+            self.all_inputs_done()
 
     def _limit_reached(self) -> bool:
         return self._consumed_rows >= self._limit
@@ -99,7 +79,7 @@ def slice_fn(block, metadata, num_rows) -> Tuple[Block, BlockMetadata]:
         )
         self._buffer.append(out_refs)
         if self._limit_reached():
-            self.inputs_done()
+            self.all_inputs_done()
 
     def has_next(self) -> bool:
         return len(self._buffer) > 0

diff --git a/python/ray/data/_internal/execution/operators/map_operator.py b/python/ray/data/_internal/execution/operators/map_operator.py
@@ -19,7 +19,9 @@
     RefBundle,
     TaskContext,
 )
-from ray.data._internal.execution.operators.one_to_one_operator import OneToOneOperator
+from ray.data._internal.execution.operators.base_physical_operator import (
+    OneToOneOperator,
+)
 from ray.data._internal.memory_tracing import trace_allocation
 from ray.data._internal.stats import StatsDict
 from ray.data.block import Block, BlockAccessor, BlockExecStats, BlockMetadata
@@ -280,13 +282,13 @@ def _handle_task_done(self, task: "_TaskState"):
         if self._metrics.cur > self._metrics.peak:
             self._metrics.peak = self._metrics.cur
 
-    def inputs_done(self):
+    def all_inputs_done(self):
         self._block_ref_bundler.done_adding_bundles()
         if self._block_ref_bundler.has_bundle():
             # Handle any leftover bundles in the bundler.
             bundle = self._block_ref_bundler.get_next_bundle()
             self._add_bundled_input(bundle)
-        super().inputs_done()
+        super().all_inputs_done()
 
     def has_next(self) -> bool:
         assert self._started

diff --git a/python/ray/data/_internal/execution/operators/output_splitter.py b/python/ray/data/_internal/execution/operators/output_splitter.py
@@ -101,8 +101,8 @@ def add_input(self, bundle, input_index) -> None:
         self._buffer.append(bundle)
         self._dispatch_bundles()
 
-    def inputs_done(self) -> None:
-        super().inputs_done()
+    def all_inputs_done(self) -> None:
+        super().all_inputs_done()
         if not self._equal:
             self._dispatch_bundles(dispatch_all=True)
             assert not self._buffer, "Should have dispatched all bundles."

diff --git a/python/ray/data/_internal/execution/operators/union_operator.py b/python/ray/data/_internal/execution/operators/union_operator.py
@@ -0,0 +1,107 @@
+from typing import List, Optional
+
+from ray.data._internal.execution.interfaces import (
+    ExecutionOptions,
+    PhysicalOperator,
+    RefBundle,
+)
+from ray.data._internal.execution.operators.base_physical_operator import NAryOperator
+from ray.data._internal.stats import StatsDict
+
+
+class UnionOperator(NAryOperator):
+    """An operator that combines output blocks from
+    two or more input operators into a single output."""
+
+    def __init__(
+        self,
+        *input_ops: PhysicalOperator,
+    ):
+        """Create a UnionOperator.
+
+        Args:
+            input_ops: Operators generating input data for this operator to union.
+        """
+
+        # By default, union does not preserve the order of output blocks.
+        # To preserve the order, configure ExecutionOptions accordingly.
+        self._preserve_order = False
+
+        # Intermediary buffers used to store blocks from each input dependency.
+        # Only used when `self._prserve_order` is True.
+        self._input_buffers: List[List[RefBundle]] = [[] for _ in range(len(input_ops))]
+
+        # The index of the input dependency that is currently the source of
+        # the output buffer. New inputs from this input dependency will be added
+        # directly to the output buffer. Only used when `self._preserve_order` is True.
+        self._input_idx_to_output = 0
+
+        self._output_buffer: List[RefBundle] = []
+        self._stats: StatsDict = {}
+        super().__init__(*input_ops)
+
+    def start(self, options: ExecutionOptions):
+        # Whether to preserve the order of the input data (both the
+        # order of the input operators and the order of the blocks within).
+        self._preserve_order = options.preserve_order
+        super().start(options)
+
+    def num_outputs_total(self) -> Optional[int]:
+        num_outputs = 0
+        for input_op in self.input_dependencies:
+            op_num_outputs = input_op.num_outputs_total()
+            # If at least one of the input ops has an unknown number of outputs,
+            # the number of outputs of the union operator is unknown.
+            if op_num_outputs is None:
+                return None
+            num_outputs += op_num_outputs
+        return num_outputs
+
+    def add_input(self, refs: RefBundle, input_index: int) -> None:
+        assert not self.completed()
+        assert 0 <= input_index <= len(self._input_dependencies), input_index
+
+        if not self._preserve_order:
+            self._output_buffer.append(refs)
+        else:
+            if input_index == self._input_idx_to_output:
+                self._output_buffer.append(refs)
+            else:
+                self._input_buffers[input_index].append(refs)
+
+    def input_done(self, input_index: int) -> None:
+        """When `self._preserve_order` is True, change the
+        output buffer source to the next input dependency
+        once the current input dependency calls `input_done()`."""
+        if not self._preserve_order:
+            return
+        if not input_index == self._input_idx_to_output:
+            return
+        next_input_idx = self._input_idx_to_output + 1
+        if next_input_idx < len(self._input_buffers):
+            self._output_buffer.extend(self._input_buffers[next_input_idx])
+            self._input_buffers[next_input_idx].clear()
+            self._input_idx_to_output = next_input_idx
+        super().input_done(input_index)
+
+    def all_inputs_done(self) -> None:
+        # Note that in the case where order is not preserved, all inputs
+        # are directly added to the output buffer as soon as they are received,
+        # so there is no need to check any intermediary buffers.
+        if self._preserve_order:
+            for idx, input_buffer in enumerate(self._input_buffers):
+                assert len(input_buffer) == 0, (
+                    f"Input at index {idx} still has "
+                    f"{len(input_buffer)} blocks remaining."
+                )
+        super().all_inputs_done()
+
+    def has_next(self) -> bool:
+        # Check if the output buffer still contains at least one block.
+        return len(self._output_buffer) > 0
+
+    def get_next(self) -> RefBundle:
+        return self._output_buffer.pop(0)
+
+    def get_stats(self) -> StatsDict:
+        return self._stats
diff --git a/python/ray/data/_internal/execution/operators/zip_operator.py b/python/ray/data/_internal/execution/operators/zip_operator.py
@@ -59,13 +59,13 @@ def add_input(self, refs: RefBundle, input_index: int) -> None:
         else:
             self._right_buffer.append(refs)
 
-    def inputs_done(self) -> None:
+    def all_inputs_done(self) -> None:
         self._output_buffer, self._stats = self._zip(
             self._left_buffer, self._right_buffer
         )
         self._left_buffer.clear()
         self._right_buffer.clear()
-        super().inputs_done()
+        super().all_inputs_done()
 
     def has_next(self) -> bool:
         return len(self._output_buffer) > 0

diff --git a/python/ray/data/_internal/execution/streaming_executor.py b/python/ray/data/_internal/execution/streaming_executor.py
@@ -27,6 +27,7 @@
     build_streaming_topology,
     process_completed_tasks,
     select_operator_to_run,
+    update_operator_states,
 )
 from ray.data._internal.progress_bar import ProgressBar
 from ray.data._internal.stats import DatasetStats
@@ -257,6 +258,8 @@ def _scheduling_loop_step(self, topology: Topology) -> bool:
                 autoscaling_state=self._autoscaling_state,
             )
 
+        update_operator_states(topology)
+
         # Update the progress bar to reflect scheduling decisions.
         for op_state in topology.values():
             op_state.refresh_progress_bar()