[Data] Define optimizer rules with global variables (ray-project#36920)

We've temporarily disabled the limit-push-down rule. With this change, users can re-enable it manually by modifying the two module-level variables `LOGICAL_OPTIMIZER_RULES` and `PHYSICAL_OPTIMIZER_RULES`, in case it is useful. Later, we should consider providing a public API for users to configure the rules. Signed-off-by: Hao Chen <chenh1024@gmail.com> Signed-off-by: e428265 <arvind.chandramouli@lmco.com>
lmco · Aug 31, 2023 · 711b1a2 · 711b1a2
1 parent 238ae27
commit 711b1a2
Showing 1 changed file with 13 additions and 5 deletions.
diff --git a/python/ray/data/_internal/logical/optimizers.py b/python/ray/data/_internal/logical/optimizers.py
@@ -12,24 +12,32 @@
 )
 from ray.data._internal.planner.planner import Planner
 
+# TODO(scottjlee): add back LimitPushdownRule once we
+# enforce number of input/output rows remains the same
+# for Map/MapBatches ops.
+LOGICAL_OPTIMIZER_RULES = [
+    ReorderRandomizeBlocksRule,
+]
+
+PHYSICAL_OPTIMIZER_RULES = [
+    OperatorFusionRule,
+]
+
 
 class LogicalOptimizer(Optimizer):
     """The optimizer for logical operators."""
 
     @property
     def rules(self) -> List[Rule]:
-        # TODO(scottjlee): add back LimitPushdownRule once we
-        # enforce number of input/output rows remains the same
-        # for Map/MapBatches ops.
-        return [ReorderRandomizeBlocksRule()]
+        return [rule_cls() for rule_cls in LOGICAL_OPTIMIZER_RULES]
 
 
 class PhysicalOptimizer(Optimizer):
     """The optimizer for physical operators."""
 
     @property
     def rules(self) -> List["Rule"]:
-        return [OperatorFusionRule()]
+        return [rule_cls() for rule_cls in PHYSICAL_OPTIMIZER_RULES]
 
 
 def get_execution_plan(logical_plan: LogicalPlan) -> PhysicalPlan: