From d6ee0a136943d29f2862de8f0b6ad04bcd150f35 Mon Sep 17 00:00:00 2001
From: kangguangli
Date: Mon, 10 Apr 2023 13:12:34 +0800
Subject: [PATCH] [StandaloneExe] Remove flag about Executor (#52671)

* add strategy force_sequential_run

* remove flag

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

---
 .../fluid/framework/details/build_strategy.h  |   5 +-
 .../framework/new_executor/interpretercore.cc |   3 -
 .../framework/new_executor/interpretercore.h  |   1 -
 .../controlflow/conditional_block_op.cc       | 121 ++++++------
 .../fluid/operators/controlflow/while_op.cc   | 121 ++++++------
 paddle/fluid/pybind/parallel_executor.cc      |  10 +-
 python/paddle/fluid/executor.py               |  25 +---
 python/paddle/fluid/framework.py              |   6 -
 .../test_standalone_sequentail_run.py         |  12 +-
 9 files changed, 101 insertions(+), 203 deletions(-)

diff --git a/paddle/fluid/framework/details/build_strategy.h b/paddle/fluid/framework/details/build_strategy.h
index b06ef5d1d22a7b..f336ab88c0cb40 100644
--- a/paddle/fluid/framework/details/build_strategy.h
+++ b/paddle/fluid/framework/details/build_strategy.h
@@ -135,7 +135,7 @@ struct BuildStrategy {
   bool fuse_adamw_{false};
   // Fused feed forward
   bool fused_feedforward_{false};
-  bool force_sequential_run_{false};
+  bool sequential_run_{false};
 
   // mkldnn_enabled_op_types specify the operator type list to
   // use MKLDNN acceleration. It is null in default, means
@@ -270,8 +270,7 @@ inline std::ostream &operator<<(std::ostream &os,
   os << "fuse_gemm_epilogue_: " << strategy.fuse_gemm_epilogue_ << std::endl;
   os << "fused_attention_: " << strategy.fused_attention_ << std::endl;
   os << "fused_feedforward_: " << strategy.fused_feedforward_ << std::endl;
-  os << "force_sequential_run_: " << strategy.force_sequential_run_
-     << std::endl;
+  os << "sequential_run_: " << strategy.sequential_run_ << std::endl;
   os << "mkldnn_enabled_op_types_: ";
   for (auto str : strategy.mkldnn_enabled_op_types_) {
     os << str << ", ";
diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc
index c99de872c8b111..d61a0a472d8738 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.cc
+++ b/paddle/fluid/framework/new_executor/interpretercore.cc
@@ -50,9 +50,6 @@ PADDLE_DEFINE_EXPORTED_bool(new_executor_use_local_scope,
                             true,
                             "Use local_scope in new executor(especially used "
                             "in UT), can turn off for better performance");
-PADDLE_DEFINE_EXPORTED_bool(control_flow_use_new_executor,
-                            true,
-                            "Use new executor in control flow op");
 
 DECLARE_bool(check_nan_inf);
 DECLARE_bool(benchmark);
diff --git a/paddle/fluid/framework/new_executor/interpretercore.h b/paddle/fluid/framework/new_executor/interpretercore.h
index daa2a281f8b1d6..46cbf9cfc3fcb9 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.h
+++ b/paddle/fluid/framework/new_executor/interpretercore.h
@@ -34,7 +34,6 @@
 #include "paddle/fluid/platform/device_event.h"
 
 DECLARE_bool(new_executor_use_local_scope);
-DECLARE_bool(control_flow_use_new_executor);
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.cc b/paddle/fluid/operators/controlflow/conditional_block_op.cc
index 4544dade327e0f..ee8ec2e276b616 100644
--- a/paddle/fluid/operators/controlflow/conditional_block_op.cc
+++ b/paddle/fluid/operators/controlflow/conditional_block_op.cc
@@ -95,48 +95,28 @@ class ConditionalBlockOp : public ConditionalOp {
     auto &skip_vars =
         Attr<std::vector<std::string>>(ConditionalOp::kSkipEagerDeletionVars);
 
-      if (FLAGS_control_flow_use_new_executor) {
-        LOG_FIRST_N(INFO, 1)
-            << "[ControlFlow][ConditionalBlock] New Executor is Running.";
-        if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-          VLOG(10) << "[interpreterCore cache]" << core_.get();
-          VLOG_IF(10, core_)
-              << platform::is_same_place(core_->GetPlace(), dev_place);
-
-          framework::interpreter::ExecutionConfig execution_config;
-          execution_config.create_local_scope = false;
-          execution_config.used_for_control_flow_op = true;
-          execution_config.skip_gc_vars =
-              std::set<std::string>(skip_vars.begin(), skip_vars.end());
-
-          core_.reset(new InterpreterCore(
-              dev_place, *block, &cur_scope, execution_config));
-          VLOG(10) << "[interpreterCore cache]"
-                   << "new created:" << core_;
-        } else {
-          BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
-          core_->reset_scope(&cur_scope);
-        }
-
-        core_->Run({}, false);
-
+      LOG_FIRST_N(INFO, 1)
+          << "[ControlFlow][ConditionalBlock] New Executor is Running.";
+      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+        VLOG(10) << "[interpreterCore cache]" << core_.get();
+        VLOG_IF(10, core_) << platform::is_same_place(core_->GetPlace(),
+                                                      dev_place);
+
+        framework::interpreter::ExecutionConfig execution_config;
+        execution_config.create_local_scope = false;
+        execution_config.used_for_control_flow_op = true;
+        execution_config.skip_gc_vars =
+            std::set<std::string>(skip_vars.begin(), skip_vars.end());
+
+        core_.reset(new InterpreterCore(
+            dev_place, *block, &cur_scope, execution_config));
+        VLOG(10) << "[interpreterCore] created:" << core_;
       } else {
-        if (!exec_ || !platform::is_same_place(exec_->GetPlace(), dev_place)) {
-          auto &pdesc = *block->Program();
-          exec_.reset(new Executor(dev_place));
-          if (FLAGS_use_mkldnn) exec_->EnableMKLDNN(pdesc);
-          ctx_ = exec_->Prepare(pdesc, block->ID(), skip_vars, false);
-#ifdef PADDLE_WITH_MKLDNN
-          platform::AttachPointerHashToMKLDNNKey(exec_.get(), dev_place);
-          platform::RegisterModelLayout(ctx_->ops_, dev_place);
-#endif
-        }
-        exec_->RunPreparedContext(ctx_.get(),
-                                  &cur_scope,
-                                  /* create_local_scope */ false,
-                                  /* create_vars */ true,
-                                  /* keep_kids */ true);
+        BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
+        core_->reset_scope(&cur_scope);
       }
+
+      core_->Run({}, false);
     }
   }
@@ -208,47 +188,27 @@ class ConditionalBlockGradOp : public ConditionalOp {
       VLOG(3) << "Conditional Grad block.idx = " << block->ID()
               << ", scope = " << &cur_scope;
 
-      if (FLAGS_control_flow_use_new_executor) {
-        LOG_FIRST_N(INFO, 1)
-            << "[ControlFlow][ConditionalGradBlock] New Executor is Running.";
-        if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-          VLOG(10) << "[interpreterCore cache]" << core_.get();
-          VLOG_IF(10, core_)
-              << platform::is_same_place(core_->GetPlace(), dev_place);
-
-          framework::interpreter::ExecutionConfig execution_config;
-          execution_config.create_local_scope = false;
-          execution_config.used_for_control_flow_op = true;
-          execution_config.skip_gc_vars =
-              std::set<std::string>(inside_grads.begin(), inside_grads.end());
-
-          core_.reset(new InterpreterCore(
-              dev_place, *block, &cur_scope, execution_config));
-          VLOG(10) << "[interpreterCore cache]"
-                   << "new created:" << core_;
-        } else {
-          BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
-          core_->reset_scope(&cur_scope);
-        }
-        core_->Run({}, false);
-
+      LOG_FIRST_N(INFO, 1)
+          << "[ControlFlow][ConditionalGradBlock] New Executor is Running.";
+      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+        VLOG(10) << "[interpreterCore cache]" << core_.get();
+        VLOG_IF(10, core_) << platform::is_same_place(core_->GetPlace(),
+                                                      dev_place);
+
+        framework::interpreter::ExecutionConfig execution_config;
+        execution_config.create_local_scope = false;
+        execution_config.used_for_control_flow_op = true;
+        execution_config.skip_gc_vars =
+            std::set<std::string>(inside_grads.begin(), inside_grads.end());
+
+        core_.reset(new InterpreterCore(
+            dev_place, *block, &cur_scope, execution_config));
+        VLOG(10) << "[interpreterCore] created:" << core_;
       } else {
-        if (!exec_ || !platform::is_same_place(exec_->GetPlace(), dev_place)) {
-          auto &pdesc = *block->Program();
-          exec_.reset(new Executor(dev_place));
-          if (FLAGS_use_mkldnn) exec_->EnableMKLDNN(pdesc);
-          ctx_ = exec_->Prepare(pdesc, block->ID(), inside_grads, false);
-#ifdef PADDLE_WITH_MKLDNN
-          platform::AttachPointerHashToMKLDNNKey(exec_.get(), dev_place);
-          platform::RegisterModelLayout(ctx_->ops_, dev_place);
-#endif
-        }
-        exec_->RunPreparedContext(ctx_.get(),
-                                  &cur_scope,
-                                  /* create_local_scope */ false,
-                                  /* create_vars */ true,
-                                  /* keep_kids */ true);
+        BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
+        core_->reset_scope(&cur_scope);
       }
+      core_->Run({}, false);
 
       AssignLocalGradientToParentScope(
           dev_place, cur_scope, scope, inside_grads, outside_grads, inputs);
@@ -398,7 +358,8 @@ struct FilterNoGradInput {
                          std::vector<std::string> *vec) {
     auto f = [desc](const std::string &name) -> std::string {
       if (name == framework::kEmptyVarName) {
-        // don't drop empty var name, you can use Input(name, true) to drop it.
+        // don't drop empty var name, you can use Input(name, true) to drop
+        // it.
         return framework::kEmptyVarName;
       }
       auto var_desc =
diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc
index 3017a1e0fc4b79..30fdb90ce10696 100644
--- a/paddle/fluid/operators/controlflow/while_op.cc
+++ b/paddle/fluid/operators/controlflow/while_op.cc
@@ -120,7 +120,6 @@ class WhileOp : public framework::OperatorBase {
     platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
     auto &dev_ctx = *pool.Get(dev_place);
-    auto *program = block->Program();
 
     bool is_test = Attr<bool>("is_test");
 
     std::set<std::string> no_copy_var_names;
@@ -199,26 +198,18 @@ class WhileOp : public framework::OperatorBase {
       }
     }
 
-    if (FLAGS_control_flow_use_new_executor) {
-      LOG_FIRST_N(INFO, 1) << "[ControlFlow][WhileOp] New Executor is Running.";
-      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-        framework::Scope placeholder;  // Don't care if it's valid, just for
-                                       // initialize InterpreterCore
-        framework::interpreter::ExecutionConfig execution_config;
-        execution_config.create_local_scope = false;
-        execution_config.used_for_control_flow_op = true;
-        execution_config.skip_gc_vars =
-            std::set<std::string>(skip_vars.begin(), skip_vars.end());
-
-        core_.reset(new framework::InterpreterCore(
-            dev_place, *block, &placeholder, execution_config));
-      }
-    } else {
-      if (!executor_ ||
-          !platform::is_same_place(executor_->GetPlace(), dev_place)) {
-        executor_.reset(new framework::Executor(dev_place));
-        ctx_ = executor_->Prepare(*program, block->ID(), skip_vars);
-      }
+    LOG_FIRST_N(INFO, 1) << "[ControlFlow][WhileOp] New Executor is Running.";
+    if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+      framework::Scope placeholder;  // Don't care if it's valid; it is only
+                                     // used to initialize InterpreterCore
+      framework::interpreter::ExecutionConfig execution_config;
+      execution_config.create_local_scope = false;
+      execution_config.used_for_control_flow_op = true;
+      execution_config.skip_gc_vars =
+          std::set<std::string>(skip_vars.begin(), skip_vars.end());
+
+      core_.reset(new framework::InterpreterCore(
+          dev_place, *block, &placeholder, execution_config));
     }
 
     if (!is_test) {
@@ -244,22 +235,17 @@ class WhileOp : public framework::OperatorBase {
           }
         }
       }
-      if (FLAGS_control_flow_use_new_executor) {
-        BuildScopeForControlFlowOp(*core_, *block, &current_scope);
-        core_->reset_scope(&current_scope);
-        core_->Run({}, false);
-
-        // restore inputs place
-        for (const auto &n : input_var_original_places) {
-          const std::string &in_name = n.first;
-          const phi::Place &original_place = n.second;
-          // input vars exist in `scope` not `current_scope`
-          TransferVariablePlace(&scope, in_name, original_place, dev_ctx);
-        }
-      } else {
-        executor_->RunPreparedContext(
-            ctx_.get(), &current_scope, false, true, true);
+      BuildScopeForControlFlowOp(*core_, *block, &current_scope);
+      core_->reset_scope(&current_scope);
+      core_->Run({}, false);
+
+      // restore inputs place
+      for (const auto &n : input_var_original_places) {
+        const std::string &in_name = n.first;
+        const phi::Place &original_place = n.second;
+        // input vars exist in `scope` not `current_scope`
+        TransferVariablePlace(&scope, in_name, original_place, dev_ctx);
       }
 
       for (auto &var_rename : rename_vars) {
@@ -273,12 +259,8 @@ class WhileOp : public framework::OperatorBase {
       }
     } else {
       auto &current_scope = scope.NewScope();
-      if (FLAGS_control_flow_use_new_executor) {
-        BuildScopeForControlFlowOp(*core_, *block, &current_scope);
-        core_->reset_scope(&current_scope);
-      } else {
-        executor_->CreateVariables(*program, &current_scope, block->ID());
-      }
+      BuildScopeForControlFlowOp(*core_, *block, &current_scope);
+      core_->reset_scope(&current_scope);
 
       while (cond_data) {
         for (auto &name : current_scope.LocalVarNames()) {
@@ -295,12 +277,7 @@ class WhileOp : public framework::OperatorBase {
           }
         }
 
-        if (FLAGS_control_flow_use_new_executor) {
-          core_->Run({}, false);
-        } else {
-          executor_->RunPreparedContext(
-              ctx_.get(), &current_scope, false, false, false);
-        }
+        core_->Run({}, false);
 
         cond_data = GetCondData(
             scope.FindVar(Input(kCondition))->Get<phi::DenseTensor>());
@@ -367,7 +344,6 @@ class WhileGradOp : public framework::OperatorBase {
     auto &dev_ctx = *pool.Get(dev_place);
 
     auto *block = Attr<framework::BlockDesc *>(kStepBlock);
-    auto *program = block->Program();
     auto *parent_block = block->ParentBlock();
 
     auto &skip_vars = Attr<std::vector<std::string>>(kSkipEagerDeletionVars);
@@ -391,28 +367,20 @@ class WhileGradOp : public framework::OperatorBase {
                           outside_og_names.size(),
                           inside_og_names.size()));
 
-    if (FLAGS_control_flow_use_new_executor) {
-      LOG_FIRST_N(INFO, 1)
-          << "[ControlFlow][WhileGradOp] New Executor is Running.";
-      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-        std::set<std::string> skip_gc_vars(skip_vars.begin(), skip_vars.end());
-        framework::Scope placeholder;  // Don't care if it's valid, just for
-                                       // initialize InterpreterCore
-        framework::interpreter::ExecutionConfig execution_config;
-        execution_config.create_local_scope = false;
-        execution_config.used_for_control_flow_op = true;
-        execution_config.skip_gc_vars =
-            std::set<std::string>(skip_vars.begin(), skip_vars.end());
-
-        core_.reset(new framework::InterpreterCore(
-            dev_place, *block, &placeholder, execution_config));
-      }
-    } else {
-      if (!executor_ ||
-          !platform::is_same_place(executor_->GetPlace(), dev_place)) {
-        executor_.reset(new framework::Executor(dev_place));
-        ctx_ = executor_->Prepare(*program, block->ID(), skip_vars);
-      }
+    LOG_FIRST_N(INFO, 1)
+        << "[ControlFlow][WhileGradOp] New Executor is Running.";
+    if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+      std::set<std::string> skip_gc_vars(skip_vars.begin(), skip_vars.end());
+      framework::Scope placeholder;  // Don't care if it's valid; it is only
+                                     // used to initialize InterpreterCore
+      framework::interpreter::ExecutionConfig execution_config;
+      execution_config.create_local_scope = false;
+      execution_config.used_for_control_flow_op = true;
+      execution_config.skip_gc_vars =
+          std::set<std::string>(skip_vars.begin(), skip_vars.end());
+
+      core_.reset(new framework::InterpreterCore(
+          dev_place, *block, &placeholder, execution_config));
     }
 
     for (auto cur_scope_iter = step_scopes->rbegin();
@@ -504,14 +472,9 @@ class WhileGradOp : public framework::OperatorBase {
         }
       }
 
-      if (FLAGS_control_flow_use_new_executor) {
-        BuildScopeForControlFlowOp(*core_, *block, *cur_scope_iter);
-        core_->reset_scope(*cur_scope_iter);
-        core_->Run({}, false);
-      } else {
-        executor_->RunPreparedContext(
-            ctx_.get(), *cur_scope_iter, false, true, true);
-      }
+      BuildScopeForControlFlowOp(*core_, *block, *cur_scope_iter);
+      core_->reset_scope(*cur_scope_iter);
+      core_->Run({}, false);
 
       // The Outputs(kXGRAD) contains the names of the gradient of parameters
      // and inputs.
diff --git a/paddle/fluid/pybind/parallel_executor.cc b/paddle/fluid/pybind/parallel_executor.cc
index 0dc171aabc740e..0c8898b524fae5 100644
--- a/paddle/fluid/pybind/parallel_executor.cc
+++ b/paddle/fluid/pybind/parallel_executor.cc
@@ -760,17 +760,17 @@ void BindParallelExecutor(pybind11::module &m) {  // NOLINT
                 build_strategy.fused_feedforward = True
           )DOC")
       .def_property(
-          "force_sequential_run",
-          [](const BuildStrategy &self) { return self.force_sequential_run_; },
+          "sequential_run",
+          [](const BuildStrategy &self) { return self.sequential_run_; },
           [](BuildStrategy &self, bool b) {
             PADDLE_ENFORCE_NE(self.IsFinalized(),
                               true,
                               platform::errors::PreconditionNotMet(
                                   "BuildStrategy has been finalized, cannot be "
                                   "configured again."));
-            self.force_sequential_run_ = b;
+            self.sequential_run_ = b;
           },
-          R"DOC((bool, optional): force_sequential_run is used to let the `StandaloneExecutor` run ops by the
+          R"DOC((bool, optional): sequential_run is used to let the `StandaloneExecutor` run ops in the
                 order of `ProgramDesc`. Default is False.
                 Examples:
                     .. code-block:: python

                        import paddle
                        import paddle.static as static

                        paddle.enable_static()

                        build_strategy = static.BuildStrategy()
-                       build_strategy.fused_feedforward = True
+                       build_strategy.sequential_run = True
           )DOC")
       .def_property(
           "fuse_bn_act_ops",
diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py
index 55dac9695cdffb..37718c25c6c1f1 100755
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -493,26 +493,6 @@ def _to_str(var):
     return _to_str(var)
 
 
-def _is_dy2st_enable_standalone_executor():
-    return framework._dy2st_enable_standalone_executor_ in [
-        1,
-        '1',
-        True,
-        'True',
-        'true',
-    ]
-
-
-def _is_cuda_graph_enable_standalone_executor():
-    return framework._cuda_graph_enable_standalone_executor_ in [
-        1,
-        '1',
-        True,
-        'True',
-        'true',
-    ]
-
-
 def _prepare_fleet_executor():
     from ..distributed.fleet.proto import fleet_executor_desc_pb2
 
@@ -1619,10 +1599,7 @@ def _can_use_interpreter_core(program, place):
                 else program._graph
             )
             build_strategy = compiled_program._build_strategy
-            if (
-                build_strategy is not None
-                and build_strategy.force_sequential_run
-            ):
+            if build_strategy is not None and build_strategy.sequential_run:
                 schedule_flag = [
                     'FLAGS_new_executor_serial_run',
                     'FLAGS_new_executor_sequential_run',
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 98d0fbf0620b7d..708cc462e78ea9 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -117,12 +117,6 @@ def __setattr__(self, name, val):
 _current_cuda_graph_mode = None
 _global_flags_ = core.globals()
 
-_dy2st_enable_standalone_executor_ = os.environ.get(
-    'FLAGS_DY2ST_USE_STANDALONE_EXECUTOR', 1
-)
-_cuda_graph_enable_standalone_executor_ = os.environ.get(
-    'FLAGS_CUDA_GRAPH_USE_STANDALONE_EXECUTOR', 0
-)
 
 # special_op_attrs, extra_op_attrs are prepared for printing warnings
 # when turning on FLAGS_print_extra_attrs
diff --git a/test/standalone_executor/test_standalone_sequentail_run.py b/test/standalone_executor/test_standalone_sequentail_run.py
index bc7368e58b4e72..64b4e4293d100f 100644
--- a/test/standalone_executor/test_standalone_sequentail_run.py
+++ b/test/standalone_executor/test_standalone_sequentail_run.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
 import unittest
 
 import numpy as np
@@ -31,13 +32,14 @@ def build_program(self):
 
         return main_program, startup_program, [c]
 
-    def run_program(self, force_sequential_run=False):
+    def run_program(self, sequential_run=False):
         seed = 100
         paddle.seed(seed)
         np.random.seed(seed)
         main, startup, outs = self.build_program()
         build_strategy = paddle.static.BuildStrategy()
-        build_strategy.force_sequential_run = force_sequential_run
+        build_strategy.sequential_run = sequential_run
+        print(build_strategy)
         compiled_program = paddle.static.CompiledProgram(
             main, build_strategy=build_strategy
         )
@@ -60,6 +62,12 @@ def test_result(self):
         ret2 = self.run_program(False)
         np.testing.assert_array_equal(ret1, ret2)
 
+    def test_str_flag(self):
+        paddle.enable_static()
+        os.environ['FLAGS_new_executor_sequential_run'] = 'true'
+        ret1 = self.run_program(True)
+        assert os.environ['FLAGS_new_executor_sequential_run'] == "true"
+
 
 if __name__ == "__main__":
     unittest.main()
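
---
Usage sketch (not part of the patch above): after this change the switch is
`build_strategy.sequential_run` rather than `force_sequential_run`. The snippet
below mirrors the updated test; the toy program, shapes, and feed data are
illustrative assumptions, not taken from the PR.

    import numpy as np
    import paddle

    paddle.enable_static()

    # Build a tiny static program; any static-mode program works here.
    main = paddle.static.Program()
    startup = paddle.static.Program()
    with paddle.static.program_guard(main, startup):
        x = paddle.static.data(name='x', shape=[2, 2], dtype='float32')
        y = paddle.static.data(name='y', shape=[2, 2], dtype='float32')
        out = paddle.add(x, y)

    build_strategy = paddle.static.BuildStrategy()
    # Formerly build_strategy.force_sequential_run; renamed by this patch.
    build_strategy.sequential_run = True

    compiled = paddle.static.CompiledProgram(main, build_strategy=build_strategy)
    exe = paddle.static.Executor(paddle.CPUPlace())
    exe.run(startup)
    feed = {
        'x': np.ones([2, 2], dtype='float32'),
        'y': np.ones([2, 2], dtype='float32'),
    }
    (ret,) = exe.run(compiled, feed=feed, fetch_list=[out])

As the executor.py hunk above shows, setting sequential_run wires the option to
the FLAGS_new_executor_serial_run / FLAGS_new_executor_sequential_run flags, so
the StandaloneExecutor schedules ops in ProgramDesc order instead of
dispatching them concurrently.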