From d6ee0a136943d29f2862de8f0b6ad04bcd150f35 Mon Sep 17 00:00:00 2001
From: kangguangli
Date: Mon, 10 Apr 2023 13:12:34 +0800
Subject: [PATCH] [StandaloneExe] Remove flag about Executor (#52671)

* add strategy force_sequential_run

* remove flag

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

---
 .../fluid/framework/details/build_strategy.h  |   5 +-
 .../framework/new_executor/interpretercore.cc |   3 -
 .../framework/new_executor/interpretercore.h  |   1 -
 .../controlflow/conditional_block_op.cc       | 121 ++++++------
 .../fluid/operators/controlflow/while_op.cc   | 121 ++++++------
 paddle/fluid/pybind/parallel_executor.cc      |  10 +-
 python/paddle/fluid/executor.py               |  25 +---
 python/paddle/fluid/framework.py              |   6 -
 .../test_standalone_sequentail_run.py         |  12 +-
 9 files changed, 101 insertions(+), 203 deletions(-)

diff --git a/paddle/fluid/framework/details/build_strategy.h b/paddle/fluid/framework/details/build_strategy.h
index b06ef5d1d22a7b..f336ab88c0cb40 100644
--- a/paddle/fluid/framework/details/build_strategy.h
+++ b/paddle/fluid/framework/details/build_strategy.h
@@ -135,7 +135,7 @@ struct BuildStrategy {
   bool fuse_adamw_{false};
   // Fused feed forward
   bool fused_feedforward_{false};
-  bool force_sequential_run_{false};
+  bool sequential_run_{false};
 
   // mkldnn_enabled_op_types specify the operator type list to
   // use MKLDNN acceleration. It is null in default, means
@@ -270,8 +270,7 @@ inline std::ostream &operator<<(std::ostream &os,
   os << "fuse_gemm_epilogue_: " << strategy.fuse_gemm_epilogue_ << std::endl;
   os << "fused_attention_: " << strategy.fused_attention_ << std::endl;
   os << "fused_feedforward_: " << strategy.fused_feedforward_ << std::endl;
-  os << "force_sequential_run_: " << strategy.force_sequential_run_
-     << std::endl;
+  os << "sequential_run_: " << strategy.sequential_run_ << std::endl;
   os << "mkldnn_enabled_op_types_: ";
   for (auto str : strategy.mkldnn_enabled_op_types_) {
     os << str << ", ";
diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc
index c99de872c8b111..d61a0a472d8738 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.cc
+++ b/paddle/fluid/framework/new_executor/interpretercore.cc
@@ -50,9 +50,6 @@ PADDLE_DEFINE_EXPORTED_bool(new_executor_use_local_scope,
                             true,
                             "Use local_scope in new executor(especially used "
                             "in UT), can turn off for better performance");
-PADDLE_DEFINE_EXPORTED_bool(control_flow_use_new_executor,
-                            true,
-                            "Use new executor in control flow op");
 
 DECLARE_bool(check_nan_inf);
 DECLARE_bool(benchmark);
diff --git a/paddle/fluid/framework/new_executor/interpretercore.h b/paddle/fluid/framework/new_executor/interpretercore.h
index daa2a281f8b1d6..46cbf9cfc3fcb9 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.h
+++ b/paddle/fluid/framework/new_executor/interpretercore.h
@@ -34,7 +34,6 @@
 #include "paddle/fluid/platform/device_event.h"
 
 DECLARE_bool(new_executor_use_local_scope);
-DECLARE_bool(control_flow_use_new_executor);
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.cc b/paddle/fluid/operators/controlflow/conditional_block_op.cc
index 4544dade327e0f..ee8ec2e276b616 100644
--- a/paddle/fluid/operators/controlflow/conditional_block_op.cc
+++ b/paddle/fluid/operators/controlflow/conditional_block_op.cc
@@ -95,48 +95,28 @@ class ConditionalBlockOp : public ConditionalOp {
     auto &skip_vars =
         Attr<std::vector<std::string>>(ConditionalOp::kSkipEagerDeletionVars);
 
-      if (FLAGS_control_flow_use_new_executor) {
-        LOG_FIRST_N(INFO, 1)
-            << "[ControlFlow][ConditionalBlock] New Executor is Running.";
-        if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-          VLOG(10) << "[interpreterCore cache]" << core_.get();
-          VLOG_IF(10, core_)
-              << platform::is_same_place(core_->GetPlace(), dev_place);
-
-          framework::interpreter::ExecutionConfig execution_config;
-          execution_config.create_local_scope = false;
-          execution_config.used_for_control_flow_op = true;
-          execution_config.skip_gc_vars =
-              std::set<std::string>(skip_vars.begin(), skip_vars.end());
-
-          core_.reset(new InterpreterCore(
-              dev_place, *block, &cur_scope, execution_config));
-          VLOG(10) << "[interpreterCore cache]"
-                   << "new created:" << core_;
-        } else {
-          BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
-          core_->reset_scope(&cur_scope);
-        }
-
-        core_->Run({}, false);
-
+      LOG_FIRST_N(INFO, 1)
+          << "[ControlFlow][ConditionalBlock] New Executor is Running.";
+      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+        VLOG(10) << "[interpreterCore cache]" << core_.get();
+        VLOG_IF(10, core_) << platform::is_same_place(core_->GetPlace(),
+                                                      dev_place);
+
+        framework::interpreter::ExecutionConfig execution_config;
+        execution_config.create_local_scope = false;
+        execution_config.used_for_control_flow_op = true;
+        execution_config.skip_gc_vars =
+            std::set<std::string>(skip_vars.begin(), skip_vars.end());
+
+        core_.reset(new InterpreterCore(
+            dev_place, *block, &cur_scope, execution_config));
+        VLOG(10) << "[interpreterCore] created:" << core_;
       } else {
-        if (!exec_ || !platform::is_same_place(exec_->GetPlace(), dev_place)) {
-          auto &pdesc = *block->Program();
-          exec_.reset(new Executor(dev_place));
-          if (FLAGS_use_mkldnn) exec_->EnableMKLDNN(pdesc);
-          ctx_ = exec_->Prepare(pdesc, block->ID(), skip_vars, false);
-#ifdef PADDLE_WITH_MKLDNN
-          platform::AttachPointerHashToMKLDNNKey(exec_.get(), dev_place);
-          platform::RegisterModelLayout(ctx_->ops_, dev_place);
-#endif
-        }
-        exec_->RunPreparedContext(ctx_.get(),
-                                  &cur_scope,
-                                  /* create_local_scope */ false,
-                                  /* create_vars */ true,
-                                  /* keep_kids */ true);
+        BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
+        core_->reset_scope(&cur_scope);
       }
+
+      core_->Run({}, false);
     }
   }
@@ -208,47 +188,27 @@ class ConditionalBlockGradOp : public ConditionalOp {
       VLOG(3) << "Conditional Grad block.idx = " << block->ID()
               << ", scope = " << &cur_scope;
 
-      if (FLAGS_control_flow_use_new_executor) {
-        LOG_FIRST_N(INFO, 1)
-            << "[ControlFlow][ConditionalGradBlock] New Executor is Running.";
-        if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-          VLOG(10) << "[interpreterCore cache]" << core_.get();
-          VLOG_IF(10, core_)
-              << platform::is_same_place(core_->GetPlace(), dev_place);
-
-          framework::interpreter::ExecutionConfig execution_config;
-          execution_config.create_local_scope = false;
-          execution_config.used_for_control_flow_op = true;
-          execution_config.skip_gc_vars =
-              std::set<std::string>(inside_grads.begin(), inside_grads.end());
-
-          core_.reset(new InterpreterCore(
-              dev_place, *block, &cur_scope, execution_config));
-          VLOG(10) << "[interpreterCore cache]"
-                   << "new created:" << core_;
-        } else {
-          BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
-          core_->reset_scope(&cur_scope);
-        }
-        core_->Run({}, false);
-
+      LOG_FIRST_N(INFO, 1)
+          << "[ControlFlow][ConditionalGradBlock] New Executor is Running.";
+      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+        VLOG(10) << "[interpreterCore cache]" << core_.get();
+        VLOG_IF(10, core_) << platform::is_same_place(core_->GetPlace(),
+                                                      dev_place);
+
+        framework::interpreter::ExecutionConfig execution_config;
+        execution_config.create_local_scope = false;
+        execution_config.used_for_control_flow_op = true;
+        execution_config.skip_gc_vars =
+            std::set<std::string>(inside_grads.begin(), inside_grads.end());
+
+        core_.reset(new InterpreterCore(
+            dev_place, *block, &cur_scope, execution_config));
+        VLOG(10) << "[interpreterCore] created:" << core_;
       } else {
-        if (!exec_ || !platform::is_same_place(exec_->GetPlace(), dev_place)) {
-          auto &pdesc = *block->Program();
-          exec_.reset(new Executor(dev_place));
-          if (FLAGS_use_mkldnn) exec_->EnableMKLDNN(pdesc);
-          ctx_ = exec_->Prepare(pdesc, block->ID(), inside_grads, false);
-#ifdef PADDLE_WITH_MKLDNN
-          platform::AttachPointerHashToMKLDNNKey(exec_.get(), dev_place);
-          platform::RegisterModelLayout(ctx_->ops_, dev_place);
-#endif
-        }
-        exec_->RunPreparedContext(ctx_.get(),
-                                  &cur_scope,
-                                  /* create_local_scope */ false,
-                                  /* create_vars */ true,
-                                  /* keep_kids */ true);
+        BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
+        core_->reset_scope(&cur_scope);
       }
+      core_->Run({}, false);
 
       AssignLocalGradientToParentScope(
           dev_place, cur_scope, scope, inside_grads, outside_grads, inputs);
@@ -398,7 +358,8 @@ struct FilterNoGradInput {
                          std::vector<std::string> *vec) {
     auto f = [desc](const std::string &name) -> std::string {
       if (name == framework::kEmptyVarName) {
-        // don't drop empty var name, you can use Input(name, true) to drop it.
+        // don't drop empty var name, you can use Input(name, true) to drop
+        // it.
         return framework::kEmptyVarName;
       }
       auto var_desc =
diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc
index 3017a1e0fc4b79..30fdb90ce10696 100644
--- a/paddle/fluid/operators/controlflow/while_op.cc
+++ b/paddle/fluid/operators/controlflow/while_op.cc
@@ -120,7 +120,6 @@ class WhileOp : public framework::OperatorBase {
     platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
     auto &dev_ctx = *pool.Get(dev_place);
-    auto *program = block->Program();
 
     bool is_test = Attr<bool>("is_test");
 
     std::set<std::string> no_copy_var_names;
@@ -199,26 +198,18 @@ class WhileOp : public framework::OperatorBase {
       }
     }
 
-    if (FLAGS_control_flow_use_new_executor) {
-      LOG_FIRST_N(INFO, 1) << "[ControlFlow][WhileOp] New Executor is Running.";
-      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-        framework::Scope placeholder;  // Don't care if it's valid, just for
-                                       // initialize InterpreterCore
-        framework::interpreter::ExecutionConfig execution_config;
-        execution_config.create_local_scope = false;
-        execution_config.used_for_control_flow_op = true;
-        execution_config.skip_gc_vars =
-            std::set<std::string>(skip_vars.begin(), skip_vars.end());
-
-        core_.reset(new framework::InterpreterCore(
-            dev_place, *block, &placeholder, execution_config));
-      }
-    } else {
-      if (!executor_ ||
-          !platform::is_same_place(executor_->GetPlace(), dev_place)) {
-        executor_.reset(new framework::Executor(dev_place));
-        ctx_ = executor_->Prepare(*program, block->ID(), skip_vars);
-      }
+    LOG_FIRST_N(INFO, 1) << "[ControlFlow][WhileOp] New Executor is Running.";
+    if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+      framework::Scope placeholder;  // Don't care if it's valid; it is only
+                                     // used to initialize InterpreterCore
+      framework::interpreter::ExecutionConfig execution_config;
+      execution_config.create_local_scope = false;
+      execution_config.used_for_control_flow_op = true;
+      execution_config.skip_gc_vars =
+          std::set<std::string>(skip_vars.begin(), skip_vars.end());
+
+      core_.reset(new framework::InterpreterCore(
+          dev_place, *block, &placeholder, execution_config));
     }
 
     if (!is_test) {
@@ -244,22 +235,17 @@ class WhileOp : public framework::OperatorBase {
           }
         }
       }
-      if (FLAGS_control_flow_use_new_executor) {
-        BuildScopeForControlFlowOp(*core_, *block, &current_scope);
-        core_->reset_scope(&current_scope);
-        core_->Run({}, false);
-
-        // restore inputs place
-        for (const auto &n : input_var_original_places) {
-          const std::string &in_name = n.first;
-          const phi::Place &original_place = n.second;
-          // input vars exist in `scope` not `current_scope`
-          TransferVariablePlace(&scope, in_name, original_place, dev_ctx);
-        }
-      } else {
-        executor_->RunPreparedContext(
-            ctx_.get(), &current_scope, false, true, true);
+      BuildScopeForControlFlowOp(*core_, *block, &current_scope);
+      core_->reset_scope(&current_scope);
+      core_->Run({}, false);
+
+      // restore inputs place
+      for (const auto &n : input_var_original_places) {
+        const std::string &in_name = n.first;
+        const phi::Place &original_place = n.second;
+        // input vars exist in `scope` not `current_scope`
+        TransferVariablePlace(&scope, in_name, original_place, dev_ctx);
       }
 
       for (auto &var_rename : rename_vars) {
@@ -273,12 +259,8 @@ class WhileOp : public framework::OperatorBase {
       }
     } else {
       auto &current_scope = scope.NewScope();
-      if (FLAGS_control_flow_use_new_executor) {
-        BuildScopeForControlFlowOp(*core_, *block, &current_scope);
-        core_->reset_scope(&current_scope);
-      } else {
-        executor_->CreateVariables(*program, &current_scope, block->ID());
-      }
+      BuildScopeForControlFlowOp(*core_, *block, &current_scope);
+      core_->reset_scope(&current_scope);
 
       while (cond_data) {
         for (auto &name : current_scope.LocalVarNames()) {
@@ -295,12 +277,7 @@ class WhileOp : public framework::OperatorBase {
           }
         }
 
-        if (FLAGS_control_flow_use_new_executor) {
-          core_->Run({}, false);
-        } else {
-          executor_->RunPreparedContext(
-              ctx_.get(), &current_scope, false, false, false);
-        }
+        core_->Run({}, false);
 
         cond_data = GetCondData(
             scope.FindVar(Input(kCondition))->Get<phi::DenseTensor>());
@@ -367,7 +344,6 @@ class WhileGradOp : public framework::OperatorBase {
     auto &dev_ctx = *pool.Get(dev_place);
 
     auto *block = Attr<framework::BlockDesc *>(kStepBlock);
-    auto *program = block->Program();
     auto *parent_block = block->ParentBlock();
 
     auto &skip_vars = Attr<std::vector<std::string>>(kSkipEagerDeletionVars);
@@ -391,28 +367,20 @@ class WhileGradOp : public framework::OperatorBase {
                           outside_og_names.size(),
                           inside_og_names.size()));
 
-    if (FLAGS_control_flow_use_new_executor) {
-      LOG_FIRST_N(INFO, 1)
-          << "[ControlFlow][WhileGradOp] New Executor is Running.";
-      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-        std::set<std::string> skip_gc_vars(skip_vars.begin(), skip_vars.end());
-        framework::Scope placeholder;  // Don't care if it's valid, just for
-                                       // initialize InterpreterCore
-        framework::interpreter::ExecutionConfig execution_config;
-        execution_config.create_local_scope = false;
-        execution_config.used_for_control_flow_op = true;
-        execution_config.skip_gc_vars =
-            std::set<std::string>(skip_vars.begin(), skip_vars.end());
-
-        core_.reset(new framework::InterpreterCore(
-            dev_place, *block, &placeholder, execution_config));
-      }
-    } else {
-      if (!executor_ ||
-          !platform::is_same_place(executor_->GetPlace(), dev_place)) {
-        executor_.reset(new framework::Executor(dev_place));
-        ctx_ = executor_->Prepare(*program, block->ID(), skip_vars);
-      }
+    LOG_FIRST_N(INFO, 1)
+        << "[ControlFlow][WhileGradOp] New Executor is Running.";
+    if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+      std::set<std::string> skip_gc_vars(skip_vars.begin(), skip_vars.end());
+      framework::Scope placeholder;  // Don't care if it's valid; it is only
+                                     // used to initialize InterpreterCore
+      framework::interpreter::ExecutionConfig execution_config;
+      execution_config.create_local_scope = false;
+      execution_config.used_for_control_flow_op = true;
+      execution_config.skip_gc_vars =
+          std::set<std::string>(skip_vars.begin(), skip_vars.end());
+
+      core_.reset(new framework::InterpreterCore(
+          dev_place, *block, &placeholder, execution_config));
     }
 
     for (auto cur_scope_iter = step_scopes->rbegin();
@@ -504,14 +472,9 @@ class WhileGradOp : public framework::OperatorBase {
         }
       }
 
-      if (FLAGS_control_flow_use_new_executor) {
-        BuildScopeForControlFlowOp(*core_, *block, *cur_scope_iter);
-        core_->reset_scope(*cur_scope_iter);
-        core_->Run({}, false);
-      } else {
-        executor_->RunPreparedContext(
-            ctx_.get(), *cur_scope_iter, false, true, true);
-      }
+      BuildScopeForControlFlowOp(*core_, *block, *cur_scope_iter);
+      core_->reset_scope(*cur_scope_iter);
+      core_->Run({}, false);
 
       // The Outputs(kXGRAD) contains the names of the gradient of parameters
      // and inputs.
diff --git a/paddle/fluid/pybind/parallel_executor.cc b/paddle/fluid/pybind/parallel_executor.cc
index 0dc171aabc740e..0c8898b524fae5 100644
--- a/paddle/fluid/pybind/parallel_executor.cc
+++ b/paddle/fluid/pybind/parallel_executor.cc
@@ -760,17 +760,17 @@ void BindParallelExecutor(pybind11::module &m) {  // NOLINT
                 build_strategy.fused_feedforward = True
           )DOC")
       .def_property(
-          "force_sequential_run",
-          [](const BuildStrategy &self) { return self.force_sequential_run_; },
+          "sequential_run",
+          [](const BuildStrategy &self) { return self.sequential_run_; },
           [](BuildStrategy &self, bool b) {
             PADDLE_ENFORCE_NE(self.IsFinalized(),
                               true,
                               platform::errors::PreconditionNotMet(
                                   "BuildStrategy has been finalized, cannot be "
                                   "configured again."));
-            self.force_sequential_run_ = b;
+            self.sequential_run_ = b;
           },
-          R"DOC((bool, optional): force_sequential_run is used to let the `StandaloneExecutor` run ops by the
+          R"DOC((bool, optional): sequential_run is used to let the `StandaloneExecutor` run ops in the
                 order of `ProgramDesc`. Default is False.
                 Examples:
                     .. code-block:: python

                        import paddle
                        import paddle.static as static

                        paddle.enable_static()

                        build_strategy = static.BuildStrategy()
-                       build_strategy.fused_feedforward = True
+                       build_strategy.sequential_run = True
           )DOC")
       .def_property(
           "fuse_bn_act_ops",
diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py
index 55dac9695cdffb..37718c25c6c1f1 100755
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -493,26 +493,6 @@ def _to_str(var):
     return _to_str(var)
 
 
-def _is_dy2st_enable_standalone_executor():
-    return framework._dy2st_enable_standalone_executor_ in [
-        1,
-        '1',
-        True,
-        'True',
-        'true',
-    ]
-
-
-def _is_cuda_graph_enable_standalone_executor():
-    return framework._cuda_graph_enable_standalone_executor_ in [
-        1,
-        '1',
-        True,
-        'True',
-        'true',
-    ]
-
-
 def _prepare_fleet_executor():
     from ..distributed.fleet.proto import fleet_executor_desc_pb2
 
@@ -1619,10 +1599,7 @@ def _can_use_interpreter_core(program, place):
                 else program._graph
             )
             build_strategy = compiled_program._build_strategy
-            if (
-                build_strategy is not None
-                and build_strategy.force_sequential_run
-            ):
+            if build_strategy is not None and build_strategy.sequential_run:
                 schedule_flag = [
                     'FLAGS_new_executor_serial_run',
                     'FLAGS_new_executor_sequential_run',
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 98d0fbf0620b7d..708cc462e78ea9 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -117,12 +117,6 @@ def __setattr__(self, name, val):
 _current_cuda_graph_mode = None
 _global_flags_ = core.globals()
 
-_dy2st_enable_standalone_executor_ = os.environ.get(
-    'FLAGS_DY2ST_USE_STANDALONE_EXECUTOR', 1
-)
-_cuda_graph_enable_standalone_executor_ = os.environ.get(
-    'FLAGS_CUDA_GRAPH_USE_STANDALONE_EXECUTOR', 0
-)
 
 # special_op_attrs, extra_op_attrs are prepared for printing warnings
 # when turning on FLAGS_print_extra_attrs
diff --git a/test/standalone_executor/test_standalone_sequentail_run.py b/test/standalone_executor/test_standalone_sequentail_run.py
index bc7368e58b4e72..64b4e4293d100f 100644
--- a/test/standalone_executor/test_standalone_sequentail_run.py
+++ b/test/standalone_executor/test_standalone_sequentail_run.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
 import unittest
 
 import numpy as np
@@ -31,13 +32,14 @@ def build_program(self):
 
         return main_program, startup_program, [c]
 
-    def run_program(self, force_sequential_run=False):
+    def run_program(self, sequential_run=False):
         seed = 100
         paddle.seed(seed)
         np.random.seed(seed)
         main, startup, outs = self.build_program()
         build_strategy = paddle.static.BuildStrategy()
-        build_strategy.force_sequential_run = force_sequential_run
+        build_strategy.sequential_run = sequential_run
+        print(build_strategy)
         compiled_program = paddle.static.CompiledProgram(
             main, build_strategy=build_strategy
         )
@@ -60,6 +62,12 @@ def test_result(self):
         ret2 = self.run_program(False)
         np.testing.assert_array_equal(ret1, ret2)
 
+    def test_str_flag(self):
+        paddle.enable_static()
+        os.environ['FLAGS_new_executor_sequential_run'] = 'true'
+        ret1 = self.run_program(True)
+        assert os.environ['FLAGS_new_executor_sequential_run'] == "true"
+
 
 if __name__ == "__main__":
     unittest.main()
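
---
Usage sketch (not part of the patch above): after this change the switch is
`build_strategy.sequential_run` rather than `force_sequential_run`. The snippet
below mirrors the updated test; the toy program, shapes, and feed data are
illustrative assumptions, not taken from the PR.

    import numpy as np
    import paddle

    paddle.enable_static()

    # Build a tiny static program; any static-mode program works here.
    main = paddle.static.Program()
    startup = paddle.static.Program()
    with paddle.static.program_guard(main, startup):
        x = paddle.static.data(name='x', shape=[2, 2], dtype='float32')
        y = paddle.static.data(name='y', shape=[2, 2], dtype='float32')
        out = paddle.add(x, y)

    build_strategy = paddle.static.BuildStrategy()
    # Formerly build_strategy.force_sequential_run; renamed by this patch.
    build_strategy.sequential_run = True

    compiled = paddle.static.CompiledProgram(main, build_strategy=build_strategy)
    exe = paddle.static.Executor(paddle.CPUPlace())
    exe.run(startup)
    feed = {
        'x': np.ones([2, 2], dtype='float32'),
        'y': np.ones([2, 2], dtype='float32'),
    }
    (ret,) = exe.run(compiled, feed=feed, fetch_list=[out])

As the executor.py hunk above shows, setting sequential_run wires the option to
the FLAGS_new_executor_serial_run / FLAGS_new_executor_sequential_run flags, so
the StandaloneExecutor schedules ops in ProgramDesc order instead of
dispatching them concurrently.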