Support feed op new ir (#54840)
* add fetch kernel

* support fetch var in new ir

* fix bug

* polish code

* change array equal to np.testing

* support feed in new ir

* fix bug

* try to hack combine op

* add scope guard

* revert atan2 op

* polish code
phlrain authored Jun 26, 2023
1 parent 5d9af9d commit 1e32313
Showing 10 changed files with 144 additions and 56 deletions.
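
Taken together, the changes below let a program fed with host data run through the new-IR executor path. A minimal sketch of driving that path (assumptions: FLAGS_enable_new_ir_in_executor is honored when set in the environment before Paddle initializes, as is usual for Paddle FLAGS_* switches; the flag itself is checked in standalone_executor.cc below):

    import os
    os.environ["FLAGS_enable_new_ir_in_executor"] = "1"  # assumption: must be set before paddle import

    import numpy as np
    import paddle

    paddle.enable_static()
    exe = paddle.static.Executor(paddle.CPUPlace())
    main_program = paddle.static.Program()
    with paddle.static.program_guard(main_program):
        x = paddle.static.data("x", [2, 2], dtype="float32")
        y = x + 1.0
    (out,) = exe.run(
        main_program, feed={"x": np.zeros([2, 2], "float32")}, fetch_list=[y.name]
    )
    print(out)  # expected: a 2x2 array of ones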
@@ -952,8 +952,8 @@ void BuildOpFuncList(

     auto op_name = attr_map.at("op_name").dyn_cast<::ir::StrAttribute>().data();

-    if (op_name == "builtin.combine") {
-      VLOG(6) << "skip process pd.fetch op";
+    if (op_name == "builtin.combine" || op_name == "pd.feed") {
+      VLOG(6) << "skip process " << op_name;
       continue;
     }
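
The skip above reflects that neither op dispatches a phi kernel in this path: builtin.combine only packs IR values, and pd.feed is materialized when the scope is built (see phi_kernel_util.cc below). A self-contained Python model of the filtering, not Paddle's actual C++:

    from collections import namedtuple

    Op = namedtuple("Op", ["name"])
    NON_KERNEL_OPS = {"builtin.combine", "pd.feed"}  # ops handled outside kernel dispatch

    ops = [Op("pd.feed"), Op("builtin.combine"), Op("pd.matmul")]
    op_func_list = [op for op in ops if op.name not in NON_KERNEL_OPS]
    assert [op.name for op in op_func_list] == ["pd.matmul"]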
2 changes: 1 addition & 1 deletion paddle/fluid/framework/new_executor/new_ir_interpreter.cc
@@ -192,7 +192,7 @@ FetchList NewIRInterpreter::Run(const std::vector<std::string>& feed_names,
                            local_scope_,
                            value_2_var_name_map_,
                            execution_config_);
-  SetFeedVarsInplaceSkip(feed_names);
+  // SetFeedVarsInplaceSkip(feed_names);
   // convert vec func_list to graph
   Convert(&op_func_nodes);
   UpdateSyncOpNum();
1 change: 0 additions & 1 deletion paddle/fluid/framework/new_executor/standalone_executor.cc
@@ -69,7 +69,6 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
   if (FLAGS_enable_new_ir_in_executor) {
     VLOG(6) << "begin to translate" << std::endl;
     auto base_program = paddle::TranslateLegacyProgramToProgram(*program);
-
     auto kernel_program =
         paddle::dialect::PdOpLowerToKernelPass(base_program.get());
     interpretercores_.emplace_back(std::make_shared<InterpreterCore>(
1 change: 1 addition & 0 deletions paddle/fluid/ir/dialect/pd_op.yaml
@@ -2,6 +2,7 @@
   inputs: []
   attrs:
   - {typename: str, name: name}
+ - {typename: int, name: col}
   outputs:
   - {typename: Tensor, name: out, optional: false, intermediate: false}
   no_need_buffer: null
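
The new col attribute records which column of the shared "feed" variable a given feed target occupies. An illustrative Python model of the lookup (plain Python standing in for the FeedList handling in BuildScope below):

    import numpy as np

    # Executor._feed_data stores each fed array at its feed op's column.
    feed_list = [
        np.ones([2, 2], dtype="float32"),   # pd.feed for "x" carries col=0
        np.zeros([2, 2], dtype="float32"),  # pd.feed for "y" carries col=1
    ]

    def run_feed_op(col):
        # BuildScope reads feed_list.at(col) and shares it with the op's output.
        return feed_list[col]

    assert run_feed_op(1).sum() == 0.0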
51 changes: 30 additions & 21 deletions paddle/fluid/ir/pass/pd_op_to_kernel_pass.cc
@@ -35,6 +35,9 @@ phi::KernelKey GetKernelKey(
     ir::Operation* op,
     const phi::Place& place,
     const std::unordered_map<ir::Value, ir::OpResult>& map_value_pair) {
+  if (op->name() == "pd.feed") {
+    return {phi::Backend::CPU, phi::DataLayout::ANY, phi::DataType::FLOAT32};
+  }
   phi::Backend kernel_backend = phi::Backend::UNDEFINED;
   phi::DataLayout kernel_layout = phi::DataLayout::UNDEFINED;
   phi::DataType kernel_data_type = phi::DataType::UNDEFINED;
@@ -110,7 +113,9 @@ phi::KernelKey GetKernelKey(
         continue;
       }
       auto input_tmp = op->operand(i).source();
+
       auto new_input_tmp = map_value_pair.at(input_tmp);
+
       auto input_type = new_input_tmp.type();
       dialect::AllocatedDenseTensorType type;
       if (input_type.isa<dialect::AllocatedDenseTensorType>()) {
@@ -181,32 +186,34 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {

     std::vector<ir::Type> op_output_types;
     if ((*it)->num_results() > 0) {
-      auto result_type = (*it)->result(0).type();
-      if (result_type.isa<dialect::DenseTensorType>()) {
-        auto allocated_dense_tensor_dtype =
-            paddle::dialect::AllocatedDenseTensorType::get(
-                ctx,
-                phi::TransToPhiPlace(kernel_key.backend()),
-                result_type.dyn_cast<dialect::DenseTensorType>());
-        op_output_types.push_back(allocated_dense_tensor_dtype);
-      } else if (result_type.isa<ir::VectorType>()) {
-        auto pos1 = result_type.dyn_cast<ir::VectorType>().data()[0];
-
-        if (pos1.isa<dialect::DenseTensorType>()) {
+      for (size_t i = 0; i < (*it)->num_results(); ++i) {
+        auto result_type = (*it)->result(i).type();
+        if (result_type.isa<dialect::DenseTensorType>()) {
           auto allocated_dense_tensor_dtype =
               paddle::dialect::AllocatedDenseTensorType::get(
                   ctx,
                   phi::TransToPhiPlace(kernel_key.backend()),
-                  pos1.dyn_cast<dialect::DenseTensorType>());
+                  result_type.dyn_cast<dialect::DenseTensorType>());
           op_output_types.push_back(allocated_dense_tensor_dtype);
-        } else {
-          PADDLE_THROW(phi::errors::Unimplemented(
-              "only support dense tensor in vector type for now"));
+        } else if (result_type.isa<ir::VectorType>()) {
+          auto pos1 = result_type.dyn_cast<ir::VectorType>().data()[0];
+
+          if (pos1.isa<dialect::DenseTensorType>()) {
+            auto allocated_dense_tensor_dtype =
+                paddle::dialect::AllocatedDenseTensorType::get(
+                    ctx,
+                    phi::TransToPhiPlace(kernel_key.backend()),
+                    pos1.dyn_cast<dialect::DenseTensorType>());
+            op_output_types.push_back(allocated_dense_tensor_dtype);
+          } else {
+            PADDLE_THROW(phi::errors::Unimplemented(
+                "only support dense tensor in vector type for now"));
+          }
+
+          ir::Type t1 = ir::VectorType::get(ctx, op_output_types);
+          op_output_types.clear();
+          op_output_types.push_back(t1);
         }
-
-        ir::Type t1 = ir::VectorType::get(ctx, op_output_types);
-        op_output_types.clear();
-        op_output_types.push_back(t1);
       }
     }
@@ -249,7 +256,9 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {

     // only deal with single output
     if ((*it)->num_results() > 0) {
-      map_value_pair[(*it)->result(0)] = op1->result(0);
+      for (size_t i = 0; i < (*it)->num_results(); ++i) {
+        map_value_pair[(*it)->result(i)] = op1->result(i);
+      }
     }

     program->block()->push_back(op1);
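
The last hunk generalizes the value mapping from result 0 only to every result, so ops with multiple outputs resolve correctly when later ops look up their operands. A self-contained sketch of the idea (FakeOp is a hypothetical stand-in, not Paddle's ir::Operation):

    class FakeOp:
        def __init__(self, num_results):
            self.results = [object() for _ in range(num_results)]

    old_op, new_op = FakeOp(3), FakeOp(3)
    value_map = {}
    for i in range(len(old_op.results)):  # previously only index 0 was mapped
        value_map[old_op.results[i]] = new_op.results[i]
    assert len(value_map) == 3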
82 changes: 63 additions & 19 deletions paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc
@@ -66,6 +66,27 @@ void BuildScope(ir::Block* block,
       continue;
     }

+    if (op_name == "pd.feed") {
+      auto ptr = (*it)->result(0);
+      std::string name = "inner_var_" + std::to_string(count++);
+      name_map->emplace(ptr, name);
+      auto var = scope->Var(name);
+      // TODO(phlrain): need to update here, support StringTensor
+      auto out_tensor = var->GetMutable<phi::DenseTensor>();
+
+      name_map->emplace(ptr, name);
+
+      auto feed_var = scope->Var("feed");
+      int index =
+          (*it)->attributes().at("col").dyn_cast<ir::Int32Attribute>().data();
+      auto feed_list = feed_var->Get<paddle::framework::FeedList>();
+      auto& in_tensor = (PADDLE_GET(phi::DenseTensor, feed_list.at(index)));
+
+      out_tensor->ShareDataWith(in_tensor);
+
+      continue;
+    }
+
     if (op_name == "builtin.combine") {
       auto out_value = (*it)->result(0);
@@ -162,12 +183,12 @@ void BuildInferMetaContext(
   auto runtime_info = std::get<3>(op_yaml_info);

   // int input_index = 0;
-
   std::vector<std::string> vec_param_list = runtime_info.infer_meta_param;
+
   for (size_t input_index = 0; input_index < vec_param_list.size();
        input_index++) {
     auto& t = vec_param_list[input_index];
-
     if (input_index_map.count(t)) {
       // get information from input
       ir::Value ptr = op->operand(input_index_map[t]).source();
@@ -197,7 +218,7 @@ void BuildInferMetaContext(
       if (var->IsType<phi::DenseTensor>()) {
         const phi::TensorBase* tensor_in = &(var->Get<phi::DenseTensor>());
         ctx->EmplaceBackInput(const_cast<phi::TensorBase*>(tensor_in));
-      } else {
+      } else if (var->IsType<paddle::framework::TensorRefArray>()) {
         paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>
             inputs;
         auto& tensor_array = var->Get<paddle::framework::TensorRefArray>();
@@ -206,6 +227,9 @@ void BuildInferMetaContext(
         }

         ctx->EmplaceBackInputs(std::move(inputs));
+      } else {
+        PADDLE_THROW(phi::errors::Unimplemented("Not support var type [%d] ",
+                                                var->Type()));
       }
     }
@@ -238,8 +262,7 @@ void BuildInferMetaContext(
     }
   }

-  // update here, support fetch list for now
-  // [todo update here]
+  // TODO(phlrain): use var type instead of op name
   if (op->attributes().count("op_name") &&
       (op->attributes().at("op_name").dyn_cast<ir::StrAttribute>().data() ==
        "pd.fetch")) {
@@ -249,9 +272,11 @@ void BuildInferMetaContext(
     auto* out_tensor = &(PADDLE_GET(phi::DenseTensor, fetch_list->at(0)));
     ctx->EmplaceBackOutput(out_tensor);
   } else {
-    ir::Value out_ptr = op->result(0);
-    auto name = name_map.at(out_ptr);
-    ctx->EmplaceBackOutput(scope->Var(name)->Get<phi::DenseTensor>());
+    for (size_t i = 0; i < op->num_results(); ++i) {
+      ir::Value out_ptr = op->result(i);
+      auto name = name_map.at(out_ptr);
+      ctx->EmplaceBackOutput(scope->Var(name)->Get<phi::DenseTensor>());
+    }
   }
 }
@@ -293,10 +318,14 @@ void BuildPhiKernelContext(
       // get information from input
       ir::Value ptr = op->operand(input_index_map[t]).source();
       auto in_var_name = name_map.at(ptr);
+
       if (input_map != nullptr) {
-        // only deal with single input for now, [todo] need support multi input
-        // like concat
+        // TODO(phlrain): OpFuncNode need input_index and output_index,
+        // construct input_index and output_here, should remove input_index and
+        // output_index from OpFuncNode Each in_var_name named "inner_var_" +
+        // index, len("inner_var_") = 10
+
         size_t tmp_id = std::atol(in_var_name.substr(4, 100).c_str());
         (*input_map)[std::to_string(input_index_map.at(t))].push_back(tmp_id);
       }
@@ -331,14 +360,21 @@ void BuildPhiKernelContext(
       if (var->IsType<phi::DenseTensor>()) {
         const phi::TensorBase* tensor_in = &(var->Get<phi::DenseTensor>());
         ctx->EmplaceBackInput(tensor_in);
-      } else {
+      } else if (var->IsType<paddle::framework::TensorRefArray>()) {
         paddle::small_vector<const phi::TensorBase*> inputs;
         auto& tensor_array = var->Get<paddle::framework::TensorRefArray>();
         for (size_t i = 0; i < tensor_array.size(); ++i) {
           inputs.emplace_back(tensor_array[i]);
         }

         ctx->EmplaceBackInputs(std::move(inputs));
+      } else if (var->IsType<paddle::framework::FeedList>()) {
+        auto feed_list = var->Get<paddle::framework::FeedList>();
+        auto* in_tensor = &(PADDLE_GET(phi::DenseTensor, feed_list.at(0)));
+        ctx->EmplaceBackOutput(in_tensor);
+      } else {
+        PADDLE_THROW(phi::errors::Unimplemented("Not support var type [%d] ",
+                                                var->Type()));
       }
     }
@@ -371,6 +407,7 @@ void BuildPhiKernelContext(
     }
   }

+  // TODO(phlrain): use var type instead of op name
   if (op->attributes().count("op_name") &&
       (op->attributes().at("op_name").dyn_cast<ir::StrAttribute>().data() ==
        "pd.fetch")) {
@@ -380,16 +417,23 @@ void BuildPhiKernelContext(
     auto* out_tensor = &(PADDLE_GET(phi::DenseTensor, fetch_list->at(0)));
     ctx->EmplaceBackOutput(out_tensor);
   } else {
-    ir::Value out_ptr = op->result(0);
-    auto name = name_map.at(out_ptr);
-    ctx->EmplaceBackOutput(const_cast<phi::DenseTensor*>(
-        &(scope->Var(name)->Get<phi::DenseTensor>())));
-
-    if (output_map != nullptr) {
-      // only deal with single input for now, [todo] need support multi input
-      // like concat
-      size_t tmp_id = std::atol(name.substr(4, 100).c_str());
-      (*output_map)["out"].push_back(tmp_id);
+    for (size_t i = 0; i < op->num_results(); ++i) {
+      ir::Value out_ptr = op->result(i);
+      auto name = name_map.at(out_ptr);
+      ctx->EmplaceBackOutput(const_cast<phi::DenseTensor*>(
+          &(scope->Var(name)->Get<phi::DenseTensor>())));
+
+      if (output_map != nullptr) {
+        // only deal with single input for now, [todo] need support multi input
+        // like concat
+        // TODO(phlrain): OpFuncNode need input_index and output_index,
+        // construct input_index and output_here, should remove input_index and
+        // output_index from OpFuncNode Each in_var_name named "inner_var_" +
+        // index, len("inner_var_") = 10
+
+        size_t tmp_id = std::atol(name.substr(4, 100).c_str());
+        (*output_map)["out"].push_back(tmp_id);
+      }
     }
   }
 }
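
The TODO comments above lean on the naming scheme from BuildScope: every intermediate variable is "inner_var_" plus a running index, and the comments note that len("inner_var_") is 10. A minimal Python model of the convention the comments describe (the convention only, not the substr arithmetic in the committed code):

    name = "inner_var_" + str(12)            # how BuildScope names op results
    tmp_id = int(name[len("inner_var_"):])   # recover the numeric suffix
    assert tmp_id == 12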
2 changes: 2 additions & 0 deletions paddle/fluid/ir_adaptor/translator/op_translator.cc
@@ -540,6 +540,8 @@ ir::Operation* FeedOpHandler(ir::IrContext* ctx,
       GenerateOperationOutput(ctx, op_desc, output_infos);
   ir::AttributeMap attribute_map = {
       {"name", ir::StrAttribute::get(ctx, op_desc.OutputArgumentNames()[0])},
+      {"col",
+       ir::Int32Attribute::get(ctx, op_desc.GetAttrIfExists<int>("col"))},
   };

   ir::Operation* operation =
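
GetAttrIfExists<int> falls back to a default-constructed value when the legacy op desc lacks the attribute, so untranslated feed descs end up with col = 0. A rough Python equivalent of the attribute hand-off (the fallback behavior is an assumption about GetAttrIfExists):

    legacy_attrs = {"col": 2}  # attributes on the legacy feed OpDesc
    attribute_map = {
        "name": "x",
        "col": legacy_attrs.get("col", 0),  # mirrors GetAttrIfExists<int>("col")
    }
    assert attribute_map["col"] == 2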
1 change: 1 addition & 0 deletions paddle/phi/api/yaml/op_compat.yaml
@@ -297,6 +297,7 @@
     out : Out

 - op : atan2
+  backward : atan2_grad
   inputs :
     {x : X1, y : X2}
   outputs :
1 change: 1 addition & 0 deletions python/paddle/fluid/executor.py
@@ -1635,6 +1635,7 @@ def _can_use_interpreter_core(program, place):
             )

         self._feed_data(program, feed, feed_var_name, scope)
+
         if hasattr(program, 'lr_scheduler'):
             from paddle.optimizer.lr import LRScheduler
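
The unchanged _feed_data call above is what fills the "feed" variable that the new pd.feed handling reads. A rough, self-contained model of that hand-off (the cols mapping is assumed; in Paddle it comes from the feed ops' col attributes):

    import numpy as np

    cols = {"x": 0, "y": 1}  # assumed: one col attribute per feed target
    feed = {"x": np.ones([2, 2]), "y": np.zeros([2, 2])}

    feed_list = [None] * len(cols)
    for name, value in feed.items():
        feed_list[cols[name]] = value  # pd.feed with the matching col reads this slot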
55 changes: 43 additions & 12 deletions test/ir/new_ir/test_standalone_new_ir.py
@@ -27,13 +27,15 @@ def test_with_new_ir(self):
         place = paddle.CPUPlace()
         exe = paddle.static.Executor(place)

-        x = paddle.ones([2, 2], dtype="float32")
-        y = paddle.ones([2, 2], dtype="float32")
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.ones([2, 2], dtype="float32")
+                y = paddle.ones([2, 2], dtype="float32")

-        z = x + y
-        out = exe.run(
-            paddle.static.default_main_program(), {}, fetch_list=[z.name]
-        )
+                z = x + y
+                out = exe.run(main_program, {}, fetch_list=[z.name])

         gold_res = np.ones([2, 2], dtype="float32") * 2

@@ -45,18 +47,47 @@ def test_with_new_ir(self):
         place = paddle.CPUPlace()
         exe = paddle.static.Executor(place)

-        x = paddle.ones([2, 2], dtype="float32")
-        y = paddle.ones([2, 2], dtype="float32")
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.ones([2, 2], dtype="float32")
+                y = paddle.ones([2, 2], dtype="float32")

-        z = paddle.linalg.multi_dot([x, y])
-        out = exe.run(
-            paddle.static.default_main_program(), {}, fetch_list=[z.name]
-        )
+                z = paddle.linalg.multi_dot([x, y])
+                out = exe.run(main_program, {}, fetch_list=[z.name])

         gold_res = np.ones([2, 2], dtype="float32") * 2

         np.testing.assert_array_equal(out[0], gold_res)


+class TestFeedOp(unittest.TestCase):
+    def test_with_new_ir(self):
+        place = paddle.CPUPlace()
+        exe = paddle.static.Executor(place)
+
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.static.data("x", [2, 2], dtype="float32")
+                y = paddle.static.data("y", [2, 2], dtype="float32")
+
+                z = x + y
+
+                np_a = np.random.rand(2, 2).astype("float32")
+                np_b = np.random.rand(2, 2).astype("float32")
+                out = exe.run(
+                    main_program,
+                    feed={"x": np_a, "y": np_b},
+                    fetch_list=[z.name],
+                )
+
+                gold_res = np_a + np_b
+
+                np.testing.assert_array_equal(out[0], gold_res)
+
+
 if __name__ == "__main__":
     unittest.main()
