Support feed op new ir (#54840)
* add fetch kernel

* support fetch var in new ir

* fix bug

* polish code

* change array equal to np.testing

* support feed in new ir

* fix bug

* try to hack combine op

* add scope guard

* revert atan2 op

* polish code
phlrain authored Jun 26, 2023
1 parent 5d9af9d commit 1e32313
Showing 10 changed files with 144 additions and 56 deletions.
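
Taken together, the changes below let a program fed with host data run through the new-IR executor path. A minimal sketch of driving that path (assumptions: FLAGS_enable_new_ir_in_executor is honored when set in the environment before Paddle initializes, as is usual for Paddle FLAGS_* switches; the flag itself is checked in standalone_executor.cc below):

    import os
    os.environ["FLAGS_enable_new_ir_in_executor"] = "1"  # assumption: must be set before paddle import

    import numpy as np
    import paddle

    paddle.enable_static()
    exe = paddle.static.Executor(paddle.CPUPlace())
    main_program = paddle.static.Program()
    with paddle.static.program_guard(main_program):
        x = paddle.static.data("x", [2, 2], dtype="float32")
        y = x + 1.0
    (out,) = exe.run(
        main_program, feed={"x": np.zeros([2, 2], "float32")}, fetch_list=[y.name]
    )
    print(out)  # expected: a 2x2 array of ones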
@@ -952,8 +952,8 @@ void BuildOpFuncList(

     auto op_name = attr_map.at("op_name").dyn_cast<::ir::StrAttribute>().data();

-    if (op_name == "builtin.combine") {
-      VLOG(6) << "skip process pd.fetch op";
+    if (op_name == "builtin.combine" || op_name == "pd.feed") {
+      VLOG(6) << "skip process " << op_name;
       continue;
     }
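
The skip above reflects that neither op dispatches a phi kernel in this path: builtin.combine only packs IR values, and pd.feed is materialized when the scope is built (see phi_kernel_util.cc below). A self-contained Python model of the filtering, not Paddle's actual C++:

    from collections import namedtuple

    Op = namedtuple("Op", ["name"])
    NON_KERNEL_OPS = {"builtin.combine", "pd.feed"}  # ops handled outside kernel dispatch

    ops = [Op("pd.feed"), Op("builtin.combine"), Op("pd.matmul")]
    op_func_list = [op for op in ops if op.name not in NON_KERNEL_OPS]
    assert [op.name for op in op_func_list] == ["pd.matmul"]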
2 changes: 1 addition & 1 deletion paddle/fluid/framework/new_executor/new_ir_interpreter.cc
@@ -192,7 +192,7 @@ FetchList NewIRInterpreter::Run(const std::vector<std::string>& feed_names,
                            local_scope_,
                            value_2_var_name_map_,
                            execution_config_);
-  SetFeedVarsInplaceSkip(feed_names);
+  // SetFeedVarsInplaceSkip(feed_names);
   // convert vec func_list to graph
   Convert(&op_func_nodes);
   UpdateSyncOpNum();
1 change: 0 additions & 1 deletion paddle/fluid/framework/new_executor/standalone_executor.cc
@@ -69,7 +69,6 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
   if (FLAGS_enable_new_ir_in_executor) {
     VLOG(6) << "begin to translate" << std::endl;
     auto base_program = paddle::TranslateLegacyProgramToProgram(*program);
-
     auto kernel_program =
         paddle::dialect::PdOpLowerToKernelPass(base_program.get());
     interpretercores_.emplace_back(std::make_shared<InterpreterCore>(
1 change: 1 addition & 0 deletions paddle/fluid/ir/dialect/pd_op.yaml
@@ -2,6 +2,7 @@
   inputs: []
   attrs:
   - {typename: str, name: name}
+ - {typename: int, name: col}
   outputs:
   - {typename: Tensor, name: out, optional: false, intermediate: false}
   no_need_buffer: null
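
The new col attribute records which column of the shared "feed" variable a given feed target occupies. An illustrative Python model of the lookup (plain Python standing in for the FeedList handling in BuildScope below):

    import numpy as np

    # Executor._feed_data stores each fed array at its feed op's column.
    feed_list = [
        np.ones([2, 2], dtype="float32"),   # pd.feed for "x" carries col=0
        np.zeros([2, 2], dtype="float32"),  # pd.feed for "y" carries col=1
    ]

    def run_feed_op(col):
        # BuildScope reads feed_list.at(col) and shares it with the op's output.
        return feed_list[col]

    assert run_feed_op(1).sum() == 0.0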
51 changes: 30 additions & 21 deletions paddle/fluid/ir/pass/pd_op_to_kernel_pass.cc
@@ -35,6 +35,9 @@ phi::KernelKey GetKernelKey(
     ir::Operation* op,
     const phi::Place& place,
     const std::unordered_map<ir::Value, ir::OpResult>& map_value_pair) {
+  if (op->name() == "pd.feed") {
+    return {phi::Backend::CPU, phi::DataLayout::ANY, phi::DataType::FLOAT32};
+  }
   phi::Backend kernel_backend = phi::Backend::UNDEFINED;
   phi::DataLayout kernel_layout = phi::DataLayout::UNDEFINED;
   phi::DataType kernel_data_type = phi::DataType::UNDEFINED;
@@ -110,7 +113,9 @@ phi::KernelKey GetKernelKey(
         continue;
       }
       auto input_tmp = op->operand(i).source();
+
       auto new_input_tmp = map_value_pair.at(input_tmp);
+
       auto input_type = new_input_tmp.type();
       dialect::AllocatedDenseTensorType type;
       if (input_type.isa<dialect::AllocatedDenseTensorType>()) {
@@ -181,32 +186,34 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {

     std::vector<ir::Type> op_output_types;
     if ((*it)->num_results() > 0) {
-      auto result_type = (*it)->result(0).type();
-      if (result_type.isa<dialect::DenseTensorType>()) {
-        auto allocated_dense_tensor_dtype =
-            paddle::dialect::AllocatedDenseTensorType::get(
-                ctx,
-                phi::TransToPhiPlace(kernel_key.backend()),
-                result_type.dyn_cast<dialect::DenseTensorType>());
-        op_output_types.push_back(allocated_dense_tensor_dtype);
-      } else if (result_type.isa<ir::VectorType>()) {
-        auto pos1 = result_type.dyn_cast<ir::VectorType>().data()[0];
-
-        if (pos1.isa<dialect::DenseTensorType>()) {
+      for (size_t i = 0; i < (*it)->num_results(); ++i) {
+        auto result_type = (*it)->result(i).type();
+        if (result_type.isa<dialect::DenseTensorType>()) {
           auto allocated_dense_tensor_dtype =
               paddle::dialect::AllocatedDenseTensorType::get(
                   ctx,
                   phi::TransToPhiPlace(kernel_key.backend()),
-                  pos1.dyn_cast<dialect::DenseTensorType>());
+                  result_type.dyn_cast<dialect::DenseTensorType>());
           op_output_types.push_back(allocated_dense_tensor_dtype);
-        } else {
-          PADDLE_THROW(phi::errors::Unimplemented(
-              "only support dense tensor in vector type for now"));
+        } else if (result_type.isa<ir::VectorType>()) {
+          auto pos1 = result_type.dyn_cast<ir::VectorType>().data()[0];
+
+          if (pos1.isa<dialect::DenseTensorType>()) {
+            auto allocated_dense_tensor_dtype =
+                paddle::dialect::AllocatedDenseTensorType::get(
+                    ctx,
+                    phi::TransToPhiPlace(kernel_key.backend()),
+                    pos1.dyn_cast<dialect::DenseTensorType>());
+            op_output_types.push_back(allocated_dense_tensor_dtype);
+          } else {
+            PADDLE_THROW(phi::errors::Unimplemented(
+                "only support dense tensor in vector type for now"));
+          }
+
+          ir::Type t1 = ir::VectorType::get(ctx, op_output_types);
+          op_output_types.clear();
+          op_output_types.push_back(t1);
         }
-
-        ir::Type t1 = ir::VectorType::get(ctx, op_output_types);
-        op_output_types.clear();
-        op_output_types.push_back(t1);
       }
     }
@@ -249,7 +256,9 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {

     // only deal with single output
     if ((*it)->num_results() > 0) {
-      map_value_pair[(*it)->result(0)] = op1->result(0);
+      for (size_t i = 0; i < (*it)->num_results(); ++i) {
+        map_value_pair[(*it)->result(i)] = op1->result(i);
+      }
     }

     program->block()->push_back(op1);
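
The last hunk generalizes the value mapping from result 0 only to every result, so ops with multiple outputs resolve correctly when later ops look up their operands. A self-contained sketch of the idea (FakeOp is a hypothetical stand-in, not Paddle's ir::Operation):

    class FakeOp:
        def __init__(self, num_results):
            self.results = [object() for _ in range(num_results)]

    old_op, new_op = FakeOp(3), FakeOp(3)
    value_map = {}
    for i in range(len(old_op.results)):  # previously only index 0 was mapped
        value_map[old_op.results[i]] = new_op.results[i]
    assert len(value_map) == 3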
82 changes: 63 additions & 19 deletions paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc
@@ -66,6 +66,27 @@ void BuildScope(ir::Block* block,
       continue;
     }

+    if (op_name == "pd.feed") {
+      auto ptr = (*it)->result(0);
+      std::string name = "inner_var_" + std::to_string(count++);
+      name_map->emplace(ptr, name);
+      auto var = scope->Var(name);
+      // TODO(phlrain): need to update here, support StringTensor
+      auto out_tensor = var->GetMutable<phi::DenseTensor>();
+
+      name_map->emplace(ptr, name);
+
+      auto feed_var = scope->Var("feed");
+      int index =
+          (*it)->attributes().at("col").dyn_cast<ir::Int32Attribute>().data();
+      auto feed_list = feed_var->Get<paddle::framework::FeedList>();
+      auto& in_tensor = (PADDLE_GET(phi::DenseTensor, feed_list.at(index)));
+
+      out_tensor->ShareDataWith(in_tensor);
+
+      continue;
+    }
+
     if (op_name == "builtin.combine") {
       auto out_value = (*it)->result(0);
@@ -162,12 +183,12 @@ void BuildInferMetaContext(
   auto runtime_info = std::get<3>(op_yaml_info);

   // int input_index = 0;
-
   std::vector<std::string> vec_param_list = runtime_info.infer_meta_param;
+
   for (size_t input_index = 0; input_index < vec_param_list.size();
        input_index++) {
     auto& t = vec_param_list[input_index];
-
     if (input_index_map.count(t)) {
       // get information from input
       ir::Value ptr = op->operand(input_index_map[t]).source();
@@ -197,7 +218,7 @@ void BuildInferMetaContext(
       if (var->IsType<phi::DenseTensor>()) {
         const phi::TensorBase* tensor_in = &(var->Get<phi::DenseTensor>());
         ctx->EmplaceBackInput(const_cast<phi::TensorBase*>(tensor_in));
-      } else {
+      } else if (var->IsType<paddle::framework::TensorRefArray>()) {
         paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>
             inputs;
         auto& tensor_array = var->Get<paddle::framework::TensorRefArray>();
@@ -206,6 +227,9 @@ void BuildInferMetaContext(
         }

         ctx->EmplaceBackInputs(std::move(inputs));
+      } else {
+        PADDLE_THROW(phi::errors::Unimplemented("Not support var type [%d] ",
+                                                var->Type()));
       }
     }
@@ -238,8 +262,7 @@ void BuildInferMetaContext(
     }
   }

-  // update here, support fetch list for now
-  // [todo update here]
+  // TODO(phlrain): use var type instead of op name
   if (op->attributes().count("op_name") &&
       (op->attributes().at("op_name").dyn_cast<ir::StrAttribute>().data() ==
        "pd.fetch")) {
@@ -249,9 +272,11 @@ void BuildInferMetaContext(
     auto* out_tensor = &(PADDLE_GET(phi::DenseTensor, fetch_list->at(0)));
     ctx->EmplaceBackOutput(out_tensor);
   } else {
-    ir::Value out_ptr = op->result(0);
-    auto name = name_map.at(out_ptr);
-    ctx->EmplaceBackOutput(scope->Var(name)->Get<phi::DenseTensor>());
+    for (size_t i = 0; i < op->num_results(); ++i) {
+      ir::Value out_ptr = op->result(i);
+      auto name = name_map.at(out_ptr);
+      ctx->EmplaceBackOutput(scope->Var(name)->Get<phi::DenseTensor>());
+    }
   }
 }
@@ -293,10 +318,14 @@ void BuildPhiKernelContext(
       // get information from input
       ir::Value ptr = op->operand(input_index_map[t]).source();
       auto in_var_name = name_map.at(ptr);
+
       if (input_map != nullptr) {
-        // only deal with single input for now, [todo] need support multi input
-        // like concat
+        // TODO(phlrain): OpFuncNode need input_index and output_index,
+        // construct input_index and output_here, should remove input_index and
+        // output_index from OpFuncNode Each in_var_name named "inner_var_" +
+        // index, len("inner_var_") = 10
+
         size_t tmp_id = std::atol(in_var_name.substr(4, 100).c_str());
         (*input_map)[std::to_string(input_index_map.at(t))].push_back(tmp_id);
       }
@@ -331,14 +360,21 @@ void BuildPhiKernelContext(
       if (var->IsType<phi::DenseTensor>()) {
         const phi::TensorBase* tensor_in = &(var->Get<phi::DenseTensor>());
         ctx->EmplaceBackInput(tensor_in);
-      } else {
+      } else if (var->IsType<paddle::framework::TensorRefArray>()) {
         paddle::small_vector<const phi::TensorBase*> inputs;
         auto& tensor_array = var->Get<paddle::framework::TensorRefArray>();
         for (size_t i = 0; i < tensor_array.size(); ++i) {
           inputs.emplace_back(tensor_array[i]);
         }

         ctx->EmplaceBackInputs(std::move(inputs));
+      } else if (var->IsType<paddle::framework::FeedList>()) {
+        auto feed_list = var->Get<paddle::framework::FeedList>();
+        auto* in_tensor = &(PADDLE_GET(phi::DenseTensor, feed_list.at(0)));
+        ctx->EmplaceBackOutput(in_tensor);
+      } else {
+        PADDLE_THROW(phi::errors::Unimplemented("Not support var type [%d] ",
+                                                var->Type()));
       }
     }
@@ -371,6 +407,7 @@ void BuildPhiKernelContext(
     }
   }

+  // TODO(phlrain): use var type instead of op name
   if (op->attributes().count("op_name") &&
       (op->attributes().at("op_name").dyn_cast<ir::StrAttribute>().data() ==
        "pd.fetch")) {
@@ -380,16 +417,23 @@ void BuildPhiKernelContext(
     auto* out_tensor = &(PADDLE_GET(phi::DenseTensor, fetch_list->at(0)));
     ctx->EmplaceBackOutput(out_tensor);
   } else {
-    ir::Value out_ptr = op->result(0);
-    auto name = name_map.at(out_ptr);
-    ctx->EmplaceBackOutput(const_cast<phi::DenseTensor*>(
-        &(scope->Var(name)->Get<phi::DenseTensor>())));
-
-    if (output_map != nullptr) {
-      // only deal with single input for now, [todo] need support multi input
-      // like concat
-      size_t tmp_id = std::atol(name.substr(4, 100).c_str());
-      (*output_map)["out"].push_back(tmp_id);
+    for (size_t i = 0; i < op->num_results(); ++i) {
+      ir::Value out_ptr = op->result(i);
+      auto name = name_map.at(out_ptr);
+      ctx->EmplaceBackOutput(const_cast<phi::DenseTensor*>(
+          &(scope->Var(name)->Get<phi::DenseTensor>())));
+
+      if (output_map != nullptr) {
+        // only deal with single input for now, [todo] need support multi input
+        // like concat
+        // TODO(phlrain): OpFuncNode need input_index and output_index,
+        // construct input_index and output_here, should remove input_index and
+        // output_index from OpFuncNode Each in_var_name named "inner_var_" +
+        // index, len("inner_var_") = 10
+
+        size_t tmp_id = std::atol(name.substr(4, 100).c_str());
+        (*output_map)["out"].push_back(tmp_id);
+      }
     }
   }
 }
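
The TODO comments above lean on the naming scheme from BuildScope: every intermediate variable is "inner_var_" plus a running index, and the comments note that len("inner_var_") is 10. A minimal Python model of the convention the comments describe (the convention only, not the substr arithmetic in the committed code):

    name = "inner_var_" + str(12)            # how BuildScope names op results
    tmp_id = int(name[len("inner_var_"):])   # recover the numeric suffix
    assert tmp_id == 12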
2 changes: 2 additions & 0 deletions paddle/fluid/ir_adaptor/translator/op_translator.cc
@@ -540,6 +540,8 @@ ir::Operation* FeedOpHandler(ir::IrContext* ctx,
       GenerateOperationOutput(ctx, op_desc, output_infos);
   ir::AttributeMap attribute_map = {
       {"name", ir::StrAttribute::get(ctx, op_desc.OutputArgumentNames()[0])},
+      {"col",
+       ir::Int32Attribute::get(ctx, op_desc.GetAttrIfExists<int>("col"))},
   };

   ir::Operation* operation =
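
GetAttrIfExists<int> falls back to a default-constructed value when the legacy op desc lacks the attribute, so untranslated feed descs end up with col = 0. A rough Python equivalent of the attribute hand-off (the fallback behavior is an assumption about GetAttrIfExists):

    legacy_attrs = {"col": 2}  # attributes on the legacy feed OpDesc
    attribute_map = {
        "name": "x",
        "col": legacy_attrs.get("col", 0),  # mirrors GetAttrIfExists<int>("col")
    }
    assert attribute_map["col"] == 2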
1 change: 1 addition & 0 deletions paddle/phi/api/yaml/op_compat.yaml
@@ -297,6 +297,7 @@
     out : Out

 - op : atan2
+  backward : atan2_grad
   inputs :
     {x : X1, y : X2}
   outputs :
1 change: 1 addition & 0 deletions python/paddle/fluid/executor.py
@@ -1635,6 +1635,7 @@ def _can_use_interpreter_core(program, place):
             )

         self._feed_data(program, feed, feed_var_name, scope)
+
         if hasattr(program, 'lr_scheduler'):
             from paddle.optimizer.lr import LRScheduler
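
The unchanged _feed_data call above is what fills the "feed" variable that the new pd.feed handling reads. A rough, self-contained model of that hand-off (the cols mapping is assumed; in Paddle it comes from the feed ops' col attributes):

    import numpy as np

    cols = {"x": 0, "y": 1}  # assumed: one col attribute per feed target
    feed = {"x": np.ones([2, 2]), "y": np.zeros([2, 2])}

    feed_list = [None] * len(cols)
    for name, value in feed.items():
        feed_list[cols[name]] = value  # pd.feed with the matching col reads this slot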
55 changes: 43 additions & 12 deletions test/ir/new_ir/test_standalone_new_ir.py
@@ -27,13 +27,15 @@ def test_with_new_ir(self):
         place = paddle.CPUPlace()
         exe = paddle.static.Executor(place)

-        x = paddle.ones([2, 2], dtype="float32")
-        y = paddle.ones([2, 2], dtype="float32")
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.ones([2, 2], dtype="float32")
+                y = paddle.ones([2, 2], dtype="float32")

-        z = x + y
-        out = exe.run(
-            paddle.static.default_main_program(), {}, fetch_list=[z.name]
-        )
+                z = x + y
+                out = exe.run(main_program, {}, fetch_list=[z.name])

         gold_res = np.ones([2, 2], dtype="float32") * 2

@@ -45,18 +47,47 @@ def test_with_new_ir(self):
         place = paddle.CPUPlace()
         exe = paddle.static.Executor(place)

-        x = paddle.ones([2, 2], dtype="float32")
-        y = paddle.ones([2, 2], dtype="float32")
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.ones([2, 2], dtype="float32")
+                y = paddle.ones([2, 2], dtype="float32")

-        z = paddle.linalg.multi_dot([x, y])
-        out = exe.run(
-            paddle.static.default_main_program(), {}, fetch_list=[z.name]
-        )
+                z = paddle.linalg.multi_dot([x, y])
+                out = exe.run(main_program, {}, fetch_list=[z.name])

         gold_res = np.ones([2, 2], dtype="float32") * 2

         np.testing.assert_array_equal(out[0], gold_res)


+class TestFeedOp(unittest.TestCase):
+    def test_with_new_ir(self):
+        place = paddle.CPUPlace()
+        exe = paddle.static.Executor(place)
+
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.static.data("x", [2, 2], dtype="float32")
+                y = paddle.static.data("y", [2, 2], dtype="float32")
+
+                z = x + y
+
+                np_a = np.random.rand(2, 2).astype("float32")
+                np_b = np.random.rand(2, 2).astype("float32")
+                out = exe.run(
+                    main_program,
+                    feed={"x": np_a, "y": np_b},
+                    fetch_list=[z.name],
+                )
+
+                gold_res = np_a + np_b
+
+                np.testing.assert_array_equal(out[0], gold_res)
+
+
 if __name__ == "__main__":
     unittest.main()
