From 3847c760ea0e5315e5d6a354a894a92c8032a586 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Tue, 23 Nov 2021 02:45:05 +0000 Subject: [PATCH 01/31] refine a test case, test=develop --- python/paddle/fluid/tests/unittests/test_allclose_layer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_allclose_layer.py b/python/paddle/fluid/tests/unittests/test_allclose_layer.py index c376a5c95c393..7b201f60db539 100644 --- a/python/paddle/fluid/tests/unittests/test_allclose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_allclose_layer.py @@ -61,7 +61,7 @@ def allclose_check(self, use_cuda, dtype='float32'): # for corner case x = np.array([10.1, 10.1]).astype(dtype) y = np.array([10, 10]).astype(dtype) - result_c, = exe.run(feed={'a': x, 'b': y}, fetch_list=[result_corner]) + result_c = exe.run(feed={'a': x, 'b': y}, fetch_list=[result_corner]) corner_res = (dtype == 'float64') self.assertEqual(result_c[0], corner_res) From 3b4356b960c51735dde67a0abdfb27ccc5cbdf27 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Thu, 25 Nov 2021 08:02:59 +0000 Subject: [PATCH 02/31] publish python c api for eager, test=develop --- cmake/flags.cmake | 1 + paddle/fluid/pybind/CMakeLists.txt | 6 + paddle/fluid/pybind/eager.cc | 130 +++++++ paddle/fluid/pybind/eager.h | 24 ++ paddle/fluid/pybind/eager_functions.cc | 239 ++++++++++++ paddle/fluid/pybind/eager_method.cc | 115 ++++++ paddle/fluid/pybind/eager_properties.cc | 145 ++++++++ paddle/fluid/pybind/eager_utils.cc | 339 ++++++++++++++++++ paddle/fluid/pybind/eager_utils.h | 54 +++ paddle/fluid/pybind/pybind.cc | 48 ++- paddle/pten/core/convert_utils.cc | 125 +++++++ paddle/pten/core/convert_utils.h | 5 + python/paddle/fluid/__init__.py | 4 + python/paddle/fluid/eager/__init__.py | 20 ++ .../fluid/eager/eager_tensor_patch_methods.py | 26 ++ python/paddle/fluid/framework.py | 22 +- .../tests/unittests/test_egr_python_api.py | 101 ++++++ python/paddle/tensor/creation.py | 7 + python/paddle/tensor/to_string.py | 36 ++ python/setup.py.in | 1 + 20 files changed, 1434 insertions(+), 14 deletions(-) create mode 100644 paddle/fluid/pybind/eager.cc create mode 100644 paddle/fluid/pybind/eager.h create mode 100644 paddle/fluid/pybind/eager_functions.cc create mode 100644 paddle/fluid/pybind/eager_method.cc create mode 100644 paddle/fluid/pybind/eager_properties.cc create mode 100644 paddle/fluid/pybind/eager_utils.cc create mode 100644 paddle/fluid/pybind/eager_utils.h create mode 100644 python/paddle/fluid/eager/__init__.py create mode 100644 python/paddle/fluid/eager/eager_tensor_patch_methods.py create mode 100644 python/paddle/fluid/tests/unittests/test_egr_python_api.py diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 7afff25664bbb..27983e4404a52 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -141,6 +141,7 @@ set(COMMON_FLAGS -Wno-unused-parameter -Wno-unused-function -Wno-error=literal-suffix + -Wno-error=missing-field-initializers -Wno-error=unused-local-typedefs -Wno-error=parentheses-equality # Warnings in pybind11 -Wno-error=ignored-attributes # Warnings in Eigen, gcc 6.3 diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 588caed5a452e..ea2b9a7c3f7db 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -212,6 +212,12 @@ if(WITH_PYTHON) add_custom_target(op_function_generator_cmd ALL DEPENDS ${impl_file}) list(APPEND PYBIND_DEPS interpretercore standalone_executor) + + cc_library(paddle_eager + SRCS 
eager.cc eager_functions.cc eager_method.cc eager_properties.cc eager_utils.cc + DEPS autograd_meta grad_node_info pten global_utils utils) #eager_api accumulation_node backward + list(APPEND PYBIND_DEPS paddle_eager) + cc_library(paddle_pybind SHARED SRCS ${PYBIND_SRCS} DEPS ${PYBIND_DEPS} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS}) diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc new file mode 100644 index 0000000000000..a8601a3892381 --- /dev/null +++ b/paddle/fluid/pybind/eager.cc @@ -0,0 +1,130 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +// disable numpy compile error +#include + +#include +#include + +#include "paddle/fluid/eager/api/all.h" +#include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/fluid/eager/utils.h" +#include "paddle/fluid/memory/allocation/allocator.h" +#include "paddle/fluid/memory/memcpy.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/pybind/eager.h" +#include "paddle/fluid/pybind/eager_utils.h" +#include "paddle/pten/common/data_type.h" +#include "paddle/pten/core/convert_utils.h" +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/include/core.h" + +namespace paddle { +namespace pybind { + +namespace py = ::pybind11; + +PyTypeObject* p_eager_tensor_type; + +PyObject* eagertensor_new(PyTypeObject* type, PyObject* args, + PyObject* kwargs) { + PyObject* obj = type->tp_alloc(type, 0); + if (obj) { + auto v = reinterpret_cast(obj); + new (&(v->eagertensor)) egr::EagerTensor(); + } + return obj; +} + +static void eagertensor_dealloc(EagerTensorObject* self) { + self->eagertensor.~EagerTensor(); + Py_TYPE(self)->tp_free(reinterpret_cast(self)); +} + +extern struct PyGetSetDef variable_properties[]; + +extern PyMethodDef variable_methods[]; + +PyTypeObject eager_tensor_type = { + PyVarObject_HEAD_INIT(NULL, 0) "core_avx.eager.EagerTensor", /* tp_name */ + sizeof(EagerTensorObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)eagertensor_dealloc, /* tp_dealloc */ + 0, /* tp_vectorcall_offset */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | + Py_TPFLAGS_HEAPTYPE, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + variable_methods, /* tp_methods */ + 0, /* tp_members */ + variable_properties, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + eagertensor_new, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 
0 /* tp_version_tag */
+};
+
+void BindEager(pybind11::module* module) {
+ auto m = module->def_submodule("eager");
+
+ p_eager_tensor_type = &eager_tensor_type;
+ if (PyType_Ready(&eager_tensor_type) < 0) {
+ PADDLE_THROW(platform::errors::Fatal(
+ "Init Paddle error in BindEager(PyType_Ready)."));
+ return;
+ }
+
+ Py_INCREF(&eager_tensor_type);
+ if (PyModule_AddObject(m.ptr(), "EagerTensor",
+ reinterpret_cast(&eager_tensor_type)) < 0) {
+ Py_DECREF(&eager_tensor_type);
+ Py_DECREF(m.ptr());
+ PADDLE_THROW(platform::errors::Fatal(
+ "Init Paddle error in BindEager(PyModule_AddObject)."));
+ return;
+ }
+
+ BindFunctions(m.ptr());
+}
+
+} // namespace pybind
+} // namespace paddle
diff --git a/paddle/fluid/pybind/eager.h b/paddle/fluid/pybind/eager.h
new file mode 100644
index 0000000000000..c1a869d9b89fa
--- /dev/null
+++ b/paddle/fluid/pybind/eager.h
@@ -0,0 +1,24 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+
+#include
+#include "pybind11/pybind11.h"
+#include "pybind11/stl.h"
+
+namespace paddle {
+namespace pybind {
+
+void BindEager(pybind11::module* m);
+void BindFunctions(PyObject* module);
+
+} // namespace pybind
+} // namespace paddle
diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc
new file mode 100644
index 0000000000000..149a618f34fa3
--- /dev/null
+++ b/paddle/fluid/pybind/eager_functions.cc
@@ -0,0 +1,239 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. 
*/ +// disable numpy compile error +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +// #define PY_ARRAY_UNIQUE_SYMBOL Paddle_PyArray_API_F +#define INIT_NUMPY_ARRAY_CPP + +#include +#include + +#include + +#include +#include + +#include "paddle/fluid/eager/accumulation/accumulation_node.h" +#include "paddle/fluid/eager/api/all.h" +#include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/fluid/eager/backward.h" +#include "paddle/fluid/eager/utils.h" +#include "paddle/fluid/memory/allocation/allocator.h" +#include "paddle/fluid/memory/memcpy.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/pybind/eager.h" +#include "paddle/fluid/pybind/eager_utils.h" +#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/api/lib/utils/storage.h" +#include "paddle/pten/api/lib/utils/tensor_utils.h" +#include "paddle/pten/common/data_type.h" +#include "paddle/pten/core/convert_utils.h" +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/include/core.h" +#pragma GCC diagnostic ignored "-Wconversion-null" // for import_array(); + +namespace paddle { +namespace pybind { + +int init_numpy_f() { + import_array(); + return 0; +} +static const int numpy_initialized_f = init_numpy_f(); + +namespace py = ::pybind11; + +// TODO(wanghuancoder) we must build paddle whl package with lower numpy version +bool check_numpy_available() { + static bool ret = []() { + if (_import_array() >= 0) { + return true; + } + + std::string message = "Failed to initialize NumPy"; + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + if (value) { + PyObject* err_msg = PyObject_Str(value); + PyObject* err_msg2 = + PyUnicode_AsEncodedString(err_msg, "utf-8", "strict"); + if (err_msg2) { + LOG(WARNING) << "Numpy Error: '" << PyBytes_AS_STRING(err_msg2) + << "'. 
You can try upgrading numpy."; + Py_XDECREF(err_msg2); + } + Py_XDECREF(err_msg); + } + PyErr_Clear(); + return false; + }(); + return ret; +} + +extern PyTypeObject* p_eager_tensor_type; + +static PyObject* eager_api_set_expected_place(PyObject* self, PyObject* args, + PyObject* kwargs) { + auto place = CastPyArg2Place(PyTuple_GET_ITEM(args, 0), 0); + egr::Controller::Instance().SetExpectedPlace(place); + + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject* eager_api_scale(PyObject* self, PyObject* args, + PyObject* kwargs) { + // TODO(jiabin): Sync Tensor and Variable here when we support + egr::EagerTensor ret = + egr::scale(reinterpret_cast(PyTuple_GET_ITEM(args, 0)) + ->eagertensor, + CastPyArg2AttrFloat(PyTuple_GET_ITEM(args, 1), 1), + CastPyArg2AttrFloat(PyTuple_GET_ITEM(args, 2), 2), + CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 3), 3), + CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 4), 4)); + return ToPyObject(ret); +} + +class EagerNumpyAllocation : public paddle::memory::allocation::Allocation { + public: + explicit EagerNumpyAllocation(PyObject* numpy_data, pten::DataType dtype) + : Allocation( + static_cast( + (reinterpret_cast(numpy_data))->data), + pten::DataTypeSize(dtype) * PyArray_Size(numpy_data), + paddle::platform::CPUPlace()), + arr_(numpy_data) { + PADDLE_ENFORCE_NOT_NULL(arr_, platform::errors::InvalidArgument( + "The underlying PyObject pointer of " + "numpy array cannot be nullptr")); + PADDLE_ENFORCE_NE( + arr_, Py_None, + platform::errors::PreconditionNotMet( + "The underlying PyObject pointer of numpy array cannot be None")); + Py_INCREF(arr_); + } + ~EagerNumpyAllocation() override { + py::gil_scoped_acquire gil; + Py_DECREF(arr_); + } + + private: + PyObject* arr_; +}; + +static inline PyObject* eager_api_numpy_to_tensor( + PyObject* numpy_data, pten::DataType dtype, + const paddle::platform::Place& place, bool stop_gradient) { + std::vector vec_dims; + auto numpy_shape = PyArray_DIMS(reinterpret_cast(numpy_data)); + int rank = PyArray_NDIM(reinterpret_cast(numpy_data)); + for (int i = 0; i < rank; i++) { + vec_dims.push_back(static_cast(numpy_shape[i])); + } + paddle::framework::DDim dims = paddle::framework::make_ddim(vec_dims); + + // TODO(jiabin): Support GPU later + auto meta = pten::DenseTensorMeta(dtype, dims); + auto holder = std::make_shared(numpy_data, dtype); + auto shared_storage = + pten::make_intrusive(holder, 0); + std::shared_ptr densetensor( + new pten::DenseTensor(std::move(shared_storage), std::move(meta))); + + PyObject* obj = p_eager_tensor_type->tp_alloc(p_eager_tensor_type, 0); + if (obj) { + auto v = reinterpret_cast(obj); + new (&(v->eagertensor)) egr::EagerTensor(); + v->eagertensor.set_impl(densetensor); + v->eagertensor.set_name(egr::Controller::Instance().GenerateUniqueName()); + auto meta = egr::EagerUtils::autograd_meta(&(v->eagertensor)); + meta->SetStopGradient(stop_gradient); + + // Created tensor will be leaf tensor + // So we append AccumulationNode to it. + auto accumulation_node = std::make_shared(); + meta->SetGradNode(accumulation_node); + + // TODO(jiabin): Shall we increase ref cnt here to make python ref cnt num + // correctly? 
+ } else {
+ PADDLE_THROW(platform::errors::Fatal(
+ "tp_alloc return null, can not new a PyObject."));
+ }
+
+ return obj;
+}
+
+static PyObject* eager_api_to_tensor(PyObject* self, PyObject* args,
+ PyObject* kwargs) {
+ // TODO(jiabin): Support Kwargs here
+ PyObject* data = PyTuple_GET_ITEM(args, 0);
+ auto str_dtype = CastPyArg2AttrString(PyTuple_GET_ITEM(args, 1), 1);
+ pten::DataType dtype = pten::String2DataType(str_dtype);
+ auto place = CastPyArg2Place(PyTuple_GET_ITEM(args, 2), 2);
+ bool stop_gradient = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 3), 3);
+ // TODO(jiabin): Support this when python given name
+ // auto str_name = CastPyArg2AttrString(PyTuple_GET_ITEM(args, 4), 4);
+
+ if (check_numpy_available() && PyArray_Check(data)) {
+ return eager_api_numpy_to_tensor(data, dtype, place, stop_gradient);
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "Eager to_tensor only supports numpy to tensor."));
+ Py_INCREF(Py_None);
+ return Py_None;
+ }
+}
+
+static PyObject* eager_api_retain_grad_for_tensor(PyObject* self,
+ PyObject* args,
+ PyObject* kwargs) {
+ RetainGradForTensor(CastPyArg2EagerTensor(PyTuple_GET_ITEM(args, 0), 0));
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyObject* eager_api_run_backward(PyObject* self, PyObject* args,
+ PyObject* kwargs) {
+ auto tensors = CastPyArg2VectorOfEagerTensor(PyTuple_GET_ITEM(args, 0), 0);
+ auto grad_tensors =
+ CastPyArg2VectorOfEagerTensor(PyTuple_GET_ITEM(args, 1), 1);
+ RunBackward(tensors, grad_tensors,
+ CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 2), 2));
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+PyMethodDef variable_functions[] = {
+ {"to_tensor", (PyCFunction)(void (*)(void))eager_api_to_tensor,
+ METH_VARARGS | METH_KEYWORDS, NULL},
+ {"scale", (PyCFunction)(void (*)(void))eager_api_scale,
+ METH_VARARGS | METH_KEYWORDS, NULL},
+ {"_set_expected_place",
+ (PyCFunction)(void (*)(void))eager_api_set_expected_place,
+ METH_VARARGS | METH_KEYWORDS, NULL},
+ {"retain_grad_for_tensor",
+ (PyCFunction)(void (*)(void))eager_api_retain_grad_for_tensor,
+ METH_VARARGS | METH_KEYWORDS, NULL},
+ {"run_backward", (PyCFunction)(void (*)(void))eager_api_run_backward,
+ METH_VARARGS | METH_KEYWORDS, NULL},
+ {NULL, NULL, 0, NULL}};
+
+void BindFunctions(PyObject* module) {
+ if (PyModule_AddFunctions(module, variable_functions) < 0) {
+ PADDLE_THROW(platform::errors::Fatal(
+ "Init Paddle error in BindFunctions(PyModule_AddFunctions)."));
+ return;
+ }
+}
+
+} // namespace pybind
+} // namespace paddle
diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
new file mode 100644
index 0000000000000..92482c8e250da
--- /dev/null
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -0,0 +1,115 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. 
*/ +// disable numpy compile error +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#define PY_ARRAY_UNIQUE_SYMBOL Paddle_PyArray_API_M +#define INIT_NUMPY_ARRAY_CPP + +#include +#include + +#include + +#include +#include + +#include "paddle/fluid/eager/api/all.h" +#include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/fluid/memory/allocation/allocator.h" +#include "paddle/fluid/memory/memcpy.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/pybind/eager.h" +#include "paddle/fluid/pybind/eager_utils.h" +#include "paddle/pten/common/data_type.h" +#include "paddle/pten/core/convert_utils.h" +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/include/core.h" +#pragma GCC diagnostic ignored "-Wconversion-null" // for import_array(); + +namespace paddle { +namespace pybind { + +int init_numpy_m() { + import_array(); + return 0; +} +static const int numpy_initialized_m = init_numpy_m(); + +extern PyTypeObject* pEagerTensorType; + +static PyObject* eager_tensor_method_numpy(EagerTensorObject* self, + PyObject* args, PyObject* kwargs) { + if (!self->eagertensor.initialized()) { + Py_INCREF(Py_None); + return Py_None; + } + auto tensor_dims = self->eagertensor.shape(); + auto numpy_dtype = pten::TensorDtype2NumpyDtype(self->eagertensor.type()); + auto sizeof_dtype = pten::DataTypeSize(self->eagertensor.type()); + npy_intp py_dims[paddle::framework::DDim::kMaxRank]; + npy_intp py_strides[paddle::framework::DDim::kMaxRank]; + size_t numel = 1; + for (int i = tensor_dims.size() - 1; i >= 0; --i) { + py_dims[i] = static_cast(tensor_dims[i]); + py_strides[i] = sizeof_dtype * numel; + numel *= py_dims[i]; + } + PyObject* array = + PyArray_NewFromDescr(&PyArray_Type, PyArray_DescrFromType(numpy_dtype), + tensor_dims.size(), py_dims, py_strides, nullptr, + NPY_ARRAY_ALIGNED | NPY_ARRAY_WRITEABLE, nullptr); + + if (self->eagertensor.is_cpu()) { + auto dense_tensor = + std::dynamic_pointer_cast(self->eagertensor.impl()); + platform::CPUPlace place; + // deep copy + paddle::memory::Copy( + place, reinterpret_cast( + (reinterpret_cast(array))->data), + place, dense_tensor->data(), sizeof_dtype * numel); +#if defined(PADDLE_WITH_CUDA) + } else if (self->eagertensor.is_cuda()) { + auto dense_tensor = + std::dynamic_pointer_cast(self->eagertensor.impl()); + + paddle::platform::GpuMemcpySync( + (reinterpret_cast(array))->data, + dense_tensor->data(), + pten::DataTypeSize(dense_tensor->dtype()) * dense_tensor->numel(), + cudaMemcpyDeviceToHost); +#endif + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Tensor.numpy() only support cpu tensor.")); + Py_INCREF(Py_None); + return Py_None; + } + + return array; +} + +static PyObject* eager_tensor_method_is_initialized(EagerTensorObject* self, + PyObject* args, + PyObject* kwargs) { + return ToPyObject(self->eagertensor.initialized()); +} + +PyMethodDef variable_methods[] = { + {"numpy", (PyCFunction)(void (*)(void))eager_tensor_method_numpy, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"_is_initialized", + (PyCFunction)(void (*)(void))eager_tensor_method_is_initialized, + METH_VARARGS | METH_KEYWORDS, NULL}, + {NULL, NULL, 0, NULL}}; + +} // namespace pybind +} // namespace paddle diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc new file mode 100644 index 0000000000000..9643a4dba661a --- /dev/null +++ b/paddle/fluid/pybind/eager_properties.cc @@ -0,0 +1,145 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +// disable numpy compile error +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#define PY_ARRAY_UNIQUE_SYMBOL Paddle_PyArray_API_P +#define INIT_NUMPY_ARRAY_CPP + +#include +#include + +#include + +#include +#include + +#include "paddle/fluid/eager/api/all.h" +#include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/fluid/eager/utils.h" +#include "paddle/fluid/memory/allocation/allocator.h" +#include "paddle/fluid/memory/memcpy.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/pybind/eager.h" +#include "paddle/fluid/pybind/eager_utils.h" +#include "paddle/pten/common/data_type.h" +#include "paddle/pten/core/convert_utils.h" +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/include/core.h" +#pragma GCC diagnostic ignored "-Wconversion-null" // for import_array(); + +namespace paddle { +namespace pybind { + +int init_numpy_p() { + import_array(); + return 0; +} +static const int numpy_initialized_m = init_numpy_p(); + +extern PyTypeObject* p_eager_tensor_type; + +PyObject* eager_tensor_properties_get_name(EagerTensorObject* self, + void* closure) { + return ToPyObject(self->eagertensor.name()); +} + +int eager_tensor_properties_set_name(EagerTensorObject* self, PyObject* value, + void* closure) { + self->eagertensor.set_name(CastPyArg2AttrString(value, 0)); + return 0; +} + +PyObject* eager_tensor_properties_get_stop_gradient(EagerTensorObject* self, + void* closure) { + auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eagertensor); + return ToPyObject(meta->StopGradient()); +} + +PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self, + void* closure) { + auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eagertensor); + return ToPyObject(meta->Grad()); +} + +int eager_tensor_properties_set_stop_gradient(EagerTensorObject* self, + PyObject* value, void* closure) { + auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eagertensor); + meta->SetStopGradient(CastPyArg2AttrBoolean(value, 0)); + return 0; +} + +PyObject* eager_tensor_properties_get_persistable(EagerTensorObject* self, + void* closure) { + auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eagertensor); + return ToPyObject(meta->Persistable()); +} + +int eager_tensor_properties_set_persistable(EagerTensorObject* self, + PyObject* value, void* closure) { + auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eagertensor); + meta->SetPersistable(CastPyArg2AttrBoolean(value, 0)); + return 0; +} + +PyObject* eager_tensor_properties_get_shape(EagerTensorObject* self, + void* closure) { + auto ddim = self->eagertensor.shape(); + std::vector value; + size_t rank = static_cast(ddim.size()); + value.resize(rank); + for (size_t i = 0; i < rank; i++) { + value[i] = ddim[i]; + } + + return ToPyObject(value); +} + +PyObject* eager_tensor_properties_get_place(EagerTensorObject* self, + void* closure) { + return ToPyObject(self->eagertensor.place()); +} + +PyObject* eager_tensor_properties_get_place_str(EagerTensorObject* self, + void* 
closure) { + std::stringstream ostr; + ostr << self->eagertensor.place(); + return ToPyObject(ostr.str()); +} + +PyObject* eager_tensor_properties_get_dtype(EagerTensorObject* self, + void* closure) { + return ToPyObject(pten::DataType2String(self->eagertensor.type())); +} + +struct PyGetSetDef variable_properties[] = { + {"grad", (getter)eager_tensor_properties_get_grad, nullptr, nullptr, + nullptr}, + {"name", (getter)eager_tensor_properties_get_name, + (setter)eager_tensor_properties_set_name, nullptr, nullptr}, + {"stop_gradient", (getter)eager_tensor_properties_get_stop_gradient, + (setter)eager_tensor_properties_set_stop_gradient, nullptr, nullptr}, + {"persistable", (getter)eager_tensor_properties_get_persistable, + (setter)eager_tensor_properties_set_persistable, nullptr, nullptr}, + {"shape", (getter)eager_tensor_properties_get_shape, nullptr, nullptr, + nullptr}, + // {"is_leaf", (getter)eager_tensor_properties_get_is_leaf, nullptr, + // nullptr, + // nullptr}, + {"place", (getter)eager_tensor_properties_get_place, nullptr, nullptr, + nullptr}, + {"_place_str", (getter)eager_tensor_properties_get_place_str, nullptr, + nullptr, nullptr}, + {"dtype", (getter)eager_tensor_properties_get_dtype, nullptr, nullptr, + nullptr}, + {nullptr, nullptr, nullptr, nullptr, nullptr}}; + +} // namespace pybind +} // namespace paddle diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc new file mode 100644 index 0000000000000..fb3462f342fd8 --- /dev/null +++ b/paddle/fluid/pybind/eager_utils.cc @@ -0,0 +1,339 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include + +#include +#include + +#include "paddle/fluid/eager/api/all.h" +#include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/fluid/memory/allocation/allocator.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/pybind/eager.h" +#include "paddle/fluid/pybind/eager_utils.h" +#include "paddle/pten/common/data_type.h" +#include "paddle/pten/core/convert_utils.h" +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/include/core.h" + +namespace paddle { +namespace pybind { + +extern PyTypeObject* p_eager_tensor_type; + +extern PyTypeObject* g_place_pytype; +extern PyTypeObject* g_cudaplace_pytype; +extern PyTypeObject* g_cpuplace_pytype; +extern PyTypeObject* g_xpuplace_pytype; +extern PyTypeObject* g_npuplace_pytype; +extern PyTypeObject* g_cudapinnedplace_pytype; + +bool PyObject_CheckLongOrConvertToLong(PyObject** obj) { + if ((PyLong_Check(*obj) && !PyBool_Check(*obj))) { + return true; + } + + if (std::string((reinterpret_cast((*obj)->ob_type))->tp_name) + .find("numpy") != std::string::npos) { + auto to = PyNumber_Long(*obj); + if (to) { + *obj = to; + return true; + } + } + + return false; +} + +bool PyObject_CheckFloatOrConvertToFloat(PyObject** obj) { + // sometimes users provide PyLong or numpy.int64 but attr is float + if (PyFloat_Check(*obj) || PyLong_Check(*obj)) { + return true; + } + if (std::string((reinterpret_cast((*obj)->ob_type))->tp_name) + .find("numpy") != std::string::npos) { + auto to = PyNumber_Float(*obj); + if (to) { + *obj = to; + return true; + } + } + return false; +} + +bool PyObject_CheckStr(PyObject* obj) { return PyUnicode_Check(obj); } + +bool CastPyArg2AttrBoolean(PyObject* obj, ssize_t arg_pos) { + if (obj == Py_None) { + return false; // To be compatible with QA integration testing. Some + // test case pass in None. 
+ } else if (obj == Py_True) { + return true; + } else if (obj == Py_False) { + return false; + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "argument (position %d) must be " + "bool, but got %s", + arg_pos + 1, (reinterpret_cast(obj->ob_type))->tp_name)); + } +} + +int CastPyArg2AttrInt(PyObject* obj, ssize_t arg_pos) { + if (PyObject_CheckLongOrConvertToLong(&obj)) { + return static_cast(PyLong_AsLong(obj)); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "argument (position %d) must be " + "int, but got %s", + arg_pos + 1, (reinterpret_cast(obj->ob_type))->tp_name)); + } +} + +int64_t CastPyArg2AttrLong(PyObject* obj, ssize_t arg_pos) { + if (PyObject_CheckLongOrConvertToLong(&obj)) { + return reinterpret_cast(PyLong_AsLong(obj)); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "argument (position %d) must be " + "long, but got %s", + arg_pos + 1, (reinterpret_cast(obj->ob_type))->tp_name)); + } +} + +float CastPyArg2AttrFloat(PyObject* obj, ssize_t arg_pos) { + if (PyObject_CheckFloatOrConvertToFloat(&obj)) { + return static_cast(PyFloat_AsDouble(obj)); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "argument (position %d) must be " + "float, but got %s", + arg_pos + 1, (reinterpret_cast(obj->ob_type))->tp_name)); + } +} + +std::string CastPyArg2AttrString(PyObject* obj, ssize_t arg_pos) { + if (PyObject_CheckStr(obj)) { + Py_ssize_t size; + const char* data; + data = PyUnicode_AsUTF8AndSize(obj, &size); + return std::string(data, static_cast(size)); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "argument (position %d) must be " + "str, but got %s", + arg_pos + 1, (reinterpret_cast(obj->ob_type))->tp_name)); + return ""; + } +} + +egr::EagerTensor CastPyArg2EagerTensor(PyObject* obj, ssize_t arg_pos) { + if (PyObject_IsInstance(obj, + reinterpret_cast(p_eager_tensor_type))) { + return reinterpret_cast(obj)->eagertensor; + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "argument (position %d) must be " + "EagerTensor, but got %s", + arg_pos + 1, reinterpret_cast(obj->ob_type)->tp_name)); + } +} + +std::vector CastPyArg2VectorOfEagerTensor(PyObject* obj, + ssize_t arg_pos) { + std::vector result; + if (PyList_Check(obj)) { + Py_ssize_t len = PyList_Size(obj); + PyObject* item = nullptr; + for (Py_ssize_t i = 0; i < len; i++) { + item = PyList_GetItem(obj, i); + if (PyObject_IsInstance( + item, reinterpret_cast(p_eager_tensor_type))) { + result.emplace_back( + reinterpret_cast(item)->eagertensor); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "argument (position %d) must be " + "list of bool, but got %s at pos %d", + arg_pos + 1, + reinterpret_cast(item->ob_type)->tp_name, i)); + } + } + } else if (PyTuple_Check(obj)) { + Py_ssize_t len = PyTuple_Size(obj); + PyObject* item = nullptr; + for (Py_ssize_t i = 0; i < len; i++) { + item = PyTuple_GetItem(obj, i); + if (PyObject_IsInstance( + item, reinterpret_cast(p_eager_tensor_type))) { + result.emplace_back( + reinterpret_cast(item)->eagertensor); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "argument (position %d) must be " + "list of bool, but got %s at pos %d", + arg_pos + 1, + reinterpret_cast(item->ob_type)->tp_name, i)); + } + } + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "argument (position %d) must be " + "list or tuple, but got %s", + arg_pos + 1, reinterpret_cast(obj->ob_type)->tp_name)); + } + return result; +} + +platform::Place CastPyArg2Place(PyObject* obj, ssize_t arg_pos) { + 
platform::Place place;
+ if (PyObject_IsInstance(obj, reinterpret_cast(g_place_pytype))) {
+ place = ::pybind11::handle(obj).cast();
+ } else if (PyObject_IsInstance(
+ obj, reinterpret_cast(g_cudaplace_pytype))) {
+ place = ::pybind11::handle(obj).cast();
+ } else if (PyObject_IsInstance(
+ obj, reinterpret_cast(g_cpuplace_pytype))) {
+ place = ::pybind11::handle(obj).cast();
+ } else if (PyObject_IsInstance(
+ obj, reinterpret_cast(g_xpuplace_pytype))) {
+ place = ::pybind11::handle(obj).cast();
+ } else if (PyObject_IsInstance(
+ obj, reinterpret_cast(g_npuplace_pytype))) {
+ place = ::pybind11::handle(obj).cast();
+ } else if (PyObject_IsInstance(
+ obj, reinterpret_cast(g_cudapinnedplace_pytype))) {
+ place = ::pybind11::handle(obj).cast();
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "argument (position %d) must be "
+ "one of(Place,CUDAPlace,CPUPlace,XPUPlace,NPUPlace,CUDAPinnedPlace), "
+ "but got %s",
+ arg_pos + 1, reinterpret_cast(obj->ob_type)->tp_name));
+ }
+ return place;
+}
+
+PyObject* ToPyObject(bool value) {
+ if (value) {
+ Py_INCREF(Py_True);
+ return Py_True;
+ } else {
+ Py_INCREF(Py_False);
+ return Py_False;
+ }
+}
+
+PyObject* ToPyObject(int value) { return PyLong_FromLong(value); }
+
+PyObject* ToPyObject(int64_t value) { return PyLong_FromLongLong(value); }
+
+PyObject* ToPyObject(float value) { return PyFloat_FromDouble(value); }
+
+PyObject* ToPyObject(double value) { return PyFloat_FromDouble(value); }
+
+PyObject* ToPyObject(const char* value) { return PyUnicode_FromString(value); }
+
+PyObject* ToPyObject(const std::string& value) {
+ return PyUnicode_FromString(value.c_str());
+}
+
+PyObject* ToPyObject(const egr::EagerTensor& value) {
+ PyObject* obj = p_eager_tensor_type->tp_alloc(p_eager_tensor_type, 0);
+ if (obj) {
+ auto v = reinterpret_cast(obj);
+ new (&(v->eagertensor)) egr::EagerTensor();
+ v->eagertensor = value;
+ } else {
+ PADDLE_THROW(platform::errors::Fatal(
+ "tp_alloc return null, can not new a PyObject."));
+ }
+ return obj;
+}
+
+PyObject* ToPyObject(const std::vector& value) {
+ PyObject* result = PyList_New((Py_ssize_t)value.size());
+
+ for (size_t i = 0; i < value.size(); i++) {
+ PyList_SET_ITEM(result, static_cast(i), ToPyObject(value[i]));
+ }
+
+ return result;
+}
+
+PyObject* ToPyObject(const std::vector& value) {
+ PyObject* result = PyList_New((Py_ssize_t)value.size());
+
+ for (size_t i = 0; i < value.size(); i++) {
+ PyList_SET_ITEM(result, static_cast(i), ToPyObject(value[i]));
+ }
+
+ return result;
+}
+
+PyObject* ToPyObject(const std::vector& value) {
+ PyObject* result = PyList_New((Py_ssize_t)value.size());
+
+ for (size_t i = 0; i < value.size(); i++) {
+ PyList_SET_ITEM(result, (Py_ssize_t)i, ToPyObject(value[i]));
+ }
+
+ return result;
+}
+
+PyObject* ToPyObject(const std::vector& value) {
+ PyObject* result = PyList_New((Py_ssize_t)value.size());
+
+ for (size_t i = 0; i < value.size(); i++) {
+ PyList_SET_ITEM(result, static_cast(i), ToPyObject(value[i]));
+ }
+
+ return result;
+}
+
+PyObject* ToPyObject(const std::vector& value) {
+ PyObject* result = PyList_New((Py_ssize_t)value.size());
+
+ for (size_t i = 0; i < value.size(); i++) {
+ PyList_SET_ITEM(result, static_cast(i), ToPyObject(value[i]));
+ }
+
+ return result;
+}
+
+PyObject* ToPyObject(const std::vector& value) {
+ PyObject* result = PyList_New((Py_ssize_t)value.size());
+
+ for (size_t i = 0; i < value.size(); i++) {
+ PyObject* obj = p_eager_tensor_type->tp_alloc(p_eager_tensor_type, 0);
+ if (obj) {
+ auto v = 
reinterpret_cast(obj); + new (&(v->eagertensor)) egr::EagerTensor(); + v->eagertensor = value[i]; + } else { + PADDLE_THROW(platform::errors::Fatal( + "tp_alloc return null, can not new a PyObject.")); + } + PyList_SET_ITEM(result, static_cast(i), obj); + } + + return result; +} + +PyObject* ToPyObject(const platform::Place& value) { + auto obj = ::pybind11::cast(value); + obj.inc_ref(); + return obj.ptr(); +} + +} // namespace pybind +} // namespace paddle diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h new file mode 100644 index 0000000000000..49f56a61c31f1 --- /dev/null +++ b/paddle/fluid/pybind/eager_utils.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#include +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" + +namespace paddle { +namespace pybind { + +typedef struct { + PyObject_HEAD egr::EagerTensor eagertensor; +} EagerTensorObject; + +bool PyObject_CheckLongOrConvertToLong(PyObject** obj); +bool PyObject_CheckFloatOrConvertToFloat(PyObject** obj); +bool PyObject_CheckStr(PyObject* obj); +bool CastPyArg2AttrBoolean(PyObject* obj, ssize_t arg_pos); +int CastPyArg2AttrInt(PyObject* obj, ssize_t arg_pos); +int64_t CastPyArg2AttrLong(PyObject* obj, ssize_t arg_pos); +float CastPyArg2AttrFloat(PyObject* obj, ssize_t arg_pos); +std::string CastPyArg2AttrString(PyObject* obj, ssize_t arg_pos); +egr::EagerTensor CastPyArg2EagerTensor(PyObject* obj, ssize_t arg_pos); +std::vector CastPyArg2VectorOfEagerTensor(PyObject* obj, + ssize_t arg_pos); +platform::Place CastPyArg2Place(PyObject* obj, ssize_t arg_pos); + +PyObject* ToPyObject(int value); +PyObject* ToPyObject(bool value); +PyObject* ToPyObject(int64_t value); +PyObject* ToPyObject(float value); +PyObject* ToPyObject(double value); +PyObject* ToPyObject(const char* value); +PyObject* ToPyObject(const std::string& value); +PyObject* ToPyObject(const egr::EagerTensor& value); +PyObject* ToPyObject(const std::vector& value); +PyObject* ToPyObject(const std::vector& value); +PyObject* ToPyObject(const std::vector& value); +PyObject* ToPyObject(const std::vector& value); +PyObject* ToPyObject(const std::vector& value); +PyObject* ToPyObject(const std::vector& value); +PyObject* ToPyObject(const platform::Place& value); + +} // namespace pybind +} // namespace paddle diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index b85ebdeab5542..683c98f86543f 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -75,6 +75,7 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/pybind/cuda_streams_py.h" +#include "paddle/fluid/pybind/eager.h" #include "paddle/fluid/pybind/io.h" #include "paddle/utils/none.h" #ifdef PADDLE_WITH_ASCEND @@ -150,6 +151,14 @@ PYBIND11_MAKE_OPAQUE(paddle::framework::FetchType); namespace paddle { namespace pybind { + +PyTypeObject *g_place_pytype = nullptr; +PyTypeObject *g_cudaplace_pytype = nullptr; +PyTypeObject *g_cpuplace_pytype = nullptr; +PyTypeObject *g_xpuplace_pytype = nullptr; +PyTypeObject *g_npuplace_pytype = nullptr; +PyTypeObject *g_cudapinnedplace_pytype = nullptr; + bool IsCompiledWithCUDA() { #if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP) return false; @@ -524,6 +533,7 @@ PYBIND11_MODULE(core_avx, m) { PYBIND11_MODULE(core_noavx, m) { #endif + BindEager(&m); BindCudaStream(&m); // Not used, just make sure cpu_info.cc is linked. @@ -1599,7 +1609,7 @@ All parameter, weight, gradient are variables in Paddle. #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) py::class_(m, "Communicator").def(py::init<>()); #endif - py::class_(m, "CUDAPlace", R"DOC( + py::class_ cudaplace(m, "CUDAPlace", R"DOC( CUDAPlace is a descriptor of a device. It represents a GPU device allocated or to be allocated with Tensor or LoDTensor. @@ -1622,7 +1632,9 @@ All parameter, weight, gradient are variables in Paddle. place = paddle.CUDAPlace(0) - )DOC") + )DOC"); + g_cudaplace_pytype = reinterpret_cast(cudaplace.ptr()); + cudaplace .def("__init__", [](platform::CUDAPlace &self, int dev_id) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) @@ -1680,13 +1692,15 @@ All parameter, weight, gradient are variables in Paddle. .def("__repr__", string::to_string) .def("__str__", string::to_string); - py::class_(m, "XPUPlace", R"DOC( + py::class_ xpuplace(m, "XPUPlace", R"DOC( **Note**: Examples: .. code-block:: python import paddle.fluid as fluid xpu_place = fluid.XPUPlace(0) - )DOC") + )DOC"); + g_xpuplace_pytype = reinterpret_cast(xpuplace.ptr()); + xpuplace .def("__init__", [](platform::XPUPlace &self, int dev_id) { #ifdef PADDLE_WITH_XPU @@ -1756,7 +1770,7 @@ All parameter, weight, gradient are variables in Paddle. }); #endif - py::class_(m, "CPUPlace", R"DOC( + py::class_ cpuplace(m, "CPUPlace", R"DOC( CPUPlace is a descriptor of a device. It represents a CPU device on which a tensor will be allocated and a model will run. @@ -1766,8 +1780,9 @@ All parameter, weight, gradient are variables in Paddle. import paddle cpu_place = paddle.CPUPlace() - )DOC") - .def(py::init<>()) + )DOC"); + g_cpuplace_pytype = reinterpret_cast(cpuplace.ptr()); + cpuplace.def(py::init<>()) .def("_type", &PlaceIndex) .def("_equals", &IsSamePlace) .def("_equals", &IsSamePlace) @@ -1779,7 +1794,8 @@ All parameter, weight, gradient are variables in Paddle. .def("__repr__", string::to_string) .def("__str__", string::to_string); - py::class_(m, "CUDAPinnedPlace", R"DOC( + py::class_ cudapinnedplace( + m, "CUDAPinnedPlace", R"DOC( CUDAPinnedPlace is a descriptor of a device. It refers to the page locked memory allocated by the CUDA function `cudaHostAlloc()` in the host memory. The host operating system will not paging and exchanging the memory. @@ -1793,7 +1809,10 @@ All parameter, weight, gradient are variables in Paddle. 
import paddle place = paddle.CUDAPinnedPlace() - )DOC") + )DOC"); + g_cudapinnedplace_pytype = + reinterpret_cast(cudapinnedplace.ptr()); + cudapinnedplace .def("__init__", [](platform::CUDAPinnedPlace &self) { #if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP) @@ -1819,7 +1838,7 @@ All parameter, weight, gradient are variables in Paddle. .def("__str__", string::to_string); // NPUPlace - py::class_(m, "NPUPlace", R"DOC( + py::class_ npuplace(m, "NPUPlace", R"DOC( NPUPlace is a descriptor of a device. It represents a NPU device on which a tensor will be allocated and a model will run. @@ -1828,7 +1847,9 @@ All parameter, weight, gradient are variables in Paddle. import paddle npu_place = paddle.NPUPlace(0) - )DOC") + )DOC"); + g_npuplace_pytype = reinterpret_cast(npuplace.ptr()); + npuplace .def("__init__", [](platform::NPUPlace &self, int dev_id) { #ifdef PADDLE_WITH_ASCEND_CL @@ -1879,8 +1900,9 @@ All parameter, weight, gradient are variables in Paddle. [](const platform::NPUPlace &self) { return self.GetDeviceId(); }) .def("__str__", string::to_string); - py::class_(m, "Place") - .def(py::init<>()) + py::class_ platformplace(m, "Place"); + g_place_pytype = reinterpret_cast(platformplace.ptr()); + platformplace.def(py::init<>()) .def("_type", &PlaceIndex) .def("_equals", &IsSamePlace) .def("_equals", &IsSamePlace) diff --git a/paddle/pten/core/convert_utils.cc b/paddle/pten/core/convert_utils.cc index 92709647dac00..526394a755f97 100644 --- a/paddle/pten/core/convert_utils.cc +++ b/paddle/pten/core/convert_utils.cc @@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include "paddle/pten/core/convert_utils.h" +#include // See Note [ Why still include the fluid headers? 
]
 #include "paddle/fluid/platform/gpu_info.h"
@@ -180,4 +182,127 @@ pten::LoD TransToPtenLoD(const paddle::framework::LoD& lod) {
 return out;
 }
 
+size_t DataTypeSize(DataType dtype) {
+ switch (dtype) {
+ case DataType::UNDEFINED:
+ return 0;
+ case DataType::BOOL:
+ return sizeof(bool);
+ case DataType::INT8:
+ return sizeof(int8_t);
+ case DataType::UINT8:
+ return sizeof(uint8_t);
+ case DataType::INT16:
+ return sizeof(int16_t);
+ case DataType::INT32:
+ return sizeof(int);
+ case DataType::INT64:
+ return sizeof(int64_t);
+ case DataType::FLOAT16:
+ return sizeof(paddle::platform::float16);
+ case DataType::FLOAT32:
+ return sizeof(float);
+ case DataType::FLOAT64:
+ return sizeof(double);
+ case DataType::COMPLEX64:
+ return sizeof(paddle::platform::complex);
+ case DataType::COMPLEX128:
+ return sizeof(paddle::platform::complex);
+ default:
+ return 0;
+ }
+}
+
+DataType String2DataType(const std::string& str) {
+ if (str == "bool") {
+ return DataType::BOOL;
+ } else if (str == "float16") {
+ return DataType::FLOAT16;
+ } else if (str == "float32") {
+ return DataType::FLOAT32;
+ } else if (str == "float64") {
+ return DataType::FLOAT64;
+ } else if (str == "int8") {
+ return DataType::INT8;
+ } else if (str == "int16") {
+ return DataType::INT16;
+ } else if (str == "int32") {
+ return DataType::INT32;
+ } else if (str == "int64") {
+ return DataType::INT64;
+ } else if (str == "uint8") {
+ return DataType::UINT8;
+ } else if (str == "complex64") {
+ return DataType::COMPLEX64;
+ } else if (str == "complex128") {
+ return DataType::COMPLEX128;
+ } else {
+ return DataType::UNDEFINED;
+ }
+}
+
+std::string DataType2String(DataType dtype) {
+ switch (dtype) {
+ case DataType::BOOL:
+ return "bool";
+ case DataType::INT8:
+ return "int8";
+ case DataType::UINT8:
+ return "uint8";
+ case DataType::INT16:
+ return "int16";
+ case DataType::INT32:
+ return "int32";
+ case DataType::INT64:
+ return "int64";
+ case DataType::FLOAT16:
+ return "float16";
+ case DataType::FLOAT32:
+ return "float32";
+ case DataType::FLOAT64:
+ return "float64";
+ case DataType::COMPLEX64:
+ return "complex64";
+ case DataType::COMPLEX128:
+ return "complex128";
+ default:
+ PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+ "Unknown pten::DataType, the int value = %d.",
+ static_cast(dtype)));
+ return "";
+ }
+}
+
+int TensorDtype2NumpyDtype(pten::DataType dtype) {
+ switch (dtype) {
+ case pten::DataType::BOOL:
+ return NPY_TYPES::NPY_BOOL;
+ case pten::DataType::INT8:
+ return NPY_TYPES::NPY_INT8;
+ case pten::DataType::UINT8:
+ return NPY_TYPES::NPY_UINT8;
+ case pten::DataType::INT16:
+ return NPY_TYPES::NPY_INT16;
+ case pten::DataType::INT32:
+ return NPY_TYPES::NPY_INT32;
+ case pten::DataType::INT64:
+ return NPY_TYPES::NPY_INT64;
+ case pten::DataType::FLOAT16:
+ return NPY_TYPES::NPY_FLOAT16;
+ case pten::DataType::FLOAT32:
+ return NPY_TYPES::NPY_FLOAT;
+ case pten::DataType::FLOAT64:
+ return NPY_TYPES::NPY_DOUBLE;
+ case pten::DataType::COMPLEX64:
+ return NPY_TYPES::NPY_COMPLEX64;
+ case pten::DataType::COMPLEX128:
+ return NPY_TYPES::NPY_COMPLEX128;
+ default:
+ PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+ "Unknown pten::DataType, the int value = %d.",
+ static_cast(dtype)));
+ return 0;
+ }
+}
+
 } // namespace pten
diff --git a/paddle/pten/core/convert_utils.h b/paddle/pten/core/convert_utils.h
index 0b807c48bc150..e5990eb0a89f0 100644
--- a/paddle/pten/core/convert_utils.h
+++ b/paddle/pten/core/convert_utils.h
@@ -45,4 +45,9 @@ paddle::framework::DataLayout 
TransToFluidDataLayout(const DataLayout& layout); paddle::framework::LoD TransToFluidLoD(const pten::LoD& lod); pten::LoD TransToPtenLoD(const paddle::framework::LoD& lod); +size_t DataTypeSize(DataType dtype); +DataType String2DataType(const std::string& str); +std::string DataType2String(DataType dtype); +int TensorDtype2NumpyDtype(pten::DataType dtype); + } // namespace pten diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 5683750c4d829..5482413dbbc5d 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -55,6 +55,7 @@ from .initializer import set_global_initializer from . import layers from . import dygraph +from . import eager from . import contrib from . import nets from . import optimizer @@ -90,6 +91,7 @@ from .io import save, load, load_program_state, set_program_state from .dygraph.checkpoint import save_dygraph, load_dygraph from .dygraph.varbase_patch_methods import monkey_patch_varbase +from .eager.eager_tensor_patch_methods import monkey_patch_eagertensor from . import generator from .core import _cuda_synchronize from .generator import Generator @@ -113,6 +115,7 @@ 'contrib', 'data', 'dygraph', + 'eager', 'enable_dygraph', 'disable_dygraph', 'enable_imperative', @@ -211,6 +214,7 @@ def remove_flag_if_exists(name): monkey_patch_variable() __bootstrap__() monkey_patch_varbase() +monkey_patch_eagertensor() # NOTE(zhiqiu): register npu_finalize on the exit of Python, # do some clean up manually. diff --git a/python/paddle/fluid/eager/__init__.py b/python/paddle/fluid/eager/__init__.py new file mode 100644 index 0000000000000..1dc82ef69979c --- /dev/null +++ b/python/paddle/fluid/eager/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +# incubate directory is mainly for internal use +# after we have tested incubate APIs in industrial application for a period +# we will move stable functions into fluid + +from . import eager_tensor_patch_methods + +__all__ = [] diff --git a/python/paddle/fluid/eager/eager_tensor_patch_methods.py b/python/paddle/fluid/eager/eager_tensor_patch_methods.py new file mode 100644 index 0000000000000..ee83038175ba4 --- /dev/null +++ b/python/paddle/fluid/eager/eager_tensor_patch_methods.py @@ -0,0 +1,26 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle.fluid.core as core + + +def monkey_patch_eagertensor(): + def __str__(self): + from paddle.tensor.to_string import eager_tensor_to_string + return eager_tensor_to_string(self) + + setattr(core.eager.EagerTensor, "__str__", __str__) + + # for method_name, method in (("__str__", __str__), ("__name__", "Tensor")): + # setattr(core.eager.EagerTensor, method_name, method) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 04042eac953ba..ee7aa4560364e 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -45,6 +45,8 @@ 'Program', 'default_startup_program', 'default_main_program', + 'eager_guard', + 'in_eager_mode', 'program_guard', 'name_scope', 'cuda_places', @@ -75,6 +77,21 @@ global_prog_seed = 0 _current_pipeline_stage = None _global_flags_ = core.globals() +_eager_mode_ = False + + +@signature_safe_contextmanager +def eager_guard(): + global _eager_mode_ + _eager_mode_ = True + try: + yield + finally: + _eager_mode_ = False + + +def in_eager_mode(): + return _eager_mode_ def require_version(min_version, max_version=None): @@ -340,7 +357,10 @@ def _set_dygraph_tracer_expected_place(place): def _set_expected_place(place): global _global_expected_place_ _global_expected_place_ = place - _set_dygraph_tracer_expected_place(place) + if in_eager_mode(): + return core.eager._set_expected_place(place) + else: + _set_dygraph_tracer_expected_place(place) # TODO(zhiqiu): remove this function. diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py new file mode 100644 index 0000000000000..cab810f2daa02 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py @@ -0,0 +1,101 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import paddle.fluid.core as core
+import paddle.fluid.eager.eager_tensor_patch_methods as eager_tensor_patch_methods
+import paddle
+import numpy as np
+from paddle.fluid import eager_guard
+import unittest
+
+
+class EagerScaleTestCase(unittest.TestCase):
+ def test_scale_base(self):
+ with eager_guard():
+ paddle.set_device("cpu")
+ arr = np.ones([4, 16, 16, 32]).astype('float32')
+ tensor = paddle.to_tensor(arr, 'float32', core.CPUPlace())
+ print(tensor)
+ tensor = core.eager.scale(tensor, 2.0, 0.9, True, False)
+ for i in range(0, 100):
+ tensor = core.eager.scale(tensor, 2.0, 0.9, True, False)
+ print(tensor)
+ self.assertEqual(tensor.shape, [4, 16, 16, 32])
+ self.assertEqual(tensor.stop_gradient, True)
+
+ def test_retain_grad_and_run_backward(self):
+ with eager_guard():
+ paddle.set_device("cpu")
+
+ input_data = np.ones([4, 16, 16, 32]).astype('float32')
+ data_eager = paddle.to_tensor(input_data, 'float32',
+ core.CPUPlace(), False)
+
+ grad_data = np.ones([4, 16, 16, 32]).astype('float32')
+ grad_eager = paddle.to_tensor(grad_data, 'float32', core.CPUPlace())
+
+ core.eager.retain_grad_for_tensor(data_eager)
+
+ for i in range(10):
+ out_eager = core.eager.scale(data_eager, 1.0, 0.9, True, True)
+ core.eager.run_backward([out_eager], [grad_eager], False)
+
+
+class EagerDtypeTestCase(unittest.TestCase):
+ def check_to_tensor_and_numpy(self, dtype):
+ with eager_guard():
+ arr = np.random.random([4, 16, 16, 32]).astype(dtype)
+ tensor = paddle.to_tensor(arr, dtype)
+ self.assertEqual(tensor.dtype, dtype)
+ self.assertTrue(np.array_equal(arr, tensor.numpy()))
+
+ def test_dtype_base(self):
+ self.check_to_tensor_and_numpy('bool')
+ self.check_to_tensor_and_numpy('int8')
+ self.check_to_tensor_and_numpy('uint8')
+ self.check_to_tensor_and_numpy('int16')
+ self.check_to_tensor_and_numpy('int32')
+ self.check_to_tensor_and_numpy('int64')
+ self.check_to_tensor_and_numpy('float16')
+ self.check_to_tensor_and_numpy('float32')
+ self.check_to_tensor_and_numpy('float64')
+ self.check_to_tensor_and_numpy('complex64')
+ self.check_to_tensor_and_numpy('complex128')
+
+
+class EagerTensorPropertiesTestCase(unittest.TestCase):
+ def test_properties(self):
+ with eager_guard():
+ paddle.set_device("cpu")
+ arr = np.ones([4, 16, 16, 32]).astype('float32')
+ tensor = paddle.to_tensor(arr, 'float32', core.CPUPlace())
+ self.assertEqual(tensor.shape, [4, 16, 16, 32])
+ tensor.name = 'tensor_name_test'
+ self.assertEqual(tensor.name, 'tensor_name_test')
+ self.assertEqual(tensor.persistable, False)
+ tensor.persistable = True
+ self.assertEqual(tensor.persistable, True)
+ tensor.persistable = False
+ self.assertEqual(tensor.persistable, False)
+ self.assertTrue(tensor.place.is_cpu_place())
+ self.assertEqual(tensor._place_str, 'CPUPlace')
+ self.assertEqual(tensor.stop_gradient, True)
+ tensor.stop_gradient = False
+ self.assertEqual(tensor.stop_gradient, False)
+ tensor.stop_gradient = True
+ self.assertEqual(tensor.stop_gradient, True)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py
index 72b6bd29fd9e7..812c7e8b5ac04 100644
--- a/python/paddle/tensor/creation.py
+++ b/python/paddle/tensor/creation.py
@@ -31,6 +31,7 @@
 from ..fluid.layers import linspace # noqa: F401
 import paddle
 from paddle import _C_ops
+from ..fluid.framework import in_eager_mode
 
 __all__ = []
 
@@ -115,6 +116,12 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True):
 ) != _current_expected_place()._get_device_id(): 
place = _current_expected_place() + if in_eager_mode(): + if dtype is None: + dtype = paddle.get_default_dtype() + return core.eager.to_tensor(data, + convert_dtype(dtype), place, stop_gradient) + if not isinstance(data, np.ndarray): def _handle_dtype(data, dtype): diff --git a/python/paddle/tensor/to_string.py b/python/paddle/tensor/to_string.py index f640882893034..6fd20457fe619 100644 --- a/python/paddle/tensor/to_string.py +++ b/python/paddle/tensor/to_string.py @@ -255,3 +255,39 @@ def to_string(var, prefix='Tensor'): stop_gradient=var.stop_gradient, indent=' ' * indent, data=data) + + +def eager_tensor_to_string(tensor, prefix='Tensor'): + indent = len(prefix) + 1 + + _template = "{prefix}(shape={shape}, dtype={dtype}, place={place}, stop_gradient={stop_gradient},\n{indent}{data})" + + if not tensor._is_initialized(): + return "Tensor(Not initialized)" + + np_tensor = tensor.numpy() + + if len(tensor.shape) == 0: + size = 0 + else: + size = 1 + for dim in tensor.shape: + size *= dim + + sumary = False + if size > DEFAULT_PRINT_OPTIONS.threshold: + sumary = True + + max_width, signed = _get_max_width(_to_summary(np_tensor)) + + data = _format_tensor( + np_tensor, sumary, indent=indent, max_width=max_width, signed=signed) + + return _template.format( + prefix=prefix, + shape=tensor.shape, + dtype=tensor.dtype, + place=tensor._place_str, + stop_gradient=tensor.stop_gradient, + indent=' ' * indent, + data=data) diff --git a/python/setup.py.in b/python/setup.py.in index 60d9434e85663..e48fe381a147e 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -306,6 +306,7 @@ packages=['paddle', 'paddle.fluid.dygraph', 'paddle.fluid.dygraph.dygraph_to_static', 'paddle.fluid.dygraph.amp', + 'paddle.fluid.eager', 'paddle.fluid.proto', 'paddle.fluid.proto.profiler', 'paddle.fluid.distributed', From cf6ccebcf8ed63fa645c56ad358c39f81c4cd1c1 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Thu, 25 Nov 2021 08:08:08 +0000 Subject: [PATCH 03/31] revert modify about test_allclose_layer.py, test=develop --- python/paddle/fluid/tests/unittests/test_allclose_layer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_allclose_layer.py b/python/paddle/fluid/tests/unittests/test_allclose_layer.py index 7b201f60db539..c376a5c95c393 100644 --- a/python/paddle/fluid/tests/unittests/test_allclose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_allclose_layer.py @@ -61,7 +61,7 @@ def allclose_check(self, use_cuda, dtype='float32'): # for corner case x = np.array([10.1, 10.1]).astype(dtype) y = np.array([10, 10]).astype(dtype) - result_c = exe.run(feed={'a': x, 'b': y}, fetch_list=[result_corner]) + result_c, = exe.run(feed={'a': x, 'b': y}, fetch_list=[result_corner]) corner_res = (dtype == 'float64') self.assertEqual(result_c[0], corner_res) From a5b4261d27c10e01f26bb7409c0ec091f075b6e3 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Mon, 29 Nov 2021 05:41:56 +0000 Subject: [PATCH 04/31] refine, test=develop --- paddle/fluid/pybind/eager_functions.cc | 1 + paddle/fluid/pybind/eager_method.cc | 1 + paddle/fluid/pybind/eager_properties.cc | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 149a618f34fa3..705a3c58c96d6 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -39,6 +39,7 @@ limitations under the License. 
*/ #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/include/core.h" #pragma GCC diagnostic ignored "-Wconversion-null" // for import_array(); +#pragma GCC diagnostic ignored "-Wunused-variable" // for numpy_initialized_f namespace paddle { namespace pybind { diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 92482c8e250da..eacd1aabf3430 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -33,6 +33,7 @@ limitations under the License. */ #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/include/core.h" #pragma GCC diagnostic ignored "-Wconversion-null" // for import_array(); +#pragma GCC diagnostic ignored "-Wunused-variable" // for numpy_initialized_m namespace paddle { namespace pybind { diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index 9643a4dba661a..1a1cb9f6f3d4a 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -34,6 +34,7 @@ limitations under the License. */ #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/include/core.h" #pragma GCC diagnostic ignored "-Wconversion-null" // for import_array(); +#pragma GCC diagnostic ignored "-Wunused-variable" // for numpy_initialized_p namespace paddle { namespace pybind { @@ -42,7 +43,7 @@ int init_numpy_p() { import_array(); return 0; } -static const int numpy_initialized_m = init_numpy_p(); +static const int numpy_initialized_p = init_numpy_p(); extern PyTypeObject* p_eager_tensor_type; From 0732c3f4adea7fe4e5190db19ff30fe301fc298c Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Mon, 29 Nov 2021 06:09:42 +0000 Subject: [PATCH 05/31] refine, test=develop --- paddle/fluid/pybind/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index ea2b9a7c3f7db..17816674a3c4b 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -215,7 +215,7 @@ if(WITH_PYTHON) cc_library(paddle_eager SRCS eager.cc eager_functions.cc eager_method.cc eager_properties.cc eager_utils.cc - DEPS autograd_meta grad_node_info pten global_utils utils) #eager_api accumulation_node backward + DEPS autograd_meta grad_node_info pten global_utils utils eager_api accumulation_node backward) list(APPEND PYBIND_DEPS paddle_eager) cc_library(paddle_pybind SHARED From e121ff800089f4b6c696e96085e589b5dbf03e8f Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Mon, 29 Nov 2021 06:25:05 +0000 Subject: [PATCH 06/31] refine, test=develop --- paddle/fluid/pybind/eager_properties.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index 1a1cb9f6f3d4a..0cfb9b8c930bf 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -35,6 +35,8 @@ limitations under the License. 
*/ #include "paddle/pten/include/core.h" #pragma GCC diagnostic ignored "-Wconversion-null" // for import_array(); #pragma GCC diagnostic ignored "-Wunused-variable" // for numpy_initialized_p +#pragma GCC diagnostic ignored \ + "-Wwrite-strings" // for {nullptr, nullptr, nullptr, nullptr, nullptr}}; namespace paddle { namespace pybind { From 5cfa2e3933a7b9131f6231f13a5ae5d47d39a049 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Mon, 29 Nov 2021 06:59:28 +0000 Subject: [PATCH 07/31] refine, test=develop --- paddle/fluid/pybind/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 17816674a3c4b..41708ef8611e4 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -215,7 +215,7 @@ if(WITH_PYTHON) cc_library(paddle_eager SRCS eager.cc eager_functions.cc eager_method.cc eager_properties.cc eager_utils.cc - DEPS autograd_meta grad_node_info pten global_utils utils eager_api accumulation_node backward) + DEPS autograd_meta grad_node_info pten global_utils utils eager_api accumulation_node backward python) list(APPEND PYBIND_DEPS paddle_eager) cc_library(paddle_pybind SHARED From ca2a4af3d04ea5656f6f8833c3705bc8f002523e Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Mon, 29 Nov 2021 07:33:33 +0000 Subject: [PATCH 08/31] refine, test=develop --- paddle/pten/core/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/pten/core/CMakeLists.txt b/paddle/pten/core/CMakeLists.txt index e19d0a490cef3..0a2504f50327c 100644 --- a/paddle/pten/core/CMakeLists.txt +++ b/paddle/pten/core/CMakeLists.txt @@ -1,9 +1,9 @@ if(WITH_GPU) - cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info) + cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info python) elseif(WITH_ROCM) - cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info) + cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info python) else() - cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place) + cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place python) endif() cc_library(kernel_factory SRCS kernel_factory.cc DEPS enforce) From 79d1611058357632f9e8e48724964d8490e0eb70 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Mon, 29 Nov 2021 07:38:34 +0000 Subject: [PATCH 09/31] refine, test=develop --- paddle/fluid/pybind/eager_utils.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index fb3462f342fd8..9268fc8e7b976 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -101,7 +101,7 @@ int CastPyArg2AttrInt(PyObject* obj, ssize_t arg_pos) { int64_t CastPyArg2AttrLong(PyObject* obj, ssize_t arg_pos) { if (PyObject_CheckLongOrConvertToLong(&obj)) { - return reinterpret_cast(PyLong_AsLong(obj)); + return (int64_t)PyLong_AsLong(obj); // NOLINT } else { PADDLE_THROW(platform::errors::InvalidArgument( "argument (position %d) must be " From 5cb333ab7084b6f83424b3208edfef012a6f8317 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Tue, 30 Nov 2021 11:09:20 +0000 Subject: [PATCH 10/31] delete numpy includes, use pybind11 numpy.h, test=develop --- paddle/fluid/pybind/eager_functions.cc | 66 +++++++------------------ paddle/fluid/pybind/eager_method.cc | 44 ++++++----------- paddle/fluid/pybind/eager_properties.cc | 15 ------ 
paddle/pten/core/convert_utils.cc | 27 +++++----- 4 files changed, 47 insertions(+), 105 deletions(-) diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 705a3c58c96d6..dbd9f30c51165 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -9,18 +9,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ // disable numpy compile error -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -// #define PY_ARRAY_UNIQUE_SYMBOL Paddle_PyArray_API_F -#define INIT_NUMPY_ARRAY_CPP - -#include -#include - #include #include #include +#include "pybind11/numpy.h" +#include "pybind11/pybind11.h" + #include "paddle/fluid/eager/accumulation/accumulation_node.h" #include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/autograd_meta.h" @@ -38,47 +34,12 @@ limitations under the License. */ #include "paddle/pten/core/convert_utils.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/include/core.h" -#pragma GCC diagnostic ignored "-Wconversion-null" // for import_array(); -#pragma GCC diagnostic ignored "-Wunused-variable" // for numpy_initialized_f namespace paddle { namespace pybind { -int init_numpy_f() { - import_array(); - return 0; -} -static const int numpy_initialized_f = init_numpy_f(); - namespace py = ::pybind11; -// TODO(wanghuancoder) we must build paddle whl package with lower numpy version -bool check_numpy_available() { - static bool ret = []() { - if (_import_array() >= 0) { - return true; - } - - std::string message = "Failed to initialize NumPy"; - PyObject *type, *value, *traceback; - PyErr_Fetch(&type, &value, &traceback); - if (value) { - PyObject* err_msg = PyObject_Str(value); - PyObject* err_msg2 = - PyUnicode_AsEncodedString(err_msg, "utf-8", "strict"); - if (err_msg2) { - LOG(WARNING) << "Numpy Error: '" << PyBytes_AS_STRING(err_msg2) - << "'. 
You can try upgrading numpy."; - Py_XDECREF(err_msg2); - } - Py_XDECREF(err_msg); - } - PyErr_Clear(); - return false; - }(); - return ret; -} - extern PyTypeObject* p_eager_tensor_type; static PyObject* eager_api_set_expected_place(PyObject* self, PyObject* args, @@ -103,13 +64,22 @@ static PyObject* eager_api_scale(PyObject* self, PyObject* args, return ToPyObject(ret); } +size_t PyArray_Size_(PyObject* numpy_data) { + size_t res = 1; + auto dims = pybind11::detail::array_proxy(numpy_data)->dimensions; + auto nd = pybind11::detail::array_proxy(numpy_data)->nd; + while (nd--) { + res *= (*dims++); + } + return res; +} + class EagerNumpyAllocation : public paddle::memory::allocation::Allocation { public: explicit EagerNumpyAllocation(PyObject* numpy_data, pten::DataType dtype) : Allocation( - static_cast( - (reinterpret_cast(numpy_data))->data), - pten::DataTypeSize(dtype) * PyArray_Size(numpy_data), + static_cast(pybind11::detail::array_proxy(numpy_data)->data), + pten::DataTypeSize(dtype) * PyArray_Size_(numpy_data), paddle::platform::CPUPlace()), arr_(numpy_data) { PADDLE_ENFORCE_NOT_NULL(arr_, platform::errors::InvalidArgument( @@ -134,8 +104,8 @@ static inline PyObject* eager_api_numpy_to_tensor( PyObject* numpy_data, pten::DataType dtype, const paddle::platform::Place& place, bool stop_gradient) { std::vector vec_dims; - auto numpy_shape = PyArray_DIMS(reinterpret_cast(numpy_data)); - int rank = PyArray_NDIM(reinterpret_cast(numpy_data)); + auto numpy_shape = pybind11::detail::array_proxy(numpy_data)->dimensions; + int rank = pybind11::detail::array_proxy(numpy_data)->nd; for (int i = 0; i < rank; i++) { vec_dims.push_back(static_cast(numpy_shape[i])); } @@ -184,7 +154,7 @@ static PyObject* eager_api_to_tensor(PyObject* self, PyObject* args, // TODO(jiabin): Support this when python given name // auto str_name = CastPyArg2AttrString(PyTuple_GET_ITEM(args, 4), 4); - if (check_numpy_available() && PyArray_Check(data)) { + if (pybind11::detail::npy_api::get().PyArray_Check_(data)) { return eager_api_numpy_to_tensor(data, dtype, place, stop_gradient); } else { PADDLE_THROW(platform::errors::InvalidArgument( diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index eacd1aabf3430..9c7a885e97f6f 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -9,18 +9,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ // disable numpy compile error -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#define PY_ARRAY_UNIQUE_SYMBOL Paddle_PyArray_API_M -#define INIT_NUMPY_ARRAY_CPP - -#include -#include - #include #include #include +#include "pybind11/numpy.h" +#include "pybind11/pybind11.h" + #include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/memory/allocation/allocator.h" @@ -32,18 +28,9 @@ limitations under the License. 
*/ #include "paddle/pten/core/convert_utils.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/include/core.h" -#pragma GCC diagnostic ignored "-Wconversion-null" // for import_array(); -#pragma GCC diagnostic ignored "-Wunused-variable" // for numpy_initialized_m - namespace paddle { namespace pybind { -int init_numpy_m() { - import_array(); - return 0; -} -static const int numpy_initialized_m = init_numpy_m(); - extern PyTypeObject* pEagerTensorType; static PyObject* eager_tensor_method_numpy(EagerTensorObject* self, @@ -55,36 +42,37 @@ static PyObject* eager_tensor_method_numpy(EagerTensorObject* self, auto tensor_dims = self->eagertensor.shape(); auto numpy_dtype = pten::TensorDtype2NumpyDtype(self->eagertensor.type()); auto sizeof_dtype = pten::DataTypeSize(self->eagertensor.type()); - npy_intp py_dims[paddle::framework::DDim::kMaxRank]; - npy_intp py_strides[paddle::framework::DDim::kMaxRank]; + Py_intptr_t py_dims[paddle::framework::DDim::kMaxRank]; + Py_intptr_t py_strides[paddle::framework::DDim::kMaxRank]; size_t numel = 1; for (int i = tensor_dims.size() - 1; i >= 0; --i) { py_dims[i] = static_cast(tensor_dims[i]); py_strides[i] = sizeof_dtype * numel; numel *= py_dims[i]; } - PyObject* array = - PyArray_NewFromDescr(&PyArray_Type, PyArray_DescrFromType(numpy_dtype), - tensor_dims.size(), py_dims, py_strides, nullptr, - NPY_ARRAY_ALIGNED | NPY_ARRAY_WRITEABLE, nullptr); + auto& api = pybind11::detail::npy_api::get(); + PyObject* array = api.PyArray_NewFromDescr_( + api.PyArray_Type_, api.PyArray_DescrFromType_(numpy_dtype), + tensor_dims.size(), py_dims, py_strides, nullptr, + pybind11::detail::npy_api::NPY_ARRAY_ALIGNED_ | + pybind11::detail::npy_api::NPY_ARRAY_WRITEABLE_, + nullptr); if (self->eagertensor.is_cpu()) { auto dense_tensor = std::dynamic_pointer_cast(self->eagertensor.impl()); platform::CPUPlace place; // deep copy - paddle::memory::Copy( - place, reinterpret_cast( - (reinterpret_cast(array))->data), - place, dense_tensor->data(), sizeof_dtype * numel); + paddle::memory::Copy(place, reinterpret_cast( + pybind11::detail::array_proxy(array)->data), + place, dense_tensor->data(), sizeof_dtype * numel); #if defined(PADDLE_WITH_CUDA) } else if (self->eagertensor.is_cuda()) { auto dense_tensor = std::dynamic_pointer_cast(self->eagertensor.impl()); paddle::platform::GpuMemcpySync( - (reinterpret_cast(array))->data, - dense_tensor->data(), + pybind11::detail::array_proxy(array)->data, dense_tensor->data(), pten::DataTypeSize(dense_tensor->dtype()) * dense_tensor->numel(), cudaMemcpyDeviceToHost); #endif diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index 0cfb9b8c930bf..7516864218842 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -9,13 +9,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ // disable numpy compile error -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#define PY_ARRAY_UNIQUE_SYMBOL Paddle_PyArray_API_P -#define INIT_NUMPY_ARRAY_CPP - -#include -#include - #include #include @@ -33,20 +26,12 @@ limitations under the License. 
*/ #include "paddle/pten/core/convert_utils.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/include/core.h" -#pragma GCC diagnostic ignored "-Wconversion-null" // for import_array(); -#pragma GCC diagnostic ignored "-Wunused-variable" // for numpy_initialized_p #pragma GCC diagnostic ignored \ "-Wwrite-strings" // for {nullptr, nullptr, nullptr, nullptr, nullptr}}; namespace paddle { namespace pybind { -int init_numpy_p() { - import_array(); - return 0; -} -static const int numpy_initialized_p = init_numpy_p(); - extern PyTypeObject* p_eager_tensor_type; PyObject* eager_tensor_properties_get_name(EagerTensorObject* self, diff --git a/paddle/pten/core/convert_utils.cc b/paddle/pten/core/convert_utils.cc index 526394a755f97..8b54813eadf32 100644 --- a/paddle/pten/core/convert_utils.cc +++ b/paddle/pten/core/convert_utils.cc @@ -11,10 +11,9 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include "paddle/pten/core/convert_utils.h" -#include +#include "paddle/fluid/operators/py_func_op.h" +#include "paddle/fluid/pybind/tensor_py.h" // See Note [ Why still include the fluid headers? ] #include "paddle/fluid/platform/gpu_info.h" @@ -276,27 +275,27 @@ std::string DataType2String(DataType dtype) { int TensorDtype2NumpyDtype(pten::DataType dtype) { switch (dtype) { case pten::DataType::BOOL: - return NPY_TYPES::NPY_BOOL; + return pybind11::detail::npy_api::NPY_BOOL_; case pten::DataType::INT8: - return NPY_TYPES::NPY_INT8; + return pybind11::detail::npy_api::NPY_INT8_; case pten::DataType::UINT8: - return NPY_TYPES::NPY_UINT8; + return pybind11::detail::npy_api::NPY_UINT8_; case pten::DataType::INT16: - return NPY_TYPES::NPY_INT16; + return pybind11::detail::npy_api::NPY_INT16_; case pten::DataType::INT32: - return NPY_TYPES::NPY_INT32; + return pybind11::detail::npy_api::NPY_INT32_; case pten::DataType::INT64: - return NPY_TYPES::NPY_INT64; + return pybind11::detail::npy_api::NPY_INT64_; case pten::DataType::FLOAT16: - return NPY_TYPES::NPY_FLOAT16; + return pybind11::detail::NPY_FLOAT16_; case pten::DataType::FLOAT32: - return NPY_TYPES::NPY_FLOAT; + return pybind11::detail::npy_api::NPY_FLOAT_; case pten::DataType::FLOAT64: - return NPY_TYPES::NPY_DOUBLE; + return pybind11::detail::npy_api::NPY_DOUBLE_; case pten::DataType::COMPLEX64: - return NPY_TYPES::NPY_COMPLEX64; + return pybind11::detail::NPY_COMPLEX64; case pten::DataType::COMPLEX128: - return NPY_TYPES::NPY_COMPLEX128; + return pybind11::detail::NPY_COMPLEX128; default: PADDLE_THROW(paddle::platform::errors::InvalidArgument( "Unknow pten::DataType, the int value = %d.", From 0807b223d2cc39ca88d5dc3b99c32ff304614f53 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Tue, 30 Nov 2021 12:05:00 +0000 Subject: [PATCH 11/31] refine, test=develop --- cmake/flags.cmake | 1 - paddle/fluid/pybind/eager.cc | 3 ++- paddle/fluid/pybind/eager_functions.cc | 2 +- paddle/fluid/pybind/eager_properties.cc | 2 -- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 27983e4404a52..7afff25664bbb 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -141,7 +141,6 @@ set(COMMON_FLAGS -Wno-unused-parameter -Wno-unused-function -Wno-error=literal-suffix - -Wno-error=missing-field-initializers -Wno-error=unused-local-typedefs 
-Wno-error=parentheses-equality # Warnings in pybind11 -Wno-error=ignored-attributes # Warnings in Eigen, gcc 6.3 diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index a8601a3892381..8e4f784058980 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -100,7 +100,8 @@ PyTypeObject eager_tensor_type = { 0, /* tp_subclasses */ 0, /* tp_weaklist */ 0, /* tp_del */ - 0 /* tp_version_tag */ + 0, /* tp_version_tag */ + 0 /* tp_finalize */ }; void BindEager(pybind11::module* module) { diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index dbd9f30c51165..14f414c420dde 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -167,7 +167,7 @@ static PyObject* eager_api_to_tensor(PyObject* self, PyObject* args, static PyObject* eager_api_retain_grad_for_tensor(PyObject* self, PyObject* args, PyObject* kwargs) { - RetainGradForTensor(CastPyArg2EagerTensor(PyTuple_GET_ITEM(args, 0), 0)); + egr::RetainGradForTensor(CastPyArg2EagerTensor(PyTuple_GET_ITEM(args, 0), 0)); Py_INCREF(Py_None); return Py_None; } diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index 7516864218842..9befa6ddd1115 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -26,8 +26,6 @@ limitations under the License. */ #include "paddle/pten/core/convert_utils.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/include/core.h" -#pragma GCC diagnostic ignored \ - "-Wwrite-strings" // for {nullptr, nullptr, nullptr, nullptr, nullptr}}; namespace paddle { namespace pybind { From 56a25f61c024ad072ec90601cb1d97d70763b49b Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Tue, 30 Nov 2021 12:12:42 +0000 Subject: [PATCH 12/31] refine, test=develop --- paddle/fluid/pybind/eager_properties.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index 9befa6ddd1115..8f3d84e7ede46 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -26,6 +26,7 @@ limitations under the License. 
*/ #include "paddle/pten/core/convert_utils.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/include/core.h" +#pragma GCC diagnostic ignored "-Wwrite-strings" namespace paddle { namespace pybind { From 24ebe936b04c08b9112d329f3ae076d1d1e24fba Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Wed, 1 Dec 2021 01:29:00 +0000 Subject: [PATCH 13/31] refine, test=develop --- paddle/fluid/pybind/eager_functions.cc | 10 ++++++---- .../paddle/fluid/eager/eager_tensor_patch_methods.py | 3 --- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 14f414c420dde..c3f0e5834f3db 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -100,9 +100,10 @@ class EagerNumpyAllocation : public paddle::memory::allocation::Allocation { PyObject* arr_; }; -static inline PyObject* eager_api_numpy_to_tensor( - PyObject* numpy_data, pten::DataType dtype, - const paddle::platform::Place& place, bool stop_gradient) { +static PyObject* eager_api_numpy_to_tensor(PyObject* numpy_data, + pten::DataType dtype, + const paddle::platform::Place& place, + bool stop_gradient) { std::vector vec_dims; auto numpy_shape = pybind11::detail::array_proxy(numpy_data)->dimensions; int rank = pybind11::detail::array_proxy(numpy_data)->nd; @@ -167,7 +168,8 @@ static PyObject* eager_api_to_tensor(PyObject* self, PyObject* args, static PyObject* eager_api_retain_grad_for_tensor(PyObject* self, PyObject* args, PyObject* kwargs) { - egr::RetainGradForTensor(CastPyArg2EagerTensor(PyTuple_GET_ITEM(args, 0), 0)); + egr::egr_utils_api::RetainGradForTensor( + CastPyArg2EagerTensor(PyTuple_GET_ITEM(args, 0), 0)); Py_INCREF(Py_None); return Py_None; } diff --git a/python/paddle/fluid/eager/eager_tensor_patch_methods.py b/python/paddle/fluid/eager/eager_tensor_patch_methods.py index ee83038175ba4..206c5cf23e6da 100644 --- a/python/paddle/fluid/eager/eager_tensor_patch_methods.py +++ b/python/paddle/fluid/eager/eager_tensor_patch_methods.py @@ -21,6 +21,3 @@ def __str__(self): return eager_tensor_to_string(self) setattr(core.eager.EagerTensor, "__str__", __str__) - - # for method_name, method in (("__str__", __str__), ("__name__", "Tensor")): - # setattr(core.eager.EagerTensor, method_name, method) From 8d7d9168186b492548ab25500b863cb3423b6b0d Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Wed, 1 Dec 2021 03:23:33 +0000 Subject: [PATCH 14/31] suport eager error msg, and add grad test case, test=develop --- paddle/fluid/pybind/eager_functions.cc | 55 +++++++++++-------- paddle/fluid/pybind/eager_method.cc | 5 ++ paddle/fluid/pybind/eager_properties.cc | 23 ++++++++ paddle/fluid/pybind/exception.cc | 43 +++++++++++++++ paddle/fluid/pybind/exception.h | 16 ++++++ paddle/fluid/pybind/op_function.h | 45 +-------------- .../tests/unittests/test_egr_python_api.py | 8 ++- 7 files changed, 126 insertions(+), 69 deletions(-) diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index c3f0e5834f3db..8c0f9ddf19f12 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -27,6 +27,7 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/pybind/eager.h" #include "paddle/fluid/pybind/eager_utils.h" +#include "paddle/fluid/pybind/exception.h" #include "paddle/pten/api/lib/utils/allocator.h" #include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/api/lib/utils/tensor_utils.h" @@ -42,28 +43,6 @@ namespace py = ::pybind11; extern PyTypeObject* p_eager_tensor_type; -static PyObject* eager_api_set_expected_place(PyObject* self, PyObject* args, - PyObject* kwargs) { - auto place = CastPyArg2Place(PyTuple_GET_ITEM(args, 0), 0); - egr::Controller::Instance().SetExpectedPlace(place); - - Py_INCREF(Py_None); - return Py_None; -} - -static PyObject* eager_api_scale(PyObject* self, PyObject* args, - PyObject* kwargs) { - // TODO(jiabin): Sync Tensor and Variable here when we support - egr::EagerTensor ret = - egr::scale(reinterpret_cast(PyTuple_GET_ITEM(args, 0)) - ->eagertensor, - CastPyArg2AttrFloat(PyTuple_GET_ITEM(args, 1), 1), - CastPyArg2AttrFloat(PyTuple_GET_ITEM(args, 2), 2), - CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 3), 3), - CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 4), 4)); - return ToPyObject(ret); -} - size_t PyArray_Size_(PyObject* numpy_data) { size_t res = 1; auto dims = pybind11::detail::array_proxy(numpy_data)->dimensions; @@ -100,6 +79,32 @@ class EagerNumpyAllocation : public paddle::memory::allocation::Allocation { PyObject* arr_; }; +static PyObject* eager_api_set_expected_place(PyObject* self, PyObject* args, + PyObject* kwargs) { + EAGER_TRY + auto place = CastPyArg2Place(PyTuple_GET_ITEM(args, 0), 0); + egr::Controller::Instance().SetExpectedPlace(place); + + Py_INCREF(Py_None); + return Py_None; + EAGER_CATCH_AND_THROW_RETURN_NULL +} + +static PyObject* eager_api_scale(PyObject* self, PyObject* args, + PyObject* kwargs) { + EAGER_TRY + // TODO(jiabin): Sync Tensor and Variable here when we support + egr::EagerTensor ret = + egr::scale(reinterpret_cast(PyTuple_GET_ITEM(args, 0)) + ->eagertensor, + CastPyArg2AttrFloat(PyTuple_GET_ITEM(args, 1), 1), + CastPyArg2AttrFloat(PyTuple_GET_ITEM(args, 2), 2), + CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 3), 3), + CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 4), 4)); + return ToPyObject(ret); + EAGER_CATCH_AND_THROW_RETURN_NULL +} + static PyObject* eager_api_numpy_to_tensor(PyObject* numpy_data, pten::DataType dtype, const paddle::platform::Place& place, @@ -146,6 +151,7 @@ static PyObject* eager_api_numpy_to_tensor(PyObject* numpy_data, static PyObject* eager_api_to_tensor(PyObject* self, PyObject* args, PyObject* kwargs) { + EAGER_TRY // TODO(jiabin): Support Kwargs here PyObject* data = PyTuple_GET_ITEM(args, 0); auto str_dtype = CastPyArg2AttrString(PyTuple_GET_ITEM(args, 1), 1); @@ -163,19 +169,23 @@ static PyObject* eager_api_to_tensor(PyObject* self, PyObject* args, Py_INCREF(Py_None); return Py_None; } + EAGER_CATCH_AND_THROW_RETURN_NULL } static PyObject* eager_api_retain_grad_for_tensor(PyObject* self, PyObject* args, PyObject* kwargs) { + EAGER_TRY egr::egr_utils_api::RetainGradForTensor( CastPyArg2EagerTensor(PyTuple_GET_ITEM(args, 0), 0)); Py_INCREF(Py_None); return Py_None; + EAGER_CATCH_AND_THROW_RETURN_NULL } static PyObject* eager_api_run_backward(PyObject* self, PyObject* args, PyObject* kwargs) { + EAGER_TRY auto tensors = CastPyArg2VectorOfEagerTensor(PyTuple_GET_ITEM(args, 0), 0); auto grad_tensors = CastPyArg2VectorOfEagerTensor(PyTuple_GET_ITEM(args, 1), 1); @@ -183,6 +193,7 @@ static PyObject* eager_api_run_backward(PyObject* self, PyObject* args, 
CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 2), 2)); Py_INCREF(Py_None); return Py_None; + EAGER_CATCH_AND_THROW_RETURN_NULL } PyMethodDef variable_functions[] = { diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 9c7a885e97f6f..f040566260c74 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -24,6 +24,7 @@ limitations under the License. */ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/pybind/eager.h" #include "paddle/fluid/pybind/eager_utils.h" +#include "paddle/fluid/pybind/exception.h" #include "paddle/pten/common/data_type.h" #include "paddle/pten/core/convert_utils.h" #include "paddle/pten/core/dense_tensor.h" @@ -35,6 +36,7 @@ extern PyTypeObject* pEagerTensorType; static PyObject* eager_tensor_method_numpy(EagerTensorObject* self, PyObject* args, PyObject* kwargs) { + EAGER_TRY if (!self->eagertensor.initialized()) { Py_INCREF(Py_None); return Py_None; @@ -84,12 +86,15 @@ static PyObject* eager_tensor_method_numpy(EagerTensorObject* self, } return array; + EAGER_CATCH_AND_THROW_RETURN_NULL } static PyObject* eager_tensor_method_is_initialized(EagerTensorObject* self, PyObject* args, PyObject* kwargs) { + EAGER_TRY return ToPyObject(self->eagertensor.initialized()); + EAGER_CATCH_AND_THROW_RETURN_NULL } PyMethodDef variable_methods[] = { diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index 8f3d84e7ede46..f90f63179693f 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -22,6 +22,7 @@ limitations under the License. */ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/pybind/eager.h" #include "paddle/fluid/pybind/eager_utils.h" +#include "paddle/fluid/pybind/exception.h" #include "paddle/pten/common/data_type.h" #include "paddle/pten/core/convert_utils.h" #include "paddle/pten/core/dense_tensor.h" @@ -35,49 +36,64 @@ extern PyTypeObject* p_eager_tensor_type; PyObject* eager_tensor_properties_get_name(EagerTensorObject* self, void* closure) { + EAGER_TRY return ToPyObject(self->eagertensor.name()); + EAGER_CATCH_AND_THROW_RETURN_NULL } int eager_tensor_properties_set_name(EagerTensorObject* self, PyObject* value, void* closure) { + EAGER_TRY self->eagertensor.set_name(CastPyArg2AttrString(value, 0)); return 0; + EAGER_CATCH_AND_THROW_RETURN_ZERO } PyObject* eager_tensor_properties_get_stop_gradient(EagerTensorObject* self, void* closure) { + EAGER_TRY auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eagertensor); return ToPyObject(meta->StopGradient()); + EAGER_CATCH_AND_THROW_RETURN_NULL } PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self, void* closure) { + EAGER_TRY auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eagertensor); return ToPyObject(meta->Grad()); + EAGER_CATCH_AND_THROW_RETURN_NULL } int eager_tensor_properties_set_stop_gradient(EagerTensorObject* self, PyObject* value, void* closure) { + EAGER_TRY auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eagertensor); meta->SetStopGradient(CastPyArg2AttrBoolean(value, 0)); return 0; + EAGER_CATCH_AND_THROW_RETURN_ZERO } PyObject* eager_tensor_properties_get_persistable(EagerTensorObject* self, void* closure) { + EAGER_TRY auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eagertensor); return ToPyObject(meta->Persistable()); + EAGER_CATCH_AND_THROW_RETURN_NULL } int eager_tensor_properties_set_persistable(EagerTensorObject* self, PyObject* value, void* closure) { + 
EAGER_TRY auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eagertensor); meta->SetPersistable(CastPyArg2AttrBoolean(value, 0)); return 0; + EAGER_CATCH_AND_THROW_RETURN_ZERO } PyObject* eager_tensor_properties_get_shape(EagerTensorObject* self, void* closure) { + EAGER_TRY auto ddim = self->eagertensor.shape(); std::vector value; size_t rank = static_cast(ddim.size()); @@ -87,23 +103,30 @@ PyObject* eager_tensor_properties_get_shape(EagerTensorObject* self, } return ToPyObject(value); + EAGER_CATCH_AND_THROW_RETURN_NULL } PyObject* eager_tensor_properties_get_place(EagerTensorObject* self, void* closure) { + EAGER_TRY return ToPyObject(self->eagertensor.place()); + EAGER_CATCH_AND_THROW_RETURN_NULL } PyObject* eager_tensor_properties_get_place_str(EagerTensorObject* self, void* closure) { + EAGER_TRY std::stringstream ostr; ostr << self->eagertensor.place(); return ToPyObject(ostr.str()); + EAGER_CATCH_AND_THROW_RETURN_NULL } PyObject* eager_tensor_properties_get_dtype(EagerTensorObject* self, void* closure) { + EAGER_TRY return ToPyObject(pten::DataType2String(self->eagertensor.type())); + EAGER_CATCH_AND_THROW_RETURN_NULL } struct PyGetSetDef variable_properties[] = { diff --git a/paddle/fluid/pybind/exception.cc b/paddle/fluid/pybind/exception.cc index 3d07985ff654e..362a3e44fab62 100644 --- a/paddle/fluid/pybind/exception.cc +++ b/paddle/fluid/pybind/exception.cc @@ -81,5 +81,48 @@ void BindException(pybind11::module* m) { }); } +void ThrowExceptionToPython(std::exception_ptr p) { + static PyObject* EOFExceptionException = + PyErr_NewException("paddle.EOFException", PyExc_Exception, NULL); + static PyObject* EnforceNotMetException = + PyErr_NewException("paddle.EnforceNotMet", PyExc_Exception, NULL); + try { + if (p) std::rethrow_exception(p); + } catch (const platform::EOFException& e) { + PyErr_SetString(EOFExceptionException, e.what()); + } catch (const platform::EnforceNotMet& e) { + switch (e.code()) { + case paddle::platform::error::INVALID_ARGUMENT: + PyErr_SetString(PyExc_ValueError, e.what()); + break; + case paddle::platform::error::NOT_FOUND: + case paddle::platform::error::ALREADY_EXISTS: + case paddle::platform::error::PRECONDITION_NOT_MET: + case paddle::platform::error::PERMISSION_DENIED: + case paddle::platform::error::EXECUTION_TIMEOUT: + case paddle::platform::error::UNAVAILABLE: + PyErr_SetString(PyExc_RuntimeError, e.what()); + break; + case paddle::platform::error::OUT_OF_RANGE: + PyErr_SetString(PyExc_IndexError, e.what()); + break; + case paddle::platform::error::RESOURCE_EXHAUSTED: + PyErr_SetString(PyExc_MemoryError, e.what()); + break; + case paddle::platform::error::UNIMPLEMENTED: + PyErr_SetString(PyExc_NotImplementedError, e.what()); + break; + case paddle::platform::error::FATAL: + PyErr_SetString(PyExc_SystemError, e.what()); + break; + case paddle::platform::error::EXTERNAL: + PyErr_SetString(PyExc_OSError, e.what()); + break; + default: + PyErr_SetString(EnforceNotMetException, e.what()); + break; + } + } +} } // namespace pybind } // namespace paddle diff --git a/paddle/fluid/pybind/exception.h b/paddle/fluid/pybind/exception.h index 5e054267361f2..cf82f464a11f2 100644 --- a/paddle/fluid/pybind/exception.h +++ b/paddle/fluid/pybind/exception.h @@ -18,10 +18,26 @@ limitations under the License. */ #include "paddle/fluid/platform/enforce.h" #include "pybind11/pybind11.h" +#define EAGER_TRY try { +#define EAGER_CATCH_AND_THROW_RETURN_NULL \ + } \ + catch (...) 
{ \ + ThrowExceptionToPython(std::current_exception()); \ + return nullptr; \ + } + +#define EAGER_CATCH_AND_THROW_RETURN_ZERO \ + } \ + catch (...) { \ + ThrowExceptionToPython(std::current_exception()); \ + return 0; \ + } + namespace paddle { namespace pybind { void BindException(pybind11::module* m); +void ThrowExceptionToPython(std::exception_ptr p); } // namespace pybind } // namespace paddle diff --git a/paddle/fluid/pybind/op_function.h b/paddle/fluid/pybind/op_function.h index 324cd4b1b161f..5535ffd950f37 100644 --- a/paddle/fluid/pybind/op_function.h +++ b/paddle/fluid/pybind/op_function.h @@ -29,6 +29,7 @@ #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/imperative/tracer.h" #include "paddle/fluid/imperative/type_defs.h" +#include "paddle/fluid/pybind/exception.h" #include "paddle/fluid/pybind/imperative.h" namespace py = pybind11; @@ -992,50 +993,6 @@ void InitOpsAttrTypeMap() { } } -void ThrowExceptionToPython(std::exception_ptr p) { - static PyObject* EOFExceptionException = - PyErr_NewException("paddle.EOFException", PyExc_Exception, NULL); - static PyObject* EnforceNotMetException = - PyErr_NewException("paddle.EnforceNotMet", PyExc_Exception, NULL); - try { - if (p) std::rethrow_exception(p); - } catch (const platform::EOFException& e) { - PyErr_SetString(EOFExceptionException, e.what()); - } catch (const platform::EnforceNotMet& e) { - switch (e.code()) { - case paddle::platform::error::INVALID_ARGUMENT: - PyErr_SetString(PyExc_ValueError, e.what()); - break; - case paddle::platform::error::NOT_FOUND: - case paddle::platform::error::ALREADY_EXISTS: - case paddle::platform::error::PRECONDITION_NOT_MET: - case paddle::platform::error::PERMISSION_DENIED: - case paddle::platform::error::EXECUTION_TIMEOUT: - case paddle::platform::error::UNAVAILABLE: - PyErr_SetString(PyExc_RuntimeError, e.what()); - break; - case paddle::platform::error::OUT_OF_RANGE: - PyErr_SetString(PyExc_IndexError, e.what()); - break; - case paddle::platform::error::RESOURCE_EXHAUSTED: - PyErr_SetString(PyExc_MemoryError, e.what()); - break; - case paddle::platform::error::UNIMPLEMENTED: - PyErr_SetString(PyExc_NotImplementedError, e.what()); - break; - case paddle::platform::error::FATAL: - PyErr_SetString(PyExc_SystemError, e.what()); - break; - case paddle::platform::error::EXTERNAL: - PyErr_SetString(PyExc_OSError, e.what()); - break; - default: - PyErr_SetString(EnforceNotMetException, e.what()); - break; - } - } -} - } // namespace pybind } // namespace paddle diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py index cab810f2daa02..c497c7f9bd80a 100644 --- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py +++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py @@ -47,9 +47,11 @@ def test_retain_grad_and_run_backward(self): core.eager.retain_grad_for_tensor(data_eager) - for i in range(10): - out_eager = core.eager.scale(data_eager, 1.0, 0.9, True, True) - core.eager.run_backward([out_eager], [grad_eager], False) + out_eager = core.eager.scale(data_eager, 1.0, 0.9, True, True) + self.assertFalse(data_eager.grad._is_initialized()) + core.eager.run_backward([out_eager], [grad_eager], False) + self.assertTrue(data_eager.grad._is_initialized()) + self.assertTrue(np.array_equal(data_eager.grad.numpy(), input_data)) class EagerDtypeTestCase(unittest.TestCase): From c14d512847b03ad499fdb06b3950a9b25ee12c4f Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Wed, 1 Dec 2021 03:26:56 +0000 
Subject: [PATCH 15/31] refine, test=develop --- paddle/fluid/pybind/eager.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index 8e4f784058980..5be000844bcf1 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -26,6 +26,7 @@ limitations under the License. */ #include "paddle/pten/core/convert_utils.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/include/core.h" +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" namespace paddle { namespace pybind { From f4fc5d230a2f47a46e19a25bb9100ca4138e9b1b Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Wed, 1 Dec 2021 07:52:11 +0000 Subject: [PATCH 16/31] refine, test=develop --- paddle/fluid/pybind/eager_properties.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index f90f63179693f..a13e4836d141a 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -52,7 +52,7 @@ int eager_tensor_properties_set_name(EagerTensorObject* self, PyObject* value, PyObject* eager_tensor_properties_get_stop_gradient(EagerTensorObject* self, void* closure) { EAGER_TRY - auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eagertensor); + auto meta = egr::EagerUtils::autograd_meta(&self->eagertensor); return ToPyObject(meta->StopGradient()); EAGER_CATCH_AND_THROW_RETURN_NULL } @@ -68,7 +68,7 @@ PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self, int eager_tensor_properties_set_stop_gradient(EagerTensorObject* self, PyObject* value, void* closure) { EAGER_TRY - auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eagertensor); + auto meta = egr::EagerUtils::autograd_meta(&self->eagertensor); meta->SetStopGradient(CastPyArg2AttrBoolean(value, 0)); return 0; EAGER_CATCH_AND_THROW_RETURN_ZERO @@ -77,7 +77,7 @@ int eager_tensor_properties_set_stop_gradient(EagerTensorObject* self, PyObject* eager_tensor_properties_get_persistable(EagerTensorObject* self, void* closure) { EAGER_TRY - auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eagertensor); + auto meta = egr::EagerUtils::autograd_meta(&self->eagertensor); return ToPyObject(meta->Persistable()); EAGER_CATCH_AND_THROW_RETURN_NULL } @@ -85,7 +85,7 @@ PyObject* eager_tensor_properties_get_persistable(EagerTensorObject* self, int eager_tensor_properties_set_persistable(EagerTensorObject* self, PyObject* value, void* closure) { EAGER_TRY - auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eagertensor); + auto meta = egr::EagerUtils::autograd_meta(&self->eagertensor); meta->SetPersistable(CastPyArg2AttrBoolean(value, 0)); return 0; EAGER_CATCH_AND_THROW_RETURN_ZERO From a9651c50575afab812ef0a5d5924b1cba9907a87 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 3 Dec 2021 02:30:36 +0000 Subject: [PATCH 17/31] generate eager core ops, only 4 ops, test=develop --- paddle/fluid/pybind/CMakeLists.txt | 48 +- paddle/fluid/pybind/eager.cc | 6 +- paddle/fluid/pybind/eager_method.cc | 2 + .../pybind/eager_op_function_generator.cc | 564 ++++++++++++ paddle/fluid/pybind/eager_properties.cc | 11 + paddle/fluid/pybind/eager_utils.cc | 79 ++ paddle/fluid/pybind/eager_utils.h | 33 + paddle/fluid/pybind/op_function.h | 773 +---------------- paddle/fluid/pybind/op_function_common.cc | 806 ++++++++++++++++++ paddle/fluid/pybind/op_function_common.h | 126 +++ python/paddle/_C_ops.py | 18 + python/paddle/fluid/framework.py | 28 +- 
.../unittests/test_egr_code_generate_api.py | 71 ++ 13 files changed, 1792 insertions(+), 773 deletions(-) create mode 100644 paddle/fluid/pybind/eager_op_function_generator.cc create mode 100644 paddle/fluid/pybind/op_function_common.cc create mode 100644 paddle/fluid/pybind/op_function_common.h create mode 100644 python/paddle/fluid/tests/unittests/test_egr_code_generate_api.py diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 41708ef8611e4..f22d1451c0671 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -126,17 +126,25 @@ if(WITH_PYTHON) add_executable(op_function_generator op_function_generator.cc) target_link_libraries(op_function_generator ${OP_FUNCTION_GENERETOR_DEPS}) + add_executable(eager_op_function_generator eager_op_function_generator.cc) + target_link_libraries(eager_op_function_generator ${OP_FUNCTION_GENERETOR_DEPS}) get_property (os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) target_link_libraries(op_function_generator ${os_dependency_modules}) + target_link_libraries(eager_op_function_generator ${os_dependency_modules}) if(WITH_ROCM) target_link_libraries(op_function_generator ${ROCM_HIPRTC_LIB}) + target_link_libraries(eager_op_function_generator ${ROCM_HIPRTC_LIB}) endif() set(impl_file ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function_impl.h) set(tmp_impl_file ${impl_file}.tmp) + set(eager_impl_file ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/eager_op_function_impl.h) + set(tmp_eager_impl_file ${eager_impl_file}.tmp) set(OP_IMPL_DEPS op_function_generator) + set(EAGER_OP_IMPL_DEPS eager_op_function_generator) + if(WIN32) if("${CMAKE_GENERATOR}" STREQUAL "Ninja") set(op_impl_path "${CMAKE_CURRENT_BINARY_DIR}") @@ -160,22 +168,41 @@ if(WITH_PYTHON) ")\n" "exit /b 0") + file(WRITE ${CMAKE_BINARY_DIR}/paddle/fluid/pybind/eager_op_function_generator_retry.bat "" + "set build_times=1\n" + ":retry\n" + "ECHO eager_op_function_generator run %build_times% time\n" + "taskkill /f /im eager_op_function_generator.exe 2>NUL\n" + "${op_impl_path}/eager_op_function_generator.exe ${tmp_eager_impl_file}\n" + "if %ERRORLEVEL% NEQ 0 (\n" + " set /a build_times=%build_times%+1\n" + " if %build_times% GEQ 10 (\n" + " exit /b 1\n" + " ) else (\n" + " goto :retry\n" + " )\n" + ")\n" + "exit /b 0") + if(${CBLAS_PROVIDER} STREQUAL MKLML) ADD_CUSTOM_COMMAND(OUTPUT ${op_impl_path}/libiomp5md.dll COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_SHARED_IOMP_LIB} ${op_impl_path} DEPENDS mklml) list(APPEND OP_IMPL_DEPS ${op_impl_path}/libiomp5md.dll) + list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/libiomp5md.dll) else(${CBLAS_PROVIDER} STREQUAL EXTERN_OPENBLAS) ADD_CUSTOM_COMMAND(OUTPUT ${op_impl_path}/openblas.dll COMMAND ${CMAKE_COMMAND} -E copy ${OPENBLAS_SHARED_LIB} ${op_impl_path} DEPENDS extern_openblas) list(APPEND OP_IMPL_DEPS ${op_impl_path}/openblas.dll) + list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/openblas.dll) endif() if(WITH_MKLDNN) ADD_CUSTOM_COMMAND(OUTPUT ${op_impl_path}/mkldnn.dll COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_SHARED_LIB} ${op_impl_path} DEPENDS mkldnn) list(APPEND OP_IMPL_DEPS ${op_impl_path}/mkldnn.dll) + list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/mkldnn.dll) endif() add_custom_command(OUTPUT ${impl_file} @@ -183,6 +210,11 @@ if(WITH_PYTHON) COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file} ${impl_file} COMMENT "copy_if_different ${tmp_impl_file} to ${impl_file}" DEPENDS ${OP_IMPL_DEPS}) + add_custom_command(OUTPUT ${eager_impl_file} + COMMAND 
${CMAKE_BINARY_DIR}/paddle/fluid/pybind/eager_op_function_generator_retry.bat + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_eager_impl_file} ${eager_impl_file} + COMMENT "copy_if_different ${tmp_eager_impl_file} to ${eager_impl_file}" + DEPENDS ${EAGER_OP_IMPL_DEPS}) else(WIN32) # If there are no *.so in /usr/lib or LD_LIBRARY_PATH, # copy these *.so to current directory and append current directory to @@ -193,12 +225,14 @@ if(WITH_PYTHON) COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_SHARED_IOMP_LIB} ${CMAKE_CURRENT_BINARY_DIR} DEPENDS mklml) list(APPEND OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libiomp5.so) + list(APPEND EAGER_OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libiomp5.so) endif() if(WITH_MKLDNN) ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libdnnl.so.0 COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_SHARED_LIB} ${CMAKE_CURRENT_BINARY_DIR} DEPENDS mkldnn) list(APPEND OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libdnnl.so.0) + list(APPEND EAGER_OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libdnnl.so.0) endif() add_custom_command(OUTPUT ${impl_file} COMMAND ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:." @@ -208,15 +242,27 @@ if(WITH_PYTHON) COMMENT "copy_if_different ${tmp_impl_file} to ${impl_file}" DEPENDS ${OP_IMPL_DEPS} VERBATIM) + add_custom_command(OUTPUT ${eager_impl_file} + COMMAND ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:." + "${CMAKE_CURRENT_BINARY_DIR}/eager_op_function_generator" + "${tmp_eager_impl_file}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_eager_impl_file} ${eager_impl_file} + COMMENT "copy_if_different ${tmp_eager_impl_file} to ${eager_impl_file}" + DEPENDS ${EAGER_OP_IMPL_DEPS} + VERBATIM) endif(WIN32) add_custom_target(op_function_generator_cmd ALL DEPENDS ${impl_file}) + add_custom_target(eager_op_function_generator_cmd ALL DEPENDS ${eager_impl_file}) list(APPEND PYBIND_DEPS interpretercore standalone_executor) + cc_library(op_function_common SRCS op_function_common.cc) cc_library(paddle_eager SRCS eager.cc eager_functions.cc eager_method.cc eager_properties.cc eager_utils.cc - DEPS autograd_meta grad_node_info pten global_utils utils eager_api accumulation_node backward python) + DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node math_cpu linalg_cpu creation_cpu utils_cpu manipulation_cpu math_api linalg_api creation_api manipulation_api accumulation_node global_utils utils python) + add_dependencies(paddle_eager eager_op_function_generator_cmd) list(APPEND PYBIND_DEPS paddle_eager) + list(APPEND PYBIND_DEPS op_function_common) cc_library(paddle_pybind SHARED SRCS ${PYBIND_SRCS} diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index 5be000844bcf1..6f4fce6b30fe2 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -16,6 +16,8 @@ limitations under the License. */ #include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/autograd_meta.h" +// #include "paddle/fluid/eager/function_api.h" +#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h" #include "paddle/fluid/eager/utils.h" #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/memcpy.h" @@ -27,7 +29,8 @@ limitations under the License. 
*/ #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/include/core.h" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" - +#include "paddle/fluid/pybind/eager_op_function_impl.h" +#include "paddle/fluid/pybind/exception.h" namespace paddle { namespace pybind { @@ -126,6 +129,7 @@ void BindEager(pybind11::module* module) { } BindFunctions(m.ptr()); + BindEagerOpFunctions(&m); } } // namespace pybind diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index f040566260c74..e95db865a1d59 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -37,6 +37,7 @@ extern PyTypeObject* pEagerTensorType; static PyObject* eager_tensor_method_numpy(EagerTensorObject* self, PyObject* args, PyObject* kwargs) { EAGER_TRY + self->eagertensor.SyncToTensor(); if (!self->eagertensor.initialized()) { Py_INCREF(Py_None); return Py_None; @@ -93,6 +94,7 @@ static PyObject* eager_tensor_method_is_initialized(EagerTensorObject* self, PyObject* args, PyObject* kwargs) { EAGER_TRY + self->eagertensor.SyncToTensor(); return ToPyObject(self->eagertensor.initialized()); EAGER_CATCH_AND_THROW_RETURN_NULL } diff --git a/paddle/fluid/pybind/eager_op_function_generator.cc b/paddle/fluid/pybind/eager_op_function_generator.cc new file mode 100644 index 0000000000000..38b4e1e161329 --- /dev/null +++ b/paddle/fluid/pybind/eager_op_function_generator.cc @@ -0,0 +1,564 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#ifndef _WIN32 +#include +#endif + +#include "paddle/fluid/framework/op_info.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/variable.h" +#include "paddle/fluid/pybind/pybind.h" +#include "paddle/fluid/string/string_helper.h" +#ifdef PADDLE_WITH_ASCEND_CL +#include "paddle/fluid/framework/fleet/ascend_wrapper.h" +#endif + +std::set gen_list = {"elementwise_add", "reduce_sum", "matmul_v2", + "sigmoid"}; + +// NOTE(zhiqiu): Commonly, the inputs in auto-generated OP function are +// determined by the OP`s proto automatically, i.e., all the inputs registered +// in OpMaker. +// However, some OPs have dispensable inputs, which means the input can +// be none for some conditions. It is discovered that most dispensable inputs +// is not used in imperative mode, so we drop those inputs when generating OP +// functions. While, for very few OPs, the dispensable inputs are used, we +// need to manually specify them in this map. 
+std::map<std::string, std::set<std::string>> op_ins_map = {
+    {"layer_norm", {"X", "Scale", "Bias"}},
+    {"fused_attention",
+     {"X", "LnScale", "LnBias", "QKVW", "QKVBias", "SrcMask", "OutLinearW",
+      "OutLinearBias", "Ln2Scale", "Ln2Bias"}},
+    {"instance_norm", {"X", "Scale", "Bias"}},
+    {"gru_unit", {"Input", "HiddenPrev", "Weight", "Bias"}},
+    {"label_smooth", {"X", "PriorDist"}},
+    {"assign", {"X"}},
+    {"reshape2", {"X", "Shape"}},
+    {"expand", {"X", "ExpandTimes"}},
+    {"slice", {"Input", "StartsTensor", "EndsTensor"}},
+    {"fake_quantize_dequantize_moving_average_abs_max",
+     {"X", "InScale", "InAccum", "InState"}},
+    {"nll_loss", {"X", "Label", "Weight"}},
+    {"bilinear_tensor_product", {"X", "Y", "Weight", "Bias"}},
+    {"gather", {"X", "Index", "Axis"}},
+    {"roi_pool", {"X", "ROIs", "RoisNum"}},
+    {"roi_align", {"X", "ROIs", "RoisNum"}},
+    {"psroi_pool", {"X", "ROIs", "RoisNum"}},
+    {"collect_fpn_proposals",
+     {"MultiLevelRois", "MultiLevelScores", "MultiLevelRoIsNum"}},
+    {"distribute_fpn_proposals", {"FpnRois", "RoisNum"}},
+    {"warpctc", {"Logits", "Label", "LogitsLength", "LabelLength"}},
+    {"hierarchical_sigmoid",
+     {"X", "W", "Label", "PathTable", "PathCode", "Bias"}},
+    {"moving_average_abs_max_scale", {"X", "InAccum", "InState"}},
+    {"multiclass_nms3", {"BBoxes", "Scores", "RoisNum"}},
+    {"box_coder", {"PriorBox", "PriorBoxVar", "TargetBox"}},
+    {"momentum", {"Param", "Grad", "Velocity", "LearningRate", "MasterParam"}},
+    {"sparse_momentum", {"Param", "Grad", "Velocity", "Index", "LearningRate"}},
+    {"rnn", {"Input", "PreState", "WeightList", "SequenceLength"}},
+    {"run_program", {"X", "Params"}},
+    {"faster_tokenizer", {"Text", "Vocab", "TextPair"}},
+    {"matrix_rank", {"X", "TolTensor"}},
+    {"adam",
+     {"Param", "Grad", "LearningRate", "Moment1", "Moment2", "Beta1Pow",
+      "Beta2Pow", "MasterParam"}},
+    {"adamw",
+     {"Param", "Grad", "LearningRate", "Moment1", "Moment2", "Beta1Pow",
+      "Beta2Pow", "MasterParam"}},
+};
+
+// NOTE(zhiqiu): Like op_ins_map.
+// Commonly, the outputs in an auto-generated OP function are determined by the
+// OP's proto automatically, i.e., all the outputs registered in OpMaker.
+// However, some OPs have dispensable outputs, which means the output can
+// be none under some conditions. It is discovered that most dispensable
+// outputs are not used in imperative mode, so we drop those outputs when
+// generating OP functions. However, for the very few OPs whose dispensable
+// outputs are used, we need to manually specify them in this map.
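+// (Illustrative note, not part of the original patch.) For example, the
+// {"batch_norm", {..., "ReserveSpace"}} entry below keeps the dispensable
+// ReserveSpace output among the generated wrapper's return values; some
+// kernels fill it with intermediate results that the backward pass reuses.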
+std::map<std::string, std::set<std::string>> op_outs_map = {
+    {"fake_quantize_dequantize_moving_average_abs_max",
+     {"Out", "OutScale", "OutAccum", "OutState"}},
+    {"batch_norm",
+     {"Y", "MeanOut", "VarianceOut", "SavedMean", "SavedVariance",
+      "ReserveSpace"}},
+    {"fused_attention",
+     {"LnMean", "LnVariance", "LnOut", "QKVOut", "QKVBiasOut", "TransposeOut2",
+      "QKOut", "QKTVOut", "SoftmaxOut", "AttnDropoutMaskOut", "AttnDropoutOut",
+      "SrcMaskOut", "FMHAOut", "OutLinearOut", "DropoutMaskOut", "Ln2Mean",
+      "Ln2Variance", "BiasDropoutResidualOut", "Y"}},
+    {"sync_batch_norm",
+     {"Y", "MeanOut", "VarianceOut", "SavedMean", "SavedVariance",
+      "ReserveSpace"}},
+    {"unique", {"Out", "Index", "Indices", "Counts"}},
+    {"unique_consecutive", {"Out", "Index", "Counts"}},
+    {"generate_proposals", {"RpnRois", "RpnRoiProbs", "RpnRoisNum"}},
+    {"collect_fpn_proposals", {"FpnRois", "RoisNum"}},
+    {"matrix_nms", {"Out", "Index", "RoisNum"}},
+    {"distribute_fpn_proposals",
+     {"MultiFpnRois", "RestoreIndex", "MultiLevelRoIsNum"}},
+    {"moving_average_abs_max_scale",
+     {"Out", "OutScale", "OutAccum", "OutState"}},
+    {"multiclass_nms3", {"Out", "NmsRoisNum"}},
+    {"generate_proposals_v2", {"RpnRois", "RpnRoiProbs", "RpnRoisNum"}},
+    {"momentum", {"ParamOut", "VelocityOut", "MasterParamOut"}},
+    {"sparse_momentum", {"ParamOut", "VelocityOut"}},
+    {"rnn", {"DropoutState", "Reserve", "Out", "State"}},
+    {"lamb",
+     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut"}},
+    {"run_program", {"DOut"}},
+    {"adam",
+     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut",
+      "MasterParamOut"}},
+    {"adamw",
+     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut",
+      "MasterParamOut"}},
+};
+
+// NOTE(zhiqiu): Commonly, the outputs of an auto-generated OP function are
+// created in C++ automatically.
+// However, some OPs need to pass their outputs in from Python instead of
+// creating them in C++. There are mainly 2 reasons for that:
+// (1) Optimizer OPs need to update the input param in place, like sgd.
+//     So they need to pass the output, which is the same as the input param.
+// (2) A very few Python APIs have an out argument, like fill_constant.
+//     So they need to pass the Python output to C++.
+// Actually, this is not a good design, since it may break the SSA graph,
+// especially in declarative mode.
+// For those OPs, we need to manually specify the outs that need to be passed
+// in this map.
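To make the in-place contract behind this map concrete, here is a self-contained sketch under assumed names (Tensor and SgdLikeUpdate are illustrative, not patch code): the caller hands in the very tensor that doubles as ParamOut, so the update writes through the caller's storage instead of allocating a fresh output.

    #include <iostream>
    #include <memory>
    #include <vector>

    // Illustrative stand-in for a tensor; not the patch's tensor type.
    struct Tensor { std::vector<float> data; };

    // Sketch of an sgd-style op whose output is supplied by the caller.
    void SgdLikeUpdate(const Tensor& grad, float lr,
                       std::shared_ptr<Tensor> param_out) {
      for (size_t i = 0; i < param_out->data.size(); ++i)
        param_out->data[i] -= lr * grad.data[i];
    }

    int main() {
      auto param = std::make_shared<Tensor>(Tensor{{1.f, 2.f}});
      Tensor grad{{0.5f, 0.5f}};
      SgdLikeUpdate(grad, 0.1f, param);  // param itself plays ParamOut
      std::cout << param->data[0] << " " << param->data[1] << "\n";  // 0.95 1.95
      return 0;
    }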
+std::map<std::string, std::set<std::string>> op_passing_outs_map = {
+    {"sgd", {"ParamOut"}},
+    {"adam",
+     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut",
+      "MasterParamOut"}},
+    {"adamw",
+     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut",
+      "MasterParamOut"}},
+    {"average_accumulates",
+     {"out_sum_1", "out_sum_2", "out_sum_3", "out_num_accumulates",
+      "out_old_num_accumulates", "out_num_updates"}},
+    {"momentum", {"ParamOut", "VelocityOut", "MasterParamOut"}},
+    {"sparse_momentum", {"ParamOut", "VelocityOut"}},
+    {"batch_norm", {"MeanOut", "VarianceOut"}},
+    {"sync_batch_norm", {"MeanOut", "VarianceOut"}},
+    {"accuracy", {"Correct", "Total"}},
+    {"fill_constant", {"Out"}},
+    {"recv_v2", {"Out"}},
+    {"partial_recv", {"Out"}},
+    {"matmul", {"Out"}},
+    {"c_broadcast", {"Out"}},
+    {"c_sync_calc_stream", {"Out"}},
+    {"c_sync_comm_stream", {"Out"}},
+    {"c_reduce_sum", {"Out"}},
+    {"c_reduce_max", {"Out"}},
+    {"c_reduce_min", {"Out"}},
+    {"c_reduce_prod", {"Out"}},
+    {"c_reduce", {"Out"}},
+    {"c_scatter", {"Out"}},
+    {"barrier", {"Out"}},
+    {"fake_quantize_dequantize_moving_average_abs_max",
+     {"Out", "OutScale", "OutAccum", "OutState"}},
+    {"fake_quantize_dequantize_abs_max", {"Out", "OutScale"}},
+    {"fake_channel_wise_quantize_dequantize_abs_max", {"Out", "OutScale"}},
+    {"check_finite_and_unscale", {"Out", "FoundInfinite"}},
+    {"update_loss_scaling",
+     {"Out", "LossScaling", "OutGoodSteps", "OutBadSteps"}},
+    {"moving_average_abs_max_scale",
+     {"Out", "OutScale", "OutAccum", "OutState"}},
+    {"lamb",
+     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut"}},
+    {"rnn", {"DropoutState"}},
+    {"run_program", {"Out", "DOut", "OutScope"}},
+    {"clear_float_status", {"FloatStatusOut"}},
+    {"get_float_status", {"FloatStatusOut"}},
+};
+
+// NOTE(pangyoki): Tensor View Strategy.
+// In this case, a new output varbase will be created, and this varbase will
+// reuse the input varbase's allocation.
+// It's a map: the key is the view op name, and the value is a pair that
+// gives the mapping between the input and the output varbase.
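The sharing semantics the view strategy relies on can be sketched in isolation (a plain shared_ptr stands in for the varbase allocation; this is an analogy, not the patch's actual mechanism): the input and the view output alias one buffer, so a write through either is visible through both.

    #include <iostream>
    #include <memory>
    #include <vector>

    int main() {
      // One underlying allocation, shared by the input and its view output.
      auto storage = std::make_shared<std::vector<float>>(6, 0.f);
      auto x = storage;    // input varbase's buffer
      auto out = storage;  // reshape2-style view: same buffer, new shape
      (*out)[0] = 3.14f;
      std::cout << (*x)[0] << "\n";  // 3.14 -- the input sees the write
      return 0;
    }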
+std::map<std::string, std::pair<std::string, std::string>> view_op_map = {
+    {"squeeze2", {"X", "Out"}},  // "X" -> "Out"
+    {"unsqueeze2", {"X", "Out"}},
+    {"reshape2", {"X", "Out"}},
+    {"flatten_contiguous_range", {"X", "Out"}},
+};
+
+// clang-format off
+const char* OUT_INITIALIZER_TEMPLATE =
+    R"({"%s", {std::shared_ptr<imperative::VarBase>(new imperative::VarBase("auto_"+std::to_string(VarBaseUniqueNameID++)+"_"))}})";
+const char* OUT_DUPLICABLE_INITIALIZER_TEMPLATE = R"({"%s", ConstructDuplicableOutput(%s)})";
+
+const char* INPUT_INITIALIZER_TEMPLATE = R"({"%s", {%s}})";
+const char* INPUT_LIST_INITIALIZER_TEMPLATE = R"({"%s", %s})";
+
+const char* INPUT_INITIALIZER_TEMPLATE_WITH_NULL = R"(
+    if (%s != nullptr) {
+      ins["%s"] = {%s};
+    }
+)";
+
+const char* INPUT_INITIALIZER_TEMPLATE_WITH_NULL_LIST = R"(
+    if (%s.size() != 0) {
+      ins["%s"] = %s;
+    }
+)";
+
+const char* OUTPUT_INITIALIZER_TEMPLATE_WITH_NULL = R"(
+    outs["%s"] = {%s};
+)";
+
+const char* OUTPUT_INITIALIZER_TEMPLATE_WITH_NULL_LIST = R"(
+    outs["%s"] = %s;
+)";
+// if the input is a list, no need for {}
+const char* ARG_OUT_NUM = R"(%sNum)";
+const char* ARG_OUT_NUM_TYPE = R"(size_t )";
+
+const char* IN_VAR_TYPE = R"(py::handle)";
+const char* IN_VAR_LIST_TYPE = R"(py::handle)";
+
+const char* OUT_VAR_TYPE = R"(std::shared_ptr<imperative::VarBase>)";
+const char* OUT_VAR_LIST_TYPE = R"(std::vector<std::shared_ptr<imperative::VarBase>>)";
+
+const char* CAST_VAR_TEMPLATE = R"(
+    auto %s = GetEagerTensorFromArgs("%s", "%s", args, %d, %s);)";
+
+const char* CAST_VAR_LIST_TEMPLATE = R"(
+    auto %s = GetEagerTensorListFromArgs("%s", "%s", args, %d, %s);)";
+
+const char* CAST_SIZE_T_TEMPLATE = R"(
+    auto %s = GetUnsignedLongFromArgs("%s", "%s", args, %d, %s);)";
+
+const char* ARG_TEMPLATE = R"(const %s& %s)";
+
+const char* RETURN_TUPLE_TYPE = R"(std::tuple<%s>)";
+const char* RETURN_TUPLE_TEMPLATE = R"(std::make_tuple(%s))";
+const char* RETURN_LIST_TEMPLATE = R"(outs["%s"])";
+const char* RETURN_TEMPLATE = R"(outs["%s"][0])";
+
+const char* FUNCTION_ARGS = R"(%s, const py::args& args)";
+const char* FUNCTION_ARGS_NO_INPUT = R"(const py::args& args)";
+
+const char* HANDLE_VIEW_BETWEEN_INPUT_AND_OUTPUT = R"(
+    if (ins.count("%s") && outs.count("%s")) {
+      HandleViewBetweenInputAndOutput(ins["%s"][0], outs["%s"][0]);
+    })";
+
+const char* OP_FUNCTION_TEMPLATE =
+R"(
+static PyObject * %s(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+  PyThreadState *tstate = nullptr;
+  try
+  {
+    %s
+    framework::AttributeMap attrs;
+    ConstructAttrMapFromPyArgs("%s", args, %d, PyTuple_GET_SIZE(args), attrs);
+    tstate = PyEval_SaveThread();
+    %s
+    PyEval_RestoreThread(tstate);
+    tstate = nullptr;
+    %s
+  }
+  catch(...)
{
+    if (tstate) {
+      PyEval_RestoreThread(tstate);
+    }
+    ThrowExceptionToPython(std::current_exception());
+    return nullptr;
+  }
+})";
+
+const char* PYBIND_ITEM_TEMPLATE = R"(  {"%s", (PyCFunction)(void(*)(void))%s, METH_VARARGS | METH_KEYWORDS, "C++ interface function for %s in dygraph."},)";
+
+// clang-format on
+static inline bool FindInsMap(const std::string& op_type,
+                              const std::string& in_name) {
+  return op_ins_map[op_type].count(in_name);
+}
+
+static inline bool FindOutsMap(const std::string& op_type,
+                               const std::string& out_name) {
+  return op_outs_map[op_type].count(out_name);
+}
+
+static inline bool FindPassingOutsMap(const std::string& op_type,
+                                      const std::string& out_name) {
+  return op_passing_outs_map[op_type].count(out_name);
+}
+
+static inline bool FindViewOpMap(const std::string& op_type) {
+  return view_op_map.count(op_type);
+}
+
+static inline std::string TempName(const std::string& name) {
+  return name + '_';
+}
+
+std::string GenerateOpFunctionsBody(
+    const paddle::framework::proto::OpProto* op_proto, std::string func_name,
+    bool use_inplace_strategy = false,
+    std::map<std::string, std::string> inplace_map = {}) {
+  auto& op_type = op_proto->type();
+  std::string input_args = "";
+  std::string call_api_str = "auto out = " + op_type + "_dygraph_function(";
+  std::string ins_initializer_with_null = "";
+  std::string py_arg = "";
+  int arg_idx = 0;
+  int input_args_num = 0;
+  std::string ins_cast_str = "";
+  std::string view_strategy_str = "";
+  for (auto& input : op_proto->inputs()) {
+    auto& in_name = input.name();
+    // skip those dispensable inputs, like ResidualData in conv2d
+    if (input.dispensable() && !FindInsMap(op_type, in_name)) {
+      continue;
+    }
+    const auto in_type = input.duplicable() ? IN_VAR_LIST_TYPE : IN_VAR_TYPE;
+    auto input_arg =
+        paddle::string::Sprintf(ARG_TEMPLATE, in_type, TempName(in_name));
+    input_args += input_arg;
+    input_args += ",";
+    input_args_num++;
+    const auto in_cast_type =
+        input.duplicable() ? CAST_VAR_LIST_TEMPLATE : CAST_VAR_TEMPLATE;
+    auto dispensable = input.dispensable() ? "true" : "false";
+    ins_cast_str += paddle::string::Sprintf(in_cast_type, in_name, op_type,
+                                            in_name, arg_idx++, dispensable);
+
+    if (input.dispensable()) {
+      const auto in_template = input.duplicable()
+                                   ? INPUT_INITIALIZER_TEMPLATE_WITH_NULL_LIST
+                                   : INPUT_INITIALIZER_TEMPLATE_WITH_NULL;
+      ins_initializer_with_null +=
+          paddle::string::Sprintf(in_template, in_name, in_name, in_name);
+    } else {
+      call_api_str += in_name + ", ";
+    }
+  }
+
+  if (!input_args.empty() && input_args.back() == ',') {
+    input_args.pop_back();
+  }
+
+  // Generate outs initializer
+  std::string outs_initializer = "{";
+  std::string outs_initializer_with_null = "";
+  std::string return_str = "";
+
+  int outs_num = 0;
+  for (auto& output : op_proto->outputs()) {
+    auto& out_name = output.name();
+
+    // skip those dispensable outputs
+    if (output.dispensable() && !FindOutsMap(op_type, out_name)) {
+      continue;
+    }
+    const auto out_type =
+        output.duplicable() ? OUT_VAR_LIST_TYPE : OUT_VAR_TYPE;
+
+    if (FindPassingOutsMap(op_type, out_name)) {
+      if (input_args != "") {
+        input_args += ",";
+      }
+      input_args += out_type;
+      input_args += out_name;
+      input_args_num++;
+
+      if (output.dispensable()) {
+        const auto out_template =
+            output.duplicable() ? OUTPUT_INITIALIZER_TEMPLATE_WITH_NULL_LIST
+                                : OUTPUT_INITIALIZER_TEMPLATE_WITH_NULL;
+        outs_initializer_with_null +=
+            paddle::string::Sprintf(out_template, out_name, out_name);
+      } else {
+        const auto out_template = output.duplicable()
+                                      ?
INPUT_LIST_INITIALIZER_TEMPLATE
+                                      : INPUT_INITIALIZER_TEMPLATE;
+        outs_initializer +=
+            paddle::string::Sprintf(out_template, out_name, out_name);
+        outs_initializer += ",";
+      }
+
+      const auto in_cast_type =
+          output.duplicable() ? CAST_VAR_LIST_TEMPLATE : CAST_VAR_TEMPLATE;
+      auto dispensable = output.dispensable() ? "true" : "false";
+      ins_cast_str += paddle::string::Sprintf(in_cast_type, out_name, op_type,
+                                              out_name, arg_idx++, dispensable);
+    } else {
+      // There are few Operators that have duplicable output, like `Out` in
+      // split op. We need to specify the number of variables for the
+      // duplicable output, as the argument OutNum;
+      if (output.duplicable()) {
+        if (input_args != "") {
+          input_args += ",";
+        }
+        auto out_num_str = paddle::string::Sprintf(ARG_OUT_NUM, out_name);
+        input_args += ARG_OUT_NUM_TYPE;
+        input_args += out_num_str;
+        input_args_num++;
+        outs_initializer += paddle::string::Sprintf(
+            OUT_DUPLICABLE_INITIALIZER_TEMPLATE, out_name, out_num_str);
+
+        auto dispensable = output.dispensable() ? "true" : "false";
+        ins_cast_str +=
+            paddle::string::Sprintf(CAST_SIZE_T_TEMPLATE, out_num_str, op_type,
+                                    out_num_str, arg_idx++, dispensable);
+        call_api_str += out_num_str + ", ";
+      } else {
+        outs_initializer +=
+            paddle::string::Sprintf(OUT_INITIALIZER_TEMPLATE, out_name);
+      }
+      outs_initializer += ",";
+    }
+
+    // return_str += paddle::string::Sprintf(return_template, out_name);
+    // return_str += ",";
+    outs_num += 1;
+  }
+  call_api_str += "attrs);";
+  if (outs_initializer.back() == ',') {
+    outs_initializer.pop_back();
+    // return_str.pop_back();
+  }
+  outs_initializer += "}";
+  if (FindViewOpMap(op_type)) {
+    std::string view_input_name = view_op_map[op_type].first;
+    std::string view_output_name = view_op_map[op_type].second;
+    view_strategy_str += paddle::string::Sprintf(
+        HANDLE_VIEW_BETWEEN_INPUT_AND_OUTPUT, view_input_name, view_output_name,
+        view_input_name, view_output_name);
+  }
+  if (outs_num == 0) {
+    return_str = "Py_INCREF(Py_None);\n return Py_None;";
+  } else {
+    return_str = "return ToPyObject(out);";
+  }
+  std::string function_args = "";
+  if (input_args == "") {
+    function_args = FUNCTION_ARGS_NO_INPUT;
+  } else {
+    function_args = paddle::string::Sprintf(FUNCTION_ARGS, input_args);
+  }
+
+  // generate op function body
+  auto op_function_str = paddle::string::Sprintf(
+      OP_FUNCTION_TEMPLATE, func_name, ins_cast_str, op_type, input_args_num,
+      call_api_str, return_str);
+
+  return op_function_str;
+}
+
+static std::tuple<std::vector<std::string>, std::vector<std::string>>
+GenerateOpFunctions() {
+  auto& op_info_map = paddle::framework::OpInfoMap::Instance().map();
+
+  std::vector<std::string> op_function_list, bind_function_list;
+  auto& all_kernels = paddle::framework::OperatorWithKernel::AllOpKernels();
+
+  for (auto& pair : op_info_map) {
+    auto& op_info = pair.second;
+    auto op_proto = op_info.proto_;
+    if (op_proto == nullptr) {
+      continue;
+    }
+    auto& op_type = op_proto->type();
+    // Skip operators that do not inherit from OperatorWithKernel, like while,
+    // since only OperatorWithKernel can run in dygraph mode.
+    // But if the pten lib contains the op's kernel, we still generate the
+    // op method.
+    if (!all_kernels.count(op_type) &&
+        !pten::KernelFactory::Instance().HasCompatiblePtenKernel(op_type)) {
+      continue;
+    }
+    if (!gen_list.count(op_type)) {
+      continue;
+    }
+    std::string func_name = "eager_api_" + op_type;
+    std::string op_function_str = GenerateOpFunctionsBody(op_proto, func_name);
+
+    // generate pybind item
+    auto bind_function_str = paddle::string::Sprintf(
+        PYBIND_ITEM_TEMPLATE, op_type, func_name, op_type);
+
+    op_function_list.emplace_back(std::move(op_function_str));
+    bind_function_list.emplace_back(std::move(bind_function_str));
+  }
+  return std::make_tuple(op_function_list, bind_function_list);
+}
+
+int main(int argc, char* argv[]) {
+  if (argc != 2) {
+    std::cerr << "argc must be 2" << std::endl;
+    return -1;
+  }
+
+#ifdef PADDLE_WITH_ASCEND_CL
+  auto ascend_ptr = paddle::framework::AscendInstance::GetInstance();
+  ascend_ptr->InitGEForUT();
+#endif
+
+  std::vector<std::string> headers{
+      "\"pybind11/detail/common.h\"",
+      "\"paddle/fluid/pybind/op_function_common.h\"", ""};
+
+  std::ofstream out(argv[1], std::ios::out);
+
+  out << "#pragma once\n\n";
+
+  for (auto& header : headers) {
+    out << "#include " + header + "\n";
+  }
+
+  out << "\n\n";
+
+  auto op_funcs = GenerateOpFunctions();
+
+  out << "namespace paddle {\n"
+      << "namespace pybind {\n\n";
+  out << paddle::string::join_strings(std::get<0>(op_funcs), '\n');
+  out << "\n\n";
+
+  out << "static PyMethodDef ExtestMethods[] = {\n"
+      << paddle::string::join_strings(std::get<1>(op_funcs), '\n')
+      << "\n  {nullptr,nullptr,0,nullptr}"
+      << "};\n\n";
+
+  out << "inline void BindEagerOpFunctions(pybind11::module *module) {\n"
+      << "  auto m = module->def_submodule(\"ops\");\n"
+      << "  if (PyModule_AddFunctions(m.ptr(), ExtestMethods) < 0) {\n"
+      << "    PADDLE_THROW(platform::errors::Fatal(\"Add functions to "
+         "core.eager.ops failed!\"));\n"
+      << "  }\n\n"
+      << "  InitOpsAttrTypeMap();"
+      << "}\n\n"
+      << "} // namespace pybind\n"
+      << "} // namespace paddle\n";
+
+  out.close();
+
+#ifdef PADDLE_WITH_ASCEND_CL
+  ge::GEFinalize();
+#endif
+
+  return 0;
+}
diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc
index a13e4836d141a..7f20f32e81a5e 100644
--- a/paddle/fluid/pybind/eager_properties.cc
+++ b/paddle/fluid/pybind/eager_properties.cc
@@ -37,6 +37,7 @@ extern PyTypeObject* p_eager_tensor_type;
 PyObject* eager_tensor_properties_get_name(EagerTensorObject* self, void* closure) {
   EAGER_TRY
+  self->eagertensor.SyncToTensor();
   return ToPyObject(self->eagertensor.name());
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
@@ -44,6 +45,7 @@ PyObject* eager_tensor_properties_get_name(EagerTensorObject* self,
 int eager_tensor_properties_set_name(EagerTensorObject* self, PyObject* value, void* closure) {
   EAGER_TRY
+  self->eagertensor.SyncToTensor();
   self->eagertensor.set_name(CastPyArg2AttrString(value, 0));
   return 0;
   EAGER_CATCH_AND_THROW_RETURN_ZERO
@@ -52,6 +54,7 @@ int eager_tensor_properties_set_name(EagerTensorObject* self, PyObject* value,
 PyObject* eager_tensor_properties_get_stop_gradient(EagerTensorObject* self, void* closure) {
   EAGER_TRY
+  self->eagertensor.SyncToTensor();
   auto meta = egr::EagerUtils::autograd_meta(&self->eagertensor);
   return ToPyObject(meta->StopGradient());
   EAGER_CATCH_AND_THROW_RETURN_NULL
@@ -60,6 +63,7 @@ PyObject* eager_tensor_properties_get_stop_gradient(EagerTensorObject* self,
 PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self, void* closure) {
   EAGER_TRY
+
self->eagertensor.SyncToTensor(); auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eagertensor); return ToPyObject(meta->Grad()); EAGER_CATCH_AND_THROW_RETURN_NULL @@ -68,6 +72,7 @@ PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self, int eager_tensor_properties_set_stop_gradient(EagerTensorObject* self, PyObject* value, void* closure) { EAGER_TRY + self->eagertensor.SyncToTensor(); auto meta = egr::EagerUtils::autograd_meta(&self->eagertensor); meta->SetStopGradient(CastPyArg2AttrBoolean(value, 0)); return 0; @@ -77,6 +82,7 @@ int eager_tensor_properties_set_stop_gradient(EagerTensorObject* self, PyObject* eager_tensor_properties_get_persistable(EagerTensorObject* self, void* closure) { EAGER_TRY + self->eagertensor.SyncToTensor(); auto meta = egr::EagerUtils::autograd_meta(&self->eagertensor); return ToPyObject(meta->Persistable()); EAGER_CATCH_AND_THROW_RETURN_NULL @@ -85,6 +91,7 @@ PyObject* eager_tensor_properties_get_persistable(EagerTensorObject* self, int eager_tensor_properties_set_persistable(EagerTensorObject* self, PyObject* value, void* closure) { EAGER_TRY + self->eagertensor.SyncToTensor(); auto meta = egr::EagerUtils::autograd_meta(&self->eagertensor); meta->SetPersistable(CastPyArg2AttrBoolean(value, 0)); return 0; @@ -94,6 +101,7 @@ int eager_tensor_properties_set_persistable(EagerTensorObject* self, PyObject* eager_tensor_properties_get_shape(EagerTensorObject* self, void* closure) { EAGER_TRY + self->eagertensor.SyncToTensor(); auto ddim = self->eagertensor.shape(); std::vector value; size_t rank = static_cast(ddim.size()); @@ -109,6 +117,7 @@ PyObject* eager_tensor_properties_get_shape(EagerTensorObject* self, PyObject* eager_tensor_properties_get_place(EagerTensorObject* self, void* closure) { EAGER_TRY + self->eagertensor.SyncToTensor(); return ToPyObject(self->eagertensor.place()); EAGER_CATCH_AND_THROW_RETURN_NULL } @@ -116,6 +125,7 @@ PyObject* eager_tensor_properties_get_place(EagerTensorObject* self, PyObject* eager_tensor_properties_get_place_str(EagerTensorObject* self, void* closure) { EAGER_TRY + self->eagertensor.SyncToTensor(); std::stringstream ostr; ostr << self->eagertensor.place(); return ToPyObject(ostr.str()); @@ -125,6 +135,7 @@ PyObject* eager_tensor_properties_get_place_str(EagerTensorObject* self, PyObject* eager_tensor_properties_get_dtype(EagerTensorObject* self, void* closure) { EAGER_TRY + self->eagertensor.SyncToTensor(); return ToPyObject(pten::DataType2String(self->eagertensor.type())); EAGER_CATCH_AND_THROW_RETURN_NULL } diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index 9268fc8e7b976..0e858e3d26687 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -20,6 +20,8 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/pybind/eager.h" #include "paddle/fluid/pybind/eager_utils.h" +#include "paddle/fluid/pybind/op_function_common.h" +// #include "paddle/pten/api/include/core.h" #include "paddle/pten/common/data_type.h" #include "paddle/pten/core/convert_utils.h" #include "paddle/pten/core/dense_tensor.h" @@ -335,5 +337,82 @@ PyObject* ToPyObject(const platform::Place& value) { return obj.ptr(); } +egr::EagerTensor GetEagerTensorFromArgs(const std::string& op_type, + const std::string& arg_name, + PyObject* args, ssize_t arg_idx, + bool dispensable) { + PyObject* obj = PyTuple_GET_ITEM(args, arg_idx); + + if (PyTuple_Check(obj)) { + obj = PyTuple_GET_ITEM(obj, 0); + } + + if (obj == nullptr || obj == Py_None) { + if (!dispensable) { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument '%s' (position %d) must be Tensor, but got None", + op_type, arg_name, arg_idx)); + } + egr::EagerTensor emptytensor; + return emptytensor; + } + + return reinterpret_cast(obj)->eagertensor; +} + +std::vector GetEagerTensorListFromArgs( + const std::string& op_type, const std::string& arg_name, PyObject* args, + ssize_t arg_idx, bool dispensable) { + PyObject* list = PyTuple_GET_ITEM(args, arg_idx); + + if (list == nullptr) { + if (!dispensable) { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument '%s' (position %d) must be list of Tensor, but got " + "None", + op_type, arg_name, arg_idx)); + } + return {}; + } + + std::vector result; + + if (PyList_Check(list)) { + Py_ssize_t len = PyList_Size(list); + if (len == 0) { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument '%s' (position %d) must be list of Tensors, but got " + "empty list", + op_type, arg_name, arg_idx)); + } + for (Py_ssize_t i = 0; i < len; i++) { + result.emplace_back( + reinterpret_cast(PyList_GetItem(list, i)) + ->eagertensor); + } + } else if (PyTuple_Check(list)) { + Py_ssize_t len = PyTuple_Size(list); + if (len == 0) { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument '%s' (position %d) must be list of Tensors, but got " + "empty list", + op_type, arg_name, arg_idx)); + } + for (Py_ssize_t i = 0; i < len; i++) { + result.emplace_back( + reinterpret_cast(PyTuple_GetItem(list, i)) + ->eagertensor); + } + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument '%s' (position %d) must be list of Tensors, but got " + "%s", + op_type, arg_name, arg_idx, + (reinterpret_cast(list->ob_type))->tp_name)); + } + + return result; +} + } // namespace pybind } // namespace paddle diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h index 49f56a61c31f1..252a558566632 100644 --- a/paddle/fluid/pybind/eager_utils.h +++ b/paddle/fluid/pybind/eager_utils.h @@ -50,5 +50,38 @@ PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const platform::Place& value); +template +struct TupleEagerTensorResult { + static void Run(const Tuple& out, PyObject* result) { + TupleEagerTensorResult::Run(out, result); + PyTuple_SET_ITEM(result, N - 1, ToPyObject(std::get(out))); + } +}; + +template +struct TupleEagerTensorResult { + static void Run(const Tuple& out, PyObject* result) { + PyTuple_SET_ITEM(result, 0, ToPyObject(std::get<0>(out))); + } +}; + +template +PyObject* ToPyObject(const std::tuple& out) { + auto len = sizeof...(Args); + PyObject* result = PyTuple_New(len); + + TupleEagerTensorResult::Run(out, result); + + return result; +} + 
+egr::EagerTensor GetEagerTensorFromArgs(const std::string& op_type,
+                                        const std::string& arg_name,
+                                        PyObject* args, ssize_t arg_idx,
+                                        bool dispensable = false);
+std::vector<egr::EagerTensor> GetEagerTensorListFromArgs(
+    const std::string& op_type, const std::string& arg_name, PyObject* args,
+    ssize_t arg_idx, bool dispensable = false);
+
 } // namespace pybind
 } // namespace paddle
diff --git a/paddle/fluid/pybind/op_function.h b/paddle/fluid/pybind/op_function.h
index 5535ffd950f37..7b9379df6be2c 100644
--- a/paddle/fluid/pybind/op_function.h
+++ b/paddle/fluid/pybind/op_function.h
@@ -31,33 +31,12 @@
 #include "paddle/fluid/imperative/type_defs.h"
 #include "paddle/fluid/pybind/exception.h"
 #include "paddle/fluid/pybind/imperative.h"
+#include "paddle/fluid/pybind/op_function_common.h"
 namespace py = pybind11;
 namespace paddle {
 namespace pybind {
-class OpAttrTypeMap {
- public:
-  static OpAttrTypeMap& Instance() {
-    static OpAttrTypeMap g_op_attr_type_map;
-    return g_op_attr_type_map;
-  }
-
-  std::unordered_map<
-      std::string,
-      std::unordered_map<std::string, paddle::framework::proto::AttrType>>&
-  Map() {
-    return ops_attrtype_map_;
-  }
-
- private:
-  OpAttrTypeMap() = default;
-  std::unordered_map<
-      std::string,
-      std::unordered_map<std::string, paddle::framework::proto::AttrType>>
-      ops_attrtype_map_;
-};
-
 static inline std::shared_ptr<imperative::VarBase> CastPyHandleToVarBase(
     const std::string& op_type, const std::string& arg_name, int arg_idx,
     const py::handle& handle, bool dispensable = false) {
@@ -198,737 +177,7 @@ static inline void HandleViewBetweenInputAndOutput(
   }
 }
-extern PyTypeObject* g_varbase_pytype;
-extern PyTypeObject* g_vartype_pytype;
-extern PyTypeObject* g_blockdesc_pytype;
-
-inline bool PyObject_CheckBool(PyObject** obj) { return PyBool_Check(*obj); }
-
-inline bool PyObject_CheckLongOrToLong(PyObject** obj) {
-  if ((PyLong_Check(*obj) && !PyBool_Check(*obj)) ||
-      PyObject_IsInstance(*obj, (PyObject*)g_vartype_pytype) ||  // NOLINT
-      PyObject_IsInstance(*obj, (PyObject*)g_varbase_pytype)) {  // NOLINT
-    return true;
-  }
-
-  if (std::string(((PyTypeObject*)(*obj)->ob_type)->tp_name)  // NOLINT
-          .find("numpy") != std::string::npos) {
-    auto to = PyNumber_Long(*obj);
-    if (to) {
-      *obj = to;
-      return true;
-    }
-  }
-
-  return false;
-}
-
-inline bool PyObject_CheckFloatOrToFloat(PyObject** obj) {
-  // sometimes users provide PyLong or numpy.int64 but attr is float
-  if (PyFloat_Check(*obj) || PyLong_Check(*obj) ||
-      PyObject_IsInstance(*obj, (PyObject*)g_varbase_pytype)) {  // NOLINT
-    return true;
-  }
-  if (std::string(((PyTypeObject*)(*obj)->ob_type)->tp_name)  // NOLINT
-          .find("numpy") != std::string::npos) {
-    auto to = PyNumber_Float(*obj);
-    if (to) {
-      *obj = to;
-      return true;
-    }
-  }
-  return false;
-}
-
-inline bool PyObject_CheckString(PyObject* obj) { return PyUnicode_Check(obj); }
-
-static inline void CastPyArg2AttrBoolean(
-    PyObject* obj,
-    paddle::framework::AttributeMap& attrs,  // NOLINT
-    const std::string& key, const std::string& op_type, ssize_t arg_pos) {
-  if (obj == Py_None) {
-    attrs[key] = false;  // To be compatible with QA integration testing. Some
-                         // test case pass in None.
- } else if (obj == Py_True) { - attrs[key] = true; - } else if (obj == Py_False) { - attrs[key] = false; - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "bool, but got %s", - op_type, arg_pos + 1, - ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT - } -} - -static inline void CastPyArg2AttrInt( - PyObject* obj, - paddle::framework::AttributeMap& attrs, // NOLINT - const std::string& key, const std::string& op_type, ssize_t arg_pos) { - if (PyObject_CheckLongOrToLong(&obj)) { - attrs[key] = (int)PyLong_AsLong(obj); // NOLINT - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "int, but got %s", - op_type, arg_pos + 1, - ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT - } -} - -static inline void CastPyArg2AttrLong( - PyObject* obj, - paddle::framework::AttributeMap& attrs, // NOLINT - const std::string& key, const std::string& op_type, ssize_t arg_pos) { - if (PyObject_CheckLongOrToLong(&obj)) { - attrs[key] = (int64_t)PyLong_AsLong(obj); // NOLINT - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "long, but got %s", - op_type, arg_pos + 1, - ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT - } -} - -static inline void CastPyArg2AttrFloat( - PyObject* obj, - paddle::framework::AttributeMap& attrs, // NOLINT - const std::string& key, const std::string& op_type, ssize_t arg_pos) { - if (PyObject_CheckFloatOrToFloat(&obj)) { - attrs[key] = (float)PyFloat_AsDouble(obj); // NOLINT - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "float, but got %s", - op_type, arg_pos + 1, - ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT - } -} - -static inline void CastPyArg2AttrString( - PyObject* obj, - paddle::framework::AttributeMap& attrs, // NOLINT - const std::string& key, const std::string& op_type, ssize_t arg_pos) { - if (PyObject_CheckString(obj)) { - Py_ssize_t size; - const char* data; - data = PyUnicode_AsUTF8AndSize(obj, &size); - attrs[key] = std::string(data, static_cast(size)); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "str, but got %s", - op_type, arg_pos + 1, - ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT - } -} - -static inline void CastPyArg2AttrBooleans( - PyObject* obj, paddle::framework::AttributeMap& attrs, // NOLINT - const std::string& key, const std::string& op_type, ssize_t arg_pos) { - if (PyList_Check(obj)) { - Py_ssize_t len = PyList_Size(obj); - PyObject* item = nullptr; - std::vector value; - for (Py_ssize_t i = 0; i < len; i++) { - item = PyList_GetItem(obj, i); - if (PyObject_CheckBool(&item)) { - value.emplace_back(PyLong_AsLong(item)); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list of bool, but got %s at pos %d", - op_type, arg_pos + 1, - ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT - i)); - } - } - attrs[key] = value; - } else if (PyTuple_Check(obj)) { - Py_ssize_t len = PyTuple_Size(obj); - PyObject* item = nullptr; - std::vector value; - for (Py_ssize_t i = 0; i < len; i++) { - item = PyTuple_GetItem(obj, i); - if (PyObject_CheckBool(&item)) { - value.emplace_back(PyLong_AsLong(item)); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list of bool, but got %s at pos %d", - op_type, arg_pos + 1, - ((PyTypeObject*)item->ob_type)->tp_name, // 
NOLINT - i)); - } - } - attrs[key] = value; - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list or tuple, but got %s", - op_type, arg_pos + 1, - ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT - } -} - -static inline void CastPyArg2AttrInts( - PyObject* obj, - paddle::framework::AttributeMap& attrs, // NOLINT - const std::string& key, const std::string& op_type, ssize_t arg_pos) { - if (PyList_Check(obj)) { - Py_ssize_t len = PyList_Size(obj); - PyObject* item = nullptr; - std::vector value; - for (Py_ssize_t i = 0; i < len; i++) { - item = PyList_GetItem(obj, i); - if (PyObject_CheckLongOrToLong(&item)) { - value.emplace_back(PyLong_AsLong(item)); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list of int, but got %s at pos %d", - op_type, arg_pos + 1, - ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT - i)); - } - } - attrs[key] = value; - } else if (PyTuple_Check(obj)) { - Py_ssize_t len = PyTuple_Size(obj); - PyObject* item = nullptr; - std::vector value; - for (Py_ssize_t i = 0; i < len; i++) { - item = PyTuple_GetItem(obj, i); - if (PyObject_CheckLongOrToLong(&item)) { - value.emplace_back(PyLong_AsLong(item)); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list of int, but got %s at pos %d", - op_type, arg_pos + 1, - ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT - i)); - } - } - attrs[key] = value; - } else if (PySequence_Check(obj)) { - Py_ssize_t len = PySequence_Size(obj); - PyObject* item = nullptr; - std::vector value; - for (Py_ssize_t i = 0; i < len; i++) { - item = PySequence_GetItem(obj, i); - if (PyObject_CheckLongOrToLong(&item)) { - value.emplace_back(PyLong_AsLong(item)); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list of int, but got %s at pos %d", - op_type, arg_pos + 1, - ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT - i)); - } - } - attrs[key] = value; - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list or tuple, but got %s", - op_type, arg_pos + 1, - ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT - } -} - -static inline void CastPyArg2AttrLongs( - PyObject* obj, - paddle::framework::AttributeMap& attrs, // NOLINT - const std::string& key, const std::string& op_type, ssize_t arg_pos) { - if (PyList_Check(obj)) { - Py_ssize_t len = PyList_Size(obj); - PyObject* item = nullptr; - std::vector value; - for (Py_ssize_t i = 0; i < len; i++) { - item = PyList_GetItem(obj, i); - if (PyObject_CheckLongOrToLong(&item)) { - value.emplace_back(PyLong_AsLong(item)); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list of int, but got %s at pos %d", - op_type, arg_pos + 1, - ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT - i)); - } - } - attrs[key] = value; - } else if (PyTuple_Check(obj)) { - Py_ssize_t len = PyTuple_Size(obj); - PyObject* item = nullptr; - std::vector value; - for (Py_ssize_t i = 0; i < len; i++) { - item = PyTuple_GetItem(obj, i); - if (PyObject_CheckLongOrToLong(&item)) { - value.emplace_back(PyLong_AsLong(item)); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list of int, but got %s at pos %d", - op_type, arg_pos + 1, - ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT - i)); - } - } - attrs[key] = value; - } else if 
(PySequence_Check(obj)) { - Py_ssize_t len = PySequence_Size(obj); - PyObject* item = nullptr; - std::vector value; - for (Py_ssize_t i = 0; i < len; i++) { - item = PySequence_GetItem(obj, i); - if (PyObject_CheckLongOrToLong(&item)) { - value.emplace_back(PyLong_AsLong(item)); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list of int, but got %s at pos %d", - op_type, arg_pos + 1, - ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT - i)); - } - } - attrs[key] = value; - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list or tuple, but got %s", - op_type, arg_pos + 1, - ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT - } -} - -static inline void CastPyArg2AttrFloats( - PyObject* obj, - paddle::framework::AttributeMap& attrs, // NOLINT - const std::string& key, const std::string& op_type, ssize_t arg_pos) { - if (PyList_Check(obj)) { - Py_ssize_t len = PyList_Size(obj); - PyObject* item = nullptr; - std::vector value; - for (Py_ssize_t i = 0; i < len; i++) { - item = PyList_GetItem(obj, i); - if (PyObject_CheckFloatOrToFloat(&item)) { - value.emplace_back(PyFloat_AsDouble(item)); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list of float, but got %s at pos %d", - op_type, arg_pos + 1, - ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT - i)); - } - } - attrs[key] = value; - } else if (PyTuple_Check(obj)) { - Py_ssize_t len = PyTuple_Size(obj); - PyObject* item = nullptr; - std::vector value; - for (Py_ssize_t i = 0; i < len; i++) { - item = PyTuple_GetItem(obj, i); - if (PyObject_CheckFloatOrToFloat(&item)) { - value.emplace_back(PyFloat_AsDouble(item)); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list of float, but got %s at pos %d", - op_type, arg_pos + 1, - ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT - i)); - } - } - attrs[key] = value; - } else if (PySequence_Check(obj)) { - Py_ssize_t len = PySequence_Size(obj); - PyObject* item = nullptr; - std::vector value; - for (Py_ssize_t i = 0; i < len; i++) { - item = PySequence_GetItem(obj, i); - if (PyObject_CheckFloatOrToFloat(&item)) { - value.emplace_back(PyFloat_AsDouble(item)); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list of float, but got %s at pos %d", - op_type, arg_pos + 1, - ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT - i)); - } - } - attrs[key] = value; - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list or tuple, but got %s", - op_type, arg_pos + 1, - ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT - } -} - -static inline void CastPyArg2AttrFloat64s( - PyObject* obj, paddle::framework::AttributeMap& attrs, // NOLINT - const std::string& key, const std::string& op_type, ssize_t arg_pos) { - if (PyList_Check(obj)) { - Py_ssize_t len = PyList_Size(obj); - PyObject* item = nullptr; - std::vector value; - for (Py_ssize_t i = 0; i < len; i++) { - item = PyList_GetItem(obj, i); - if (PyObject_CheckFloatOrToFloat(&item)) { - value.emplace_back(PyFloat_AsDouble(item)); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list of float, but got %s at pos %d", - op_type, arg_pos + 1, - ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT - i)); - } - } - attrs[key] = value; - } else if (PyTuple_Check(obj)) { 
- Py_ssize_t len = PyTuple_Size(obj); - PyObject* item = nullptr; - std::vector value; - for (Py_ssize_t i = 0; i < len; i++) { - item = PyTuple_GetItem(obj, i); - if (PyObject_CheckFloatOrToFloat(&item)) { - value.emplace_back(PyFloat_AsDouble(item)); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list of float, but got %s at pos %d", - op_type, arg_pos + 1, - ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT - i)); - } - } - attrs[key] = value; - } else if (PySequence_Check(obj)) { - Py_ssize_t len = PySequence_Size(obj); - PyObject* item = nullptr; - std::vector value; - for (Py_ssize_t i = 0; i < len; i++) { - item = PySequence_GetItem(obj, i); - if (PyObject_CheckFloatOrToFloat(&item)) { - value.emplace_back(PyFloat_AsDouble(item)); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list of float, but got %s at pos %d", - op_type, arg_pos + 1, - ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT - i)); - } - } - attrs[key] = value; - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list or tuple, but got %s", - op_type, arg_pos + 1, - ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT - } -} - -static inline void CastPyArg2AttrStrings( - PyObject* obj, - paddle::framework::AttributeMap& attrs, // NOLINT - const std::string& key, const std::string& op_type, ssize_t arg_pos) { - if (PyList_Check(obj)) { - Py_ssize_t len = PyList_Size(obj); - PyObject* item = nullptr; - std::vector value; - for (Py_ssize_t i = 0; i < len; i++) { - item = PyList_GetItem(obj, i); - if (PyObject_CheckString(item)) { - Py_ssize_t size; - const char* data; - data = PyUnicode_AsUTF8AndSize(item, &size); - value.emplace_back(std::string(data, (size_t)size)); // NOLINT - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list of str, but got %s at pos %d", - op_type, arg_pos + 1, - ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT - i)); - } - } - attrs[key] = value; - } else if (PyTuple_Check(obj)) { - Py_ssize_t len = PyTuple_Size(obj); - PyObject* item = nullptr; - std::vector value; - for (Py_ssize_t i = 0; i < len; i++) { - item = PyTuple_GetItem(obj, i); - if (PyObject_CheckString(item)) { - Py_ssize_t size; - const char* data; - data = PyUnicode_AsUTF8AndSize(item, &size); - value.emplace_back(std::string(data, static_cast(size))); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list of str, but got %s at pos %d", - op_type, arg_pos + 1, - ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT - i)); - } - } - attrs[key] = value; - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "list or tuple, but got %s", - op_type, arg_pos + 1, - ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT - } -} - -static inline void CastPyArg2AttrBlock( - PyObject* obj, paddle::framework::AttributeMap& attrs, // NOLINT - const std::string& key, const std::string& op_type, ssize_t arg_pos) { - ::pybind11::detail::instance* inst = - (::pybind11::detail::instance*)obj; // NOLINT - - if (!PyObject_IsInstance((PyObject*)inst, // NOLINT - (PyObject*)g_blockdesc_pytype)) { // NOLINT - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be " - "BlockDesc, but got %s", - op_type, arg_pos + 1, - ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT - } - void** vh = 
inst->simple_layout ? inst->simple_value_holder - : &inst->nonsimple.values_and_holders[0]; - attrs[key] = reinterpret_cast(vh[0]); -} - -static inline void ConstructAttrMapFromPyArgs( - const std::string& op_type, PyObject* args, ssize_t attr_start, - ssize_t attr_end, paddle::framework::AttributeMap& attrs) { // NOLINT - PADDLE_ENFORCE_EQ( - (attr_end - attr_start) % 2, 0, - platform::errors::InvalidArgument( - "The number of arguments for attributes should be even.")); - - auto attr_type_map = &(OpAttrTypeMap::Instance().Map()[op_type]); - - PyObject* obj = nullptr; - for (ssize_t arg_pos = attr_start; arg_pos < attr_end; arg_pos += 2) { - Py_ssize_t key_len; - const char* key_ptr; - obj = PyTuple_GET_ITEM(args, arg_pos); - if (PyObject_CheckString(obj)) { - key_ptr = PyUnicode_AsUTF8AndSize(obj, &key_len); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument (position %d) must be str, but got " - "%s", - op_type, arg_pos, ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT - } - - std::string key(key_ptr, static_cast(key_len)); - auto iter = attr_type_map->find(key); - if (iter == attr_type_map->end()) { - continue; - } - - obj = PyTuple_GET_ITEM(args, arg_pos + 1); - - switch (iter->second) { - case paddle::framework::proto::AttrType::INT: - CastPyArg2AttrInt(obj, attrs, key, op_type, arg_pos); - break; - case paddle::framework::proto::AttrType::FLOAT: - CastPyArg2AttrFloat(obj, attrs, key, op_type, arg_pos); - break; - case paddle::framework::proto::AttrType::STRING: - CastPyArg2AttrString(obj, attrs, key, op_type, arg_pos); - break; - case paddle::framework::proto::AttrType::INTS: - CastPyArg2AttrInts(obj, attrs, key, op_type, arg_pos); - break; - case paddle::framework::proto::AttrType::FLOATS: - CastPyArg2AttrFloats(obj, attrs, key, op_type, arg_pos); - break; - case paddle::framework::proto::AttrType::STRINGS: - CastPyArg2AttrStrings(obj, attrs, key, op_type, arg_pos); - break; - case paddle::framework::proto::AttrType::BOOLEAN: - CastPyArg2AttrBoolean(obj, attrs, key, op_type, arg_pos); - break; - case paddle::framework::proto::AttrType::BOOLEANS: - CastPyArg2AttrBooleans(obj, attrs, key, op_type, arg_pos); - break; - case paddle::framework::proto::AttrType::LONG: - CastPyArg2AttrLong(obj, attrs, key, op_type, arg_pos); - break; - case paddle::framework::proto::AttrType::LONGS: - CastPyArg2AttrLongs(obj, attrs, key, op_type, arg_pos); - break; - case paddle::framework::proto::AttrType::FLOAT64S: - CastPyArg2AttrFloat64s(obj, attrs, key, op_type, arg_pos); - break; - case paddle::framework::proto::AttrType::BLOCK: - CastPyArg2AttrBlock(obj, attrs, key, op_type, arg_pos); - break; - default: - break; - } - } -} - -static inline std::shared_ptr GetVarBaseFromArgs( - const std::string& op_type, const std::string& arg_name, PyObject* args, - ssize_t arg_idx, bool dispensable = false) { - ::pybind11::detail::instance* inst = - (::pybind11::detail::instance*)PyTuple_GET_ITEM(args, arg_idx); - - if (PyTuple_Check((PyObject*)inst)) { // NOLINT - inst = (::pybind11::detail::instance*)PyTuple_GET_ITEM(inst, 0); - } - - if (inst == nullptr || (PyObject*)inst == Py_None) { // NOLINT - if (!dispensable) { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument '%s' (position %d) must be Tensor, but got None", - op_type, arg_name, arg_idx)); - } - return nullptr; - } - - if (!PyObject_IsInstance((PyObject*)inst, // NOLINT - (PyObject*)g_varbase_pytype)) { // NOLINT - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument '%s' (position %d) must 
be Tensor, but got " - "%s", - op_type, arg_name, arg_idx, - ((PyTypeObject*)((PyObject*)inst)->ob_type)->tp_name)); // NOLINT - } - - void** vh = inst->simple_layout ? inst->simple_value_holder - : &inst->nonsimple.values_and_holders[0]; - return reinterpret_cast&>(vh[1]); -} - -static inline std::vector> -GetVarBaseListFromArgs(const std::string& op_type, const std::string& arg_name, - PyObject* args, ssize_t arg_idx, - bool dispensable = false) { - PyObject* list = PyTuple_GET_ITEM(args, arg_idx); - - if (list == nullptr || list == Py_None) { - if (!dispensable) { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument '%s' (position %d) must be list of Tensor, but got " - "None", - op_type, arg_name, arg_idx)); // NOLINT - } - return {}; - } - - std::vector> result; - - if (PyList_Check(list)) { - Py_ssize_t len = PyList_Size(list); - if (len == 0) { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument '%s' (position %d) must be list of Tensors, but got " - "empty list", - op_type, arg_name, arg_idx)); - } - ::pybind11::detail::instance* item = nullptr; - for (Py_ssize_t i = 0; i < len; i++) { - item = (::pybind11::detail::instance*)PyList_GetItem(list, i); - if (!PyObject_IsInstance((PyObject*)item, // NOLINT - (PyObject*)g_varbase_pytype)) { // NOLINT - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument '%s' (position %d) must be list of Tensors, but " - "got list of " - "%s", - op_type, arg_name, arg_idx, - ((PyTypeObject*)((PyObject*)item)->ob_type)->tp_name)); // NOLINT - } - void** vh = item->simple_layout ? item->simple_value_holder - : &item->nonsimple.values_and_holders[0]; - result.emplace_back( - reinterpret_cast&>( - vh[1])); - } - } else if (PyTuple_Check(list)) { - Py_ssize_t len = PyTuple_Size(list); - if (len == 0) { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument '%s' (position %d) must be list of Tensors, but got " - "empty list", - op_type, arg_name, arg_idx)); - } - ::pybind11::detail::instance* item = nullptr; - for (Py_ssize_t i = 0; i < len; i++) { - item = (::pybind11::detail::instance*)PyTuple_GetItem(list, i); // NOLINT - if (!PyObject_IsInstance((PyObject*)item, // NOLINT - (PyObject*)g_varbase_pytype)) { // NOLINT - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument '%s' (position %d) must be list of Tensors, but " - "got list of " - "%s", - op_type, arg_name, arg_idx, - ((PyTypeObject*)((PyObject*)item)->ob_type)->tp_name)); // NOLINT - } - void** vh = item->simple_layout ? 
item->simple_value_holder - : &item->nonsimple.values_and_holders[0]; - result.emplace_back( - reinterpret_cast&>( - vh[1])); - } - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument '%s' (position %d) must be list of Tensors, but got " - "%s", - op_type, arg_name, arg_idx, - ((PyTypeObject*)list->ob_type)->tp_name)); // NOLINT - } - - return result; -} - -static inline unsigned long GetUnsignedLongFromArgs( // NOLINT - const std::string& op_type, const std::string& arg_name, PyObject* args, - ssize_t arg_idx, bool dispensable = false) { - PyObject* item = PyTuple_GET_ITEM(args, arg_idx); - - if (item == nullptr) { - if (!dispensable) { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument '%s' (position %d) must be long, but got None", - op_type, arg_name, arg_idx)); - } - return 0; - } - - if (PyObject_CheckLongOrToLong(&item)) { - return PyLong_AsUnsignedLong(item); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s(): argument '%s' (position %d) must be " - "long, but got %s", - op_type, arg_name, arg_idx, - ((PyTypeObject*)item->ob_type)->tp_name)); // NOLINT - } -} - -static inline PyObject* MakeReturnPyObject( +PyObject* MakeReturnPyObject( const std::shared_ptr& out) { return ::pybind11::detail::type_caster_base::cast_holder( ::pybind11::detail::holder_helper< @@ -937,7 +186,7 @@ static inline PyObject* MakeReturnPyObject( .ptr(); } -static inline PyObject* MakeReturnPyObject( +PyObject* MakeReturnPyObject( const std::vector>& out) { PyObject* result = PyList_New((Py_ssize_t)out.size()); @@ -970,7 +219,7 @@ struct TupleVarBasesResult { }; template -static inline PyObject* MakeReturnPyObject(const std::tuple& out) { +PyObject* MakeReturnPyObject(const std::tuple& out) { auto len = sizeof...(Args); PyObject* result = PyTuple_New(len); @@ -979,20 +228,6 @@ static inline PyObject* MakeReturnPyObject(const std::tuple& out) { return result; } -void InitOpsAttrTypeMap() { - auto op_info_map = paddle::framework::OpInfoMap::Instance().map(); - for (auto iter = op_info_map.begin(); iter != op_info_map.end(); ++iter) { - auto op_proto = iter->second.proto_; - if (op_proto == nullptr) { - continue; - } - auto attrs_proto = op_proto->attrs(); - for (auto& attr : attrs_proto) { - OpAttrTypeMap::Instance().Map()[iter->first][attr.name()] = attr.type(); - } - } -} - } // namespace pybind } // namespace paddle diff --git a/paddle/fluid/pybind/op_function_common.cc b/paddle/fluid/pybind/op_function_common.cc new file mode 100644 index 0000000000000..1f761ae29c2af --- /dev/null +++ b/paddle/fluid/pybind/op_function_common.cc @@ -0,0 +1,806 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "paddle/fluid/framework/attribute.h"
+#include "paddle/fluid/framework/op_info.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/variable.h"
+#include "paddle/fluid/imperative/tracer.h"
+#include "paddle/fluid/imperative/type_defs.h"
+#include "paddle/fluid/pybind/imperative.h"
+#include "paddle/fluid/pybind/op_function_common.h"
+
+namespace py = pybind11;
+namespace paddle {
+namespace pybind {
+
+class OpAttrTypeMap {
+ public:
+  static OpAttrTypeMap& Instance() {
+    static OpAttrTypeMap g_op_attr_type_map;
+    return g_op_attr_type_map;
+  }
+
+  std::unordered_map<
+      std::string,
+      std::unordered_map<std::string, paddle::framework::proto::AttrType>>&
+  Map() {
+    return ops_attrtype_map_;
+  }
+
+ private:
+  OpAttrTypeMap() = default;
+  std::unordered_map<
+      std::string,
+      std::unordered_map<std::string, paddle::framework::proto::AttrType>>
+      ops_attrtype_map_;
+};
+
+extern PyTypeObject* g_varbase_pytype;
+extern PyTypeObject* g_vartype_pytype;
+extern PyTypeObject* g_blockdesc_pytype;
+
+bool PyObject_CheckBool(PyObject** obj) { return PyBool_Check(*obj); }
+
+bool PyObject_CheckLongOrToLong(PyObject** obj) {
+  if ((PyLong_Check(*obj) && !PyBool_Check(*obj)) ||
+      PyObject_IsInstance(*obj, (PyObject*)g_vartype_pytype) ||  // NOLINT
+      PyObject_IsInstance(*obj, (PyObject*)g_varbase_pytype)) {  // NOLINT
+    return true;
+  }
+
+  if (std::string(((PyTypeObject*)(*obj)->ob_type)->tp_name)  // NOLINT
+          .find("numpy") != std::string::npos) {
+    auto to = PyNumber_Long(*obj);
+    if (to) {
+      *obj = to;
+      return true;
+    }
+  }
+
+  return false;
+}
+
+bool PyObject_CheckFloatOrToFloat(PyObject** obj) {
+  // sometimes users provide PyLong or numpy.int64 but the attr is float
+  if (PyFloat_Check(*obj) || PyLong_Check(*obj) ||
+      PyObject_IsInstance(*obj, (PyObject*)g_varbase_pytype)) {  // NOLINT
+    return true;
+  }
+  if (std::string(((PyTypeObject*)(*obj)->ob_type)->tp_name)  // NOLINT
+          .find("numpy") != std::string::npos) {
+    auto to = PyNumber_Float(*obj);
+    if (to) {
+      *obj = to;
+      return true;
+    }
+  }
+  return false;
+}
+
+bool PyObject_CheckString(PyObject* obj) { return PyUnicode_Check(obj); }
+
+void CastPyArg2AttrBoolean(PyObject* obj,
+                           paddle::framework::AttributeMap& attrs,  // NOLINT
+                           const std::string& key, const std::string& op_type,
+                           ssize_t arg_pos) {
+  if (obj == Py_None) {
+    attrs[key] = false;  // To be compatible with QA integration testing. Some
+                         // test cases pass in None.
+ } else if (obj == Py_True) { + attrs[key] = true; + } else if (obj == Py_False) { + attrs[key] = false; + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument (position %d) must be " + "bool, but got %s", + op_type, arg_pos + 1, + ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT + } +} + +void CastPyArg2AttrInt(PyObject* obj, + paddle::framework::AttributeMap& attrs, // NOLINT + const std::string& key, const std::string& op_type, + ssize_t arg_pos) { + if (PyObject_CheckLongOrToLong(&obj)) { + attrs[key] = (int)PyLong_AsLong(obj); // NOLINT + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument (position %d) must be " + "int, but got %s", + op_type, arg_pos + 1, + ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT + } +} + +void CastPyArg2AttrLong(PyObject* obj, + paddle::framework::AttributeMap& attrs, // NOLINT + const std::string& key, const std::string& op_type, + ssize_t arg_pos) { + if (PyObject_CheckLongOrToLong(&obj)) { + attrs[key] = (int64_t)PyLong_AsLong(obj); // NOLINT + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument (position %d) must be " + "long, but got %s", + op_type, arg_pos + 1, + ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT + } +} + +void CastPyArg2AttrFloat(PyObject* obj, + paddle::framework::AttributeMap& attrs, // NOLINT + const std::string& key, const std::string& op_type, + ssize_t arg_pos) { + if (PyObject_CheckFloatOrToFloat(&obj)) { + attrs[key] = (float)PyFloat_AsDouble(obj); // NOLINT + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument (position %d) must be " + "float, but got %s", + op_type, arg_pos + 1, + ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT + } +} + +void CastPyArg2AttrString(PyObject* obj, + paddle::framework::AttributeMap& attrs, // NOLINT + const std::string& key, const std::string& op_type, + ssize_t arg_pos) { + if (PyObject_CheckString(obj)) { + Py_ssize_t size; + const char* data; + data = PyUnicode_AsUTF8AndSize(obj, &size); + attrs[key] = std::string(data, (size_t)size); // NOLINT + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument (position %d) must be " + "str, but got %s", + op_type, arg_pos + 1, + ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT + } +} + +void CastPyArg2AttrBooleans(PyObject* obj, + paddle::framework::AttributeMap& attrs, // NOLINT + const std::string& key, const std::string& op_type, + ssize_t arg_pos) { + if (PyList_Check(obj)) { + Py_ssize_t len = PyList_Size(obj); + PyObject* item = nullptr; + std::vector value; + for (Py_ssize_t i = 0; i < len; i++) { + item = PyList_GetItem(obj, i); + if (PyObject_CheckBool(&item)) { + value.emplace_back(PyLong_AsLong(item)); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument (position %d) must be " + "list of bool, but got %s at pos %d", + op_type, arg_pos + 1, + ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT + i)); + } + } + attrs[key] = value; + } else if (PyTuple_Check(obj)) { + Py_ssize_t len = PyTuple_Size(obj); + PyObject* item = nullptr; + std::vector value; + for (Py_ssize_t i = 0; i < len; i++) { + item = PyTuple_GetItem(obj, i); + if (PyObject_CheckBool(&item)) { + value.emplace_back(PyLong_AsLong(item)); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument (position %d) must be " + "list of bool, but got %s at pos %d", + op_type, arg_pos + 1, + ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT + i)); + } + } + attrs[key] = value; + } else { + 
PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument (position %d) must be " + "list or tuple, but got %s", + op_type, arg_pos + 1, + ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT + } +} + +void CastPyArg2AttrInts(PyObject* obj, + paddle::framework::AttributeMap& attrs, // NOLINT + const std::string& key, const std::string& op_type, + ssize_t arg_pos) { + if (PyList_Check(obj)) { + Py_ssize_t len = PyList_Size(obj); + PyObject* item = nullptr; + std::vector value; + for (Py_ssize_t i = 0; i < len; i++) { + item = PyList_GetItem(obj, i); + if (PyObject_CheckLongOrToLong(&item)) { + value.emplace_back(PyLong_AsLong(item)); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument (position %d) must be " + "list of int, but got %s at pos %d", + op_type, arg_pos + 1, + ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT + i)); + } + } + attrs[key] = value; + } else if (PyTuple_Check(obj)) { + Py_ssize_t len = PyTuple_Size(obj); + PyObject* item = nullptr; + std::vector value; + for (Py_ssize_t i = 0; i < len; i++) { + item = PyTuple_GetItem(obj, i); + if (PyObject_CheckLongOrToLong(&item)) { + value.emplace_back(PyLong_AsLong(item)); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument (position %d) must be " + "list of int, but got %s at pos %d", + op_type, arg_pos + 1, + ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT + i)); + } + } + attrs[key] = value; + } else if (PySequence_Check(obj)) { + Py_ssize_t len = PySequence_Size(obj); + PyObject* item = nullptr; + std::vector value; + for (Py_ssize_t i = 0; i < len; i++) { + item = PySequence_GetItem(obj, i); + if (PyObject_CheckLongOrToLong(&item)) { + value.emplace_back(PyLong_AsLong(item)); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument (position %d) must be " + "list of int, but got %s at pos %d", + op_type, arg_pos + 1, + ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT + i)); + } + } + attrs[key] = value; + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument (position %d) must be " + "list or tuple, but got %s", + op_type, arg_pos + 1, + ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT + } +} + +void CastPyArg2AttrLongs(PyObject* obj, + paddle::framework::AttributeMap& attrs, // NOLINT + const std::string& key, const std::string& op_type, + ssize_t arg_pos) { + if (PyList_Check(obj)) { + Py_ssize_t len = PyList_Size(obj); + PyObject* item = nullptr; + std::vector value; + for (Py_ssize_t i = 0; i < len; i++) { + item = PyList_GetItem(obj, i); + if (PyObject_CheckLongOrToLong(&item)) { + value.emplace_back(PyLong_AsLong(item)); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument (position %d) must be " + "list of int, but got %s at pos %d", + op_type, arg_pos + 1, + ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT + i)); + } + } + attrs[key] = value; + } else if (PyTuple_Check(obj)) { + Py_ssize_t len = PyTuple_Size(obj); + PyObject* item = nullptr; + std::vector value; + for (Py_ssize_t i = 0; i < len; i++) { + item = PyTuple_GetItem(obj, i); + if (PyObject_CheckLongOrToLong(&item)) { + value.emplace_back(PyLong_AsLong(item)); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument (position %d) must be " + "list of int, but got %s at pos %d", + op_type, arg_pos + 1, + ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT + i)); + } + } + attrs[key] = value; + } else if (PySequence_Check(obj)) { + Py_ssize_t len = PySequence_Size(obj); + PyObject* item = nullptr; 
+ std::vector<int64_t> value;
+ for (Py_ssize_t i = 0; i < len; i++) {
+ item = PySequence_GetItem(obj, i);
+ if (PyObject_CheckLongOrToLong(&item)) {
+ value.emplace_back(PyLong_AsLong(item));
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument (position %d) must be "
+ "list of int, but got %s at pos %d",
+ op_type, arg_pos + 1,
+ ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT
+ i));
+ }
+ }
+ attrs[key] = value;
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument (position %d) must be "
+ "list or tuple, but got %s",
+ op_type, arg_pos + 1,
+ ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT
+ }
+}
+
+void CastPyArg2AttrFloats(PyObject* obj,
+ paddle::framework::AttributeMap& attrs, // NOLINT
+ const std::string& key, const std::string& op_type,
+ ssize_t arg_pos) {
+ if (PyList_Check(obj)) {
+ Py_ssize_t len = PyList_Size(obj);
+ PyObject* item = nullptr;
+ std::vector<float> value;
+ for (Py_ssize_t i = 0; i < len; i++) {
+ item = PyList_GetItem(obj, i);
+ if (PyObject_CheckFloatOrToFloat(&item)) {
+ value.emplace_back(PyFloat_AsDouble(item));
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument (position %d) must be "
+ "list of float, but got %s at pos %d",
+ op_type, arg_pos + 1,
+ ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT
+ i));
+ }
+ }
+ attrs[key] = value;
+ } else if (PyTuple_Check(obj)) {
+ Py_ssize_t len = PyTuple_Size(obj);
+ PyObject* item = nullptr;
+ std::vector<float> value;
+ for (Py_ssize_t i = 0; i < len; i++) {
+ item = PyTuple_GetItem(obj, i);
+ if (PyObject_CheckFloatOrToFloat(&item)) {
+ value.emplace_back(PyFloat_AsDouble(item));
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument (position %d) must be "
+ "list of float, but got %s at pos %d",
+ op_type, arg_pos + 1,
+ ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT
+ i));
+ }
+ }
+ attrs[key] = value;
+ } else if (PySequence_Check(obj)) {
+ Py_ssize_t len = PySequence_Size(obj);
+ PyObject* item = nullptr;
+ std::vector<float> value;
+ for (Py_ssize_t i = 0; i < len; i++) {
+ item = PySequence_GetItem(obj, i);
+ if (PyObject_CheckFloatOrToFloat(&item)) {
+ value.emplace_back(PyFloat_AsDouble(item));
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument (position %d) must be "
+ "list of float, but got %s at pos %d",
+ op_type, arg_pos + 1,
+ ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT
+ i));
+ }
+ }
+ attrs[key] = value;
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument (position %d) must be "
+ "list or tuple, but got %s",
+ op_type, arg_pos + 1,
+ ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT
+ }
+}
+
+void CastPyArg2AttrFloat64s(PyObject* obj,
+ paddle::framework::AttributeMap& attrs, // NOLINT
+ const std::string& key, const std::string& op_type,
+ ssize_t arg_pos) {
+ if (PyList_Check(obj)) {
+ Py_ssize_t len = PyList_Size(obj);
+ PyObject* item = nullptr;
+ std::vector<double> value;
+ for (Py_ssize_t i = 0; i < len; i++) {
+ item = PyList_GetItem(obj, i);
+ if (PyObject_CheckFloatOrToFloat(&item)) {
+ value.emplace_back(PyFloat_AsDouble(item));
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument (position %d) must be "
+ "list of float, but got %s at pos %d",
+ op_type, arg_pos + 1,
+ ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT
+ i));
+ }
+ }
+ attrs[key] = value;
+ } else if (PyTuple_Check(obj)) {
+ Py_ssize_t len = PyTuple_Size(obj);
+ PyObject* item = nullptr;
+ std::vector<double> value;
+ for (Py_ssize_t i = 0; i < len;
i++) {
+ item = PyTuple_GetItem(obj, i);
+ if (PyObject_CheckFloatOrToFloat(&item)) {
+ value.emplace_back(PyFloat_AsDouble(item));
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument (position %d) must be "
+ "list of float, but got %s at pos %d",
+ op_type, arg_pos + 1,
+ ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT
+ i));
+ }
+ }
+ attrs[key] = value;
+ } else if (PySequence_Check(obj)) {
+ Py_ssize_t len = PySequence_Size(obj);
+ PyObject* item = nullptr;
+ std::vector<double> value;
+ for (Py_ssize_t i = 0; i < len; i++) {
+ item = PySequence_GetItem(obj, i);
+ if (PyObject_CheckFloatOrToFloat(&item)) {
+ value.emplace_back(PyFloat_AsDouble(item));
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument (position %d) must be "
+ "list of float, but got %s at pos %d",
+ op_type, arg_pos + 1,
+ ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT
+ i));
+ }
+ }
+ attrs[key] = value;
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument (position %d) must be "
+ "list or tuple, but got %s",
+ op_type, arg_pos + 1,
+ ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT
+ }
+}
+
+void CastPyArg2AttrStrings(PyObject* obj,
+ paddle::framework::AttributeMap& attrs, // NOLINT
+ const std::string& key, const std::string& op_type,
+ ssize_t arg_pos) {
+ if (PyList_Check(obj)) {
+ Py_ssize_t len = PyList_Size(obj);
+ PyObject* item = nullptr;
+ std::vector<std::string> value;
+ for (Py_ssize_t i = 0; i < len; i++) {
+ item = PyList_GetItem(obj, i);
+ if (PyObject_CheckString(item)) {
+ Py_ssize_t size;
+ const char* data;
+ data = PyUnicode_AsUTF8AndSize(item, &size);
+ value.emplace_back(std::string(data, (size_t)size)); // NOLINT
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument (position %d) must be "
+ "list of str, but got %s at pos %d",
+ op_type, arg_pos + 1,
+ ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT
+ i));
+ }
+ }
+ attrs[key] = value;
+ } else if (PyTuple_Check(obj)) {
+ Py_ssize_t len = PyTuple_Size(obj);
+ PyObject* item = nullptr;
+ std::vector<std::string> value;
+ for (Py_ssize_t i = 0; i < len; i++) {
+ item = PyTuple_GetItem(obj, i);
+ if (PyObject_CheckString(item)) {
+ Py_ssize_t size;
+ const char* data;
+ data = PyUnicode_AsUTF8AndSize(item, &size);
+ value.emplace_back(std::string(data, (size_t)size)); // NOLINT
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument (position %d) must be "
+ "list of str, but got %s at pos %d",
+ op_type, arg_pos + 1,
+ ((PyTypeObject*)item->ob_type)->tp_name, // NOLINT
+ i));
+ }
+ }
+ attrs[key] = value;
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument (position %d) must be "
+ "list or tuple, but got %s",
+ op_type, arg_pos + 1,
+ ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT
+ }
+}
+
+void CastPyArg2AttrBlock(PyObject* obj,
+ paddle::framework::AttributeMap& attrs, // NOLINT
+ const std::string& key, const std::string& op_type,
+ ssize_t arg_pos) {
+ ::pybind11::detail::instance* inst =
+ (::pybind11::detail::instance*)obj; // NOLINT
+
+ if (!PyObject_IsInstance((PyObject*)inst, // NOLINT
+ (PyObject*)g_blockdesc_pytype)) { // NOLINT
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument (position %d) must be "
+ "BlockDesc, but got %s",
+ op_type, arg_pos + 1,
+ ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT
+ }
+ void** vh = inst->simple_layout ?
inst->simple_value_holder
+ : &inst->nonsimple.values_and_holders[0];
+ attrs[key] = reinterpret_cast<paddle::framework::BlockDesc*>(vh[0]);
+}
+
+void ConstructAttrMapFromPyArgs(
+ const std::string& op_type, PyObject* args, ssize_t attr_start,
+ ssize_t attr_end, paddle::framework::AttributeMap& attrs) { // NOLINT
+ PADDLE_ENFORCE_EQ(
+ (attr_end - attr_start) % 2, 0,
+ platform::errors::InvalidArgument(
+ "The number of arguments for attributes should be even."));
+
+ auto attr_type_map = &(OpAttrTypeMap::Instance().Map()[op_type]);
+
+ PyObject* obj = nullptr;
+ for (ssize_t arg_pos = attr_start; arg_pos < attr_end; arg_pos += 2) {
+ Py_ssize_t key_len;
+ const char* key_ptr;
+ obj = PyTuple_GET_ITEM(args, arg_pos);
+ if (PyObject_CheckString(obj)) {
+ key_ptr = PyUnicode_AsUTF8AndSize(obj, &key_len);
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument (position %d) must be str, but got "
+ "%s",
+ op_type, arg_pos, ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT
+ }
+
+ std::string key(key_ptr, (size_t)key_len); // NOLINT
+ auto iter = attr_type_map->find(key);
+ if (iter == attr_type_map->end()) {
+ continue;
+ }
+
+ obj = PyTuple_GET_ITEM(args, arg_pos + 1);
+
+ switch (iter->second) {
+ case paddle::framework::proto::AttrType::INT:
+ CastPyArg2AttrInt(obj, attrs, key, op_type, arg_pos);
+ break;
+ case paddle::framework::proto::AttrType::FLOAT:
+ CastPyArg2AttrFloat(obj, attrs, key, op_type, arg_pos);
+ break;
+ case paddle::framework::proto::AttrType::STRING:
+ CastPyArg2AttrString(obj, attrs, key, op_type, arg_pos);
+ break;
+ case paddle::framework::proto::AttrType::INTS:
+ CastPyArg2AttrInts(obj, attrs, key, op_type, arg_pos);
+ break;
+ case paddle::framework::proto::AttrType::FLOATS:
+ CastPyArg2AttrFloats(obj, attrs, key, op_type, arg_pos);
+ break;
+ case paddle::framework::proto::AttrType::STRINGS:
+ CastPyArg2AttrStrings(obj, attrs, key, op_type, arg_pos);
+ break;
+ case paddle::framework::proto::AttrType::BOOLEAN:
+ CastPyArg2AttrBoolean(obj, attrs, key, op_type, arg_pos);
+ break;
+ case paddle::framework::proto::AttrType::BOOLEANS:
+ CastPyArg2AttrBooleans(obj, attrs, key, op_type, arg_pos);
+ break;
+ case paddle::framework::proto::AttrType::LONG:
+ CastPyArg2AttrLong(obj, attrs, key, op_type, arg_pos);
+ break;
+ case paddle::framework::proto::AttrType::LONGS:
+ CastPyArg2AttrLongs(obj, attrs, key, op_type, arg_pos);
+ break;
+ case paddle::framework::proto::AttrType::FLOAT64S:
+ CastPyArg2AttrFloat64s(obj, attrs, key, op_type, arg_pos);
+ break;
+ case paddle::framework::proto::AttrType::BLOCK:
+ CastPyArg2AttrBlock(obj, attrs, key, op_type, arg_pos);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+std::shared_ptr<imperative::VarBase> GetVarBaseFromArgs(
+ const std::string& op_type, const std::string& arg_name, PyObject* args,
+ ssize_t arg_idx, bool dispensable) {
+ ::pybind11::detail::instance* inst =
+ (::pybind11::detail::instance*)PyTuple_GET_ITEM(args, arg_idx);
+
+ if (PyTuple_Check((PyObject*)inst)) { // NOLINT
+ inst = (::pybind11::detail::instance*)PyTuple_GET_ITEM(inst, 0);
+ }
+
+ if (inst == nullptr || (PyObject*)inst == Py_None) { // NOLINT
+ if (!dispensable) {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument '%s' (position %d) must be Tensor, but got None",
+ op_type, arg_name, arg_idx));
+ }
+ return nullptr;
+ }
+
+ if (!PyObject_IsInstance((PyObject*)inst, // NOLINT
+ (PyObject*)g_varbase_pytype)) { // NOLINT
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument '%s' (position %d) must be Tensor, but got "
+ "%s",
+ op_type, arg_name,
arg_idx,
+ ((PyTypeObject*)((PyObject*)inst)->ob_type)->tp_name)); // NOLINT
+ }
+
+ void** vh = inst->simple_layout ? inst->simple_value_holder
+ : &inst->nonsimple.values_and_holders[0];
+ return reinterpret_cast<std::shared_ptr<imperative::VarBase>&>(vh[1]);
+}
+
+std::vector<std::shared_ptr<imperative::VarBase>> GetVarBaseListFromArgs(
+ const std::string& op_type, const std::string& arg_name, PyObject* args,
+ ssize_t arg_idx, bool dispensable) {
+ PyObject* list = PyTuple_GET_ITEM(args, arg_idx);
+
+ if (list == nullptr) {
+ if (!dispensable) {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument '%s' (position %d) must be list of Tensor, but got "
+ "None",
+ op_type, arg_name, arg_idx)); // NOLINT
+ }
+ return {};
+ }
+
+ std::vector<std::shared_ptr<imperative::VarBase>> result;
+
+ if (PyList_Check(list)) {
+ Py_ssize_t len = PyList_Size(list);
+ if (len == 0) {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument '%s' (position %d) must be list of Tensors, but got "
+ "empty list",
+ op_type, arg_name, arg_idx));
+ }
+ ::pybind11::detail::instance* item = nullptr;
+ for (Py_ssize_t i = 0; i < len; i++) {
+ item = (::pybind11::detail::instance*)PyList_GetItem(list, i);
+ if (!PyObject_IsInstance((PyObject*)item, // NOLINT
+ (PyObject*)g_varbase_pytype)) { // NOLINT
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument '%s' (position %d) must be list of Tensors, but "
+ "got list of "
+ "%s",
+ op_type, arg_name, arg_idx,
+ ((PyTypeObject*)((PyObject*)item)->ob_type)->tp_name)); // NOLINT
+ }
+ void** vh = item->simple_layout ? item->simple_value_holder
+ : &item->nonsimple.values_and_holders[0];
+ result.emplace_back(
+ reinterpret_cast<std::shared_ptr<imperative::VarBase>&>(
+ vh[1]));
+ }
+ } else if (PyTuple_Check(list)) {
+ Py_ssize_t len = PyTuple_Size(list);
+ if (len == 0) {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument '%s' (position %d) must be list of Tensors, but got "
+ "empty list",
+ op_type, arg_name, arg_idx));
+ }
+ ::pybind11::detail::instance* item = nullptr;
+ for (Py_ssize_t i = 0; i < len; i++) {
+ item = (::pybind11::detail::instance*)PyTuple_GetItem(list, i); // NOLINT
+ if (!PyObject_IsInstance((PyObject*)item, // NOLINT
+ (PyObject*)g_varbase_pytype)) { // NOLINT
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument '%s' (position %d) must be list of Tensors, but "
+ "got list of "
+ "%s",
+ op_type, arg_name, arg_idx,
+ ((PyTypeObject*)((PyObject*)item)->ob_type)->tp_name)); // NOLINT
+ }
+ void** vh = item->simple_layout ?
item->simple_value_holder
+ : &item->nonsimple.values_and_holders[0];
+ result.emplace_back(
+ reinterpret_cast<std::shared_ptr<imperative::VarBase>&>(
+ vh[1]));
+ }
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument '%s' (position %d) must be list of Tensors, but got "
+ "%s",
+ op_type, arg_name, arg_idx,
+ ((PyTypeObject*)list->ob_type)->tp_name)); // NOLINT
+ }
+
+ return result;
+}
+
+unsigned long GetUnsignedLongFromArgs( // NOLINT
+ const std::string& op_type, const std::string& arg_name, PyObject* args,
+ ssize_t arg_idx, bool dispensable) {
+ PyObject* item = PyTuple_GET_ITEM(args, arg_idx);
+
+ if (item == nullptr) {
+ if (!dispensable) {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument '%s' (position %d) must be long, but got None",
+ op_type, arg_name, arg_idx));
+ }
+ return 0;
+ }
+
+ if (PyObject_CheckLongOrToLong(&item)) {
+ return PyLong_AsUnsignedLong(item);
+ } else {
+ PADDLE_THROW(platform::errors::InvalidArgument(
+ "%s(): argument '%s' (position %d) must be "
+ "long, but got %s",
+ op_type, arg_name, arg_idx,
+ ((PyTypeObject*)item->ob_type)->tp_name)); // NOLINT
+ }
+}
+
+void InitOpsAttrTypeMap() {
+ auto op_info_map = paddle::framework::OpInfoMap::Instance().map();
+ for (auto iter = op_info_map.begin(); iter != op_info_map.end(); ++iter) {
+ auto op_proto = iter->second.proto_;
+ if (op_proto == nullptr) {
+ continue;
+ }
+ auto attrs_proto = op_proto->attrs();
+ for (auto& attr : attrs_proto) {
+ OpAttrTypeMap::Instance().Map()[iter->first][attr.name()] = attr.type();
+ }
+ }
+}
+
+} // namespace pybind
+} // namespace paddle
diff --git a/paddle/fluid/pybind/op_function_common.h b/paddle/fluid/pybind/op_function_common.h
new file mode 100644
index 0000000000000..9dc3a71a6ccf9
--- /dev/null
+++ b/paddle/fluid/pybind/op_function_common.h
@@ -0,0 +1,126 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
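The casters above, together with ConstructAttrMapFromPyArgs, fix the calling convention of the generated op functions: tensor inputs arrive first as positional arguments, followed by a flattened tail of ('attr_name', attr_value) pairs that is folded into an AttributeMap. A minimal Python sketch of that convention, assuming a Paddle build that carries these patches ('dim' and 'keep_dim' are attribute names registered for reduce_sum):

import numpy as np
import paddle
from paddle import _C_ops

x = paddle.to_tensor(np.ones([2, 3]).astype('float32'))
# ConstructAttrMapFromPyArgs walks the tail two items at a time; here
# CastPyArg2AttrInts handles [0] and CastPyArg2AttrBoolean handles False.
out = _C_ops.reduce_sum(x, 'dim', [0], 'keep_dim', False)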
+
+#pragma once
+
+#include <pybind11/chrono.h>
+#include <pybind11/complex.h>
+#include <pybind11/functional.h>
+#include <pybind11/stl.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "paddle/fluid/framework/attribute.h"
+#include "paddle/fluid/framework/op_info.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/variable.h"
+#include "paddle/fluid/imperative/tracer.h"
+#include "paddle/fluid/imperative/type_defs.h"
+#include "paddle/fluid/pybind/imperative.h"
+
+namespace py = pybind11;
+namespace paddle {
+namespace pybind {
+
+bool PyObject_CheckBool(PyObject** obj);
+
+bool PyObject_CheckLongOrToLong(PyObject** obj);
+
+bool PyObject_CheckFloatOrToFloat(PyObject** obj);
+
+bool PyObject_CheckString(PyObject* obj);
+
+void CastPyArg2AttrBoolean(PyObject* obj,
+ paddle::framework::AttributeMap& attrs, // NOLINT
+ const std::string& key, const std::string& op_type,
+ ssize_t arg_pos);
+
+void CastPyArg2AttrInt(PyObject* obj,
+ paddle::framework::AttributeMap& attrs, // NOLINT
+ const std::string& key, const std::string& op_type,
+ ssize_t arg_pos);
+
+void CastPyArg2AttrLong(PyObject* obj,
+ paddle::framework::AttributeMap& attrs, // NOLINT
+ const std::string& key, const std::string& op_type,
+ ssize_t arg_pos);
+
+void CastPyArg2AttrFloat(PyObject* obj,
+ paddle::framework::AttributeMap& attrs, // NOLINT
+ const std::string& key, const std::string& op_type,
+ ssize_t arg_pos);
+
+void CastPyArg2AttrString(PyObject* obj,
+ paddle::framework::AttributeMap& attrs, // NOLINT
+ const std::string& key, const std::string& op_type,
+ ssize_t arg_pos);
+
+void CastPyArg2AttrBooleans(PyObject* obj,
+ paddle::framework::AttributeMap& attrs, // NOLINT
+ const std::string& key, const std::string& op_type,
+ ssize_t arg_pos);
+
+void CastPyArg2AttrInts(PyObject* obj,
+ paddle::framework::AttributeMap& attrs, // NOLINT
+ const std::string& key, const std::string& op_type,
+ ssize_t arg_pos);
+
+void CastPyArg2AttrLongs(PyObject* obj,
+ paddle::framework::AttributeMap& attrs, // NOLINT
+ const std::string& key, const std::string& op_type,
+ ssize_t arg_pos);
+
+void CastPyArg2AttrFloats(PyObject* obj,
+ paddle::framework::AttributeMap& attrs, // NOLINT
+ const std::string& key, const std::string& op_type,
+ ssize_t arg_pos);
+
+void CastPyArg2AttrFloat64s(PyObject* obj,
+ paddle::framework::AttributeMap& attrs, // NOLINT
+ const std::string& key, const std::string& op_type,
+ ssize_t arg_pos);
+
+void CastPyArg2AttrStrings(PyObject* obj,
+ paddle::framework::AttributeMap& attrs, // NOLINT
+ const std::string& key, const std::string& op_type,
+ ssize_t arg_pos);
+
+void CastPyArg2AttrBlock(PyObject* obj,
+ paddle::framework::AttributeMap& attrs, // NOLINT
+ const std::string& key, const std::string& op_type,
+ ssize_t arg_pos);
+
+void ConstructAttrMapFromPyArgs(
+ const std::string& op_type, PyObject* args, ssize_t attr_start,
+ ssize_t attr_end,
+ paddle::framework::AttributeMap& attrs); // NOLINT
+
+std::shared_ptr<imperative::VarBase> GetVarBaseFromArgs(
+ const std::string& op_type, const std::string& arg_name, PyObject* args,
+ ssize_t arg_idx, bool dispensable = false);
+
+std::vector<std::shared_ptr<imperative::VarBase>> GetVarBaseListFromArgs(
+ const std::string& op_type, const std::string& arg_name, PyObject* args,
+ ssize_t arg_idx, bool dispensable = false);
+
+unsigned long GetUnsignedLongFromArgs( // NOLINT
+ const std::string& op_type, const std::string& arg_name, PyObject* args,
+ ssize_t arg_idx, bool dispensable = false);
+
+void InitOpsAttrTypeMap();
+
+} // namespace pybind
+} // namespace paddle
diff --git a/python/paddle/_C_ops.py b/python/paddle/_C_ops.py
index
ffec9dc69472e..83030bb2454f6 100644 --- a/python/paddle/_C_ops.py +++ b/python/paddle/_C_ops.py @@ -19,3 +19,21 @@ for name in dir(core.ops): globals()[name] = getattr(core.ops, name) __all__.append(name) + + +def switch_to_core_ops(): + for name in dir(core.eager.ops): + del globals()[name] + __all__.remove(name) + for name in dir(core.ops): + globals()[name] = getattr(core.ops, name) + __all__.append(name) + + +def switch_to_eager_ops(): + for name in dir(core.ops): + del globals()[name] + __all__.remove(name) + for name in dir(core.eager.ops): + globals()[name] = getattr(core.eager.ops, name) + __all__.append(name) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index ee7aa4560364e..b38f9649305fc 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -40,6 +40,7 @@ import warnings import functools from .variable_index import _getitem_impl_, _setitem_impl_ +from paddle import _C_ops __all__ = [ 'Program', @@ -81,13 +82,36 @@ @signature_safe_contextmanager -def eager_guard(): +def eager_mode_place_guard(place): + if place is not None: + expected_place = _get_paddle_place(place) + else: + expected_place = _current_expected_place() + + global _global_expected_place_ + tmp_place = _global_expected_place_ + _global_expected_place_ = expected_place + + _set_expected_place(expected_place) + + try: + yield + finally: + _global_expected_place_ = tmp_place + _set_expected_place(tmp_place) + + +@signature_safe_contextmanager +def eager_guard(place=None): global _eager_mode_ _eager_mode_ = True + _C_ops.switch_to_eager_ops() try: - yield + with eager_mode_place_guard(place): + yield finally: _eager_mode_ = False + _C_ops.switch_to_core_ops() def in_eager_mode(): diff --git a/python/paddle/fluid/tests/unittests/test_egr_code_generate_api.py b/python/paddle/fluid/tests/unittests/test_egr_code_generate_api.py new file mode 100644 index 0000000000000..728185c055958 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_egr_code_generate_api.py @@ -0,0 +1,71 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
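The eager_guard rewritten above now does three things: it flips _eager_mode_, swaps the _C_ops bindings over to core.eager.ops, and pins the expected place through eager_mode_place_guard. A small usage sketch under the same assumptions as the tests that follow (a build with these patches; the 'cpu' place is arbitrary):

import numpy as np
import paddle
from paddle.fluid import eager_guard
from paddle.fluid.framework import in_eager_mode

with eager_guard(place='cpu'):
    assert in_eager_mode()
    x = paddle.to_tensor(np.ones([2, 2]).astype('float32'))
    out = paddle.add(x, x)  # dispatches through core.eager.ops
assert not in_eager_mode()  # restored by the finally block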
+ +import paddle.fluid.core as core +import paddle.fluid.eager.eager_tensor_patch_methods as eager_tensor_patch_methods +import paddle +import numpy as np +from paddle.fluid import eager_guard +import unittest + + +class EagerOpAPIGenerateTestCase(unittest.TestCase): + def test_elementwise_add(self): + with eager_guard(): + paddle.set_device("cpu") + np_x = np.ones([4, 16, 16, 32]).astype('float32') + np_y = np.ones([4, 16, 16, 32]).astype('float32') + x = paddle.to_tensor(np_x) + y = paddle.to_tensor(np_y) + out = paddle.add(x, y) + out_arr = out.numpy() + + out_arr_expected = np.add(np_x, np_y) + self.assertTrue(np.array_equal(out_arr, out_arr_expected)) + + def test_sum(self): + with eager_guard(): + x_data = np.array( + [[0.2, 0.3, 0.5, 0.9], [0.1, 0.2, 0.6, 0.7]]).astype('float32') + x = paddle.to_tensor(x_data, 'float32') + out = paddle.sum(x, axis=0) + out_arr = out.numpy() + out_arr_expected = np.sum(x_data, axis=0) + self.assertTrue(np.array_equal(out_arr, out_arr_expected)) + + def test_mm(self): + with eager_guard(): + np_input = np.random.random([16, 32]).astype('float32') + np_mat2 = np.random.random([32, 32]).astype('float32') + input = paddle.to_tensor(np_input) + mat2 = paddle.to_tensor(np_mat2) + out = paddle.mm(input, mat2) + out_arr = out.numpy() + out_arr_expected = np.matmul(np_input, np_mat2) + self.assertTrue(np.allclose(out_arr, out_arr_expected)) + + def test_sigmoid(self): + with eager_guard(): + np_x = np.array([-0.4, -0.2, 0.1, 0.3]).astype('float32') + x = paddle.to_tensor(np_x) + out = paddle.nn.functional.sigmoid(x) + out_arr = out.numpy() + out_arr_expected = np.array( + [0.40131234, 0.450166, 0.52497919, 0.57444252]).astype( + 'float32') + self.assertTrue(np.allclose(out_arr, out_arr_expected)) + + +if __name__ == "__main__": + unittest.main() From 6c1d43652f140f789df1b1a5db12912ebd28845f Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 3 Dec 2021 05:07:38 +0000 Subject: [PATCH 18/31] refine, test=develop --- paddle/fluid/pybind/eager.cc | 1 - paddle/fluid/pybind/eager_op_function_generator.cc | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index 5e45bc078def6..0714080382205 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -29,7 +29,6 @@ limitations under the License. 
*/ #include "paddle/pten/include/core.h" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" #include "paddle/fluid/pybind/eager_op_function_impl.h" -#include "paddle/fluid/pybind/exception.h" namespace paddle { namespace pybind { diff --git a/paddle/fluid/pybind/eager_op_function_generator.cc b/paddle/fluid/pybind/eager_op_function_generator.cc index 38b4e1e161329..4940705682b8e 100644 --- a/paddle/fluid/pybind/eager_op_function_generator.cc +++ b/paddle/fluid/pybind/eager_op_function_generator.cc @@ -519,7 +519,8 @@ int main(int argc, char* argv[]) { std::vector headers{ "\"pybind11/detail/common.h\"", - "\"paddle/fluid/pybind/op_function_common.h\"", ""}; + "\"paddle/fluid/pybind/op_function_common.h\"", + "\"paddle/fluid/pybind/exception.h\"", ""}; std::ofstream out(argv[1], std::ios::out); From cccecb56df00079114bf374c5fc486f42c9a8875 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 3 Dec 2021 06:27:46 +0000 Subject: [PATCH 19/31] refine, test=develop --- paddle/fluid/pybind/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index f22d1451c0671..f084174e636f1 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -259,7 +259,7 @@ if(WITH_PYTHON) cc_library(paddle_eager SRCS eager.cc eager_functions.cc eager_method.cc eager_properties.cc eager_utils.cc - DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node math_cpu linalg_cpu creation_cpu utils_cpu manipulation_cpu math_api linalg_api creation_api manipulation_api accumulation_node global_utils utils python) + DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node math_cpu linalg_cpu creation_cpu utils_cpu manipulation_cpu accumulation_node global_utils utils python) add_dependencies(paddle_eager eager_op_function_generator_cmd) list(APPEND PYBIND_DEPS paddle_eager) list(APPEND PYBIND_DEPS op_function_common) From 70eb35edc542829c292fca533e2d1e0144af4c53 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 3 Dec 2021 06:47:43 +0000 Subject: [PATCH 20/31] refine, test=develop --- paddle/fluid/pybind/eager.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index 0714080382205..330605359c730 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -15,7 +15,6 @@ limitations under the License. */ #include #include "paddle/fluid/eager/api/all.h" -#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/utils.h" #include "paddle/fluid/memory/allocation/allocator.h" From 0af316987a43aa0adcce00e1eb6e79e93b77d01c Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 3 Dec 2021 08:23:59 +0000 Subject: [PATCH 21/31] refine, test=develop --- paddle/fluid/pybind/eager.cc | 1 + paddle/fluid/pybind/eager_op_function_impl.h | 141 +++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 paddle/fluid/pybind/eager_op_function_impl.h diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index 330605359c730..0714080382205 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -15,6 +15,7 @@ limitations under the License. 
*/
 #include <vector>
 #include "paddle/fluid/eager/api/all.h"
+#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
 #include "paddle/fluid/eager/autograd_meta.h"
 #include "paddle/fluid/eager/utils.h"
 #include "paddle/fluid/memory/allocation/allocator.h"
diff --git a/paddle/fluid/pybind/eager_op_function_impl.h b/paddle/fluid/pybind/eager_op_function_impl.h
new file mode 100644
index 0000000000000..dbe9b7b1c73b1
--- /dev/null
+++ b/paddle/fluid/pybind/eager_op_function_impl.h
@@ -0,0 +1,141 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <Python.h>
+#include "paddle/fluid/pybind/exception.h"
+#include "paddle/fluid/pybind/op_function_common.h"
+#include "pybind11/detail/common.h"
+
+namespace paddle {
+namespace pybind {
+
+static PyObject *eager_api_matmul_v2(PyObject *self, PyObject *args,
+ PyObject *kwargs) {
+ PyThreadState *tstate = nullptr;
+ try {
+ auto X = GetEagerTensorFromArgs("matmul_v2", "X", args, 0, false);
+ auto Y = GetEagerTensorFromArgs("matmul_v2", "Y", args, 1, false);
+ framework::AttributeMap attrs;
+ ConstructAttrMapFromPyArgs("matmul_v2", args, 2, PyTuple_GET_SIZE(args),
+ attrs);
+ tstate = PyEval_SaveThread();
+ auto out = matmul_v2_dygraph_function(X, Y, attrs);
+ PyEval_RestoreThread(tstate);
+ tstate = nullptr;
+ return ToPyObject(out);
+ } catch (...) {
+ if (tstate) {
+ PyEval_RestoreThread(tstate);
+ }
+ ThrowExceptionToPython(std::current_exception());
+ return nullptr;
+ }
+}
+
+static PyObject *eager_api_elementwise_add(PyObject *self, PyObject *args,
+ PyObject *kwargs) {
+ PyThreadState *tstate = nullptr;
+ try {
+ auto X = GetEagerTensorFromArgs("elementwise_add", "X", args, 0, false);
+ auto Y = GetEagerTensorFromArgs("elementwise_add", "Y", args, 1, false);
+ framework::AttributeMap attrs;
+ ConstructAttrMapFromPyArgs("elementwise_add", args, 2,
+ PyTuple_GET_SIZE(args), attrs);
+ tstate = PyEval_SaveThread();
+ auto out = elementwise_add_dygraph_function(X, Y, attrs);
+ PyEval_RestoreThread(tstate);
+ tstate = nullptr;
+ return ToPyObject(out);
+ } catch (...) {
+ if (tstate) {
+ PyEval_RestoreThread(tstate);
+ }
+ ThrowExceptionToPython(std::current_exception());
+ return nullptr;
+ }
+}
+
+static PyObject *eager_api_sigmoid(PyObject *self, PyObject *args,
+ PyObject *kwargs) {
+ PyThreadState *tstate = nullptr;
+ try {
+ auto X = GetEagerTensorFromArgs("sigmoid", "X", args, 0, false);
+ framework::AttributeMap attrs;
+ ConstructAttrMapFromPyArgs("sigmoid", args, 1, PyTuple_GET_SIZE(args),
+ attrs);
+ tstate = PyEval_SaveThread();
+ auto out = sigmoid_dygraph_function(X, attrs);
+ PyEval_RestoreThread(tstate);
+ tstate = nullptr;
+ return ToPyObject(out);
+ } catch (...)
{ + if (tstate) { + PyEval_RestoreThread(tstate); + } + ThrowExceptionToPython(std::current_exception()); + return nullptr; + } +} + +static PyObject *eager_api_reduce_sum(PyObject *self, PyObject *args, + PyObject *kwargs) { + PyThreadState *tstate = nullptr; + try { + auto X = GetEagerTensorFromArgs("reduce_sum", "X", args, 0, false); + framework::AttributeMap attrs; + ConstructAttrMapFromPyArgs("reduce_sum", args, 1, PyTuple_GET_SIZE(args), + attrs); + tstate = PyEval_SaveThread(); + auto out = reduce_sum_dygraph_function(X, attrs); + PyEval_RestoreThread(tstate); + tstate = nullptr; + return ToPyObject(out); + } catch (...) { + if (tstate) { + PyEval_RestoreThread(tstate); + } + ThrowExceptionToPython(std::current_exception()); + return nullptr; + } +} + +static PyMethodDef ExtestMethods[] = { + {"matmul_v2", (PyCFunction)(void (*)(void))eager_api_matmul_v2, + METH_VARARGS | METH_KEYWORDS, + "C++ interface function for matmul_v2 in dygraph."}, + {"elementwise_add", (PyCFunction)(void (*)(void))eager_api_elementwise_add, + METH_VARARGS | METH_KEYWORDS, + "C++ interface function for elementwise_add in dygraph."}, + {"sigmoid", (PyCFunction)(void (*)(void))eager_api_sigmoid, + METH_VARARGS | METH_KEYWORDS, + "C++ interface function for sigmoid in dygraph."}, + {"reduce_sum", (PyCFunction)(void (*)(void))eager_api_reduce_sum, + METH_VARARGS | METH_KEYWORDS, + "C++ interface function for reduce_sum in dygraph."}, + {nullptr, nullptr, 0, nullptr}}; + +inline void BindEagerOpFunctions(pybind11::module *module) { + auto m = module->def_submodule("ops"); + if (PyModule_AddFunctions(m.ptr(), ExtestMethods) < 0) { + PADDLE_THROW( + platform::errors::Fatal("Add functions to core.eager.ops failed!")); + } + + InitOpsAttrTypeMap(); +} + +} // namespace pybind +} // namespace paddle From 2a28b95707f40f7d077a988fa705e2ad8c6f2f63 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 3 Dec 2021 08:41:45 +0000 Subject: [PATCH 22/31] refine, test=develop --- paddle/fluid/pybind/CMakeLists.txt | 4 +- paddle/fluid/pybind/eager_op_function_impl.h | 141 ------------------- 2 files changed, 2 insertions(+), 143 deletions(-) delete mode 100644 paddle/fluid/pybind/eager_op_function_impl.h diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index f084174e636f1..1f7a192176a8b 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -255,11 +255,11 @@ if(WITH_PYTHON) add_custom_target(eager_op_function_generator_cmd ALL DEPENDS ${eager_impl_file}) list(APPEND PYBIND_DEPS interpretercore standalone_executor) - cc_library(op_function_common SRCS op_function_common.cc) + cc_library(op_function_common SRCS op_function_common.cc DEPS framework_proto) cc_library(paddle_eager SRCS eager.cc eager_functions.cc eager_method.cc eager_properties.cc eager_utils.cc - DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node math_cpu linalg_cpu creation_cpu utils_cpu manipulation_cpu accumulation_node global_utils utils python) + DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node math_cpu linalg_cpu creation_cpu utils_cpu manipulation_cpu accumulation_node global_utils utils python eager_codegen) add_dependencies(paddle_eager eager_op_function_generator_cmd) list(APPEND PYBIND_DEPS paddle_eager) list(APPEND PYBIND_DEPS op_function_common) diff --git a/paddle/fluid/pybind/eager_op_function_impl.h b/paddle/fluid/pybind/eager_op_function_impl.h deleted file mode 
100644
index dbe9b7b1c73b1..0000000000000
--- a/paddle/fluid/pybind/eager_op_function_impl.h
+++ /dev/null
@@ -1,141 +0,0 @@
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <Python.h>
-#include "paddle/fluid/pybind/exception.h"
-#include "paddle/fluid/pybind/op_function_common.h"
-#include "pybind11/detail/common.h"
-
-namespace paddle {
-namespace pybind {
-
-static PyObject *eager_api_matmul_v2(PyObject *self, PyObject *args,
- PyObject *kwargs) {
- PyThreadState *tstate = nullptr;
- try {
- auto X = GetEagerTensorFromArgs("matmul_v2", "X", args, 0, false);
- auto Y = GetEagerTensorFromArgs("matmul_v2", "Y", args, 1, false);
- framework::AttributeMap attrs;
- ConstructAttrMapFromPyArgs("matmul_v2", args, 2, PyTuple_GET_SIZE(args),
- attrs);
- tstate = PyEval_SaveThread();
- auto out = matmul_v2_dygraph_function(X, Y, attrs);
- PyEval_RestoreThread(tstate);
- tstate = nullptr;
- return ToPyObject(out);
- } catch (...) {
- if (tstate) {
- PyEval_RestoreThread(tstate);
- }
- ThrowExceptionToPython(std::current_exception());
- return nullptr;
- }
-}
-
-static PyObject *eager_api_elementwise_add(PyObject *self, PyObject *args,
- PyObject *kwargs) {
- PyThreadState *tstate = nullptr;
- try {
- auto X = GetEagerTensorFromArgs("elementwise_add", "X", args, 0, false);
- auto Y = GetEagerTensorFromArgs("elementwise_add", "Y", args, 1, false);
- framework::AttributeMap attrs;
- ConstructAttrMapFromPyArgs("elementwise_add", args, 2,
- PyTuple_GET_SIZE(args), attrs);
- tstate = PyEval_SaveThread();
- auto out = elementwise_add_dygraph_function(X, Y, attrs);
- PyEval_RestoreThread(tstate);
- tstate = nullptr;
- return ToPyObject(out);
- } catch (...) {
- if (tstate) {
- PyEval_RestoreThread(tstate);
- }
- ThrowExceptionToPython(std::current_exception());
- return nullptr;
- }
-}
-
-static PyObject *eager_api_sigmoid(PyObject *self, PyObject *args,
- PyObject *kwargs) {
- PyThreadState *tstate = nullptr;
- try {
- auto X = GetEagerTensorFromArgs("sigmoid", "X", args, 0, false);
- framework::AttributeMap attrs;
- ConstructAttrMapFromPyArgs("sigmoid", args, 1, PyTuple_GET_SIZE(args),
- attrs);
- tstate = PyEval_SaveThread();
- auto out = sigmoid_dygraph_function(X, attrs);
- PyEval_RestoreThread(tstate);
- tstate = nullptr;
- return ToPyObject(out);
- } catch (...) {
- if (tstate) {
- PyEval_RestoreThread(tstate);
- }
- ThrowExceptionToPython(std::current_exception());
- return nullptr;
- }
-}
-
-static PyObject *eager_api_reduce_sum(PyObject *self, PyObject *args,
- PyObject *kwargs) {
- PyThreadState *tstate = nullptr;
- try {
- auto X = GetEagerTensorFromArgs("reduce_sum", "X", args, 0, false);
- framework::AttributeMap attrs;
- ConstructAttrMapFromPyArgs("reduce_sum", args, 1, PyTuple_GET_SIZE(args),
- attrs);
- tstate = PyEval_SaveThread();
- auto out = reduce_sum_dygraph_function(X, attrs);
- PyEval_RestoreThread(tstate);
- tstate = nullptr;
- return ToPyObject(out);
- } catch (...)
{ - if (tstate) { - PyEval_RestoreThread(tstate); - } - ThrowExceptionToPython(std::current_exception()); - return nullptr; - } -} - -static PyMethodDef ExtestMethods[] = { - {"matmul_v2", (PyCFunction)(void (*)(void))eager_api_matmul_v2, - METH_VARARGS | METH_KEYWORDS, - "C++ interface function for matmul_v2 in dygraph."}, - {"elementwise_add", (PyCFunction)(void (*)(void))eager_api_elementwise_add, - METH_VARARGS | METH_KEYWORDS, - "C++ interface function for elementwise_add in dygraph."}, - {"sigmoid", (PyCFunction)(void (*)(void))eager_api_sigmoid, - METH_VARARGS | METH_KEYWORDS, - "C++ interface function for sigmoid in dygraph."}, - {"reduce_sum", (PyCFunction)(void (*)(void))eager_api_reduce_sum, - METH_VARARGS | METH_KEYWORDS, - "C++ interface function for reduce_sum in dygraph."}, - {nullptr, nullptr, 0, nullptr}}; - -inline void BindEagerOpFunctions(pybind11::module *module) { - auto m = module->def_submodule("ops"); - if (PyModule_AddFunctions(m.ptr(), ExtestMethods) < 0) { - PADDLE_THROW( - platform::errors::Fatal("Add functions to core.eager.ops failed!")); - } - - InitOpsAttrTypeMap(); -} - -} // namespace pybind -} // namespace paddle From 1294de02254470da747179a531a006c2ad972197 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 3 Dec 2021 09:00:37 +0000 Subject: [PATCH 23/31] refine, test=develop --- paddle/fluid/pybind/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 1f7a192176a8b..e361a8bc87dcf 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -259,7 +259,8 @@ if(WITH_PYTHON) cc_library(paddle_eager SRCS eager.cc eager_functions.cc eager_method.cc eager_properties.cc eager_utils.cc - DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node math_cpu linalg_cpu creation_cpu utils_cpu manipulation_cpu accumulation_node global_utils utils python eager_codegen) + DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node math_cpu linalg_cpu creation_cpu utils_cpu manipulation_cpu accumulation_node global_utils utils python) + add_dependencies(paddle_eager eager_codegen) add_dependencies(paddle_eager eager_op_function_generator_cmd) list(APPEND PYBIND_DEPS paddle_eager) list(APPEND PYBIND_DEPS op_function_common) From e636f314cea4b26ea478b1ca295b36960ed2b8c1 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 3 Dec 2021 09:32:40 +0000 Subject: [PATCH 24/31] refine, test=develop --- paddle/fluid/pybind/CMakeLists.txt | 50 +++++++++++++++++------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index e361a8bc87dcf..b305f6538f6f0 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -210,11 +210,13 @@ if(WITH_PYTHON) COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file} ${impl_file} COMMENT "copy_if_different ${tmp_impl_file} to ${impl_file}" DEPENDS ${OP_IMPL_DEPS}) - add_custom_command(OUTPUT ${eager_impl_file} - COMMAND ${CMAKE_BINARY_DIR}/paddle/fluid/pybind/eager_op_function_generator_retry.bat - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_eager_impl_file} ${eager_impl_file} - COMMENT "copy_if_different ${tmp_eager_impl_file} to ${eager_impl_file}" - DEPENDS ${EAGER_OP_IMPL_DEPS}) + if(NOT ON_INFER) + add_custom_command(OUTPUT ${eager_impl_file} + COMMAND 
${CMAKE_BINARY_DIR}/paddle/fluid/pybind/eager_op_function_generator_retry.bat + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_eager_impl_file} ${eager_impl_file} + COMMENT "copy_if_different ${tmp_eager_impl_file} to ${eager_impl_file}" + DEPENDS ${EAGER_OP_IMPL_DEPS}) + endif() else(WIN32) # If there are no *.so in /usr/lib or LD_LIBRARY_PATH, # copy these *.so to current directory and append current directory to @@ -242,29 +244,35 @@ if(WITH_PYTHON) COMMENT "copy_if_different ${tmp_impl_file} to ${impl_file}" DEPENDS ${OP_IMPL_DEPS} VERBATIM) - add_custom_command(OUTPUT ${eager_impl_file} - COMMAND ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:." - "${CMAKE_CURRENT_BINARY_DIR}/eager_op_function_generator" - "${tmp_eager_impl_file}" - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_eager_impl_file} ${eager_impl_file} - COMMENT "copy_if_different ${tmp_eager_impl_file} to ${eager_impl_file}" - DEPENDS ${EAGER_OP_IMPL_DEPS} - VERBATIM) + if(NOT ON_INFER) + add_custom_command(OUTPUT ${eager_impl_file} + COMMAND ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:." + "${CMAKE_CURRENT_BINARY_DIR}/eager_op_function_generator" + "${tmp_eager_impl_file}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_eager_impl_file} ${eager_impl_file} + COMMENT "copy_if_different ${tmp_eager_impl_file} to ${eager_impl_file}" + DEPENDS ${EAGER_OP_IMPL_DEPS} + VERBATIM) + endif() endif(WIN32) add_custom_target(op_function_generator_cmd ALL DEPENDS ${impl_file}) - add_custom_target(eager_op_function_generator_cmd ALL DEPENDS ${eager_impl_file}) + if(NOT ON_INFER) + add_custom_target(eager_op_function_generator_cmd ALL DEPENDS ${eager_impl_file}) + endif() list(APPEND PYBIND_DEPS interpretercore standalone_executor) cc_library(op_function_common SRCS op_function_common.cc DEPS framework_proto) - - cc_library(paddle_eager - SRCS eager.cc eager_functions.cc eager_method.cc eager_properties.cc eager_utils.cc - DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node math_cpu linalg_cpu creation_cpu utils_cpu manipulation_cpu accumulation_node global_utils utils python) - add_dependencies(paddle_eager eager_codegen) - add_dependencies(paddle_eager eager_op_function_generator_cmd) - list(APPEND PYBIND_DEPS paddle_eager) list(APPEND PYBIND_DEPS op_function_common) + if(NOT ON_INFER) + cc_library(paddle_eager + SRCS eager.cc eager_functions.cc eager_method.cc eager_properties.cc eager_utils.cc + DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node math_cpu linalg_cpu creation_cpu utils_cpu manipulation_cpu accumulation_node global_utils utils python) + add_dependencies(paddle_eager eager_codegen) + add_dependencies(paddle_eager eager_op_function_generator_cmd) + list(APPEND PYBIND_DEPS paddle_eager) + endif() + cc_library(paddle_pybind SHARED SRCS ${PYBIND_SRCS} DEPS ${PYBIND_DEPS} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS}) From 0aa0337c98d7faedef7f81d64c902b7e9e081f09 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 3 Dec 2021 10:07:19 +0000 Subject: [PATCH 25/31] refine, test=develop --- paddle/fluid/pybind/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index b305f6538f6f0..682efa5822f3c 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -261,7 +261,7 @@ if(WITH_PYTHON) endif() list(APPEND PYBIND_DEPS interpretercore 
standalone_executor) - cc_library(op_function_common SRCS op_function_common.cc DEPS framework_proto) + cc_library(op_function_common SRCS op_function_common.cc DEPS framework_proto extern_xxhash) list(APPEND PYBIND_DEPS op_function_common) if(NOT ON_INFER) From 1e09ccec3ca32f30d683d0414a51e2e659892279 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 3 Dec 2021 10:09:15 +0000 Subject: [PATCH 26/31] refine, test=develop --- paddle/fluid/pybind/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 682efa5822f3c..6cfd28f1d2e45 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -261,7 +261,7 @@ if(WITH_PYTHON) endif() list(APPEND PYBIND_DEPS interpretercore standalone_executor) - cc_library(op_function_common SRCS op_function_common.cc DEPS framework_proto extern_xxhash) + cc_library(op_function_common SRCS op_function_common.cc DEPS framework_proto extern_xxhash extern_boost) list(APPEND PYBIND_DEPS op_function_common) if(NOT ON_INFER) From af85f654fe4fb5e73f76ede78d09a9613160cdbd Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 3 Dec 2021 10:24:17 +0000 Subject: [PATCH 27/31] refine, test=develop --- paddle/fluid/pybind/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 6cfd28f1d2e45..c65846d03cd11 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -261,7 +261,7 @@ if(WITH_PYTHON) endif() list(APPEND PYBIND_DEPS interpretercore standalone_executor) - cc_library(op_function_common SRCS op_function_common.cc DEPS framework_proto extern_xxhash extern_boost) + cc_library(op_function_common SRCS op_function_common.cc DEPS ${PYBIND_DEPS}) list(APPEND PYBIND_DEPS op_function_common) if(NOT ON_INFER) From b3c6f2843d86d98460f757fe7b21ee80b5c5a0d7 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Mon, 6 Dec 2021 02:45:23 +0000 Subject: [PATCH 28/31] refine, test=develop --- paddle/fluid/eager/eager_tensor.h | 5 - paddle/fluid/pybind/eager_op_function_impl.h | 141 +++++++++++++++++++ 2 files changed, 141 insertions(+), 5 deletions(-) create mode 100644 paddle/fluid/pybind/eager_op_function_impl.h diff --git a/paddle/fluid/eager/eager_tensor.h b/paddle/fluid/eager/eager_tensor.h index 7ade0a9848dc4..61d32cb875cde 100644 --- a/paddle/fluid/eager/eager_tensor.h +++ b/paddle/fluid/eager/eager_tensor.h @@ -237,11 +237,6 @@ class EagerTensor final { "from VarBase, only LoDTensor and " "Tensor are supported for now")); } - } else { - PADDLE_THROW(paddle::platform::errors::Fatal( - "Can not Sync EagerTensor %s whose paddle::framework::Variable is " - "not initialized!", - name())); } } } diff --git a/paddle/fluid/pybind/eager_op_function_impl.h b/paddle/fluid/pybind/eager_op_function_impl.h new file mode 100644 index 0000000000000..dbe9b7b1c73b1 --- /dev/null +++ b/paddle/fluid/pybind/eager_op_function_impl.h @@ -0,0 +1,141 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <Python.h>
+#include "paddle/fluid/pybind/exception.h"
+#include "paddle/fluid/pybind/op_function_common.h"
+#include "pybind11/detail/common.h"
+
+namespace paddle {
+namespace pybind {
+
+static PyObject *eager_api_matmul_v2(PyObject *self, PyObject *args,
+ PyObject *kwargs) {
+ PyThreadState *tstate = nullptr;
+ try {
+ auto X = GetEagerTensorFromArgs("matmul_v2", "X", args, 0, false);
+ auto Y = GetEagerTensorFromArgs("matmul_v2", "Y", args, 1, false);
+ framework::AttributeMap attrs;
+ ConstructAttrMapFromPyArgs("matmul_v2", args, 2, PyTuple_GET_SIZE(args),
+ attrs);
+ tstate = PyEval_SaveThread();
+ auto out = matmul_v2_dygraph_function(X, Y, attrs);
+ PyEval_RestoreThread(tstate);
+ tstate = nullptr;
+ return ToPyObject(out);
+ } catch (...) {
+ if (tstate) {
+ PyEval_RestoreThread(tstate);
+ }
+ ThrowExceptionToPython(std::current_exception());
+ return nullptr;
+ }
+}
+
+static PyObject *eager_api_elementwise_add(PyObject *self, PyObject *args,
+ PyObject *kwargs) {
+ PyThreadState *tstate = nullptr;
+ try {
+ auto X = GetEagerTensorFromArgs("elementwise_add", "X", args, 0, false);
+ auto Y = GetEagerTensorFromArgs("elementwise_add", "Y", args, 1, false);
+ framework::AttributeMap attrs;
+ ConstructAttrMapFromPyArgs("elementwise_add", args, 2,
+ PyTuple_GET_SIZE(args), attrs);
+ tstate = PyEval_SaveThread();
+ auto out = elementwise_add_dygraph_function(X, Y, attrs);
+ PyEval_RestoreThread(tstate);
+ tstate = nullptr;
+ return ToPyObject(out);
+ } catch (...) {
+ if (tstate) {
+ PyEval_RestoreThread(tstate);
+ }
+ ThrowExceptionToPython(std::current_exception());
+ return nullptr;
+ }
+}
+
+static PyObject *eager_api_sigmoid(PyObject *self, PyObject *args,
+ PyObject *kwargs) {
+ PyThreadState *tstate = nullptr;
+ try {
+ auto X = GetEagerTensorFromArgs("sigmoid", "X", args, 0, false);
+ framework::AttributeMap attrs;
+ ConstructAttrMapFromPyArgs("sigmoid", args, 1, PyTuple_GET_SIZE(args),
+ attrs);
+ tstate = PyEval_SaveThread();
+ auto out = sigmoid_dygraph_function(X, attrs);
+ PyEval_RestoreThread(tstate);
+ tstate = nullptr;
+ return ToPyObject(out);
+ } catch (...) {
+ if (tstate) {
+ PyEval_RestoreThread(tstate);
+ }
+ ThrowExceptionToPython(std::current_exception());
+ return nullptr;
+ }
+}
+
+static PyObject *eager_api_reduce_sum(PyObject *self, PyObject *args,
+ PyObject *kwargs) {
+ PyThreadState *tstate = nullptr;
+ try {
+ auto X = GetEagerTensorFromArgs("reduce_sum", "X", args, 0, false);
+ framework::AttributeMap attrs;
+ ConstructAttrMapFromPyArgs("reduce_sum", args, 1, PyTuple_GET_SIZE(args),
+ attrs);
+ tstate = PyEval_SaveThread();
+ auto out = reduce_sum_dygraph_function(X, attrs);
+ PyEval_RestoreThread(tstate);
+ tstate = nullptr;
+ return ToPyObject(out);
+ } catch (...)
{ + if (tstate) { + PyEval_RestoreThread(tstate); + } + ThrowExceptionToPython(std::current_exception()); + return nullptr; + } +} + +static PyMethodDef ExtestMethods[] = { + {"matmul_v2", (PyCFunction)(void (*)(void))eager_api_matmul_v2, + METH_VARARGS | METH_KEYWORDS, + "C++ interface function for matmul_v2 in dygraph."}, + {"elementwise_add", (PyCFunction)(void (*)(void))eager_api_elementwise_add, + METH_VARARGS | METH_KEYWORDS, + "C++ interface function for elementwise_add in dygraph."}, + {"sigmoid", (PyCFunction)(void (*)(void))eager_api_sigmoid, + METH_VARARGS | METH_KEYWORDS, + "C++ interface function for sigmoid in dygraph."}, + {"reduce_sum", (PyCFunction)(void (*)(void))eager_api_reduce_sum, + METH_VARARGS | METH_KEYWORDS, + "C++ interface function for reduce_sum in dygraph."}, + {nullptr, nullptr, 0, nullptr}}; + +inline void BindEagerOpFunctions(pybind11::module *module) { + auto m = module->def_submodule("ops"); + if (PyModule_AddFunctions(m.ptr(), ExtestMethods) < 0) { + PADDLE_THROW( + platform::errors::Fatal("Add functions to core.eager.ops failed!")); + } + + InitOpsAttrTypeMap(); +} + +} // namespace pybind +} // namespace paddle From 83a95068d9e0d5210c3c21c80c6a76517a6af72d Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Mon, 6 Dec 2021 03:12:11 +0000 Subject: [PATCH 29/31] refine, test=develop --- paddle/fluid/eager/eager_tensor.h | 5 +++++ paddle/fluid/pybind/eager_method.cc | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/eager/eager_tensor.h b/paddle/fluid/eager/eager_tensor.h index 61d32cb875cde..7ade0a9848dc4 100644 --- a/paddle/fluid/eager/eager_tensor.h +++ b/paddle/fluid/eager/eager_tensor.h @@ -237,6 +237,11 @@ class EagerTensor final { "from VarBase, only LoDTensor and " "Tensor are supported for now")); } + } else { + PADDLE_THROW(paddle::platform::errors::Fatal( + "Can not Sync EagerTensor %s whose paddle::framework::Variable is " + "not initialized!", + name())); } } } diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index e95db865a1d59..e40c1260e2f2e 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -94,7 +94,9 @@ static PyObject* eager_tensor_method_is_initialized(EagerTensorObject* self, PyObject* args, PyObject* kwargs) { EAGER_TRY - self->eagertensor.SyncToTensor(); + if (self->eagertensor.Var().IsInitialized()) { + self->eagertensor.SyncToTensor(); + } return ToPyObject(self->eagertensor.initialized()); EAGER_CATCH_AND_THROW_RETURN_NULL } From 372fdb0da8b3a3eefb0be5ad7334dd4b81442c3a Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Mon, 6 Dec 2021 04:39:50 +0000 Subject: [PATCH 30/31] refine, test=develop --- paddle/fluid/pybind/eager_op_function_impl.h | 141 ------------------- 1 file changed, 141 deletions(-) delete mode 100644 paddle/fluid/pybind/eager_op_function_impl.h diff --git a/paddle/fluid/pybind/eager_op_function_impl.h b/paddle/fluid/pybind/eager_op_function_impl.h deleted file mode 100644 index dbe9b7b1c73b1..0000000000000 --- a/paddle/fluid/pybind/eager_op_function_impl.h +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <Python.h>
-#include "paddle/fluid/pybind/exception.h"
-#include "paddle/fluid/pybind/op_function_common.h"
-#include "pybind11/detail/common.h"
-
-namespace paddle {
-namespace pybind {
-
-static PyObject *eager_api_matmul_v2(PyObject *self, PyObject *args,
- PyObject *kwargs) {
- PyThreadState *tstate = nullptr;
- try {
- auto X = GetEagerTensorFromArgs("matmul_v2", "X", args, 0, false);
- auto Y = GetEagerTensorFromArgs("matmul_v2", "Y", args, 1, false);
- framework::AttributeMap attrs;
- ConstructAttrMapFromPyArgs("matmul_v2", args, 2, PyTuple_GET_SIZE(args),
- attrs);
- tstate = PyEval_SaveThread();
- auto out = matmul_v2_dygraph_function(X, Y, attrs);
- PyEval_RestoreThread(tstate);
- tstate = nullptr;
- return ToPyObject(out);
- } catch (...) {
- if (tstate) {
- PyEval_RestoreThread(tstate);
- }
- ThrowExceptionToPython(std::current_exception());
- return nullptr;
- }
-}
-
-static PyObject *eager_api_elementwise_add(PyObject *self, PyObject *args,
- PyObject *kwargs) {
- PyThreadState *tstate = nullptr;
- try {
- auto X = GetEagerTensorFromArgs("elementwise_add", "X", args, 0, false);
- auto Y = GetEagerTensorFromArgs("elementwise_add", "Y", args, 1, false);
- framework::AttributeMap attrs;
- ConstructAttrMapFromPyArgs("elementwise_add", args, 2,
- PyTuple_GET_SIZE(args), attrs);
- tstate = PyEval_SaveThread();
- auto out = elementwise_add_dygraph_function(X, Y, attrs);
- PyEval_RestoreThread(tstate);
- tstate = nullptr;
- return ToPyObject(out);
- } catch (...) {
- if (tstate) {
- PyEval_RestoreThread(tstate);
- }
- ThrowExceptionToPython(std::current_exception());
- return nullptr;
- }
-}
-
-static PyObject *eager_api_sigmoid(PyObject *self, PyObject *args,
- PyObject *kwargs) {
- PyThreadState *tstate = nullptr;
- try {
- auto X = GetEagerTensorFromArgs("sigmoid", "X", args, 0, false);
- framework::AttributeMap attrs;
- ConstructAttrMapFromPyArgs("sigmoid", args, 1, PyTuple_GET_SIZE(args),
- attrs);
- tstate = PyEval_SaveThread();
- auto out = sigmoid_dygraph_function(X, attrs);
- PyEval_RestoreThread(tstate);
- tstate = nullptr;
- return ToPyObject(out);
- } catch (...) {
- if (tstate) {
- PyEval_RestoreThread(tstate);
- }
- ThrowExceptionToPython(std::current_exception());
- return nullptr;
- }
-}
-
-static PyObject *eager_api_reduce_sum(PyObject *self, PyObject *args,
- PyObject *kwargs) {
- PyThreadState *tstate = nullptr;
- try {
- auto X = GetEagerTensorFromArgs("reduce_sum", "X", args, 0, false);
- framework::AttributeMap attrs;
- ConstructAttrMapFromPyArgs("reduce_sum", args, 1, PyTuple_GET_SIZE(args),
- attrs);
- tstate = PyEval_SaveThread();
- auto out = reduce_sum_dygraph_function(X, attrs);
- PyEval_RestoreThread(tstate);
- tstate = nullptr;
- return ToPyObject(out);
- } catch (...)
From 9a7c769e97f1e81ea2e72bbac61ae2356fe4048e Mon Sep 17 00:00:00 2001
From: Wang Huan
Date: Tue, 7 Dec 2021 01:57:36 +0000
Subject: [PATCH 31/31] refine, test=develop

---
 .../pybind/eager_op_function_generator.cc     | 170 +-----------------
 paddle/fluid/pybind/op_function_generator.cc  |  71 --------
 paddle/fluid/pybind/op_function_generator.h   |  71 ++++++++
 3 files changed, 72 insertions(+), 240 deletions(-)

diff --git a/paddle/fluid/pybind/eager_op_function_generator.cc b/paddle/fluid/pybind/eager_op_function_generator.cc
index 4940705682b8e..46d0bdcb46de7 100644
--- a/paddle/fluid/pybind/eager_op_function_generator.cc
+++ b/paddle/fluid/pybind/eager_op_function_generator.cc
@@ -30,179 +30,11 @@
 #ifdef PADDLE_WITH_ASCEND_CL
 #include "paddle/fluid/framework/fleet/ascend_wrapper.h"
 #endif
+#include "paddle/fluid/pybind/op_function_generator.h"
 
 std::set<std::string> gen_list = {"elementwise_add", "reduce_sum", "matmul_v2",
                                   "sigmoid"};
 
-// NOTE(zhiqiu): Commonly, the inputs of an auto-generated OP function are
-// determined by the OP's proto automatically, i.e., all the inputs registered
-// in OpMaker.
-// However, some OPs have dispensable inputs, which means the input can
-// be none under some conditions. Most dispensable inputs are not used in
-// imperative mode, so we drop those inputs when generating OP functions.
-// For the very few OPs whose dispensable inputs are used, we need to
-// manually specify them in this map.
-std::map<std::string, std::set<std::string>> op_ins_map = {
-    {"layer_norm", {"X", "Scale", "Bias"}},
-    {"fused_attention",
-     {"X", "LnScale", "LnBias", "QKVW", "QKVBias", "SrcMask", "OutLinearW",
-      "OutLinearBias", "Ln2Scale", "Ln2Bias"}},
-    {"instance_norm", {"X", "Scale", "Bias"}},
-    {"gru_unit", {"Input", "HiddenPrev", "Weight", "Bias"}},
-    {"label_smooth", {"X", "PriorDist"}},
-    {"assign", {"X"}},
-    {"reshape2", {"X", "Shape"}},
-    {"expand", {"X", "ExpandTimes"}},
-    {"slice", {"Input", "StartsTensor", "EndsTensor"}},
-    {"fake_quantize_dequantize_moving_average_abs_max",
-     {"X", "InScale", "InAccum", "InState"}},
-    {"nll_loss", {"X", "Label", "Weight"}},
-    {"bilinear_tensor_product", {"X", "Y", "Weight", "Bias"}},
-    {"gather", {"X", "Index", "Axis"}},
-    {"roi_pool", {"X", "ROIs", "RoisNum"}},
-    {"roi_align", {"X", "ROIs", "RoisNum"}},
-    {"psroi_pool", {"X", "ROIs", "RoisNum"}},
-    {"collect_fpn_proposals",
-     {"MultiLevelRois", "MultiLevelScores", "MultiLevelRoIsNum"}},
-    {"distribute_fpn_proposals", {"FpnRois", "RoisNum"}},
-    {"warpctc", {"Logits", "Label", "LogitsLength", "LabelLength"}},
-    {"hierarchical_sigmoid",
-     {"X", "W", "Label", "PathTable", "PathCode", "Bias"}},
-    {"moving_average_abs_max_scale", {"X", "InAccum", "InState"}},
-    {"multiclass_nms3", {"BBoxes", "Scores", "RoisNum"}},
-    {"box_coder", {"PriorBox", "PriorBoxVar", "TargetBox"}},
-    {"momentum", {"Param", "Grad", "Velocity", "LearningRate", "MasterParam"}},
-    {"sparse_momentum", {"Param", "Grad", "Velocity", "Index", "LearningRate"}},
-    {"rnn", {"Input", "PreState", "WeightList", "SequenceLength"}},
-    {"run_program", {"X", "Params"}},
-    {"faster_tokenizer", {"Text", "Vocab", "TextPair"}},
-    {"matrix_rank", {"X", "TolTensor"}},
-    {"adam",
-     {"Param", "Grad", "LearningRate", "Moment1", "Moment2", "Beta1Pow",
-      "Beta2Pow", "MasterParam"}},
-    {"adamw",
-     {"Param", "Grad", "LearningRate", "Moment1", "Moment2", "Beta1Pow",
-      "Beta2Pow", "MasterParam"}},
-};
-
-// NOTE(zhiqiu): Like op_ins_map.
-// Commonly, the outputs of an auto-generated OP function are determined by
-// the OP's proto automatically, i.e., all the outputs registered in OpMaker.
-// However, some OPs have dispensable outputs, which means the output can
-// be none under some conditions. Most dispensable outputs are not used in
-// imperative mode, so we drop those outputs when generating OP functions.
-// For the very few OPs whose dispensable outputs are used, we need to
-// manually specify them in this map.
-std::map<std::string, std::set<std::string>> op_outs_map = {
-    {"fake_quantize_dequantize_moving_average_abs_max",
-     {"Out", "OutScale", "OutAccum", "OutState"}},
-    {"batch_norm",
-     {"Y", "MeanOut", "VarianceOut", "SavedMean", "SavedVariance",
-      "ReserveSpace"}},
-    {"fused_attention",
-     {"LnMean", "LnVariance", "LnOut", "QKVOut", "QKVBiasOut", "TransposeOut2",
-      "QKOut", "QKTVOut", "SoftmaxOut", "AttnDropoutMaskOut", "AttnDropoutOut",
-      "SrcMaskOut", "FMHAOut", "OutLinearOut", "DropoutMaskOut", "Ln2Mean",
-      "Ln2Variance", "BiasDropoutResidualOut", "Y"}},
-    {"sync_batch_norm",
-     {"Y", "MeanOut", "VarianceOut", "SavedMean", "SavedVariance",
-      "ReserveSpace"}},
-    {"unique", {"Out", "Index", "Indices", "Counts"}},
-    {"unique_consecutive", {"Out", "Index", "Counts"}},
-    {"generate_proposals", {"RpnRois", "RpnRoiProbs", "RpnRoisNum"}},
-    {"collect_fpn_proposals", {"FpnRois", "RoisNum"}},
-    {"matrix_nms", {"Out", "Index", "RoisNum"}},
-    {"distribute_fpn_proposals",
-     {"MultiFpnRois", "RestoreIndex", "MultiLevelRoIsNum"}},
-    {"moving_average_abs_max_scale",
-     {"Out", "OutScale", "OutAccum", "OutState"}},
-    {"multiclass_nms3", {"Out", "NmsRoisNum"}},
-    {"generate_proposals_v2", {"RpnRois", "RpnRoiProbs", "RpnRoisNum"}},
-    {"momentum", {"ParamOut", "VelocityOut", "MasterParamOut"}},
-    {"sparse_momentum", {"ParamOut", "VelocityOut"}},
-    {"rnn", {"DropoutState", "Reserve", "Out", "State"}},
-    {"lamb",
-     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut"}},
-    {"run_program", {"DOut"}},
-    {"adam",
-     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut",
-      "MasterParamOut"}},
-    {"adamw",
-     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut",
-      "MasterParamOut"}},
-};
-
-// NOTE(zhiqiu): Commonly, the outputs of an auto-generated OP function are
-// generated in C++ automatically.
-// However, some OPs need to receive their outputs from Python instead of
-// generating them in C++. There are mainly two reasons for that:
-// (1) Optimizer OPs need to update the input param in place, like sgd,
-//     so they need to be passed an output that is the same as the input param.
-// (2) A very few Python APIs have an out argument, like fill_constant,
-//     so they need to pass the Python output to C++.
-// Actually, this is not a good design, since it may break the SSA graph,
-// especially in declarative mode.
-// For those OPs, we need to manually specify the outputs that need to be
-// passed in via this map.
-std::map<std::string, std::set<std::string>> op_passing_outs_map = {
-    {"sgd", {"ParamOut"}},
-    {"adam",
-     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut",
-      "MasterParamOut"}},
-    {"adamw",
-     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut",
-      "MasterParamOut"}},
-    {"average_accumulates",
-     {"out_sum_1", "out_sum_2", "out_sum_3", "out_num_accumulates",
-      "out_old_num_accumulates", "out_num_updates"}},
-    {"momentum", {"ParamOut", "VelocityOut", "MasterParamOut"}},
-    {"sparse_momentum", {"ParamOut", "VelocityOut"}},
-    {"batch_norm", {"MeanOut", "VarianceOut"}},
-    {"sync_batch_norm", {"MeanOut", "VarianceOut"}},
-    {"accuracy", {"Correct", "Total"}},
-    {"fill_constant", {"Out"}},
-    {"recv_v2", {"Out"}},
-    {"partial_recv", {"Out"}},
-    {"matmul", {"Out"}},
-    {"c_broadcast", {"Out"}},
-    {"c_sync_calc_stream", {"Out"}},
-    {"c_sync_comm_stream", {"Out"}},
-    {"c_reduce_sum", {"Out"}},
-    {"c_reduce_max", {"Out"}},
-    {"c_reduce_min", {"Out"}},
-    {"c_reduce_prod", {"Out"}},
-    {"c_reduce", {"Out"}},
-    {"c_scatter", {"Out"}},
-    {"barrier", {"Out"}},
-    {"fake_quantize_dequantize_moving_average_abs_max",
-     {"Out", "OutScale", "OutAccum", "OutState"}},
-    {"fake_quantize_dequantize_abs_max", {"Out", "OutScale"}},
-    {"fake_channel_wise_quantize_dequantize_abs_max", {"Out", "OutScale"}},
-    {"check_finite_and_unscale", {"Out", "FoundInfinite"}},
-    {"update_loss_scaling",
-     {"Out", "LossScaling", "OutGoodSteps", "OutBadSteps"}},
-    {"moving_average_abs_max_scale",
-     {"Out", "OutScale", "OutAccum", "OutState"}},
-    {"lamb",
-     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut"}},
-    {"rnn", {"DropoutState"}},
-    {"run_program", {"Out", "DOut", "OutScope"}},
-    {"clear_float_status", {"FloatStatusOut"}},
-    {"get_float_status", {"FloatStatusOut"}},
-};
-
-// NOTE(pangyoki): Tensor View Strategy.
-// In this case, a new output varbase will be created, and this varbase will
-// reuse the input varbase's allocation.
-// It's a map: the key is the view op name, and the value is a pair that maps
-// the input varbase to the output varbase.
-std::map<std::string, std::pair<std::string, std::string>> view_op_map = {
-    {"squeeze2", {"X", "Out"}},  // "X" -> "Out"
-    {"unsqueeze2", {"X", "Out"}},
-    {"reshape2", {"X", "Out"}},
-    {"flatten_contiguous_range", {"X", "Out"}},
-};
-
 // clang-format off
 const char* OUT_INITIALIZER_TEMPLATE =
     R"({"%s", {std::shared_ptr<imperative::VarBase>(new imperative::VarBase("auto_"+std::to_string(VarBaseUniqueNameID++)+"_"))}})";
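The tables removed above are plain lookup maps keyed by op type; the generator consults them to decide which dispensable inputs and outputs survive into the generated function. A minimal sketch of such a lookup; the map type and the label_smooth entry mirror the declarations above, while the driver loop is purely illustrative:

#include <iostream>
#include <map>
#include <set>
#include <string>

static const std::map<std::string, std::set<std::string>> op_ins_map = {
    {"label_smooth", {"X", "PriorDist"}},
};

int main() {
  // Keep a dispensable input only if the op lists it in the table.
  for (const std::string& input : {"X", "PriorDist", "Unlisted"}) {
    auto it = op_ins_map.find("label_smooth");
    bool keep = it != op_ins_map.end() && it->second.count(input) > 0;
    std::cout << input << (keep ? " kept\n" : " dropped\n");
  }
}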
diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc
index 749782f2413e5..5587952facc53 100644
--- a/paddle/fluid/pybind/op_function_generator.cc
+++ b/paddle/fluid/pybind/op_function_generator.cc
@@ -32,77 +32,6 @@
 #include "paddle/fluid/framework/fleet/ascend_wrapper.h"
 #endif
 
-// NOTE(zhiqiu): Commonly, the outputs of an auto-generated OP function are
-// generated in C++ automatically.
-// However, some OPs need to receive their outputs from Python instead of
-// generating them in C++. There are mainly two reasons for that:
-// (1) Optimizer OPs need to update the input param in place, like sgd,
-//     so they need to be passed an output that is the same as the input param.
-// (2) A very few Python APIs have an out argument, like fill_constant,
-//     so they need to pass the Python output to C++.
-// Actually, this is not a good design, since it may break the SSA graph,
-// especially in declarative mode.
-// For those OPs, we need to manually specify the outputs that need to be
-// passed in via this map.
-std::map<std::string, std::set<std::string>> op_passing_outs_map = {
-    {"sgd", {"ParamOut"}},
-    {"adam",
-     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut",
-      "MasterParamOut"}},
-    {"adamw",
-     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut",
-      "MasterParamOut"}},
-    {"average_accumulates",
-     {"out_sum_1", "out_sum_2", "out_sum_3", "out_num_accumulates",
-      "out_old_num_accumulates", "out_num_updates"}},
-    {"momentum", {"ParamOut", "VelocityOut", "MasterParamOut"}},
-    {"sparse_momentum", {"ParamOut", "VelocityOut"}},
-    {"batch_norm", {"MeanOut", "VarianceOut"}},
-    {"sync_batch_norm", {"MeanOut", "VarianceOut"}},
-    {"accuracy", {"Correct", "Total"}},
-    {"fill_constant", {"Out"}},
-    {"recv_v2", {"Out"}},
-    {"partial_recv", {"Out"}},
-    {"matmul", {"Out"}},
-    {"c_broadcast", {"Out"}},
-    {"c_sync_calc_stream", {"Out"}},
-    {"c_sync_comm_stream", {"Out"}},
-    {"c_reduce_sum", {"Out"}},
-    {"c_reduce_max", {"Out"}},
-    {"c_reduce_min", {"Out"}},
-    {"c_reduce_prod", {"Out"}},
-    {"c_reduce", {"Out"}},
-    {"c_scatter", {"Out"}},
-    {"barrier", {"Out"}},
-    {"fake_quantize_dequantize_moving_average_abs_max",
-     {"Out", "OutScale", "OutAccum", "OutState"}},
-    {"fake_quantize_dequantize_abs_max", {"Out", "OutScale"}},
-    {"fake_channel_wise_quantize_dequantize_abs_max", {"Out", "OutScale"}},
-    {"check_finite_and_unscale", {"Out", "FoundInfinite"}},
-    {"update_loss_scaling",
-     {"Out", "LossScaling", "OutGoodSteps", "OutBadSteps"}},
-    {"moving_average_abs_max_scale",
-     {"Out", "OutScale", "OutAccum", "OutState"}},
-    {"lamb",
-     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut"}},
-    {"rnn", {"DropoutState"}},
-    {"run_program", {"Out", "DOut", "OutScope"}},
-    {"clear_float_status", {"FloatStatusOut"}},
-    {"get_float_status", {"FloatStatusOut"}},
-};
-
-// NOTE(pangyoki): Tensor View Strategy.
-// In this case, a new output varbase will be created, and this varbase will
-// reuse the input varbase's allocation.
-// It's a map: the key is the view op name, and the value is a pair that maps
-// the input varbase to the output varbase.
-std::map<std::string, std::pair<std::string, std::string>> view_op_map = {
-    {"squeeze2", {"X", "Out"}},  // "X" -> "Out"
-    {"unsqueeze2", {"X", "Out"}},
-    {"reshape2", {"X", "Out"}},
-    {"flatten_contiguous_range", {"X", "Out"}},
-};
-
 // NOTE(pangyoki): Inplace OP with duplicable input.
 // The set includes inplace ops that have duplicable input.
 // The first Varbase in input needs to be specified for the inplace strategy
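The op_passing_outs_map entries exist because those outputs are allocated on the Python side and alias the inputs; for an optimizer such as sgd, ParamOut is the same variable as Param, so the update happens in place. A tiny sketch of that aliasing contract, with a hypothetical Var type standing in for imperative::VarBase:

#include <cassert>
#include <memory>

struct Var { std::shared_ptr<float> value; };

// param_out is passed in by the caller and aliases param, so the update
// is visible in place, exactly like sgd's ParamOut.
void sgd_step(const Var& param, float grad, float lr, Var* param_out) {
  *param_out->value = *param.value - lr * grad;
}

int main() {
  Var p{std::make_shared<float>(1.0f)};
  sgd_step(p, /*grad=*/1.0f, /*lr=*/0.5f, /*param_out=*/&p);
  assert(*p.value == 0.5f);  // the parameter was updated in place
}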
diff --git a/paddle/fluid/pybind/op_function_generator.h b/paddle/fluid/pybind/op_function_generator.h
index ad7fa780976d7..7000097e0abcb 100644
--- a/paddle/fluid/pybind/op_function_generator.h
+++ b/paddle/fluid/pybind/op_function_generator.h
@@ -119,3 +119,74 @@ std::map<std::string, std::set<std::string>> op_outs_map = {
     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut",
      "MasterParamOut"}},
 };
+
+// NOTE(zhiqiu): Commonly, the outputs of an auto-generated OP function are
+// generated in C++ automatically.
+// However, some OPs need to receive their outputs from Python instead of
+// generating them in C++. There are mainly two reasons for that:
+// (1) Optimizer OPs need to update the input param in place, like sgd,
+//     so they need to be passed an output that is the same as the input param.
+// (2) A very few Python APIs have an out argument, like fill_constant,
+//     so they need to pass the Python output to C++.
+// Actually, this is not a good design, since it may break the SSA graph,
+// especially in declarative mode.
+// For those OPs, we need to manually specify the outputs that need to be
+// passed in via this map.
+std::map<std::string, std::set<std::string>> op_passing_outs_map = {
+    {"sgd", {"ParamOut"}},
+    {"adam",
+     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut",
+      "MasterParamOut"}},
+    {"adamw",
+     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut",
+      "MasterParamOut"}},
+    {"average_accumulates",
+     {"out_sum_1", "out_sum_2", "out_sum_3", "out_num_accumulates",
+      "out_old_num_accumulates", "out_num_updates"}},
+    {"momentum", {"ParamOut", "VelocityOut", "MasterParamOut"}},
+    {"sparse_momentum", {"ParamOut", "VelocityOut"}},
+    {"batch_norm", {"MeanOut", "VarianceOut"}},
+    {"sync_batch_norm", {"MeanOut", "VarianceOut"}},
+    {"accuracy", {"Correct", "Total"}},
+    {"fill_constant", {"Out"}},
+    {"recv_v2", {"Out"}},
+    {"partial_recv", {"Out"}},
+    {"matmul", {"Out"}},
+    {"c_broadcast", {"Out"}},
+    {"c_sync_calc_stream", {"Out"}},
+    {"c_sync_comm_stream", {"Out"}},
+    {"c_reduce_sum", {"Out"}},
+    {"c_reduce_max", {"Out"}},
+    {"c_reduce_min", {"Out"}},
+    {"c_reduce_prod", {"Out"}},
+    {"c_reduce", {"Out"}},
+    {"c_scatter", {"Out"}},
+    {"barrier", {"Out"}},
+    {"fake_quantize_dequantize_moving_average_abs_max",
+     {"Out", "OutScale", "OutAccum", "OutState"}},
+    {"fake_quantize_dequantize_abs_max", {"Out", "OutScale"}},
+    {"fake_channel_wise_quantize_dequantize_abs_max", {"Out", "OutScale"}},
+    {"check_finite_and_unscale", {"Out", "FoundInfinite"}},
+    {"update_loss_scaling",
+     {"Out", "LossScaling", "OutGoodSteps", "OutBadSteps"}},
+    {"moving_average_abs_max_scale",
+     {"Out", "OutScale", "OutAccum", "OutState"}},
+    {"lamb",
+     {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut"}},
+    {"rnn", {"DropoutState"}},
+    {"run_program", {"Out", "DOut", "OutScope"}},
+    {"clear_float_status", {"FloatStatusOut"}},
+    {"get_float_status", {"FloatStatusOut"}},
+};
+
+// NOTE(pangyoki): Tensor View Strategy.
+// In this case, a new output varbase will be created, and this varbase will
+// reuse the input varbase's allocation.
+// It's a map: the key is the view op name, and the value is a pair that maps
+// the input varbase to the output varbase.
+std::map<std::string, std::pair<std::string, std::string>> view_op_map = {
+    {"squeeze2", {"X", "Out"}},  // "X" -> "Out"
+    {"unsqueeze2", {"X", "Out"}},
+    {"reshape2", {"X", "Out"}},
+    {"flatten_contiguous_range", {"X", "Out"}},
+};
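The view relationship recorded in view_op_map means the output varbase reuses the input's allocation instead of copying it, so writes through one are visible through the other. A minimal sketch of that aliasing, with a shared_ptr standing in for the real allocation object:

#include <cassert>
#include <memory>
#include <vector>

struct Var { std::shared_ptr<std::vector<float>> allocation; };

int main() {
  Var x{std::make_shared<std::vector<float>>(8, 1.0f)};
  Var out;
  out.allocation = x.allocation;       // reshape2: "X" -> "Out" reuses storage
  (*out.allocation)[0] = 3.0f;         // writing through the view...
  assert((*x.allocation)[0] == 3.0f);  // ...is visible through the input
}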