eager_local_interpreter_with_infer_cache (#8619)
* ThreadLocalGuard

* refactor EagerBlobObjectList

* op_args_reserved_size

* remove useless comments

* rename one::EagerBlobObjectList* to vm::EagerBlobObject*

* refactor signature of InstructionsBuilder::Call

* PhysicalRun

* refactor InstructionsBuilder::Call

* remove unused StatefulOpKernel::need_check_mem_case

* remove EagerLocalTensorImpl::is_shape_synced_

* eager_local_interpreter_with_infer_cache

* remove useless code

* resolve comments

* refactor TensorMeta::TensorMeta(const TensorMeta)

* use small vector

* add kMaxNumDims

* fix incorrect include

* fix split Symbol LocalTensorMeta error

* refactor SoftSync

* move SmallVector from common/container_util.h to framework/instructions_builder.cpp

* move ONEFLOW_EAGER_ENABLE_LOCAL_INFER_CACHE to eager.h

* add blank line

* resolve comments

* minor fix

* refine

* explicit scalar initialization

* fix static check error

* auto format by CI

* of_format

* resolve comment

* refine

* refine

* refine

Co-authored-by: lixinqi <lixinqi0703106@163.com>
Co-authored-by: Li Xinqi <lixinqi2010@gmail.com>
Co-authored-by: oneflow-ci-bot <ci-bot@oneflow.org>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
5 people authored Jul 19, 2022
1 parent 6ccedd3 commit f57b0a0
Showing 9 changed files with 441 additions and 113 deletions.
1 change: 1 addition & 0 deletions oneflow/core/common/constant.h
@@ -24,6 +24,7 @@ static const int64_t kInvalidSessionId = -1;
static const std::string kNoPassTag = "";
static const std::string kMainOp = "main_op";
static const int64_t kMaxSplitAxis = 6;
constexpr size_t kMaxNumDims = 8;
static const std::string kAsymmetricCodeErrorMsg =
"Maybe executing different code in different ranks, please check if the code is branched and "
"operates on the global tensor.";
28 changes: 28 additions & 0 deletions oneflow/core/common/env_var/eager.h
@@ -0,0 +1,28 @@
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef ONEFLOW_CORE_COMMON_ENV_VAR_EAGER_H_
#define ONEFLOW_CORE_COMMON_ENV_VAR_EAGER_H_

#include "oneflow/core/common/env_var/env_var.h"

namespace oneflow {

// NOTE: the env variable 'ONEFLOW_EAGER_ENABLE_LOCAL_INFER_CACHE' indicates whether to
// use the infer cache in the naive local op interpreter.
DEFINE_THREAD_LOCAL_ENV_BOOL(ONEFLOW_EAGER_ENABLE_LOCAL_INFER_CACHE, true);

} // namespace oneflow
#endif // ONEFLOW_CORE_COMMON_ENV_VAR_EAGER_H_
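
The new flag is read through ThreadLocalEnvBool, as the GetOrInfer change later in this commit shows. A minimal sketch of that pattern (editor-added; the helper name EagerLocalInferCacheEnabled is hypothetical and not part of the diff):

#include "oneflow/core/common/env_var/eager.h"

namespace oneflow {

// Returns true unless ONEFLOW_EAGER_ENABLE_LOCAL_INFER_CACHE=0 is exported;
// the value defaults to true per the DEFINE_THREAD_LOCAL_ENV_BOOL above.
inline bool EagerLocalInferCacheEnabled() {
  return ThreadLocalEnvBool<ONEFLOW_EAGER_ENABLE_LOCAL_INFER_CACHE>();
}

}  // namespace oneflow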
3 changes: 2 additions & 1 deletion oneflow/core/common/stride.cpp
@@ -15,6 +15,7 @@ limitations under the License.
*/

#include "oneflow/core/common/stride.h"
#include "oneflow/core/common/constant.h"
#include "oneflow/core/common/protobuf.h"
#include "oneflow/core/common/cplusplus_17.h"

@@ -29,7 +30,7 @@ Stride::Stride(const Shape& shape) {
std::multiplies<>{});
} else if (ndim > 0 && shape.elem_cnt() == 0) {
// 0-size shape
std::vector<int64_t> tmp_shape(ndim);
small_vector<int64_t, kMaxNumDims> tmp_shape(ndim);
for (int64_t i = 0; i < ndim; ++i) { tmp_shape[i] = shape.At(i) > 0 ? shape.At(i) : 1; }
std::exclusive_scan(tmp_shape.rbegin(), tmp_shape.rend(), rbegin(), (int64_t)1,
std::multiplies<>{});
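
For reference, a standalone sketch (editor-added, not library code) of the zero-size-shape branch this hunk touches: dims of size 0 are clamped to 1, and an exclusive scan over the reversed dims yields row-major strides. RowMajorStrides is a hypothetical helper.

#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

std::vector<int64_t> RowMajorStrides(const std::vector<int64_t>& shape) {
  // Clamp 0-size dims to 1, mirroring tmp_shape above.
  std::vector<int64_t> clamped(shape.size());
  for (size_t i = 0; i < shape.size(); ++i) { clamped[i] = shape[i] > 0 ? shape[i] : 1; }
  std::vector<int64_t> stride(shape.size());
  // Exclusive scan over the reversed dims: the last stride is 1, each earlier
  // stride is the running product of the dims to its right.
  std::exclusive_scan(clamped.rbegin(), clamped.rend(), stride.rbegin(), int64_t{1},
                      std::multiplies<>{});
  return stride;  // e.g. shape (2, 0, 3) -> strides (3, 3, 1)
}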
209 changes: 209 additions & 0 deletions oneflow/core/framework/local_tensor_infer_cache.cpp
@@ -0,0 +1,209 @@
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/local_tensor_infer_cache.h"
#include "oneflow/core/framework/tensor_tuple.h"
#include "oneflow/core/framework/tensor.h"
#include "oneflow/core/operator/operator.h"
#include "oneflow/core/framework/op_expr.h"
#include "oneflow/core/common/container_util.h"
#include "oneflow/core/common/env_var/eager.h"
#include "oneflow/core/framework/infer_util.h"

namespace oneflow {
namespace one {

namespace {

Maybe<void> CheckIsDeviceSupportedByOp(const Device& device, const std::string& op_type_name) {
if (IsCpuOnly(op_type_name)) { CHECK_EQ_OR_RETURN(device.type(), "cpu"); } // NOLINT
return Maybe<void>::Ok();
}

Maybe<void> CheckInputDeviceIdentical(const LocalTensorMetaInferArgs& infer_args,
Symbol<Device> default_device) {
for (int i = 0; i < infer_args.input_local_tensor_metas().size(); ++i) {
CHECK_OR_RETURN(default_device
== JUST(VectorAt(infer_args.input_local_tensor_metas(), i))->device())
<< Error::RuntimeError()
<< "Expected all tensors to be on the same device, but found "
"at least two devices, "
<< default_device->ToString() << " (positional 0) and "
<< JUST(VectorAt(infer_args.input_local_tensor_metas(), i))->device()->ToString()
<< " (positional " << i << ")!";
}
return Maybe<void>::Ok();
}

class UserOpExprDeviceAndStreamInferContext final : public user_op::DeviceAndStreamInferContext {
public:
UserOpExprDeviceAndStreamInferContext(const UserOpExpr* user_op_expr,
const LocalTensorMetaInferArgs& infer_args,
OpArgsVector<LocalTensorMeta>* output_tensor_metas)
: user_op_expr_(user_op_expr),
composed_attrs_(infer_args.attrs(), user_op_expr->base_attrs()),
infer_args_(infer_args),
output_tensor_metas_(output_tensor_metas) {}

const std::vector<std::pair<std::string, int32_t>>& inputs() const override {
return user_op_expr_->indexed_input_pairs();
}

const std::vector<std::pair<std::string, int32_t>>& outputs() const override {
return user_op_expr_->indexed_output_pairs();
}

Symbol<Device>* OutputTensorDevice4ArgNameAndIndex(const std::string& name,
int64_t index) override {
const auto& arg_tuple = *user_op_expr_->output_arg_tuple();
int32_t tuple_index = arg_tuple.TensorTupleIndex4ArgNameAndIndex(name, index);
CHECK_GE(tuple_index, 0);
CHECK_LT(tuple_index, user_op_expr_->output_size());
return output_tensor_metas_->at(tuple_index).mut_device();
}

Symbol<Device> InputTensorDevice4ArgNameAndIndex(const std::string& name,
int64_t index) const override {
const auto& arg_tuple = *user_op_expr_->input_arg_tuple();
int32_t tuple_index = arg_tuple.TensorTupleIndex4ArgNameAndIndex(name, index);
CHECK_GE(tuple_index, 0);
CHECK_LT(tuple_index, user_op_expr_->input_size());
return infer_args_.input_local_tensor_metas().at(tuple_index)->device();
}

private:
const std::shared_ptr<const user_op::AttrVal>& Attr4Name(
const std::string& attr_name) const override {
return composed_attrs_.Attr4Name(attr_name);
}
const UserOpExpr* user_op_expr_;
const ComposedAttrMap composed_attrs_;
const LocalTensorMetaInferArgs& infer_args_;
OpArgsVector<LocalTensorMeta>* output_tensor_metas_;
};

Maybe<Symbol<Stream>> InferDeviceAndStream(const UserOpExpr& user_op_expr,
const Symbol<Device>& default_device,
const LocalTensorMetaInferArgs& infer_args,
OpArgsVector<LocalTensorMeta>* output_tensor_metas) {
Symbol<Stream> stream;
if (!user_op_expr.has_device_and_stream_infer_fn()) {
stream = JUST(GetDefaultStreamByDevice(default_device));
for (int i = 0; i < user_op_expr.output_size(); i++) {
auto& tensor_meta = output_tensor_metas->at(i);
*tensor_meta.mut_device() = default_device;
}
} else {
if (!user_op_expr.device_and_stream_infer_fn()) {
Symbol<Device> device = infer_args.input_local_tensor_metas().at(0)->device();
stream = JUST(GetDefaultStreamByDevice(device));
} else {
UserOpExprDeviceAndStreamInferContext device_and_stream_ctx(&user_op_expr, infer_args,
output_tensor_metas);
stream = JUST(user_op_expr.device_and_stream_infer_fn()(&device_and_stream_ctx));
}
}
return stream;
}

} // namespace

size_t LocalTensorMetaInferArgs::hash_value() const {
size_t hash_value = std::hash<AttrMap>()(attrs_);
HashCombine(&hash_value, std::hash<Symbol<Device>>()(default_device_));
const auto& tensor_meta_hash_functor = std::hash<Symbol<LocalTensorMeta>>();
for (const auto& tensor_meta : input_local_tensor_metas_) {
HashCombine(&hash_value, tensor_meta_hash_functor(tensor_meta));
}
return hash_value;
}

bool LocalTensorMetaInferArgs::operator==(const LocalTensorMetaInferArgs& other) const {
return this->attrs_ == other.attrs_ && this->default_device_ == other.default_device_
&& this->input_local_tensor_metas_ == other.input_local_tensor_metas_;
}

Maybe<void> LocalTensorMetaInferArgs::Init(const AttrMap& attrs, Symbol<Device> default_device,
const TensorTuple& input_tensors) {
this->attrs_ = attrs;
this->default_device_ = default_device;
this->input_local_tensor_metas_.resize(input_tensors.size());
JUST(this->InitInputLocalTensorMetas(input_tensors));
return Maybe<void>::Ok();
}

Maybe<void> LocalTensorMetaInferArgs::InitInputLocalTensorMetas(const TensorTuple& input_tensors) {
for (int i = 0; i < input_tensors.size(); ++i) {
LocalTensorMeta* local_tensor_meta =
dynamic_cast<LocalTensorMeta*>(input_tensors.at(i)->mut_tensor_meta());
CHECK_NOTNULL_OR_RETURN(local_tensor_meta); // NOLINT
input_local_tensor_metas_.at(i) = SymbolOf(*local_tensor_meta);
}
return Maybe<void>::Ok();
}

/* static */ Maybe<const LocalTensorInferResult> LocalTensorInferCache::Infer(
const UserOpExpr& user_op_expr, const LocalTensorMetaInferArgs& infer_args) {
const auto& default_device = infer_args.default_device();
JUST(CheckInputDeviceIdentical(infer_args, default_device));
JUST(CheckIsDeviceSupportedByOp(*default_device, user_op_expr.op_type_name()));

auto result = std::make_unique<LocalTensorInferResult>(user_op_expr.output_size());

OpArgsVector<LocalTensorMeta> output_mut_metas(user_op_expr.output_size());
// Infer devices
Symbol<Stream> stream =
JUST(InferDeviceAndStream(user_op_expr, default_device, infer_args, &output_mut_metas));
result->set_stream(stream);

{
const auto& GetInputTensorMeta = [&](int32_t i) -> const TensorMeta* {
return infer_args.input_local_tensor_metas().at(i).shared_from_symbol().get();
};
JUST(user_op_expr.InferPhysicalTensorDesc(
infer_args.attrs(), stream->device()->type(), GetInputTensorMeta,
[&](int32_t i) -> TensorMeta* { return &output_mut_metas.at(i); }));
}

auto* mut_output_tensor_metas = result->mut_output_tensor_metas();
for (int32_t i = 0; i < user_op_expr.output_size(); ++i) {
if (!JUST(user_op_expr.SupportNonContiguous())) {
std::shared_ptr<Stride> stride(new Stride(output_mut_metas.at(i).shape()));
output_mut_metas.at(i).set_stride(stride);
}
mut_output_tensor_metas->at(i) = SymbolOf(output_mut_metas.at(i));
}
return std::shared_ptr<const LocalTensorInferResult>(std::move(result));
}

Maybe<const LocalTensorInferResult> LocalTensorInferCache::GetOrInfer(
const LocalTensorMetaInferArgs& infer_args) {
if (ThreadLocalEnvBool<ONEFLOW_EAGER_ENABLE_LOCAL_INFER_CACHE>()) {
auto iter = cache_.find(infer_args);
if (iter == cache_.end()) {
const auto& user_op_expr = user_op_expr_.lock();
CHECK_OR_RETURN(static_cast<bool>(user_op_expr)); // NOLINT
const auto& output_tensor_metas = JUST(Infer(*user_op_expr, infer_args));
iter = cache_.emplace(infer_args, output_tensor_metas).first;
}
return iter->second;
} else {
const auto& user_op_expr = user_op_expr_.lock();
return JUST(Infer(*user_op_expr, infer_args));
}
}

} // namespace one
} // namespace oneflow
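
How an interpreter would presumably consume this cache, as an editor-added sketch (DispatchSketch is hypothetical; the real wiring lives in the interpreter files of this PR): build the hashable infer args once per op invocation, then let GetOrInfer either return the memoized result or run full inference.

#include "oneflow/core/framework/local_tensor_infer_cache.h"

namespace oneflow {
namespace one {

Maybe<void> DispatchSketch(const TensorTuple& inputs, const AttrMap& attrs,
                           Symbol<Device> default_device, LocalTensorInferCache* cache) {
  LocalTensorMetaInferArgs infer_args;
  JUST(infer_args.Init(attrs, default_device, inputs));
  const auto& result = JUST(cache->GetOrInfer(infer_args));
  // result->stream() selects the execution stream; result->output_tensor_metas()
  // holds one Symbol<LocalTensorMeta> per output, ready for building output tensors.
  return Maybe<void>::Ok();
}

}  // namespace one
}  // namespace oneflow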
124 changes: 124 additions & 0 deletions oneflow/core/framework/local_tensor_infer_cache.h
@@ -0,0 +1,124 @@
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef ONEFLOW_CORE_FRAMEWORK_LOCAL_TENSOR_INFER_CACHE_H_
#define ONEFLOW_CORE_FRAMEWORK_LOCAL_TENSOR_INFER_CACHE_H_

#include "oneflow/core/common/symbol.h"
#include "oneflow/core/common/maybe.h"
#include "oneflow/core/common/small_vector.h"
#include "oneflow/core/common/op_args_reserved_size.h"
#include "oneflow/core/framework/attr_map.h"
#include "oneflow/core/framework/device.h"
#include "oneflow/core/framework/stream.h"
#include "oneflow/core/framework/tensor_meta.h"

namespace oneflow {

class Device;

namespace one {

template<typename T>
using OpArgsVector = small_vector<T, kOpArgsReservedSize>;

class TensorTuple;
class UserOpExpr;

class LocalTensorMetaInferArgs final {
public:
LocalTensorMetaInferArgs() = default;
LocalTensorMetaInferArgs(const LocalTensorMetaInferArgs&) = default;
LocalTensorMetaInferArgs(LocalTensorMetaInferArgs&&) = default;
~LocalTensorMetaInferArgs() = default;

const OpArgsVector<Symbol<LocalTensorMeta>>& input_local_tensor_metas() const {
return input_local_tensor_metas_;
}
const AttrMap& attrs() const { return attrs_; }

const Symbol<Device>& default_device() const { return default_device_; }

size_t hash_value() const;

bool operator==(const LocalTensorMetaInferArgs& other) const;

Maybe<void> Init(const AttrMap& attrs, Symbol<Device> default_device,
const TensorTuple& input_tensors);

private:
Maybe<void> InitInputLocalTensorMetas(const TensorTuple& input_tensors);

AttrMap attrs_;
Symbol<Device> default_device_;
OpArgsVector<Symbol<LocalTensorMeta>> input_local_tensor_metas_;
};

} // namespace one
} // namespace oneflow

namespace std {

template<>
struct hash<oneflow::one::LocalTensorMetaInferArgs> final {
size_t operator()(const oneflow::one::LocalTensorMetaInferArgs& val) const {
return val.hash_value();
}
};

} // namespace std

namespace oneflow {
namespace one {

class LocalTensorInferResult final {
public:
LocalTensorInferResult(size_t output_size) : output_tensor_metas_(output_size) {}
LocalTensorInferResult(const LocalTensorInferResult&) = delete;
LocalTensorInferResult(LocalTensorInferResult&&) = delete;
~LocalTensorInferResult() = default;

const OpArgsVector<Symbol<LocalTensorMeta>>& output_tensor_metas() const {
return output_tensor_metas_;
}
OpArgsVector<Symbol<LocalTensorMeta>>* mut_output_tensor_metas() { return &output_tensor_metas_; }

const Symbol<Stream>& stream() const { return stream_; }
void set_stream(const Symbol<Stream>& stream) { stream_ = stream; }

private:
OpArgsVector<Symbol<LocalTensorMeta>> output_tensor_metas_;
Symbol<Stream> stream_;
};

class LocalTensorInferCache final {
public:
LocalTensorInferCache(const std::shared_ptr<const UserOpExpr>& user_op_expr)
: user_op_expr_(user_op_expr) {}

Maybe<const LocalTensorInferResult> GetOrInfer(const LocalTensorMetaInferArgs& infer_args);

private:
static Maybe<const LocalTensorInferResult> Infer(const UserOpExpr& user_op_expr,
const LocalTensorMetaInferArgs& infer_args);

std::weak_ptr<const UserOpExpr> user_op_expr_;
HashMap<LocalTensorMetaInferArgs, std::shared_ptr<const LocalTensorInferResult>> cache_;
};

} // namespace one
} // namespace oneflow

#endif // ONEFLOW_CORE_FRAMEWORK_LOCAL_TENSOR_INFER_CACHE_H_
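
A small editor-added note on the key design: the std::hash specialization above is what lets LocalTensorMetaInferArgs serve as the key of the HashMap inside LocalTensorInferCache, so calls repeating the same attrs, default device, and input tensor metas land on the same cached result. HashOf below is a hypothetical helper for illustration only.

#include "oneflow/core/framework/local_tensor_infer_cache.h"

size_t HashOf(const oneflow::one::LocalTensorMetaInferArgs& args) {
  return std::hash<oneflow::one::LocalTensorMetaInferArgs>()(args);  // forwards to args.hash_value()
}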