Skip to content

Commit

Permalink
Symbolic local tensor meta (#8662)
Browse files Browse the repository at this point in the history
* ThreadLocalGuard

* refactor EagerBlobObjectList

* op_args_reserved_size

* remove useless comments

* rename one::EagerBlobObjectList* to vm::EagerBlobObject*

* refactor signature of InstructionsBuilder::Call

* PhysicalRun

* refactor InstructionsBuilder::Call

* remove unused StatefulOpKernel::need_check_mem_case

* remove EagerLocalTensorImpl::is_shape_synced_

* eager_local_interpreter_with_infer_cache

* remove useless code

* resolve comments

* refactor TensorMeta::TensorMeta(const TensorMeta)

* use small vector

* Symbolic LocalTensorMeta

* check shape in critical_section

* add kMaxNumDims

* fix error include

* fix split Symbol LocalTensorMeta error

* fix split cache and symbolic local tensor meta error

* refactor SoftSync

* move SmallVector from common/container_util.h to framework/instructions_builder.cpp

* move ONEFLOW_EAGER_ENABLE_LOCAL_INFER_CACHE to eager.h

* add blank line

* resolve comments

* minor fix

* refine

* explicit scalar initialization

* fix static check error

* auto format by CI

* of_format

* resolve comment

* refine

* refine

* refine

* fix error

* define MutOutputShape and MutOutputStride in InferContext

* define_mut_output_shape_and_mut_output_stride_in_infer_ctx

* fix merge master error

* fix typo

* fix static check error

* define_mut_output_dtype_and_mut_output_is_dynamic_in_infer_ctx

* define_mut_output_dtype_and_mut_output_tensor_desc

* replace const DataType& with DataType

* split const and mut func in LocalTensorMeta

* replace const DataType& with DataType ret

* split TensorDesc4ArgNameAndIndex and MutTensorDesc4ArgNameAndIndex

* refine

* minor fix

* fix merge error

* fix warning error

* refine

* fix static check error

* Update op_expr.cpp

* Update op_expr.cpp

* split MutTensorMeta and MutLocalTensorMeta

* Update stateful_opkernel.cpp

* refine

* fix static check error

* refine

* refine

* resolve comment

* refine

* fix typo

Co-authored-by: Houjiang Chen <chenhoujiangcug@gmail.com>

* fix typo

* use OpArgsVector

Co-authored-by: lixinqi <lixinqi0703106@163.com>
Co-authored-by: Li Xinqi <lixinqi2010@gmail.com>
Co-authored-by: oneflow-ci-bot <ci-bot@oneflow.org>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
Co-authored-by: Houjiang Chen <chenhoujiangcug@gmail.com>
  • Loading branch information
6 people authored Jul 25, 2022
1 parent 60827b0 commit 146288e
Show file tree
Hide file tree
Showing 33 changed files with 475 additions and 190 deletions.
6 changes: 3 additions & 3 deletions oneflow/api/python/functional/tensor_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ class LocalTensorSharedNumpyDataFunctor {
}
stride_val /= element_size_in_bytes;
}
auto tensor_meta = std::make_shared<LocalTensorMeta>(shape, strides, data_type, device, 0);
auto tensor_meta = SymbolOf(LocalTensorMeta(shape, strides, data_type, device, 0));

// Build TensorBuffer
const auto& Free = [array](char* dptr) {
Expand All @@ -286,12 +286,12 @@ class LocalTensorSharedNumpyDataFunctor {
auto tensor_storage = std::make_shared<TensorStorage>(tensor_data);

// Build Tensor
auto tensor_impl = std::make_shared<EagerLocalTensorImpl>(tensor_meta, tensor_storage,
auto tensor_impl = std::make_shared<EagerLocalTensorImpl>(tensor_storage,
/*requires_grad=*/false,
/*ls_leaf=*/true);

// Init blob
JUST(tensor_impl->InitEagerBlobObject(NewLocalDepObject()));
JUST(tensor_impl->InitEagerBlobObject(tensor_meta, NewLocalDepObject()));
const auto& stream = JUST(GetDefaultStreamByDevice(device));
const auto& eager_blob_object = JUST(tensor_impl->eager_blob_object());
JUST(eager_blob_object->init_producer_stream(stream));
Expand Down
29 changes: 29 additions & 0 deletions oneflow/core/common/op_args_vector.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef ONEFLOW_CORE_COMMON_OP_ARGS_VECTOR_H_
#define ONEFLOW_CORE_COMMON_OP_ARGS_VECTOR_H_

#include "oneflow/core/common/small_vector.h"
#include "oneflow/core/common/op_args_reserved_size.h"

namespace oneflow {

// Small-buffer-optimized vector for per-operator argument lists: keeps up to
// kOpArgsReservedSize elements inline (no heap allocation) and only spills to
// the heap for ops with unusually many inputs/outputs.
template<typename T>
using OpArgsVector = small_vector<T, kOpArgsReservedSize>;

}  // namespace oneflow

#endif // ONEFLOW_CORE_COMMON_OP_ARGS_VECTOR_H_
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/tensor_desc.h"
#include "oneflow/core/common/tensor_desc.h"
#include "oneflow/core/register/blob_desc.pb.h"

namespace oneflow {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef ONEFLOW_CORE_FRAMEWORK_TENSOR_DESC_H_
#define ONEFLOW_CORE_FRAMEWORK_TENSOR_DESC_H_
#ifndef ONEFLOW_CORE_COMMON_TENSOR_DESC_H_
#define ONEFLOW_CORE_COMMON_TENSOR_DESC_H_

#include "oneflow/core/common/util.h"
#include "oneflow/core/register/blob_desc.pb.h"
#include "oneflow/core/common/shape.h"
#include "oneflow/core/common/stride.h"
#include "oneflow/core/common/data_type.pb.h"

namespace oneflow {

class BlobDescProto;

namespace user_op {

class TensorDesc {
Expand Down Expand Up @@ -77,4 +79,4 @@ class NaiveTensorDesc final : public TensorDesc {

} // namespace oneflow

#endif // ONEFLOW_CORE_FRAMEWORK_TENSOR_DESC_H_
#endif // ONEFLOW_CORE_COMMON_TENSOR_DESC_H_
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,36 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/tensor_meta.h"
#include "oneflow/core/common/tensor_meta.h"
#include "oneflow/core/common/stride.h"
#include "oneflow/core/framework/device.h"

namespace oneflow {
namespace one {

// Constructs an uninitialized MutTensorMeta: empty shape/stride and
// kInvalidDataType act as "not yet set" sentinels.
MutTensorMeta::MutTensorMeta()
    : TensorMeta(std::make_shared<const Shape>(), std::make_shared<const Stride>(),
                 kInvalidDataType) {}

// Constructs a MutTensorMeta from shape and dtype; the stride is derived from
// the shape via Stride(const Shape&) — presumably a contiguous (row-major)
// layout, TODO confirm against Stride's constructor.
MutTensorMeta::MutTensorMeta(const std::shared_ptr<const Shape>& shape, DataType dtype)
    : TensorMeta(shape, std::make_shared<const Stride>(*shape), dtype) {}

// Constructs a MutTensorMeta with an explicitly supplied stride (for
// non-contiguous layouts such as views/slices).
MutTensorMeta::MutTensorMeta(const std::shared_ptr<const Shape>& shape,
                             const std::shared_ptr<const Stride>& stride, DataType dtype)
    : TensorMeta(shape, stride, dtype) {}

// Two MutTensorMeta are equal when shape, dtype and stride all match.
// The is_dynamic_ field is deliberately ignored (see the matching note in
// CalcHashValue, which must stay consistent with this predicate).
bool MutTensorMeta::operator==(const MutTensorMeta& other) const {
  const bool same_shape = (*shape_ptr() == *other.shape_ptr());
  const bool same_dtype = (dtype() == other.dtype());
  const bool same_stride = (stride() == other.stride());
  return same_shape && same_dtype && same_stride;
}

// Hash over shape, dtype and stride, combined with XOR. is_dynamic_ is
// deliberately excluded so the hash stays consistent with operator==.
size_t MutTensorMeta::CalcHashValue() const {
  size_t hash_value = std::hash<Shape>()(*shape_ptr());
  hash_value ^= std::hash<DataType>()(dtype());
  hash_value ^= std::hash<Stride>()(stride());
  return hash_value;
}

LocalTensorMeta::LocalTensorMeta()
: TensorMeta(std::make_shared<const Shape>(), std::make_shared<const Stride>(),
DataType::kInvalidDataType),
Expand Down Expand Up @@ -50,6 +73,36 @@ size_t LocalTensorMeta::CalcHashValue() const {
^ std::hash<Device>()(*device()) ^ std::hash<Stride>()(stride()) ^ storage_offset();
}

// Constructs an uninitialized MutLocalTensorMeta: empty shape/stride,
// kInvalidDataType, a default (unset) device symbol and zero storage offset.
MutLocalTensorMeta::MutLocalTensorMeta()
    : MutTensorMeta(std::make_shared<const Shape>(), std::make_shared<const Stride>(),
                    kInvalidDataType),
      device_(Symbol<Device>()),
      storage_offset_(0) {}

// Constructs a MutLocalTensorMeta from shape/dtype/device. The stride is
// derived from the shape via Stride(const Shape&) — presumably contiguous,
// TODO confirm — and the storage offset defaults to 0.
MutLocalTensorMeta::MutLocalTensorMeta(const std::shared_ptr<const Shape>& shape, DataType dtype,
                                       Symbol<Device> device)
    : MutTensorMeta(shape, std::make_shared<const Stride>(*shape), dtype),
      device_(device),
      storage_offset_(0) {}

// Fully-specified constructor: explicit stride and storage offset, for
// non-contiguous tensors (e.g. views into a shared storage).
MutLocalTensorMeta::MutLocalTensorMeta(const std::shared_ptr<const Shape>& shape,
                                       const std::shared_ptr<const Stride>& stride, DataType dtype,
                                       Symbol<Device> device, int64_t storage_offset)
    : MutTensorMeta(shape, stride, dtype), device_(device), storage_offset_(storage_offset) {}

// Equality compares shape, dtype, device, stride and storage offset.
// is_dynamic_ is deliberately ignored; CalcHashValue below must stay
// consistent with this predicate.
bool MutLocalTensorMeta::operator==(const MutLocalTensorMeta& other) const {
  const bool same_shape = (*shape_ptr() == *other.shape_ptr());
  const bool same_dtype = (dtype() == other.dtype());
  const bool same_device = (*device() == *other.device());
  const bool same_layout =
      (stride() == other.stride()) && (storage_offset() == other.storage_offset());
  return same_shape && same_dtype && same_device && same_layout;
}

// Hash over shape, dtype, device, stride and storage offset, combined with
// XOR. is_dynamic_ is deliberately excluded, matching operator==.
size_t MutLocalTensorMeta::CalcHashValue() const {
  size_t hash_value = std::hash<Shape>()(*shape_ptr());
  hash_value ^= std::hash<DataType>()(dtype());
  hash_value ^= std::hash<Device>()(*device());
  hash_value ^= std::hash<Stride>()(stride());
  hash_value ^= storage_offset();
  return hash_value;
}

bool GlobalTensorMeta::operator==(const GlobalTensorMeta& other) const {
// It's correct to ignore is_dynamic_ field.
return *this->shape_ptr() == *other.shape_ptr() && this->dtype() == other.dtype()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef ONEFLOW_FRAMEWORK_TENSOR_META_H_
#define ONEFLOW_FRAMEWORK_TENSOR_META_H_
#ifndef ONEFLOW_COMMON_TENSOR_META_H_
#define ONEFLOW_COMMON_TENSOR_META_H_

#include <memory>
#include "oneflow/core/framework/tensor_desc.h"
#include "oneflow/core/common/tensor_desc.h"
#include "oneflow/core/common/symbol.h"

namespace oneflow {
Expand Down Expand Up @@ -60,15 +60,23 @@ class TensorMeta : public user_op::TensorDesc {
bool is_dynamic() const override { return is_dynamic_; }
bool is_contiguous() const { return IsContiguous(shape(), *stride_); }

void set_shape(const std::shared_ptr<const Shape>& val) { shape_ = val; }
Shape* mut_shape() override { return const_cast<Shape*>(shape_.get()); }
void set_stride(const std::shared_ptr<const Stride>& val) { stride_ = val; }
Stride* mut_stride() override { return const_cast<Stride*>(stride_.get()); }
DataType* mut_dtype() { return &data_type_; }
void set_dtype(DataType data_type) { data_type_ = data_type; }
DataType* mut_data_type() override { return &data_type_; }
bool* mut_is_dynamic() override { return &is_dynamic_; }
void set_is_dynamic(bool val) override { is_dynamic_ = val; }
virtual Shape* mut_shape() override {
PRINT_BUG_PROMPT_AND_ABORT();
return nullptr;
}
virtual Stride* mut_stride() override {
PRINT_BUG_PROMPT_AND_ABORT();
return nullptr;
}
virtual DataType* mut_data_type() override {
PRINT_BUG_PROMPT_AND_ABORT();
return nullptr;
}
virtual bool* mut_is_dynamic() override {
PRINT_BUG_PROMPT_AND_ABORT();
return nullptr;
}
virtual void set_is_dynamic(bool val) override { PRINT_BUG_PROMPT_AND_ABORT(); }

protected:
TensorMeta& operator=(const TensorMeta& other) {
Expand All @@ -79,13 +87,39 @@ class TensorMeta : public user_op::TensorDesc {
return *this;
}

private:
std::shared_ptr<const Shape> shape_;
std::shared_ptr<const Stride> stride_;
DataType data_type_;
bool is_dynamic_;
};

// Mutable tensor meta: unlike the (immutable, Symbol-shareable) TensorMeta
// base, this subclass re-enables the mut_*/set_* accessors so shape, stride,
// dtype and the dynamic flag can be modified in place.
class MutTensorMeta : public TensorMeta {
 public:
  // uninitialized MutTensorMeta.
  MutTensorMeta();
  MutTensorMeta(const MutTensorMeta&) = default;
  MutTensorMeta(const std::shared_ptr<const Shape>& shape, DataType dtype);
  MutTensorMeta(const std::shared_ptr<const Shape>& shape,
                const std::shared_ptr<const Stride>& stride, DataType dtype);
  virtual ~MutTensorMeta() = default;

  // Overrides of the base's aborting mutators: expose the stored objects for
  // in-place mutation. NOTE(review): mut_shape()/mut_stride() const_cast away
  // the const on shared state — callers must hold the only reference.
  Shape* mut_shape() override { return const_cast<Shape*>(shape_.get()); }
  Stride* mut_stride() override { return const_cast<Stride*>(stride_.get()); }
  DataType* mut_data_type() override { return &data_type_; }
  bool* mut_is_dynamic() override { return &is_dynamic_; }
  void set_is_dynamic(bool val) override { is_dynamic_ = val; }

  // Pointer-swapping setters (replace the shared shape/stride wholesale).
  void set_shape(const std::shared_ptr<const Shape>& val) { shape_ = val; }
  void set_stride(const std::shared_ptr<const Stride>& val) { stride_ = val; }
  DataType* mut_dtype() { return &data_type_; }
  void set_dtype(DataType data_type) { data_type_ = data_type; }

  // Equality and hashing ignore is_dynamic_ (see the .cpp definitions).
  bool operator==(const MutTensorMeta& other) const;
  size_t CalcHashValue() const;

  MutTensorMeta& operator=(const MutTensorMeta& other) = default;
};

class LocalTensorMeta : public TensorMeta {
public:
// uninitialized LocalTensorMeta.
Expand All @@ -100,13 +134,38 @@ class LocalTensorMeta : public TensorMeta {
const Symbol<Device>& device() const { return device_; }
int64_t storage_offset() const { return storage_offset_; }

bool operator==(const LocalTensorMeta& other) const;
size_t CalcHashValue() const;

LocalTensorMeta& operator=(const LocalTensorMeta& other) = default;

private:
Symbol<Device> device_;
int64_t storage_offset_;
};

class MutLocalTensorMeta : public MutTensorMeta {
public:
// uninitialized MutLocalTensorMeta.
MutLocalTensorMeta();
MutLocalTensorMeta(const MutLocalTensorMeta&) = default;
MutLocalTensorMeta(const std::shared_ptr<const Shape>& shape, DataType dtype,
Symbol<Device> device);
MutLocalTensorMeta(const std::shared_ptr<const Shape>& shape,
const std::shared_ptr<const Stride>& stride, DataType dtype,
Symbol<Device> device, int64_t storage_offset);
virtual ~MutLocalTensorMeta() = default;

const Symbol<Device>& device() const { return device_; }
int64_t storage_offset() const { return storage_offset_; }

Symbol<Device>* mut_device() { return &device_; }
void set_storage_offset(int64_t offset) { storage_offset_ = offset; }

bool operator==(const LocalTensorMeta& other) const;
bool operator==(const MutLocalTensorMeta& other) const;
size_t CalcHashValue() const;

LocalTensorMeta& operator=(const LocalTensorMeta& other) = default;
MutLocalTensorMeta& operator=(const MutLocalTensorMeta& other) = default;

private:
Symbol<Device> device_;
Expand All @@ -127,10 +186,6 @@ class GlobalTensorMeta : public TensorMeta {
Symbol<NdSbp> nd_sbp() const { return nd_sbp_; }
Symbol<ParallelDesc> parallel_desc() const { return parallel_desc_; }

void set_nd_sbp(Symbol<NdSbp> val) { nd_sbp_ = val; }

void set_parallel_desc(Symbol<ParallelDesc> val) { parallel_desc_ = val; }

size_t CalcHashValue() const;

private:
Expand Down Expand Up @@ -159,4 +214,4 @@ struct hash<oneflow::one::GlobalTensorMeta> final {

} // namespace std

#endif // ONEFLOW_FRAMEWORK_TENSOR_META_H_
#endif // ONEFLOW_COMMON_TENSOR_META_H_
5 changes: 2 additions & 3 deletions oneflow/core/eager/critical_section_phy_instr_operand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,7 @@ void InputCriticalSectionBeginPhyInstrOperand::AccessBlobByOpName(uint64_t of_bl
{
size_t header_size = of_blob->mut_blob()->blob_desc().ByteSizeOfBlobHeader();
CHECK_EQ(header_size, eager_blob_object->shape().NumAxes() * sizeof(int64_t));
std::memcpy(of_blob->mut_blob()->mut_header_ptr(), eager_blob_object->mut_header_ptr(),
header_size);
CHECK_EQ(of_blob->blob().static_shape(), eager_blob_object->shape());
}
const auto& end_event_record = op_name2end_event_record_->at(op_name);
if (eager_blob_object->dptr() == nullptr) {
Expand All @@ -93,7 +92,7 @@ void OutputCriticalSectionBeginPhyInstrOperand::AccessBlobByOpName(uint64_t of_b
CHECK(interfaces_valid().at(i));
OfBlob* of_blob = reinterpret_cast<OfBlob*>(of_blob_ptr);
auto& eager_blob_object = eager_blob_objects_->at(i);
of_blob->blob().shape_view().ToShape(eager_blob_object->mut_shape());
CHECK_EQ(of_blob->blob().static_shape(), eager_blob_object->shape());
const auto& end_event_record = op_name2end_event_record_->at(op_name);
if (eager_blob_object->dptr() == nullptr) {
end_event_record->Init(std::make_shared<NaiveEventRecord>());
Expand Down
Loading

0 comments on commit 146288e

Please sign in to comment.