This repository has been archived by the owner on Jan 24, 2024. It is now read-only.
Bitmain Sophon Saber Ops Implementation #151
Merged

49 commits:
21b2379  Implement print_tensor_device for BM  (guangzhixie)
b558318  Update BM tensor test  (guangzhixie)
99493a4  fix pooling api error  (SophonTPU)
1f02e14  Update pooling test  (guangzhixie)
a1e8214  Skip context init for BM  (guangzhixie)
b1b9f7c  remove flush action in print  (guangzhixie)
27517ca  ignore set_device for BM for now  (guangzhixie)
949c4c4  Update logs for copy_from  (guangzhixie)
51f0f2b  Initialize bm handle only in one place  (guangzhixie)
1fe4f19  chage tensor type_len  (hlzy)
683969c  Return correct size for AK_BM  (guangzhixie)
adcac0e  Implement conv for BM  (guangzhixie)
a4ed82e  Comment out last conv test for now  (guangzhixie)
d4aa3eb  Modify sync_memcpy & add bm_mem_from_device
19b5ace  Update BM conv params  (guangzhixie)
81e33aa  Init handle in init function  (guangzhixie)
630cabc  Include BM conv implementation  (guangzhixie)
e1c82c4  remove unecessary include  (guangzhixie)
6905020  empty create function  (guangzhixie)
59dba05  unit test for BM conv  (guangzhixie)
c27573a  Update BM tensor print function  (guangzhixie)
679ae3f  modify activation op, test pass  (SophonTPU)
c0edd55  Merge branch 'bitmain' of https://github.com/guangzhixie/Anakin into …  (SophonTPU)
1ab43e0  tensor_test  (hlzy)
80f57fb  Fix sync_memcpy functions & test_saber_buffer_BM all passes
a1bd3fd  Implement BM softmax  (guangzhixie)
7c0a0f0  only print in DEBUG  (guangzhixie)
635ff42  reduce iteration  (guangzhixie)
dc155af  tensor_test_update  (hlzy)
69cf433  Merge branch 'bitmain' of https://github.com/guangzhixie/Anakin into …  (hlzy)
4a9863f  Revert "reduce iteration"  (guangzhixie)
4f08bea  Merge branch 'bitmain' of https://github.com/guangzhixie/Anakin into …  (hlzy)
2997faf  modify fc op, compile error  (SophonTPU)
ff5039f  Update for BM softmax  (guangzhixie)
ebb12b4  xRevert "modify fc op, compile error"  (SophonTPU)
9846cd9  Merge branch 'bitmain' of https://github.com/guangzhixie/Anakin into …  (hlzy)
56f6122  change tensor_test_bm  (hlzy)
048a61c  Merge branch 'bitmain' into tensor_test_lian  (hlzy)
571e3a4  tensor test update  (hlzy)
62a04c8  Add back missing files  (guangzhixie)
bff601c  Add back missing files  (guangzhixie)
19413c5  Implement BM scale  (guangzhixie)
25fa481  pooling test  (SophonTPU)
e532873  Merge branch 'bitmain' of https://github.com/guangzhixie/Anakin into …  (SophonTPU)
56271d4  Fix d2d mem copy
80654f2  Merge branch 'bitmain' of https://github.com/guangzhixie/Anakin into …  (SophonTPU)
c5a30a7  Add batch norm operation  (guangzhixie)
b5cdc73  Implement batch norm for BM  (guangzhixie)
5c6ec7f  Use template specifications instead of macro  (guangzhixie)
@@ -19,7 +19,7 @@
 #include "core/shape.h"
 #include "core/events.h"
 #include "core/tensor_traits.h"
-
+#include <typeinfo>
 namespace anakin{

 namespace saber{
@@ -117,20 +117,49 @@ class Tensor : public TensorBase {
     /**
      *  \brief Constructor with allocated data ptr and entire memory shape.
     */
-    template <typename TargetType_t>
-    Tensor(Dtype* data_ptr, TargetType_t target, int id, Shape shape) {
-
-        CHECK_EQ(shape.dims(), TensorAPI::layout_dims::value) << \
-            "shape dims is not matched to layout type";
-        _shape = shape;
-        _valid_shape = shape;
-        _offset = Shape::zero(shape.dims());
-        std::shared_ptr<Buffer<TargetType_t>> buf_from_date = \
-            std::make_shared<Buffer<TargetType_t>>(data_ptr, shape.count() * _type_len(), id);
-        BufferMemShare(_buf, buf_from_date);
-        _is_subbuf = false;
-    }
+//    template <typename TargetType_t>
+//    Tensor(Dtype* data_ptr, TargetType_t target, int id, Shape shape) {
+//
+//        CHECK_EQ(shape.dims(), TensorAPI::layout_dims::value) << \
+//            "shape dims is not matched to layout type";
+//        _shape = shape;
+//        _valid_shape = shape;
+//        _offset = Shape::zero(shape.dims());
+//        std::shared_ptr<Buffer<TargetType_t>> buf_from_date = \
+//            std::make_shared<Buffer<TargetType_t>>(data_ptr, shape.count() * _type_len(), id);
+//        BufferMemShare(_buf, buf_from_date);
+//        _is_subbuf = false;
+//    }
+
+#ifdef USE_BM
[Review comment on the "#ifdef USE_BM" line, translated from Chinese: This is exactly the problem: use template specialization instead of controlling the original code with added macros. If both USE_CPU and USE_BM are enabled in CMake at the same time, can correct execution still be guaranteed?]
+    /**
+     *  \brief Constructor with allocated data ptr and entire memory shape. only for BM
+    */
+    template <typename Dtype_s,typename TargetType_t>
+    Tensor(Dtype_s* data_ptr, TargetType_t target, int id, Shape shape) {
+        CHECK_EQ(shape.dims(), TensorAPI::layout_dims::value) << \
+            "shape dims is not matched to layout type";
+        _shape = shape;
+        _valid_shape = shape;
+        _offset = Shape::zero(shape.dims());
+
+        if(typeid(Dtype_s) == typeid(AK_FLOAT))
+        {
+            std::shared_ptr<Buffer<TargetType_t>> buf_from_date = \
+                std::make_shared<Buffer<TargetType_t>>(&bm_mem_from_system(const_cast<Dtype_s *>(data_ptr)), shape.count() * _type_len(), id);
+
+            BufferMemShare(_buf, buf_from_date);
+        }
+        else
+        {
+            std::shared_ptr<Buffer<TargetType_t>> buf_from_date = \
+                std::make_shared<Buffer<TargetType_t>>(data_ptr, shape.count() * _type_len(), id);
+
+            BufferMemShare(_buf, buf_from_date);
+        }
+        _is_subbuf = false;
+    }
+#endif
     /**
      *  \brief Copy constructor, shallow copy.
     */
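The review comment above is what the last commit in the series ("Use template specifications instead of macro") eventually addresses. As a rough sketch only, with simplified stand-in types rather than the real Anakin Buffer/Tensor classes, the runtime typeid() branch can become a compile-time choice via specialization, so the CPU and BM paths can coexist in one build:

// Hypothetical sketch, not the PR's code: replace the runtime
// typeid(Dtype_s) == typeid(AK_FLOAT) branch with a specialization
// chosen at compile time.
#include <cstddef>
#include <iostream>

struct AK_FLOAT_tag {};  // stand-in for Anakin's AK_FLOAT data-type tag
struct AK_BM_tag {};     // stand-in for AK_BM

// Primary template: generic targets share the host pointer directly.
template <typename DataType>
struct BufferFrom {
    static void make(const float* /*data_ptr*/, std::size_t bytes) {
        std::cout << "plain host buffer, " << bytes << " bytes\n";
    }
};

// Specialization for the BM float path: the real code would first wrap
// the host pointer with bm_mem_from_system() before building the Buffer.
template <>
struct BufferFrom<AK_FLOAT_tag> {
    static void make(const float* /*data_ptr*/, std::size_t bytes) {
        std::cout << "bm_mem_from_system-wrapped buffer, " << bytes << " bytes\n";
    }
};

int main() {
    float data[8] = {0};
    BufferFrom<AK_BM_tag>::make(data, sizeof(data));    // generic path
    BufferFrom<AK_FLOAT_tag>::make(data, sizeof(data)); // BM float path
}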
@@ -580,7 +609,7 @@ class Tensor : public TensorBase {
         }
         CHECK_EQ(valid_size(), tensor.valid_size()) \
             << "sizes of two valid shapes must be the same";
-
+        /// get the proper process target wrapper
         typedef TargetWrapper<TargetType_t> API_t;
         typedef typename TargetTypeTraits<TargetType_t>::target_type target_type_t;
@@ -727,7 +756,8 @@ class Tensor : public TensorBase {
     SaberStatus copy_from(const Tensor<NewTargetType_t, NewDataType_t, NewLayOutType_t>& tensor) {
         LOG(WARNING) << "Invalid: copy_from is not allowed for current type.";
         return SaberInvalidValue;
     }
+
 #endif

 /**
@@ -942,15 +972,19 @@
 #ifdef USE_BM

+#ifndef BM_TENSOR_COPY
+#define BM_TENSOR_COPY
+
+
 template<> inline
 size_t Tensor<BM, AK_BM, NCHW>::_type_len(){
-    return 1;
+    return 4;
 }

 template<>
 template<> inline
 SaberStatus Tensor<BM, AK_BM, NCHW>::copy_from<X86, AK_FLOAT, NCHW>(const Tensor<X86, AK_FLOAT, NCHW>& tensor) {
-    LOG(INFO) << "BM copy_from";
+    LOG(INFO) << "BM copy_from X86";
     CHECK_EQ(valid_size(), tensor.valid_size()) << "sizes of two valid shapes must be the same";

     auto* device_data_ptr = mutable_data();
@@ -961,16 +995,62 @@ SaberStatus Tensor<BM, AK_BM, NCHW>::copy_from<X86, AK_FLOAT, NCHW>(const Tensor
 template<>
 template<> inline
 SaberStatus Tensor<X86, AK_FLOAT, NCHW>::copy_from<BM, AK_BM, NCHW>(const Tensor<BM, AK_BM, NCHW>& tensor) {
-    LOG(INFO) << "X86 copy_from";
+    LOG(INFO) << "X86 copy_from BM";
     CHECK_EQ(valid_size(), tensor.valid_size()) << "sizes of two valid shapes must be the same";

     auto* device_data_ptr = const_cast<bm_device_mem_t *>(tensor.data());
     BMDNN_CHECK(bm_memcpy_d2s(get_bm_handle(), bm_mem_from_system(mutable_data()), *device_data_ptr));
     return SaberSuccess;
 }

+/*
+template<> inline
+size_t Tensor<BM, AK_BM, NCHW>::_type_len(){
+    return 4;
+}
+
+template<>
+template<> inline
+SaberStatus Tensor<BM, AK_BM, NCHW>::copy_from<X86, AK_FLOAT, NCHW>(const Tensor<X86, AK_FLOAT, NCHW>& tensor) {
+    LOG(INFO) << "BM copy_from X86";
+    CHECK_EQ(valid_size(), tensor.valid_size()) << "sizes of two valid shapes must be the same";
+
+    auto* device_data_ptr = mutable_data();
+    BMDNN_CHECK(bm_memcpy_s2d(get_bm_handle(), *device_data_ptr, bm_mem_from_system(const_cast<float *>(tensor.data()))));
+    //BMDNN_CHECK(bm_memcpy_s2d(get_bm_handle(), *(bm_device_mem_t *)(mutable_data()), bm_mem_from_system(tensor.data())));
+    return SaberSuccess;
+}
+
+template<>
+template<> inline
+SaberStatus Tensor<X86, AK_FLOAT, NCHW>::copy_from<BM, AK_BM, NCHW>(const Tensor<BM, AK_BM, NCHW>& tensor) {
+    LOG(INFO) << "X86 copy_from BM";
+    CHECK_EQ(valid_size(), tensor.valid_size()) << "sizes of two valid shapes must be the same";
+
+    auto* device_data_ptr = const_cast<bm_device_mem_t *>(tensor.data());
+    BMDNN_CHECK(bm_memcpy_d2s(get_bm_handle(), bm_mem_from_system(mutable_data()), *device_data_ptr));
+    //BMDNN_CHECK(bm_memcpy_d2s(get_bm_handle(), bm_mem_from_system(mutable_data()), *(bm_device_mem_t *)(tensor.data())));
+    return SaberSuccess;
+}
+
+template<>
+template<> inline
+SaberStatus Tensor<BM, AK_BM, NCHW>::copy_from<BM, AK_BM, NCHW>(const Tensor<BM, AK_BM, NCHW>& tensor) {
+    LOG(INFO) << "BM copy_from BM";
+    CHECK_EQ(valid_size(), tensor.valid_size()) << "sizes of two valid shapes must be the same";
+
+    auto* device_data_ptr = const_cast<bm_device_mem_t *>(tensor.data());
+    //BMDNN_CHECK(bm_memcpy_d2s(get_bm_handle(), bm_mem_from_system(mutable_data()), *device_data_ptr));
+    //BMDNN_CHECK(bm_memcpy_d2s(get_bm_handle(), bm_mem_from_system(mutable_data()), *(bm_device_mem_t *)(tensor.data())));
+    return SaberSuccess;
+}
+*/

 #endif

 #endif


 } //namespace saber

 } //namespace anakin
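Taken together, the specializations above form a direction-dispatch pattern: the (destination, source) pair of template arguments selects whether the raw copy goes through bm_memcpy_s2d (host to device) or bm_memcpy_d2s (device to host), while the unspecialized primary template rejects unsupported pairings. Below is a self-contained sketch of that pattern using hypothetical minimal types, not the real Tensor<Target, DataType, Layout> signature:

// Hypothetical sketch of the copy_from dispatch pattern in the diff.
// The target type alone stands in for the full template triple.
#include <iostream>

struct X86 {};  // host target
struct BM {};   // Bitmain device target

template <typename Target>
struct Tensor {
    // Primary template mirrors the diff's fallback: unknown pairings
    // report "copy_from is not allowed for current type" and fail.
    template <typename SrcTarget>
    bool copy_from(const Tensor<SrcTarget>&) {
        std::cout << "invalid copy for this pairing\n";
        return false;
    }
};

// BM <- X86 is host-to-device, i.e. the bm_memcpy_s2d() route.
template <> template <>
inline bool Tensor<BM>::copy_from<X86>(const Tensor<X86>&) {
    std::cout << "would call bm_memcpy_s2d()\n";
    return true;
}

// X86 <- BM is device-to-host, i.e. the bm_memcpy_d2s() route.
template <> template <>
inline bool Tensor<X86>::copy_from<BM>(const Tensor<BM>&) {
    std::cout << "would call bm_memcpy_d2s()\n";
    return true;
}

int main() {
    Tensor<X86> host;
    Tensor<BM> device;
    device.copy_from(host);  // selects the s2d specialization
    host.copy_from(device);  // selects the d2s specialization
    host.copy_from(host);    // falls through to the primary template
}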
[Review comment, translated from Chinese: Is this thread-safe? Or does the hardware currently only support a single thread?]
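The commit "Initialize bm handle only in one place" centralizes creation, but the question stands. If single-threaded use cannot be assumed, one option, sketched below with a stub type rather than the real bm_handle_t API, is to route creation through a function-local static, which C++11 guarantees is initialized exactly once even when threads race; whether two threads may then use the handle concurrently still depends on the BM runtime's own guarantees.

// Hypothetical sketch: thread-safe one-time creation of a process-wide
// device handle. BmHandleStub stands in for the real bm_handle_t.
#include <iostream>
#include <thread>

struct BmHandleStub {
    BmHandleStub() {
        // The real code would request the device from the BM runtime here.
        std::cout << "handle created\n";
    }
};

BmHandleStub& get_bm_handle_once() {
    // C++11 "magic static": initialized exactly once, even if several
    // threads reach this line at the same time.
    static BmHandleStub handle;
    return handle;
}

int main() {
    std::thread t1([] { get_bm_handle_once(); });
    std::thread t2([] { get_bm_handle_once(); });
    t1.join();
    t2.join();
    // "handle created" prints once; concurrent use of the handle would
    // still need whatever locking the BM runtime requires.
}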