[oneDNN] First fix to #33021 #33174

Changes from 3 commits
New test file (+154 lines):

@@ -0,0 +1,154 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <gtest/gtest.h>
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/tests/api/tester_helper.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"

DEFINE_string(infer_shape, "", "data shape file");
DEFINE_int32(sample, 20, "number of sample");

namespace paddle {
namespace inference {
namespace analysis {

struct Record {
  std::vector<float> data;
  std::vector<int32_t> shape;
};

Record ProcessALine(const std::string &line, const std::string &shape_line) {
  VLOG(3) << "process a line";
  std::vector<std::string> columns;

  Record record;
  std::vector<std::string> data_strs;
  split(line, ' ', &data_strs);
  for (auto &d : data_strs) {
    record.data.push_back(std::stof(d));
  }

  std::vector<std::string> shape_strs;
  split(shape_line, ' ', &shape_strs);
  for (auto &s : shape_strs) {
    record.shape.push_back(std::stoi(s));
  }
  return record;
}
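
A minimal usage sketch of ProcessALine, given the definitions above; the data and shape values below are illustrative, not taken from the PR. Each line of the data file holds space-separated floats, and the matching line of the shape file holds the tensor dimensions:

```cpp
// Hypothetical file contents, for illustration only:
//   data line:  "0.1 0.2 0.3 0.4 0.5 0.6"
//   shape line: "1 2 3"
Record r = ProcessALine("0.1 0.2 0.3 0.4 0.5 0.6", "1 2 3");
// r.data  holds the six floats 0.1f ... 0.6f
// r.shape == {1, 2, 3}, i.e. a 1x2x3 tensor of 6 elements
```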

void SetConfig(AnalysisConfig *cfg) {
  cfg->SetModel(FLAGS_infer_model + "/model", FLAGS_infer_model + "/params");
  cfg->DisableGpu();
  // cfg->SwitchIrDebug();  // Enable to have graphs dumped
  cfg->SwitchSpecifyInputNames(false);
  cfg->SetCpuMathLibraryNumThreads(FLAGS_cpu_num_threads);
}

void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
              const std::string &line, const std::string &shape_line) {
  auto record = ProcessALine(line, shape_line);

  PaddleTensor input;
  input.shape = record.shape;
  input.dtype = PaddleDType::FLOAT32;
  size_t input_size = record.data.size() * sizeof(float);
  input.data.Resize(input_size);
  memcpy(input.data.data(), record.data.data(), input_size);
  std::vector<PaddleTensor> input_slots;
  input_slots.assign({input});
  (*inputs).emplace_back(input_slots);
}

#ifdef PADDLE_WITH_MKLDNN
int GetNumCachedObjects(void) {
  auto &pool = platform::DeviceContextPool::Instance();
  platform::CPUPlace place;
  auto onednn_dev_ctx =
      dynamic_cast<platform::MKLDNNDeviceContext *>(pool.Get(place));
  return onednn_dev_ctx->GetCachedObjectsNumber();
}

Review comment: Does this return the number of blobs cached by all threads for the different input shapes?

Reply: This function just returns the total number of cached objects, regardless of the number of threads used. There is one oneDNN cache, shared among threads.

void validate_cache_onednn(int cache_capacity = 1) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
  cfg.EnableMKLDNN();
  cfg.SetMkldnnCacheCapacity(cache_capacity);

  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
  std::vector<std::vector<PaddleTensor>> ref_outputs;
  std::vector<std::vector<PaddleTensor>> input_slots_all;

  std::ifstream file(FLAGS_infer_data);
  std::ifstream infer_file(FLAGS_infer_shape);
  std::vector<std::string> lines;
  std::vector<std::string> shape_lines;

  // Let's work with 4 samples
  auto num_samples = 4;
  ref_outputs.resize(num_samples);
  lines.resize(num_samples);
  shape_lines.resize(num_samples);

  // Let's remember the number of cached objects before
  // execution and after every single execution
  std::vector<int> cache_filling;
  cache_filling.push_back(GetNumCachedObjects());

  // compute predictions sequentially
  for (int i = 0; i < num_samples; ++i) {
    std::getline(file, lines[i]);
    std::getline(infer_file, shape_lines[i]);
    SetInput(&input_slots_all, lines[i], shape_lines[i]);
    predictor->Run(input_slots_all[i], &ref_outputs[i], FLAGS_batch_size);
    // record number of cached objects
    cache_filling.push_back(GetNumCachedObjects());
  }

  file.close();
  infer_file.close();

  predictor.reset(nullptr);
  cache_filling.push_back(GetNumCachedObjects());

  // Compare results
  // First and last value should be equal, i.e. before using the cache (empty)
  // and after releasing the executor
  PADDLE_ENFORCE_EQ(
      cache_filling[0], cache_filling[cache_filling.size() - 1],
      platform::errors::Fatal("Cache size before execution and after "
                              "releasing Executor do not match"));

  // Iterate to check that the cache does not keep growing
  // once cache capacity is exceeded
  if (cache_capacity != 0) {
    for (int i = cache_capacity + 1; i < num_samples + 1; ++i) {
      PADDLE_ENFORCE_EQ(
          cache_filling[cache_capacity], cache_filling[i],
          platform::errors::Fatal("Cache capacity should not increase "
                                  "after full capacity is used"));
    }
  }
}
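
To make the assertions concrete, here is a hypothetical cache_filling trace for the configuration used by the test below (cache_capacity == 2, four samples), assuming the samples have distinct shapes and each newly seen shape caches N objects (both assumptions are for illustration only):

```cpp
// Hypothetical trace, not real measurements:
// cache_filling == { 0,      // before any Run()
//                    N,      // after sample 0: first shape cached
//                    2 * N,  // after sample 1: capacity of 2 shapes reached
//                    2 * N,  // after sample 2: oldest shape evicted, new one cached
//                    2 * N,  // after sample 3: same eviction pattern
//                    0 };    // after predictor.reset(nullptr)
```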

TEST(Analyzer_detect, validate_cache_onednn) {
  validate_cache_onednn(2 /*cache_capacity */);
}
#endif

Review comment: Since the whole TEST(...) sits within #ifdef PADDLE_WITH_MKLDNN / #endif, could this file be renamed to xxx_mkldnn_tester.cc? Otherwise, when WITH_MKLDNN is OFF, this test should not be executed, but it looks like it currently still will be.

Reply: Ok. Since these are going to be multi-threaded functional tests of oneDNN execution, I can change the name of the file.

}  // namespace analysis
}  // namespace inference
}  // namespace paddle
Changes to the MKLDNNDeviceContext implementation:
@@ -563,7 +563,7 @@ Place CUDAPinnedDeviceContext::GetPlace() const { return place_; }
 MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place)
     : CPUDeviceContext(place), p_blobmap_() {
   p_blobmap_.reset(new BlobMap());
-  p_exec_items_.reset(new ExecMap());
+  p_exec_items_.reset(new ExecShape());
   p_mutex_.reset(new std::mutex());
 }

@@ -644,22 +644,40 @@ void MKLDNNDeviceContext::ResetBlobMap(void* ptr) {
     if (ptr == nullptr) {
       p_blobmap_->clear();
     } else {
-      for (auto& v : (*p_exec_items_)[ptr]) {
-        (v.first)->erase(v.second);
+      // Iterate through all shapes and release
+      // for each shape and active executor all entries
+      // of this executor
+      for (auto& s : *p_exec_items_) {
+        for (auto& v : (*s.second)[ptr]) {
+          (v.first)->erase(v.second);
+        }
+        s.second->erase(ptr);
       }
-      p_exec_items_->erase(ptr);
     }
   } else {
     VLOG(3) << "Prevented Clearing DNNL cache.";
     block_next_cache_clearing_ = false;
   }
 }

+void MKLDNNDeviceContext::RemoveShapeEntriesWithExecutor(void) const {
+  p_exec_items_->erase(p_exec_items_->begin());
+}

Review comment: Why are you removing the first object in the map? How do you know that this is the actual key (shape) you want to delete?

Reply: Removing shapes works according to a FIFO concept, so I remove the objects related to the oldest shape.

 void MKLDNNDeviceContext::LinkEntryWithExecutor(BlobPtr_t<KeyBlob> pblob,
                                                 KeyBlob::iterator it) const {
+  // Take current input shape from TLS
   // Take current executor address from TLS
   // and for this executor's items add the one defined with arguments
-  (*p_exec_items_)[tls().get_curr_exec()].push_back(std::make_pair(pblob, it));
+  auto key_it = p_exec_items_
+                    ->insert(std::make_pair(tls().cur_input_shape_str,
+                                            std::make_shared<ExecMap>()))
+                    .first;
+  (*key_it->second)[tls().get_curr_exec()].push_back(std::make_pair(pblob, it));
+
+  VLOG(3) << "LinkEntryWithExecutor, shapes: " << p_exec_items_->size()
+          << " curr exec size: "
+          << (*key_it->second)[tls().get_curr_exec()].size() << "\n";
 }

 void MKLDNNDeviceContext::BlockNextCacheClearing() {

@@ -716,6 +734,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
       VLOG(2) << "sid=" << sid
               << ", remove all blobs of shape: " << sBlob->begin()->first;
       sBlob->erase(sBlob->begin()->first);
+      RemoveShapeEntriesWithExecutor();
     }
     pBlob = std::make_shared<KeyBlob>();
     (*sBlob)[tls().cur_input_shape_str] = pBlob;

@@ -739,7 +758,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
   return;
 }

-unsigned int MKLDNNDeviceContext::GetCachedObjectsNumber(void) {
+unsigned int MKLDNNDeviceContext::GetCachedObjectsNumber(void) const {
   unsigned int num_entries = 0;

Review comment: Is the GetCachedObjectsNumber result num_threads * num_input_shapes?

Reply: Actually, the structure of the cache has three levels: session IDs, shapes, and the actual cached objects per shape. So to get the size of the cache we iterate through the sessions and their shapes. For every shape we then take the number of cached objects (for that shape) and sum it up with the counts for the other shapes.

   for (auto const& l3 : *p_blobmap_) {
     for (auto const& l2 : *(l3.second)) {
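
A minimal sketch of the three-level walk the reply describes; the hunk above is truncated before the accumulation line, so the loop body shown here is an assumption, not the verbatim PR code:

```cpp
// Sketch only: sessions -> input shapes -> cached objects per shape.
unsigned int num_entries = 0;
for (auto const& l3 : *p_blobmap_) {     // level 3: session ids
  for (auto const& l2 : *(l3.second)) {  // level 2: shapes within a session
    num_entries += (l2.second)->size();  // level 1: objects cached for a shape
  }
}
```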
Changes to the MKLDNNDeviceContext header:
@@ -750,6 +750,7 @@ class MKLDNNDeviceContext : public CPUDeviceContext {
   using ExecMap = std::unordered_map<
       void*, std::vector<std::pair<BlobPtr_t<KeyBlob>, KeyBlob::iterator>>>;
+  using ExecShape = std::unordered_map<std::string, std::shared_ptr<ExecMap>>;

Review comment: IMHO those structures deserve some nice documentation, since every time I look at this code I need to decipher what kind of data is stored here and what the keys in those maps are. Please write a detailed explanation. Maybe define some types which will help to make this code more readable, like:
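One possible set of aliases, illustrative only and derived from the definitions above (the names are hypothetical, not part of the PR):

```cpp
using ExecutorPtr = void*;  // address identifying a live executor
using CacheEntry =
    std::pair<BlobPtr_t<KeyBlob>, KeyBlob::iterator>;  // owning blob map + iterator to erase
// cache entries registered per executor:
using ExecMap = std::unordered_map<ExecutorPtr, std::vector<CacheEntry>>;
// per-executor entries grouped by input shape (key: shape string):
using ExecShape = std::unordered_map<std::string, std::shared_ptr<ExecMap>>;
```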

   explicit MKLDNNDeviceContext(CPUPlace place);

@@ -758,6 +759,7 @@ class MKLDNNDeviceContext : public CPUDeviceContext {
   // Register object to currently used executor's map
   void LinkEntryWithExecutor(BlobPtr_t<KeyBlob>, KeyBlob::iterator) const;
+  void RemoveShapeEntriesWithExecutor(void) const;

   // Remove all entries from the blob map
   void ResetBlobMap(void* ptr);

@@ -772,7 +774,7 @@ class MKLDNNDeviceContext : public CPUDeviceContext {
   void SetBlob(const std::string& name, std::shared_ptr<void> data) const;

   // Calculate number of oneDNN objects cached
-  unsigned int GetCachedObjectsNumber(void);
+  unsigned int GetCachedObjectsNumber(void) const;

   // Find a saved blob. Return nullptr if not found
   std::shared_ptr<void> GetBlob(const std::string& name) const;

@@ -785,7 +787,7 @@ class MKLDNNDeviceContext : public CPUDeviceContext {
   std::shared_ptr<BlobMap> p_blobmap_;
   // Map key is pointer of executor and value is a data(iterator in map) needed
   // to erase
-  std::shared_ptr<ExecMap> p_exec_items_;
+  std::shared_ptr<ExecShape> p_exec_items_;
   std::shared_ptr<std::mutex> p_mutex_;
   bool block_next_cache_clearing_ = false;
 };

Review comment (on ProcessALine in the new test file): Hi, this columns variable is not used?

Reply: Good catch!