Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reuse data memory if not propagated down #86

Merged
merged 5 commits into from
Jul 20, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion include/caffe/common_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,9 @@ class FlattenLayer : public Layer<Dtype> {
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }


// Flatten only reshapes the blob, so the top blob can alias the bottom
// blob's data memory when the indices match (layer has exactly 1 bottom
// and 1 top), enabling the net's memory optimizer to reuse the buffer.
virtual inline bool is_sharing_data(int top_id, int bottom_id){
return top_id == bottom_id;
}
// Likewise, the top's diff memory can alias the matching bottom's diff,
// since flattening does not transform gradient values.
virtual inline bool is_sharing_diff(int top_id, int bottom_id){
return top_id == bottom_id;
}
Expand Down Expand Up @@ -391,6 +393,9 @@ class ReshapeLayer : public Layer<Dtype> {
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }

// Reshape changes only the blob's shape metadata, so the top blob can
// share the matching bottom blob's data memory.
virtual inline bool is_sharing_data(int top_id, int bottom_id) {
return top_id == bottom_id;
}
// The diff memory can be shared for the same reason: gradients pass
// through a reshape unmodified.
virtual inline bool is_sharing_diff(int top_id, int bottom_id) {
return top_id == bottom_id;
}
Expand Down Expand Up @@ -570,6 +575,10 @@ class SplitLayer : public Layer<Dtype> {
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int MinTopBlobs() const { return 1; }

// Split forwards the single bottom blob to every top unchanged, so all
// tops may share the bottom's data memory regardless of index.
virtual inline bool is_sharing_data(int top_id, int bottom_id) {
return true;
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
Expand Down Expand Up @@ -643,6 +652,13 @@ class SliceLayer : public Layer<Dtype> {
virtual inline bool EqualNumBottomTopBlobs() const { return true; }
virtual inline bool is_gathering() {return true;}

// Data may be shared between the matching bottom/top pair, except when
// running under MPI parallel mode, where sharing is disabled —
// presumably because inter-process communication needs distinct
// buffers (TODO confirm against the MPI gather/scatter path).
virtual inline bool is_sharing_data(int top_id, int bottom_id){
#ifndef USE_MPI
return top_id == bottom_id;
#else
return (top_id == bottom_id) && (Caffe::parallel_mode()!=Caffe::MPI);
#endif
}
virtual inline bool is_sharing_diff(int top_id, int bottom_id){
#ifndef USE_MPI
return top_id == bottom_id;
Expand Down Expand Up @@ -685,6 +701,13 @@ class SliceLayer : public Layer<Dtype> {

virtual inline bool EqualNumBottomTopBlobs() const { return true; }

// Same sharing rule as above: matching bottom/top pairs may alias data
// memory, but not when Caffe is in MPI parallel mode — presumably the
// MPI path requires separate buffers per process (TODO confirm).
virtual inline bool is_sharing_data(int top_id, int bottom_id){
#ifndef USE_MPI
return top_id == bottom_id;
#else
return (top_id == bottom_id) && (Caffe::parallel_mode()!=Caffe::MPI);
#endif
}
virtual inline bool is_sharing_diff(int top_id, int bottom_id){
#ifndef USE_MPI
return top_id == bottom_id;
Expand Down
3 changes: 2 additions & 1 deletion include/caffe/layer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,9 @@ class Layer {
#endif

/**
* @brief express whether this layer shares the diff between bottom and top
* @brief express whether this layer shares the data/diff between bottom and top
*/
// Default policy: a layer does NOT share data/diff memory between its
// bottom and top blobs. Layers whose output aliases the input (e.g.
// pure reshape-style layers) override these to return true for the
// matching pair, letting Net::MemoryOptimize reuse the underlying storage.
virtual inline bool is_sharing_data(int top_id, int bottom_id){return false;}
virtual inline bool is_sharing_diff(int top_id, int bottom_id){return false;}


Expand Down
3 changes: 1 addition & 2 deletions include/caffe/net.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,7 @@ class Net {
/// Whether to compute and display debug info for the net.
bool debug_info_;

vector< shared_ptr<SyncedMemory> > shared_diff_storage_;
vector< shared_ptr<SyncedMemory> > shared_data_storage_;
vector< shared_ptr<SyncedMemory> > shared_storage_;

DISABLE_COPY_AND_ASSIGN(Net);
};
Expand Down
205 changes: 131 additions & 74 deletions src/caffe/net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,11 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
LOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype);

// optimize memory
if (phase_ == TRAIN && !debug_info_ && param.optimize_mem()){
const NetParameter_OptimizeMem om = param.optimize_mem();
const bool need_optimze_mem =
(om == NetParameter_OptimizeMem_TRAIN_ONLY && phase_ == TRAIN)
|| (om == NetParameter_OptimizeMem_ALL_OPTIM);
if (!debug_info_ && need_optimze_mem) {
MemoryOptimize();
}
}
Expand Down Expand Up @@ -961,68 +965,63 @@ const shared_ptr<Layer<Dtype> > Net<Dtype>::layer_by_name(
return layer_ptr;
}

template <typename Dtype>
class BlobMeta {
class SlotMeta {
public:
BlobMeta(Blob<Dtype>* blob, int ref, int layer_id){
blob_ = blob;
ref_ = ref;
layer_id_ = layer_id;
}
SlotMeta()
: key_(), ref_(0) { }

BlobMeta():blob_(NULL), ref_(0), layer_id_(-1){};
SlotMeta(const string& key, int ref)
: key_(key), ref_(ref) { }

inline const string& key() const { return key_; }
inline int ref() const { return ref_; }

inline void DerefOne(){
CHECK_GT(ref_, 0);
ref_ -= 1;
if (ref_ == 0){
blob_ = NULL;
layer_id_ = -1;
key_.clear();
}
}

inline void IncRef(){ref_ += 1;}

void RefBlob(Blob<Dtype>* blob, int ref, int layer_id){
CHECK(blob_==NULL);
CHECK_NE(blob, blob_);
void RefSlot(const string& key, int ref) {
CHECK(key_.empty());
CHECK_NE(key_, key);
CHECK_GT(ref, 0);
blob_ = blob;
key_ = key;
ref_ = ref;
layer_id_ = layer_id;
}

inline bool Empty(){
return blob_ == NULL;
return key_.empty();
}

inline bool isBlob(Blob<Dtype>* blob){return blob_==blob;}
inline bool isSlot(const string& key){return key_ == key;}

private:
Blob<Dtype>* blob_;
string key_;
int ref_;
int layer_id_;
};

template <typename Dtype>
size_t AcquireSlot(vector<BlobMeta<Dtype> >& slot_vec, Blob<Dtype>* blob, int ref, int layer_id){
for (size_t i = 0 ; i < slot_vec.size(); ++i){
if (slot_vec[i].Empty()){
slot_vec[i].RefBlob(blob, ref, layer_id);
size_t AcquireSlot(vector<SlotMeta>& slot_vec, const string& key, int ref) {
for (size_t i = 0 ; i < slot_vec.size(); ++i) {
if (slot_vec[i].Empty()) {
slot_vec[i].RefSlot(key, ref);
return i;
}
}

// no available slot, need a new one
slot_vec.push_back(BlobMeta<Dtype>(blob, ref, layer_id));
slot_vec.push_back(SlotMeta(key, ref));

return slot_vec.size() - 1;
}

template <typename Dtype>
int FindBlob(vector<BlobMeta<Dtype> >& slot_vec, Blob<Dtype>* blob){
int FindSlot(vector<SlotMeta>& slot_vec, const string& key){
for (int i = 0; i < slot_vec.size(); ++i){
if (slot_vec[i].isBlob(blob)){
if (slot_vec[i].isSlot(key)){
return i;
}
}
Expand All @@ -1033,11 +1032,65 @@ template <typename Dtype>
void Net<Dtype>::MemoryOptimize() {
// Dry run phase
// In this phase, we assume the network topology has been setup
boost::unordered_map<Blob<Dtype>*, int> blob_log;

vector<BlobMeta<Dtype> > slots;
boost::unordered_map<string, int> slot_index;

vector<SlotMeta> slots;

// Forward pass, try to reuse blobs' data memory
for (int i = 0; i < layers_.size(); ++i) {
if (layers_[i]->layer_param().no_mem_opt()) continue;
const vector<Blob<Dtype>* >& layer_top = top_vecs_[i];
const vector<Blob<Dtype>* >& layer_bottom = bottom_vecs_[i];
LOG(INFO) << "layer " << i << " " << layer_names_[i];
// Find slot for each top blob's data
for (int i_top = 0; i_top < layer_top.size(); ++i_top) {
const string& top_name = blob_names_[top_id_vecs_[i][i_top]];
int idx = FindSlot(slots, top_name + "_data");
if (idx == -1) {
// Detect share data conditions
bool sharing_data = false;
for (int i_bottom = 0; i_bottom < layer_bottom.size(); ++i_bottom) {
if (layers_[i]->is_sharing_data(i_top, i_bottom)) {
sharing_data = true;
const string& bottom_name = blob_names_[bottom_id_vecs_[i][i_bottom]];
idx = FindSlot(slots, bottom_name + "_data");
LOG(INFO) << "top " << top_name
<< " shares data with bottom " << bottom_name
<< " slot " << idx;
break;
}
}
if (!sharing_data) {
if (!layers_[i]->loss(i_top)) {
idx = (int)AcquireSlot(slots, top_name + "_data", 1);
slot_index[top_name + "_data"] = idx;
LOG(INFO) << "top " << top_name << " acquires data slot " << idx;
}
} else {
slots[idx].IncRef();
slot_index[top_name + "_data"] = idx;
}
} else {
// Top data blob is already assigned a slot (maybe inplace layer).
slots[idx].IncRef();
LOG(INFO) << "top " << top_name << " refers to data slot " << idx;
}
}
// Deref bottom blob's data slot if this layer does not propagate down.
if (phase_ == TRAIN && layer_need_backward_[i]) continue;
for (int i_bottom = 0; i_bottom < layer_bottom.size(); ++i_bottom) {
const string& bottom_name = blob_names_[bottom_id_vecs_[i][i_bottom]];
int idx = FindSlot(slots, bottom_name + "_data");
if (slot_index.find(bottom_name + "_data") != slot_index.end()) {
idx = slot_index[bottom_name + "_data"];
}
if (idx >= 0) {
// idx == -1 if this is an input blob
slots[idx].DerefOne();
}
LOG(INFO) << "bottom " << bottom_name << " derefs data slot " << idx;
}
}

for (int i = (layers_.size() -1); i >=0; --i){
vector<Blob<Dtype>* >& layer_top = top_vecs_[i];
Expand All @@ -1049,31 +1102,28 @@ void Net<Dtype>::MemoryOptimize() {

// first deal with bottoms
for (int i_bottom = 0; i_bottom < layer_bottom.size(); ++i_bottom){
Blob<Dtype>* bottom = layer_bottom[i_bottom];
int idx = FindBlob(slots, bottom);

const string& bottom_name = blob_names_[layer_bottom_idx[i_bottom]];
int idx = FindSlot(slots, bottom_name + "_diff");
if (!(layers_[i]->layer_param().no_mem_opt())){
if (idx == -1){

//detect share diff conditions
bool sharing_diff = false;

for (int i_top = 0; i_top < layer_top.size(); ++i_top){
if(layers_[i]->is_sharing_diff(i_top, i_bottom)){
const string& top_name = blob_names_[layer_top_idx[i_top]];
sharing_diff = true;
idx = FindBlob(slots, layer_top[i_top]);
idx = FindSlot(slots, top_name + "_diff");
}
}

if (!sharing_diff ) {
idx = (int) AcquireSlot(slots, bottom, 1, i);
blob_log[bottom] = idx;
if (!sharing_diff) {
idx = (int) AcquireSlot(slots, bottom_name + "_diff", 1);
slot_index[bottom_name + "_diff"] = idx;
LOG(INFO) << "acquired slot for new blob";
}else{
LOG(INFO) << "sharing diff using slot "<<idx;
if(idx != -1) {
slots[idx].IncRef();
blob_log[bottom] = idx;
slot_index[bottom_name + "_diff"] = idx;
}
}
}else{
Expand All @@ -1082,65 +1132,72 @@ void Net<Dtype>::MemoryOptimize() {
slots[idx].IncRef();
}
}
string blob_name = blob_names_[layer_bottom_idx[i_bottom]];
LOG(INFO)<<"bottom blob "<<i_bottom<<" name "
<<blob_name<<" ptr "<<bottom<<" slot id "<<idx;

<<bottom_name<<" slot id "<<idx;
}


// then deal with top
for (int i_top = 0; i_top < layer_top.size(); ++i_top){
Blob<Dtype>* top = layer_top[i_top];
const string& top_name = blob_names_[layer_top_idx[i_top]];

// find the top in the slots
int idx = FindBlob(slots, top);
int idx = FindSlot(slots, top_name + "_diff");

// look for shared diff
if (blob_log.find(top) != blob_log.end()){
idx = blob_log[top];
if (slot_index.find(top_name + "_diff") != slot_index.end()){
idx = slot_index[top_name + "_diff"];
}
//after the layer's operation, the refcount of top should be decreased by 1
if (idx != -1)
slots[idx].DerefOne();

string blob_name = blob_names_[layer_top_idx[i_top]];
LOG(INFO)<<"top blob "<<i_top
<<" name "<<blob_name<<" ptr "<<top<<" slot id "<<idx;
LOG(INFO) << "top blob " << i_top
<< " name " << top_name << " slot id " << idx;
}


}

// Memory assignment phase
shared_diff_storage_.resize(slots.size());
for (int i_mem = 0; i_mem < shared_diff_storage_.size(); i_mem++){
shared_diff_storage_[i_mem].reset(new SyncedMemory(1));
shared_storage_.resize(slots.size());
for (int i_mem = 0; i_mem < shared_storage_.size(); i_mem++){
shared_storage_[i_mem].reset(new SyncedMemory(1));
}

size_t count_raw = 0;
size_t count_opt = 0;
for (int i_blob = 0; i_blob < blobs_.size(); ++i_blob){
if (blob_log.find(blobs_[i_blob].get()) == blob_log.end()){
// loss blob cannot be shared due to loss weight
continue;
const string& name = blob_names_[i_blob];
const size_t bytes = blobs_[i_blob]->count() * sizeof(Dtype);
count_raw += bytes * 2;
int idx = -1;
if (slot_index.find(name + "_data") != slot_index.end()) {
idx = slot_index[name + "_data"];
blobs_[i_blob]->SetDataStorage(shared_storage_[idx]);
shared_storage_[idx]->Resize(bytes);
} else {
count_opt += bytes;
}
int idx = blob_log[blobs_[i_blob].get()];
blobs_[i_blob]->SetDiffStorage(shared_diff_storage_[idx]);
LOG(INFO)<<"blob "<<i_blob<<" name "<<blob_names_[i_blob]<<" idx "<<idx;

//recover the necessary mem size for the blob
shared_diff_storage_[idx]->Resize(blobs_[i_blob]->count() * sizeof(Dtype));
count_raw += blobs_[i_blob]->count() * sizeof(Dtype);
LOG(INFO) << "blob " << i_blob
<< " name " << blob_names_[i_blob]
<< " data idx " << idx;
if (slot_index.find(name + "_diff") != slot_index.end()) {
idx = slot_index[name + "_diff"];
blobs_[i_blob]->SetDiffStorage(shared_storage_[idx]);
shared_storage_[idx]->Resize(bytes);
} else {
count_opt += bytes;
}
LOG(INFO) << "blob " << i_blob
<< " name " << blob_names_[i_blob]
<< " diff idx " << idx;
}

for (int i_mem = 0; i_mem < shared_diff_storage_.size(); i_mem++){
count_opt += shared_diff_storage_[i_mem]->size();
for (int i_mem = 0; i_mem < shared_storage_.size(); i_mem++){
LOG(INFO) << "storage memory slot " << i_mem
<< " size " << shared_storage_[i_mem]->size();
count_opt += shared_storage_[i_mem]->size();
}

LOG(INFO)<<"raw memory "<<count_raw<<" opt memory "<<count_opt;

// LOG(FATAL)<<"";
LOG(INFO) << "raw memory " << count_raw << " opt memory " << count_opt;
}
INSTANTIATE_CLASS(Net);

Expand Down
Loading