Commit cf3177e
Rebased and cleaned-up Channelwise Affine for batch norm
1 parent cff6f3d
Showing 6 changed files with 555 additions and 1 deletion.
include/caffe/layers/channelwise_affine_layer.hpp
@@ -0,0 +1,103 @@
#ifndef CAFFE_CHANNELWISE_AFFINE_LAYER_HPP_
#define CAFFE_CHANNELWISE_AFFINE_LAYER_HPP_

#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/layers/neuron_layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {
/**
 * @brief Channel-wise affine function @f$
 *        y = ax + b
 *        @f$, which can be used after a batch normalization layer.
 */
template <typename Dtype>
class ChannelwiseAffineLayer : public NeuronLayer<Dtype> {
 public:
  /**
   * @param param provides ChannelwiseAffineParameter channelwise_affine_param,
   *     with ChannelwiseAffineLayer options:
   *   - slope_filler (\b optional, FillerParameter,
   *     default {'type': constant 'value': 1.0001}).
   *   - bias_filler (\b optional, FillerParameter,
   *     default {'type': constant 'value': 0.0001}).
   *   - channel_shared (\b optional, default false).
   *     If true, a single slope and bias are shared across all channels.
   */
  explicit ChannelwiseAffineLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                          const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
                       const vector<Blob<Dtype>*>& top);
  virtual inline const char* type() const { return "ChannelwiseAffine"; }

 protected:
  /**
   * @param bottom input Blob vector (length 1)
   *   -# @f$ (N \times C \times ...) @f$
   *      the inputs @f$ x @f$
   * @param top output Blob vector (length 1)
   *   -# @f$ (N \times C \times ...) @f$
   *      the computed outputs for each channel @f$ i @f$: @f$
   *      y_i = a_i x_i + b_i
   *      @f$.
   */
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  /**
   * @brief Computes the error gradient w.r.t. the ChannelwiseAffine inputs.
   *
   * @param top output Blob vector (length 1), providing the error gradient
   *     with respect to the outputs
   *   -# @f$ (N \times C \times ...) @f$
   *      containing error gradients @f$ \frac{\partial E}{\partial y} @f$
   *      with respect to computed outputs @f$ y @f$
   * @param propagate_down see Layer::Backward.
   * @param bottom input Blob vector (length 1)
   *   -# @f$ (N \times C \times ...) @f$
   *      the inputs @f$ x @f$; for each channel @f$ i @f$, backward fills
   *      their diff with gradients @f$
   *      \frac{\partial E}{\partial x_i} = a_i \frac{\partial E}{\partial y_i}
   *      @f$.
   *      If param_propagate_down_[0] is true, it fills the slope diff with
   *      gradients @f$
   *      \frac{\partial E}{\partial a_i} =
   *          \sum_{x_i} x_i \frac{\partial E}{\partial y_i}
   *      @f$.
   *      If param_propagate_down_[1] is true, it fills the bias diff with
   *      gradients @f$
   *      \frac{\partial E}{\partial b_i} =
   *          \sum_{x_i} \frac{\partial E}{\partial y_i}
   *      @f$.
   */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);

  bool channel_shared_;
  Blob<Dtype> multiplier_;  // dot multiplier for backward computation of params
  Blob<Dtype> bias_multiplier_;
  Blob<Dtype> backward_buff_;  // temporary buffer for backward computation
  Blob<Dtype> bottom_memory_;  // memory for in-place computation
};

}  // namespace caffe

#endif  // CAFFE_CHANNELWISE_AFFINE_LAYER_HPP_
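
Not part of the diff itself, but for orientation: below is a minimal sketch of driving the new layer through Caffe's C++ API. It assumes the ChannelwiseAffineParameter message and the LayerParameter.channelwise_affine_param field that this commit presumably adds to caffe.proto in one of the other changed files; the input shape and filler values are illustrative only.

#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layers/channelwise_affine_layer.hpp"
#include "caffe/proto/caffe.pb.h"

using namespace caffe;  // for brevity in this sketch

void RunChannelwiseAffineExample() {
  // A 2 x 3 x 4 x 4 bottom blob (N x C x H x W); contents default to zero.
  Blob<float> input(2, 3, 4, 4);
  Blob<float> output;
  std::vector<Blob<float>*> bottom(1, &input);
  std::vector<Blob<float>*> top(1, &output);

  // Per-channel slope and bias (channel_shared = false); constant fillers
  // chosen so the layer starts out as the identity transform.
  LayerParameter param;
  param.set_type("ChannelwiseAffine");
  ChannelwiseAffineParameter* cap = param.mutable_channelwise_affine_param();
  cap->set_channel_shared(false);
  cap->mutable_slope_filler()->set_type("constant");
  cap->mutable_slope_filler()->set_value(1.f);
  cap->mutable_bias_filler()->set_type("constant");
  cap->mutable_bias_filler()->set_value(0.f);

  ChannelwiseAffineLayer<float> layer(param);
  layer.SetUp(bottom, top);    // allocates blobs_[0] (slopes), blobs_[1] (biases)
  layer.Forward(bottom, top);  // computes y_i = a_i * x_i + b_i per channel
}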
src/caffe/layers/channelwise_affine_layer.cpp
@@ -0,0 +1,189 @@
#include <algorithm>
#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/layers/channelwise_affine_layer.hpp"

namespace caffe {

template <typename Dtype>
void ChannelwiseAffineLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  CHECK_GE(bottom[0]->num_axes(), 2)
      << "Number of axes of bottom blob must be >=2.";
  ChannelwiseAffineParameter channelwise_affine_param =
      this->layer_param().channelwise_affine_param();
  int channels = bottom[0]->channels();
  channel_shared_ = channelwise_affine_param.channel_shared();
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    this->blobs_.resize(2);
    if (channel_shared_) {
      this->blobs_[0].reset(new Blob<Dtype>(vector<int>(0)));
      this->blobs_[1].reset(new Blob<Dtype>(vector<int>(0)));
    } else {
      this->blobs_[0].reset(new Blob<Dtype>(vector<int>(1, channels)));
      this->blobs_[1].reset(new Blob<Dtype>(vector<int>(1, channels)));
    }
    shared_ptr<Filler<Dtype> > filler;
    if (channelwise_affine_param.has_slope_filler()) {
      filler.reset(GetFiller<Dtype>(channelwise_affine_param.slope_filler()));
    } else {
      FillerParameter filler_param;
      filler_param.set_type("constant");
      filler_param.set_value(1.0001);
      filler.reset(GetFiller<Dtype>(filler_param));
    }
    filler->Fill(this->blobs_[0].get());

    if (channelwise_affine_param.has_bias_filler()) {
      filler.reset(GetFiller<Dtype>(channelwise_affine_param.bias_filler()));
    } else {
      FillerParameter filler_param;
      filler_param.set_type("constant");
      filler_param.set_value(0.0001);
      filler.reset(GetFiller<Dtype>(filler_param));
    }
    filler->Fill(this->blobs_[1].get());
  }
  if (channel_shared_) {
    CHECK_EQ(this->blobs_[0]->count(), 1)
        << "Slope size is inconsistent with prototxt config";
  } else {
    CHECK_EQ(this->blobs_[0]->count(), channels)
        << "Slope size is inconsistent with prototxt config";
  }

  // Propagate gradients to the parameters (as directed by backward pass).
  this->param_propagate_down_.resize(this->blobs_.size(), true);
  multiplier_.Reshape(vector<int>(1, bottom[0]->count(1)));
  bias_multiplier_.Reshape(vector<int>(1, bottom[0]->count(1)));
  backward_buff_.Reshape(vector<int>(1, bottom[0]->count(1)));
  caffe_set(multiplier_.count(), Dtype(1.0),
            multiplier_.mutable_cpu_data());
  caffe_set(bias_multiplier_.count(), Dtype(1.0),
            bias_multiplier_.mutable_cpu_data());
}

template <typename Dtype>
void ChannelwiseAffineLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  CHECK_GE(bottom[0]->num_axes(), 2)
      << "Number of axes of bottom blob must be >=2.";
  top[0]->ReshapeLike(*bottom[0]);
  if (bottom[0] == top[0]) {
    // For in-place computation
    bottom_memory_.ReshapeLike(*bottom[0]);
  }
  int height = 1;
  int width = 1;
  if (bottom[0]->num_axes() > 2) {
    height = bottom[0]->shape(2);
    width = bottom[0]->shape(3);
  }
  vector<int> bias_multiplier_shape(1, height * width);
  bias_multiplier_.Reshape(bias_multiplier_shape);
  caffe_set(bias_multiplier_.count(), Dtype(1),
            bias_multiplier_.mutable_cpu_data());
}

template <typename Dtype>
void ChannelwiseAffineLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const int count = bottom[0]->count();
  const int dim = bottom[0]->count(2);
  const int channels = bottom[0]->channels();
  const Dtype* slope_data = this->blobs_[0]->cpu_data();
  const Dtype* bias_data = this->blobs_[1]->cpu_data();
  // For in-place computation
  if (bottom[0] == top[0]) {
    caffe_copy(count, bottom_data, bottom_memory_.mutable_cpu_data());
  }
  // If channel_shared_, the channel index in the following computation is
  // always zero.
  const int div_factor = channel_shared_ ? channels : 1;
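  // Worked example of the index math below: with shape (N=2, C=3, H=2, W=2),
  // dim = H*W = 4, so element i = 17 falls in channel c = (17 / 4) % 3 = 1;
  // with channel_shared_, dividing by div_factor = 3 maps every index to 0.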
  for (int i = 0; i < count; ++i) {
    int c = (i / dim) % channels / div_factor;
    top_data[i] = bottom_data[i] * slope_data[c] + bias_data[c];
  }
}

template <typename Dtype>
void ChannelwiseAffineLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* slope_data = this->blobs_[0]->cpu_data();

  const Dtype* top_diff = top[0]->cpu_diff();
  const int count = bottom[0]->count();
  const int dim = bottom[0]->count(2);
  const int channels = bottom[0]->shape(1);
  const int num = bottom[0]->shape(0);
  int height = 1;
  int width = 1;
  if (bottom[0]->num_axes() > 2) {
    height = bottom[0]->shape(2);
    width = bottom[0]->shape(3);
  }

  // For in-place computation
  if (top[0] == bottom[0]) {
    bottom_data = bottom_memory_.cpu_data();
  }
  // If channel_shared_, the channel index in the following computation is
  // always zero.
  const int div_factor = channel_shared_ ? channels : 1;

  // Propagate to params.
  // Since writing the bottom diff would affect the top diff if the top and
  // bottom blobs are identical (in-place computation), we compute the param
  // backward first to keep top_diff unchanged.
  if (this->param_propagate_down_[1]) {
    Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff();
    caffe_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
    if (channel_shared_) {
      // A shared bias has a single element, so accumulate over everything;
      // the per-channel gemv below would write past a count-1 blob.
      for (int i = 0; i < count; ++i) {
        bias_diff[0] += top_diff[i];
      }
    } else {
      for (int n = 0; n < num; ++n) {
        caffe_cpu_gemv<Dtype>(CblasNoTrans, channels, height * width, 1.,
                              top_diff + top[0]->offset(n),
                              bias_multiplier_.cpu_data(), 1., bias_diff);
      }
    }
  }
  if (this->param_propagate_down_[0]) {
    Dtype* slope_diff = this->blobs_[0]->mutable_cpu_diff();
    caffe_set(this->blobs_[0]->count(), Dtype(0), slope_diff);
    for (int i = 0; i < count; ++i) {
      int c = (i / dim) % channels / div_factor;
      slope_diff[c] += top_diff[i] * bottom_data[i];
    }
  }

  // Propagate to bottom
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    for (int i = 0; i < count; ++i) {
      int c = (i / dim) % channels / div_factor;
      bottom_diff[i] = slope_data[c] * top_diff[i];
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(ChannelwiseAffineLayer);
#endif

INSTANTIATE_CLASS(ChannelwiseAffineLayer);
REGISTER_LAYER_CLASS(ChannelwiseAffine);

}  // namespace caffe
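
As a sanity check on the gradient formulas documented in the header (a standalone illustrative sketch, not part of this commit), the following plain C++ program re-implements the channelwise affine math on a tiny tensor and compares the analytic gradients against central finite differences:

#include <cassert>
#include <cmath>
#include <cstdio>
#include <vector>

// Channelwise affine on a flattened N x C x D tensor:
// y[i] = a[c] * x[i] + b[c], with c = (i / D) % C.
// E = sum_i 0.5 * y_i^2 serves as an arbitrary scalar objective.
static double Loss(const std::vector<double>& x, const std::vector<double>& a,
                   const std::vector<double>& b, int C, int D) {
  double loss = 0.0;
  for (int i = 0; i < static_cast<int>(x.size()); ++i) {
    int c = (i / D) % C;
    double y = a[c] * x[i] + b[c];
    loss += 0.5 * y * y;
  }
  return loss;
}

int main() {
  const int N = 2, C = 3, D = 4, total = N * C * D;
  std::vector<double> x(total), a(C), b(C);
  for (int i = 0; i < total; ++i) x[i] = 0.1 * i - 1.0;
  for (int c = 0; c < C; ++c) { a[c] = 1.0 + 0.2 * c; b[c] = 0.1 * c; }

  // Analytic gradients, following the layer documentation:
  // dE/dy_i = y_i, dE/dx_i = a_c * dE/dy_i,
  // dE/da_c = sum_i x_i * dE/dy_i, dE/db_c = sum_i dE/dy_i.
  std::vector<double> dx(total), da(C, 0.0), db(C, 0.0);
  for (int i = 0; i < total; ++i) {
    int c = (i / D) % C;
    double dy = a[c] * x[i] + b[c];
    dx[i] = a[c] * dy;
    da[c] += x[i] * dy;
    db[c] += dy;
  }

  // Check slope and bias gradients against central finite differences.
  const double eps = 1e-6;
  for (int c = 0; c < C; ++c) {
    std::vector<double> ap(a), am(a), bp(b), bm(b);
    ap[c] += eps; am[c] -= eps;
    bp[c] += eps; bm[c] -= eps;
    double fd_a = (Loss(x, ap, b, C, D) - Loss(x, am, b, C, D)) / (2 * eps);
    double fd_b = (Loss(x, a, bp, C, D) - Loss(x, a, bm, C, D)) / (2 * eps);
    assert(std::fabs(fd_a - da[c]) < 1e-4);
    assert(std::fabs(fd_b - db[c]) < 1e-4);
  }

  // Spot-check dE/dx for one element.
  std::vector<double> xp(x), xm(x);
  xp[0] += eps; xm[0] -= eps;
  double fd_x = (Loss(xp, a, b, C, D) - Loss(xm, a, b, C, D)) / (2 * eps);
  assert(std::fabs(fd_x - dx[0]) < 1e-4);

  std::printf("analytic gradients match finite differences\n");
  return 0;
}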