在 proto 文件(caffe.proto)中添加 Axpy 层的参数定义;该层本身没有参数,因此消息体可以为空,也可以不添加:
// Parameter message for the Axpy layer. The layer takes no configuration,
// so the message body is left empty.
message AxpyParameter {}
// Registers the Axpy parameter message as an optional field of LayerParameter.
message LayerParameter {
  // ... existing LayerParameter fields stay unchanged ...

  // Any field number is fine as long as it does not collide with one that is
  // already in use inside LayerParameter.
  optional AxpyParameter axpy_parm = 150;
}
https://github.com/hujie-frank/SENet
表达式如下:
f = a * x + y。输入共三个:a 为逐 (n, c) 的缩放系数,形状为 (N, C, 1, 1);x、y 为形状相同的 (N, C, H, W) 张量;a 在空间维度 (H, W) 上广播。
input: bottom[0] -> a, bottom[1] -> x, bottom[2] -> y
output: top[0] -> f
// Forward pass of f = a * x + y.
// The elements of bottom are accessed as pointers elsewhere in this code
// (e.g. bottom[2]->count() below), so members must be reached with '->'.
// NOTE(review): scale_data, x_data and top_data are not declared in this
// snippet; presumably they point at the data of bottom[0], bottom[1] and
// top[0] respectively — confirm against the surrounding code.
const int N = bottom[1]->shape(0);
const int C = bottom[1]->shape(1);
const int spatial_dim = bottom[1]->count(2);

// Initialise the output with y: top = y.
caffe_copy(bottom[2]->count(), bottom[2]->cpu_data(), top_data);

// Then accumulate a[n, c] * x[n, c, :] into each (n, c) slice of the output.
for (int n = 0; n < N; ++n) {
  for (int c = 0; c < C; ++c) {
    // Flat index of the (n, c) pair; also the index of its scale factor.
    const int scale_offset = n * C + c;
    caffe_axpy(spatial_dim, scale_data[scale_offset],
               x_data + scale_offset * spatial_dim,
               top_data + scale_offset * spatial_dim);
  }
}
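反向传播:f = a * x + y。
对 a 求导:a[n,c] 作用于 x[n,c,:,:] 的所有空间位置,因此梯度需要在空间维度上求和:a_diff[n,c] = Σ_{h,w} top_diff[n,c,h,w] * x[n,c,h,w]。
top_diff 的形状为 NCHW = (N, C, H, W),而 a 的形状为 (N, C, 1, 1),
所以先将 top_diff 与 x 逐元素相乘,视为 (N*C, H*W) 的矩阵,再右乘一个 (H*W, 1) 的全 1 向量完成求和:(N*C, H*W) * (H*W, 1) = (N*C, 1)。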
// Gradient w.r.t. a, step 1: element-wise product of top_diff and x.
// NOTE(review): caffe_mul writes `count` (= top[0]->count()) values through
// a_diff, so a_diff must point at a buffer of at least that many elements —
// confirm it is not the (n, c, 1, 1)-shaped diff of a itself.
int count = top[0]->count();
caffe_mul(count, top_diff, x_data, a_diff);
...// matrix reshaping / reduction step: see the implementation in the linked repository
根据公式可得:x_diff[n,c,h,w] = top_diff[n,c,h,w] * a[n,c],即 top_diff 的每个 (n, c) 切片乘以对应的缩放系数 a[n,c]。
int channel_dim = bottom[1]->channels();
int spatial_dim = bottom[1]->count(2);
const Dtype* scale_data = bottom[0]->cpu_data();
Dtype* x_diff = bottom[1]->mutable_cpu_diff();
for (int n = 0; n < bottom[1]->num(); ++n) {
for (int c = 0; c < channel_dim; ++c) {
int scale_offset = n * channel_dim + c;
caffe_cpu_scale(spatial_dim, scale_data[scale_offset],
top_diff + scale_offset * spatial_dim,
x_diff + scale_offset * spatial_dim);
}
}
根据公式可得:y_diff = top_diff,即梯度原样传递给 y。
caffe_copy(count, top_diff, bottom[2]->mutable_cpu_diff());