[WIP] Conv2D Layer #39
base: master
@@ -0,0 +1,37 @@
/*******************************************************
 * Copyright (c) 2017, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/
#pragma once

#include <af/nn/Modules/Module.hpp>

namespace af
{
    namespace nn
    {
        class Conv2D : public Module
        {
        private:
            bool m_bias;
            int m_wx;
            int m_wy;
            int m_sx;
            int m_sy;
            int m_px;
            int m_py;
        public:
            Conv2D(int wx, int wy, int sx, int sy, int px, int py, int n_in, int n_out, bool bias = true);

            Conv2D(const autograd::Variable &w, int sx = 1, int sy = 1, int px = 0, int py = 0);

            Conv2D(const autograd::Variable &w, const autograd::Variable &b, int sx = 1, int sy = 1, int px = 0, int py = 0);

            autograd::Variable forward(const autograd::Variable &input);
        };
    }
}
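For reviewers skimming the header, a minimal usage sketch of the module as declared above. The input layout (x, y, channels, batch) follows the comments later in this diff; the umbrella include paths and the concrete sizes are assumptions for illustration, not part of the PR.

#include <arrayfire.h>
#include <af/autograd.h>   // assumed umbrella header for autograd::Variable
#include <af/nn.h>         // assumed umbrella header for nn::Conv2D

int main()
{
    // 3x3 kernel, stride 1, padding 1, 3 input channels, 16 output channels, with bias
    af::nn::Conv2D conv(3, 3, 1, 1, 1, 1, 3, 16, true);

    // One 32x32 3-channel image; batch size 1 (larger batches are still WIP in this PR)
    auto input = af::autograd::Variable(af::randu(32, 32, 3, 1), false);

    auto output = conv.forward(input);   // expected dims: 32 x 32 x 16 x 1
    return 0;
}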
@@ -414,5 +414,143 @@ namespace af {
            };
            return Variable(result, {input}, grad_func);
        }

        Variable reorder(const Variable &input, int d0, int d1, int d2, int d3)
        {
            array res = reorder(input.array(), d0, d1, d2, d3);

            int tmp[] = {d0, d1, d2, d3};
            int tmp2[4];
            for(int i = 0; i < 4; i++){
                tmp2[tmp[i]] = i;
            }
            auto reverse = Variable(array(4, tmp2), false);

            auto grad_func = [tmp2](std::vector<Variable> &inputs, const Variable &grad_output){
                inputs[0].addGrad(reorder(grad_output, tmp2[0], tmp2[1], tmp2[2], tmp2[3]));
            };
            return Variable(res, {input, reverse}, grad_func);
        }
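For context on the gradient above: tmp2 holds the inverse of the permutation (d0, d1, d2, d3), so applying reorder with tmp2 to grad_output undoes the forward reorder. A minimal standalone sketch of that inversion, independent of ArrayFire:

#include <cstdio>

// Illustration of the inverse-permutation logic used in the reorder gradient (the tmp/tmp2 arrays).
// Example: the permutation (2, 0, 1, 3) inverts to (1, 2, 0, 3); (1, 0, 2, 3) is its own inverse.
int main()
{
    int perm[4] = {2, 0, 1, 3};   // forward reorder: output dim i takes input dim perm[i]
    int inv[4];
    for (int i = 0; i < 4; i++) {
        inv[perm[i]] = i;         // inverse permutation: inv[perm[i]] = i
    }
    std::printf("%d %d %d %d\n", inv[0], inv[1], inv[2], inv[3]);   // prints: 1 2 0 3
    return 0;
}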
        Variable unwrap(const Variable &input, int wx, int wy, int sx, int sy, int px, int py)
        {
            array res = unwrap(input.array(), wx, wy, sx, sy, px, py);
            auto grad_func = [wx, wy, sx, sy, px, py](std::vector<Variable> &inputs, const Variable &grad_output) {
                dim4 d = inputs[0].dims();
                inputs[0].addGrad(wrap(grad_output, d[0], d[1], wx, wy, sx, sy, px, py));
            };
            return Variable(res, {input}, grad_func);
        }

        Variable wrap(const Variable &input, int ox, int oy, int wx, int wy, int sx, int sy, int px, int py)
        {
            array res = wrap(input.array(), ox, oy, wx, wy, sx, sy, px, py);
            auto grad_func = [wx, wy, sx, sy, px, py](std::vector<Variable> &inputs, const Variable &grad_output) {
                inputs[0].addGrad(unwrap(grad_output, wx, wy, sx, sy, px, py));
            };
            return Variable(res, {input}, grad_func);
        }
        Variable conv2d(const Variable &input, const Variable &weights, int wx, int wy, int sx, int sy, int px, int py)
        {
            dim4 idims = input.array().dims();   // (x_i, y_i, c_i, n )
            dim4 wdims = weights.array().dims(); // (wx, wy, c_i, c_o)

            int x_i = idims[0]; //size of x dim of input
            int y_i = idims[1]; //size of y dim of input
            int c_i = idims[2]; //number of input channels
            int n   = idims[3]; //batch size (1 for now)

            int x_o = (x_i + 2 * px - wx) / sx + 1; //size of x dim of output
            int y_o = (y_i + 2 * py - wy) / sy + 1; //size of y dim of output
            int c_o = wdims[3];                     //number of output channels

            array windows = unwrap(input.array(), wx, wy, sx, sy, px, py);

            array lhs = moddims(
                reorder(windows, 1, 0, 2, 3),
                dim4(x_o * y_o, wx * wy * c_i, n, 1));
            array rhs = moddims(weights.array(), dim4(wx * wy * c_i, c_o, 1, 1));

            //TODO: This loop can be replaced with a batched matmul as soon as
            //that is added to arrayfire
            std::vector<array> out;
            for(int i = 0; i < n; i++){
                array res = matmul(lhs(span, span, i), rhs);
                out.push_back(moddims(res, dim4(x_o, y_o, c_o, 1)));
            }

            //LOL @ C++ API - need this loop to have arbitrary batch size
            array result = out[0];
            for(int i = 1; i < n; i += 3){
                int rem = n - i;
                if(rem >= 3){
                    result = join(3, result, out[i], out[i+1], out[i+2]);
                }else if(rem == 2){
                    result = join(3, result, out[i], out[i+1]);
                    break;
                }else if(rem == 1){
                    result = join(3, result, out[i]);
                    break;
                }else{
                    break;
                }
            }

            auto grad_func = [wx, wy, sx, sy, px, py, c_i, n](std::vector<Variable> &inputs, const Variable &grad_output) {
                dim4 odims = grad_output.array().dims();
                dim4 wdims = inputs[1].array().dims();
                dim4 idims = inputs[0].array().dims();

                auto grad_out_reshape = moddims(grad_output, dim4(odims[0]*odims[1], odims[2], odims[3], 1));

                auto weights_reshape = moddims(inputs[1], dim4(wdims[0]*wdims[1]*wdims[2], wdims[3], 1, 1));

                //TODO: This really needs batched matmul...
                //TODO: This doesn't work for n > 1
                //TODO: Can these lines be shortened? - This seems like a large grad function - perhaps this
                //      could all be implemented in Conv2D::forward(). I had to implement the helper functions anyways
                /*
                std::vector<array> out;
                for(int i = 0; i < n; i++){
                    auto a = matmulNT(grad_out_reshape(span, span, i), weights_reshape); //Problem is here - can't call () on Variable
Review comments on this line:

This line is all that is preventing me from having batches working.

I can make the

That would be good.
                    auto adims = a.array().dims();
                    auto b = moddims(a, dim4(adims[0], wx*wy, c_i, adims[3]));
                    auto c = reorder(b, 1, 0, 2, 3);
                    out.push_back(wrap(c, idims[0], idims[1], wx, wy, sx, sy, px, py));
                }

                array result = out[0];
                for(int i = 1; i < n; i += 3){
                    int rem = n - i;
                    if(rem >= 3){
                        result = join(3, result, out[i], out[i+1], out[i+2]);
                    }else if(rem == 2){
                        result = join(3, result, out[i], out[i+1]);
                        break;
                    }else if(rem == 1){
                        result = join(3, result, out[i]);
                        break;
                    }else{
                        break;
                    }
                }
                */
                auto a = matmulNT(grad_out_reshape, weights_reshape);
                auto adims = a.array().dims();
                auto b = moddims(a, dim4(adims[0], wx*wy, c_i, adims[3]));
                auto c = reorder(b, 1, 0, 2, 3);
                inputs[0].addGrad(wrap(c, idims[0], idims[1], wx, wy, sx, sy, px, py));

                auto d = matmulTN(inputs[2], grad_out_reshape);
                inputs[1].addGrad(moddims(d, dim4(wx, wy, c_i, d.dims()[1])));
            };
            return Variable(result, {input, weights, Variable(lhs, false)}, grad_func);
        }
    }
}
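As a sanity check on the shape bookkeeping in conv2d above, here is a small worked example of the output-size formula and the unwrap/matmul (im2col-style) path; the concrete sizes are illustrative, not taken from the PR.

#include <cassert>

// Worked example of the shape arithmetic used in conv2d (illustrative sizes):
//   input   : 28 x 28 x 3 x 2   (x_i, y_i, c_i, n)
//   weights :  5 x  5 x 3 x 8   (wx, wy, c_i, c_o)
//   sx = sy = 1, px = py = 2
int main()
{
    int x_i = 28, y_i = 28, c_i = 3, n = 2;
    int wx = 5, wy = 5, c_o = 8;
    int sx = 1, sy = 1, px = 2, py = 2;

    int x_o = (x_i + 2 * px - wx) / sx + 1;   // (28 + 4 - 5)/1 + 1 = 28
    int y_o = (y_i + 2 * py - wy) / sy + 1;   // (28 + 4 - 5)/1 + 1 = 28
    assert(x_o == 28 && y_o == 28);

    // Shapes along the unwrap/matmul path:
    //   lhs = moddims(reorder(unwrap(input), 1, 0, 2, 3)) : (x_o*y_o, wx*wy*c_i, n, 1) = (784, 75, 2, 1)
    //   rhs = moddims(weights)                            : (wx*wy*c_i, c_o)           = (75, 8)
    //   matmul(lhs(span, span, i), rhs)                   : (784, 8) per batch element
    //   moddims + join along dim 3                        : (x_o, y_o, c_o, n)         = (28, 28, 8, 2)
    (void)c_i; (void)c_o; (void)n;
    return 0;
}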
@@ -0,0 +1,78 @@
/*******************************************************
 * Copyright (c) 2017, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/
#include <af/autograd/Functions.hpp>
#include <af/nn/Init.hpp>
#include <af/nn/Modules/Conv2D.hpp>

//output will be ho x wo x no x n
namespace af
{
    namespace nn
    {
        using namespace autograd;

        Conv2D::Conv2D(int wx, int wy, int sx, int sy, int px, int py, int n_in, int n_out, bool bias) :
            m_wx(wx),
            m_wy(wy),
            m_sx(sx),
            m_sy(sy),
            m_px(px),
            m_py(py),
            m_bias(bias)
        {
            auto w = nn::lecunNormal(dim4(wx, wy, n_in, n_out));
            if (bias) {
                auto b = nn::lecunNormal(dim4(1, 1, n_out, 1));
                setParams({w, b});
            } else {
                setParams({w});
            }
        }

        Conv2D::Conv2D(const Variable &w, int sx, int sy, int px, int py) :
            m_sx(sx),
            m_sy(sy),
            m_px(px),
            m_py(py),
            m_bias(false),
            Module({w})
        {
            dim4 pdims = w.array().dims();
            m_wx = pdims[0];
            m_wy = pdims[1];
        }

        Conv2D::Conv2D(const Variable &w, const Variable &b, int sx, int sy, int px, int py) :
            m_sx(sx),
            m_sy(sy),
            m_px(px),
            m_py(py),
            m_bias(true),
            Module({w, b})
        {
            /*if (b.array().dims(0) != w.array().dims(0)) {
                throw af::exception("nn::Conv2D: Dimension mismatch between weight and bias.");
            }*/
            if (b.array().dims(1) != 1) {
                throw af::exception("nn::Conv2D: Bias must be a vector.");
            }
            dim4 pdims = w.array().dims();
Review comment on this line: Btw I added

            m_wx = pdims[0];
            m_wy = pdims[1];
        }
        Variable Conv2D::forward(const Variable &input)
        {
            auto res = conv2d(input, m_parameters[0], m_wx, m_wy, m_sx, m_sy, m_px, m_py);
            if (m_bias) {
                res = res + tileAs(m_parameters[1], res);
            }
            return res;
        }
    }
}

Review discussion on the tileAs line:

I am not familiar with bias in a Convolution layer. Let me know if you find a reference for this.

The alexnet model I pulled from caffe's model zoo has both weights and biases for every learned layer.

You can view biases in this implementation http://www.cs.toronto.edu/~guerzhoy/tf_alexnet/

@plavin I mean the way bias is used here. I don't know if it is the same as what we are doing in Linear layer.
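On the bias question in the discussion above: the convention in Caffe/AlexNet-style conv layers is one scalar bias per output channel, broadcast across the spatial dimensions and the batch, which is what adding a (1, 1, n_out, 1) parameter via tileAs does here. A minimal raw-ArrayFire sketch of that broadcast, with illustrative sizes and af::tile standing in for the autograd tileAs:

#include <arrayfire.h>

// Per-output-channel bias for a conv output of shape (x_o, y_o, c_o, n).
// Sizes are made up for illustration; not part of this PR.
int main()
{
    af::array out  = af::randu(28, 28, 8, 2);   // conv output: x_o=28, y_o=28, c_o=8, n=2
    af::array bias = af::randu(1, 1, 8, 1);     // one bias value per output channel

    // Broadcast the bias over x, y, and the batch dimension, then add.
    af::array biased = out + af::tile(bias, 28, 28, 1, 2);   // dims: 28 x 28 x 8 x 2

    return 0;
}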
Review comment on the reorder gradient above: reverse is not being used anymore.