MiniDNN
FullyConnected.h
1 #ifndef LAYER_FULLYCONNECTED_H_
2 #define LAYER_FULLYCONNECTED_H_
3 
4 #include <Eigen/Core>
5 #include <vector>
6 #include <stdexcept>
7 #include "../Config.h"
8 #include "../Layer.h"
9 #include "../Utils/Random.h"
10 
11 namespace MiniDNN {
12 
13 
19 template <typename Activation>
20 class FullyConnected: public Layer
21 {
22 private:
23  typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;
24  typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
25  typedef Vector::ConstAlignedMapType ConstAlignedMapVec;
26  typedef Vector::AlignedMapType AlignedMapVec;
27 
28  Matrix m_weight; // Weight parameters, W(in_size x out_size)
29  Vector m_bias; // Bias parameters, b(out_size x 1)
30  Matrix m_dw; // Derivative of weights
31  Vector m_db; // Derivative of bias
32  Matrix m_z; // Linear term, z = W' * in + b
33  Matrix m_a; // Output of this layer, a = act(z)
34  Matrix m_din; // Derivative of the input of this layer.
35  // Note that input of this layer is also the output of previous layer
36 
37 public:
44  FullyConnected(const int in_size, const int out_size) :
45  Layer(in_size, out_size)
46  {}
47 
48  void init(const Scalar& mu, const Scalar& sigma, RNG& rng)
49  {
50  m_weight.resize(this->m_in_size, this->m_out_size);
51  m_bias.resize(this->m_out_size);
52  m_dw.resize(this->m_in_size, this->m_out_size);
53  m_db.resize(this->m_out_size);
54 
55  // Set random coefficients
56  internal::set_normal_random(m_weight.data(), m_weight.size(), rng, mu, sigma);
57  internal::set_normal_random(m_bias.data(), m_bias.size(), rng, mu, sigma);
58  }
59 
60  // prev_layer_data: in_size x nobs
61  void forward(const Matrix& prev_layer_data)
62  {
63  const int nobs = prev_layer_data.cols();
64  // Linear term z = W' * in + b
65  m_z.resize(this->m_out_size, nobs);
66  m_z.noalias() = m_weight.transpose() * prev_layer_data;
67  m_z.colwise() += m_bias;
68 
69  // Apply activation function
70  m_a.resize(this->m_out_size, nobs);
71  Activation::activate(m_z, m_a);
72  }
73 
74  const Matrix& output() const
75  {
76  return m_a;
77  }
78 
79  // prev_layer_data: in_size x nobs
80  // next_layer_data: out_size x nobs
81  void backprop(const Matrix& prev_layer_data, const Matrix& next_layer_data)
82  {
83  const int nobs = prev_layer_data.cols();
84 
85  // After forward stage, m_z contains z = W' * in + b
86  // Now we need to calculate d(L) / d(z) = [d(a) / d(z)] * [d(L) / d(a)]
87  // d(L) / d(a) is computed in the next layer, contained in next_layer_data
88  // The Jacobian matrix J = d(a) / d(z) is determined by the activation function
89  Matrix& dLz = m_z;
90  Activation::apply_jacobian(m_z, m_a, next_layer_data, dLz);
91 
92  // Now dLz contains d(L) / d(z)
93  // Derivative for weights, d(L) / d(W) = [d(L) / d(z)] * in'
94  m_dw.noalias() = prev_layer_data * dLz.transpose() / nobs;
95 
96  // Derivative for bias, d(L) / d(b) = d(L) / d(z)
97  m_db.noalias() = dLz.rowwise().mean();
98 
99  // Compute d(L) / d_in = W * [d(L) / d(z)]
100  m_din.resize(this->m_in_size, nobs);
101  m_din.noalias() = m_weight * dLz;
102  }
103 
104  const Matrix& backprop_data() const
105  {
106  return m_din;
107  }
108 
109  void update(Optimizer& opt)
110  {
111  ConstAlignedMapVec dw(m_dw.data(), m_dw.size());
112  ConstAlignedMapVec db(m_db.data(), m_db.size());
113  AlignedMapVec w(m_weight.data(), m_weight.size());
114  AlignedMapVec b(m_bias.data(), m_bias.size());
115 
116  opt.update(dw, w);
117  opt.update(db, b);
118  }
119 
120  std::vector<Scalar> get_parameters() const
121  {
122  std::vector<Scalar> res(m_weight.size() + m_bias.size());
123  // Copy the data of weights and bias to a long vector
124  std::copy(m_weight.data(), m_weight.data() + m_weight.size(), res.begin());
125  std::copy(m_bias.data(), m_bias.data() + m_bias.size(), res.begin() + m_weight.size());
126 
127  return res;
128  }
129 
130  void set_parameters(const std::vector<Scalar>& param)
131  {
132  if(static_cast<int>(param.size()) != m_weight.size() + m_bias.size())
133  throw std::invalid_argument("Parameter size does not match");
134 
135  std::copy(param.begin(), param.begin() + m_weight.size(), m_weight.data());
136  std::copy(param.begin() + m_weight.size(), param.end(), m_bias.data());
137  }
138 
139  std::vector<Scalar> get_derivatives() const
140  {
141  std::vector<Scalar> res(m_dw.size() + m_db.size());
142  // Copy the data of weights and bias to a long vector
143  std::copy(m_dw.data(), m_dw.data() + m_dw.size(), res.begin());
144  std::copy(m_db.data(), m_db.data() + m_db.size(), res.begin() + m_dw.size());
145 
146  return res;
147  }
148 };
149 
150 
151 } // namespace MiniDNN
152 
153 
154 #endif /* LAYER_FULLYCONNECTED_H_ */
std::vector< Scalar > get_derivatives() const
std::vector< Scalar > get_parameters() const
const Matrix & backprop_data() const
int out_size() const
Definition: Layer.h:58
void backprop(const Matrix &prev_layer_data, const Matrix &next_layer_data)
void set_parameters(const std::vector< Scalar > &param)
int in_size() const
Definition: Layer.h:54
void forward(const Matrix &prev_layer_data)
void init(const Scalar &mu, const Scalar &sigma, RNG &rng)
void update(Optimizer &opt)
const Matrix & output() const
virtual void update(ConstAlignedMapVec &dvec, AlignedMapVec &vec)=0
FullyConnected(const int in_size, const int out_size)