MiniDNN
Network.h
1 #ifndef NETWORK_H_
2 #define NETWORK_H_
3 
#include <Eigen/Core>
#include <vector>
#include <stdexcept>
#include <algorithm>   // std::copy
#include <iostream>    // std::cout
#include "Config.h"
#include "RNG.h"
#include "Layer.h"
#include "Output.h"
#include "Callback.h"
#include "Utils/Random.h"
13 
14 namespace MiniDNN {
15 
16 
20 
28 class Network
29 {
30 private:
31  typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;
32  typedef Eigen::RowVectorXi IntegerVector;
33 
34  RNG m_default_rng; // Built-in RNG
35  RNG& m_rng; // Reference to the RNG provided by the user,
36  // otherwise reference to m_default_rng
37  std::vector<Layer*> m_layers; // Pointers to hidden layers
38  Output* m_output; // The output layer
39  Callback m_default_callback; // Default callback function
40  Callback* m_callback; // Points to user-provided callback function,
41  // otherwise points to m_default_callback
42 
43  // Check dimensions of layers
44  void check_unit_sizes() const
45  {
46  const int nlayer = num_layers();
47  if(nlayer <= 1)
48  return;
49 
50  for(int i = 1; i < nlayer; i++)
51  {
52  if(m_layers[i]->in_size() != m_layers[i - 1]->out_size())
53  throw std::invalid_argument("Unit sizes do not match");
54  }
55  }
56 
57  // Let each layer compute its output
58  void forward(const Matrix& input)
59  {
60  const int nlayer = num_layers();
61  if(nlayer <= 0)
62  return;
63 
64  // First layer
65  if(input.rows() != m_layers[0]->in_size())
66  throw std::invalid_argument("Input data have incorrect dimension");
67  m_layers[0]->forward(input);
68 
69  // The following layers
70  for(int i = 1; i < nlayer; i++)
71  {
72  m_layers[i]->forward(m_layers[i - 1]->output());
73  }
74  }
75 
76  // Let each layer compute its gradients of the parameters
77  // target has two versions: Matrix and RowVectorXi
78  // The RowVectorXi version is used in classification problems where each
79  // element is a class label
80  template <typename TargetType>
81  void backprop(const Matrix& input, const TargetType& target)
82  {
83  const int nlayer = num_layers();
84  if(nlayer <= 0)
85  return;
86 
87  Layer* first_layer = m_layers[0];
88  Layer* last_layer = m_layers[nlayer - 1];
89 
90  // Let output layer compute back-propagation data
91  m_output->check_target_data(target);
92  m_output->evaluate(last_layer->output(), target);
93 
94  // If there is only one hidden layer, "prev_layer_data" will be the input data
95  if(nlayer == 1)
96  {
97  first_layer->backprop(input, m_output->backprop_data());
98  return;
99  }
100 
101  // Compute gradients for the last hidden layer
102  last_layer->backprop(m_layers[nlayer - 2]->output(), m_output->backprop_data());
103  // Compute gradients for all the hidden layers except for the first one and the last one
104  for(int i = nlayer - 2; i > 0; i--)
105  {
106  m_layers[i]->backprop(m_layers[i - 1]->output(), m_layers[i + 1]->backprop_data());
107  }
108  // Compute gradients for the first layer
109  first_layer->backprop(input, m_layers[1]->backprop_data());
110  }
111 
112  // Update parameters
113  void update(Optimizer& opt)
114  {
115  const int nlayer = num_layers();
116  if(nlayer <= 0)
117  return;
118 
119  for(int i = 0; i < nlayer; i++)
120  {
121  m_layers[i]->update(opt);
122  }
123  }
124 
125 public:
130  m_default_rng(1),
131  m_rng(m_default_rng),
132  m_output(NULL),
133  m_default_callback(),
134  m_callback(&m_default_callback)
135  {}
136 
    ///
    /// Constructor with a user-provided random number generator.
    ///
    /// \param rng RNG object used for parameter initialization and
    ///            mini-batch shuffling; must outlive this Network, since
    ///            only a reference is stored.
    ///
    Network(RNG& rng) :
        m_default_rng(1),
        m_rng(rng),
        m_output(NULL),
        m_default_callback(),
        m_callback(&m_default_callback)
    {}
150 
155  {
156  const int nlayer = num_layers();
157  for(int i = 0; i < nlayer; i++)
158  {
159  delete m_layers[i];
160  }
161 
162  if(m_output)
163  delete m_output;
164  }
165 
    ///
    /// Append a hidden layer to the network.
    ///
    /// \param layer Pointer to a heap-allocated layer; the network takes
    ///              ownership and deletes it in the destructor.
    ///
    void add_layer(Layer* layer)
    {
        m_layers.push_back(layer);
    }
178 
187  void set_output(Output* output)
188  {
189  if(m_output)
190  delete m_output;
191 
192  m_output = output;
193  }
194 
198  int num_layers() const { return m_layers.size(); }
199 
203  std::vector<const Layer*> get_layers() const
204  {
205  const int nlayer = num_layers();
206  std::vector<const Layer*> layers(nlayer);
207  std::copy(m_layers.begin(), m_layers.end(), layers.begin());
208  return layers;
209  }
210 
    ///
    /// Read-only access to the output layer; NULL if none has been set.
    ///
    const Output* get_output() const { return m_output; }
215 
    ///
    /// Register a user-provided callback to be invoked before and after
    /// each mini-batch during fit().
    ///
    /// \param callback Callback object; must outlive this Network, since
    ///                 only its address is stored.
    ///
    void set_callback(Callback& callback)
    {
        m_callback = &callback;
    }
230  {
231  m_callback = &m_default_callback;
232  }
233 
242  void init(const Scalar& mu = Scalar(0), const Scalar& sigma = Scalar(0.01), int seed = -1)
243  {
244  check_unit_sizes();
245 
246  if(seed > 0)
247  m_rng.seed(seed);
248 
249  const int nlayer = num_layers();
250  for(int i = 0; i < nlayer; i++)
251  {
252  m_layers[i]->init(mu, sigma, m_rng);
253  }
254  }
255 
259  std::vector< std::vector<Scalar> > get_parameters() const
260  {
261  const int nlayer = num_layers();
262  std::vector< std::vector<Scalar> > res;
263  res.reserve(nlayer);
264  for(int i = 0; i < nlayer; i++)
265  {
266  res.push_back(m_layers[i]->get_parameters());
267  }
268 
269  return res;
270  }
271 
277  void set_parameters(const std::vector< std::vector<Scalar> >& param)
278  {
279  const int nlayer = num_layers();
280  if(static_cast<int>(param.size()) != nlayer)
281  throw std::invalid_argument("Parameter size does not match");
282 
283  for(int i = 0; i < nlayer; i++)
284  {
285  m_layers[i]->set_parameters(param[i]);
286  }
287  }
288 
292  std::vector< std::vector<Scalar> > get_derivatives() const
293  {
294  const int nlayer = num_layers();
295  std::vector< std::vector<Scalar> > res;
296  res.reserve(nlayer);
297  for(int i = 0; i < nlayer; i++)
298  {
299  res.push_back(m_layers[i]->get_derivatives());
300  }
301 
302  return res;
303  }
304 
    ///
    /// Debugging tool: numerically check the gradients computed by
    /// back-propagation against a central finite-difference estimate,
    /// printing the comparison for `npoints` randomly chosen parameters.
    ///
    /// \param input   Input data matrix.
    /// \param target  Target values; a Matrix, or a RowVectorXi of class
    ///                labels for classification.
    /// \param npoints Number of randomly selected parameters to test.
    /// \param seed    RNG seed; only applied when positive.
    ///
    /// NOTE(review): results go to std::cout, but this header does not
    /// include <iostream> itself — it relies on a transitive include.
    template <typename TargetType>
    void check_gradient(const Matrix& input, const TargetType& target, int npoints, int seed = -1)
    {
        if(seed > 0)
            m_rng.seed(seed);

        // Compute the analytic derivatives once at the current parameters
        this->forward(input);
        this->backprop(input, target);
        std::vector< std::vector<Scalar> > param = this->get_parameters();
        std::vector< std::vector<Scalar> > deriv = this->get_derivatives();

        const Scalar eps = 1e-5;
        const int nlayer = deriv.size();
        for(int i = 0; i < npoints; i++)
        {
            // Randomly select a layer
            // (assumes m_rng.rand() returns values in [0, 1); if it can
            // return exactly 1, layer_id would be out of range — TODO confirm)
            const int layer_id = int(m_rng.rand() * nlayer);
            // Randomly pick a parameter; note that some layers may have no parameters
            const int nparam = deriv[layer_id].size();
            if(nparam < 1) continue;
            const int param_id = int(m_rng.rand() * nparam);
            // Perturb the chosen parameter by -eps and +eps and evaluate
            // the loss at both points
            const Scalar old = param[layer_id][param_id];

            param[layer_id][param_id] -= eps;
            this->set_parameters(param);
            this->forward(input);
            this->backprop(input, target);
            const Scalar loss_pre = m_output->loss();

            param[layer_id][param_id] += eps * 2;
            this->set_parameters(param);
            this->forward(input);
            this->backprop(input, target);
            const Scalar loss_post = m_output->loss();

            // Central difference: (f(x + eps) - f(x - eps)) / (2 * eps)
            const Scalar deriv_est = (loss_post - loss_pre) / eps / 2;

            std::cout << "[layer " << layer_id << ", param " << param_id <<
                      "] deriv = " << deriv[layer_id][param_id] << ", est = " << deriv_est <<
                      ", diff = " << deriv_est - deriv[layer_id][param_id] << std::endl;

            // Undo the perturbation before the next sample point
            param[layer_id][param_id] = old;
        }

        // Restore original parameters
        this->set_parameters(param);
    }
356 
    ///
    /// Train the network on a data set using shuffled mini-batches.
    ///
    /// \param opt        Optimizer used to update layer parameters.
    /// \param x          Training data; column layout is assumed to match
    ///                   forward() (rows checked against the first layer's
    ///                   input size).
    /// \param y          Targets; a matrix, or a row vector of class labels.
    /// \param batch_size Mini-batch size.
    /// \param epoch      Number of full passes over the data set.
    /// \param seed       RNG seed for batch shuffling; only applied when positive.
    /// \return           false if the network has no layers, true otherwise.
    ///
    template <typename DerivedX, typename DerivedY>
    bool fit(Optimizer& opt, const Eigen::MatrixBase<DerivedX>& x, const Eigen::MatrixBase<DerivedY>& y,
             int batch_size, int epoch, int seed = -1)
    {
        // We do not directly use PlainObjectX since it may be row-majored if x is passed as mat.transpose()
        // We want to force XType and YType to be column-majored
        typedef typename Eigen::MatrixBase<DerivedX>::PlainObject PlainObjectX;
        typedef typename Eigen::MatrixBase<DerivedY>::PlainObject PlainObjectY;
        typedef Eigen::Matrix<typename PlainObjectX::Scalar, PlainObjectX::RowsAtCompileTime, PlainObjectX::ColsAtCompileTime> XType;
        typedef Eigen::Matrix<typename PlainObjectY::Scalar, PlainObjectY::RowsAtCompileTime, PlainObjectY::ColsAtCompileTime> YType;

        const int nlayer = num_layers();
        if(nlayer <= 0)
            return false;

        // Reset optimizer state (e.g. accumulated moments) before training
        opt.reset();

        // Create shuffled mini-batches
        if(seed > 0)
            m_rng.seed(seed);

        std::vector<XType> x_batches;
        std::vector<YType> y_batches;
        const int nbatch = internal::create_shuffled_batches(x, y, batch_size, m_rng, x_batches, y_batches);

        // Expose batch/epoch counts to the callback
        m_callback->m_nbatch = nbatch;
        m_callback->m_nepoch = epoch;

        // Iterations on the whole data set
        for(int k = 0; k < epoch; k++)
        {
            m_callback->m_epoch_id = k;

            // Train on each mini-batch: callback before, then
            // forward / backward / parameter update, then callback after
            for(int i = 0; i < nbatch; i++)
            {
                m_callback->m_batch_id = i;
                m_callback->pre_training_batch(this, x_batches[i], y_batches[i]);

                this->forward(x_batches[i]);
                this->backprop(x_batches[i], y_batches[i]);
                this->update(opt);

                m_callback->post_training_batch(this, x_batches[i], y_batches[i]);
            }
        }

        return true;
    }
419 
425  Matrix predict(const Matrix& x)
426  {
427  const int nlayer = num_layers();
428  if(nlayer <= 0)
429  return Matrix();
430 
431  this->forward(x);
432  return m_layers[nlayer - 1]->output();
433  }
434 };
435 
436 
437 } // namespace MiniDNN
438 
439 
440 #endif /* NETWORK_H_ */
std::vector< std::vector< Scalar > > get_derivatives() const
Definition: Network.h:292
void add_layer(Layer *layer)
Definition: Network.h:174
void set_default_callback()
Definition: Network.h:229
void set_output(Output *output)
Definition: Network.h:187
const Output * get_output() const
Definition: Network.h:214
std::vector< const Layer * > get_layers() const
Definition: Network.h:203
void check_gradient(const Matrix &input, const TargetType &target, int npoints, int seed=-1)
Definition: Network.h:309
bool fit(Optimizer &opt, const Eigen::MatrixBase< DerivedX > &x, const Eigen::MatrixBase< DerivedY > &y, int batch_size, int epoch, int seed=-1)
Definition: Network.h:369
void set_callback(Callback &callback)
Definition: Network.h:222
virtual void backprop(const Matrix &prev_layer_data, const Matrix &next_layer_data)=0
virtual const Matrix & output() const =0
void set_parameters(const std::vector< std::vector< Scalar > > &param)
Definition: Network.h:277
Network(RNG &rng)
Definition: Network.h:143
virtual void reset()
Definition: Optimizer.h:32
int num_layers() const
Definition: Network.h:198
std::vector< std::vector< Scalar > > get_parameters() const
Definition: Network.h:259
Matrix predict(const Matrix &x)
Definition: Network.h:425
void init(const Scalar &mu=Scalar(0), const Scalar &sigma=Scalar(0.01), int seed=-1)
Definition: Network.h:242