// NOTE(review): each line below carries a stray numeric token (the original
// file's line number, an extraction artifact); text preserved verbatim.
// Matrix is a dynamic-size Eigen matrix of the project's Scalar type.
12 #include "Utils/Random.h" 31 typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;
// Integer row vector, presumably for class labels -- TODO confirm usage.
32 typedef Eigen::RowVectorXi IntegerVector;
// Raw pointers to the network's layers; looks like owning pointers
// (NOTE(review): confirm they are deleted in the destructor).
37 std::vector<Layer*> m_layers;
44 void check_unit_sizes()
const 50 for(
int i = 1; i < nlayer; i++)
52 if(m_layers[i]->in_size() != m_layers[i - 1]->out_size())
53 throw std::invalid_argument(
"Unit sizes do not match");
58 void forward(
const Matrix& input)
65 if(input.rows() != m_layers[0]->in_size())
66 throw std::invalid_argument(
"Input data have incorrect dimension");
67 m_layers[0]->forward(input);
70 for(
int i = 1; i < nlayer; i++)
72 m_layers[i]->forward(m_layers[i - 1]->output());
80 template <
typename TargetType>
81 void backprop(
const Matrix& input,
const TargetType& target)
87 Layer* first_layer = m_layers[0];
88 Layer* last_layer = m_layers[nlayer - 1];
91 m_output->check_target_data(target);
92 m_output->evaluate(last_layer->
output(), target);
97 first_layer->
backprop(input, m_output->backprop_data());
102 last_layer->
backprop(m_layers[nlayer - 2]->output(), m_output->backprop_data());
104 for(
int i = nlayer - 2; i > 0; i--)
106 m_layers[i]->backprop(m_layers[i - 1]->output(), m_layers[i + 1]->backprop_data());
109 first_layer->
backprop(input, m_layers[1]->backprop_data());
// Parameter-update pass: each layer applies the optimizer step to its own
// weights. NOTE(review): the enclosing function signature (presumably
// `void update(Optimizer& opt)`) lies outside this fragment -- confirm.
119 for(
int i = 0; i < nlayer; i++)
121 m_layers[i]->update(opt);
// Constructor member-initializer fragments. The RNG reference is bound to
// the internal default RNG, and the built-in callback object is installed
// as the active callback pointer.
131 m_rng(m_default_rng),
133 m_default_callback(),
134 m_callback(&m_default_callback)
// Second constructor: same default-callback wiring (its other initializers
// are outside this fragment).
147 m_default_callback(),
148 m_callback(&m_default_callback)
// Loop over all layers; the loop body lies outside this fragment
// (presumably deletes each owned Layer* in the destructor -- TODO confirm
// against the full file).
157 for(
int i = 0; i < nlayer; i++)
// Append a layer to the network; the container stores the raw pointer,
// so the network presumably takes ownership -- TODO confirm.
176 m_layers.push_back(layer);
// Build a read-only (const Layer*) copy of the layer-pointer list so
// callers can inspect but not mutate the layers.
206 std::vector<const Layer*> layers(nlayer);
207 std::copy(m_layers.begin(), m_layers.end(), layers.begin());
// Point the active callback at the user-supplied object (non-owning;
// the caller must keep `callback` alive while training runs).
224 m_callback = &callback;
// Restore the built-in default callback as the active callback.
231 m_callback = &m_default_callback;
242 void init(
const Scalar& mu = Scalar(0),
const Scalar& sigma = Scalar(0.01),
int seed = -1)
250 for(
int i = 0; i < nlayer; i++)
252 m_layers[i]->init(mu, sigma, m_rng);
// Accumulator for each layer's parameter vector; the loop body that fills
// it lies outside this fragment.
262 std::vector< std::vector<Scalar> > res;
264 for(
int i = 0; i < nlayer; i++)
280 if(static_cast<int>(param.size()) != nlayer)
281 throw std::invalid_argument(
"Parameter size does not match");
283 for(
int i = 0; i < nlayer; i++)
285 m_layers[i]->set_parameters(param[i]);
// Accumulator for each layer's derivative vector; the loop body that fills
// it lies outside this fragment.
295 std::vector< std::vector<Scalar> > res;
297 for(
int i = 0; i < nlayer; i++)
308 template <
typename TargetType>
309 void check_gradient(
const Matrix& input,
const TargetType& target,
int npoints,
int seed = -1)
314 this->forward(input);
315 this->backprop(input, target);
316 std::vector< std::vector<Scalar> > param = this->
get_parameters();
319 const Scalar eps = 1e-5;
320 const int nlayer = deriv.size();
321 for(
int i = 0; i < npoints; i++)
324 const int layer_id = int(m_rng.rand() * nlayer);
326 const int nparam = deriv[layer_id].size();
327 if(nparam < 1)
continue;
328 const int param_id = int(m_rng.rand() * nparam);
330 const Scalar old = param[layer_id][param_id];
332 param[layer_id][param_id] -= eps;
334 this->forward(input);
335 this->backprop(input, target);
336 const Scalar loss_pre = m_output->loss();
338 param[layer_id][param_id] += eps * 2;
340 this->forward(input);
341 this->backprop(input, target);
342 const Scalar loss_post = m_output->loss();
344 const Scalar deriv_est = (loss_post - loss_pre) / eps / 2;
346 std::cout <<
"[layer " << layer_id <<
", param " << param_id <<
347 "] deriv = " << deriv[layer_id][param_id] <<
", est = " << deriv_est <<
348 ", diff = " << deriv_est - deriv[layer_id][param_id] << std::endl;
350 param[layer_id][param_id] = old;
368 template <
typename DerivedX,
typename DerivedY>
369 bool fit(
Optimizer& opt,
const Eigen::MatrixBase<DerivedX>& x,
const Eigen::MatrixBase<DerivedY>& y,
370 int batch_size,
int epoch,
int seed = -1)
374 typedef typename Eigen::MatrixBase<DerivedX>::PlainObject PlainObjectX;
375 typedef typename Eigen::MatrixBase<DerivedY>::PlainObject PlainObjectY;
376 typedef Eigen::Matrix<typename PlainObjectX::Scalar, PlainObjectX::RowsAtCompileTime, PlainObjectX::ColsAtCompileTime> XType;
377 typedef Eigen::Matrix<typename PlainObjectY::Scalar, PlainObjectY::RowsAtCompileTime, PlainObjectY::ColsAtCompileTime> YType;
390 std::vector<XType> x_batches;
391 std::vector<YType> y_batches;
392 const int nbatch = internal::create_shuffled_batches(x, y, batch_size, m_rng, x_batches, y_batches);
395 m_callback->m_nbatch = nbatch;
396 m_callback->m_nepoch = epoch;
399 for(
int k = 0; k < epoch; k++)
401 m_callback->m_epoch_id = k;
404 for(
int i = 0; i < nbatch; i++)
406 m_callback->m_batch_id = i;
407 m_callback->pre_training_batch(
this, x_batches[i], y_batches[i]);
409 this->forward(x_batches[i]);
410 this->backprop(x_batches[i], y_batches[i]);
413 m_callback->post_training_batch(
this, x_batches[i], y_batches[i]);
// The network's prediction is the output of the last layer (presumably the
// tail of a predict() that ran forward() first -- the rest of the function
// lies outside this fragment).
432 return m_layers[nlayer - 1]->output();
std::vector< std::vector< Scalar > > get_derivatives() const
void add_layer(Layer *layer)
void set_default_callback()
void set_output(Output *output)
const Output * get_output() const
std::vector< const Layer * > get_layers() const
void check_gradient(const Matrix &input, const TargetType &target, int npoints, int seed=-1)
bool fit(Optimizer &opt, const Eigen::MatrixBase< DerivedX > &x, const Eigen::MatrixBase< DerivedY > &y, int batch_size, int epoch, int seed=-1)
void set_callback(Callback &callback)
virtual void backprop(const Matrix &prev_layer_data, const Matrix &next_layer_data)=0
virtual const Matrix & output() const =0
void set_parameters(const std::vector< std::vector< Scalar > > &param)
std::vector< std::vector< Scalar > > get_parameters() const
Matrix predict(const Matrix &x)
void init(const Scalar &mu=Scalar(0), const Scalar &sigma=Scalar(0.01), int seed=-1)