1 #ifndef LAYER_FULLYCONNECTED_H_ 2 #define LAYER_FULLYCONNECTED_H_ 9 #include "../Utils/Random.h" 19 template <
typename Activation>
// Dense matrix of Scalar whose row and column counts are both chosen at run time.
23 typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;
// Column vector of Scalar: run-time number of rows, exactly one column.
24 typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
// Map types nested in Vector. Further down they wrap the raw buffers of the
// gradient members (const variant) and of the weights and bias (mutable variant)
// so that each buffer can be handled as one flat vector.
// NOTE(review): the "Aligned" variants presumably require suitably aligned
// storage -- confirm against the Eigen documentation.
25 typedef Vector::ConstAlignedMapType ConstAlignedMapVec;
26 typedef Vector::AlignedMapType AlignedMapVec;
// Passes both sizes on to Layer. Presumably this is the base-class entry of the
// FullyConnected constructor's initializer list; the constructor signature is
// not on this line -- confirm against the full header.
45 Layer(in_size, out_size)
// Sizes the parameter and gradient storage of this layer, then hands the weight
// and bias buffers to internal::set_normal_random for filling.
//   mu, sigma: forwarded unchanged to internal::set_normal_random -- presumably the
//              mean and standard deviation of a normal distribution (TODO confirm).
//   rng:       random number generator forwarded to the same helper.
48 void init(
const Scalar& mu,
const Scalar& sigma,
RNG& rng)
// Weights and their gradient are in_size x out_size; bias and its gradient
// hold out_size entries.
50 m_weight.resize(this->m_in_size, this->m_out_size);
51 m_bias.resize(this->m_out_size);
52 m_dw.resize(this->m_in_size, this->m_out_size);
53 m_db.resize(this->m_out_size);
// The helper receives each buffer's raw pointer and element count, so it works
// on the matrix and the vector in the same way.
56 internal::set_normal_random(m_weight.data(), m_weight.size(), rng, mu, sigma);
57 internal::set_normal_random(m_bias.data(), m_bias.size(), rng, mu, sigma);
// Computes this layer's output for a batch of inputs and stores it in m_a.
//   prev_layer_data: input matrix; its column count is taken as the number of
//                    observations. It is multiplied by m_weight.transpose(), so
//                    its row count has to match m_weight's row count.
61 void forward(
const Matrix& prev_layer_data)
63 const int nobs = prev_layer_data.cols();
// Linear part: z = W^T * input, written straight into m_z (noalias), after which
// the bias vector is added to every column.
65 m_z.resize(this->m_out_size, nobs);
66 m_z.noalias() = m_weight.transpose() * prev_layer_data;
67 m_z.colwise() += m_bias;
// m_a gets the same shape as m_z; Activation::activate is given both matrices.
70 m_a.resize(this->m_out_size, nobs);
71 Activation::activate(m_z, m_a);
// Fills m_dw, m_db and m_din from the layer input and the matrix dLz.
//   prev_layer_data: input matrix of this layer; its column count is taken as the
//                    number of observations.
//   next_layer_data: handed to Activation::apply_jacobian -- presumably the gradient
//                    arriving from the following layer (TODO confirm).
// NOTE(review): dLz is used below but not declared on any line shown here; its
// declaration must be on a line missing from this listing.
81 void backprop(
const Matrix& prev_layer_data,
const Matrix& next_layer_data)
83 const int nobs = prev_layer_data.cols();
// apply_jacobian receives m_z, m_a and next_layer_data, with dLz as last argument
// (presumably its output -- confirm against the Activation class).
90 Activation::apply_jacobian(m_z, m_a, next_layer_data, dLz);
// m_dw = input * dLz^T, divided by the number of observations.
94 m_dw.noalias() = prev_layer_data * dLz.transpose() / nobs;
// m_db = mean of each row of dLz.
97 m_db.noalias() = dLz.rowwise().mean();
// m_din = W * dLz: in_size rows, one column per observation.
100 m_din.resize(this->m_in_size, nobs);
101 m_din.noalias() = m_weight * dLz;
// Fragment of a method body whose signature is not on these lines; the member
// list at the end of this listing names `void update(Optimizer &opt)`, which
// presumably owns them (confirm).
// Wraps the gradient buffers as read-only flat vectors ...
111 ConstAlignedMapVec dw(m_dw.data(), m_dw.size());
112 ConstAlignedMapVec db(m_db.data(), m_db.size());
// ... and the weight and bias buffers as writable flat vectors of matching length.
113 AlignedMapVec w(m_weight.data(), m_weight.size());
114 AlignedMapVec b(m_bias.data(), m_bias.size());
// NOTE(review): the statements that consume dw, db, w and b are not shown here.
// Fragment: packs every weight followed by every bias entry into one flat
// std::vector. Presumably the body of get_parameters(); the signature and the
// return statement are not on these lines.
// Buffer with room for all weight and bias values.
122 std::vector<Scalar> res(m_weight.size() + m_bias.size());
// Weights come first, in m_weight's storage order; the bias values follow
// starting at offset m_weight.size().
124 std::copy(m_weight.data(), m_weight.data() + m_weight.size(), res.begin());
125 std::copy(m_bias.data(), m_bias.data() + m_bias.size(), res.begin() + m_weight.size());
// Fragment: loads the weights and bias from the flat sequence `param`. Presumably
// the body of set_parameters(); the signature is not on these lines.
// Throws std::invalid_argument when param's length differs from the total number
// of weight and bias entries, so nothing is copied from a wrongly sized input.
132 if(static_cast<int>(param.size()) != m_weight.size() + m_bias.size())
133 throw std::invalid_argument(
"Parameter size does not match");
// The first m_weight.size() values fill the weight storage; the rest fills the bias.
135 std::copy(param.begin(), param.begin() + m_weight.size(), m_weight.data());
136 std::copy(param.begin() + m_weight.size(), param.end(), m_bias.data());
// Fragment: packs every entry of m_dw followed by every entry of m_db into one
// flat std::vector. Presumably the body of get_derivatives(); the signature and
// the return statement are not on these lines.
// Buffer with room for all entries of both gradient members.
141 std::vector<Scalar> res(m_dw.size() + m_db.size());
// m_dw comes first, in its storage order; m_db follows at offset m_dw.size().
143 std::copy(m_dw.data(), m_dw.data() + m_dw.size(), res.begin());
144 std::copy(m_db.data(), m_db.data() + m_db.size(), res.begin() + m_dw.size());
std::vector< Scalar > get_derivatives() const
std::vector< Scalar > get_parameters() const
const Matrix & backprop_data() const
void backprop(const Matrix &prev_layer_data, const Matrix &next_layer_data)
void set_parameters(const std::vector< Scalar > &param)
void forward(const Matrix &prev_layer_data)
void init(const Scalar &mu, const Scalar &sigma, RNG &rng)
void update(Optimizer &opt)
const Matrix & output() const
virtual void update(ConstAlignedMapVec &dvec, AlignedMapVec &vec)=0
FullyConnected(const int in_size, const int out_size)