Commit aafa4116 authored by Davis King

Added mult_prev layer.

parent f7310f4b
@@ -2126,6 +2126,107 @@ namespace dlib
    using add_prev9_ = add_prev_<tag9>;
    using add_prev10_ = add_prev_<tag10>;
// ----------------------------------------------------------------------------------------
    template <
        template<typename> class tag
        >
    class mult_prev_
    {
    public:
        const static unsigned long id = tag_id<tag>::id;

        mult_prev_()
        {
        }

        template <typename SUBNET>
        void setup (const SUBNET& /*sub*/)
        {
        }

        template <typename SUBNET>
        void forward(const SUBNET& sub, resizable_tensor& output)
        {
            auto&& t1 = sub.get_output();
            auto&& t2 = layer<tag>(sub).get_output();
            // Each output dimension is the max of the corresponding input
            // dimensions; any elements missing from the smaller tensor are
            // treated as 0 by the zero-padded multiply below.
            output.set_size(std::max(t1.num_samples(),t2.num_samples()),
                            std::max(t1.k(),t2.k()),
                            std::max(t1.nr(),t2.nr()),
                            std::max(t1.nc(),t2.nc()));
            tt::multiply_zero_padded(false, output, t1, t2);
        }

        template <typename SUBNET>
        void backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/)
        {
            auto&& t1 = sub.get_output();
            auto&& t2 = layer<tag>(sub).get_output();
            // The gradient just flows backwards to the two layers that forward()
            // multiplied together.  By the product rule, the gradient for each
            // input is the incoming gradient times the other input.
            tt::multiply_zero_padded(true, sub.get_gradient_input(), t2, gradient_input);
            tt::multiply_zero_padded(true, layer<tag>(sub).get_gradient_input(), t1, gradient_input);
        }

        const tensor& get_layer_params() const { return params; }
        tensor& get_layer_params() { return params; }

        friend void serialize(const mult_prev_& , std::ostream& out)
        {
            serialize("mult_prev_", out);
        }

        friend void deserialize(mult_prev_& , std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "mult_prev_")
                throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::mult_prev_.");
        }

        friend std::ostream& operator<<(std::ostream& out, const mult_prev_& /*item*/)
        {
            out << "mult_prev"<<id;
            return out;
        }

        friend void to_xml(const mult_prev_& /*item*/, std::ostream& out)
        {
            out << "<mult_prev tag='"<<id<<"'/>\n";
        }

    private:
        resizable_tensor params;
    };
    template <
        template<typename> class tag,
        typename SUBNET
        >
    using mult_prev = add_layer<mult_prev_<tag>, SUBNET>;

    template <typename SUBNET> using mult_prev1 = mult_prev<tag1, SUBNET>;
    template <typename SUBNET> using mult_prev2 = mult_prev<tag2, SUBNET>;
    template <typename SUBNET> using mult_prev3 = mult_prev<tag3, SUBNET>;
    template <typename SUBNET> using mult_prev4 = mult_prev<tag4, SUBNET>;
    template <typename SUBNET> using mult_prev5 = mult_prev<tag5, SUBNET>;
    template <typename SUBNET> using mult_prev6 = mult_prev<tag6, SUBNET>;
    template <typename SUBNET> using mult_prev7 = mult_prev<tag7, SUBNET>;
    template <typename SUBNET> using mult_prev8 = mult_prev<tag8, SUBNET>;
    template <typename SUBNET> using mult_prev9 = mult_prev<tag9, SUBNET>;
    template <typename SUBNET> using mult_prev10 = mult_prev<tag10, SUBNET>;

    using mult_prev1_ = mult_prev_<tag1>;
    using mult_prev2_ = mult_prev_<tag2>;
    using mult_prev3_ = mult_prev_<tag3>;
    using mult_prev4_ = mult_prev_<tag4>;
    using mult_prev5_ = mult_prev_<tag5>;
    using mult_prev6_ = mult_prev_<tag6>;
    using mult_prev7_ = mult_prev_<tag7>;
    using mult_prev8_ = mult_prev_<tag8>;
    using mult_prev9_ = mult_prev_<tag9>;
    using mult_prev10_ = mult_prev_<tag10>;
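For intuition about the zero-padded, shape-broadcasting multiply that forward() and backward() rely on, here is a minimal standalone sketch. It only uses routines that appear in this diff (tt::multiply_zero_padded() and the output sizing logic from forward()); the concrete tensor shapes are made up for illustration:

    #include <dlib/dnn.h>
    #include <algorithm>
    #include <iostream>

    int main()
    {
        using namespace dlib;

        // Two inputs whose k dimensions disagree: 2x3x4x4 vs 2x1x4x4.
        resizable_tensor a(2,3,4,4), b(2,1,4,4);
        a = 1;  // fill every element of a with 1
        b = 2;  // fill every element of b with 2

        // Size the output the way mult_prev_::forward() does: each
        // dimension is the max of the corresponding input dimensions.
        resizable_tensor out;
        out.set_size(std::max(a.num_samples(),b.num_samples()),
                     std::max(a.k(),b.k()),
                     std::max(a.nr(),b.nr()),
                     std::max(a.nc(),b.nc()));

        // The first argument false means overwrite out rather than add to
        // it.  Elements missing from the smaller tensor count as 0, so
        // channel 0 of out is 1*2 == 2 while channels 1 and 2 are 0.
        tt::multiply_zero_padded(false, out, a, b);

        std::cout << "first element of out: " << out.host()[0] << std::endl;
        return 0;
    }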
// ----------------------------------------------------------------------------------------

    class relu_
...
@@ -2138,6 +2138,78 @@ namespace dlib
    using add_prev9_ = add_prev_<tag9>;
    using add_prev10_ = add_prev_<tag10>;
// ----------------------------------------------------------------------------------------
    template <
        template<typename> class tag
        >
    class mult_prev_
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
                defined above.  This layer simply multiplies the output of two previous
                layers.  In particular, it multiplies the tensor from its immediate
                predecessor layer, sub.get_output(), with the tensor from a deeper layer,
                layer<tag>(sub).get_output().

                Therefore, you supply a tag via mult_prev_'s template argument that tells
                it what layer to multiply with the output of the previous layer.  The
                result of this multiplication is output by mult_prev_.  Finally, the
                multiplication happens pointwise according to 4D tensor arithmetic.  If the
                dimensions don't match then missing elements are presumed to be equal to 0.
                Moreover, each dimension of the output tensor is equal to the maximum
                dimension of either of the inputs.  That is, if the tensors A and B are
                being multiplied to produce C then:
                    - C.num_samples() == max(A.num_samples(), B.num_samples())
                    - C.k()  == max(A.k(), B.k())
                    - C.nr() == max(A.nr(), B.nr())
                    - C.nc() == max(A.nc(), B.nc())
        !*/

    public:
        mult_prev_(
        );

        template <typename SUBNET> void setup (const SUBNET& sub);
        template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
        template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
        const tensor& get_layer_params() const;
        tensor& get_layer_params();
        /*!
            These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
        !*/
    };
    template <
        template<typename> class tag,
        typename SUBNET
        >
    using mult_prev = add_layer<mult_prev_<tag>, SUBNET>;

    // Here we add some convenient aliases for using mult_prev_ with the tag layers.
    template <typename SUBNET> using mult_prev1 = mult_prev<tag1, SUBNET>;
    template <typename SUBNET> using mult_prev2 = mult_prev<tag2, SUBNET>;
    template <typename SUBNET> using mult_prev3 = mult_prev<tag3, SUBNET>;
    template <typename SUBNET> using mult_prev4 = mult_prev<tag4, SUBNET>;
    template <typename SUBNET> using mult_prev5 = mult_prev<tag5, SUBNET>;
    template <typename SUBNET> using mult_prev6 = mult_prev<tag6, SUBNET>;
    template <typename SUBNET> using mult_prev7 = mult_prev<tag7, SUBNET>;
    template <typename SUBNET> using mult_prev8 = mult_prev<tag8, SUBNET>;
    template <typename SUBNET> using mult_prev9 = mult_prev<tag9, SUBNET>;
    template <typename SUBNET> using mult_prev10 = mult_prev<tag10, SUBNET>;

    using mult_prev1_ = mult_prev_<tag1>;
    using mult_prev2_ = mult_prev_<tag2>;
    using mult_prev3_ = mult_prev_<tag3>;
    using mult_prev4_ = mult_prev_<tag4>;
    using mult_prev5_ = mult_prev_<tag5>;
    using mult_prev6_ = mult_prev_<tag6>;
    using mult_prev7_ = mult_prev_<tag7>;
    using mult_prev8_ = mult_prev_<tag8>;
    using mult_prev9_ = mult_prev_<tag9>;
    using mult_prev10_ = mult_prev_<tag10>;
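As a usage sketch, the aliases above compose like any other dlib layer. The toy network below has the same shape as the regression test added in this commit: tag1 marks the first fc<2>, mult_prev1 multiplies the second fc<2>'s output with that tagged tensor pointwise, and the final fc<1> combines the products, which lets the net represent quadratic functions of its input such as the test's y = 2*x*x target:

    #include <dlib/dnn.h>
    #include <iostream>

    using namespace dlib;

    using net_type = loss_mean_squared<
        fc<1, mult_prev1<fc<2, tag1<fc<2, input<matrix<double>>>>>>>>;

    int main()
    {
        net_type net;
        matrix<double> x(1,1);
        x = 3.0;
        // Forward pass with untrained weights; training would use
        // dnn_trainer exactly as in the test added by this commit.
        const float y = net(x);
        std::cout << "prediction: " << y << std::endl;
        return 0;
    }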
// ----------------------------------------------------------------------------------------

    template<
...
@@ -1990,6 +1990,49 @@ namespace
    }
// ----------------------------------------------------------------------------------------
    void test_simple_linear_regression_with_mult_prev()
    {
        print_spinner();
        const int num_samples = 1000;
        ::std::vector<matrix<double>> x(num_samples);
        ::std::vector<float> y(num_samples);
        const float true_slope = 2.0;
        for ( int ii = 0; ii < num_samples; ++ii )
        {
            const double val = static_cast<double>(ii-500)/100;
            matrix<double> tmp(1,1);
            tmp = val;
            x[ii] = tmp;
            y[ii] = ( true_slope*static_cast<float>(val*val));
        }
        randomize_samples(x,y);

        // The targets are quadratic in the input, y = 2*x*x.  The network can
        // represent this because mult_prev1 multiplies two affine functions of
        // x (the outputs of the two fc<2> layers) and the final fc<1> linearly
        // combines the resulting products.
        using net_type = loss_mean_squared<fc<1, mult_prev1<fc<2,tag1<fc<2,input<matrix<double>>>>>>>>;
        net_type net;
        sgd defsolver(0,0.9);
        dnn_trainer<net_type> trainer(net, defsolver);
        trainer.set_learning_rate(1e-5);
        trainer.set_min_learning_rate(1e-11);
        trainer.set_mini_batch_size(50);
        trainer.set_max_num_epochs(300);
        trainer.train(x, y);

        running_stats<double> rs;
        for (size_t i = 0; i < x.size(); ++i)
        {
            double val = y[i];
            double out = net(x[i]);
            rs.add(std::abs(val-out));
        }
        dlog << LINFO << "rs.mean():   " << rs.mean();
        dlog << LINFO << "rs.stddev(): " << rs.stddev();
        dlog << LINFO << "rs.max():    " << rs.max();
        DLIB_TEST(rs.mean() < 0.1);
    }
// ----------------------------------------------------------------------------------------

    void test_multioutput_linear_regression()
@@ -2706,6 +2749,7 @@ namespace
        test_copy_tensor_cpu();
        test_concat();
        test_simple_linear_regression();
        test_simple_linear_regression_with_mult_prev();
        test_multioutput_linear_regression();
        test_simple_autoencoder();
        test_loss_multiclass_per_pixel_learned_params_on_trivial_single_pixel_task();
...