Commit f1fe908a authored by Davis King's avatar Davis King

Added loss_dot layer

parent a0220801
......@@ -2468,6 +2468,111 @@ namespace dlib
// ----------------------------------------------------------------------------------------
class loss_dot_
{
public:

    typedef matrix<float,0,1> training_label_type;
    typedef matrix<float,0,1> output_label_type;

    template <
        typename SUB_TYPE,
        typename label_iterator
        >
    void to_label (
        const tensor& input_tensor,
        const SUB_TYPE& sub,
        label_iterator iter
    ) const
    {
        // Each output label is simply the corresponding row of the network's
        // final output tensor, handed back to the caller as a column vector.
        const tensor& output_tensor = sub.get_output();
        DLIB_CASSERT(sub.sample_expansion_factor() == 1);
        DLIB_CASSERT(input_tensor.num_samples() != 0);
        DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0);
        DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());

        const long num_samples = output_tensor.num_samples();
        for (long r = 0; r < num_samples; ++r)
        {
            *iter = trans(rowm(mat(output_tensor),r));
            ++iter;
        }
    }

    template <
        typename const_label_iterator,
        typename SUBNET
        >
    double compute_loss_value_and_gradient (
        const tensor& input_tensor,
        const_label_iterator truth,
        SUBNET& sub
    ) const
    {
        // The per-sample loss is -dot(network_output, label).  Its gradient
        // with respect to the network output is therefore just -label, so we
        // write -label*scale into the gradient tensor while accumulating the
        // negative dot products into the returned loss value.
        const tensor& output_tensor = sub.get_output();
        tensor& grad = sub.get_gradient_input();
        DLIB_CASSERT(sub.sample_expansion_factor() == 1);
        DLIB_CASSERT(input_tensor.num_samples() != 0);
        DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0);
        DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
        DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());

        const long network_output_dims = output_tensor.size()/output_tensor.num_samples();
        // The loss we output is the average loss over the mini-batch.
        const double scale = 1.0/output_tensor.num_samples();

        double total_loss = 0;
        float* gptr = grad.host();
        const float* optr = output_tensor.host();
        for (long sample = 0; sample < output_tensor.num_samples(); ++sample)
        {
            DLIB_CASSERT(truth->size() == network_output_dims, "The network must output a vector with the same dimensionality as the training labels. "
                << "\ntruth->size(): " << truth->size()
                << "\nnetwork_output_dims: " << network_output_dims);
            const float* tptr = &(*truth++)(0);
            for (long k = 0; k < network_output_dims; ++k)
            {
                gptr[k] = -tptr[k]*scale;
                total_loss -= optr[k]*tptr[k];
            }
            // Advance to the next sample's slice of the flat host buffers.
            gptr += network_output_dims;
            optr += network_output_dims;
        }
        return total_loss*scale;
    }

    friend void serialize(const loss_dot_& , std::ostream& out)
    {
        // The layer is stateless; only a version tag is written.
        serialize("loss_dot_", out);
    }

    friend void deserialize(loss_dot_& , std::istream& in)
    {
        std::string version;
        deserialize(version, in);
        if (version == "loss_dot_")
            return;
        throw serialization_error("Unexpected version found while deserializing dlib::loss_dot_.");
    }

    friend std::ostream& operator<<(std::ostream& out, const loss_dot_& )
    {
        out << "loss_dot";
        return out;
    }

    friend void to_xml(const loss_dot_& /*item*/, std::ostream& out)
    {
        out << "<loss_dot/>";
    }

};
// Convenience alias: wraps any SUBNET with the loss_dot_ loss layer.
template <typename SUBNET>
using loss_dot = add_loss_layer<loss_dot_, SUBNET>;
// ----------------------------------------------------------------------------------------
}
......
......@@ -1250,6 +1250,68 @@ namespace dlib
// Convenience alias: wraps any SUBNET with the mean squared per-pixel loss layer.
template <typename SUBNET>
using loss_mean_squared_per_pixel = add_loss_layer<loss_mean_squared_per_pixel_, SUBNET>;
// ----------------------------------------------------------------------------------------
class loss_dot_
{
/*!
WHAT THIS OBJECT REPRESENTS
This object implements the loss layer interface defined above by
EXAMPLE_LOSS_LAYER_. In particular, selecting this loss means you want
to maximize the dot product between the output of a network and a set of
training vectors. The loss is therefore the negative dot product. To be
very specific, if X is the output vector of a network and Y is a training
label (also a vector), then the loss for this training sample is: -dot(X,Y)
!*/
public:
typedef matrix<float,0,1> training_label_type;
typedef matrix<float,0,1> output_label_type;
template <
typename SUB_TYPE,
typename label_iterator
>
void to_label (
const tensor& input_tensor,
const SUB_TYPE& sub,
label_iterator iter
) const;
/*!
This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
it has the additional calling requirements that:
- sub.get_output().num_samples() == input_tensor.num_samples()
- sub.sample_expansion_factor() == 1
and the output labels are simply the final network outputs stuffed into a
vector. To be very specific, the output is the following for all valid i:
*(iter+i) == trans(rowm(mat(sub.get_output()),i))
!*/
template <
typename const_label_iterator,
typename SUBNET
>
double compute_loss_value_and_gradient (
const tensor& input_tensor,
const_label_iterator truth,
SUBNET& sub
) const;
/*!
This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient()
except it has the additional calling requirements that:
- sub.get_output().num_samples() == input_tensor.num_samples()
- sub.sample_expansion_factor() == 1
- Let NETWORK_OUTPUT_DIMS == sub.get_output().size()/sub.get_output().num_samples()
- for all idx such that 0 <= idx < sub.get_output().num_samples():
- NETWORK_OUTPUT_DIMS == (*(truth + idx)).size()
!*/
};
// Convenience alias: wraps any SUBNET with the loss_dot_ loss layer.
template <typename SUBNET>
using loss_dot = add_loss_layer<loss_dot_, SUBNET>;
// ----------------------------------------------------------------------------------------
}
......
......@@ -3009,6 +3009,46 @@ namespace
dlib::deserialize(net2, in);
}
// ----------------------------------------------------------------------------------------
// Unit test for the loss_dot layer: trains a tiny network with loss_dot and
// checks that it learns to reproduce a fixed random linear projection.
void test_loss_dot()
{
print_spinner();
std::vector<matrix<float,0,1>> samples;
std::vector<matrix<float,0,1>> labels;
// A fixed random 2x3 projection matrix that the network should learn.
const matrix<float> proj = matrix_cast<float>(randm(2,3));
for (int i = 0; i < 128; ++i)
{
// The task is going to be to learn the matrix proj. So we make our
// training data thusly:
matrix<float,0,1> x = matrix_cast<float>(randm(3,1));
// Labels are normalized to unit length, so a dot product of 1 with the
// (also unit length) network output means a perfect match.
matrix<float,0,1> y = normalize(proj*x);
samples.push_back(x);
labels.push_back(y);
}
// A 3->2 linear map followed by L2 normalization, trained to maximize the
// dot product between its output and the label.
using net_type = loss_dot<
l2normalize<fc_no_bias<2,
input<matrix<float,0,1>>
>>>;
net_type net;
dnn_trainer<net_type> trainer(net, sgd(1e-4, 0.9));
trainer.set_learning_rate(0.01);
trainer.set_min_learning_rate(0.0000001);
trainer.set_mini_batch_size(128);
trainer.set_max_num_epochs(50000);
trainer.train(samples, labels);
// Both the net output (via l2normalize) and each label are unit vectors, so
// their dot product should be very close to 1 once training succeeds.
for (size_t i = 0; i < samples.size(); ++i)
{
DLIB_TEST(std::abs(1-dot(net(samples[i]),labels[i])) < 0.001);
}
}
// ----------------------------------------------------------------------------------------
class dnn_tester : public tester
......@@ -3095,6 +3135,7 @@ namespace
test_loss_multiclass_per_pixel_with_noise_and_pixels_to_ignore();
test_loss_multiclass_per_pixel_weighted();
test_serialization();
test_loss_dot();
}
void perform_test()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment