Commit ffaa322f authored by Davis King

Added loss_multimulticlass_log_

parent 72fbed20
@@ -12,6 +12,7 @@
#include "../image_processing/full_object_detection.h"
#include "../svm/ranking_tools.h"
#include <sstream>
#include <map>
namespace dlib
{
@@ -365,6 +366,288 @@ namespace dlib
template <typename SUBNET>
using loss_multiclass_log = add_loss_layer<loss_multiclass_log_, SUBNET>;
// ----------------------------------------------------------------------------------------
class loss_multimulticlass_log_
{
public:
loss_multimulticlass_log_ () = default;
loss_multimulticlass_log_ (
const std::map<std::string,std::vector<std::string>>& labels
)
{
for (auto& l : labels)
{
DLIB_CASSERT(l.second.size() >= 2, "Each classifier must have at least two possible labels.");
possible_labels[l.first] = std::make_shared<decltype(l.second)>(l.second);
for (size_t i = 0; i < l.second.size(); ++i)
{
label_idx_lookup[l.first][l.second[i]] = i;
++total_num_labels;
}
}
}
unsigned long number_of_labels() const { return total_num_labels; }
unsigned long number_of_classifiers() const { return possible_labels.size(); }
std::map<std::string,std::vector<std::string>> get_labels (
) const
{
std::map<std::string,std::vector<std::string>> info;
for (auto& i : possible_labels)
{
for (auto& label : *i.second)
info[i.first].emplace_back(label);
}
return info;
}
class classifier_output
{
public:
classifier_output() = default;
size_t num_classes() const { return class_probs.size(); }
double probability_of_class (
size_t i
) const
{
DLIB_CASSERT(i < num_classes());
return class_probs(i);
}
const std::string& label(
size_t i
) const
{
DLIB_CASSERT(i < num_classes());
return (*_labels)[i];
}
operator std::string(
) const
{
DLIB_CASSERT(num_classes() != 0);
return (*_labels)[index_of_max(class_probs)];
}
friend std::ostream& operator<< (std::ostream& out, const classifier_output& item)
{
DLIB_ASSERT(item.num_classes() != 0);
out << static_cast<std::string>(item);
return out;
}
private:
friend class loss_multimulticlass_log_;
template <typename EXP>
classifier_output(
const matrix_exp<EXP>& class_probs,
const std::shared_ptr<std::vector<std::string>>& _labels
) :
class_probs(class_probs),
_labels(_labels)
{
}
matrix<float,1,0> class_probs;
std::shared_ptr<std::vector<std::string>> _labels;
};
typedef std::map<std::string,std::string> training_label_type;
typedef std::map<std::string,classifier_output> output_label_type;
template <
typename SUB_TYPE,
typename label_iterator
>
void to_label (
const tensor& input_tensor,
const SUB_TYPE& sub,
label_iterator iter_begin
) const
{
const tensor& output_tensor = sub.get_output();
DLIB_CASSERT(sub.sample_expansion_factor() == 1);
DLIB_CASSERT(output_tensor.nr() == 1 &&
output_tensor.nc() == 1 );
DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
DLIB_CASSERT(number_of_labels() != 0, "You must give the loss_multimulticlass_log_'s constructor label data before you can use it!");
DLIB_CASSERT(output_tensor.k() == (long)number_of_labels(), "The output tensor must have " << number_of_labels() << " channels.");
long k_offset = 0;
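// Each classifier owns a contiguous block of channels in the output tensor.
// Walk those blocks in order, softmax each block independently, and emit one
// classifier_output per sample for each classifier.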
for (auto& l : possible_labels)
{
auto iter = iter_begin;
const std::string& classifier_name = l.first;
const auto& labels = (*l.second);
scratch.set_size(output_tensor.num_samples(), labels.size());
tt::copy_tensor(false, scratch, 0, output_tensor, k_offset, labels.size());
tt::softmax(scratch, scratch);
for (long i = 0; i < scratch.num_samples(); ++i)
(*iter++)[classifier_name] = classifier_output(rowm(mat(scratch),i), l.second);
k_offset += labels.size();
}
}
template <
typename const_label_iterator,
typename SUBNET
>
double compute_loss_value_and_gradient (
const tensor& input_tensor,
const_label_iterator truth_begin,
SUBNET& sub
) const
{
const tensor& output_tensor = sub.get_output();
tensor& grad = sub.get_gradient_input();
DLIB_CASSERT(sub.sample_expansion_factor() == 1);
DLIB_CASSERT(input_tensor.num_samples() != 0);
DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0);
DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
DLIB_CASSERT(output_tensor.nr() == 1 &&
output_tensor.nc() == 1);
DLIB_CASSERT(grad.nr() == 1 &&
grad.nc() == 1);
DLIB_CASSERT(number_of_labels() != 0, "You must give the loss_multimulticlass_log_'s constructor label data before you can use it!");
DLIB_CASSERT(output_tensor.k() == (long)number_of_labels(), "The output tensor must have " << number_of_labels() << " channels.");
// The loss we output is the average loss over the mini-batch.
const double scale = 1.0/output_tensor.num_samples();
double loss = 0;
long k_offset = 0;
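// As in to_label(), handle each classifier's block of channels separately:
// softmax the block, accumulate the cross-entropy loss for the true label,
// and write the gradient (p-1 for the true class, p otherwise, scaled by
// 1/num_samples) back into grad.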
for (auto& l : label_idx_lookup)
{
const std::string& classifier_name = l.first;
const auto& int_labels = l.second;
scratch.set_size(output_tensor.num_samples(), int_labels.size());
tt::copy_tensor(false, scratch, 0, output_tensor, k_offset, int_labels.size());
tt::softmax(scratch, scratch);
auto truth = truth_begin;
float* g = scratch.host();
for (long i = 0; i < scratch.num_samples(); ++i)
{
const long y = int_labels.at(truth->at(classifier_name));
++truth;
for (long k = 0; k < scratch.k(); ++k)
{
const unsigned long idx = i*scratch.k()+k;
if (k == y)
{
loss += scale*-std::log(g[idx]);
g[idx] = scale*(g[idx]-1);
}
else
{
g[idx] = scale*g[idx];
}
}
}
tt::copy_tensor(false, grad, k_offset, scratch, 0, int_labels.size());
k_offset += int_labels.size();
}
return loss;
}
friend void serialize(const loss_multimulticlass_log_& item, std::ostream& out)
{
serialize("loss_multimulticlass_log_", out);
serialize(item.get_labels(), out);
}
friend void deserialize(loss_multimulticlass_log_& item, std::istream& in)
{
std::string version;
deserialize(version, in);
if (version != "loss_multimulticlass_log_")
throw serialization_error("Unexpected version found while deserializing dlib::loss_multimulticlass_log_.");
std::map<std::string,std::vector<std::string>> info;
deserialize(info, in);
item = loss_multimulticlass_log_(info);
}
friend std::ostream& operator<<(std::ostream& out, const loss_multimulticlass_log_& item)
{
out << "loss_multimulticlass_log, labels={";
for (auto i = item.possible_labels.begin(); i != item.possible_labels.end(); )
{
auto& category = i->first;
auto& labels = *(i->second);
out << category << ":(";
for (size_t j = 0; j < labels.size(); ++j)
{
out << labels[j];
if (j+1 < labels.size())
out << ",";
}
out << ")";
if (++i != item.possible_labels.end())
out << ", ";
}
out << "}";
return out;
}
friend void to_xml(const loss_multimulticlass_log_& item, std::ostream& out)
{
out << "<loss_multimulticlass_log>\n";
out << item;
out << "\n</loss_multimulticlass_log>";
}
private:
std::map<std::string,std::shared_ptr<std::vector<std::string>>> possible_labels;
unsigned long total_num_labels = 0;
// We make it true that: possible_labels[classifier][label_idx_lookup[classifier][label]] == label
std::map<std::string, std::map<std::string,long>> label_idx_lookup;
// Scratch doesn't logically contribute to the state of this object. It's just
// temporary scratch space used by this class.
mutable resizable_tensor scratch;
};
template <typename SUBNET>
using loss_multimulticlass_log = add_loss_layer<loss_multimulticlass_log_, SUBNET>;
inline bool operator== (const std::string& lhs, const loss_multimulticlass_log_::classifier_output& rhs)
{ return lhs == static_cast<const std::string&>(rhs); }
inline bool operator== (const loss_multimulticlass_log_::classifier_output& lhs, const std::string& rhs)
{ return rhs == static_cast<const std::string&>(lhs); }
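To make the interface concrete, here is a minimal inference sketch. The example_net alias and the print_predictions helper are hypothetical names used only for illustration; just net(img), the classifier_output accessors, and the operator<< overload come from this commit, and the network is assumed to be already trained with its fc layer sized to number_of_labels().

#include <dlib/dnn.h>
#include <iostream>

// Hypothetical minimal network shape; any net ending in this loss behaves the same way.
using example_net = dlib::loss_multimulticlass_log<dlib::fc<1, dlib::input<dlib::matrix<float>>>>;

void print_predictions(example_net& net, const dlib::matrix<float>& img)
{
    // For a single sample, net(img) returns an output_label_type, i.e. a
    // std::map from classifier name to classifier_output.
    auto out = net(img);
    for (auto& p : out)
    {
        // operator<< prints the most probable label for this classifier.
        std::cout << p.first << " -> " << p.second << "\n";
        // The full class-conditional distribution is also available.
        for (size_t i = 0; i < p.second.num_classes(); ++i)
            std::cout << "    " << p.second.label(i) << ": "
                      << p.second.probability_of_class(i) << "\n";
    }
}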
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
...
@@ -362,6 +362,228 @@ namespace dlib
template <typename SUBNET>
using loss_multiclass_log = add_loss_layer<loss_multiclass_log_, SUBNET>;
// ----------------------------------------------------------------------------------------
class loss_multimulticlass_log_
{
/*!
WHAT THIS OBJECT REPRESENTS
This object implements the loss layer interface defined above by
EXAMPLE_LOSS_LAYER_. In particular, it implements a collection of
multiclass classifiers. An example will make its use clear. Suppose you
want to make something that takes a picture of a vehicle and answers the
following questions:
- What type of vehicle is it? A sedan or a truck?
- What color is it? Red, green, blue, gray, or black?
You need two separate multi-class classifiers to do this: one to decide
the type of vehicle and another to decide the color. The
loss_multimulticlass_log_ allows you to pack these two classifiers into one
neural network. This means that when you use the network to process an
image it will output 2 labels for each image, the type label and the color
label.
To create a loss_multimulticlass_log_ for the above case you would
construct it as follows:
std::map<std::string,std::vector<std::string>> labels;
labels["type"] = {"sedan", "truck"};
labels["color"] = {"red", "green", "blue", "gray", "black"};
loss_multimulticlass_log_ myloss(labels);
Then you could use myloss with a network object and train it to do this
task. More generally, you can use any number of classifiers and labels
when using this object. Finally, each of the classifiers uses a standard
multi-class logistic regression loss.
!*/
public:
loss_multimulticlass_log_(
);
/*!
ensures
- #number_of_labels() == 0
- #get_labels().size() == 0
!*/
loss_multimulticlass_log_ (
const std::map<std::string,std::vector<std::string>>& labels
);
/*!
requires
- Each vector in labels must contain at least 2 strings. I.e. each
classifier must have at least two possible labels.
ensures
- #number_of_labels() == the total number of strings in all the
std::vectors in labels.
- #number_of_classifiers() == labels.size()
- #get_labels() == labels
!*/
unsigned long number_of_labels(
) const;
/*!
ensures
- returns the total number of labels known to this loss. This is the
count of labels summed across all the classifiers.
!*/
unsigned long number_of_classifiers(
) const;
/*!
ensures
- returns the number of classifiers defined by this loss.
!*/
std::map<std::string,std::vector<std::string>> get_labels (
) const;
/*!
ensures
- returns the names of the classifiers and labels used by this loss. In
particular, if the returned object is L then:
- L[CLASS] == the set of labels used by the classifier CLASS.
- L.size() == number_of_classifiers()
- The count of strings in the vectors in L == number_of_labels()
!*/
class classifier_output
{
/*!
WHAT THIS OBJECT REPRESENTS
This object stores the predictions from one of the classifiers in
loss_multimulticlass_log_. It allows you to find out the most likely
string label predicted by that classifier, as well as get the class
conditional probability of any of the classes in the classifier.
!*/
public:
classifier_output(
);
/*!
ensures
- #num_classes() == 0
!*/
size_t num_classes(
) const;
/*!
ensures
- returns the number of possible classes output by this classifier.
!*/
double probability_of_class (
size_t i
) const;
/*!
requires
- i < num_classes()
ensures
- returns the probability that the true class has a label of label(i).
- The sum of probability_of_class(j) for j in the range [0, num_classes()) is always 1.
!*/
const std::string& label(
size_t i
) const;
/*!
requires
- i < num_classes()
ensures
- returns the string label for the ith class.
!*/
operator std::string(
) const;
/*!
requires
- num_classes() != 0
ensures
- returns the string label for the most probable class.
!*/
friend std::ostream& operator<< (std::ostream& out, const classifier_output& item);
/*!
requires
- num_classes() != 0
ensures
- prints the most probable class label to out.
!*/
};
// Both training_label_type and output_label_type should always have sizes equal to
// number_of_classifiers(). That is, the std::map should have an entry for every
// classifier known to this loss.
typedef std::map<std::string,std::string> training_label_type;
typedef std::map<std::string,classifier_output> output_label_type;
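// For instance, in the vehicle example above a training_label_type could be
// {{"type","truck"}, {"color","red"}}, and the output_label_type produced by
// to_label() would map "type" and "color" to classifier_output objects holding
// each classifier's predicted distribution.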
template <
typename SUB_TYPE,
typename label_iterator
>
void to_label (
const tensor& input_tensor,
const SUB_TYPE& sub,
label_iterator iter
) const;
/*!
This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
it has the additional calling requirements that:
- number_of_labels() != 0
- sub.get_output().k() == number_of_labels()
- sub.get_output().nr() == 1
- sub.get_output().nc() == 1
- sub.get_output().num_samples() == input_tensor.num_samples()
- sub.sample_expansion_factor() == 1
!*/
template <
typename const_label_iterator,
typename SUBNET
>
double compute_loss_value_and_gradient (
const tensor& input_tensor,
const_label_iterator truth,
SUBNET& sub
) const;
/*!
This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient()
except it has the additional calling requirements that:
- number_of_labels() != 0
- sub.get_output().k() == number_of_labels()
It should be noted that the last layer in your network should usually
be an fc layer. If so, you can satisfy this requirement of k() being
number_of_labels() by calling set_num_outputs() prior to training your
network like so:
your_network.subnet().layer_details().set_num_outputs(your_network.loss_details().number_of_labels());
- sub.get_output().nr() == 1
- sub.get_output().nc() == 1
- sub.get_output().num_samples() == input_tensor.num_samples()
- sub.sample_expansion_factor() == 1
- All the std::maps pointed to by truth contain entries for all the
classifiers known to this loss. That is, it must be valid to call
truth[i][classifier] for any of the classifiers known to this loss. To
say this another way, all the training samples must contain labels for
each of the classifiers defined by this loss.
To really belabor this, this also means that truth[i].size() ==
get_labels().size() and that both truth[i] and get_labels() have the same
set of key strings. It also means that the value strings in truth[i]
must be strings known to the loss, i.e. they are valid labels according
to get_labels().
!*/
};
template <typename SUBNET>
using loss_multimulticlass_log = add_loss_layer<loss_multimulticlass_log_, SUBNET>;
// Allow comparison between classifier_outputs and std::string to check if the
// predicted class is a particular string.
inline bool operator== (const std::string& lhs, const loss_multimulticlass_log_::classifier_output& rhs)
{ return lhs == static_cast<const std::string&>(rhs); }
inline bool operator== (const loss_multimulticlass_log_::classifier_output& lhs, const std::string& rhs)
{ return rhs == static_cast<const std::string&>(lhs); }
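To tie the pieces together, here is a short end-to-end training sketch for the vehicle scenario described in the documentation above. The network shape, trainer settings, and dummy data are illustrative assumptions (they mirror the unit test further down); the set_num_outputs() call satisfies the k() == number_of_labels() requirement spelled out for compute_loss_value_and_gradient().

#include <dlib/dnn.h>
#include <map>
#include <string>
#include <vector>

int main()
{
    using namespace dlib;
    // The two classifiers from the documentation above.
    std::map<std::string, std::vector<std::string>> labels;
    labels["type"]  = {"sedan", "truck"};
    labels["color"] = {"red", "green", "blue", "gray", "black"};

    // A deliberately tiny network, just for illustration.
    using net_type = loss_multimulticlass_log<fc<1, input<matrix<float>>>>;
    net_type net(labels);

    // The loss needs one output channel per label (2 + 5 = 7 here).
    net.subnet().layer_details().set_num_outputs(net.loss_details().number_of_labels());

    // One dummy sample so the sketch actually runs; real code would load data.
    std::vector<matrix<float>> samples;
    std::vector<std::map<std::string, std::string>> truths;
    matrix<float> samp(2,3);
    samp = 0;
    samples.push_back(samp);
    truths.push_back({{"type","sedan"}, {"color","red"}});

    dnn_trainer<net_type> trainer(net, sgd(0.1));
    trainer.set_learning_rate(0.1);
    trainer.set_min_learning_rate(0.01);  // stop quickly for this toy example
    trainer.train(samples, truths);
    return 0;
}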
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
...
@@ -3094,6 +3094,65 @@ namespace
}
}
// ----------------------------------------------------------------------------------------
void test_loss_multimulticlass_log()
{
print_spinner();
std::map<string,std::vector<string>> all_labels;
all_labels["c1"] = {"a", "b", "c"};
all_labels["c2"] = {"d", "e", "f"};
// make training data
std::vector<matrix<float>> samples;
std::vector<std::map<string,string>> labels;
for (int i = 0; i < 3; ++i)
{
for (int j = 0; j < 3; ++j)
{
matrix<float> samp(2,3);
samp = 0;
samp(0,i) = 1;
samp(1,j) = 1;
samples.push_back(samp);
std::map<string,string> l;
if (i == 0) l["c1"] = "a";
if (i == 1) l["c1"] = "b";
if (i == 2) l["c1"] = "c";
if (j == 0) l["c2"] = "d";
if (j == 1) l["c2"] = "e";
if (j == 2) l["c2"] = "f";
labels.push_back(l);
}
}
using net_type = loss_multimulticlass_log<
fc<1,
input<matrix<float>>
>>;
net_type net(all_labels);
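// The fc layer must emit one output channel per label: 3 for "c1" plus 3 for "c2" = 6.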
net.subnet().layer_details().set_num_outputs(net.loss_details().number_of_labels());
dnn_trainer<net_type> trainer(net, sgd(0.1));
trainer.set_learning_rate(0.1);
trainer.set_min_learning_rate(0.00001);
trainer.set_iterations_without_progress_threshold(500);
trainer.train(samples, labels);
auto predicted_labels = net(samples);
// make sure the network predicts the right labels
for (size_t i = 0; i < samples.size(); ++i)
{
DLIB_TEST(predicted_labels[i]["c1"] == labels[i]["c1"]);
DLIB_TEST(predicted_labels[i]["c2"] == labels[i]["c2"]);
}
}
// ----------------------------------------------------------------------------------------
class dnn_tester : public tester
@@ -3182,6 +3241,7 @@ namespace
test_loss_multiclass_per_pixel_weighted();
test_serialization();
test_loss_dot();
test_loss_multimulticlass_log();
}
void perform_test()
...