Commit ebdc064c authored by Davis King

merged

parents 917dcad3 0ed1ce61
@@ -1549,11 +1549,11 @@ namespace dlib
typename SUB_TYPE,
typename label_iterator
>
void to_label (
static void to_label (
const tensor& input_tensor,
const SUB_TYPE& sub,
label_iterator iter
) const
)
{
DLIB_CASSERT(sub.sample_expansion_factor() == 1);
@@ -1678,7 +1678,7 @@ namespace dlib
std::string version;
deserialize(version, in);
if (version != "loss_multiclass_log_per_pixel_")
throw serialization_error("Unexpected version found while deserializing dlib::loss_multiclass_log_.");
throw serialization_error("Unexpected version found while deserializing dlib::loss_multiclass_log_per_pixel_.");
}
friend std::ostream& operator<<(std::ostream& out, const loss_multiclass_log_per_pixel_& )
@@ -1704,6 +1704,148 @@ namespace dlib
template <typename SUBNET>
using loss_multiclass_log_per_pixel = add_loss_layer<loss_multiclass_log_per_pixel_, SUBNET>;
// ----------------------------------------------------------------------------------------
class loss_multiclass_log_per_pixel_weighted_
{
public:
struct weighted_label
{
weighted_label()
{}
weighted_label(uint16_t label, float weight = 1.f)
: label(label), weight(weight)
{}
// In semantic segmentation, 65536 classes ought to be enough for anybody.
uint16_t label = 0;
float weight = 1.f;
};
typedef matrix<weighted_label> training_label_type;
typedef matrix<uint16_t> output_label_type;
template <
typename SUB_TYPE,
typename label_iterator
>
static void to_label (
const tensor& input_tensor,
const SUB_TYPE& sub,
label_iterator iter
)
{
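// The per-pixel weights affect training only; predictions are computed exactly as in
// the unweighted per-pixel loss, so simply delegate to it.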
loss_multiclass_log_per_pixel_::to_label(input_tensor, sub, iter);
}
template <
typename const_label_iterator,
typename SUBNET
>
double compute_loss_value_and_gradient (
const tensor& input_tensor,
const_label_iterator truth,
SUBNET& sub
) const
{
const tensor& output_tensor = sub.get_output();
tensor& grad = sub.get_gradient_input();
DLIB_CASSERT(sub.sample_expansion_factor() == 1);
DLIB_CASSERT(input_tensor.num_samples() != 0);
DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0);
DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
DLIB_CASSERT(output_tensor.k() >= 1);
DLIB_CASSERT(output_tensor.k() < std::numeric_limits<uint16_t>::max());
DLIB_CASSERT(output_tensor.nr() == grad.nr() &&
output_tensor.nc() == grad.nc() &&
output_tensor.k() == grad.k());
for (long idx = 0; idx < output_tensor.num_samples(); ++idx)
{
const_label_iterator truth_matrix_ptr = (truth + idx);
DLIB_CASSERT(truth_matrix_ptr->nr() == output_tensor.nr() &&
truth_matrix_ptr->nc() == output_tensor.nc(),
"truth size = " << truth_matrix_ptr->nr() << " x " << truth_matrix_ptr->nc() << ", "
"output size = " << output_tensor.nr() << " x " << output_tensor.nc());
}
tt::softmax(grad, output_tensor);
// The loss we output is the weighted average loss over the mini-batch, and also over each element of the matrix output.
const double scale = 1.0 / (output_tensor.num_samples() * output_tensor.nr() * output_tensor.nc());
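// Each pixel contributes weight*scale*(-log p_label) to the loss, and the same
// weight*scale factor multiplies its gradient below, so a pixel with weight 0
// contributes nothing at all.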
double loss = 0;
float* const g = grad.host();
for (long i = 0; i < output_tensor.num_samples(); ++i, ++truth)
{
for (long r = 0; r < output_tensor.nr(); ++r)
{
for (long c = 0; c < output_tensor.nc(); ++c)
{
const weighted_label& weighted_label = truth->operator()(r, c);
const uint16_t y = weighted_label.label;
const float weight = weighted_label.weight;
// The network must produce a number of outputs that is equal to the number
// of labels when using this type of loss.
DLIB_CASSERT(static_cast<long>(y) < output_tensor.k() || weight == 0.f,
"y: " << y << ", output_tensor.k(): " << output_tensor.k());
for (long k = 0; k < output_tensor.k(); ++k)
{
const size_t idx = tensor_index(output_tensor, i, r, c, k);
if (k == y)
{
loss += weight*scale*-std::log(g[idx]);
g[idx] = weight*scale*(g[idx] - 1);
}
else
{
g[idx] = weight*scale*g[idx];
}
}
}
}
}
return loss;
}
friend void serialize(const loss_multiclass_log_per_pixel_weighted_& , std::ostream& out)
{
serialize("loss_multiclass_log_per_pixel_weighted_", out);
}
friend void deserialize(loss_multiclass_log_per_pixel_weighted_& , std::istream& in)
{
std::string version;
deserialize(version, in);
if (version != "loss_multiclass_log_per_pixel_weighted_")
throw serialization_error("Unexpected version found while deserializing dlib::loss_multiclass_log_per_pixel_weighted_.");
}
friend std::ostream& operator<<(std::ostream& out, const loss_multiclass_log_per_pixel_weighted_& )
{
out << "loss_multiclass_log_per_pixel_weighted";
return out;
}
friend void to_xml(const loss_multiclass_log_per_pixel_weighted_& /*item*/, std::ostream& out)
{
out << "<loss_multiclass_log_per_pixel_weighted/>";
}
private:
static size_t tensor_index(const tensor& t, long sample, long row, long column, long k)
{
// See: https://github.com/davisking/dlib/blob/4dfeb7e186dd1bf6ac91273509f687293bd4230a/dlib/dnn/tensor_abstract.h#L38
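// dlib tensors are laid out in row-major order with dimensions
// (num_samples, k, nr, nc), which gives the index computation below.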
return ((sample * t.k() + k) * t.nr() + row) * t.nc() + column;
}
};
template <typename SUBNET>
using loss_multiclass_log_per_pixel_weighted = add_loss_layer<loss_multiclass_log_per_pixel_weighted_, SUBNET>;
// ----------------------------------------------------------------------------------------
}
@@ -863,6 +863,94 @@ namespace dlib
template <typename SUBNET>
using loss_multiclass_log_per_pixel = add_loss_layer<loss_multiclass_log_per_pixel_, SUBNET>;
// ----------------------------------------------------------------------------------------
class loss_multiclass_log_per_pixel_weighted_
{
/*!
WHAT THIS OBJECT REPRESENTS
This object implements the loss layer interface defined above by
EXAMPLE_LOSS_LAYER_. In particular, it implements the multiclass logistic
regression loss (e.g. negative log-likelihood loss), which is appropriate
for multiclass classification problems. It is basically just like
loss_multiclass_log_per_pixel_ except that it lets you define per-pixel
weights, which may be useful e.g. if you want to emphasize rare classes
while training. (If the classification problem is difficult, a flat weight
structure may lead the network to always predict the most common label, in
particular if the degree of imbalance is high. To emphasize a certain
class or classes, simply increase the weights of the corresponding pixels,
relative to the weights of the other pixels.)
Note that if you set the weight to 0 whenever a pixel's label is equal to
loss_multiclass_log_per_pixel_::label_to_ignore, and to 1 otherwise, then
you essentially get loss_multiclass_log_per_pixel_ as a special case.
!*/
public:
struct weighted_label
{
/*!
WHAT THIS OBJECT REPRESENTS
This object represents the truth label of a single pixel, together with
an associated weight (the higher the weight, the more emphasis the
corresponding pixel is given during the training).
!*/
weighted_label();
weighted_label(uint16_t label, float weight = 1.f);
// The ground-truth label. In semantic segmentation, 65536 classes ought to be
// enough for anybody.
uint16_t label = 0;
// The weight of the corresponding pixel.
float weight = 1.f;
};
typedef matrix<weighted_label> training_label_type;
typedef matrix<uint16_t> output_label_type;
template <
typename SUB_TYPE,
typename label_iterator
>
void to_label (
const tensor& input_tensor,
const SUB_TYPE& sub,
label_iterator iter
) const;
/*!
This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
it has the additional calling requirements that:
- sub.get_output().num_samples() == input_tensor.num_samples()
- sub.sample_expansion_factor() == 1
and the output label is the predicted class for each classified element. The number
of possible output classes is sub.get_output().k().
!*/
template <
typename const_label_iterator,
typename SUBNET
>
double compute_loss_value_and_gradient (
const tensor& input_tensor,
const_label_iterator truth,
SUBNET& sub
) const;
/*!
This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient()
except it has the additional calling requirements that:
- sub.get_output().num_samples() == input_tensor.num_samples()
- sub.sample_expansion_factor() == 1
- all labels pointed to by truth are < sub.get_output().k(), or the corresponding weight
is zero.
!*/
};
template <typename SUBNET>
using loss_multiclass_log_per_pixel_weighted = add_loss_layer<loss_multiclass_log_per_pixel_weighted_, SUBNET>;
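/*!
    A minimal usage sketch, assuming `labels` is a matrix<uint16_t> of ground truth
    labels you already have and `rare_class` is a hypothetical class you want to
    emphasize. Pixels labeled loss_multiclass_log_per_pixel_::label_to_ignore get
    weight 0, which reproduces the unweighted loss's ignore behavior as noted above:

        matrix<loss_multiclass_log_per_pixel_weighted_::weighted_label> wlabels;
        wlabels.set_size(labels.nr(), labels.nc());
        for (long r = 0; r < labels.nr(); ++r)
        {
            for (long c = 0; c < labels.nc(); ++c)
            {
                const uint16_t label = labels(r, c);
                float weight = 1.f;
                if (label == loss_multiclass_log_per_pixel_::label_to_ignore)
                    weight = 0.f;
                else if (label == rare_class)
                    weight = 2.f;
                wlabels(r, c) = loss_multiclass_log_per_pixel_weighted_::weighted_label(label, weight);
            }
        }

    The resulting matrix<weighted_label> objects are then passed to
    dnn_trainer::train() as the training labels, exactly as in the test added by
    this commit.
!*/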
// ----------------------------------------------------------------------------------------
}
@@ -2331,7 +2331,102 @@ namespace
// ----------------------------------------------------------------------------------------
void test_tensor_resize_bilienar(long samps, long k, long nr, long nc, long onr, long onc)
void test_loss_multiclass_per_pixel_weighted()
{
// Train with pixel-specific weights
print_spinner();
constexpr int input_height = 5;
constexpr int input_width = 7;
constexpr int output_height = input_height;
constexpr int output_width = input_width;
const int num_samples = 1000;
const int num_classes = 6;
::std::default_random_engine generator(16);
::std::uniform_real_distribution<double> u01(0.0, 1.0);
::std::uniform_int_distribution<uint16_t> noisy_label(0, num_classes - 1);
::std::vector<matrix<double>> x(num_samples);
::std::vector<matrix<uint16_t>> y(num_samples);
matrix<double> xtmp(input_height, input_width);
matrix<uint16_t> ytmp(output_height, output_width);
// Generate input data
for (int ii = 0; ii < num_samples; ++ii) {
for (int jj = 0; jj < input_height; ++jj) {
for (int kk = 0; kk < input_width; ++kk) {
xtmp(jj, kk) = u01(generator);
ytmp(jj, kk) = noisy_label(generator);
}
}
x[ii] = xtmp;
y[ii] = ytmp;
}
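// A minimal per-pixel classifier: a 1x1 convolution with num_classes filters maps
// each input pixel to its own num_classes-dimensional score vector.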
using net_type = loss_multiclass_log_per_pixel_weighted<con<num_classes,1,1,1,1,input<matrix<double>>>>;
using weighted_label = loss_multiclass_log_per_pixel_weighted_::weighted_label;
::std::vector<matrix<weighted_label>> y_weighted(num_samples);
for (int weighted_class = 0; weighted_class < num_classes; ++weighted_class) {
print_spinner();
// Assign weights: pixels whose ground-truth label equals weighted_class get weight 1.1,
// all other pixels 0.9, so the trained network should favor the weighted class.
for (int ii = 0; ii < num_samples; ++ii) {
if (weighted_class == 0) {
y_weighted[ii].set_size(input_height, input_width);
}
for (int jj = 0; jj < input_height; ++jj) {
for (int kk = 0; kk < input_width; ++kk) {
const uint16_t label = y[ii](jj, kk);
const float weight
= label == weighted_class
? 1.1f
: 0.9f;
y_weighted[ii](jj, kk) = weighted_label(label, weight);
}
}
}
net_type net;
sgd defsolver(0,0.9);
dnn_trainer<net_type> trainer(net, defsolver);
trainer.set_learning_rate(0.1);
trainer.set_min_learning_rate(0.01);
trainer.set_mini_batch_size(10);
trainer.set_max_num_epochs(10);
trainer.train(x, y_weighted);
const ::std::vector<matrix<uint16_t>> predictions = net(x);
int num_weighted_class = 0;
int num_not_weighted_class = 0;
for ( int ii = 0; ii < num_samples; ++ii ) {
const matrix<uint16_t>& prediction = predictions[ii];
DLIB_TEST(prediction.nr() == output_height);
DLIB_TEST(prediction.nc() == output_width);
for ( int jj = 0; jj < output_height; ++jj )
for ( int kk = 0; kk < output_width; ++kk )
if ( prediction(jj, kk) == weighted_class )
++num_weighted_class;
else
++num_not_weighted_class;
}
DLIB_TEST_MSG(num_weighted_class > num_not_weighted_class,
"The weighted class (" << weighted_class << ") does not dominate: "
<< num_weighted_class << " <= " << num_not_weighted_class);
}
}
// ----------------------------------------------------------------------------------------
void test_tensor_resize_bilinear(long samps, long k, long nr, long nc, long onr, long onc)
{
resizable_tensor img(samps,k,nr,nc);
resizable_tensor out(samps,k,onr,onc);
@@ -2426,9 +2521,9 @@ namespace
compare_adam();
test_copy_tensor_gpu();
#endif
test_tensor_resize_bilienar(2, 3, 6,6, 11, 11);
test_tensor_resize_bilienar(2, 3, 6,6, 3, 4);
test_tensor_resize_bilienar(2, 3, 5,6, 12, 21);
test_tensor_resize_bilinear(2, 3, 6,6, 11, 11);
test_tensor_resize_bilinear(2, 3, 6,6, 3, 4);
test_tensor_resize_bilinear(2, 3, 5,6, 12, 21);
test_max_pool(1,1,2,3,0,0);
test_max_pool(3,3,1,1,0,0);
test_max_pool(3,3,2,2,0,0);
@@ -2469,6 +2564,7 @@ namespace
test_loss_multiclass_per_pixel_activations_on_trivial_single_pixel_task();
test_loss_multiclass_per_pixel_outputs_on_trivial_task();
test_loss_multiclass_per_pixel_with_noise_and_pixels_to_ignore();
test_loss_multiclass_per_pixel_weighted();
}
void perform_test()
@@ -526,7 +526,11 @@ class build(_build):
# this checks the sysconfig and will correctly pick up a brewed python lib
# e.g. in /usr/local/Cellar
py_ver = get_python_version()
# check: in some virtual environments the libpython has the form "libpython#.#m.dylib" (e.g. libpython3.6m.dylib)
py_lib = os.path.join(get_config_var('LIBDIR'), 'libpython'+py_ver+'.dylib')
if not os.path.isfile(py_lib):
py_lib = os.path.join(get_config_var('LIBDIR'), 'libpython'+py_ver+'m.dylib')
cmake_extra_arch += ['-DPYTHON_LIBRARY={lib}'.format(lib=py_lib)]
if sys.platform == "win32":