Commit be8dc926 authored by Davis King

merged

parents c763fafd f685cb42
@@ -2386,6 +2386,106 @@ namespace dlib
using mult_prev9_ = mult_prev_<tag9>;
using mult_prev10_ = mult_prev_<tag10>;
// ----------------------------------------------------------------------------------------
template <
template<typename> class tag
>
class resize_prev_to_tagged_
{
public:
const static unsigned long id = tag_id<tag>::id;
resize_prev_to_tagged_()
{
}
template <typename SUBNET>
void setup (const SUBNET& /*sub*/)
{
}
template <typename SUBNET>
void forward(const SUBNET& sub, resizable_tensor& output)
{
auto& prev = sub.get_output();
auto& tagged = layer<tag>(sub).get_output();
DLIB_CASSERT(prev.num_samples() == tagged.num_samples());
output.set_size(prev.num_samples(),
prev.k(),
tagged.nr(),
tagged.nc());
if (prev.nr() == tagged.nr() && prev.nc() == tagged.nc())
{
tt::copy_tensor(false, output, 0, prev, 0, prev.k());
}
else
{
tt::resize_bilinear(output, prev);
}
}
template <typename SUBNET>
void backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/)
{
auto& prev = sub.get_gradient_input();
DLIB_CASSERT(prev.k() == gradient_input.k());
DLIB_CASSERT(prev.num_samples() == gradient_input.num_samples());
if (prev.nr() == gradient_input.nr() && prev.nc() == gradient_input.nc())
{
tt::copy_tensor(true, prev, 0, gradient_input, 0, prev.k());
}
else
{
tt::resize_bilinear_gradient(prev, gradient_input);
}
}
const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }
inline dpoint map_input_to_output (const dpoint& p) const { return p; }
inline dpoint map_output_to_input (const dpoint& p) const { return p; }
friend void serialize(const resize_prev_to_tagged_& , std::ostream& out)
{
serialize("resize_prev_to_tagged_", out);
}
friend void deserialize(resize_prev_to_tagged_& , std::istream& in)
{
std::string version;
deserialize(version, in);
if (version != "resize_prev_to_tagged_")
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::resize_prev_to_tagged_.");
}
friend std::ostream& operator<<(std::ostream& out, const resize_prev_to_tagged_& item)
{
out << "resize_prev_to_tagged"<<id;
return out;
}
friend void to_xml(const resize_prev_to_tagged_& item, std::ostream& out)
{
out << "<resize_prev_to_tagged tag='"<<id<<"'/>\n";
}
private:
resizable_tensor params;
};
template <
template<typename> class tag,
typename SUBNET
>
using resize_prev_to_tagged = add_layer<resize_prev_to_tagged_<tag>, SUBNET>;
// ----------------------------------------------------------------------------------------
template <
......
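The new layer above copies when the spatial sizes already match and otherwise resizes bilinearly, always preserving the previous layer's channel count. The following toy program sketches that behavior at the tensor level. It is hypothetical, not part of this commit (toy_net and img are illustrative names), and assumes only dlib's stock input, tag1, and max_pool templates:

    #include <dlib/dnn.h>
    #include <dlib/image_transforms.h>
    using namespace dlib;

    // Hypothetical toy network: tag1 marks the full-resolution input, max_pool
    // halves the spatial dimensions, and resize_prev_to_tagged scales the pooled
    // output back up to tag1's rows/columns (bilinear interpolation, or a plain
    // copy if the sizes already match).
    using toy_net = resize_prev_to_tagged<tag1,
                    max_pool<2,2,2,2,
                    tag1<input<matrix<rgb_pixel>>>>>;

    int main()
    {
        toy_net net;
        matrix<rgb_pixel> img(64, 64);
        assign_all_pixels(img, rgb_pixel(0,0,0));

        resizable_tensor x;
        net.to_tensor(&img, &img+1, x);
        net.forward(x);

        const tensor& out = net.get_output();
        // nr()/nc() come from the tagged layer, k() from the previous layer.
        DLIB_CASSERT(out.nr() == 64 && out.nc() == 64 && out.k() == 3);
    }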
@@ -2382,6 +2382,56 @@ namespace dlib
using mult_prev9_ = mult_prev_<tag9>;
using mult_prev10_ = mult_prev_<tag10>;
// ----------------------------------------------------------------------------------------
template <
template<typename> class tag
>
class resize_prev_to_tagged_
{
/*!
WHAT THIS OBJECT REPRESENTS
This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
defined above. This layer resizes the output channels of the previous layer
to have the same number of rows and columns as the output of the tagged layer.
This layer uses bilinear interpolation. If the sizes match already, then it
simply copies the data.
Therefore, you supply a tag via resize_prev_to_tagged's template argument that
tells it what layer to use for the target size.
If tensor PREV is resized to size of tensor TAGGED, then a tensor OUT is
produced such that:
- OUT.num_samples() == PREV.num_samples()
- OUT.k() == PREV.k()
- OUT.nr() == TAGGED.nr()
- OUT.nc() == TAGGED.nc()
!*/
public:
resize_prev_to_tagged_(
);
template <typename SUBNET> void setup(const SUBNET& sub);
template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
dpoint map_input_to_output(dpoint p) const;
dpoint map_output_to_input(dpoint p) const;
const tensor& get_layer_params() const;
tensor& get_layer_params();
/*!
These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
!*/
};
template <
template<typename> class tag,
typename SUBNET
>
using resize_prev_to_tagged = add_layer<resize_prev_to_tagged_<tag>, SUBNET>;
// ----------------------------------------------------------------------------------------
template <
......
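A typical use of resize_prev_to_tagged is a skip connection in an encoder/decoder network, where a downsampled path must be brought back to the encoder's resolution before the two paths are fused. The fragment below is an illustrative sketch, not code from this commit (encoder, fused_skip, and net_type are hypothetical names); it assumes dlib's stock con, relu, max_pool, add_prev1, and loss_multiclass_log_per_pixel templates:

    #include <dlib/dnn.h>
    using namespace dlib;

    // tag1 marks the encoder output; the signal is downsampled and processed,
    // then resize_prev_to_tagged restores tag1's spatial size so add_prev1 can
    // fuse the two paths element-wise (both carry 8 channels, so k() matches).
    template <typename SUBNET>
    using encoder = tag1<relu<con<8,3,3,1,1,SUBNET>>>;

    template <typename SUBNET>
    using fused_skip = add_prev1<
                       resize_prev_to_tagged<tag1,
                       relu<con<8,3,3,1,1,
                       max_pool<2,2,2,2,
                       encoder<SUBNET>>>>>>;

    // A 5-class per-pixel classifier over the fused features.
    using net_type = loss_multiclass_log_per_pixel<
                     con<5,1,1,1,1,
                     fused_skip<input<matrix<rgb_pixel>>>>>;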
@@ -293,7 +293,7 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, int isDC, int tblno,
GLOBAL(int)
jpeg_fill_bit_buffer (bitread_working_state * state,
- bit_buf_type get_buffer, register int bits_left,
+ bit_buf_type get_buffer, int bits_left,
int nbits)
/* Load up the bit buffer to a depth of at least nbits */
{
@@ -399,7 +399,7 @@ jpeg_fill_bit_buffer (bitread_working_state * state,
GLOBAL(int)
jpeg_huff_decode (bitread_working_state * state,
- bit_buf_type get_buffer, register int bits_left,
+ bit_buf_type get_buffer, int bits_left,
d_derived_tbl * htbl, int min_bits)
{
int l = min_bits;
......
@@ -1910,7 +1910,7 @@ namespace
template <typename SUBNET>
using pres = prelu<add_prev1<bn_con<con<8,3,3,1,1,prelu<bn_con<con<8,3,3,1,1,tag1<SUBNET>>>>>>>>;
- void test_visit_funcions()
+ void test_visit_functions()
{
using net_type2 = loss_multiclass_log<fc<10,
avg_pool_everything<
@@ -3243,7 +3243,7 @@ namespace
test_batch_normalize_conv();
test_basic_tensor_ops();
test_layers();
- test_visit_funcions();
+ test_visit_functions();
test_copy_tensor_cpu();
test_copy_tensor_add_to_cpu();
test_concat();
......
@@ -16,7 +16,7 @@
./dnn_semantic_segmentation_ex /path/to/VOC2012-or-other-images
An alternative to steps 2-4 above is to download a pre-trained network
- from here: http://dlib.net/files/semantic_segmentation_voc2012net.dnn
+ from here: http://dlib.net/files/semantic_segmentation_voc2012net_v2.dnn
It would be a good idea to become familiar with dlib's DNN tooling before reading this
example. So you should read dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp
@@ -111,16 +111,16 @@ int main(int argc, char** argv) try
cout << "You call this program like this: " << endl;
cout << "./dnn_semantic_segmentation_train_ex /path/to/images" << endl;
cout << endl;
cout << "You will also need a trained 'semantic_segmentation_voc2012net.dnn' file." << endl;
cout << "You will also need a trained '" << semantic_segmentation_net_filename << "' file." << endl;
cout << "You can either train it yourself (see example program" << endl;
cout << "dnn_semantic_segmentation_train_ex), or download a" << endl;
cout << "copy from here: http://dlib.net/files/semantic_segmentation_voc2012net.dnn" << endl;
cout << "copy from here: http://dlib.net/files/" << semantic_segmentation_net_filename << endl;
return 1;
}
// Read the file containing the trained network from the working directory.
anet_type net;
deserialize("semantic_segmentation_voc2012net.dnn") >> net;
deserialize(semantic_segmentation_net_filename) >> net;
// Show inference results in a window.
image_window win;
......
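Both example programs now reference a shared semantic_segmentation_net_filename constant instead of a hard-coded string. Its definition is presumably in the collapsed file below; given the updated download URL it is likely along these lines (hypothetical reconstruction, not shown in this diff):

    // Hypothetical: the real definition lives in the collapsed part of the diff.
    constexpr const char* semantic_segmentation_net_filename =
        "semantic_segmentation_voc2012net_v2.dnn";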
This diff is collapsed.
@@ -41,7 +41,7 @@ struct training_sample
// ----------------------------------------------------------------------------------------
- rectangle make_random_cropping_rect_resnet(
+ rectangle make_random_cropping_rect(
const matrix<rgb_pixel>& img,
dlib::rand& rnd
)
@@ -66,7 +66,7 @@ void randomly_crop_image (
dlib::rand& rnd
)
{
- const auto rect = make_random_cropping_rect_resnet(input_image, rnd);
+ const auto rect = make_random_cropping_rect(input_image, rnd);
const chip_details chip_details(rect, chip_dims(227, 227));
@@ -259,12 +259,12 @@ double calculate_accuracy(anet_type& anet, const std::vector<image_info>& datase
int main(int argc, char** argv) try
{
- if (argc != 2)
+ if (argc < 2 || argc > 3)
{
cout << "To run this program you need a copy of the PASCAL VOC2012 dataset." << endl;
cout << endl;
cout << "You call this program like this: " << endl;
cout << "./dnn_semantic_segmentation_train_ex /path/to/VOC2012" << endl;
cout << "./dnn_semantic_segmentation_train_ex /path/to/VOC2012 [minibatch-size]" << endl;
return 1;
}
@@ -277,14 +277,17 @@ int main(int argc, char** argv) try
cout << "Didn't find the VOC2012 dataset. " << endl;
return 1;
}
+ // a mini-batch smaller than the default can be used with GPUs having less memory
+ const int minibatch_size = argc == 3 ? std::stoi(argv[2]) : 23;
+ cout << "mini-batch size: " << minibatch_size << endl;
const double initial_learning_rate = 0.1;
const double weight_decay = 0.0001;
const double momentum = 0.9;
- net_type net;
- dnn_trainer<net_type> trainer(net,sgd(weight_decay, momentum));
+ bnet_type bnet;
+ dnn_trainer<bnet_type> trainer(bnet,sgd(weight_decay, momentum));
trainer.be_verbose();
trainer.set_learning_rate(initial_learning_rate);
trainer.set_synchronization_file("pascal_voc2012_trainer_state_file.dat", std::chrono::minutes(10));
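The new optional third argument lets GPUs with less memory train with a smaller mini-batch. Since std::stoi throws std::invalid_argument or std::out_of_range on malformed input (which the example's top-level try/catch reports), a slightly more defensive version could look like this sketch (parse_minibatch_size is an illustrative helper, not part of the commit):

    #include <stdexcept>
    #include <string>

    // Illustrative helper: fall back to the example's default of 23 when no
    // mini-batch size is given on the command line, and reject non-positive
    // values early with a clear message.
    int parse_minibatch_size(int argc, char** argv)
    {
        if (argc < 3)
            return 23;
        const int n = std::stoi(argv[2]);
        if (n < 1)
            throw std::invalid_argument("mini-batch size must be >= 1");
        return n;
    }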
@@ -292,7 +295,7 @@ int main(int argc, char** argv) try
trainer.set_iterations_without_progress_threshold(5000);
// Since the progress threshold is so large might as well set the batch normalization
// stats window to something big too.
- set_all_bn_running_stats_window_sizes(net, 1000);
+ set_all_bn_running_stats_window_sizes(bnet, 1000);
// Output training parameters.
cout << endl << trainer << endl;
@@ -345,9 +348,9 @@ int main(int argc, char** argv) try
samples.clear();
labels.clear();
- // make a 30-image mini-batch
+ // make a mini-batch
training_sample temp;
- while(samples.size() < 30)
+ while(samples.size() < minibatch_size)
{
data.dequeue(temp);
@@ -369,13 +372,13 @@ int main(int argc, char** argv) try
// also wait for threaded processing to stop in the trainer.
trainer.get_net();
- net.clean();
+ bnet.clean();
cout << "saving network" << endl;
serialize("semantic_segmentation_voc2012net.dnn") << net;
serialize(semantic_segmentation_net_filename) << bnet;
// Make a copy of the network to use it for inference.
- anet_type anet = net;
+ anet_type anet = bnet;
cout << "Testing the network..." << endl;
......
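The rename from net to bnet underlines dlib's two-network idiom: the training network type uses bn_con (batch normalization) while the inference network type (anet_type) declares the same architecture with affine in its place, and plain assignment converts one into the other because dlib's affine_ layer is constructible from a trained bn_ layer. A minimal sketch of the idiom with a toy architecture (the real bnet_type/anet_type are presumably defined in the collapsed file, not shown here):

    #include <dlib/dnn.h>
    using namespace dlib;

    // Train-time network: batch normalization after the convolution.
    using bnet_type = loss_multiclass_log_per_pixel<
                      relu<bn_con<con<8,3,3,1,1,
                      input<matrix<rgb_pixel>>>>>>;

    // Run-time network: identical layout with affine in place of bn_con.
    using anet_type = loss_multiclass_log_per_pixel<
                      relu<affine<con<8,3,3,1,1,
                      input<matrix<rgb_pixel>>>>>>;

    int main()
    {
        bnet_type bnet;        // this is what the trainer optimizes
        anet_type anet = bnet; // bn_con layers collapse into fixed affine transforms
    }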