Commit d1b307a8 authored by Davis King's avatar Davis King

Added some dnn examples.

parent fdf2a454
......@@ -29,6 +29,11 @@ MACRO(add_example name)
TARGET_LINK_LIBRARIES(${name} dlib )
ENDMACRO()
# The deep learning toolkit requires a C++11 capable compiler.
if (COMPILER_CAN_DO_CPP_11)
add_example(dnn_mnist_ex)
add_example(dnn_mit67_ex)
endif()
#here we apply our macros
add_example(3d_point_cloud_ex)
......
#include <dlib/dnn.h>
#include <iostream>
#include <dlib/svm.h>
#include <dlib/data_io.h>
#include <dlib/gui_widgets.h>
#include <dlib/image_transforms.h>
#include <dlib/dir_nav.h>
#include <iterator>
#include <regex>
using namespace std;
using namespace dlib;
// ----------------------------------------------------------------------------------------
template <typename T> using ares = relu<affine<add_prev1<con<relu<affine<con<tag1<T>>>>>>>>;
template <typename T> using res = relu<bn<add_prev1<con<relu<bn<con<tag1<T>>>>>>>>;
std::tuple<relu_,bn_,add_prev1_,con_,relu_,bn_,con_> res_ (
unsigned long outputs,
unsigned long stride = 1
)
{
return std::make_tuple(relu_(),
bn_(CONV_MODE),
add_prev1_(),
con_(outputs,3,3,stride,stride),
relu_(),
bn_(CONV_MODE),
con_(outputs,3,3,stride,stride));
}
// ----------------------------------------------------------------------------------------
void randomly_crop_image (
const matrix<rgb_pixel>& img,
matrix<rgb_pixel>& crop,
dlib::rand& rnd
)
{
// figure out what rectangle we want to crop from the image
auto scale = 1-rnd.get_random_double()*0.2;
auto size = scale*std::min(img.nr(), img.nc());
rectangle rect(size, size);
// randomly shift the box around
point offset(rnd.get_random_32bit_number()%(img.nc()-rect.width()),
rnd.get_random_32bit_number()%(img.nr()-rect.height()));
rect = move_rect(rect, offset);
// now crop it out as a 250x250 image.
extract_image_chip(img, chip_details(rect, chip_dims(250,250)), crop);
// Also randomly flip the image
if (rnd.get_random_double() > 0.5)
crop = fliplr(crop);
// And then randomly adjust the color balance and gamma.
disturb_colors(crop, rnd);
}
void randomly_crop_images (
const matrix<rgb_pixel>& img,
dlib::array<matrix<rgb_pixel>>& crops,
dlib::rand& rnd,
long num_crops
)
{
std::vector<chip_details> dets;
for (long i = 0; i < num_crops; ++i)
{
// figure out what rectangle we want to crop from the image
auto scale = 1-rnd.get_random_double()*0.2;
auto size = scale*std::min(img.nr(), img.nc());
rectangle rect(size, size);
// randomly shift the box around
point offset(rnd.get_random_32bit_number()%(img.nc()-rect.width()),
rnd.get_random_32bit_number()%(img.nr()-rect.height()));
rect = move_rect(rect, offset);
dets.push_back(chip_details(rect, chip_dims(250,250)));
}
extract_image_chips(img, dets, crops);
for (auto&& img : crops)
{
// Also randomly flip the image
if (rnd.get_random_double() > 0.5)
img = fliplr(img);
// And then randomly adjust the color balance and gamma.
disturb_colors(img, rnd);
}
}
// ----------------------------------------------------------------------------------------
struct image_info
{
string filename;
string label;
unsigned long numeric_label;
};
std::vector<image_info> get_mit67_listing(
const std::string& images_folder
)
{
std::vector<image_info> results;
image_info temp;
temp.numeric_label = 0;
// loop over all the scene types in the dataset, each is contained in a subfolder.
auto subdirs = directory(images_folder).get_dirs();
// sort the sub directories so the numeric labels will be assigned in sorted order.
std::sort(subdirs.begin(), subdirs.end());
regex is_gif(".*_gif.jpg");
for (auto subdir : subdirs)
{
// Now get all the images in this scene type
temp.label = subdir.name();
for (auto image_file : subdir.get_files())
{
// Ignore gif files in this dataset since dlib::load_image() doesn't support
// them and there are only a tiny number of them.
temp.filename = image_file;
if (regex_match(temp.filename, is_gif))
continue;
results.push_back(temp);
}
++temp.numeric_label;
}
return results;
}
unsigned long vote (
const std::vector<unsigned long>& votes
)
{
std::vector<unsigned long> counts(max(mat(votes))+1);
for (auto i : votes)
counts[i]++;
return index_of_max(mat(counts));
}
int main(int argc, char** argv) try
{
if (argc != 3)
{
cout << "give MIT 67 scene folder as input and a weight decay value!" << endl;
return 1;
}
auto listing = get_mit67_listing(argv[1]);
cout << "images in dataset: " << listing.size() << endl;
if (listing.size() == 0 || listing.back().numeric_label != 66)
{
cout << "Didn't find the MIT 67 scene dataset. Are you sure you gave the correct folder?" << endl;
cout << "Give the Images folder as an argument to this program." << endl;
return 1;
}
const double initial_step_size = 0.1;
const double weight_decay = sa = argv[2];
typedef loss_multiclass_log<fc<avg_pool<
res<res<
res<res<
res<res<
res<res<
max_pool<relu<bn<con<
input<matrix<rgb_pixel>
>>>>>>>>>>>>>>>> net_type;
net_type net(fc_(67),
avg_pool_(1000,1000,1000,1000),
res_(512),res_(512,2),
res_(256),res_(256,2),
res_(128),res_(128,2),
res_(64), res_(64),
max_pool_(3,3,2,2), relu_(), bn_(CONV_MODE), con_(64,7,7,2,2)
);
cout << "initial step size: "<< initial_step_size << endl;
cout << "weight decay: " << weight_decay << endl;
dnn_trainer<net_type> trainer(net,sgd(initial_step_size, weight_decay));
trainer.be_verbose();
trainer.set_synchronization_file("mit67_sync3_"+cast_to_string(weight_decay), std::chrono::minutes(5));
std::vector<matrix<rgb_pixel>> samples;
std::vector<unsigned long> labels;
randomize_samples(listing);
const size_t training_part = listing.size()*0.7;
dlib::rand rnd;
const bool do_training = true;
if (do_training)
{
while(trainer.get_step_size() >= 1e-4)
{
samples.clear();
labels.clear();
// make a 64 image mini-batch
matrix<rgb_pixel> img, crop;
while(samples.size() < 64)
{
auto l = listing[rnd.get_random_32bit_number()%training_part];
load_image(img, l.filename);
randomly_crop_image(img, crop, rnd);
samples.push_back(crop);
labels.push_back(l.numeric_label);
}
trainer.train_one_step(samples, labels);
}
// wait for threaded processing to stop.
trainer.get_net();
net.clean();
cout << "saving network" << endl;
serialize("mit67_network3_"+cast_to_string(weight_decay)+".dat") << net;
}
const bool test_network = true;
if (test_network)
{
typedef loss_multiclass_log<fc<avg_pool<
ares<ares<
ares<ares<
ares<ares<
ares<ares<
max_pool<relu<affine<con<
input<matrix<rgb_pixel>
>>>>>>>>>>>>>>>> anet_type;
anet_type net;
deserialize("mit67_network3_"+cast_to_string(weight_decay)+".dat") >> net;
dlib::array<matrix<rgb_pixel>> images;
std::vector<unsigned long> labels;
matrix<rgb_pixel> img, crop;
cout << "loading images..." << endl;
int num_right = 0;
int num_wrong = 0;
console_progress_indicator pbar(training_part);
for (size_t i = 0; i < training_part; ++i)
{
pbar.print_status(i);
load_image(img, listing[i].filename);
randomly_crop_images(img, images, rnd, 16);
unsigned long predicted_label = vote(net(images, 32));
if (predicted_label == listing[i].numeric_label)
++num_right;
else
++num_wrong;
}
cout << "\ntraining num_right: " << num_right << endl;
cout << "training num_wrong: " << num_wrong << endl;
cout << "training accuracy: " << num_right/(double)(num_right+num_wrong) << endl;
pbar.reset(listing.size()-training_part);
num_right = 0;
num_wrong = 0;
for (size_t i = training_part; i < listing.size(); ++i)
{
pbar.print_status(i-training_part);
load_image(img, listing[i].filename);
randomly_crop_images(img, images, rnd, 16);
unsigned long predicted_label = vote(net(images, 32));
if (predicted_label == listing[i].numeric_label)
++num_right;
else
++num_wrong;
}
cout << "\ntesting num_right: " << num_right << endl;
cout << "testing num_wrong: " << num_wrong << endl;
cout << "testing accuracy: " << num_right/(double)(num_right+num_wrong) << endl;
return 0;
}
}
catch(std::exception& e)
{
cout << e.what() << endl;
}
/*
Train the venerable LeNet from
LeCun, Yann, et al. "Gradient-based learning applied to document recognition."
Proceedings of the IEEE 86.11 (1998): 2278-2324.
on MNIST
*/
#include <dlib/dnn.h>
#include <iostream>
#include <dlib/data_io.h>
#include "dlib/time_this.h"
using namespace std;
using namespace dlib;
int main(int argc, char** argv) try
{
if (argc != 2)
{
cout << "give MNIST data folder!" << endl;
return 1;
}
std::vector<matrix<unsigned char>> training_images;
std::vector<unsigned long> training_labels;
std::vector<matrix<unsigned char>> testing_images;
std::vector<unsigned long> testing_labels;
load_mnist_dataset(argv[1], training_images, training_labels, testing_images, testing_labels);
typedef loss_multiclass_log<fc<relu<fc<relu<fc<max_pool<relu<con<max_pool<relu<con<
input<matrix<unsigned char>>>>>>>>>>>>>> net_type;
net_type net(fc_(10),
relu_(),
fc_(84),
relu_(),
fc_(120),
max_pool_(2,2,2,2),
relu_(),
con_(16,5,5),
max_pool_(2,2,2,2),
relu_(),
con_(6,5,5));
dnn_trainer<net_type> trainer(net,sgd(0.1));
trainer.set_mini_batch_size(128);
trainer.be_verbose();
trainer.set_synchronization_file("mnist_sync", std::chrono::seconds(20));
trainer.train(training_images, training_labels);
net.clean();
serialize("mnist_network.dat") << net;
// Run the net on all the data to get predictions
std::vector<unsigned long> predicted_labels = net(training_images);
int num_right = 0;
int num_wrong = 0;
for (size_t i = 0; i < training_images.size(); ++i)
{
if (predicted_labels[i] == training_labels[i])
++num_right;
else
++num_wrong;
}
cout << "training num_right: " << num_right << endl;
cout << "training num_wrong: " << num_wrong << endl;
cout << "training accuracy: " << num_right/(double)(num_right+num_wrong) << endl;
predicted_labels = net(testing_images);
num_right = 0;
num_wrong = 0;
for (size_t i = 0; i < testing_images.size(); ++i)
{
if (predicted_labels[i] == testing_labels[i])
++num_right;
else
++num_wrong;
}
cout << "testing num_right: " << num_right << endl;
cout << "testing num_wrong: " << num_wrong << endl;
cout << "testing accuracy: " << num_right/(double)(num_right+num_wrong) << endl;
}
catch(std::exception& e)
{
cout << e.what() << endl;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment