Commit b1627bc5 authored by Fm's avatar Fm

Merge branch 'master' of https://github.com/davisking/dlib

parents 3b3a9939 5e550a26
......@@ -29,3 +29,4 @@ feaff82884ded598bde93c635eb3ded9c0933a07 v18.15
ce6f364987865b19bdb1b4730ac5403e2bb55dc4 v18.17
7ae1775f61a44b7f07866050b50ad3ade581f019 v18.18
4d6b102506bb9e2f195c7ddf984cc2d86b8643e7 before_dnn_serialization_cleanup
7210589728f6d83f6cb7d21cd24d114a5364d9e2 v19.0
......@@ -12,9 +12,9 @@ include(release_build_by_default)
include(use_cpp_11.cmake)
set(CPACK_PACKAGE_VERSION_MAJOR "18")
set(CPACK_PACKAGE_VERSION_MINOR "18")
set(CPACK_PACKAGE_VERSION_PATCH "100")
set(CPACK_PACKAGE_VERSION_MAJOR "19")
set(CPACK_PACKAGE_VERSION_MINOR "0")
set(CPACK_PACKAGE_VERSION_PATCH "99")
set(VERSION ${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH})
# Set DLIB_VERSION in the including CMake file so they can use it to do whatever they want.
get_directory_property(has_parent PARENT_DIRECTORY)
......
......@@ -385,13 +385,15 @@ namespace dlib
};
// -----------------------------------------------------------------------------------
void copy_tensor(
void copy_tensor(
tensor& dest,
size_t dest_k_offset,
const tensor& src,
size_t src_k_offset,
size_t count_k
);
);
// -----------------------------------------------------------------------------------
}
......
......@@ -208,9 +208,9 @@ namespace dlib
auto ptr = data.host();
for (auto i = ibegin; i != iend; ++i)
{
for (long r = 0; r < NR; ++r)
for (size_t r = 0; r < NR; ++r)
{
for (long c = 0; c < NC; ++c)
for (size_t c = 0; c < NC; ++c)
{
rgb_pixel temp = (*i)(r,c);
auto p = ptr++;
......
......@@ -25,32 +25,42 @@ namespace dlib
);
const full_object_detection& d = dets[i];
// Around Chin. Ear to Ear
for (unsigned long i = 1; i <= 16; ++i)
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
// Line on top of nose
for (unsigned long i = 28; i <= 30; ++i)
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
// left eyebrow
for (unsigned long i = 18; i <= 21; ++i)
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
// Right eyebrow
for (unsigned long i = 23; i <= 26; ++i)
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
// Bottom part of the nose
for (unsigned long i = 31; i <= 35; ++i)
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
// Line from the nose to the bottom part above
lines.push_back(image_window::overlay_line(d.part(30), d.part(35), color));
// Left eye
for (unsigned long i = 37; i <= 41; ++i)
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
lines.push_back(image_window::overlay_line(d.part(36), d.part(41), color));
// Right eye
for (unsigned long i = 43; i <= 47; ++i)
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
lines.push_back(image_window::overlay_line(d.part(42), d.part(47), color));
// Lips outer part
for (unsigned long i = 49; i <= 59; ++i)
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
lines.push_back(image_window::overlay_line(d.part(48), d.part(59), color));
// Lips inside part
for (unsigned long i = 61; i <= 67; ++i)
lines.push_back(image_window::overlay_line(d.part(i), d.part(i-1), color));
lines.push_back(image_window::overlay_line(d.part(60), d.part(67), color));
......
......@@ -142,8 +142,8 @@ namespace dlib
rgb_pixel temp;
assign_pixel(temp, img[r][c]);
temp.red = rtable[temp.red];
temp.green = rtable[temp.green];
temp.blue = rtable[temp.blue];
temp.green = gtable[temp.green];
temp.blue = btable[temp.blue];
assign_pixel(img[r][c], temp);
}
}
......
......@@ -42,6 +42,8 @@ namespace dlib
inline void load(const type* ptr) { x = _mm256_loadu_ps(ptr); }
inline void store(type* ptr) const { _mm256_storeu_ps(ptr, x); }
inline simd8f& operator=(const simd8i& rhs) { *this = simd8f(rhs); return *this; }
inline unsigned int size() const { return 8; }
inline float operator[](unsigned int idx) const
{
......
......@@ -170,10 +170,10 @@ namespace
test_qr(3*randmat<float,2,2>());
test_qr(3*randmat<float,4,3>());
test_qr(3*randmat<float,4,4>());
test_qr(3*randmat<float,9,4>());
test_qr(3*randmat<float,5,4>());
typedef matrix<float,0,0,default_memory_manager, column_major_layout> mat;
test_qr(mat(3*randmat<float>(9,4)));
test_qr(mat(3*randmat<float>(5,4)));
test_qr(mat(3*randmat<float>(9,9)));
}
......
......@@ -61,6 +61,7 @@ else()
";${CMAKE_CXX_COMPILE_FEATURES};" MATCHES ";cxx_delegating_constructors;" AND
";${CMAKE_CXX_COMPILE_FEATURES};" MATCHES ";cxx_thread_local;" AND
";${CMAKE_CXX_COMPILE_FEATURES};" MATCHES ";cxx_constexpr;" AND
";${CMAKE_CXX_COMPILE_FEATURES};" MATCHES ";cxx_decltype_incomplete_return_types;" AND
";${CMAKE_CXX_COMPILE_FEATURES};" MATCHES ";cxx_auto_type;")
set(COMPILER_CAN_DO_CPP_11 1)
......
......@@ -17,8 +17,7 @@
<p>
If you think you found some kind of bug or problem in dlib then feel
free to post on <a href="https://sourceforge.net/p/dclib/discussion">sourceforge</a>
or <a href="https://github.com/davisking/dlib/issues">github</a>.
free to submit a dlib issue on <a href="https://github.com/davisking/dlib/issues">github</a>.
But include the version of dlib you are using, what you
are trying, what happened, what you expected to have happened instead, etc.
</p>
......@@ -26,22 +25,8 @@
<p>
On the other hand, if you haven't found a bug or problem in dlib, but
instead are looking for machine learning/computer vision/programming
consulting then you can still <a href="https://sourceforge.net/p/dclib/discussion">post your question on sourceforge</a>.
But be clear that this is what you are asking for. Maybe someone will
help you or you can find someone to pay money in exchange for a
solution to your problem.
help then post your question to <a href="http://stackoverflow.com/questions/tagged/dlib">stack overflow with the dlib tag</a>.
</p>
<p>
However, don't try to get someone to write your code for you by
repeatedly asking a question like "ok, what do I type next to make a
program that does X?". I get this question all the time from people
who obviously don't know how to program. If you are not familiar
with C++ it's much better to learn it by
<a href="http://dlib.net/books.html">reading one of the excellent books on the topic</a>
than by an infinite sequence of questions posted in the dlib forums.
</p>
</question>
<question text="How can I use dlib in Visual Studio?">
......
......@@ -449,7 +449,7 @@
you take an image of a human face as input and are expected to identify the
locations of important facial landmarks such as the corners of the mouth
and eyes, tip of the nose, and so forth. For example, here is the output
of dlib's <a href="http://sourceforge.net/projects/dclib/files/dlib/v18.10/shape_predictor_68_face_landmarks.dat.bz2">68-face-landmark shape_predictor</a> on an image from the HELEN dataset: <br/><br/>
of dlib's <a href="http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2">68-face-landmark shape_predictor</a> on an image from the HELEN dataset: <br/><br/>
<img src='face_landmarking_example.png'/>
<br/><br/>
......
......@@ -8,26 +8,19 @@
<body>
<p>
Dlib is a modern C++ toolkit containing machine learning algorithms and tools
for creating complex software in C++ to solve real world problems.
It is open source software and licensed
under the <a href="license.html">Boost Software License</a>.
The <a href="intro.html">introduction</a> contains everything you need to know to get
started using the library. However, if after consulting the documentation, you have any questions, comments,
or complaints feel free to post in the
<a href='http://sourceforge.net/p/dclib/discussion'>forums</a>.
Dlib is a modern C++ toolkit containing machine learning algorithms and
tools for creating complex software in C++ to solve real world problems.
It is used in both industry and academia in a wide range of domains
including robotics, embedded devices, mobile phones, and large high
performance computing environments. Dlib's <a href="license.html">open source licensing</a>
allows you to use it in any application, free of charge.
</p>
<chm>
<p>
For updates to this project see <a href="http://dlib.net">dlib.net</a>.
</p>
</chm>
<p>
To follow or participate in the development of dlib subscribe to <a href="https://github.com/davisking/dlib">dlib on github</a>.
Also be sure to read the <a href="howto_contribute.html">how to contribute</a> page if you intend to
submit code to the project.
</p>
<br/>
......
......@@ -96,10 +96,12 @@
<name>Dlib Blog</name>
<link>http://blog.dlib.net</link>
</item>
<!--
<item>
<name>Forums</name>
<link>https://sourceforge.net/p/dclib/discussion</link>
</item>
-->
<item>
<name>Who uses dlib?</name>
<link>http://sourceforge.net/p/dclib/wiki/Known_users/</link>
......@@ -187,17 +189,21 @@
<name>Examples: C++</name>
<sub>
<item>
<name>Deep Learning</name>
<link>dnn_mnist_ex.cpp.html</link>
<name>Deep Learning Introduction Part 1</name>
<link>dnn_introduction_ex.cpp.html</link>
</item>
<item>
<name>Deep Learning Advanced</name>
<link>dnn_mnist_advanced_ex.cpp.html</link>
<name>Deep Learning Introduction Part 2</name>
<link>dnn_introduction2_ex.cpp.html</link>
</item>
<item>
<name>Deep Learning Imagenet Classifier </name>
<name>Deep Learning Imagenet Classifier</name>
<link>dnn_imagenet_ex.cpp.html</link>
</item>
<item>
<name>Deep Learning Imagenet Trainer </name>
<link>dnn_imagenet_train_ex.cpp.html</link>
</item>
<item>
<name>Deep Learning Inception</name>
<link>dnn_inception_ex.cpp.html</link>
......
......@@ -426,15 +426,17 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
input layer or an entire network. Therefore, deep neural networks are created
by stacking many layers on top of each other using the add_layer class.
<p>
For a tutorial showing how this is accomplished see
<a href="dnn_mnist_ex.cpp.html">this MNIST example</a>.
For a tutorial showing how this is accomplished read
the <a href="dnn_introduction_ex.cpp.html">DNN Introduction part 1</a> and
<a href="dnn_introduction2_ex.cpp.html">DNN Introduction part 2</a>.
</p>
</description>
<examples>
<example>dnn_mnist_ex.cpp.html</example>
<example>dnn_mnist_advanced_ex.cpp.html</example>
<example>dnn_introduction_ex.cpp.html</example>
<example>dnn_introduction2_ex.cpp.html</example>
<example>dnn_inception_ex.cpp.html</example>
<example>dnn_imagenet_ex.cpp.html</example>
<example>dnn_imagenet_train_ex.cpp.html</example>
</examples>
</component>
......@@ -449,10 +451,11 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
on the top of a deep neural network.
</description>
<examples>
<example>dnn_mnist_ex.cpp.html</example>
<example>dnn_mnist_advanced_ex.cpp.html</example>
<example>dnn_introduction_ex.cpp.html</example>
<example>dnn_introduction2_ex.cpp.html</example>
<example>dnn_inception_ex.cpp.html</example>
<example>dnn_imagenet_ex.cpp.html</example>
<example>dnn_imagenet_train_ex.cpp.html</example>
</examples>
</component>
......@@ -468,7 +471,7 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
except that it involves less typing, and for large N, will compile much faster.
</description>
<examples>
<example>dnn_mnist_advanced_ex.cpp.html</example>
<example>dnn_introduction2_ex.cpp.html</example>
</examples>
</component>
......@@ -490,12 +493,12 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
<p>
For a tutorial showing how to use tagging see the
<a href="dnn_mnist_advanced_ex.cpp.html">dnn_mnist_advanced_ex.cpp</a>
<a href="dnn_introduction2_ex.cpp.html">dnn_introduction2_ex.cpp</a>
example program.
</p>
</description>
<examples>
<example>dnn_mnist_advanced_ex.cpp.html</example>
<example>dnn_introduction2_ex.cpp.html</example>
</examples>
</component>
......@@ -512,7 +515,7 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
<p>
For a tutorial showing how to use tagging see the
<a href="dnn_mnist_advanced_ex.cpp.html">dnn_mnist_advanced_ex.cpp</a>
<a href="dnn_introduction2_ex.cpp.html">dnn_introduction2_ex.cpp</a>
example program.
</p>
</description>
......@@ -530,12 +533,12 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
<p>
For a tutorial showing how to use tagging see the
<a href="dnn_mnist_advanced_ex.cpp.html">dnn_mnist_advanced_ex.cpp</a>
<a href="dnn_introduction2_ex.cpp.html">dnn_introduction2_ex.cpp</a>
example program.
</p>
</description>
<examples>
<example>dnn_mnist_advanced_ex.cpp.html</example>
<example>dnn_introduction2_ex.cpp.html</example>
</examples>
</component>
......@@ -550,10 +553,11 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
takes some kind of image as input and loads it into a network.
</description>
<examples>
<example>dnn_mnist_ex.cpp.html</example>
<example>dnn_mnist_advanced_ex.cpp.html</example>
<example>dnn_introduction_ex.cpp.html</example>
<example>dnn_introduction2_ex.cpp.html</example>
<example>dnn_inception_ex.cpp.html</example>
<example>dnn_imagenet_ex.cpp.html</example>
<example>dnn_imagenet_train_ex.cpp.html</example>
</examples>
</component>
......
......@@ -37,6 +37,7 @@
<item>find_max_bobyqa</item>
<item>find_max_trust_region</item>
<item>find_min_trust_region</item>
<item>find_optimal_parameters</item>
</section>
<section>
......@@ -58,6 +59,7 @@
<item>find_max_factor_graph_potts</item>
<item>find_max_parse_cky</item>
<item>min_cut</item>
<item>elastic_net</item>
</section>
<section>
......@@ -877,6 +879,65 @@ Or it can use the elastic net regularizer:
</component>
<!-- ************************************************************************* -->
<component cpp11="true">
<name>find_optimal_parameters</name>
<file>dlib/optimization/find_optimal_parameters.h</file>
<spec_file link="true">dlib/optimization/find_optimal_parameters_abstract.h</spec_file>
<description>
Performs a constrained minimization of a function and doesn't require derivatives from the user.
This function is similar to <a href="#find_min_bobyqa">find_min_bobyqa</a> and
<a href="#find_min_single_variable">find_min_single_variable</a> except that it
allows any number of variables and never throws exceptions when the max iteration
limit is reached (even if it didn't converge).
</description>
</component>
<!-- ************************************************************************* -->
<component cpp11="true">
<name>elastic_net</name>
<file>dlib/optimization/elastic_net.h</file>
<spec_file link="true">dlib/optimization/elastic_net_abstract.h</spec_file>
<description>
This object is a tool for solving the following optimization problem:
<pre>
min_w: length_squared(X*w - Y) + ridge_lambda*length_squared(w)
such that: sum(abs(w)) &lt;= lasso_budget
</pre>
<p>
That is, it solves the elastic net optimization problem. This object also
has the special property that you can quickly obtain different solutions
for different settings of ridge_lambda, lasso_budget, and target Y values.
</p>
<p>
This is because a large amount of work is precomputed in the constructor.
The solver will also remember the previous solution and will use that to
warm start subsequent invocations. Therefore, you can efficiently get
solutions for a wide range of regularization parameters.
</p>
The particular algorithm used to solve it is described in the paper:
<blockquote>
Zhou, Quan, et al. "A reduction of the elastic net to support vector
machines with an application to gpu computing." arXiv preprint
arXiv:1409.1976 (2014).
</blockquote>
And for the SVM solver sub-component we use the algorithm from:
<blockquote>
Hsieh, Cho-Jui, et al. "A dual coordinate descent method for large-scale
linear SVM." Proceedings of the 25th international conference on Machine
learning. ACM, 2008.
</blockquote>
</description>
</component>
<!-- ************************************************************************* -->
<component>
......
......@@ -12,53 +12,61 @@
<current>
New Features:
- Added random_color_transform and disturb_colors().
- Added a constructor for seeding rand with a time_t.
- Added apply_random_color_offset()
- Made load_image() support GIF files.
- Added subm_clipped()
- load_mnist_dataset()
- Added an option to solve the L2-loss version of the SVM objective function for svm_c_linear_dcd_trainer.
- Added the option to use the elastic net regularizer to the OCA solver.
- Added solve_qp_box_constrained()
- Added unserialize.
- MATLAB binding stuff
- link to MATLAB's intel MKL when used on linux
- struct support, more arguments (20 now instead of 10),
- in place operation. Made column major matrices directly wrap matlab
- A deep learning toolkit using CPU and/or GPU hardware. Some major elements
of this are:
- Clean and fully documented C++11 API
- Clean tutorials: see dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp
- Uses cuDNN v5.0
- Multi-GPU support
- Automatic learning rate adjustment
- A pretrained 1000 class Imagenet classifier (see dnn_imagenet_ex.cpp)
- Optimization Tools
- Added find_optimal_parameters()
- Added elastic_net class
- Added the option to use the elastic net regularizer to the OCA solver.
- Added an option to solve the L2-loss version of the SVM objective function to svm_c_linear_dcd_trainer.
- Added solve_qp_box_constrained()
- Image Processing
- Added random_color_transform, disturb_colors(), and apply_random_color_offset().
- load_image() now supports loading GIF files.
- Many improvements to the MATLAB binding API
- Automatically link to MATLAB's Intel MKL when used on linux.
- struct support
- mex functions can have up to 20 arguments instead of 10.
- In place operation. Made column major matrices directly wrap MATLAB
matrix objects when used inside mex files. This way, if you use
matrix_colmajor or fmatrix_colmajor in a mex file it will not do any
unnecessary copying or transposing.
- catch ctrl+c presses in MATLAB console.
- DLIB_ASSERTS won't kill the matlab process, just throw an exception
- Made cerr print in matlab as a red warning message.
- C++11 only tools
- Added log1pexp()
- Added running_gradient
- deep learning tools
- dnn_trainer
- cuDNN v4.0
- auto step size adjust and stopping condition.
- CUDA/tensor stuff
- gpu_data, tensor, alias tensors
- Catch ctrl+c presses in the MATLAB console, allowing early termination of mex functions.
- When used inside mex files, DLIB_ASSERTS won't kill the MATLAB process,
just throw an exception.
- Made cerr print in MATLAB as a red warning message.
- Added load_mnist_dataset()
- Added a constructor for seeding rand with a time_t.
- Added subm_clipped()
- Added unserialize.
- Added running_gradient
Non-Backwards Compatible Changes:
- Everything in dlib/matlab/call_matlab.h is now in the dlib namespace.
- DLIB_TEST() and DLIB_TEST_MSG() macros now require you to terminate them with a ;
Bug fixes:
- Fixed bug in 10 argument version of call_matlab() and also cleaned up a few
minor things.
- setup.py and cmake scripts work in a few more contexts.
Other:
- Made cmake scripts uniformly require cmake version 2.8.4.
- C++11
- CMake scripts now enable C++11 by default
- Gave array2d and matrix move constructors and move assignment operators.
minor things.
- setup.py and CMake scripts work in a few more contexts.
- Fixed compiler errors in visual studio 2015.
- Fixed a bug in gaussian_blur() that caused messed up outputs when big
sigma values were used on some pixel types.
- Fixed minor bugs in join_rows() and join_cols(). They didn't work when one
of the matrices was empty.
Other:
- Made CMake scripts uniformly require CMake version 2.8.4.
- Faster fHOG feature extraction / face detection
- CMake scripts now enable C++11 by default
- Gave array2d and matrix move constructors and move assignment operators. Matrix
can also now be created from initializer lists.
</current>
<!-- ************************************************************************************** -->
......
......@@ -261,6 +261,8 @@
<term file="optimization.html" name="find_min_single_variable" include="dlib/optimization.h"/>
<term file="optimization.html" name="find_min_using_approximate_derivatives" include="dlib/optimization.h"/>
<term file="optimization.html" name="find_min_bobyqa" include="dlib/optimization.h"/>
<term file="optimization.html" name="find_optimal_parameters" include="dlib/optimization/find_optimal_parameters.h"/>
<term file="optimization.html" name="elastic_net" include="dlib/optimization/elastic_net.h"/>
<term file="optimization.html" name="solve_qp_box_constrained" include="dlib/optimization.h"/>
<term file="optimization.html" name="solve_qp_using_smo" include="dlib/optimization.h"/>
<term file="optimization.html" name="solve_qp2_using_smo" include="dlib/optimization.h"/>
......
......@@ -76,12 +76,13 @@ bzip2 $WEBPAGE || report_failure
rm -rf $RELDIR
wine ../docs/chm/htmlhelp/hhc.exe ../docs/chm/lib.hhp
mv ../docs/chm/help.chm dlib_documentation-$RELEASE.chm || report_failure
# Don't make the chm doc file since hhc.exe doesn't run in any copy of wine anymore :(
#wine ../docs/chm/htmlhelp/hhc.exe ../docs/chm/lib.hhp
#mv ../docs/chm/help.chm dlib_documentation-$RELEASE.chm || report_failure
mkdir v$RELEASE
mv dlib_documentation-$RELEASE.chm v$RELEASE
#mv dlib_documentation-$RELEASE.chm v$RELEASE
mv $SOURCE_TAR.bz2 v$RELEASE
mv $SOURCE_ZIP v$RELEASE
......
......@@ -31,10 +31,11 @@ ENDMACRO()
# The deep learning toolkit requires a C++11 capable compiler.
if (COMPILER_CAN_DO_CPP_11)
add_example(dnn_mnist_ex)
add_example(dnn_mnist_advanced_ex)
add_example(dnn_introduction_ex)
add_example(dnn_introduction2_ex)
add_example(dnn_inception_ex)
add_example(dnn_imagenet_ex)
add_example(dnn_imagenet_train_ex)
endif()
#here we apply our macros
......@@ -121,12 +122,14 @@ add_example(video_tracking_ex)
add_example(xml_parser_ex)
find_package(OpenCV)
find_package(OpenCV QUIET)
if (OpenCV_FOUND)
include_directories(${OpenCV_INCLUDE_DIRS})
ADD_EXECUTABLE(webcam_face_pose_ex webcam_face_pose_ex.cpp)
TARGET_LINK_LIBRARIES(webcam_face_pose_ex dlib ${OpenCV_LIBS} )
else()
message("OpenCV not found, so we won't build the webcam_face_pose_ex example.")
endif()
......
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
This example shows how to classify an image into one of the 1000 imagenet clategories
using the deep learning tools from the dlib C++ Library. We will use the pretrained
ResNet34 model available on the dlib website.
This example shows how to classify an image into one of the 1000 imagenet
categories using the deep learning tools from the dlib C++ Library. We will
use the pretrained ResNet34 model available on the dlib website.
The ResNet34 model is from Deep Residual Learning for Image Recognition by He, Zhang,
Ren, and Sun.
The ResNet34 architecture is from the paper Deep Residual Learning for Image
Recognition by He, Zhang, Ren, and Sun. The model file that comes with dlib
was trained using the dnn_imagenet_train_ex.cpp program on a Titan X for
about 2 weeks. This pretrained model has a top5 error of 7.572% on the 2012
imagenet validation dataset.
For an introduction to dlib's DNN module read the dnn_introduction_ex.cpp and
dnn_introduction2_ex.cpp example programs.
These tools will use CUDA and cuDNN to drastically accelerate network
training and testing. CMake should automatically find them if they are
installed and configure things appropriately. If not, the program will
Finally, these tools will use CUDA and cuDNN to drastically accelerate
network training and testing. CMake should automatically find them if they
are installed and configure things appropriately. If not, the program will
still run but will be much slower to execute.
*/
......@@ -27,6 +33,7 @@ using namespace dlib;
// ----------------------------------------------------------------------------------------
// This block of statements defines the resnet-34 network
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;
......@@ -41,14 +48,14 @@ template <int N, typename SUBNET> using ares = relu<residual<block,N,affine
template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>;
typedef loss_multiclass_log<fc<1000,avg_pool_everything<
using anet_type = loss_multiclass_log<fc<1000,avg_pool_everything<
ares<512,ares<512,ares_down<512,
ares<256,ares<256,ares<256,ares<256,ares<256,ares_down<256,
ares<128,ares<128,ares<128,ares_down<128,
ares<64,ares<64,ares<64,
max_pool<3,3,2,2,relu<affine<con<64,7,7,2,2,
input_rgb_image_sized<227>
>>>>>>>>>>>>>>>>>>>>>>> anet_type;
>>>>>>>>>>>>>>>>>>>>>>>;
// ----------------------------------------------------------------------------------------
......@@ -101,14 +108,24 @@ void randomly_crop_images (
int main(int argc, char** argv) try
{
if (argc == 1)
{
cout << "Give this program image files as command line arguments.\n" << endl;
cout << "You will also need a copy of the file resnet34_1000_imagenet_classifier.dnn " << endl;
cout << "available at http://dlib.net/files/resnet34_1000_imagenet_classifier.dnn.bz2" << endl;
cout << endl;
return 1;
}
std::vector<string> labels;
anet_type net;
// Get this file from http://dlib.net/files/resnet34_1000_imagenet_classifier.dnn.bz2
// This pretrained model has a top5 error of 7.572% on the 2012 imagenet validation
// dataset.
deserialize("resnet34_1000_imagenet_classifier.dnn") >> net >> labels;
// Make a network with softmax as the final layer. We don't have to do this
// if we just want to output the single best prediction, since the anet_type
// already does this. But if we instead want to get the probability of each
// class as output we need to replace the last layer of the network with a
// softmax layer, which we do as follows:
softmax<anet_type::subnet_type> snet;
snet.subnet() = net.subnet();
......@@ -118,16 +135,19 @@ int main(int argc, char** argv) try
dlib::rand rnd;
image_window win;
// read images from the command prompt and print the top 5 best labels.
// Read images from the command prompt and print the top 5 best labels for each.
for (int i = 1; i < argc; ++i)
{
load_image(img, argv[i]);
const int num_crops = 16;
// Grab 16 random crops from the image. We will run all of them through the
// network and average the results.
randomly_crop_images(img, images, rnd, num_crops);
// p(i) == the probability the image contains object of class i.
matrix<float,1,1000> p = sum_rows(mat(snet(images.begin(), images.end())))/num_crops;
win.set_image(img);
// Print the 5 most probable labels
for (int k = 0; k < 5; ++k)
{
unsigned long predicted_label = index_of_max(p);
......@@ -135,6 +155,7 @@ int main(int argc, char** argv) try
p(predicted_label) = 0;
}
cout << "Hit enter to process the next image";
cin.get();
}
......
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
This program was used to train the resnet34_1000_imagenet_classifier.dnn
network used by the dnn_imagenet_ex.cpp example program.
You should be familiar with dlib's DNN module before reading this example
program. So read dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp first.
*/
#include <dlib/dnn.h>
#include <iostream>
#include <dlib/data_io.h>
#include <dlib/image_transforms.h>
#include <dlib/dir_nav.h>
#include <iterator>
#include <thread>
using namespace std;
using namespace dlib;
// ----------------------------------------------------------------------------------------
// Building blocks for the residual network types defined in this file.

// residual: instantiates `block` with a stride argument of 1 on top of a
// tag1-tagged SUBNET and wraps the result in add_prev1.
// NOTE(review): presumably add_prev1 combines the block output with the output
// of the tag1 layer (the skip connection) -- confirm against the dlib layer docs.
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;
// residual_down: same idea, but `block` is instantiated with a stride argument
// of 2 and the skip1 branch (taken from the tag1 layer) goes through a 2,2,2,2
// avg_pool before add_prev2.
// NOTE(review): presumably the pooling makes the skip branch match the
// downsampled block output -- confirm.
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>;
// block: two con<N,3,3,...> layers, each wrapped in BN, with a relu between
// them. The con applied directly to SUBNET receives `stride` for both of its
// stride arguments; the other con uses 1,1.
template <int N, template <typename> class BN, int stride, typename SUBNET>
using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>;
// Convenience aliases that put a relu on top of a residual unit. The res*
// versions pass bn_con as BN, the ares* versions pass affine as BN.
template <int N, typename SUBNET> using res = relu<residual<block,N,bn_con,SUBNET>>;
template <int N, typename SUBNET> using ares = relu<residual<block,N,affine,SUBNET>>;
template <int N, typename SUBNET> using res_down = relu<residual_down<block,N,bn_con,SUBNET>>;
template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>;
// ----------------------------------------------------------------------------------------
// training network type
// The type nests outward from input_rgb_image_sized<227> (innermost) through a
// con<64,7,7,2,2>/bn_con/relu/max_pool stem, three res<64> units, then groups
// of res units with N = 128, 256 and 512 (each group opened by a res_down),
// and finally avg_pool_everything, fc<1000> and loss_multiclass_log.
using net_type = loss_multiclass_log<fc<1000,avg_pool_everything<
res<512,res<512,res_down<512,
res<256,res<256,res<256,res<256,res<256,res_down<256,
res<128,res<128,res<128,res_down<128,
res<64,res<64,res<64,
max_pool<3,3,2,2,relu<bn_con<con<64,7,7,2,2,
input_rgb_image_sized<227>
>>>>>>>>>>>>>>>>>>>>>>>;
// testing network type (replaced batch normalization with fixed affine transforms)
// Same layer layout as net_type, with every res/res_down/bn_con swapped for
// ares/ares_down/affine.
using anet_type = loss_multiclass_log<fc<1000,avg_pool_everything<
ares<512,ares<512,ares_down<512,
ares<256,ares<256,ares<256,ares<256,ares<256,ares_down<256,
ares<128,ares<128,ares<128,ares_down<128,
ares<64,ares<64,ares<64,
max_pool<3,3,2,2,relu<affine<con<64,7,7,2,2,
input_rgb_image_sized<227>
>>>>>>>>>>>>>>>>>>>>>>>;
// ----------------------------------------------------------------------------------------
// Picks a random square cropping window for the given image.
//
// img: image to crop from; only its dimensions (nr() and nc()) are read.
// rnd: random number generator. It is advanced by one get_random_double()
//      call and then by up to two get_random_32bit_number() calls (the x
//      offset is always drawn before the y offset).
// returns: a size x size rectangle, where size is a random fraction (between
//      mins and maxs) of the shorter image side, moved by a random offset
//      with x in [0, nc-width) and y in [0, nr-height).
rectangle make_random_cropping_rect_resnet(
    const matrix<rgb_pixel>& img,
    dlib::rand& rnd
)
{
    // figure out what rectangle we want to crop from the image
    const double mins = 0.466666666, maxs = 0.875;
    const auto scale = mins + rnd.get_random_double()*(maxs-mins);
    const auto size = scale*std::min(img.nr(), img.nc());
    rectangle rect(size, size);

    // Amount of slack available for shifting the box along each axis.  This is
    // zero only when the image has no columns/rows, in which case taking the
    // modulus would divide by zero, so no shift is applied on that axis.
    const auto x_range = img.nc()-rect.width();
    const auto y_range = img.nr()-rect.height();

    // Draw the two offsets in separate statements so the order in which the
    // generator is consumed does not depend on the compiler's argument
    // evaluation order.
    const auto dx = (x_range != 0) ? rnd.get_random_32bit_number()%x_range : 0;
    const auto dy = (y_range != 0) ? rnd.get_random_32bit_number()%y_range : 0;

    // randomly shift the box around
    const point offset(dx, dy);
    return move_rect(rect, offset);
}
// ----------------------------------------------------------------------------------------
void randomly_crop_image (
const matrix<rgb_pixel>& img,
matrix<rgb_pixel>& crop,
dlib::rand& rnd
)
{
auto rect = make_random_cropping_rect_resnet(img, rnd);
// now crop it out as a 227x227 image.
extract_image_chip(img, chip_details(rect, chip_dims(227,227)), crop);
// Also randomly flip the image
if (rnd.get_random_double() > 0.5)
crop = fliplr(crop);
// And then randomly adjust the colors.
apply_random_color_offset(crop, rnd);
}
void randomly_crop_images (
const matrix<rgb_pixel>& img,
dlib::array<matrix<rgb_pixel>>& crops,
dlib::rand& rnd,
long num_crops
)
{
std::vector<chip_details> dets;
for (long i = 0; i < num_crops; ++i)
{
auto rect = make_random_cropping_rect_resnet(img, rnd);
dets.push_back(chip_details(rect, chip_dims(227,227)));
}
extract_image_chips(img, dets, crops);
for (auto&& img : crops)
{
// Also randomly flip the image
if (rnd.get_random_double() > 0.5)
img = fliplr(img);
// And then randomly adjust the colors.
apply_random_color_offset(img, rnd);
}
}
// ----------------------------------------------------------------------------------------
// One entry of a dataset listing, as filled in by the listing functions below.
struct image_info
{
    std::string filename;   // path of the image file
    std::string label;      // textual class label for the image
    long numeric_label;     // integer index assigned to `label`; not initialized here
};
// Builds the listing of training images stored under images_folder.
//
// images_folder: directory that contains one sub directory per label; every
//                file inside a sub directory is recorded with that sub
//                directory's name as its label.
// returns: one image_info per file found.  Numeric labels start at 0 and
//          increase by one per sub directory, in sorted sub directory order.
std::vector<image_info> get_imagenet_train_listing(
    const std::string& images_folder
)
{
    std::vector<image_info> results;
    image_info temp;
    temp.numeric_label = 0;
    // We will loop over all the label types in the dataset, each is contained in a subfolder.
    auto subdirs = directory(images_folder).get_dirs();
    // But first, sort the sub directories so the numeric labels will be assigned in sorted order.
    std::sort(subdirs.begin(), subdirs.end());
    // Iterate by const reference: the directory and file objects are only
    // read here, so there is no need to copy one per iteration.
    for (const auto& subdir : subdirs)
    {
        // Now get all the images in this label type
        temp.label = subdir.name();
        for (const auto& image_file : subdir.get_files())
        {
            temp.filename = image_file;
            results.push_back(temp);
        }
        ++temp.numeric_label;
    }
    return results;
}
// Builds the validation listing from a text file of "<label> <filename>" pairs.
//   imagenet_root_dir      - directory prepended (with a '/') to every
//                            filename read from the listing file.
//   validation_images_file - text file containing whitespace separated
//                            label/filename pairs.
// Returns one image_info per pair.  numeric_label starts at 0 and is bumped
// each time the label differs from the previous entry's label, so entries
// with the same label are expected to be adjacent in the file.
// Prints a message to cerr and exits with status 1 if the listing file cannot
// be opened or if any listed image file does not exist.
std::vector<image_info> get_imagenet_val_listing(
    const std::string& imagenet_root_dir,
    const std::string& validation_images_file
)
{
    ifstream fin(validation_images_file);
    // Without this check an unreadable listing file would silently produce an
    // empty result and the caller would report a meaningless accuracy.
    if (!fin)
    {
        cerr << "unable to open validation images file! " << validation_images_file << endl;
        exit(1);
    }

    string label, filename;
    std::vector<image_info> results;
    image_info temp;
    // Start at -1 so the first label read (which always differs from the
    // initially empty temp.label) becomes class 0.
    temp.numeric_label = -1;
    while(fin >> label >> filename)
    {
        temp.filename = imagenet_root_dir+"/"+filename;
        if (!file_exists(temp.filename))
        {
            cerr << "file doesn't exist! " << temp.filename << endl;
            exit(1);
        }
        if (label != temp.label)
            ++temp.numeric_label;
        temp.label = label;
        results.push_back(temp);
    }
    return results;
}
// ----------------------------------------------------------------------------------------
// Trains a network of type net_type on the imagenet training set, saves it to
// "resnet34.dnn", and then reports top1/top5 accuracy on the validation set.
//   argv[1] - root folder of the imagenet ILSVRC2015 dataset.
//   argv[2] - validation listing file (imagenet2015_validation_images.txt).
// Returns 1 on bad usage, when the dataset is not found, or when an exception
// is caught; otherwise falls off the end of main (status 0).
int main(int argc, char** argv) try
{
    if (argc != 3)
    {
        cout << "To run this program you need a copy of the imagenet ILSVRC2015 dataset and" << endl;
        cout << "also the file http://dlib.net/files/imagenet2015_validation_images.txt.bz2" << endl;
        cout << endl;
        cout << "With those things, you call this program like this: " << endl;
        cout << "./dnn_imagenet_train_ex /path/to/ILSVRC2015 imagenet2015_validation_images.txt" << endl;
        return 1;
    }

    cout << "\nSCANNING IMAGENET DATASET\n" << endl;

    auto listing = get_imagenet_train_listing(string(argv[1])+"/Data/CLS-LOC/train/");
    cout << "images in dataset: " << listing.size() << endl;
    // Only look at listing.back() once we know the listing is non-empty;
    // calling back() on an empty vector is undefined behavior.
    const long number_of_classes = listing.empty() ? 0 : listing.back().numeric_label+1;
    if (number_of_classes != 1000)
    {
        cout << "Didn't find the imagenet dataset. " << endl;
        return 1;
    }

    set_dnn_prefer_smallest_algorithms();

    const double initial_learning_rate = 0.1;
    const double weight_decay = 0.0001;
    const double momentum = 0.9;

    net_type net;
    dnn_trainer<net_type> trainer(net,sgd(weight_decay, momentum));
    trainer.be_verbose();
    trainer.set_learning_rate(initial_learning_rate);
    trainer.set_synchronization_file("imagenet_trainer_state_file.dat", std::chrono::minutes(10));
    // This threshold is probably excessively large.  You could likely get good results
    // with a smaller value but if you aren't in a hurry this value will surely work well.
    trainer.set_iterations_without_progress_threshold(20000);

    std::vector<matrix<rgb_pixel>> samples;
    std::vector<unsigned long> labels;

    // Start a bunch of threads that read images from disk and pull out random crops.  It's
    // important to be sure to feed the GPU fast enough to keep it busy.  Using multiple
    // threads for this kind of data preparation helps us do that.  Each thread puts the
    // crops into the data queue.
    dlib::pipe<std::pair<image_info,matrix<rgb_pixel>>> data(200);
    auto f = [&data, &listing](time_t seed)
    {
        // Each loader gets a different seed so the threads draw different samples.
        dlib::rand rnd(time(0)+seed);
        matrix<rgb_pixel> img;
        std::pair<image_info, matrix<rgb_pixel>> temp;
        while(data.is_enabled())
        {
            temp.first = listing[rnd.get_random_32bit_number()%listing.size()];
            load_image(img, temp.first.filename);
            randomly_crop_image(img, temp.second, rnd);
            data.enqueue(temp);
        }
    };
    std::thread data_loader1([f](){ f(1); });
    std::thread data_loader2([f](){ f(2); });
    std::thread data_loader3([f](){ f(3); });
    std::thread data_loader4([f](){ f(4); });

    // The main training loop.  Keep making mini-batches and giving them to the trainer.
    // We will run until the learning rate has dropped by a factor of 1e-3.
    while(trainer.get_learning_rate() >= initial_learning_rate*1e-3)
    {
        samples.clear();
        labels.clear();

        // make a 160 image mini-batch
        std::pair<image_info, matrix<rgb_pixel>> img;
        while(samples.size() < 160)
        {
            data.dequeue(img);

            // Only img.second is moved from; img.first is still intact below.
            samples.push_back(std::move(img.second));
            labels.push_back(img.first.numeric_label);
        }

        trainer.train_one_step(samples, labels);
    }

    // Training done, tell threads to stop and make sure to wait for them to finish before
    // moving on.
    data.disable();
    data_loader1.join();
    data_loader2.join();
    data_loader3.join();
    data_loader4.join();

    // also wait for threaded processing to stop in the trainer.
    trainer.get_net();

    net.clean();
    cout << "saving network" << endl;
    serialize("resnet34.dnn") << net;

    // Now test the network on the imagenet validation dataset.  First, make a testing
    // network with softmax as the final layer.  We don't have to do this if we just wanted
    // to test the "top1 accuracy" since the normal network outputs the class prediction.
    // But this snet object will make getting the top5 predictions easy as it directly
    // outputs the probability of each class as its final output.
    softmax<anet_type::subnet_type> snet; snet.subnet() = net.subnet();

    cout << "Testing network on imagenet validation dataset..." << endl;
    int num_right = 0;
    int num_wrong = 0;
    int num_right_top1 = 0;
    int num_wrong_top1 = 0;
    dlib::rand rnd(time(0));
    // loop over all the imagenet validation images
    for (auto l : get_imagenet_val_listing(argv[1], argv[2]))
    {
        dlib::array<matrix<rgb_pixel>> images;
        matrix<rgb_pixel> img;
        load_image(img, l.filename);
        // Grab 16 random crops from the image.  We will run all of them through the
        // network and average the results.
        const int num_crops = 16;
        randomly_crop_images(img, images, rnd, num_crops);
        // p(i) == the probability the image contains object of class i.
        matrix<float,1,1000> p = sum_rows(mat(snet(images.begin(), images.end())))/num_crops;

        // check top 1 accuracy
        if (index_of_max(p) == l.numeric_label)
            ++num_right_top1;
        else
            ++num_wrong_top1;

        // check top 5 accuracy
        bool found_match = false;
        for (int k = 0; k < 5; ++k)
        {
            // Take the current best guess, then zero it out so the next
            // iteration yields the next most probable class.
            long predicted_label = index_of_max(p);
            p(predicted_label) = 0;
            if (predicted_label == l.numeric_label)
            {
                found_match = true;
                break;
            }
        }
        if (found_match)
            ++num_right;
        else
            ++num_wrong;
    }
    cout << "val top5 accuracy:  " << num_right/(double)(num_right+num_wrong) << endl;
    cout << "val top1 accuracy:  " << num_right_top1/(double)(num_right_top1+num_wrong_top1) << endl;
}
catch(std::exception& e)
{
    cout << e.what() << endl;
    // Report the failure through the exit status instead of falling off the
    // end of main (which would signal success).
    return 1;
}
......@@ -2,8 +2,8 @@
/*
This is an example illustrating the use of the deep learning tools from the
dlib C++ Library. I'm assuming you have already read the introductory
dnn_mnist_ex.cpp and dnn_mnist_advanced_ex.cpp examples. In this example we
are going to show how to create inception networks.
dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp examples. In this
example we are going to show how to create inception networks.
An inception network is composed of inception blocks of the form:
......
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
This is an example illustrating the use of the deep learning tools from the
dlib C++ Library. I'm assuming you have already read the dnn_mnist_ex.cpp
dlib C++ Library. I'm assuming you have already read the dnn_introduction_ex.cpp
example. So in this example program I'm going to go over a number of more
advanced parts of the API, including:
- Using multiple GPUs
......
......@@ -18,7 +18,7 @@
# tools. See train_shape_predictor.py to see an example.
#
# You can get the shape_predictor_68_face_landmarks.dat file from:
# http://sourceforge.net/projects/dclib/files/dlib/v18.10/shape_predictor_68_face_landmarks.dat.bz2
# http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
#
# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE
# You can install dlib using the command:
......@@ -56,7 +56,7 @@ if len(sys.argv) != 3:
"execute this program by running:\n"
" ./face_landmark_detection.py shape_predictor_68_face_landmarks.dat ../examples/faces\n"
"You can download a trained facial shape predictor from:\n"
" http://sourceforge.net/projects/dclib/files/dlib/v18.10/shape_predictor_68_face_landmarks.dat.bz2")
" http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2")
exit()
predictor_path = sys.argv[1]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment