Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
ed7c344a
Commit
ed7c344a
authored
Sep 17, 2011
by
Davis King
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added some examples for the new object detection stuff.
parent
79c4c85a
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
545 additions
and
0 deletions
+545
-0
CMakeLists.txt
examples/CMakeLists.txt
+2
-0
object_detector_advanced_ex.cpp
examples/object_detector_advanced_ex.cpp
+308
-0
object_detector_ex.cpp
examples/object_detector_ex.cpp
+235
-0
No files found.
examples/CMakeLists.txt
View file @
ed7c344a
...
...
@@ -64,6 +64,8 @@ add_example(mlp_ex)
# Example programs registered with the build, one add_example() call per
# program.  NOTE(review): add_example is defined outside this excerpt --
# presumably it builds <name>.cpp into an executable called <name>; confirm
# against its definition.  The names in this stretch are in alphabetical order.
add_example(model_selection_ex)
add_example(multiclass_classification_ex)
add_example(multithreaded_object_ex)
add_example(object_detector_advanced_ex)
add_example(object_detector_ex)
add_example(optimization_ex)
add_example(pipe_ex)
add_example(pipe_ex_2)
...
...
examples/object_detector_advanced_ex.cpp
0 → 100644
View file @
ed7c344a
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
This is an example illustrating the process for defining custom
feature extractors for use with the structural_object_detection_trainer.
NOTICE: This example assumes you are familiar with the contents of the
object_detector_ex.cpp example program.
*/
#include "dlib/svm_threaded.h"
#include "dlib/gui_widgets.h"
#include "dlib/array.h"
#include "dlib/array2d.h"
#include "dlib/image_keypoint.h"
#include "dlib/image_processing.h"
#include <iostream>
#include <fstream>
using
namespace
std
;
using
namespace
dlib
;
// ----------------------------------------------------------------------------------------
template
<
typename
image_array_type
>
void
make_simple_test_data
(
image_array_type
&
images
,
std
::
vector
<
std
::
vector
<
rectangle
>
>&
object_locations
)
/*!
ensures
- #images.size() == 3
- #object_locations.size() == 3
- Creates some simple images to test the object detection routines. In particular,
this function creates images with white 70x70 squares in them. It also stores
the locations of these squares in object_locations.
- for all valid i:
- object_locations[i] == A list of all the white rectangles present in images[i].
!*/
{
images
.
clear
();
object_locations
.
clear
();
images
.
resize
(
3
);
images
[
0
].
set_size
(
400
,
400
);
images
[
1
].
set_size
(
400
,
400
);
images
[
2
].
set_size
(
400
,
400
);
// set all the pixel values to black
assign_all_pixels
(
images
[
0
],
0
);
assign_all_pixels
(
images
[
1
],
0
);
assign_all_pixels
(
images
[
2
],
0
);
// Now make some squares and draw them onto our black images. All the
// squares will be 70 pixels wide and tall.
std
::
vector
<
rectangle
>
temp
;
temp
.
push_back
(
centered_rect
(
point
(
100
,
100
),
70
,
70
));
fill_rect
(
images
[
0
],
temp
.
back
(),
255
);
// Paint the square white
temp
.
push_back
(
centered_rect
(
point
(
200
,
300
),
70
,
70
));
fill_rect
(
images
[
0
],
temp
.
back
(),
255
);
// Paint the square white
object_locations
.
push_back
(
temp
);
temp
.
clear
();
temp
.
push_back
(
centered_rect
(
point
(
140
,
200
),
70
,
70
));
fill_rect
(
images
[
1
],
temp
.
back
(),
255
);
// Paint the square white
temp
.
push_back
(
centered_rect
(
point
(
303
,
200
),
70
,
70
));
fill_rect
(
images
[
1
],
temp
.
back
(),
255
);
// Paint the square white
object_locations
.
push_back
(
temp
);
temp
.
clear
();
temp
.
push_back
(
centered_rect
(
point
(
123
,
121
),
70
,
70
));
fill_rect
(
images
[
2
],
temp
.
back
(),
255
);
// Paint the square white
object_locations
.
push_back
(
temp
);
}
// ----------------------------------------------------------------------------------------
class very_simple_feature_extractor : noncopyable
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            A toy feature extractor.  For every pixel of a loaded image it produces
            a 32 dimensional indicator vector: exactly one element is non-zero, and
            its index encodes which pixels of the 4-connected neighborhood (the pixel
            itself plus its right, left, lower, and upper neighbors) are non-zero.
            That is enough to tell apart simple situations such as sitting on the
            corner of a white box, on one of its edges, or in its interior.

            This object implements the interface defined in
            dlib/image_keypoint/hashed_feature_image_abstract.h, so every member
            function below is expected to behave as described in the
            hashed_feature_image specification.  When writing your own feature
            extractor you should read that documentation in addition to this
            example program.
    !*/

public:

    // Sparse vector type returned by operator(): a list of (index, value) pairs.
    typedef std::vector<std::pair<unsigned int, double> > descriptor_type;

    template <
        typename image_type
        >
    void load (
        const image_type& img
    )
    /*!
        ensures
            - #nr() == img.nr() and #nc() == img.nc()
            - Computes the neighborhood code for every interior pixel of img.
              Pixels on the image border keep the code 0 since they do not have
              a full 4-connected neighborhood.
    !*/
    {
        feat_image.set_size(img.nr(), img.nc());
        assign_all_pixels(feat_image, 0);

        const long num_rows = img.nr();
        const long num_cols = img.nc();

        for (long r = 1; r + 1 < num_rows; ++r)
        {
            for (long c = 1; c + 1 < num_cols; ++c)
            {
                // One bit per member of the 4-connected neighborhood.  A bit is
                // set when the corresponding pixel is non-zero.
                unsigned int pattern = 0;
                pattern |= (img[r][c]   ? 0x01u : 0u);   // center
                pattern |= (img[r][c+1] ? 0x02u : 0u);   // right
                pattern |= (img[r][c-1] ? 0x04u : 0u);   // left
                pattern |= (img[r+1][c] ? 0x08u : 0u);   // below
                pattern |= (img[r-1][c] ? 0x10u : 0u);   // above

                // At most 5 bits are set, so the code always fits in a byte.
                feat_image[r][c] = static_cast<unsigned char>(pattern);
            }
        }
    }

    long nr (
    ) const { return feat_image.nr(); }

    long nc (
    ) const { return feat_image.nc(); }

    unsigned long size (
    ) const { return feat_image.size(); }

    long get_num_dimensions (
    ) const
    {
        // Dimensionality of the vectors produced by operator().  The code stored
        // by load() uses 5 bits, hence 2^5 == 32 possible values.
        return 32;
    }

    const descriptor_type& operator() (
        long row,
        long col
    ) const
    /*!
        requires
            - 0 <= row < nr()
            - 0 <= col < nc()
        ensures
            - returns a sparse vector describing the image at the given row and
              column.  The vector is 0 everywhere except for a single element, which
              has the value 1 and whose index is the neighborhood code load()
              stored for this location.
            - the returned reference points at a buffer that is overwritten by the
              next call to operator().
    !*/
    {
        feat.assign(1, descriptor_type::value_type(feat_image[row][col], 1.0));
        return feat;
    }

    // The next five functions translate between the row/col space used by
    // operator() and the coordinates of the image given to load().  Here one
    // feature is extracted per pixel, so the mapping is the identity.  In general
    // a feature extractor might not extract at every image location (e.g. the
    // hog_image), which leads to a more complex mapping.

    const rectangle get_block_rect (
        long row,
        long col
    ) const
    {
        // The image region covered by a feature is the single pixel it sits on.
        return centered_rect(col, row, 1, 1);
    }

    const point image_to_feat_space (
        const point& p
    ) const { return p; }

    const rectangle image_to_feat_space (
        const rectangle& rect
    ) const { return rect; }

    const point feat_to_image_space (
        const point& p
    ) const { return p; }

    const rectangle feat_to_image_space (
        const rectangle& rect
    ) const { return rect; }

    // The only state worth saving is the table of neighborhood codes.
    friend void serialize (
        const very_simple_feature_extractor& item,
        std::ostream& out
    )
    {
        serialize(item.feat_image, out);
    }

    friend void deserialize (
        very_simple_feature_extractor& item,
        std::istream& in
    )
    {
        deserialize(item.feat_image, in);
    }

    // This extractor has no tunable parameters, so there is nothing to copy.
    void copy_configuration (
        const very_simple_feature_extractor& item
    ){}

private:

    // Neighborhood code of every pixel of the most recently loaded image.
    array2d<unsigned char> feat_image;

    // Scratch buffer which is not logically part of this object's state.  It
    // exists only so operator() can hand out a reference instead of returning a
    // descriptor_type by value.
    mutable descriptor_type feat;
};
// ----------------------------------------------------------------------------------------
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
// Get some data
typedef
array
<
array2d
<
unsigned
char
>
>::
expand_1b
grayscale_image_array_type
;
grayscale_image_array_type
images
;
std
::
vector
<
std
::
vector
<
rectangle
>
>
object_locations
;
make_simple_test_data
(
images
,
object_locations
);
typedef
scan_image_pyramid
<
pyramid_down_5_4
,
very_simple_feature_extractor
>
image_scanner_type
;
image_scanner_type
scanner
;
// Setup the sliding window box. Lets use a window with the same shape as the white boxes we
// are trying to detect.
const
rectangle
object_box
=
compute_box_dimensions
(
1
,
// width/height ratio
70
*
70
// box area
);
scanner
.
add_detection_template
(
object_box
,
create_grid_detection_template
(
object_box
,
2
,
2
));
// Since our sliding window is already the right size to detect our objects we don't need
// to use an image pyramid. So setting this to 1 turns off the image pyramid.
scanner
.
set_max_pyramid_levels
(
1
);
// While the very_simple_feature_extractor doesn't have any parameters, when you go solve
// real problems you might define a feature extractor which has some non-trivial parameters
// that need to be setup before it can be used. So you need to be able to pass these parameters
// to the scanner object somehow. You can do this using the copy_configuration() function as
// shown below.
very_simple_feature_extractor
fe
;
/*
setup the parameters in the fe object.
...
*/
// The scanner will call fe.copy_configuration() to copy the state of fe
// into it's internal feature extractor.
scanner
.
copy_configuration
(
fe
);
// Now that we have defined the kind of sliding window classifier system we want and stored
// the details into the scanner object we are ready to use the structural_object_detection_trainer
// to learn the weight vector and threshold needed to produce a complete object detector.
structural_object_detection_trainer
<
image_scanner_type
>
trainer
(
scanner
);
trainer
.
set_num_threads
(
4
);
// Set this to the number of processing cores on your machine.
// This line tells the algorithm that it is never OK for two detections to overlap. So
// this controls how the non-max suppression is performed and in general you can set this up
// any way you like.
trainer
.
set_overlap_tester
(
test_box_overlap
(
0
));
// The trainer will try and find the detector which minimizes the number of detection mistakes.
// This function controls how it decides if a detection output is a mistake or not. The bigger
// the input to this function the more strict it is in deciding if the detector is correctly
// hitting the targets. Try reducing the value to 0.001 and observing the results. You should
// see that the detections aren't exactly on top of the white squares anymore. See the documentation
// for the structural_object_detection_trainer and structural_svm_object_detection_problem objects
// for a more detailed discussion of this parameter.
trainer
.
set_overlap_eps
(
0.95
);
object_detector
<
image_scanner_type
>
detector
=
trainer
.
train
(
images
,
object_locations
);
// We can easily test the new detector against our training data. This print statement will indicate that it
// has perfect precision and recall on this simple task.
cout
<<
"Test detector (precision,recall): "
<<
test_object_detection_function
(
detector
,
images
,
object_locations
)
<<
endl
;
// The cross validation should also indicate perfect precision and recall.
cout
<<
"3-fold cross validation (precision,recall): "
<<
cross_validate_object_detection_trainer
(
trainer
,
images
,
object_locations
,
3
)
<<
endl
;
/*
It is also worth pointing out that you don't have to use dlib::array2d objects to
represent your images. In fact, you can use any object, even something like a struct
of many images and other things, as the "image". The only requirements on an image
are that it should be possible to pass it to scanner.load(). So if you can say
scanner.load(images[0]), for example. See the documentation for scan_image_pyramid::load()
for the details.
*/
// Lets display the output of the detector along with our training images.
image_window
win
;
for
(
unsigned
long
i
=
0
;
i
<
images
.
size
();
++
i
)
{
// Run the detector on images[i]
const
std
::
vector
<
rectangle
>
rects
=
detector
(
images
[
i
]);
cout
<<
"Number of detections: "
<<
rects
.
size
()
<<
endl
;
// Put the image and detections into the window.
win
.
clear_overlay
();
win
.
set_image
(
images
[
i
]);
for
(
unsigned
long
j
=
0
;
j
<
rects
.
size
();
++
j
)
{
// Add each detection as a red box.
win
.
add_overlay
(
image_display
::
overlay_rect
(
rects
[
j
],
rgb_pixel
(
255
,
0
,
0
)));
}
cout
<<
"Hit enter to see the next image."
;
cin
.
get
();
}
}
catch
(
exception
&
e
)
{
cout
<<
"
\n
exception thrown!"
<<
endl
;
cout
<<
e
.
what
()
<<
endl
;
}
catch
(...)
{
cout
<<
"Some error occurred"
<<
endl
;
}
}
// ----------------------------------------------------------------------------------------
examples/object_detector_ex.cpp
0 → 100644
View file @
ed7c344a
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
This is an example illustrating the use of the dlib tools for
detecting objects in images. In this example we will create
three simple images, each containing some white squares. We
will then use the sliding window classifier tools to learn to
detect these squares.
*/
#include "dlib/svm_threaded.h"
#include "dlib/gui_widgets.h"
#include "dlib/array.h"
#include "dlib/array2d.h"
#include "dlib/image_keypoint.h"
#include "dlib/image_processing.h"
#include <iostream>
#include <fstream>
using
namespace
std
;
using
namespace
dlib
;
// ----------------------------------------------------------------------------------------
template
<
typename
image_array_type
>
void
make_simple_test_data
(
image_array_type
&
images
,
std
::
vector
<
std
::
vector
<
rectangle
>
>&
object_locations
)
/*!
ensures
- #images.size() == 3
- #object_locations.size() == 3
- Creates some simple images to test the object detection routines. In particular,
this function creates images with white 70x70 squares in them. It also stores
the locations of these squares in object_locations.
- for all valid i:
- object_locations[i] == A list of all the white rectangles present in images[i].
!*/
{
images
.
clear
();
object_locations
.
clear
();
images
.
resize
(
3
);
images
[
0
].
set_size
(
400
,
400
);
images
[
1
].
set_size
(
400
,
400
);
images
[
2
].
set_size
(
400
,
400
);
// set all the pixel values to black
assign_all_pixels
(
images
[
0
],
0
);
assign_all_pixels
(
images
[
1
],
0
);
assign_all_pixels
(
images
[
2
],
0
);
// Now make some squares and draw them onto our black images. All the
// squares will be 70 pixels wide and tall.
std
::
vector
<
rectangle
>
temp
;
temp
.
push_back
(
centered_rect
(
point
(
100
,
100
),
70
,
70
));
fill_rect
(
images
[
0
],
temp
.
back
(),
255
);
// Paint the square white
temp
.
push_back
(
centered_rect
(
point
(
200
,
300
),
70
,
70
));
fill_rect
(
images
[
0
],
temp
.
back
(),
255
);
// Paint the square white
object_locations
.
push_back
(
temp
);
temp
.
clear
();
temp
.
push_back
(
centered_rect
(
point
(
140
,
200
),
70
,
70
));
fill_rect
(
images
[
1
],
temp
.
back
(),
255
);
// Paint the square white
temp
.
push_back
(
centered_rect
(
point
(
303
,
200
),
70
,
70
));
fill_rect
(
images
[
1
],
temp
.
back
(),
255
);
// Paint the square white
object_locations
.
push_back
(
temp
);
temp
.
clear
();
temp
.
push_back
(
centered_rect
(
point
(
123
,
121
),
70
,
70
));
fill_rect
(
images
[
2
],
temp
.
back
(),
255
);
// Paint the square white
object_locations
.
push_back
(
temp
);
}
// ----------------------------------------------------------------------------------------
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
// The first thing we do is create the set of 3 images discussed above.
typedef
array
<
array2d
<
unsigned
char
>
>::
expand_1b
grayscale_image_array_type
;
grayscale_image_array_type
images
;
std
::
vector
<
std
::
vector
<
rectangle
>
>
object_locations
;
make_simple_test_data
(
images
,
object_locations
);
/*
This next block of code specifies the type of sliding window classifier we will
be using to detect the white squares. The most important thing here is the
scan_image_pyramid template. Instances of this template represent the core
of a sliding window classifier. To go into more detail, the sliding window
classifiers used by this object have three parts:
1. The underlying feature extraction. See the dlib documentation for a detailed
discussion of how the hashed_feature_image and hog_image feature extractors
work. However, to understand this example, all you need to know is that the
feature extractor associates a vector with each location in an image. This
vector is supposed to capture information which describes how parts of the
image look in a way that is relevant to the problem you are trying to solve.
2. A detection template. This is a rectangle which defines the shape of a
sliding window (the object_box), as well as a set of rectangles which
envelop it. This set of enveloping rectangles defines the spatial
structure of the overall feature extraction within a sliding window.
In particular, each location of a sliding window has a feature vector
associated with it. This feature vector is defined as follows:
- Let N denote the number of enveloping rectangles.
- Let M denote the dimensionality of the vectors output by feature_extractor_type
objects.
- Let F(i) == the M dimensional vector which is the sum of all vectors
given by our feature_extractor_type object inside the ith enveloping
rectangle.
- Then the feature vector for a sliding window is an M*N dimensional vector
[F(1) F(2) F(3) ... F(N)] (i.e. it is a concatenation of the N vectors).
This feature vector can be thought of as a collection of N "bags of features",
each bag coming from a spatial location determined one of the enveloping
rectangles.
3. A weight vector and a threshold value. The dot product between the weight
vector and the feature vector for a sliding window location gives the score
of the window. If this score is greater than the threshold value then the
window location is output as a detection. You don't need to determine these
parameters yourself. They are automatically populated by the
structural_object_detection_trainer.
Finally, the sliding window classifiers described above are applied to every level
of an image pyramid. So you need to tell scan_image_pyramid what kind of pyramid
you want to use. In this case we are using pyramid_down which downsamples each
pyramid layer by half (dlib also contains other version of pyramid_down which result
in finer grained pyramids).
*/
typedef
hashed_feature_image
<
hog_image
<
3
,
3
,
1
,
4
,
hog_signed_gradient
,
hog_full_interpolation
>
>
feature_extractor_type
;
typedef
scan_image_pyramid
<
pyramid_down
,
feature_extractor_type
>
image_scanner_type
;
image_scanner_type
scanner
;
// Setup the sliding window box. Lets use a window with the same shape as the white boxes we
// are trying to detect.
const
rectangle
object_box
=
compute_box_dimensions
(
1
,
// width/height ratio
70
*
70
// box area
);
// Setup the detection template so it contains 4 feature extraction zones inside the object_box. These
// are the upper left, upper right, lower left, and lower right quadrants of object_box. (Note that
// in general we can add more than one detection template. But in this case one is enough.)
scanner
.
add_detection_template
(
object_box
,
create_grid_detection_template
(
object_box
,
2
,
2
));
// Now that we have defined the kind of sliding window classifier system we want and stored
// the details into the scanner object we are ready to use the structural_object_detection_trainer
// to learn the weight vector and threshold needed to produce a complete object detector.
structural_object_detection_trainer
<
image_scanner_type
>
trainer
(
scanner
);
trainer
.
set_num_threads
(
4
);
// Set this to the number of processing cores on your machine.
// This line tells the algorithm that it is never OK for two detections to overlap. So
// this controls how the non-max suppression is performed and in general you can set this up
// any way you like.
trainer
.
set_overlap_tester
(
test_box_overlap
(
0
));
// There are a variety of other useful parameters to the structural_object_detection_trainer.
// Examples of the ones you are most likely to use follow (see dlib documentation for what they do):
//trainer.set_overlap_eps(0.80);
//trainer.set_c(1.0);
//trainer.set_loss_per_missed_target(1);
//trainer.set_loss_per_false_alarm(1);
// Do the actual training and save the results into the detector object.
object_detector
<
image_scanner_type
>
detector
=
trainer
.
train
(
images
,
object_locations
);
// We can easily test the new detector against our training data. This print statement will indicate that it
// has perfect precision and recall on this simple task.
cout
<<
"Test detector (precision,recall): "
<<
test_object_detection_function
(
detector
,
images
,
object_locations
)
<<
endl
;
// The cross validation should also indicate perfect precision and recall.
cout
<<
"3-fold cross validation (precision,recall): "
<<
cross_validate_object_detection_trainer
(
trainer
,
images
,
object_locations
,
3
)
<<
endl
;
// Lets display the output of the detector along with our training images.
image_window
win
;
for
(
unsigned
long
i
=
0
;
i
<
images
.
size
();
++
i
)
{
// Run the detector on images[i]
const
std
::
vector
<
rectangle
>
rects
=
detector
(
images
[
i
]);
cout
<<
"Number of detections: "
<<
rects
.
size
()
<<
endl
;
// Put the image and detections into the window.
win
.
clear_overlay
();
win
.
set_image
(
images
[
i
]);
for
(
unsigned
long
j
=
0
;
j
<
rects
.
size
();
++
j
)
{
// Add each detection as a red box.
win
.
add_overlay
(
image_display
::
overlay_rect
(
rects
[
j
],
rgb_pixel
(
255
,
0
,
0
)));
}
cout
<<
"Hit enter to see the next image."
;
cin
.
get
();
}
// Finally, note that the detector can be serialized to disk just like other dlib objects.
ofstream
fout
(
"object_detector.dat"
,
ios
::
binary
);
serialize
(
detector
,
fout
);
fout
.
close
();
// Recall from disk.
ifstream
fin
(
"object_detector.dat"
,
ios
::
binary
);
deserialize
(
detector
,
fin
);
}
catch
(
exception
&
e
)
{
cout
<<
"
\n
exception thrown!"
<<
endl
;
cout
<<
e
.
what
()
<<
endl
;
}
catch
(...)
{
cout
<<
"Some error occurred"
<<
endl
;
}
}
// ----------------------------------------------------------------------------------------
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment