Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
4f275bd7
Commit
4f275bd7
authored
May 26, 2014
by
Davis King
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added evaluate_detectors() to make it easy to run a bunch of HOG detectors
efficiently, even when their window sizes differ.
parent
09af3eb8
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
310 additions
and
53 deletions
+310
-53
scan_fhog_pyramid.h
dlib/image_processing/scan_fhog_pyramid.h
+246
-53
scan_fhog_pyramid_abstract.h
dlib/image_processing/scan_fhog_pyramid_abstract.h
+64
-0
No files found.
dlib/image_processing/scan_fhog_pyramid.h
View file @
4f275bd7
...
@@ -226,7 +226,6 @@ namespace dlib
...
@@ -226,7 +226,6 @@ namespace dlib
return
num
;
return
num
;
}
}
private
:
std
::
vector
<
matrix
<
float
>
>
filters
;
std
::
vector
<
matrix
<
float
>
>
filters
;
std
::
vector
<
std
::
vector
<
matrix
<
float
,
0
,
1
>
>
>
row_filters
,
col_filters
;
std
::
vector
<
std
::
vector
<
matrix
<
float
,
0
,
1
>
>
>
row_filters
,
col_filters
;
};
};
...
@@ -361,14 +360,6 @@ namespace dlib
...
@@ -361,14 +360,6 @@ namespace dlib
height
=
temp
.
height
();
height
=
temp
.
height
();
}
}
static
bool
compare_pair_rect
(
const
std
::
pair
<
double
,
rectangle
>&
a
,
const
std
::
pair
<
double
,
rectangle
>&
b
)
{
return
a
.
first
<
b
.
first
;
}
void
get_mapped_rect_and_metadata
(
void
get_mapped_rect_and_metadata
(
const
unsigned
long
number_pyramid_levels
,
const
unsigned
long
number_pyramid_levels
,
const
rectangle
&
rect
,
const
rectangle
&
rect
,
...
@@ -389,12 +380,6 @@ namespace dlib
...
@@ -389,12 +380,6 @@ namespace dlib
typedef
array
<
array2d
<
float
>
>
fhog_image
;
typedef
array
<
array2d
<
float
>
>
fhog_image
;
static
rectangle
apply_filters_to_fhog
(
const
fhog_filterbank
&
w
,
const
fhog_image
&
feats
,
array2d
<
float
>&
saliency_image
);
feature_extractor_type
fe
;
feature_extractor_type
fe
;
array
<
fhog_image
>
feats
;
array
<
fhog_image
>
feats
;
int
cell_size
;
int
cell_size
;
...
@@ -422,11 +407,12 @@ namespace dlib
...
@@ -422,11 +407,12 @@ namespace dlib
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template
<
typename
T
,
typename
U
>
namespace
impl
rectangle
scan_fhog_pyramid
<
T
,
U
>::
{
apply_filters_to_fhog
(
template
<
typename
fhog_filterbank
>
rectangle
apply_filters_to_fhog
(
const
fhog_filterbank
&
w
,
const
fhog_filterbank
&
w
,
const
fhog_image
&
feats
,
const
array
<
array2d
<
float
>
>
&
feats
,
array2d
<
float
>&
saliency_image
array2d
<
float
>&
saliency_image
)
)
{
{
...
@@ -471,6 +457,7 @@ namespace dlib
...
@@ -471,6 +457,7 @@ namespace dlib
}
}
return
area
;
return
area
;
}
}
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
...
@@ -563,16 +550,23 @@ namespace dlib
...
@@ -563,16 +550,23 @@ namespace dlib
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
namespace
impl
{
template
<
template
<
typename
Pyramid_type
,
typename
pyramid_type
,
typename
image_type
,
typename
feature_extractor_type
typename
feature_extractor_type
>
>
template
<
void
create_fhog_pyramid
(
typename
image_type
const
image_type
&
img
,
>
const
feature_extractor_type
&
fe
,
void
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
array
<
array
<
array2d
<
float
>
>
>&
feats
,
load
(
int
cell_size
,
const
image_type
&
img
int
filter_rows_padding
,
int
filter_cols_padding
,
unsigned
long
min_pyramid_layer_width
,
unsigned
long
min_pyramid_layer_height
,
unsigned
long
max_pyramid_levels
)
)
{
{
unsigned
long
levels
=
0
;
unsigned
long
levels
=
0
;
...
@@ -591,14 +585,12 @@ namespace dlib
...
@@ -591,14 +585,12 @@ namespace dlib
feats
.
set_max_size
(
levels
);
feats
.
set_max_size
(
levels
);
feats
.
set_size
(
levels
);
feats
.
set_size
(
levels
);
unsigned
long
width
,
height
;
compute_fhog_window_size
(
width
,
height
);
typedef
typename
image_type
::
type
pixel_type
;
typedef
typename
image_type
::
type
pixel_type
;
typedef
typename
image_type
::
mem_manager_type
mem_manager_type
;
typedef
typename
image_type
::
mem_manager_type
mem_manager_type
;
// build our feature pyramid
// build our feature pyramid
fe
(
img
,
feats
[
0
],
cell_size
,
height
,
width
);
fe
(
img
,
feats
[
0
],
cell_size
,
filter_rows_padding
,
filter_cols_padding
);
DLIB_ASSERT
(
feats
[
0
].
size
()
==
fe
.
get_num_planes
(),
DLIB_ASSERT
(
feats
[
0
].
size
()
==
fe
.
get_num_planes
(),
"Invalid feature extractor used with dlib::scan_fhog_pyramid. The output does not have the
\n
"
"Invalid feature extractor used with dlib::scan_fhog_pyramid. The output does not have the
\n
"
"indicated number of planes."
);
"indicated number of planes."
);
...
@@ -607,17 +599,39 @@ namespace dlib
...
@@ -607,17 +599,39 @@ namespace dlib
{
{
array2d
<
pixel_type
,
mem_manager_type
>
temp1
,
temp2
;
array2d
<
pixel_type
,
mem_manager_type
>
temp1
,
temp2
;
pyr
(
img
,
temp1
);
pyr
(
img
,
temp1
);
fe
(
temp1
,
feats
[
1
],
cell_size
,
height
,
width
);
fe
(
temp1
,
feats
[
1
],
cell_size
,
filter_rows_padding
,
filter_cols_padding
);
swap
(
temp1
,
temp2
);
swap
(
temp1
,
temp2
);
for
(
unsigned
long
i
=
2
;
i
<
feats
.
size
();
++
i
)
for
(
unsigned
long
i
=
2
;
i
<
feats
.
size
();
++
i
)
{
{
pyr
(
temp2
,
temp1
);
pyr
(
temp2
,
temp1
);
fe
(
temp1
,
feats
[
i
],
cell_size
,
height
,
width
);
fe
(
temp1
,
feats
[
i
],
cell_size
,
filter_rows_padding
,
filter_cols_padding
);
swap
(
temp1
,
temp2
);
swap
(
temp1
,
temp2
);
}
}
}
}
}
}
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
template <
    typename image_type
    >
void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
load (
    const image_type& img
)
{
    // Ask the scanner for its fhog window dimensions; these are handed to the
    // pyramid builder as the row/column filter padding amounts.
    unsigned long window_width;
    unsigned long window_height;
    compute_fhog_window_size(window_width, window_height);

    // Fill this scanner's feats member via the shared pyramid builder, using
    // this scanner's own feature extractor, cell size, and pyramid limits.
    impl::create_fhog_pyramid<Pyramid_type>(
        img,
        fe,
        feats,
        cell_size,
        window_height,      // filter_rows_padding
        window_width,       // filter_cols_padding
        min_pyramid_layer_width,
        min_pyramid_layer_height,
        max_pyramid_levels);
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
...
@@ -732,33 +746,36 @@ namespace dlib
...
@@ -732,33 +746,36 @@ namespace dlib
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
namespace
impl
{
// Ordering predicate for (score, rectangle) pairs.  Only the score (the
// pair's first member) takes part in the comparison; the rectangle is ignored.
inline bool compare_pair_rect (
    const std::pair<double, rectangle>& a,
    const std::pair<double, rectangle>& b
)
{
    return b.first > a.first;
}
template
<
template
<
typename
Pyramid_type
,
typename
pyramid_type
,
typename
feature_extractor_type
typename
feature_extractor_type
,
typename
fhog_filterbank
>
>
void
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
void
detect_from_fhog_pyramid
(
detect
(
const
array
<
array
<
array2d
<
float
>
>
>&
feats
,
const
feature_extractor_type
&
fe
,
const
fhog_filterbank
&
w
,
const
fhog_filterbank
&
w
,
std
::
vector
<
std
::
pair
<
double
,
rectangle
>
>&
dets
,
const
double
thresh
,
const
double
thresh
const
unsigned
long
det_box_height
,
)
const
const
unsigned
long
det_box_width
,
const
int
cell_size
,
const
int
filter_rows_padding
,
const
int
filter_cols_padding
,
std
::
vector
<
std
::
pair
<
double
,
rectangle
>
>&
dets
)
{
{
// make sure requires clause is not broken
DLIB_ASSERT
(
is_loaded_with_image
()
&&
w
.
get_num_dimensions
()
==
get_num_dimensions
(),
"
\t
void scan_fhog_pyramid::detect()"
<<
"
\n\t
Invalid inputs were given to this function "
<<
"
\n\t
is_loaded_with_image(): "
<<
is_loaded_with_image
()
<<
"
\n\t
w.get_num_dimensions(): "
<<
w
.
get_num_dimensions
()
<<
"
\n\t
get_num_dimensions(): "
<<
get_num_dimensions
()
<<
"
\n\t
this: "
<<
this
);
dets
.
clear
();
dets
.
clear
();
unsigned
long
width
,
height
;
compute_fhog_window_size
(
width
,
height
);
array2d
<
float
>
saliency_image
;
array2d
<
float
>
saliency_image
;
pyramid_type
pyr
;
pyramid_type
pyr
;
...
@@ -775,7 +792,8 @@ namespace dlib
...
@@ -775,7 +792,8 @@ namespace dlib
// if we found a detection
// if we found a detection
if
(
saliency_image
[
r
][
c
]
>=
thresh
)
if
(
saliency_image
[
r
][
c
]
>=
thresh
)
{
{
rectangle
rect
=
fe
.
feats_to_image
(
centered_rect
(
point
(
c
,
r
),
width
-
2
*
padding
,
height
-
2
*
padding
),
cell_size
,
height
,
width
);
rectangle
rect
=
fe
.
feats_to_image
(
centered_rect
(
point
(
c
,
r
),
det_box_width
,
det_box_height
),
cell_size
,
filter_rows_padding
,
filter_cols_padding
);
rect
=
pyr
.
rect_up
(
rect
,
l
);
rect
=
pyr
.
rect_up
(
rect
,
l
);
dets
.
push_back
(
std
::
make_pair
(
saliency_image
[
r
][
c
],
rect
));
dets
.
push_back
(
std
::
make_pair
(
saliency_image
[
r
][
c
],
rect
));
}
}
...
@@ -786,6 +804,53 @@ namespace dlib
...
@@ -786,6 +804,53 @@ namespace dlib
std
::
sort
(
dets
.
rbegin
(),
dets
.
rend
(),
compare_pair_rect
);
std
::
sort
(
dets
.
rbegin
(),
dets
.
rend
(),
compare_pair_rect
);
}
}
inline
bool
overlaps_any_box
(
const
test_box_overlap
&
tester
,
const
std
::
vector
<
rect_detection
>&
rects
,
const
rect_detection
&
rect
)
{
for
(
unsigned
long
i
=
0
;
i
<
rects
.
size
();
++
i
)
{
if
(
tester
(
rects
[
i
].
rect
,
rect
.
rect
))
return
true
;
}
return
false
;
}
}
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
,
typename
feature_extractor_type
>
void
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
detect
(
const
fhog_filterbank
&
w
,
std
::
vector
<
std
::
pair
<
double
,
rectangle
>
>&
dets
,
const
double
thresh
)
const
{
// make sure requires clause is not broken
DLIB_ASSERT
(
is_loaded_with_image
()
&&
w
.
get_num_dimensions
()
==
get_num_dimensions
(),
"
\t
void scan_fhog_pyramid::detect()"
<<
"
\n\t
Invalid inputs were given to this function "
<<
"
\n\t
is_loaded_with_image(): "
<<
is_loaded_with_image
()
<<
"
\n\t
w.get_num_dimensions(): "
<<
w
.
get_num_dimensions
()
<<
"
\n\t
get_num_dimensions(): "
<<
get_num_dimensions
()
<<
"
\n\t
this: "
<<
this
);
unsigned
long
width
,
height
;
compute_fhog_window_size
(
width
,
height
);
impl
::
detect_from_fhog_pyramid
<
pyramid_type
>
(
feats
,
fe
,
w
,
thresh
,
height
-
2
*
padding
,
width
-
2
*
padding
,
cell_size
,
height
,
width
,
dets
);
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template
<
template
<
...
@@ -1145,6 +1210,134 @@ namespace dlib
...
@@ -1145,6 +1210,134 @@ namespace dlib
};
};
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Runs every detector in detectors over img and stores the surviving
// detections in dets.
//
//  - detectors:        the object_detector objects to evaluate.  If it is empty
//                      then dets is simply cleared.
//  - img:              the image to scan.
//  - dets:             output.  Cleared on entry.  On return it holds the
//                      detections that survived non-max suppression, best first.
//                      dets[k].weight_index is the index into detectors of the
//                      detector that produced dets[k], and
//                      dets[k].detection_confidence is the scanner's score minus
//                      the threshold read from that detector's weight vector.
//  - adjust_threshold: added to each detector's threshold before scanning, so
//                      every output satisfies detection_confidence >= adjust_threshold.
//
// When all detectors share the same cell size the fhog pyramid is built once
// and reused for all of them; otherwise it is rebuilt for each detector.
template <
    typename pyramid_type,
    typename image_type
    >
void evaluate_detectors (
    const std::vector<object_detector<scan_fhog_pyramid<pyramid_type> > >& detectors,
    const image_type& img,
    std::vector<rect_detection>& dets,
    const double adjust_threshold = 0
)
{
    typedef scan_fhog_pyramid<pyramid_type> scanner_type;

    dets.clear();
    if (detectors.size() == 0)
        return;

    const int cell_size = detectors[0].get_scanner().get_cell_size();

    // Find the maximum sized filters and also most extreme pyramiding settings used.
    unsigned long max_filter_width = 0;
    unsigned long max_filter_height = 0;
    unsigned long min_pyramid_layer_width = std::numeric_limits<unsigned long>::max();
    unsigned long min_pyramid_layer_height = std::numeric_limits<unsigned long>::max();
    unsigned long max_pyramid_levels = 0;
    bool all_cell_sizes_the_same = true;
    for (unsigned long i = 0; i < detectors.size(); ++i)
    {
        const scanner_type& scanner = detectors[i].get_scanner();
        max_filter_width = std::max(max_filter_width, scanner.get_fhog_window_width());
        max_filter_height = std::max(max_filter_height, scanner.get_fhog_window_height());
        max_pyramid_levels = std::max(max_pyramid_levels, scanner.get_max_pyramid_levels());
        min_pyramid_layer_width = std::min(min_pyramid_layer_width, scanner.get_min_pyramid_layer_width());
        min_pyramid_layer_height = std::min(min_pyramid_layer_height, scanner.get_min_pyramid_layer_height());
        if (cell_size != scanner.get_cell_size())
            all_cell_sizes_the_same = false;
    }

    std::vector<rect_detection> dets_accum;

    // Do the HOG feature extraction to make the fhog pyramid.  Again, note that we
    // are making a pyramid that will work with any of the detectors.  But only if all
    // the cell sizes are the same.  If they aren't then we have to calculate the
    // pyramid for each detector individually.
    array<array<array2d<float> > > feats;
    if (all_cell_sizes_the_same)
    {
        impl::create_fhog_pyramid<pyramid_type>(img,
            detectors[0].get_scanner().get_feature_extractor(), feats, cell_size,
            max_filter_height, max_filter_width, min_pyramid_layer_width,
            min_pyramid_layer_height, max_pyramid_levels);
    }

    // Total number of weight vectors evaluated across all detectors.  Used below
    // to decide whether dets_accum needs a global sort.
    unsigned long num_weight_vectors = 0;

    std::vector<std::pair<double, rectangle> > temp_dets;
    for (unsigned long i = 0; i < detectors.size(); ++i)
    {
        const scanner_type& scanner = detectors[i].get_scanner();
        if (!all_cell_sizes_the_same)
        {
            impl::create_fhog_pyramid<pyramid_type>(img,
                scanner.get_feature_extractor(), feats, scanner.get_cell_size(),
                max_filter_height, max_filter_width, min_pyramid_layer_width,
                min_pyramid_layer_height, max_pyramid_levels);
        }

        const unsigned long det_box_width  = scanner.get_fhog_window_width()  - 2*scanner.get_padding();
        const unsigned long det_box_height = scanner.get_fhog_window_height() - 2*scanner.get_padding();

        // A single detector object might itself have multiple weight vectors in it.  So
        // we need to evaluate all of them.
        for (unsigned d = 0; d < detectors[i].num_detectors(); ++d)
        {
            ++num_weight_vectors;

            // The detection threshold is stored at the end of the weight vector.
            const double thresh = detectors[i].get_processed_w(d).w(scanner.get_num_dimensions());

            // Scan with the caller's adjustment added to the threshold, and map
            // feature coordinates back to the image with the cell size the
            // pyramid in feats was actually built with (this scanner's own).
            impl::detect_from_fhog_pyramid<pyramid_type>(feats, scanner.get_feature_extractor(),
                detectors[i].get_processed_w(d).get_detect_argument(), thresh+adjust_threshold,
                det_box_height, det_box_width, scanner.get_cell_size(),
                max_filter_height, max_filter_width, temp_dets);

            for (unsigned long j = 0; j < temp_dets.size(); ++j)
            {
                rect_detection temp;
                // Confidence is measured against the unadjusted threshold, so
                // every reported detection has confidence >= adjust_threshold.
                temp.detection_confidence = temp_dets[j].first-thresh;
                temp.weight_index = i;
                temp.rect = temp_dets[j].second;
                dets_accum.push_back(temp);
            }
        }
    }

    // Do non-max suppression
    dets.clear();
    // Each individual scan returns its detections already sorted, so a global
    // sort is only needed when results from more than one weight vector were
    // concatenated (several detectors, or one detector with several vectors).
    if (detectors.size() > 1 || num_weight_vectors > 1)
        std::sort(dets_accum.rbegin(), dets_accum.rend());
    for (unsigned long i = 0; i < dets_accum.size(); ++i)
    {
        // Use the overlap tester belonging to the detector that produced this box.
        const test_box_overlap tester = detectors[dets_accum[i].weight_index].get_overlap_tester();
        if (impl::overlaps_any_box(tester, dets, dets_accum[i]))
            continue;

        dets.push_back(dets_accum[i]);
    }
}
// ----------------------------------------------------------------------------------------
// Convenience overload: runs the evaluate_detectors() overload that fills a
// std::vector<rect_detection> and returns only the rect member of each
// detection, in the same order.
template <
    typename Pyramid_type,
    typename image_type
    >
std::vector<rectangle> evaluate_detectors (
    const std::vector<object_detector<scan_fhog_pyramid<Pyramid_type> > >& detectors,
    const image_type& img,
    const double adjust_threshold = 0
)
{
    std::vector<rect_detection> scored;
    evaluate_detectors(detectors, img, scored, adjust_threshold);

    // Strip the confidence and weight index, keeping only the boxes.
    std::vector<rectangle> boxes;
    boxes.reserve(scored.size());
    typedef std::vector<rect_detection>::const_iterator const_iter;
    for (const_iter it = scored.begin(); it != scored.end(); ++it)
        boxes.push_back(it->rect);

    return boxes;
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
}
}
...
...
dlib/image_processing/scan_fhog_pyramid_abstract.h
View file @
4f275bd7
...
@@ -693,6 +693,70 @@ namespace dlib
...
@@ -693,6 +693,70 @@ namespace dlib
provides deserialization support
provides deserialization support
!*/
!*/
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template
<
typename
pyramid_type
,
typename
image_type
>
void
evaluate_detectors
(
const
std
::
vector
<
object_detector
<
scan_fhog_pyramid
<
pyramid_type
>>>&
detectors
,
const
image_type
&
img
,
std
::
vector
<
rect_detection
>&
dets
,
const
double
adjust_threshold
=
0
);
/*!
ensures
- This function runs each of the provided object_detector objects over img and
stores the resulting detections into #dets. Importantly, this function is
faster than running each detector individually because it computes the HOG
features only once and then reuses them for each detector. However, it is
important to note that this speedup is only possible if all the detectors use
the same cell_size parameter that determines how HOG features are computed.
If different cell_size values are used then this function will not be any
faster than running the detectors individually.
- This function applies non-max suppression to the outputs from all detectors
and therefore none of the outputs will overlap with each other.
- To be precise, this function performs object detection on the given image and
stores the detected objects into #dets. In particular, we will have that:
- #dets is sorted such that the highest confidence detections come first.
E.g. element 0 is the best detection, element 1 the next best, and so on.
- #dets.size() == the number of detected objects.
- #dets[i].detection_confidence == The strength of the i-th detection.
Larger values indicate that the detector is more confident that #dets[i]
is a correct detection rather than being a false alarm. Moreover, the
detection_confidence is equal to the detection value output by the
scanner minus the threshold value stored at the end of the weight vector.
- #dets[i].rect == the bounding box for the i-th detection.
- The detection #dets[i].rect was produced by detectors[#dets[i].weight_index].
- The detection threshold is adjusted by having adjust_threshold added to it.
Therefore, an adjust_threshold value > 0 makes detecting objects harder while
a negative value makes it easier. Moreover, the following will be true for
all valid i:
- #dets[i].detection_confidence >= adjust_threshold
This means that, for example, you can obtain the maximum possible number of
detections by setting adjust_threshold equal to negative infinity.
!*/
// ----------------------------------------------------------------------------------------
template
<
typename
pyramid_type
,
typename
image_type
>
std
::
vector
<
rectangle
>
evaluate_detectors
(
const
std
::
vector
<
object_detector
<
scan_fhog_pyramid
<
pyramid_type
>>>&
detectors
,
const
image_type
&
img
,
const
double
adjust_threshold
=
0
);
/*!
ensures
- This function just calls the above evaluate_detectors() routine and copies
the output dets into a vector<rectangle> object and returns it. Therefore,
this function is provided for convenience.
!*/
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment