Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
6401e693
Commit
6401e693
authored
Nov 11, 2013
by
Davis King
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added scan_fhog_pyramid.
parent
dc4cc092
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
1395 additions
and
0 deletions
+1395
-0
image_processing.h
dlib/image_processing.h
+1
-0
scan_fhog_pyramid.h
dlib/image_processing/scan_fhog_pyramid.h
+866
-0
scan_fhog_pyramid_abstract.h
dlib/image_processing/scan_fhog_pyramid_abstract.h
+528
-0
No files found.
dlib/image_processing.h
View file @
6401e693
...
...
@@ -13,6 +13,7 @@
#include "image_processing/scan_image_boxes.h"
#include "image_processing/scan_image_custom.h"
#include "image_processing/remove_unobtainable_rectangles.h"
#include "image_processing/scan_fhog_pyramid.h"
#endif // DLIB_IMAGE_PROCESSInG_H___
...
...
dlib/image_processing/scan_fhog_pyramid.h
0 → 100644
View file @
6401e693
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SCAN_fHOG_PYRAMID_H__
#define DLIB_SCAN_fHOG_PYRAMID_H__
#include "scan_fhog_pyramid_abstract.h"
#include "../matrix.h"
#include "../image_transforms.h"
#include "../array.h"
#include "../array2d.h"
#include "object_detector.h"
namespace
dlib
{
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
>
class
scan_fhog_pyramid
:
noncopyable
{
public
:
typedef
matrix
<
double
,
0
,
1
>
feature_vector_type
;
typedef
Pyramid_type
pyramid_type
;
scan_fhog_pyramid
(
);
template
<
typename
image_type
>
void
load
(
const
image_type
&
img
);
inline
bool
is_loaded_with_image
(
)
const
;
inline
void
copy_configuration
(
const
scan_fhog_pyramid
&
item
);
void
set_detection_window_size
(
unsigned
long
width
,
unsigned
long
height
)
{
window_width
=
width
;
window_height
=
height
;
}
inline
unsigned
long
get_detection_window_width
(
)
const
{
return
window_width
;
}
inline
unsigned
long
get_detection_window_height
(
)
const
{
return
window_height
;
}
inline
unsigned
long
get_num_detection_templates
(
)
const
;
inline
unsigned
long
get_num_movable_components_per_detection_template
(
)
const
;
void
set_padding
(
unsigned
long
new_padding
)
{
padding
=
new_padding
;
}
unsigned
long
get_padding
(
)
const
{
return
padding
;
}
void
set_cell_size
(
unsigned
long
new_cell_size
)
{
// make sure requires clause is not broken
DLIB_ASSERT
(
new_cell_size
>
0
,
"
\t
void scan_fhog_pyramid::set_cell_size()"
<<
"
\n\t
You can't have zero sized fHOG cells. "
<<
"
\n\t
this: "
<<
this
);
cell_size
=
new_cell_size
;
}
unsigned
long
get_cell_size
(
)
const
{
return
cell_size
;
}
inline
long
get_num_dimensions
(
)
const
;
unsigned
long
get_max_pyramid_levels
(
)
const
;
void
set_max_pyramid_levels
(
unsigned
long
max_levels
);
void
set_min_pyramid_layer_size
(
unsigned
long
width
,
unsigned
long
height
);
inline
unsigned
long
get_min_pyramid_layer_width
(
)
const
;
inline
unsigned
long
get_min_pyramid_layer_height
(
)
const
;
void
detect
(
const
feature_vector_type
&
w
,
std
::
vector
<
std
::
pair
<
double
,
rectangle
>
>&
dets
,
const
double
thresh
)
const
{
fhog_filterbank
temp
=
build_fhog_filterbank
(
w
);
detect
(
temp
,
dets
,
thresh
);
}
class
fhog_filterbank
{
friend
class
scan_fhog_pyramid
;
public
:
inline
unsigned
long
get_num_dimensions
()
const
{
unsigned
long
dims
=
0
;
for
(
unsigned
long
i
=
0
;
i
<
filters
.
size
();
++
i
)
{
dims
+=
filters
[
i
].
size
();
}
return
dims
;
}
const
std
::
vector
<
matrix
<
float
>
>&
get_filters
()
const
{
return
filters
;}
unsigned
long
num_separable_filters
()
const
{
unsigned
long
num
=
0
;
for
(
unsigned
long
i
=
0
;
i
<
row_filters
.
size
();
++
i
)
{
num
+=
row_filters
[
i
].
size
();
}
return
num
;
}
private
:
std
::
vector
<
matrix
<
float
>
>
filters
;
std
::
vector
<
std
::
vector
<
matrix
<
float
,
0
,
1
>
>
>
row_filters
,
col_filters
;
};
fhog_filterbank
build_fhog_filterbank
(
const
feature_vector_type
&
weights
)
const
{
fhog_filterbank
temp
;
temp
.
filters
.
resize
(
31
);
temp
.
row_filters
.
resize
(
31
);
temp
.
col_filters
.
resize
(
31
);
// load filters from w
unsigned
long
width
,
height
;
compute_fhog_window_size
(
width
,
height
);
const
long
size
=
width
*
height
;
for
(
unsigned
long
i
=
0
;
i
<
temp
.
filters
.
size
();
++
i
)
{
matrix
<
double
>
u
,
v
,
w
,
f
;
f
=
reshape
(
rowm
(
weights
,
range
(
i
*
size
,
(
i
+
1
)
*
size
-
1
)),
height
,
width
);
temp
.
filters
[
i
]
=
matrix_cast
<
float
>
(
f
);
svd3
(
f
,
u
,
w
,
v
);
matrix
<
double
>
w2
=
w
;
rsort_columns
(
u
,
w
);
rsort_columns
(
v
,
w2
);
double
thresh
=
std
::
max
(
1e-3
,
max
(
w
)
*
0
.
01
);
w
=
round_zeros
(
w
,
thresh
);
for
(
long
j
=
0
;
j
<
w
.
size
();
++
j
)
{
if
(
w
(
j
)
!=
0
)
{
temp
.
col_filters
[
i
].
push_back
(
matrix_cast
<
float
>
(
colm
(
u
,
j
)
*
std
::
sqrt
(
w
(
j
))));
temp
.
row_filters
[
i
].
push_back
(
matrix_cast
<
float
>
(
colm
(
v
,
j
)
*
std
::
sqrt
(
w
(
j
))));
}
}
}
return
temp
;
}
void
detect
(
const
fhog_filterbank
&
w
,
std
::
vector
<
std
::
pair
<
double
,
rectangle
>
>&
dets
,
const
double
thresh
)
const
;
void
get_feature_vector
(
const
full_object_detection
&
obj
,
feature_vector_type
&
psi
)
const
;
full_object_detection
get_full_object_detection
(
const
rectangle
&
rect
,
const
feature_vector_type
&
w
)
const
;
const
rectangle
get_best_matching_rect
(
const
rectangle
&
rect
)
const
;
double
get_nuclear_norm_regularization_strength
(
)
const
{
return
nuclear_norm_regularization_strength
;
}
void
set_nuclear_norm_regularization_strength
(
double
strength
)
/*!
requires
- strength >= 0
ensures
- #get_nuclear_norm_regularization_strength() == strength
!*/
{
nuclear_norm_regularization_strength
=
strength
;
}
unsigned
long
get_fhog_window_width
(
)
const
{
unsigned
long
width
,
height
;
compute_fhog_window_size
(
width
,
height
);
return
width
;
}
unsigned
long
get_fhog_window_height
(
)
const
{
unsigned
long
width
,
height
;
compute_fhog_window_size
(
width
,
height
);
return
height
;
}
template
<
typename
T
>
friend
void
serialize
(
const
scan_fhog_pyramid
<
T
>&
item
,
std
::
ostream
&
out
);
template
<
typename
T
>
friend
void
deserialize
(
scan_fhog_pyramid
<
T
>&
item
,
std
::
istream
&
in
);
private
:
inline
void
compute_fhog_window_size
(
unsigned
long
&
width
,
unsigned
long
&
height
)
const
{
const
rectangle
temp
=
grow_rect
(
image_to_fhog
(
centered_rect
(
point
(
0
,
0
),
window_width
,
window_height
),
cell_size
),
padding
);
width
=
temp
.
width
();
height
=
temp
.
height
();
}
static
bool
compare_pair_rect
(
const
std
::
pair
<
double
,
rectangle
>&
a
,
const
std
::
pair
<
double
,
rectangle
>&
b
)
{
return
a
.
first
<
b
.
first
;
}
void
get_mapped_rect_and_metadata
(
const
unsigned
long
number_pyramid_levels
,
const
rectangle
&
rect
,
rectangle
&
mapped_rect
,
rectangle
&
fhog_rect
,
unsigned
long
&
best_level
)
const
;
double
get_match_score
(
rectangle
r1
,
rectangle
r2
)
const
{
// make the rectangles overlap as much as possible before computing the match score.
r1
=
move_rect
(
r1
,
r2
.
tl_corner
());
return
(
r1
.
intersect
(
r2
).
area
())
/
(
double
)(
r1
+
r2
).
area
();
}
typedef
array
<
array2d
<
float
>
>
fhog_image
;
array
<
fhog_image
>
feats
;
int
cell_size
;
unsigned
long
padding
;
unsigned
long
window_width
;
unsigned
long
window_height
;
unsigned
long
max_pyramid_levels
;
unsigned
long
min_pyramid_layer_width
;
unsigned
long
min_pyramid_layer_height
;
double
nuclear_norm_regularization_strength
;
};
// ----------------------------------------------------------------------------------------
// Writes the full state of a scan_fhog_pyramid to `out`.  The field order below is the
// on-disk format and must stay in step with deserialize().
template <typename T>
void serialize (
    const scan_fhog_pyramid<T>& item,
    std::ostream& out
)
{
    // Format version tag; deserialize() rejects anything other than 1.
    const int version = 1;
    serialize(version, out);

    // Loaded feature pyramid followed by the configuration values.
    serialize(item.feats, out);
    serialize(item.cell_size, out);
    serialize(item.padding, out);
    serialize(item.window_width, out);
    serialize(item.window_height, out);
    serialize(item.max_pyramid_levels, out);
    serialize(item.min_pyramid_layer_width, out);
    serialize(item.min_pyramid_layer_height, out);
    serialize(item.nuclear_norm_regularization_strength, out);

    // Trailing dimension count, used by deserialize() as a consistency check.
    serialize(item.get_num_dimensions(), out);
}
// ----------------------------------------------------------------------------------------
// Restores a scan_fhog_pyramid from `in`, reading the fields in the order serialize()
// wrote them.  Throws serialization_error on an unknown format version or when the
// stored dimension count disagrees with the restored configuration.
template <typename T>
void deserialize (
    scan_fhog_pyramid<T>& item,
    std::istream& in
)
{
    int version = 0;
    deserialize(version, in);
    const bool known_version = (version == 1);
    if (!known_version)
        throw serialization_error("Unsupported version found when deserializing a scan_fhog_pyramid object.");

    deserialize(item.feats, in);
    deserialize(item.cell_size, in);
    deserialize(item.padding, in);
    deserialize(item.window_width, in);
    deserialize(item.window_height, in);
    deserialize(item.max_pyramid_levels, in);
    deserialize(item.min_pyramid_layer_width, in);
    deserialize(item.min_pyramid_layer_height, in);
    deserialize(item.nuclear_norm_regularization_strength, in);

    // Guard against loading data written by code whose feature extractor produced a
    // different number of dimensions than the current code does; that is an easy
    // mistake to make while developing a feature extractor.
    long stored_dims;
    deserialize(stored_dims, in);
    if (item.get_num_dimensions() != stored_dims)
        throw serialization_error("Number of dimensions in serialized scan_fhog_pyramid doesn't match the expected number.");
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// scan_fhog_pyramid member functions
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Default configuration: 8 pixel cells, 1 cell of padding, a 64x64 pixel detection
// window, at most 1000 pyramid levels, a 64x64 minimum layer size and no nuclear norm
// regularization.
template <typename Pyramid_type>
scan_fhog_pyramid<Pyramid_type>::
scan_fhog_pyramid (
) :
    cell_size(8),
    padding(1),
    window_width(64),
    window_height(64),
    max_pyramid_levels(1000),
    min_pyramid_layer_width(64),
    min_pyramid_layer_height(64),
    nuclear_norm_regularization_strength(0)
{
}
// ----------------------------------------------------------------------------------------
// Builds the fHOG feature pyramid for img and stores it in feats, one entry per
// pyramid level (feats[0] is computed from img itself).
template <typename Pyramid_type>
template <typename image_type>
void scan_fhog_pyramid<Pyramid_type>::
load (
    const image_type& img
)
{
    pyramid_type pyr;

    // Decide how many pyramid levels to use based on the image size.  At least one
    // level is always used; further levels are added while the downsampled rectangle
    // stays at or above the minimum layer size and the level cap is not reached.
    unsigned long levels = 0;
    rectangle layer_rect = get_rect(img);
    for (;;)
    {
        layer_rect = pyr.rect_down(layer_rect);
        ++levels;

        const bool big_enough = layer_rect.width() >= min_pyramid_layer_width &&
                                layer_rect.height() >= min_pyramid_layer_height;
        if (!(big_enough && levels < max_pyramid_levels))
            break;
    }

    if (feats.max_size() < levels)
        feats.set_max_size(levels);
    feats.set_size(levels);

    // The window size in HOG cells is handed to the feature extractor (rows first).
    unsigned long win_cols, win_rows;
    compute_fhog_window_size(win_cols, win_rows);

    // Level 0 comes straight from the input image.
    extract_fhog_features(img, feats[0], cell_size, win_rows, win_cols);

    if (feats.size() > 1)
    {
        // `current` receives each newly downsampled image; after its features are
        // extracted it is swapped into `previous` to become the next level's input.
        image_type current, previous;

        pyr(img, current);
        extract_fhog_features(current, feats[1], cell_size, win_rows, win_cols);
        swap(current, previous);

        for (unsigned long level = 2; level < feats.size(); ++level)
        {
            pyr(previous, current);
            extract_fhog_features(current, feats[level], cell_size, win_rows, win_cols);
            swap(current, previous);
        }
    }
}
// ----------------------------------------------------------------------------------------
// True when feats holds at least one pyramid level.
template <typename Pyramid_type>
bool scan_fhog_pyramid<Pyramid_type>::
is_loaded_with_image (
) const
{
    const bool no_levels = (feats.size() == 0);
    return !no_levels;
}
// ----------------------------------------------------------------------------------------
// Copies every configuration value from item into *this.  The loaded feature pyramid
// (feats) is not copied.
template <typename Pyramid_type>
void scan_fhog_pyramid<Pyramid_type>::
copy_configuration (
    const scan_fhog_pyramid& item
)
{
    // Detection window geometry.
    window_width  = item.window_width;
    window_height = item.window_height;
    cell_size     = item.cell_size;
    padding       = item.padding;

    // Pyramid limits.
    max_pyramid_levels       = item.max_pyramid_levels;
    min_pyramid_layer_width  = item.min_pyramid_layer_width;
    min_pyramid_layer_height = item.min_pyramid_layer_height;

    // Training related setting.
    nuclear_norm_regularization_strength = item.nuclear_norm_regularization_strength;
}
// ----------------------------------------------------------------------------------------
// Always 1.
template <typename Pyramid_type>
unsigned long scan_fhog_pyramid<Pyramid_type>::
get_num_detection_templates (
) const
{
    const unsigned long num_templates = 1;
    return num_templates;
}
// ----------------------------------------------------------------------------------------
// Always 0.
template <typename Pyramid_type>
unsigned long scan_fhog_pyramid<Pyramid_type>::
get_num_movable_components_per_detection_template (
) const
{
    const unsigned long num_movable_parts = 0;
    return num_movable_parts;
}
// ----------------------------------------------------------------------------------------
// Number of weights in the classifier: one per HOG cell of the scanning window for
// each of the 31 fHOG channels.
template <typename Pyramid_type>
long scan_fhog_pyramid<Pyramid_type>::
get_num_dimensions (
) const
{
    unsigned long win_cols, win_rows;
    compute_fhog_window_size(win_cols, win_rows);
    return win_cols*win_rows*31;
}
// ----------------------------------------------------------------------------------------
// Returns the cap on the number of pyramid levels that load() will build.
template <typename Pyramid_type>
unsigned long scan_fhog_pyramid<Pyramid_type>::
get_max_pyramid_levels (
) const
{
    return this->max_pyramid_levels;
}
// ----------------------------------------------------------------------------------------
// Sets the cap on the number of pyramid levels that load() will build.
// requires: max_levels > 0
template <typename Pyramid_type>
void scan_fhog_pyramid<Pyramid_type>::
set_max_pyramid_levels (
    unsigned long max_levels
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(max_levels > 0,
        "\t void scan_fhog_pyramid::set_max_pyramid_levels()"
        << "\n\t You can't have zero levels. "
        << "\n\t max_levels: " << max_levels
        << "\n\t this: " << this
        );

    this->max_pyramid_levels = max_levels;
}
// ----------------------------------------------------------------------------------------
// Scans the filter bank w over every level of the loaded feature pyramid and stores in
// dets every window whose score is >= thresh, as (score, rectangle in input image
// coordinates) pairs sorted from highest to lowest score.
// requires: is_loaded_with_image() && w.get_num_dimensions() == get_num_dimensions()
template <typename Pyramid_type>
void scan_fhog_pyramid<Pyramid_type>::
detect (
    const fhog_filterbank& w,
    std::vector<std::pair<double, rectangle> >& dets,
    const double thresh
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(is_loaded_with_image() &&
                w.get_num_dimensions() == get_num_dimensions(),
        "\t void scan_fhog_pyramid::detect()"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
        << "\n\t w.get_num_dimensions(): " << w.get_num_dimensions()
        << "\n\t get_num_dimensions(): " << get_num_dimensions()
        << "\n\t this: " << this
        );

    dets.clear();

    unsigned long width, height;
    compute_fhog_window_size(width, height);

    // Offset added to each saliency location before building the window around it.
    // It is 1 along an axis whose window size is even and 0 when it is odd.
    const point anchor((width+1)%2, (height+1)%2);

    array2d<float> saliency_image;
    pyramid_type pyr;
    const unsigned long separable_count = w.num_separable_filters();

    // for all pyramid levels
    for (unsigned long l = 0; l < feats.size(); ++l)
    {
        // Region of saliency_image holding valid filter responses.
        rectangle area;

        if (separable_count > 62)
        {
            // More than 62 separable terms (2 per channel): apply the 31 full 2D
            // filters instead.  The first call overwrites saliency_image, the
            // remaining ones add onto it.
            area = spatially_filter_image(feats[l][0], saliency_image, w.filters[0]);
            for (unsigned long i = 1; i < w.filters.size(); ++i)
            {
                spatially_filter_image(feats[l][i], saliency_image, w.filters[i], 1, false, true);
            }
        }
        else
        {
            saliency_image.clear();

            // Apply every separable term of every channel.  Channels that have no
            // terms are skipped by the inner loop.  While saliency_image is still
            // empty the output is overwritten; afterwards results are accumulated.
            for (unsigned long i = 0; i < w.row_filters.size(); ++i)
            {
                for (unsigned long j = 0; j < w.row_filters[i].size(); ++j)
                {
                    const bool accumulate = (saliency_image.size() != 0);
                    area = spatially_filter_image_separable(
                        feats[l][i], saliency_image,
                        w.row_filters[i][j], w.col_filters[i][j],
                        1, false, accumulate);
                }
            }

            // Nothing was written (e.g. there are no separable terms at all), so
            // make the saliency image an all zero image of the right size.
            if (saliency_image.size() == 0)
            {
                saliency_image.set_size(feats[l][0].nr(), feats[l][0].nc());
                assign_all_pixels(saliency_image, 0);
            }
        }

        // now search the saliency image for any detections
        for (long r = area.top(); r <= area.bottom(); ++r)
        {
            for (long c = area.left(); c <= area.right(); ++c)
            {
                const float score = saliency_image[r][c];
                if (!(score >= thresh))
                    continue;

                // Window without its padding, in fHOG coordinates of this level ...
                const rectangle win_fhog = centered_rect(point(c,r)+anchor,
                                                         width-2*padding,
                                                         height-2*padding);
                // ... mapped to pixels of this level, then up to the input image.
                rectangle rect = fhog_to_image(win_fhog, cell_size, height, width);
                rect = pyr.rect_up(rect, l);

                dets.push_back(std::make_pair(score, rect));
            }
        }
    }

    // Sorting the reversed range in ascending order leaves dets in descending order.
    std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
}
// ----------------------------------------------------------------------------------------
// Returns the rectangle this scanner can output that best matches rect, searching up
// to max_pyramid_levels pyramid levels.
template <typename Pyramid_type>
const rectangle scan_fhog_pyramid<Pyramid_type>::
get_best_matching_rect (
    const rectangle& rect
) const
{
    // Only the mapped rectangle is wanted; the other outputs are discarded.
    rectangle best_rect;
    rectangle unused_fhog_rect;
    unsigned long unused_level;
    get_mapped_rect_and_metadata(max_pyramid_levels, rect, best_rect, unused_fhog_rect, unused_level);
    return best_rect;
}
// ----------------------------------------------------------------------------------------
// Finds the pyramid level at which the detection window best matches rect.
// Outputs:
//   - best_level:  index of that level (0 if no level is examined)
//   - fhog_rect:   padded window in that level's fHOG coordinates, centered on rect
//                  (left unmodified if number_pyramid_levels == 0)
//   - mapped_rect: the unpadded window mapped back into input image coordinates
template <typename Pyramid_type>
void scan_fhog_pyramid<Pyramid_type>::
get_mapped_rect_and_metadata (
    const unsigned long number_pyramid_levels,
    const rectangle& rect,
    rectangle& mapped_rect,
    rectangle& fhog_rect,
    unsigned long& best_level
) const
{
    pyramid_type pyr;

    unsigned long width, height;
    compute_fhog_window_size(width, height);

    best_level = 0;
    double top_score = -1;

    // Figure out the pyramid level which best matches rect against our detection
    // window.
    for (unsigned long level = 0; level < number_pyramid_levels; ++level)
    {
        // rect as seen in the fHOG image of this level.
        const rectangle rect_in_fhog = image_to_fhog(pyr.rect_down(rect, level), cell_size, height, width);
        const point where = center(rect_in_fhog);

        // The unpadded detection window centered there, mapped back to the input image.
        const rectangle window_fhog = centered_rect(where, width-2*padding, height-2*padding);
        const rectangle window_in_image = pyr.rect_up(fhog_to_image(window_fhog, cell_size, height, width), level);

        const double score = get_match_score(window_in_image, rect);
        if (score > top_score)
        {
            top_score = score;
            best_level = level;
            fhog_rect = centered_rect(where, width, height);
        }

        // Stop once rect covers at most one cell in fHOG space.
        if (rect_in_fhog.area() <= 1)
            break;
    }

    // Strip the padding and map the winning window back to input image coordinates.
    const rectangle unpadded = shrink_rect(fhog_rect, padding);
    mapped_rect = pyr.rect_up(fhog_to_image(unpadded, cell_size, height, width), best_level);
}
// ----------------------------------------------------------------------------------------
// Wraps rect in a full_object_detection.  The weight vector argument is not used.
template <typename Pyramid_type>
full_object_detection scan_fhog_pyramid<Pyramid_type>::
get_full_object_detection (
    const rectangle& rect,
    const feature_vector_type&
) const
{
    return full_object_detection(rect);
}
// ----------------------------------------------------------------------------------------
// Adds to psi the fHOG features found inside the scanning window that best matches
// obj.get_rect().  Window cells lying outside the feature image add nothing.
// requires: is_loaded_with_image() && psi.size() >= get_num_dimensions() &&
//           obj.num_parts() == 0
template <typename Pyramid_type>
void scan_fhog_pyramid<Pyramid_type>::
get_feature_vector (
    const full_object_detection& obj,
    feature_vector_type& psi
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(is_loaded_with_image() &&
                psi.size() >= get_num_dimensions() &&
                obj.num_parts() == 0,
        "\t void scan_fhog_pyramid::get_feature_vector()"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
        << "\n\t psi.size(): " << psi.size()
        << "\n\t get_num_dimensions(): " << get_num_dimensions()
        << "\n\t obj.num_parts(): " << obj.num_parts()
        << "\n\t this: " << this
        );

    // Locate the pyramid level and the fHOG window corresponding to the object box.
    rectangle mapped_rect;
    rectangle fhog_rect;
    unsigned long best_level;
    get_mapped_rect_and_metadata(feats.size(), obj.get_rect(), mapped_rect, fhog_rect, best_level);

    // psi is laid out plane by plane, each plane in row major order over the window.
    long out_idx = 0;
    for (unsigned long plane = 0; plane < feats[best_level].size(); ++plane)
    {
        // Valid area of the feature planes at this level (taken from plane 0).
        const rectangle valid_area = get_rect(feats[best_level][0]);

        for (long r = fhog_rect.top(); r <= fhog_rect.bottom(); ++r)
        {
            for (long c = fhog_rect.left(); c <= fhog_rect.right(); ++c)
            {
                // The output index advances for every window cell, in bounds or not.
                if (valid_area.contains(c,r))
                    psi(out_idx) += feats[best_level][plane][r][c];
                ++out_idx;
            }
        }
    }
}
// ----------------------------------------------------------------------------------------
// Sets the minimum layer size used by load() when deciding how many pyramid levels
// to build.
// requires: width > 0 && height > 0
template <typename Pyramid_type>
void scan_fhog_pyramid<Pyramid_type>::
set_min_pyramid_layer_size (
    unsigned long width,
    unsigned long height
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(width > 0 && height > 0,
        "\t void scan_fhog_pyramid::set_min_pyramid_layer_size()"
        << "\n\t These sizes can't be zero. "
        << "\n\t width: " << width
        << "\n\t height: " << height
        << "\n\t this: " << this
        );

    this->min_pyramid_layer_width = width;
    this->min_pyramid_layer_height = height;
}
// ----------------------------------------------------------------------------------------
// Returns the minimum pyramid layer width set via set_min_pyramid_layer_size().
template <typename Pyramid_type>
unsigned long scan_fhog_pyramid<Pyramid_type>::
get_min_pyramid_layer_width (
) const
{
    return this->min_pyramid_layer_width;
}
// ----------------------------------------------------------------------------------------
// Returns the minimum pyramid layer height set via set_min_pyramid_layer_size().
template <typename Pyramid_type>
unsigned long scan_fhog_pyramid<Pyramid_type>::
get_min_pyramid_layer_height (
) const
{
    return this->min_pyramid_layer_height;
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Renders the filters learned by an fHOG object detector as an image.  The detector's
// weight vector is unpacked into a filter bank and the result is forwarded to the
// draw_fhog() overload that accepts a set of filters.
template <
    typename Pyramid_type
    >
matrix<unsigned char> draw_fhog (
    const object_detector<scan_fhog_pyramid<Pyramid_type> >& detector,
    const long cell_draw_size = 15
)
{
    typedef typename scan_fhog_pyramid<Pyramid_type>::fhog_filterbank filterbank_type;

    const filterbank_type bank = detector.get_scanner().build_fhog_filterbank(detector.get_w());
    return draw_fhog(bank.get_filters(), cell_draw_size);
}
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
>
unsigned
long
num_separable_filters
(
const
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
>
>&
detector
)
{
typename
scan_fhog_pyramid
<
Pyramid_type
>::
fhog_filterbank
fb
=
detector
.
get_scanner
().
build_fhog_filterbank
(
detector
.
get_w
());
return
fb
.
num_separable_filters
();
}
// ----------------------------------------------------------------------------------------
// If the scanner's nuclear norm regularization strength is non-zero, registers one
// nuclear norm regularizer with prob for each of the 31 filters packed in the weight
// vector and sets prob's cache based epsilon to 0.001.  Does nothing otherwise.
template <
    typename Pyramid_type,
    typename svm_struct_prob_type
    >
void configure_nuclear_norm_regularizer (
    const scan_fhog_pyramid<Pyramid_type>& scanner,
    svm_struct_prob_type& prob
)
{
    const double strength = scanner.get_nuclear_norm_regularization_strength();
    if (strength == 0)
        return;

    const unsigned long width = scanner.get_fhog_window_width();
    const unsigned long height = scanner.get_fhog_window_height();

    // Each filter occupies width*height consecutive weights.
    const unsigned long filter_size = width*height;
    for (int i = 0; i < 31; ++i)
    {
        // Arguments: offset of the filter's first weight, its rows, its columns.
        prob.add_nuclear_norm_regularizer(i*filter_size, height, width, strength);
    }
    prob.set_cache_based_epsilon(0.001);
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_fHOG_PYRAMID_H__
dlib/image_processing/scan_fhog_pyramid_abstract.h
0 → 100644
View file @
6401e693
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SCAN_fHOG_PYRAMID_ABSTRACT_H__
#ifdef DLIB_SCAN_fHOG_PYRAMID_ABSTRACT_H__
#include <vector>
#include "../image_transforms/fhog_abstract.h"
#include "object_detector_abstract.h"
namespace
dlib
{
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
>
class
scan_fhog_pyramid
:
noncopyable
{
/*!
REQUIREMENTS ON Pyramid_type
- Must be one of the pyramid_down objects defined in
dlib/image_transforms/image_pyramid_abstract.h or an object with a
compatible interface
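INITIAL VALUE
- get_cell_size() == 8
- get_padding() == 1
- get_detection_window_width() == 64
- get_detection_window_height() == 64
- get_max_pyramid_levels() == 1000
- get_min_pyramid_layer_width() == 64
- get_min_pyramid_layer_height() == 64
- get_nuclear_norm_regularization_strength() == 0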
WHAT THIS OBJECT REPRESENTS
This object is a tool for running a fixed sized sliding window classifier
over an image pyramid. In particular, it slides a linear classifier over
a HOG pyramid as discussed in the paper:
Histograms of Oriented Gradients for Human Detection by Navneet Dalal
and Bill Triggs, CVPR 2005
However, we augment the method slightly to use the version of HOG features
from:
Object Detection with Discriminatively Trained Part Based Models by
P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan
IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, Sep. 2010
Since these HOG features have been shown to give superior performance.
THREAD SAFETY
Concurrent access to an instance of this object is not safe and should be
protected by a mutex lock except for the case where you are copying the
configuration (via copy_configuration()) of a scan_fhog_pyramid object to
many other threads. In this case, it is safe to copy the configuration of
a shared object so long as no other operations are performed on it.
!*/
public
:
typedef
matrix
<
double
,
0
,
1
>
feature_vector_type
;
typedef
Pyramid_type
pyramid_type
;
scan_fhog_pyramid
(
);
/*!
ensures
- this object is properly initialized
!*/
template
<
typename
image_type
>
void
load
(
const
image_type
&
img
);
/*!
requires
- image_type is an implementation of array2d/array2d_kernel_abstract.h
- img contains some kind of pixel type.
(i.e. pixel_traits<typename image_type::type> is defined)
ensures
- #is_loaded_with_image() == true
- This object is ready to run a classifier over img to detect object
locations. Call detect() to do this.
!*/
bool
is_loaded_with_image
(
)
const
;
/*!
ensures
- returns true if this object has been loaded with an image to process and
false otherwise.
!*/
void
copy_configuration
(
const
scan_fhog_pyramid
&
item
);
/*!
ensures
- Copies all the state information of item into *this, except for state
information populated by load(). More precisely, given two scan_fhog_pyramid
objects S1 and S2, the following sequence of instructions should always
result in both of them having the exact same state:
S2.copy_configuration(S1);
S1.load(img);
S2.load(img);
!*/
void
set_detection_window_size
(
unsigned
long
window_width
,
unsigned
long
window_height
);
/*!
requires
- window_width > 0
- window_height > 0
ensures
- When detect() is called, this object scans a window that is of the given
width and height (in pixels) over each layer in an image pyramid. This
means that the rectangle detections which come out of detect() will have
a width to height ratio approximately equal to window_width/window_height
and will be approximately window_width*window_height pixels in area or
larger. Therefore, the smallest object that can be detected is roughly
window_width by window_height pixels in size.
- #get_detection_window_width() == window_width
- #get_detection_window_height() == window_height
- Since we use a HOG feature representation, the detection procedure works
as follows:
Step 1. Make an image pyramid.
Step 2. Convert each layer of the image pyramid into a 31 band HOG "image".
Step 3. Scan a linear classifier over each HOG image in the pyramid.
Moreover, the HOG features quantize the input image into a grid of cells,
each cell being get_cell_size() by get_cell_size() pixels in size. So
when we scan the object detector over the pyramid we are scanning an
appropriately sized window over these smaller quantized HOG features. In
particular, the size of the window we scan over the HOG feature pyramid
is #get_fhog_window_width() by #get_fhog_window_height() HOG cells in
size.
!*/
unsigned
long
get_detection_window_width
(
)
const
;
/*!
ensures
- returns the width, in pixels, of the detection window that is scanned
over the image when detect() is called.
!*/
inline
unsigned
long
get_detection_window_height
(
)
const
;
/*!
ensures
- returns the height, in pixels, of the detection window that is scanned
over the image when detect() is called.
!*/
unsigned
long
get_fhog_window_width
(
)
const
;
/*!
ensures
- Returns the width of the HOG scanning window in terms of HOG cell blocks.
Note that this is a function of get_detection_window_width(), get_cell_size(),
and get_padding() and is therefore not something you set directly.
- #get_fhog_window_width() is approximately equal to the number of HOG cells
that fit into get_detection_window_width() pixels plus 2*get_padding()
since we include additional padding around each window to add context.
!*/
unsigned
long
get_fhog_window_height
(
)
const
;
/*!
ensures
- Returns the height of the HOG scanning window in terms of HOG cell blocks.
Note that this is a function of get_detection_window_height(), get_cell_size(),
and get_padding() and is therefore not something you set directly.
- #get_fhog_window_height() is approximately equal to the number of HOG cells
that fit into get_detection_window_height() pixels plus 2*get_padding()
since we include additional padding around each window to add context.
!*/
void
set_padding
(
unsigned
long
new_padding
);
/*!
ensures
- #get_padding() == new_padding
!*/
unsigned
long
get_padding
(
)
const
;
/*!
ensures
- The HOG windows scanned over the HOG pyramid can include additional HOG
cells outside the detection window. This can help add context and
improve detection accuracy. This function returns the number of extra
HOG cells added onto the border of the HOG windows which are scanned by
detect().
!*/
unsigned
long
get_cell_size
(
)
const
;
/*!
ensures
- Returns the size of the HOG cells. Each HOG cell is square and contains
get_cell_size()*get_cell_size() pixels.
!*/
void
set_cell_size
(
unsigned
long
new_cell_size
);
/*!
requires
- new_cell_size > 0
ensures
- #get_cell_size() == new_cell_size
!*/
inline
long
get_num_dimensions
(
)
const
;
/*!
ensures
- returns get_fhog_window_width()*get_fhog_window_height()*31
(i.e. The number of features is equal to the size of the HOG window
times 31 since there are 31 channels in the HOG feature representation.)
!*/
inline
unsigned
long
get_num_detection_templates
(
)
const
{
return
1
;
}
/*!
ensures
- returns 1. Note that this function is here only for compatibility with
the scan_image_pyramid object. Notionally, its return value indicates
that a scan_fhog_pyramid object is always ready to detect objects once
an image has been loaded.
!*/
inline
unsigned
long
get_num_movable_components_per_detection_template
(
)
const
{
return
0
;
}
/*!
ensures
- returns 0. Note that this function is here only for compatibility with
the scan_image_pyramid object. Its return value means that this object
does not support using movable part models.
!*/
unsigned
long
get_max_pyramid_levels
(
)
const
;
/*!
ensures
- returns the maximum number of image pyramid levels this object will use.
Note that #get_max_pyramid_levels() == 1 indicates that no image pyramid
will be used at all. That is, only the original image will be processed
and no lower scale versions will be created.
!*/
void
set_max_pyramid_levels
(
unsigned
long
max_levels
);
/*!
requires
- max_levels > 0
ensures
- #get_max_pyramid_levels() == max_levels
!*/
void
set_min_pyramid_layer_size
(
unsigned
long
width
,
unsigned
long
height
);
/*!
requires
- width > 0
- height > 0
ensures
- #get_min_pyramid_layer_width() == width
- #get_min_pyramid_layer_height() == height
!*/
inline
unsigned
long
get_min_pyramid_layer_width
(
)
const
;
/*!
ensures
- returns the smallest allowable width of an image in the image pyramid.
All pyramids will always include the original input image, however, no
pyramid levels will be created which have a width smaller than the
value returned by this function.
!*/
inline unsigned long get_min_pyramid_layer_height (
) const;
/*!
    ensures
        - returns the smallest height an image in the image pyramid is allowed to
          have.  The original input image is always part of the pyramid;
          however, no pyramid level whose height is smaller than the value
          returned by this function will be created.
!*/
fhog_filterbank build_fhog_filterbank (
    const feature_vector_type& weights
) const;
/*!
    requires
        - weights.size() >= get_num_dimensions()
    ensures
        - Builds and returns a fhog_filterbank object FB such that:
            - FB.get_num_dimensions() == get_num_dimensions()
            - FB.get_filters() == the values in weights unpacked into 31 filters.
            - FB.num_separable_filters() == the number of separable filters
              needed to represent all the filters in FB.get_filters().
!*/
class
fhog_filterbank
{
/*!
    WHAT THIS OBJECT REPRESENTS
        This object represents a HOG filter bank.  That is, the classifier that
        is slid over a HOG pyramid is a set of 31 linear filters, each
        get_fhog_window_width() rows by get_fhog_window_height() columns in
        size.  This object contains that set of 31 filters.

        NOTE(review): "width rows by height columns" looks transposed relative
        to the usual convention (rows == height, columns == width).  Confirm
        against the matrices actually produced by build_fhog_filterbank().
!*/
public
:
unsigned
long
get_num_dimensions
(
)
const
;
/*!
    ensures
        - Returns the total number of values in the filters.
!*/
const
std
::
vector
<
matrix
<
float
>
>&
get_filters
(
)
const
;
/*!
    ensures
        - returns the set of 31 HOG filters in this object.
!*/
unsigned
long
num_separable_filters
(
)
const
;
/*!
    ensures
        - returns the number of separable filters necessary to represent all
          the filters in get_filters().
!*/
};
void
detect
(
const
fhog_filterbank
&
w
,
std
::
vector
<
std
::
pair
<
double
,
rectangle
>
>&
dets
,
const
double
thresh
)
const
;
/*!
    requires
        - w.get_num_dimensions() == get_num_dimensions()
        - is_loaded_with_image() == true
    ensures
        - Scans the HOG filter defined by w over the HOG pyramid that was populated
          by the last call to load() and stores all object detections into #dets.
        - for all valid i:
            - #dets[i].second == The object box which produced this detection.  This rectangle gives
              the location of the detection.  Note that the rectangle will have been converted back into
              the original image input space.  That is, if this detection was made at a low level in the
              image pyramid then the object box will have been automatically mapped up the pyramid layers
              to the original image space.  Or in other words, if you plot #dets[i].second on top of the
              image given to load() it will show up in the right place.
            - #dets[i].first == The score for this detection.  This value is equal to dot(w, feature vector
              for this sliding window location).
            - #dets[i].first >= thresh
        - #dets will be sorted in descending order. (i.e. #dets[i].first >= #dets[j].first for all i, and j>i)
        - Elements of w beyond index get_num_dimensions()-1 are ignored.  I.e. only the first
          get_num_dimensions() are used.
          (NOTE(review): the requires clause demands
          w.get_num_dimensions() == get_num_dimensions(), so for this overload there
          appear to be no such extra elements.  This bullet presumably describes the
          feature_vector_type overload of detect() -- confirm.)
        - Note that no form of non-max suppression is performed.  If a window has a score >= thresh
          then it is reported in #dets.
!*/
void
detect
(
const
feature_vector_type
&
w
,
std
::
vector
<
std
::
pair
<
double
,
rectangle
>
>&
dets
,
const
double
thresh
)
const
;
/*!
    requires
        - w.size() >= get_num_dimensions()
          (i.e. the precondition of build_fhog_filterbank(), to which w is passed)
        - is_loaded_with_image() == true
    ensures
        - performs: detect(build_fhog_filterbank(w), dets, thresh)
!*/
void get_feature_vector (
    const full_object_detection& obj,
    feature_vector_type& psi
) const;
/*!
    requires
        - obj.num_parts() == 0
        - is_loaded_with_image() == true
        - psi.size() >= get_num_dimensions()
          (i.e. psi must already have its memory allocated when this function is
          called)
    ensures
        - Lets you obtain the feature vector used for an object detection output
          by detect().  The vector is added to psi rather than assigned to it.
          Also note that a rectangle from detect() must first be converted into
          the needed full_object_detection with get_full_object_detection().
        - The vector added to psi has get_num_dimensions() dimensions, so the
          elements of psi after psi(get_num_dimensions()-1) are not modified.
        - scan_fhog_pyramid only searches a limited set of object locations, so
          detect() cannot output every possible rectangle.  When obj.get_rect()
          could not arise from a call to detect(), this function maps
          obj.get_rect() to the nearest possible rectangle and then adds the
          feature vector for the mapped rectangle into #psi.
        - get_best_matching_rect(obj.get_rect()) == the rectangle obj.get_rect()
          gets mapped to for feature extraction.
!*/
full_object_detection get_full_object_detection (
    const rectangle& rect,
    const feature_vector_type& w
) const;
/*!
    ensures
        - returns full_object_detection(rect)
          (This function exists only for compatibility with the
          scan_image_pyramid object)
!*/
const rectangle get_best_matching_rect (
    const rectangle& rect
) const;
/*!
    ensures
        - scan_fhog_pyramid only searches a limited set of object locations, so
          not every possible rectangle can be represented.  This function
          therefore lets you supply a rectangle and obtain the nearest possible
          candidate object location rectangle.
!*/
double
get_nuclear_norm_regularization_strength
(
)
const
;
/*!
    ensures
        - If the number of separable filters in a fhog_filterbank is small then the
          filter bank can be scanned over an image much faster than a normal set of
          31 filters.  Therefore, this object provides the option to encourage
          machine learning methods that learn a HOG filter bank (i.e.
          structural_object_detection_trainer) to select filter banks that have
          this beneficial property.  In particular, the value returned by
          get_nuclear_norm_regularization_strength() is a multiplier on a nuclear
          norm regularizer which will encourage the selection of filters that use a
          small number of separable components.  Larger values tend to give a
          smaller number of separable filters.
        - if (get_nuclear_norm_regularization_strength() == 0) then
            - This feature is disabled
        - else
            - A nuclear norm regularizer will be added when
              structural_object_detection_trainer is used to learn a HOG filter
              bank.  Note that this can make the training process take
              significantly longer (but can result in faster object detectors).
!*/
void set_nuclear_norm_regularization_strength (
    double strength
);
/*!
    requires
        - strength >= 0
    ensures
        - #get_nuclear_norm_regularization_strength() == strength
!*/
};
// ----------------------------------------------------------------------------------------
template <typename T>
void serialize (
    const scan_fhog_pyramid<T>& item,
    std::ostream& out
);
/*!
    provides serialization support
!*/
// ----------------------------------------------------------------------------------------
template <typename T>
void deserialize (
    scan_fhog_pyramid<T>& item,
    std::istream& in
);
/*!
    provides deserialization support
!*/
// ----------------------------------------------------------------------------------------
template <typename Pyramid_type>
matrix<unsigned char> draw_fhog (
    const object_detector<scan_fhog_pyramid<Pyramid_type> >& detector,
    const long cell_draw_size = 15
);
/*!
    requires
        - detector.get_w().size() >= detector.get_scanner().get_num_dimensions()
          (i.e. the detector must have been populated with a HOG filter)
    ensures
        - Renders the HOG filters in the given detector as an image suitable for
          display on the screen.  In particular, all the HOG cells are drawn into
          a grayscale image in a way that shows the magnitude and orientation of
          the gradient energy in each cell.  The resulting image is then
          returned.
!*/
// ----------------------------------------------------------------------------------------
template <typename Pyramid_type>
unsigned long num_separable_filters (
    const object_detector<scan_fhog_pyramid<Pyramid_type> >& detector
);
/*!
    requires
        - detector.get_w().size() >= detector.get_scanner().get_num_dimensions()
          (i.e. the detector must have been populated with a HOG filter)
    ensures
        - Returns the number of separable filters needed to represent the HOG
          filters in the given detector.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_fHOG_PYRAMID_ABSTRACT_H__
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment