Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
b0795c96
Commit
b0795c96
authored
Mar 07, 2014
by
Davis King
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Gave scan_fhog_pyramid an optional template argument that lets you
define a custom version of HOG feature extraction.
parent
882e5ec1
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
346 additions
and
103 deletions
+346
-103
scan_fhog_pyramid.h
dlib/image_processing/scan_fhog_pyramid.h
+195
-84
scan_fhog_pyramid_abstract.h
dlib/image_processing/scan_fhog_pyramid_abstract.h
+151
-19
No files found.
dlib/image_processing/scan_fhog_pyramid.h
View file @
b0795c96
...
...
@@ -13,10 +13,60 @@
namespace
dlib
{
// ----------------------------------------------------------------------------------------
// Default feature extractor used by scan_fhog_pyramid.  It carries no state:
// each member simply forwards its arguments, unchanged, to the matching free
// FHOG routine (image_to_fhog, fhog_to_image, extract_fhog_features).
class default_fhog_feature_extractor
{
public:

    // Number of planes in the feature image; this implementation always reports 31.
    long get_num_planes (
    ) const
    {
        return 31;
    }

    // Forwards to extract_fhog_features(), which fills hog from img.
    template <typename image_type>
    void operator() (
        const image_type& img,
        dlib::array<array2d<float> >& hog,
        int cell_size,
        int filter_rows_padding,
        int filter_cols_padding
    ) const
    {
        extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding);
    }

    // Forwards to image_to_fhog() and returns its result.
    rectangle image_to_feats (
        const rectangle& rect,
        int cell_size,
        int filter_rows_padding,
        int filter_cols_padding
    ) const
    {
        return image_to_fhog(rect, cell_size, filter_rows_padding, filter_cols_padding);
    }

    // Forwards to fhog_to_image() and returns its result.
    rectangle feats_to_image (
        const rectangle& rect,
        int cell_size,
        int filter_rows_padding,
        int filter_cols_padding
    ) const
    {
        return fhog_to_image(rect, cell_size, filter_rows_padding, filter_cols_padding);
    }
};
// Serialization overload for default_fhog_feature_extractor.  The body is
// empty, so nothing is written to the output stream.
inline void serialize (
    const default_fhog_feature_extractor& /*item*/,
    std::ostream& /*out*/
)
{
}
// Deserialization overload for default_fhog_feature_extractor.  The body is
// empty, so nothing is read from the input stream.
inline void deserialize (
    default_fhog_feature_extractor& /*item*/,
    std::istream& /*in*/
)
{
}
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
Feature_extractor_type
=
default_fhog_feature_extractor
>
class
scan_fhog_pyramid
:
noncopyable
{
...
...
@@ -26,10 +76,15 @@ namespace dlib
typedef
matrix
<
double
,
0
,
1
>
feature_vector_type
;
typedef
Pyramid_type
pyramid_type
;
typedef
Feature_extractor_type
feature_extractor_type
;
scan_fhog_pyramid
(
);
explicit
scan_fhog_pyramid
(
const
feature_extractor_type
&
fe_
);
template
<
typename
image_type
>
...
...
@@ -106,6 +161,9 @@ namespace dlib
unsigned
long
get_max_pyramid_levels
(
)
const
;
// Read-only access to the feature extractor stored in this scanner (member fe).
const feature_extractor_type& get_feature_extractor (
) const
{
    return fe;
}
void
set_max_pyramid_levels
(
unsigned
long
max_levels
);
...
...
@@ -187,9 +245,9 @@ namespace dlib
);
fhog_filterbank
temp
;
temp
.
filters
.
resize
(
31
);
temp
.
row_filters
.
resize
(
31
);
temp
.
col_filters
.
resize
(
31
);
temp
.
filters
.
resize
(
fe
.
get_num_planes
()
);
temp
.
row_filters
.
resize
(
fe
.
get_num_planes
()
);
temp
.
col_filters
.
resize
(
fe
.
get_num_planes
()
);
// load filters from w
unsigned
long
width
,
height
;
...
...
@@ -279,15 +337,15 @@ namespace dlib
return
height
;
}
template
<
typename
T
>
template
<
typename
T
,
typename
U
>
friend
void
serialize
(
const
scan_fhog_pyramid
<
T
>&
item
,
const
scan_fhog_pyramid
<
T
,
U
>&
item
,
std
::
ostream
&
out
);
template
<
typename
T
>
template
<
typename
T
,
typename
U
>
friend
void
deserialize
(
scan_fhog_pyramid
<
T
>&
item
,
scan_fhog_pyramid
<
T
,
U
>&
item
,
std
::
istream
&
in
);
...
...
@@ -297,7 +355,8 @@ namespace dlib
unsigned
long
&
height
)
const
{
const
rectangle
temp
=
grow_rect
(
image_to_fhog
(
centered_rect
(
point
(
0
,
0
),
window_width
,
window_height
),
cell_size
),
padding
);
const
rectangle
rect
=
centered_rect
(
point
(
0
,
0
),
window_width
,
window_height
);
const
rectangle
temp
=
grow_rect
(
fe
.
image_to_feats
(
rect
,
cell_size
,
1
,
1
),
padding
);
width
=
temp
.
width
();
height
=
temp
.
height
();
}
...
...
@@ -336,6 +395,7 @@ namespace dlib
array2d
<
float
>&
saliency_image
);
feature_extractor_type
fe
;
array
<
fhog_image
>
feats
;
int
cell_size
;
unsigned
long
padding
;
...
...
@@ -346,12 +406,24 @@ namespace dlib
unsigned
long
min_pyramid_layer_height
;
double
nuclear_norm_regularization_strength
;
// Assigns the default value to every configuration member of the scanner.
// Called by the constructors; it does not touch fe or feats.
void init()
{
    // detection window, cell size and padding
    window_width  = 64;
    window_height = 64;
    cell_size     = 8;
    padding       = 1;

    // image pyramid limits
    min_pyramid_layer_width  = 64;
    min_pyramid_layer_height = 64;
    max_pyramid_levels       = 1000;

    // regularization strength starts at zero
    nuclear_norm_regularization_strength = 0;
}
};
// ----------------------------------------------------------------------------------------
template
<
typename
T
>
rectangle
scan_fhog_pyramid
<
T
>::
template
<
typename
T
,
typename
U
>
rectangle
scan_fhog_pyramid
<
T
,
U
>::
apply_filters_to_fhog
(
const
fhog_filterbank
&
w
,
const
fhog_image
&
feats
,
...
...
@@ -361,7 +433,7 @@ namespace dlib
const
unsigned
long
num_separable_filters
=
w
.
num_separable_filters
();
rectangle
area
;
// use the separable filters if they would be faster than running the regular filters.
if
(
num_separable_filters
>
31
*
std
::
min
(
w
.
filters
[
0
].
nr
(),
w
.
filters
[
0
].
nc
())
/
3
.
0
)
if
(
num_separable_filters
>
w
.
filters
.
size
()
*
std
::
min
(
w
.
filters
[
0
].
nr
(),
w
.
filters
[
0
].
nc
())
/
3
.
0
)
{
area
=
spatially_filter_image
(
feats
[
0
],
saliency_image
,
w
.
filters
[
0
]);
for
(
unsigned
long
i
=
1
;
i
<
w
.
filters
.
size
();
++
i
)
...
...
@@ -402,14 +474,15 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
T
>
template
<
typename
T
,
typename
U
>
void
serialize
(
const
scan_fhog_pyramid
<
T
>&
item
,
const
scan_fhog_pyramid
<
T
,
U
>&
item
,
std
::
ostream
&
out
)
{
int
version
=
1
;
serialize
(
version
,
out
);
serialize
(
item
.
fe
,
out
);
serialize
(
item
.
feats
,
out
);
serialize
(
item
.
cell_size
,
out
);
serialize
(
item
.
padding
,
out
);
...
...
@@ -424,9 +497,9 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
T
>
template
<
typename
T
,
typename
U
>
void
deserialize
(
scan_fhog_pyramid
<
T
>&
item
,
scan_fhog_pyramid
<
T
,
U
>&
item
,
std
::
istream
&
in
)
{
...
...
@@ -435,6 +508,7 @@ namespace dlib
if
(
version
!=
1
)
throw
serialization_error
(
"Unsupported version found when deserializing a scan_fhog_pyramid object."
);
deserialize
(
item
.
fe
,
in
);
deserialize
(
item
.
feats
,
in
);
deserialize
(
item
.
cell_size
,
in
);
deserialize
(
item
.
padding
,
in
);
...
...
@@ -462,31 +536,41 @@ namespace dlib
// ----------------------------------------------------------------------------------------
// Default constructor.  The feature extractor member is default constructed
// and every configuration value is set by init().
//
// The previous text of this definition was ill-formed: the template parameter
// list repeated `typename Pyramid_type` with no separating comma, a stale
// single-parameter qualifier `scan_fhog_pyramid<Pyramid_type>::` preceded the
// real one, and a leftover member initializer list (with an unbalanced ')')
// duplicated exactly the values that init() assigns.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
scan_fhog_pyramid (
)
{
    init();
}
// ----------------------------------------------------------------------------------------
// Constructs a scanner that uses a caller supplied feature extractor.
//
// fe_ - the feature extractor to copy into this object.
//
// Configuration values are set by init() first; the extractor member is default
// constructed and then overwritten by assignment from fe_.
//
// The template parameter list previously repeated `typename Pyramid_type`
// without a separating comma, which is not valid C++.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
scan_fhog_pyramid (
    const feature_extractor_type& fe_
)
{
    init();
    fe = fe_;
}
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
,
typename
feature_extractor_type
>
template
<
typename
image_type
>
void
scan_fhog_pyramid
<
Pyramid_type
>::
void
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
load
(
const
image_type
&
img
)
...
...
@@ -514,18 +598,22 @@ namespace dlib
typedef
typename
image_type
::
mem_manager_type
mem_manager_type
;
// build our feature pyramid
extract_fhog_features
(
img
,
feats
[
0
],
cell_size
,
height
,
width
);
fe
(
img
,
feats
[
0
],
cell_size
,
height
,
width
);
DLIB_ASSERT
(
feats
[
0
].
size
()
==
fe
.
get_num_planes
(),
"Invalid feature extractor used with dlib::scan_fhog_pyramid. The output does not have the
\n
"
"indicated number of planes."
);
if
(
feats
.
size
()
>
1
)
{
array2d
<
pixel_type
,
mem_manager_type
>
temp1
,
temp2
;
pyr
(
img
,
temp1
);
extract_fhog_features
(
temp1
,
feats
[
1
],
cell_size
,
height
,
width
);
fe
(
temp1
,
feats
[
1
],
cell_size
,
height
,
width
);
swap
(
temp1
,
temp2
);
for
(
unsigned
long
i
=
2
;
i
<
feats
.
size
();
++
i
)
{
pyr
(
temp2
,
temp1
);
extract_fhog_features
(
temp1
,
feats
[
i
],
cell_size
,
height
,
width
);
fe
(
temp1
,
feats
[
i
],
cell_size
,
height
,
width
);
swap
(
temp1
,
temp2
);
}
}
...
...
@@ -534,9 +622,10 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
bool
scan_fhog_pyramid
<
Pyramid_type
>::
bool
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
is_loaded_with_image
(
)
const
{
...
...
@@ -546,9 +635,10 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
void
scan_fhog_pyramid
<
Pyramid_type
>::
void
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
copy_configuration
(
const
scan_fhog_pyramid
&
item
)
...
...
@@ -561,14 +651,16 @@ namespace dlib
min_pyramid_layer_width
=
item
.
min_pyramid_layer_width
;
min_pyramid_layer_height
=
item
.
min_pyramid_layer_height
;
nuclear_norm_regularization_strength
=
item
.
nuclear_norm_regularization_strength
;
fe
=
item
.
fe
;
}
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
unsigned
long
scan_fhog_pyramid
<
Pyramid_type
>::
unsigned
long
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
get_num_detection_templates
(
)
const
{
...
...
@@ -578,9 +670,10 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
unsigned
long
scan_fhog_pyramid
<
Pyramid_type
>::
unsigned
long
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
get_num_movable_components_per_detection_template
(
)
const
{
...
...
@@ -590,23 +683,25 @@ namespace dlib
// ----------------------------------------------------------------------------------------
// Returns the length of the weight vector used by this scanner: the feature
// window width times its height (both obtained from compute_fhog_window_size())
// times the number of planes reported by the feature extractor.
//
// The previous text carried a stale `return width*height*31;` ahead of the real
// return statement, which hard-coded the plane count and made the
// fe.get_num_planes() version unreachable.  It also repeated
// `typename Pyramid_type` in the template parameter list and kept a stale
// `long scan_fhog_pyramid<Pyramid_type>::` qualifier.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
long scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
get_num_dimensions (
) const
{
    unsigned long width, height;
    compute_fhog_window_size(width, height);
    return width*height*fe.get_num_planes();
}
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
unsigned
long
scan_fhog_pyramid
<
Pyramid_type
>::
unsigned
long
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
get_max_pyramid_levels
(
)
const
{
...
...
@@ -616,9 +711,10 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
void
scan_fhog_pyramid
<
Pyramid_type
>::
void
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
set_max_pyramid_levels
(
unsigned
long
max_levels
)
...
...
@@ -637,9 +733,10 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
void
scan_fhog_pyramid
<
Pyramid_type
>::
void
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
detect
(
const
fhog_filterbank
&
w
,
std
::
vector
<
std
::
pair
<
double
,
rectangle
>
>&
dets
,
...
...
@@ -678,7 +775,7 @@ namespace dlib
// if we found a detection
if
(
saliency_image
[
r
][
c
]
>=
thresh
)
{
rectangle
rect
=
f
hog
_to_image
(
centered_rect
(
point
(
c
,
r
),
width
-
2
*
padding
,
height
-
2
*
padding
),
cell_size
,
height
,
width
);
rectangle
rect
=
f
e
.
feats
_to_image
(
centered_rect
(
point
(
c
,
r
),
width
-
2
*
padding
,
height
-
2
*
padding
),
cell_size
,
height
,
width
);
rect
=
pyr
.
rect_up
(
rect
,
l
);
dets
.
push_back
(
std
::
make_pair
(
saliency_image
[
r
][
c
],
rect
));
}
...
...
@@ -692,9 +789,10 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
const
rectangle
scan_fhog_pyramid
<
Pyramid_type
>::
const
rectangle
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
get_best_matching_rect
(
const
rectangle
&
rect
)
const
...
...
@@ -708,9 +806,10 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
void
scan_fhog_pyramid
<
Pyramid_type
>::
void
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
get_mapped_rect_and_metadata
(
const
unsigned
long
number_pyramid_levels
,
const
rectangle
&
rect
,
...
...
@@ -731,9 +830,9 @@ namespace dlib
// window.
for
(
unsigned
long
l
=
0
;
l
<
number_pyramid_levels
;
++
l
)
{
const
rectangle
rect_fhog_space
=
image_to_fhog
(
pyr
.
rect_down
(
rect
,
l
),
cell_size
,
height
,
width
);
const
rectangle
rect_fhog_space
=
fe
.
image_to_feats
(
pyr
.
rect_down
(
rect
,
l
),
cell_size
,
height
,
width
);
const
rectangle
win_image_space
=
pyr
.
rect_up
(
f
hog
_to_image
(
centered_rect
(
center
(
rect_fhog_space
),
width
-
2
*
padding
,
height
-
2
*
padding
),
cell_size
,
height
,
width
),
l
);
const
rectangle
win_image_space
=
pyr
.
rect_up
(
f
e
.
feats
_to_image
(
centered_rect
(
center
(
rect_fhog_space
),
width
-
2
*
padding
,
height
-
2
*
padding
),
cell_size
,
height
,
width
),
l
);
const
double
match_score
=
get_match_score
(
win_image_space
,
rect
);
if
(
match_score
>
best_match_score
)
...
...
@@ -746,15 +845,16 @@ namespace dlib
if
(
rect_fhog_space
.
area
()
<=
1
)
break
;
}
mapped_rect
=
pyr
.
rect_up
(
f
hog
_to_image
(
shrink_rect
(
fhog_rect
,
padding
),
cell_size
,
height
,
width
),
best_level
);
mapped_rect
=
pyr
.
rect_up
(
f
e
.
feats
_to_image
(
shrink_rect
(
fhog_rect
,
padding
),
cell_size
,
height
,
width
),
best_level
);
}
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
full_object_detection
scan_fhog_pyramid
<
Pyramid_type
>::
full_object_detection
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
get_full_object_detection
(
const
rectangle
&
rect
,
const
feature_vector_type
&
...
...
@@ -766,9 +866,10 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
void
scan_fhog_pyramid
<
Pyramid_type
>::
void
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
get_feature_vector
(
const
full_object_detection
&
obj
,
feature_vector_type
&
psi
...
...
@@ -814,9 +915,10 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
void
scan_fhog_pyramid
<
Pyramid_type
>::
void
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
set_min_pyramid_layer_size
(
unsigned
long
width
,
unsigned
long
height
...
...
@@ -838,9 +940,10 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
unsigned
long
scan_fhog_pyramid
<
Pyramid_type
>::
unsigned
long
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
get_min_pyramid_layer_width
(
)
const
{
...
...
@@ -850,9 +953,10 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
unsigned
long
scan_fhog_pyramid
<
Pyramid_type
>::
unsigned
long
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
get_min_pyramid_layer_height
(
)
const
{
...
...
@@ -863,10 +967,11 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
matrix
<
unsigned
char
>
draw_fhog
(
const
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
>
>&
detector
,
const
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>
>&
detector
,
const
unsigned
long
weight_index
=
0
,
const
long
cell_draw_size
=
15
)
...
...
@@ -887,17 +992,18 @@ namespace dlib
<<
"
\n\t
detector.get_scanner().get_num_dimensions(): "
<<
detector
.
get_scanner
().
get_num_dimensions
()
);
typename
scan_fhog_pyramid
<
Pyramid_type
>::
fhog_filterbank
fb
=
detector
.
get_scanner
().
build_fhog_filterbank
(
detector
.
get_w
(
weight_index
));
typename
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
fhog_filterbank
fb
=
detector
.
get_scanner
().
build_fhog_filterbank
(
detector
.
get_w
(
weight_index
));
return
draw_fhog
(
fb
.
get_filters
(),
cell_draw_size
);
}
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
unsigned
long
num_separable_filters
(
const
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
>
>&
detector
,
const
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>
>&
detector
,
const
unsigned
long
weight_index
=
0
)
{
...
...
@@ -915,17 +1021,18 @@ namespace dlib
<<
"
\n\t
detector.get_scanner().get_num_dimensions(): "
<<
detector
.
get_scanner
().
get_num_dimensions
()
);
typename
scan_fhog_pyramid
<
Pyramid_type
>::
fhog_filterbank
fb
=
detector
.
get_scanner
().
build_fhog_filterbank
(
detector
.
get_w
(
weight_index
));
typename
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
fhog_filterbank
fb
=
detector
.
get_scanner
().
build_fhog_filterbank
(
detector
.
get_w
(
weight_index
));
return
fb
.
num_separable_filters
();
}
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
>
>
threshold_filter_singular_values
(
const
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
>
>&
detector
,
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>
>
threshold_filter_singular_values
(
const
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>
>&
detector
,
double
thresh
,
const
unsigned
long
weight_index
=
0
)
...
...
@@ -953,6 +1060,7 @@ namespace dlib
const
unsigned
long
width
=
detector
.
get_scanner
().
get_fhog_window_width
();
const
unsigned
long
height
=
detector
.
get_scanner
().
get_fhog_window_height
();
const
long
num_planes
=
detector
.
get_scanner
().
get_feature_extractor
().
get_num_planes
();
const
long
size
=
width
*
height
;
std
::
vector
<
matrix
<
double
,
0
,
1
>
>
detector_weights
;
...
...
@@ -963,7 +1071,7 @@ namespace dlib
if
(
j
==
weight_index
)
{
matrix
<
double
>
u
,
v
,
w
,
f
;
for
(
int
i
=
0
;
i
<
31
;
++
i
)
for
(
long
i
=
0
;
i
<
num_planes
;
++
i
)
{
f
=
reshape
(
rowm
(
weights
,
range
(
i
*
size
,
(
i
+
1
)
*
size
-
1
)),
height
,
width
);
...
...
@@ -978,7 +1086,7 @@ namespace dlib
detector_weights
.
push_back
(
weights
);
}
return
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
>
>
(
detector
.
get_scanner
(),
return
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>
>
(
detector
.
get_scanner
(),
detector
.
get_overlap_tester
(),
detector_weights
);
}
...
...
@@ -987,19 +1095,21 @@ namespace dlib
template
<
typename
Pyramid_type
,
typename
feature_extractor_type
,
typename
svm_struct_prob_type
>
void
configure_nuclear_norm_regularizer
(
const
scan_fhog_pyramid
<
Pyramid_type
>&
scanner
,
const
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>&
scanner
,
svm_struct_prob_type
&
prob
)
{
const
double
strength
=
scanner
.
get_nuclear_norm_regularization_strength
();
const
long
num_planes
=
scanner
.
get_feature_extractor
().
get_num_planes
();
if
(
strength
!=
0
)
{
const
unsigned
long
width
=
scanner
.
get_fhog_window_width
();
const
unsigned
long
height
=
scanner
.
get_fhog_window_height
();
for
(
int
i
=
0
;
i
<
31
;
++
i
)
for
(
long
i
=
0
;
i
<
num_planes
;
++
i
)
{
prob
.
add_nuclear_norm_regularizer
(
i
*
width
*
height
,
height
,
width
,
strength
);
}
...
...
@@ -1010,17 +1120,18 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
struct
processed_weight_vector
<
scan_fhog_pyramid
<
Pyramid_type
>
>
struct
processed_weight_vector
<
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>
>
{
processed_weight_vector
(){}
typedef
matrix
<
double
,
0
,
1
>
feature_vector_type
;
typedef
typename
scan_fhog_pyramid
<
Pyramid_type
>::
fhog_filterbank
fhog_filterbank
;
typedef
typename
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>::
fhog_filterbank
fhog_filterbank
;
void
init
(
const
scan_fhog_pyramid
<
Pyramid_type
>&
scanner
const
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>&
scanner
)
{
fb
=
scanner
.
build_fhog_filterbank
(
w
);
...
...
dlib/image_processing/scan_fhog_pyramid_abstract.h
View file @
b0795c96
...
...
@@ -13,10 +13,11 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
matrix
<
unsigned
char
>
draw_fhog
(
const
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
>
>&
detector
,
const
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>
>&
detector
,
const
unsigned
long
weight_index
=
0
,
const
long
cell_draw_size
=
15
);
...
...
@@ -37,10 +38,11 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
unsigned
long
num_separable_filters
(
const
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
>
>&
detector
,
const
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>
>&
detector
,
const
unsigned
long
weight_index
=
0
);
/*!
...
...
@@ -57,10 +59,11 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
typename
Pyramid_type
,
typename
feature_extractor_type
>
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
>
>
threshold_filter_singular_values
(
const
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
>
>&
detector
,
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>
>
threshold_filter_singular_values
(
const
object_detector
<
scan_fhog_pyramid
<
Pyramid_type
,
feature_extractor_type
>
>&
detector
,
double
thresh
,
const
unsigned
long
weight_index
=
0
);
...
...
@@ -84,8 +87,115 @@ namespace dlib
// ----------------------------------------------------------------------------------------
class default_fhog_feature_extractor
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            The scan_fhog_pyramid object defined below is primarily meant to be used
            with the feature extraction technique implemented by extract_fhog_features().
            This technique can generally be understood as taking an input image and
            outputting a multi-planed output image of floating point numbers that
            somehow describe the image contents.  Since there are many ways to define
            how this feature mapping is performed, the scan_fhog_pyramid allows you to
            replace the extract_fhog_features() method with a customized method of your
            choosing.  To do this you implement a class with the same interface as
            default_fhog_feature_extractor.

            Therefore, the point of default_fhog_feature_extractor is twofold.  First,
            it provides the default FHOG feature extraction method used by scan_fhog_pyramid.
            Second, it serves to document the interface you need to implement to define
            your own custom HOG style feature extraction.
    !*/

public:

    rectangle image_to_feats (
        const rectangle& rect,
        int cell_size,
        int filter_rows_padding,
        int filter_cols_padding
    ) const { return image_to_fhog(rect, cell_size, filter_rows_padding, filter_cols_padding); }
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
        ensures
            - Maps a rectangle from the coordinates in an input image to the corresponding
              area in the output feature image.
    !*/

    rectangle feats_to_image (
        const rectangle& rect,
        int cell_size,
        int filter_rows_padding,
        int filter_cols_padding
    ) const { return fhog_to_image(rect, cell_size, filter_rows_padding, filter_cols_padding); }
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
        ensures
            - Maps a rectangle from the coordinates of the hog feature image back to
              the input image.
            - Mapping from feature space to image space is an invertible
              transformation.  That is, for any rectangle R we have:
                R == image_to_feats(feats_to_image(R,cell_size,filter_rows_padding,filter_cols_padding),
                                                     cell_size,filter_rows_padding,filter_cols_padding).
    !*/

    // Note: the template parameter list previously read
    // `typename Pyramid_type typename image_type`, which is ill-formed; the only
    // parameter this member uses is image_type.
    template <
        typename image_type
        >
    void operator()(
        const image_type& img,
        dlib::array<array2d<float> >& hog,
        int cell_size,
        int filter_rows_padding,
        int filter_cols_padding
    ) const { extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding); }
    /*!
        requires
            - image_type is an implementation of array2d/array2d_kernel_abstract.h
            - img contains some kind of pixel type.
              (i.e. pixel_traits<typename image_type::type> is defined)
        ensures
            - Extracts FHOG features by calling extract_fhog_features().  The results are
              stored into #hog.  Note that if you are implementing your own feature extractor you can
              pretty much do whatever you want in terms of feature extraction so long as the following
              conditions are met:
                - #hog.size() == get_num_planes()
                - Each image plane of #hog has the same dimensions.
                - for all valid i, r, and c:
                    - #hog[i][r][c] == a feature value describing the image content centered at the
                      following pixel location in img:
                        feats_to_image(point(c,r),cell_size,filter_rows_padding,filter_cols_padding)
    !*/

    inline long get_num_planes (
    ) const { return 31; }
    /*!
        ensures
            - returns the number of planes in the hog image output by the operator()
              method.
    !*/
};
inline void serialize (
    const default_fhog_feature_extractor& /*item*/,
    std::ostream& /*out*/
)
{
}

inline void deserialize (
    default_fhog_feature_extractor& /*item*/,
    std::istream& /*in*/
)
{
}
/*!
    Provides serialization support.  The default hog feature extractor holds no
    state, so both functions have empty bodies and do nothing.  If you define a
    custom feature extractor, remember to serialize any state it contains.
!*/
// ----------------------------------------------------------------------------------------
template
<
typename
Pyramid_type
,
typename
Feature_extractor_type
=
default_fhog_feature_extractor
>
class
scan_fhog_pyramid
:
noncopyable
{
...
...
@@ -95,6 +205,10 @@ namespace dlib
dlib/image_transforms/image_pyramid_abstract.h or an object with a
compatible interface
REQUIREMENTS ON Feature_extractor_type
- Must be a type with an interface compatible with the
default_fhog_feature_extractor.
INITIAL VALUE
- get_padding() == 1
- get_cell_size() == 8
...
...
@@ -129,6 +243,7 @@ namespace dlib
public
:
typedef
matrix
<
double
,
0
,
1
>
feature_vector_type
;
typedef
Pyramid_type
pyramid_type
;
typedef
Feature_extractor_type
feature_extractor_type
;
scan_fhog_pyramid
(
);
...
...
@@ -137,6 +252,15 @@ namespace dlib
- this object is properly initialized
!*/
scan_fhog_pyramid
(
const
feature_extractor_type
&
fe
);
/*!
ensures
- this object is properly initialized
- #get_feature_extractor() == fe
!*/
template
<
typename
image_type
>
...
...
@@ -154,6 +278,13 @@ namespace dlib
locations. Call detect() to do this.
!*/
const
feature_extractor_type
&
get_feature_extractor
(
)
const
;
/*!
ensures
- returns a const reference to the feature extractor used by this object.
!*/
bool
is_loaded_with_image
(
)
const
;
/*!
...
...
@@ -197,7 +328,8 @@ namespace dlib
- Since we use a HOG feature representation, the detection procedure works
as follows:
Step 1. Make an image pyramid.
Step 2. Convert each layer of the image pyramid into a 31 band HOG "image".
Step 2. Convert each layer of the image pyramid into a multi-planed HOG "image".
(the number of bands is given by get_feature_extractor().get_num_planes())
Step 3. Scan a linear classifier over each HOG image in the pyramid.
Moreover, the HOG features quantize the input image into a grid of cells,
each cell being get_cell_size() by get_cell_size() pixels in size. So
...
...
@@ -289,9 +421,9 @@ namespace dlib
)
const
;
/*!
ensures
-
get_fhog_window_width()*get_fhog_window_height()*31
(i.e. The number of features is equal to the size of the HOG window
t
imes 31 since there are 31 channels in the HOG feature representation.
)
-
returns get_fhog_window_width()*get_fhog_window_height()*get_feature_extractor().get_num_planes()
(i.e. The number of features is equal to the size of the HOG window
times the number of planes output by the feature extractor.)
!*/
inline
unsigned
long
get_num_detection_templates
(
...
...
@@ -375,7 +507,7 @@ namespace dlib
ensures
- Creates and then returns a fhog_filterbank object FB such that:
- FB.get_num_dimensions() == get_num_dimensions()
- FB.get_filters() == the values in weights unpacked into
31
filters.
- FB.get_filters() == the values in weights unpacked into
get_feature_extractor().get_num_planes()
filters.
- FB.num_separable_filters() == the number of separable filters necessary to
represent all the filters in FB.get_filters().
!*/
...
...
@@ -384,10 +516,10 @@ namespace dlib
{
/*!
WHAT THIS OBJECT REPRESENTS
This object represents a HOG filter bank. That is, the classifier that
is slid over a HOG pyramid is a set of 31 linear filters, each
get_fhog_window_width() rows by get_fhog_window_height() columns in
size. This object contains that set of 31
filters.
This object represents a HOG filter bank.  That is, the classifier that
is slid over a HOG pyramid is a set of get_feature_extractor().get_num_planes()
linear filters, each get_fhog_window_height() rows by get_fhog_window_width()
columns in size.  This object contains that set of filters.
!*/
public
:
...
...
@@ -402,7 +534,7 @@ namespace dlib
)
const
;
/*!
ensures
- returns the set of
31
HOG filters in this object.
- returns the set of HOG filters in this object.
!*/
unsigned
long
num_separable_filters
(
...
...
@@ -510,7 +642,7 @@ namespace dlib
ensures
- If the number of separable filters in a fhog_filterbank is small then the
filter bank can be scanned over an image much faster than a normal set of
31
filters. Therefore, this object provides the option to encourage
filters. Therefore, this object provides the option to encourage
machine learning methods that learn a HOG filter bank (i.e.
structural_object_detection_trainer) to select filter banks that have
this beneficial property. In particular, the value returned by
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment