Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
51c0c148
Commit
51c0c148
authored
Sep 08, 2011
by
Davis King
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added the scan_image_pyramid object.
parent
0aac2844
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
1001 additions
and
0 deletions
+1001
-0
image_processing.h
dlib/image_processing.h
+1
-0
scan_image_pyramid.h
dlib/image_processing/scan_image_pyramid.h
+653
-0
scan_image_pyramid_abstract.h
dlib/image_processing/scan_image_pyramid_abstract.h
+347
-0
No files found.
dlib/image_processing.h
View file @
51c0c148
...
...
@@ -4,6 +4,7 @@
#define DLIB_IMAGE_PROCESSInG_H___
#include "image_processing/scan_image.h"
#include "image_processing/scan_image_pyramid.h"
#endif // DLIB_IMAGE_PROCESSInG_H___
...
...
dlib/image_processing/scan_image_pyramid.h
0 → 100644
View file @
51c0c148
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SCAN_IMaGE_PYRAMID_H__
#define DLIB_SCAN_IMaGE_PYRAMID_H__
#include "scan_image_pyramid_abstract.h"
#include "../matrix.h"
#include "../geometry.h"
#include "../image_processing.h"
#include "../array2d.h"
#include <vector>
namespace
dlib
{
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
class scan_image_pyramid : noncopyable
{
    // Sliding window detector that scans a set of detection templates over every
    // level of an image pyramid.  The public contract of each member is written
    // out in scan_image_pyramid_abstract.h.

public:

    typedef matrix<double,0,1> feature_vector_type;
    typedef Pyramid_type pyramid_type;
    typedef Feature_extractor_type feature_extractor_type;

    scan_image_pyramid ();

    // Builds the per-level feature images (feats) from img.
    template <typename image_type>
    void load (const image_type& img);

    inline bool is_loaded_with_image () const;

    // Configuration copying.  Neither overload touches the feats array that
    // load() fills in.
    inline void copy_configuration (const feature_extractor_type& fe);
    inline void copy_configuration (const scan_image_pyramid& item);

    void add_detection_template (
        const rectangle& object_box,
        const std::vector<rectangle>& feature_extraction_regions
    );

    inline unsigned long get_num_detection_templates () const;
    inline unsigned long get_num_components_per_detection_template () const;
    inline long get_num_dimensions () const;

    unsigned long get_max_pyramid_levels () const;
    void set_max_pyramid_levels (unsigned long max_levels);

    inline unsigned long get_max_detections_per_template () const;
    void set_max_detections_per_template (unsigned long max_dets);

    void detect (
        const feature_vector_type& w,
        std::vector<std::pair<double, rectangle> >& dets,
        const double thresh
    ) const;

    void get_feature_vector (
        const std::vector<rectangle>& rects,
        feature_vector_type& psi,
        std::vector<rectangle>& mapped_rects
    ) const;

    // The free serialization routines read and write the private members directly.
    template <typename T, typename U>
    friend void serialize (
        const scan_image_pyramid<T,U>& item,
        std::ostream& out
    );

    template <typename T, typename U>
    friend void deserialize (
        scan_image_pyramid<T,U>& item,
        std::istream& in
    );

private:

    // Strict "less than" on the score (the .first member).  detect() hands this to
    // std::sort together with reverse iterators.
    static bool compare_pair_rect (
        const std::pair<double, rectangle>& a,
        const std::pair<double, rectangle>& b
    )
    {
        return b.first > a.first;
    }

    // One sliding window shape plus the regions features are pooled from.
    struct detection_template
    {
        rectangle object_box;          // always centered at (0,0)
        std::vector<rectangle> rects;  // template with respect to (0,0)
    };

    // Serialization of a single detection_template: object_box first, then rects.
    friend void serialize (const detection_template& item, std::ostream& out)
    {
        serialize(item.object_box, out);
        serialize(item.rects, out);
    }

    friend void deserialize (detection_template& item, std::istream& in)
    {
        deserialize(item.object_box, in);
        deserialize(item.rects, in);
    }

    // Holds the feature extractor configuration only.  load() copies it into
    // every element of feats.
    feature_extractor_type feats_config;

    // One feature extractor per pyramid level; feats[0] is loaded with the
    // original image.
    typename array<feature_extractor_type>::kernel_2a feats;

    std::vector<detection_template> det_templates;
    unsigned long max_dets_per_template;
    unsigned long max_pyramid_levels;
};
// ----------------------------------------------------------------------------------------
// Writes the state of a scan_image_pyramid to out.  The members are written in a
// fixed order which deserialize() reads back in exactly the same sequence:
// feats_config, feats, det_templates, max_dets_per_template, max_pyramid_levels.
template <typename T, typename U>
void serialize (
    const scan_image_pyramid<T,U>& item,
    std::ostream& out
)
{
    serialize(item.feats_config, out);
    serialize(item.feats, out);
    serialize(item.det_templates, out);
    serialize(item.max_dets_per_template, out);
    serialize(item.max_pyramid_levels, out);
}
// ----------------------------------------------------------------------------------------
// Restores the state of a scan_image_pyramid from in.  The members are read in
// the same order serialize() writes them: feats_config, feats, det_templates,
// max_dets_per_template, max_pyramid_levels.
template <typename T, typename U>
void deserialize (
    scan_image_pyramid<T,U>& item,
    std::istream& in
)
{
    deserialize(item.feats_config, in);
    deserialize(item.feats, in);
    deserialize(item.det_templates, in);
    deserialize(item.max_dets_per_template, in);
    deserialize(item.max_pyramid_levels, in);
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// scan_image_pyramid member functions
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Default constructor.  Starts with no detection templates and no loaded image,
// a limit of 2000 detections per template scan and at most 1000 pyramid levels.
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
scan_image_pyramid (
) :
    max_dets_per_template(2000),
    max_pyramid_levels(1000)
{
}
// ----------------------------------------------------------------------------------------
// Loads img into this object by building one feature image per pyramid level.
//
// feats[0] is loaded with img itself and each following element is loaded with
// the previous level passed through the pyramid_type downsampler.  Levels keep
// being added while the rectangle of the level just counted is larger than 20
// pixels in both dimensions and fewer than max_pyramid_levels have been created.
// At least one level is always created, so images that are 20 pixels or smaller
// in either dimension are still loaded (as a single level) instead of indexing
// into an empty feats array.
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
template <
    typename image_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
load (
    const image_type& img
)
{
    unsigned long levels = 0;
    rectangle rect = get_rect(img);

    // Figure out how many pyramid levels we should be using based on the image
    // size.  This is a do/while loop on purpose: level 0 (the input image) must
    // exist no matter how small the image is since feats[0] is used below.
    pyramid_type pyr;
    do
    {
        rect = pyr.rect_down(rect);
        ++levels;
    } while (rect.width() > 20 && rect.height() > 20 && levels < max_pyramid_levels);

    if (feats.max_size() < levels)
        feats.set_max_size(levels);
    feats.set_size(levels);

    // every level uses the same feature extractor configuration
    for (unsigned long i = 0; i < feats.size(); ++i)
        feats[i].copy_configuration(feats_config);

    // build our feature pyramid
    feats[0].load(img);
    if (feats.size() > 1)
    {
        // temp2 always holds the most recently produced level and temp1 is the
        // scratch image the next level is written into.
        image_type temp1, temp2;
        pyr(img, temp1);
        feats[1].load(temp1);
        swap(temp1, temp2);

        for (unsigned long i = 2; i < feats.size(); ++i)
        {
            pyr(temp2, temp1);
            feats[i].load(temp1);
            swap(temp1, temp2);
        }
    }
}
// ----------------------------------------------------------------------------------------
// Returns the limit that detect() passes to scan_image() for each scan of one
// detection template over one pyramid level.
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_max_detections_per_template (
) const
{
    return this->max_dets_per_template;
}
// ----------------------------------------------------------------------------------------
// Sets the per-scan detection limit returned by get_max_detections_per_template().
// Requires max_dets > 0.
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
set_max_detections_per_template (
    unsigned long max_dets
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(max_dets > 0,
        "\t void scan_image_pyramid::set_max_detections_per_template()"
        << "\n\t The max number of possible detections can't be zero. "
        << "\n\t max_dets: " << max_dets
        << "\n\t this: " << this
        );

    this->max_dets_per_template = max_dets;
}
// ----------------------------------------------------------------------------------------
// Returns true when the feats array has at least one element.  load() is the
// member function that fills feats.
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
bool scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
is_loaded_with_image (
) const
{
    const bool has_levels = (feats.size() != 0);
    return has_levels;
}
// ----------------------------------------------------------------------------------------
// Copies the configuration of fe into feats_config, the feature extractor whose
// settings load() applies to every pyramid level.
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
copy_configuration(
    const feature_extractor_type& fe
)
{
    // Deliberately not "return feats_config.copy_configuration(fe);".  This is a
    // void function, so returning the call's value only compiles when the
    // extractor's copy_configuration() itself returns void.
    feats_config.copy_configuration(fe);
}
// ----------------------------------------------------------------------------------------
// Copies the feature extractor configuration, the detection templates and both
// limits (max detections per template, max pyramid levels) from item into *this.
// The feats array populated by load() is not copied.
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
copy_configuration (
    const scan_image_pyramid& item
)
{
    this->feats_config.copy_configuration(item.feats_config);
    this->det_templates         = item.det_templates;
    this->max_dets_per_template = item.max_dets_per_template;
    this->max_pyramid_levels    = item.max_pyramid_levels;
}
// ----------------------------------------------------------------------------------------
// Appends a new detection template made of object_box (the sliding window shape)
// and feature_extraction_regions (the rectangles features are pooled from).
//
// Requires:
//   - center(object_box) == point(0,0)
//   - if templates were already added then feature_extraction_regions must have
//     as many rectangles as the existing templates.
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
add_detection_template (
    const rectangle& object_box,
    const std::vector<rectangle>& feature_extraction_regions
)
{
    // make sure requires clause is not broken
    //
    // The two requirements are checked separately so each failure reports its own
    // cause.  It also keeps get_num_components_per_detection_template() out of the
    // message of the centering check: that function asserts that at least one
    // template exists, so calling it while reporting a badly centered first
    // template would fail with an unrelated error.
    DLIB_ASSERT(center(object_box) == point(0,0),
        "\t void scan_image_pyramid::add_detection_template()"
        << "\n\t The object_box must be centered at point(0,0)."
        << "\n\t center(object_box): " << center(object_box)
        << "\n\t this: " << this
        );

    // This can only fail when get_num_detection_templates() > 0, so the calls to
    // get_num_components_per_detection_template() in the message are safe.
    DLIB_ASSERT(get_num_detection_templates() == 0 ||
                get_num_components_per_detection_template() == feature_extraction_regions.size(),
        "\t void scan_image_pyramid::add_detection_template()"
        << "\n\t The number of rects in this new detection template doesn't match "
        << "\n\t the number in previous detection templates."
        << "\n\t get_num_components_per_detection_template(): " << get_num_components_per_detection_template()
        << "\n\t feature_extraction_regions.size(): " << feature_extraction_regions.size()
        << "\n\t this: " << this
        );

    detection_template temp;
    temp.object_box = object_box;
    temp.rects = feature_extraction_regions;
    det_templates.push_back(temp);
}
// ----------------------------------------------------------------------------------------
// Returns how many detection templates have been added with
// add_detection_template() (or copied in by copy_configuration()).
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_num_detection_templates (
) const
{
    return this->det_templates.size();
}
// ----------------------------------------------------------------------------------------
// Returns the number of feature extraction rectangles in the first detection
// template.  add_detection_template() requires every later template to use the
// same count.  Requires get_num_detection_templates() > 0.
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_num_components_per_detection_template (
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(get_num_detection_templates() > 0,
        "\t unsigned long scan_image_pyramid::get_num_components_per_detection_template()"
        << "\n\t You need to give some detection templates before calling this function. "
        << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
        << "\n\t this: " << this
        );

    return det_templates.front().rects.size();
}
// ----------------------------------------------------------------------------------------
// Returns the dimensionality of a sliding window feature vector: the number of
// dimensions reported by the feature extractor multiplied by the number of
// feature extraction rectangles per template.
// Requires get_num_detection_templates() > 0.
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_num_dimensions (
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(get_num_detection_templates() > 0,
        "\t long scan_image_pyramid::get_num_dimensions()"
        << "\n\t You need to give some detection templates before calling this function. "
        << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
        << "\n\t this: " << this
        );

    return feats_config.get_num_dimensions()*get_num_components_per_detection_template();
}
// ----------------------------------------------------------------------------------------
// Returns the upper bound on the number of pyramid levels load() will create.
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_max_pyramid_levels (
) const
{
    return this->max_pyramid_levels;
}
// ----------------------------------------------------------------------------------------
// Sets the upper bound on the number of pyramid levels load() will create.
// Requires max_levels > 0.
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
set_max_pyramid_levels (
    unsigned long max_levels
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(max_levels > 0,
        "\t void scan_image_pyramid::set_max_pyramid_levels()"
        << "\n\t You can't have zero levels. "
        << "\n\t max_levels: " << max_levels
        << "\n\t this: " << this
        );

    this->max_pyramid_levels = max_levels;
}
// ----------------------------------------------------------------------------------------
// Scans every detection template over every pyramid level and stores the
// resulting (score, rectangle) pairs in dets, sorted by descending score.
//
//   w      - weight vector.  Component i of a template uses the slice of w that
//            starts at feats_config.get_num_dimensions()*i.
//   dets   - cleared on entry and then filled with the detections.  Rectangles
//            are mapped back up the pyramid with rect_up() before being stored.
//   thresh - threshold handed to scan_image().
//
// Requires get_num_detection_templates() > 0, is_loaded_with_image() and
// w.size() >= get_num_dimensions().
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
detect (
    const feature_vector_type& w,
    std::vector<std::pair<double, rectangle> >& dets,
    const double thresh
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(get_num_detection_templates() > 0 &&
                is_loaded_with_image() &&
                w.size() >= get_num_dimensions(),
        "\t void scan_image_pyramid::detect()"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
        << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
        << "\n\t w.size(): " << w.size()
        << "\n\t get_num_dimensions(): " << get_num_dimensions()
        << "\n\t this: " << this
        );

    dets.clear();

    // One saliency image per template component.  saliency_images[i][r][c] is the
    // dot product between the descriptor at (r,c) and the i-th slice of w.
    array<array2d<double> >::kernel_2a saliency_images;
    saliency_images.set_max_size(get_num_components_per_detection_template());
    saliency_images.set_size(get_num_components_per_detection_template());
    std::vector<std::pair<unsigned int, rectangle> > region_rects(get_num_components_per_detection_template());
    pyramid_type pyr;
    std::vector<std::pair<double, point> > point_dets;

    // for all pyramid levels
    for (unsigned long l = 0; l < feats.size(); ++l)
    {
        for (unsigned long i = 0; i < saliency_images.size(); ++i)
            saliency_images[i].set_size(feats[l].nr(), feats[l].nc());

        // build saliency images for pyramid level l
        for (long r = 0; r < feats[l].nr(); ++r)
        {
            for (long c = 0; c < feats[l].nc(); ++c)
            {
                const typename feature_extractor_type::descriptor_type& descriptor = feats[l](r,c);

                for (unsigned long i = 0; i < saliency_images.size(); ++i)
                {
                    // start of the part of w that belongs to component i
                    const unsigned long weight_offset = feats_config.get_num_dimensions()*i;
                    double sum = 0;
                    for (unsigned long k = 0; k < descriptor.size(); ++k)
                    {
                        sum += w(descriptor[k].first + weight_offset)*descriptor[k].second;
                    }
                    saliency_images[i][r][c] = sum;
                }
            }
        }

        // Shift applied to template rectangles after they are mapped into feature
        // space.  It depends only on the pyramid level so it is computed once here
        // rather than once per template.
        const point feat_shift = -feats[l].image_to_feat_space(point(0,0));

        // now search the saliency images
        for (unsigned long i = 0; i < det_templates.size(); ++i)
        {
            for (unsigned long j = 0; j < region_rects.size(); ++j)
            {
                region_rects[j] = std::make_pair(j, translate_rect(feats[l].image_to_feat_space(det_templates[i].rects[j]), feat_shift));
            }

            scan_image(point_dets, saliency_images, region_rects, thresh, max_dets_per_template);

            // convert all the point detections into rectangles at the original image scale and coordinate system
            for (unsigned long j = 0; j < point_dets.size(); ++j)
            {
                const double score = point_dets[j].first;
                point p = point_dets[j].second;
                p = feats[l].feat_to_image_space(p);
                rectangle rect = translate_rect(det_templates[i].object_box, p);
                rectangle old_rect = rect; // TODO remove later
                rect = pyr.rect_up(rect, l);
                // sanity check: mapping back down the pyramid must reproduce the
                // rectangle we started from
                DLIB_CASSERT(pyr.rect_down(rect,l) == old_rect, "");

                dets.push_back(std::make_pair(score, rect));

                {
                    // sanity checks: the stored rectangle must lead back to the
                    // same feature space location that produced the detection
                    rectangle r = pyr.rect_down(rect,l);
                    const point origin = center(r);
                    DLIB_CASSERT(origin == p, origin << " " << p);
                    DLIB_CASSERT(feats[l].image_to_feat_space(origin) == point_dets[j].second, "");
                }
            }
        }
    }

    // compare_pair_rect orders by ascending score, so sorting through reverse
    // iterators leaves dets in descending score order.
    std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
}
// ----------------------------------------------------------------------------------------
// Accumulates into psi the feature vectors of the sliding window locations that
// best match each rectangle in rects, and reports in mapped_rects the rectangle
// each input was actually mapped to.
//
//   rects        - rectangles in the coordinate system of the loaded image.
//   psi          - set to 0 and then incremented.  It is not resized here.
//   mapped_rects - cleared, then receives one rectangle per element of rects.
//
// Requires get_num_detection_templates() > 0, is_loaded_with_image() and
// psi.size() >= get_num_dimensions().
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_feature_vector (
    const std::vector<rectangle>& rects,
    feature_vector_type& psi,
    std::vector<rectangle>& mapped_rects
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(get_num_detection_templates() > 0 &&
                is_loaded_with_image() &&
                psi.size() >= get_num_dimensions(),
        "\t void scan_image_pyramid::get_feature_vector()"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
        << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
        << "\n\t psi.size(): " << psi.size()
        << "\n\t get_num_dimensions(): " << get_num_dimensions()
        << "\n\t this: " << this
        );

    psi = 0;
    mapped_rects.clear();

    pyramid_type pyr;
    for (unsigned long i = 0; i < rects.size(); ++i)
    {
        // Figure out the pyramid level which best matches rects[i] against one of our
        // detection template object boxes.
        unsigned long best_level = 0;
        double match_score = std::numeric_limits<double>::infinity();
        detection_template best_template;

        rectangle rect = rects[i];
        const dlib::vector<double,2> p(rect.width(), rect.height());

        // for all the levels
        for (unsigned long l = 0; l < feats.size(); ++l)
        {
            // Run the center point through the feature/image space transformation just to make
            // sure we exactly replicate the procedure for shifting an object_box used elsewhere
            // in this file.
            const point origin = feats[l].feat_to_image_space(feats[l].image_to_feat_space(center(pyr.rect_down(rect,l))));

            for (unsigned long t = 0; t < det_templates.size(); ++t)
            {
                // Map this detection template into the normal image space and see how
                // close it is to the rect we are looking for.  We do the translation here
                // because the rect_up() routine takes place using integer arithmetic and
                // could potentially give slightly different results with and without the
                // translation.
                rectangle mapped_rect = translate_rect(det_templates[t].object_box, origin);
                mapped_rect = pyr.rect_up(mapped_rect, l);
                const dlib::vector<double,2> p2(mapped_rect.width(), mapped_rect.height());

                // distance between the (width,height) of the two rectangles
                const double size_mismatch = (p-p2).length();
                if (size_mismatch < match_score)
                {
                    match_score = size_mismatch;
                    best_level = l;
                    best_template = det_templates[t];
                }
            }
        }

        // Now get the features out of feats[best_level].  But first translate best_template
        // into the right spot (it should be centered at the location determined by rects[i])
        // and convert it into the feature image coordinate system.
        rect = pyr.rect_down(rects[i], best_level);
        const point offset = -feats[best_level].image_to_feat_space(point(0,0));
        const point origin = feats[best_level].image_to_feat_space(center(rect)) + offset;
        for (unsigned long k = 0; k < best_template.rects.size(); ++k)
        {
            rectangle temp = best_template.rects[k];
            temp = feats[best_level].image_to_feat_space(temp);
            temp = translate_rect(temp, origin);
            // clip to the area covered by the feature image
            temp = get_rect(feats[best_level]).intersect(temp);
            best_template.rects[k] = temp;
        }

        // The input rectangle was mapped to one of the detection templates.  Reverse the process
        // to figure out what the mapped rectangle is in the original input space.
        rectangle mapped_rect = translate_rect(best_template.object_box, feats[best_level].feat_to_image_space(origin-offset));
        mapped_rect = pyr.rect_up(mapped_rect, best_level);
        mapped_rects.push_back(mapped_rect);

        // Add the descriptors inside each template region into the part of psi
        // that belongs to that region.
        for (unsigned long j = 0; j < best_template.rects.size(); ++j)
        {
            rect = best_template.rects[j];
            const unsigned long template_region_id = j;
            const unsigned long psi_offset = feats_config.get_num_dimensions()*template_region_id;
            for (long r = rect.top(); r <= rect.bottom(); ++r)
            {
                for (long c = rect.left(); c <= rect.right(); ++c)
                {
                    const typename feature_extractor_type::descriptor_type& descriptor = feats[best_level](r,c);
                    for (unsigned long k = 0; k < descriptor.size(); ++k)
                    {
                        psi(descriptor[k].first + psi_offset) += descriptor[k].second;
                    }
                }
            }
        }
    }
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_IMaGE_PYRAMID_H__
dlib/image_processing/scan_image_pyramid_abstract.h
0 → 100644
View file @
51c0c148
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SCAN_IMaGE_PYRAMID_ABSTRACT_H__
#ifdef DLIB_SCAN_IMaGE_PYRAMID_ABSTRACT_H__
#include "../matrix.h"
#include "../geometry.h"
#include "../image_processing.h"
#include "../array2d.h"
#include <vector>
namespace
dlib
{
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
class scan_image_pyramid : noncopyable
{
    /*!
        REQUIREMENTS ON Pyramid_type
            - must be one of the pyramid_down objects defined in
              dlib/image_transforms/image_pyramid_abstract.h or an object with
              a compatible interface

        REQUIREMENTS ON Feature_extractor_type
            - must be an object with an interface compatible with the hashed_feature_image
              object defined in dlib/image_keypoint/hashed_feature_image_abstract.h.

        INITIAL VALUE
            - get_num_detection_templates() == 0
            - is_loaded_with_image() == false
            - get_max_detections_per_template() == 2000
            - get_max_pyramid_levels() == 1000

        WHAT THIS OBJECT REPRESENTS
            This object is a tool for running a sliding window classifier over
            an image pyramid.  This object can also be understood as a general
            tool for implementing the spatial pyramid models described in the paper:
                Beyond Bags of Features: Spatial Pyramid Matching for Recognizing
                Natural Scene Categories by Svetlana Lazebnik, Cordelia Schmid,
                and Jean Ponce

            The sliding window classifiers used by this object have three parts:
               1. The underlying feature extraction provided by Feature_extractor_type
                  objects, which associate a vector with each location in an image.

               2. A detection template.  This is a rectangle which defines the shape of a
                  sliding window (the object_box), as well as a set of rectangles which
                  envelop it.  This set of enveloping rectangles defines the spatial
                  structure of the overall feature extraction within a sliding window.
                  In particular, each location of a sliding window has a feature vector
                  associated with it.  This feature vector is defined as follows:
                    - Let N denote the number of enveloping rectangles.
                    - Let M denote the dimensionality of the vectors output by Feature_extractor_type
                      objects.
                    - Let F(i) == the M dimensional vector which is the sum of all vectors
                      given by our Feature_extractor_type object inside the ith enveloping
                      rectangle.
                    - Then the feature vector for a sliding window is an M*N dimensional vector
                      [F(1) F(2) F(3) ... F(N)] (i.e. it is a concatenation of the N vectors).
                      This feature vector can be thought of as a collection of N "bags of features",
                      each bag coming from a spatial location determined by one of the enveloping
                      rectangles.

               3. A weight vector and a threshold value.  The dot product between the weight
                  vector and the feature vector for a sliding window location gives the score
                  of the window.  If this score is greater than the threshold value then the
                  window location is output as a detection.

            Finally, the sliding window classifiers described above are applied to every level
            of an image pyramid.
    !*/

public:

    typedef matrix<double,0,1> feature_vector_type;

    typedef Pyramid_type pyramid_type;
    typedef Feature_extractor_type feature_extractor_type;

    scan_image_pyramid (
    );
    /*!
        ensures
            - this object is properly initialized
    !*/

    template <
        typename image_type
        >
    void load (
        const image_type& img
    );
    /*!
        requires
            - image_type must be a type with the following properties:
                - image_type is default constructible.
                - image_type is swappable by the global swap() function.
                - image_type logically represents some kind of image and therefore
                  has .nr() and .nc() member functions.  .nr() should return the
                  number of rows while .nc() returns the number of columns.
                - image_type objects can be loaded into Feature_extractor_type
                  objects via Feature_extractor_type::load().
                - image_type objects can be used with Pyramid_type.  That is,
                  if pyr is an object of type Pyramid_type while img1 and img2
                  are objects of image_type, then pyr(img1,img2) should be
                  a valid expression which downsamples img1 into img2.
        ensures
            - #is_loaded_with_image() == true
            - This object is ready to run sliding window classifiers over img.  Call
              detect() to do this.
    !*/

    bool is_loaded_with_image (
    ) const;
    /*!
        ensures
            - returns true if this object has been loaded with an image to process
              and false otherwise.
    !*/

    void copy_configuration (
        const feature_extractor_type& fe
    );
    /*!
        ensures
            - Let BASE_FE denote the feature_extractor_type object used
              internally for local feature extraction.  Then this function
              performs BASE_FE.copy_configuration(fe)
              (i.e. this function allows you to configure the parameters of the
              underlying feature extractor used by a scan_image_pyramid object)
    !*/

    void copy_configuration (
        const scan_image_pyramid& item
    );
    /*!
        ensures
            - copies all the state information of item into *this, except for state
              information populated by load().  More precisely, given two scan_image_pyramid
              objects S1 and S2, the following sequence of instructions should always
              result in both of them having the exact same state.
                S2.copy_configuration(S1);
                S1.load(img);
                S2.load(img);
    !*/

    void add_detection_template (
        const rectangle& object_box,
        const std::vector<rectangle>& feature_extraction_regions
    );
    /*!
        requires
            - center(object_box) == point(0,0),
            - if (get_num_detection_templates() > 0) then
                - get_num_components_per_detection_template() == feature_extraction_regions.size()
                  (i.e. if you already have detection templates in this object, then
                  any new detection template must declare a consistent number of
                  feature extraction regions)
        ensures
            - Adds another detection template to this object.  In particular, object_box
              defines the size and shape of a sliding window while feature_extraction_regions
              defines the locations for feature extraction as discussed in the WHAT THIS
              OBJECT REPRESENTS section above.  Note also that the locations of the feature
              extraction regions are relative to the object_box.
            - #get_num_detection_templates() == get_num_detection_templates() + 1
            - The order of rectangles in feature_extraction_regions matters.  Recall that
              each rectangle gets its own set of features.  So given two different templates,
              their ith rectangles will both share the same part of the weight vector (w)
              supplied to detect().  So there should be some reasonable correspondence
              between the rectangle ordering in different detection templates.  For
              example, different detection templates should place corresponding
              feature extraction regions in roughly the same part of the object_box.
    !*/

    unsigned long get_num_detection_templates (
    ) const;
    /*!
        ensures
            - returns the number of detection templates in this object
    !*/

    unsigned long get_num_components_per_detection_template (
    ) const;
    /*!
        requires
            - get_num_detection_templates() > 0
        ensures
            - A detection template is a rectangle which defines the shape of a
              sliding window (the object_box), as well as a set of rectangles which
              envelop it.  This function returns the number of enveloping rectangles
              in the detection templates used by this object.
    !*/

    long get_num_dimensions (
    ) const;
    /*!
        requires
            - get_num_detection_templates() > 0
        ensures
            - returns the number of dimensions in the feature vector for a sliding window
              location.  This value is the dimensionality of the underlying feature vectors
              produced by Feature_extractor_type times get_num_components_per_detection_template().
    !*/

    unsigned long get_max_pyramid_levels (
    ) const;
    /*!
        ensures
            - returns the maximum number of image pyramid levels this object will use.
              Note that #get_max_pyramid_levels() == 1 indicates that no image pyramid
              will be used at all.  That is, only the original image will be processed
              and no lower scale versions will be created.
    !*/

    void set_max_pyramid_levels (
        unsigned long max_levels
    );
    /*!
        requires
            - max_levels > 0
        ensures
            - #get_max_pyramid_levels() == max_levels
    !*/

    unsigned long get_max_detections_per_template (
    ) const;
    /*!
        ensures
            - For each image pyramid layer and detection template, this object scans a sliding
              window classifier over an image and produces a number of detections.  This
              function returns a number which defines a hard upper limit on the number of
              detections allowed by a single scan.  This means that the total number of
              possible detections produced by detect() is get_max_detections_per_template()*
              get_num_detection_templates()*(number of image pyramid layers).
    !*/

    void set_max_detections_per_template (
        unsigned long max_dets
    );
    /*!
        requires
            - max_dets > 0
        ensures
            - #get_max_detections_per_template() == max_dets
    !*/

    void detect (
        const feature_vector_type& w,
        std::vector<std::pair<double, rectangle> >& dets,
        const double thresh
    ) const;
    /*!
        requires
            - w.size() >= get_num_dimensions()
            - is_loaded_with_image() == true
            - get_num_detection_templates() > 0
        ensures
            - Scans all the detection templates over all pyramid layers as discussed in the
              WHAT THIS OBJECT REPRESENTS section and stores all detections into #dets.
            - for all valid i:
                - #dets[i].second == The object box which produced this detection.  This rectangle gives
                  the location of the detection.  Note that the rectangle will have been converted back into
                  the original image input space.  That is, if this detection was made at a low level in the
                  image pyramid then the object box will have been automatically mapped up the pyramid layers
                  to the original image space.  Or in other words, if you plot #dets[i].second on top of the
                  image given to load() it will show up in the right place.
                - #dets[i].first == The score for this detection.  This value is equal to dot(w, feature vector
                  for this sliding window location).
                - #dets[i].first >= thresh
            - #dets will be sorted in descending order. (i.e. #dets[i].first >= #dets[j].first for all i, and j>i)
            - Elements of w beyond index get_num_dimensions()-1 are ignored.  I.e. only the first
              get_num_dimensions() are used.
            - Note that no form of non-max suppression is performed.  If a window has a score >= thresh
              then it is reported in #dets (assuming the limit imposed by get_max_detections_per_template() hasn't
              been reached).
    !*/

    void get_feature_vector (
        const std::vector<rectangle>& rects,
        feature_vector_type& psi,
        std::vector<rectangle>& mapped_rects
    ) const;
    /*!
        requires
            - is_loaded_with_image() == true
            - get_num_detection_templates() > 0
            - psi.size() >= get_num_dimensions()
        ensures
            - This function allows you to determine the feature vector used for a sliding window location
              or the sum of such vectors for a set of locations.
            - if (rects was produced by a call to detect(), i.e. rects contains the contents of dets) then
                - #psi == the sum of feature vectors corresponding to the sliding window locations contained
                  in rects.
                - #mapped_rects == rects
                - Let w denote the w vector given to detect(), then we have:
                    - dot(w,#psi) == sum of scores of the dets produced by detect()
            - else
                - Since scan_image_pyramid is a sliding window classifier system, not all possible rectangles can
                  be output by detect().  So in the case where rects contains rectangles which could not arise
                  from a call to detect(), this function will map the rectangles in rects to the nearest possible
                  object boxes and then store the sum of feature vectors for the mapped rectangles into #psi.
                - for all valid i: #mapped_rects[i] == the rectangle rects[i] gets mapped to for feature extraction.
                - #mapped_rects.size() == rects.size()
    !*/

};
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void serialize (
    const scan_image_pyramid<Pyramid_type,Feature_extractor_type>& item,
    std::ostream& out
);
/*!
    provides serialization support for scan_image_pyramid objects (writes the
    state of item to out)
!*/
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void deserialize (
    scan_image_pyramid<Pyramid_type,Feature_extractor_type>& item,
    std::istream& in
);
/*!
    provides deserialization support for scan_image_pyramid objects (reads the
    state of item from in)
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_IMaGE_PYRAMID_ABSTRACT_H__
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment