Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
8001b924
Commit
8001b924
authored
May 25, 2019
by
Facundo Galán
Committed by
Davis E. King
May 25, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add input_grayscale_image_pyramid, issue #354 (#1761)
Add input_grayscale_image_pyramid
parent
0ecb49b9
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
354 additions
and
86 deletions
+354
-86
input.h
dlib/dnn/input.h
+205
-86
input_abstract.h
dlib/dnn/input_abstract.h
+149
-0
No files found.
dlib/dnn/input.h
View file @
8001b924
...
...
@@ -588,20 +588,216 @@ namespace dlib
}
};
// ----------------------------------------------------------------------------------------
namespace
detail
{
template
<
typename
PYRAMID_TYPE
>
class
input_image_pyramid
{
public
:
virtual
~
input_image_pyramid
()
=
0
;
typedef
PYRAMID_TYPE
pyramid_type
;
unsigned
long
get_pyramid_padding
()
const
{
return
pyramid_padding
;
}
void
set_pyramid_padding
(
unsigned
long
value
)
{
pyramid_padding
=
value
;
}
unsigned
long
get_pyramid_outer_padding
()
const
{
return
pyramid_outer_padding
;
}
void
set_pyramid_outer_padding
(
unsigned
long
value
)
{
pyramid_outer_padding
=
value
;
}
bool
image_contained_point
(
const
tensor
&
data
,
const
point
&
p
)
const
{
auto
&&
rects
=
any_cast
<
std
::
vector
<
rectangle
>>
(
data
.
annotation
());
DLIB_CASSERT
(
rects
.
size
()
>
0
);
return
rects
[
0
].
contains
(
p
+
rects
[
0
].
tl_corner
());
}
drectangle
tensor_space_to_image_space
(
const
tensor
&
data
,
drectangle
r
)
const
{
auto
&&
rects
=
any_cast
<
std
::
vector
<
rectangle
>>
(
data
.
annotation
());
return
tiled_pyramid_to_image
<
pyramid_type
>
(
rects
,
r
);
}
drectangle
image_space_to_tensor_space
(
const
tensor
&
data
,
double
scale
,
drectangle
r
)
const
{
DLIB_CASSERT
(
0
<
scale
&&
scale
<=
1
,
"scale: "
<<
scale
);
auto
&&
rects
=
any_cast
<
std
::
vector
<
rectangle
>>
(
data
.
annotation
());
return
image_to_tiled_pyramid
<
pyramid_type
>
(
rects
,
scale
,
r
);
}
protected
:
template
<
typename
forward_iterator
>
void
to_tensor_init
(
forward_iterator
ibegin
,
forward_iterator
iend
,
resizable_tensor
&
data
,
unsigned
int
k
)
const
{
DLIB_CASSERT
(
std
::
distance
(
ibegin
,
iend
)
>
0
);
auto
nr
=
ibegin
->
nr
();
auto
nc
=
ibegin
->
nc
();
// make sure all the input matrices have the same dimensions
for
(
auto
i
=
ibegin
;
i
!=
iend
;
++
i
)
{
DLIB_CASSERT
(
i
->
nr
()
==
nr
&&
i
->
nc
()
==
nc
,
"
\t
input_grayscale_image_pyramid::to_tensor()"
<<
"
\n\t
All matrices given to to_tensor() must have the same dimensions."
<<
"
\n\t
nr: "
<<
nr
<<
"
\n\t
nc: "
<<
nc
<<
"
\n\t
i->nr(): "
<<
i
->
nr
()
<<
"
\n\t
i->nc(): "
<<
i
->
nc
()
);
}
long
NR
,
NC
;
pyramid_type
pyr
;
auto
&
rects
=
data
.
annotation
().
get
<
std
::
vector
<
rectangle
>>
();
impl
::
compute_tiled_image_pyramid_details
(
pyr
,
nr
,
nc
,
pyramid_padding
,
pyramid_outer_padding
,
rects
,
NR
,
NC
);
// initialize data to the right size to contain the stuff in the iterator range.
data
.
set_size
(
std
::
distance
(
ibegin
,
iend
),
k
,
NR
,
NC
);
// We need to zero the image before doing the pyramid, since the pyramid
// creation code doesn't write to all parts of the image. We also take
// care to avoid triggering any device to hosts copies.
auto
ptr
=
data
.
host_write_only
();
for
(
size_t
i
=
0
;
i
<
data
.
size
();
++
i
)
ptr
[
i
]
=
0
;
}
// now build the image pyramid into data. This does the same thing as
// standard create_tiled_pyramid(), except we use the GPU if one is available.
void
create_tiled_pyramid
(
const
std
::
vector
<
rectangle
>&
rects
,
resizable_tensor
&
data
)
const
{
for
(
size_t
i
=
1
;
i
<
rects
.
size
();
++
i
)
{
alias_tensor
src
(
data
.
num_samples
(),
data
.
k
(),
rects
[
i
-
1
].
height
(),
rects
[
i
-
1
].
width
());
alias_tensor
dest
(
data
.
num_samples
(),
data
.
k
(),
rects
[
i
].
height
(),
rects
[
i
].
width
());
auto
asrc
=
src
(
data
,
data
.
nc
()
*
rects
[
i
-
1
].
top
()
+
rects
[
i
-
1
].
left
());
auto
adest
=
dest
(
data
,
data
.
nc
()
*
rects
[
i
].
top
()
+
rects
[
i
].
left
());
tt
::
resize_bilinear
(
adest
,
data
.
nc
(),
data
.
nr
()
*
data
.
nc
(),
asrc
,
data
.
nc
(),
data
.
nr
()
*
data
.
nc
());
}
}
unsigned
long
pyramid_padding
=
10
;
unsigned
long
pyramid_outer_padding
=
11
;
};
template
<
typename
PYRAMID_TYPE
>
input_image_pyramid
<
PYRAMID_TYPE
>::~
input_image_pyramid
()
{}
}
// ----------------------------------------------------------------------------------------
template
<
typename
PYRAMID_TYPE
>
class
input_rgb_image_pyramid
class
input_grayscale_image_pyramid
:
public
detail
::
input_image_pyramid
<
PYRAMID_TYPE
>
{
public
:
typedef
matrix
<
unsigned
char
>
input_type
;
typedef
PYRAMID_TYPE
pyramid_type
;
template
<
typename
forward_iterator
>
void
to_tensor
(
forward_iterator
ibegin
,
forward_iterator
iend
,
resizable_tensor
&
data
)
const
{
this
->
to_tensor_init
(
ibegin
,
iend
,
data
,
1
);
const
auto
rects
=
data
.
annotation
().
get
<
std
::
vector
<
rectangle
>>
();
if
(
rects
.
size
()
==
0
)
return
;
// copy the first raw image into the top part of the tiled pyramid. We need to
// do this for each of the input images/samples in the tensor.
auto
ptr
=
data
.
host_write_only
();
for
(
auto
i
=
ibegin
;
i
!=
iend
;
++
i
)
{
auto
&
img
=
*
i
;
ptr
+=
rects
[
0
].
top
()
*
data
.
nc
();
for
(
long
r
=
0
;
r
<
img
.
nr
();
++
r
)
{
auto
p
=
ptr
+
rects
[
0
].
left
();
for
(
long
c
=
0
;
c
<
img
.
nc
();
++
c
)
p
[
c
]
=
(
img
(
r
,
c
))
/
256
.
0
;
ptr
+=
data
.
nc
();
}
ptr
+=
data
.
nc
()
*
(
data
.
nr
()
-
rects
[
0
].
bottom
()
-
1
);
}
this
->
create_tiled_pyramid
(
rects
,
data
);
}
// Writes item to out: a type tag string followed by the two padding settings, in
// the same order deserialize() reads them back.
friend void serialize(const input_grayscale_image_pyramid& item, std::ostream& out)
{
    const auto& inner_padding = item.pyramid_padding;
    const auto& outer_padding = item.pyramid_outer_padding;

    serialize("input_grayscale_image_pyramid", out);
    serialize(inner_padding, out);
    serialize(outer_padding, out);
}
// Restores item from in.  The stream must start with the type tag written by
// serialize(); otherwise a serialization_error is thrown before item is modified.
friend void deserialize(input_grayscale_image_pyramid& item, std::istream& in)
{
    std::string tag;
    deserialize(tag, in);

    const bool tag_matches = (tag == "input_grayscale_image_pyramid");
    if (!tag_matches)
    {
        throw serialization_error("Unexpected version found while deserializing dlib::input_grayscale_image_pyramid.");
    }

    deserialize(item.pyramid_padding, in);
    deserialize(item.pyramid_outer_padding, in);
}
// Prints a one-line human readable description of the layer and its padding settings.
friend std::ostream& operator<<(std::ostream& out, const input_grayscale_image_pyramid& item)
{
    return out << "input_grayscale_image_pyramid()"
               << " pyramid_padding=" << item.pyramid_padding
               << " pyramid_outer_padding=" << item.pyramid_outer_padding;
}
// Writes the layer as a single self-closing XML element whose attributes are the
// two padding settings.
friend void to_xml(const input_grayscale_image_pyramid& item, std::ostream& out)
{
    // The tag name must be followed by whitespace and then the first attribute.
    // The previous text put a stray apostrophe right after the tag name
    // ("<input_grayscale_image_pyramid' pyramid_padding='..."), which is not
    // well-formed XML.
    out << "<input_grayscale_image_pyramid"
        << " pyramid_padding='" << item.pyramid_padding
        << "' pyramid_outer_padding='" << item.pyramid_outer_padding
        << "'/>";
}
};
// ----------------------------------------------------------------------------------------
template
<
typename
PYRAMID_TYPE
>
class
input_rgb_image_pyramid
:
public
detail
::
input_image_pyramid
<
PYRAMID_TYPE
>
{
public
:
typedef
matrix
<
rgb_pixel
>
input_type
;
typedef
PYRAMID_TYPE
pyramid_type
;
input_rgb_image_pyramid
(
)
:
avg_red
(
122
.
782
),
)
:
avg_red
(
122
.
782
),
avg_green
(
117
.
001
),
avg_blue
(
104
.
298
)
avg_blue
(
104
.
298
)
{
}
...
...
@@ -609,49 +805,13 @@ namespace dlib
float
avg_red_
,
float
avg_green_
,
float
avg_blue_
)
:
avg_red
(
avg_red_
),
avg_green
(
avg_green_
),
avg_blue
(
avg_blue_
)
)
:
avg_red
(
avg_red_
),
avg_green
(
avg_green_
),
avg_blue
(
avg_blue_
)
{}
float
get_avg_red
()
const
{
return
avg_red
;
}
float
get_avg_green
()
const
{
return
avg_green
;
}
float
get_avg_blue
()
const
{
return
avg_blue
;
}
unsigned
long
get_pyramid_padding
()
const
{
return
pyramid_padding
;
}
void
set_pyramid_padding
(
unsigned
long
value
)
{
pyramid_padding
=
value
;
}
unsigned
long
get_pyramid_outer_padding
()
const
{
return
pyramid_outer_padding
;
}
void
set_pyramid_outer_padding
(
unsigned
long
value
)
{
pyramid_outer_padding
=
value
;
}
bool
image_contained_point
(
const
tensor
&
data
,
const
point
&
p
)
const
{
auto
&&
rects
=
any_cast
<
std
::
vector
<
rectangle
>>
(
data
.
annotation
());
DLIB_CASSERT
(
rects
.
size
()
>
0
);
return
rects
[
0
].
contains
(
p
+
rects
[
0
].
tl_corner
());
}
drectangle
tensor_space_to_image_space
(
const
tensor
&
data
,
drectangle
r
)
const
{
auto
&&
rects
=
any_cast
<
std
::
vector
<
rectangle
>>
(
data
.
annotation
());
return
tiled_pyramid_to_image
<
pyramid_type
>
(
rects
,
r
);
}
drectangle
image_space_to_tensor_space
(
const
tensor
&
data
,
double
scale
,
drectangle
r
)
const
{
DLIB_CASSERT
(
0
<
scale
&&
scale
<=
1
,
"scale: "
<<
scale
);
auto
&&
rects
=
any_cast
<
std
::
vector
<
rectangle
>>
(
data
.
annotation
());
return
image_to_tiled_pyramid
<
pyramid_type
>
(
rects
,
scale
,
r
);
}
template
<
typename
forward_iterator
>
void
to_tensor
(
forward_iterator
ibegin
,
...
...
@@ -659,42 +819,15 @@ namespace dlib
resizable_tensor
&
data
)
const
{
DLIB_CASSERT
(
std
::
distance
(
ibegin
,
iend
)
>
0
);
auto
nr
=
ibegin
->
nr
();
auto
nc
=
ibegin
->
nc
();
// make sure all the input matrices have the same dimensions
for
(
auto
i
=
ibegin
;
i
!=
iend
;
++
i
)
{
DLIB_CASSERT
(
i
->
nr
()
==
nr
&&
i
->
nc
()
==
nc
,
"
\t
input_rgb_image_pyramid::to_tensor()"
<<
"
\n\t
All matrices given to to_tensor() must have the same dimensions."
<<
"
\n\t
nr: "
<<
nr
<<
"
\n\t
nc: "
<<
nc
<<
"
\n\t
i->nr(): "
<<
i
->
nr
()
<<
"
\n\t
i->nc(): "
<<
i
->
nc
()
);
}
long
NR
,
NC
;
pyramid_type
pyr
;
auto
&
rects
=
data
.
annotation
().
get
<
std
::
vector
<
rectangle
>>
();
impl
::
compute_tiled_image_pyramid_details
(
pyr
,
nr
,
nc
,
pyramid_padding
,
pyramid_outer_padding
,
rects
,
NR
,
NC
);
// initialize data to the right size to contain the stuff in the iterator range.
data
.
set_size
(
std
::
distance
(
ibegin
,
iend
),
3
,
NR
,
NC
);
// We need to zero the image before doing the pyramid, since the pyramid
// creation code doesn't write to all parts of the image. We also take
// care to avoid triggering any device to hosts copies.
auto
ptr
=
data
.
host_write_only
();
for
(
size_t
i
=
0
;
i
<
data
.
size
();
++
i
)
ptr
[
i
]
=
0
;
this
->
to_tensor_init
(
ibegin
,
iend
,
data
,
3
);
const
auto
rects
=
data
.
annotation
().
get
<
std
::
vector
<
rectangle
>>
();
if
(
rects
.
size
()
==
0
)
return
;
// copy the first raw image into the top part of the tiled pyramid. We need to
// do this for each of the input images/samples in the tensor.
auto
ptr
=
data
.
host_write_only
();
for
(
auto
i
=
ibegin
;
i
!=
iend
;
++
i
)
{
auto
&
img
=
*
i
;
...
...
@@ -729,19 +862,7 @@ namespace dlib
ptr
+=
data
.
nc
()
*
(
data
.
nr
()
-
rects
[
0
].
bottom
()
-
1
);
}
// now build the image pyramid into data. This does the same thing as
// create_tiled_pyramid(), except we use the GPU if one is available.
for
(
size_t
i
=
1
;
i
<
rects
.
size
();
++
i
)
{
alias_tensor
src
(
data
.
num_samples
(),
data
.
k
(),
rects
[
i
-
1
].
height
(),
rects
[
i
-
1
].
width
());
alias_tensor
dest
(
data
.
num_samples
(),
data
.
k
(),
rects
[
i
].
height
(),
rects
[
i
].
width
());
auto
asrc
=
src
(
data
,
data
.
nc
()
*
rects
[
i
-
1
].
top
()
+
rects
[
i
-
1
].
left
());
auto
adest
=
dest
(
data
,
data
.
nc
()
*
rects
[
i
].
top
()
+
rects
[
i
].
left
());
tt
::
resize_bilinear
(
adest
,
data
.
nc
(),
data
.
nr
()
*
data
.
nc
(),
asrc
,
data
.
nc
(),
data
.
nr
()
*
data
.
nc
());
}
this
->
create_tiled_pyramid
(
rects
,
data
);
}
friend
void
serialize
(
const
input_rgb_image_pyramid
&
item
,
std
::
ostream
&
out
)
...
...
@@ -796,8 +917,6 @@ namespace dlib
float
avg_red
;
float
avg_green
;
float
avg_blue
;
unsigned
long
pyramid_padding
=
10
;
unsigned
long
pyramid_outer_padding
=
11
;
};
// ----------------------------------------------------------------------------------------
...
...
dlib/dnn/input_abstract.h
View file @
8001b924
...
...
@@ -271,6 +271,155 @@ namespace dlib
};
// ----------------------------------------------------------------------------------------
template
<
typename
PYRAMID_TYPE
>
class
input_grayscale_image_pyramid
{
/*!
REQUIREMENTS ON PYRAMID_TYPE
PYRAMID_TYPE must be an instance of the dlib::pyramid_down template.
WHAT THIS OBJECT REPRESENTS
This input layer works with grayscale images of type matrix<unsigned char>.
It is identical to the input layer except that it outputs a tensor containing a
tiled image pyramid of each input image rather than a simple copy of each image.
The tiled image pyramid is created using create_tiled_pyramid().
!*/
public
:
typedef
matrix
<
unsigned
char
>
input_type
;
typedef
PYRAMID_TYPE
pyramid_type
;
input_grayscale_image_pyramid
(
);
/*!
ensures
- #get_pyramid_padding() == 10
- #get_pyramid_outer_padding() == 11
!*/
unsigned
long
get_pyramid_padding
(
)
const
;
/*!
ensures
- When this object creates a pyramid it will call create_tiled_pyramid() and
set create_tiled_pyramid's pyramid_padding parameter to get_pyramid_padding().
!*/
void
set_pyramid_padding
(
unsigned
long
value
);
/*!
ensures
- #get_pyramid_padding() == value
!*/
unsigned
long
get_pyramid_outer_padding
(
)
const
;
/*!
ensures
- When this object creates a pyramid it will call create_tiled_pyramid()
and set create_tiled_pyramid's pyramid_outer_padding parameter to
get_pyramid_outer_padding().
!*/
void
set_pyramid_outer_padding
(
unsigned
long
value
);
/*!
ensures
- #get_pyramid_outer_padding() == value
!*/
template
<
typename
forward_iterator
>
void
to_tensor
(
forward_iterator
ibegin
,
forward_iterator
iend
,
resizable_tensor
&
data
)
const
;
/*!
requires
- [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
- The input range should contain images that all have the same
dimensions.
ensures
- Converts the iterator range into a tensor and stores it into #data. In
particular, we will have:
- #data.num_samples() == std::distance(ibegin,iend)
- #data.k() == 1
- Each sample in #data contains a tiled image pyramid of the
corresponding input image. The tiled pyramid is created by
create_tiled_pyramid().
Moreover, each pixel value is normalized by dividing it by 256.0.
!*/
bool
image_contained_point
(
const
tensor
&
data
,
const
point
&
p
)
const
;
/*!
requires
- data is a tensor that was produced by this->to_tensor()
ensures
- Since data is a tensor that is built from a bunch of identically sized
images, we can ask if those images were big enough to contain the point
p. This function returns the answer to that question.
!*/
drectangle
image_space_to_tensor_space
(
const
tensor
&
data
,
double
scale
,
drectangle
r
)
const
;
/*!
requires
- data is a tensor that was produced by this->to_tensor()
- 0 < scale <= 1
ensures
- This function maps from to_tensor()'s input image space to its output
tensor space. Therefore, given that data is a tensor produced by
to_tensor(), image_space_to_tensor_space() allows you to ask for the
rectangle in data that corresponds to a rectangle in the original image
space.
Note that since the output tensor contains an image pyramid, there are
multiple points in the output tensor that correspond to any input
location. So you must also specify a scale so we know what level of the
pyramid is needed. So given a rectangle r in an input image, you can
ask, what rectangle in data corresponds to r when things are scale times
smaller? That rectangle is returned by this function.
- A scale of 1 means we don't move anywhere in the pyramid scale space relative
to the input image while smaller values of scale mean we move down the
pyramid.
!*/
drectangle
tensor_space_to_image_space
(
const
tensor
&
data
,
drectangle
r
)
const
;
/*!
requires
- data is a tensor that was produced by this->to_tensor()
ensures
- This function maps from to_tensor()'s output tensor space to its input
image space. Therefore, given that data is a tensor produced by
to_tensor(), tensor_space_to_image_space() allows you to ask for the
rectangle in the input image that corresponds to a rectangle in data.
- It should be noted that this function isn't always an inverse of
image_space_to_tensor_space(). This is because you can ask
image_space_to_tensor_space() for the coordinates of points outside the input
image and they will be mapped to somewhere that doesn't have an inverse.
But for points actually inside the input image this function performs an
approximate inverse mapping. I.e. when image_contained_point(data,center(r))==true
there is an approximate inverse.
!*/
};
// ----------------------------------------------------------------------------------------
template
<
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment