Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
8a707f17
Commit
8a707f17
authored
Sep 03, 2016
by
Davis King
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added input_rgb_image_pyramid
parent
09200e8d
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
331 additions
and
0 deletions
+331
-0
input.h
dlib/dnn/input.h
+167
-0
input_abstract.h
dlib/dnn/input_abstract.h
+164
-0
No files found.
dlib/dnn/input.h
View file @
8a707f17
...
...
@@ -7,6 +7,7 @@
#include "../matrix.h"
#include "../array2d.h"
#include "../pixel.h"
#include "../image_processing.h"
#include <sstream>
...
...
@@ -57,6 +58,10 @@ namespace dlib
// Read-only accessor: hands back the current value of avg_green.
float get_avg_green (
) const
{
    return avg_green;
}
// Read-only accessor: hands back the current value of avg_blue.
float get_avg_blue (
) const
{
    return avg_blue;
}
// Reports whether p falls inside the rectangle that get_rect() yields for data.
bool image_contained_point (
    const tensor& data,
    const point& p
) const
{
    const auto bounds = get_rect(data);
    return bounds.contains(p);
}
// Identity mapping: the rectangle is handed back untouched.  The tensor
// argument is unnamed and never read.
drectangle tensor_space_to_image_space (
    const tensor& /*data*/,
    drectangle r
) const
{
    return r;
}
// Identity mapping: the rectangle is handed back untouched.  Neither the
// tensor nor the scale argument is named or read.
drectangle image_space_to_tensor_space (
    const tensor& /*data*/,
    double /*scale*/,
    drectangle r
) const
{
    return r;
}
template
<
typename
forward_iterator
>
void
to_tensor
(
forward_iterator
ibegin
,
...
...
@@ -180,6 +185,10 @@ namespace dlib
// Read-only accessor: hands back the current value of avg_green.
float get_avg_green (
) const
{
    return avg_green;
}
// Read-only accessor: hands back the current value of avg_blue.
float get_avg_blue (
) const
{
    return avg_blue;
}
// Reports whether p falls inside the rectangle that get_rect() yields for data.
bool image_contained_point (
    const tensor& data,
    const point& p
) const
{
    const auto bounds = get_rect(data);
    return bounds.contains(p);
}
// Identity mapping: the rectangle is handed back untouched.  The tensor
// argument is unnamed and never read.
drectangle tensor_space_to_image_space (
    const tensor& /*data*/,
    drectangle r
) const
{
    return r;
}
// Identity mapping: the rectangle is handed back untouched.  Neither the
// tensor nor the scale argument is named or read.
drectangle image_space_to_tensor_space (
    const tensor& /*data*/,
    double /*scale*/,
    drectangle r
) const
{
    return r;
}
template
<
typename
forward_iterator
>
void
to_tensor
(
forward_iterator
ibegin
,
...
...
@@ -298,6 +307,10 @@ namespace dlib
// Accepts an input<array2d<T,mm>> for any mm.  The body is empty, so nothing
// is read from the argument.
template <typename mm>
input (
    const input<array2d<T,mm>>&
)
{
}
// Reports whether p falls inside the rectangle that get_rect() yields for data.
bool image_contained_point (
    const tensor& data,
    const point& p
) const
{
    const auto bounds = get_rect(data);
    return bounds.contains(p);
}
// Identity mapping: the rectangle is handed back untouched.  The tensor
// argument is unnamed and never read.
drectangle tensor_space_to_image_space (
    const tensor& /*data*/,
    drectangle r
) const
{
    return r;
}
// Identity mapping: the rectangle is handed back untouched.  Neither the
// tensor nor the scale argument is named or read.
drectangle image_space_to_tensor_space (
    const tensor& /*data*/,
    double /*scale*/,
    drectangle r
) const
{
    return r;
}
template
<
typename
forward_iterator
>
void
to_tensor
(
forward_iterator
ibegin
,
...
...
@@ -391,6 +404,10 @@ namespace dlib
// Accepts an input<matrix<T,NR,NC,mm,L>> for any NR, NC, mm and L.  The body
// is empty, so nothing is read from the argument.
template <
    long NR,
    long NC,
    typename mm,
    typename L
    >
input (
    const input<matrix<T,NR,NC,mm,L>>&
)
{
}
// Reports whether p falls inside the rectangle that get_rect() yields for data.
bool image_contained_point (
    const tensor& data,
    const point& p
) const
{
    const auto bounds = get_rect(data);
    return bounds.contains(p);
}
// Identity mapping: the rectangle is handed back untouched.  The tensor
// argument is unnamed and never read.
drectangle tensor_space_to_image_space (
    const tensor& /*data*/,
    drectangle r
) const
{
    return r;
}
// Identity mapping: the rectangle is handed back untouched.  Neither the
// tensor nor the scale argument is named or read.
drectangle image_space_to_tensor_space (
    const tensor& /*data*/,
    double /*scale*/,
    drectangle r
) const
{
    return r;
}
template
<
typename
forward_iterator
>
void
to_tensor
(
forward_iterator
ibegin
,
...
...
@@ -468,6 +485,156 @@ namespace dlib
}
};
// ----------------------------------------------------------------------------------------
template
<
typename
PYRAMID_TYPE
>
class
input_rgb_image_pyramid
{
public
:
typedef
matrix
<
rgb_pixel
>
input_type
;
typedef
PYRAMID_TYPE
pyramid_type
;
input_rgb_image_pyramid
(
)
:
avg_red
(
122
.
782
),
avg_green
(
117
.
001
),
avg_blue
(
104
.
298
)
{
}
input_rgb_image_pyramid
(
float
avg_red_
,
float
avg_green_
,
float
avg_blue_
)
:
avg_red
(
avg_red_
),
avg_green
(
avg_green_
),
avg_blue
(
avg_blue_
)
{}
float
get_avg_red
()
const
{
return
avg_red
;
}
float
get_avg_green
()
const
{
return
avg_green
;
}
float
get_avg_blue
()
const
{
return
avg_blue
;
}
bool
image_contained_point
(
const
tensor
&
data
,
const
point
&
p
)
const
{
auto
&&
rects
=
any_cast
<
std
::
vector
<
rectangle
>>
(
data
.
annotation
());
DLIB_CASSERT
(
rects
.
size
()
>
0
);
return
rects
[
0
].
contains
(
p
);
}
drectangle
tensor_space_to_image_space
(
const
tensor
&
data
,
drectangle
r
)
const
{
auto
&&
rects
=
any_cast
<
std
::
vector
<
rectangle
>>
(
data
.
annotation
());
return
tiled_pyramid_to_image
<
pyramid_type
>
(
rects
,
r
);
}
drectangle
image_space_to_tensor_space
(
const
tensor
&
data
,
double
scale
,
drectangle
r
)
const
{
auto
&&
rects
=
any_cast
<
std
::
vector
<
rectangle
>>
(
data
.
annotation
());
return
image_to_tiled_pyramid
<
pyramid_type
>
(
rects
,
scale
,
r
);
}
template
<
typename
forward_iterator
>
void
to_tensor
(
forward_iterator
ibegin
,
forward_iterator
iend
,
resizable_tensor
&
data
)
const
{
DLIB_CASSERT
(
std
::
distance
(
ibegin
,
iend
)
>
0
);
auto
nr
=
ibegin
->
nr
();
auto
nc
=
ibegin
->
nc
();
// make sure all the input matrices have the same dimensions
for
(
auto
i
=
ibegin
;
i
!=
iend
;
++
i
)
{
DLIB_CASSERT
(
i
->
nr
()
==
nr
&&
i
->
nc
()
==
nc
,
"
\t
input_rgb_image_pyramid::to_tensor()"
<<
"
\n\t
All matrices given to to_tensor() must have the same dimensions."
<<
"
\n\t
nr: "
<<
nr
<<
"
\n\t
nc: "
<<
nc
<<
"
\n\t
i->nr(): "
<<
i
->
nr
()
<<
"
\n\t
i->nc(): "
<<
i
->
nc
()
);
}
matrix
<
rgb_pixel
>
img
;
create_tiled_pyramid
<
pyramid_type
>
(
*
ibegin
,
img
,
data
.
annotation
().
get
<
std
::
vector
<
rectangle
>>
());
nr
=
img
.
nr
();
nc
=
img
.
nc
();
data
.
set_size
(
std
::
distance
(
ibegin
,
iend
),
3
,
nr
,
nc
);
const
size_t
offset
=
nr
*
nc
;
auto
ptr
=
data
.
host
();
while
(
true
)
{
for
(
long
r
=
0
;
r
<
nr
;
++
r
)
{
for
(
long
c
=
0
;
c
<
nc
;
++
c
)
{
rgb_pixel
temp
=
img
(
r
,
c
);
auto
p
=
ptr
++
;
*
p
=
(
temp
.
red
-
avg_red
)
/
256
.
0
;
p
+=
offset
;
*
p
=
(
temp
.
green
-
avg_green
)
/
256
.
0
;
p
+=
offset
;
*
p
=
(
temp
.
blue
-
avg_blue
)
/
256
.
0
;
p
+=
offset
;
}
}
ptr
+=
offset
*
(
data
.
k
()
-
1
);
++
ibegin
;
if
(
ibegin
==
iend
)
break
;
create_tiled_pyramid
<
pyramid_type
>
(
*
ibegin
,
img
,
data
.
annotation
().
get
<
std
::
vector
<
rectangle
>>
());
}
}
friend
void
serialize
(
const
input_rgb_image_pyramid
&
item
,
std
::
ostream
&
out
)
{
serialize
(
"input_rgb_image_pyramid"
,
out
);
serialize
(
item
.
avg_red
,
out
);
serialize
(
item
.
avg_green
,
out
);
serialize
(
item
.
avg_blue
,
out
);
}
friend
void
deserialize
(
input_rgb_image_pyramid
&
item
,
std
::
istream
&
in
)
{
std
::
string
version
;
deserialize
(
version
,
in
);
if
(
version
!=
"input_rgb_image_pyramid"
)
throw
serialization_error
(
"Unexpected version found while deserializing dlib::input_rgb_image_pyramid."
);
deserialize
(
item
.
avg_red
,
in
);
deserialize
(
item
.
avg_green
,
in
);
deserialize
(
item
.
avg_blue
,
in
);
}
friend
std
::
ostream
&
operator
<<
(
std
::
ostream
&
out
,
const
input_rgb_image_pyramid
&
item
)
{
out
<<
"input_rgb_image_pyramid("
<<
item
.
avg_red
<<
","
<<
item
.
avg_green
<<
","
<<
item
.
avg_blue
<<
")"
;
return
out
;
}
friend
void
to_xml
(
const
input_rgb_image_pyramid
&
item
,
std
::
ostream
&
out
)
{
out
<<
"<input_rgb_image_pyramid r='"
<<
item
.
avg_red
<<
"' g='"
<<
item
.
avg_green
<<
"' b='"
<<
item
.
avg_blue
<<
"'/>"
;
}
private
:
float
avg_red
;
float
avg_green
;
float
avg_blue
;
};
// ----------------------------------------------------------------------------------------
}
...
...
dlib/dnn/input_abstract.h
View file @
8a707f17
...
...
@@ -153,6 +153,11 @@ namespace dlib
value written to the output tensor is first divided by 256.0 so that the
resulting outputs are all in the range [0,1].
!*/
// These members exist so this class offers the same interface as
// input_rgb_image_pyramid.

// Reports whether p falls inside the rectangle that get_rect() yields for data.
bool image_contained_point (
    const tensor& data,
    const point& p
) const
{
    const auto bounds = get_rect(data);
    return bounds.contains(p);
}
// Identity mapping: the rectangle is handed back untouched.  The tensor
// argument is unnamed and never read.
drectangle tensor_space_to_image_space (
    const tensor& /*data*/,
    drectangle r
) const
{
    return r;
}
// Identity mapping: the rectangle is handed back untouched.  Neither the
// tensor nor the scale argument is named or read.
drectangle image_space_to_tensor_space (
    const tensor& /*data*/,
    double /*scale*/,
    drectangle r
) const
{
    return r;
}
};
// ----------------------------------------------------------------------------------------
...
...
@@ -234,6 +239,12 @@ namespace dlib
subtracted (according to get_avg_red(), get_avg_green(), or
get_avg_blue()) and then is divided by 256.0.
!*/
// These members exist so this class offers the same interface as
// input_rgb_image_pyramid.

// Reports whether p falls inside the rectangle that get_rect() yields for data.
bool image_contained_point (
    const tensor& data,
    const point& p
) const
{
    const auto bounds = get_rect(data);
    return bounds.contains(p);
}
// Identity mapping: the rectangle is handed back untouched.  The tensor
// argument is unnamed and never read.
drectangle tensor_space_to_image_space (
    const tensor& /*data*/,
    drectangle r
) const
{
    return r;
}
// Identity mapping: the rectangle is handed back untouched.  Neither the
// tensor nor the scale argument is named or read.
drectangle image_space_to_tensor_space (
    const tensor& /*data*/,
    double /*scale*/,
    drectangle r
) const
{
    return r;
}
};
// ----------------------------------------------------------------------------------------
...
...
@@ -253,6 +264,159 @@ namespace dlib
};
// ----------------------------------------------------------------------------------------
template <
    typename PYRAMID_TYPE
    >
class input_rgb_image_pyramid
{
    /*!
        REQUIREMENTS ON PYRAMID_TYPE
            PYRAMID_TYPE must be an instance of the dlib::pyramid_down template.

        WHAT THIS OBJECT REPRESENTS
            An input layer for RGB images stored as matrix<rgb_pixel>.  It behaves
            exactly like input_rgb_image with one difference: rather than placing
            a plain copy of each input image into the output tensor, it places a
            tiled image pyramid of that image there.  The tiled pyramid is built
            with create_tiled_pyramid().
    !*/

public:

    typedef matrix<rgb_pixel> input_type;
    typedef PYRAMID_TYPE pyramid_type;

    input_rgb_image_pyramid (
    );
    /*!
        ensures
            - #get_avg_red()   == 122.782
            - #get_avg_green() == 117.001
            - #get_avg_blue()  == 104.298
    !*/

    input_rgb_image_pyramid (
        float avg_red,
        float avg_green,
        float avg_blue
    );
    /*!
        ensures
            - #get_avg_red()   == avg_red
            - #get_avg_green() == avg_green
            - #get_avg_blue()  == avg_blue
    !*/

    float get_avg_red (
    ) const;
    /*!
        ensures
            - returns the amount that is subtracted from the red color channel.
    !*/

    float get_avg_green (
    ) const;
    /*!
        ensures
            - returns the amount that is subtracted from the green color channel.
    !*/

    float get_avg_blue (
    ) const;
    /*!
        ensures
            - returns the amount that is subtracted from the blue color channel.
    !*/

    template <typename forward_iterator>
    void to_tensor (
        forward_iterator ibegin,
        forward_iterator iend,
        resizable_tensor& data
    ) const;
    /*!
        requires
            - [ibegin, iend) is an iterator range over input_type objects.
            - std::distance(ibegin,iend) > 0
            - All images in the input range should have identical dimensions.
        ensures
            - The iterator range is converted into a tensor which is stored in
              #data.  Specifically:
                - #data.num_samples() == std::distance(ibegin,iend)
                - #data.k() == 3
                - Every sample of #data holds a tiled image pyramid of the
                  matching input image, as produced by create_tiled_pyramid().
              In addition, every color channel is normalized: its average value
              (get_avg_red(), get_avg_green(), or get_avg_blue()) is subtracted
              and the result is divided by 256.0.
    !*/

    bool image_contained_point (
        const tensor& data,
        const point& p
    ) const;
    /*!
        requires
            - data is a tensor that was produced by this->to_tensor()
        ensures
            - data was built from a set of identically sized images, so it makes
              sense to ask whether those images were large enough to contain the
              point p.  This function returns the answer to that question.
    !*/

    drectangle image_space_to_tensor_space (
        const tensor& data,
        double scale,
        drectangle r
    ) const;
    /*!
        requires
            - data is a tensor that was produced by this->to_tensor()
            - 0 < scale <= 1
        ensures
            - Maps from the image space of to_tensor()'s inputs into the space of
              its output tensor.  That is, for a tensor data produced by
              to_tensor(), this function tells you which rectangle in data
              corresponds to a rectangle given in the original image space.

              Because the output tensor holds an image pyramid, any input
              location corresponds to several points in the tensor.  The scale
              argument selects which pyramid level is meant: for a rectangle r in
              an input image, this function returns the rectangle in data that
              corresponds to r when things are scale times smaller.
            - A scale of 1 means no movement in pyramid scale space relative to
              the input image; smaller values of scale move further down the
              pyramid.
    !*/

    drectangle tensor_space_to_image_space (
        const tensor& data,
        drectangle r
    ) const;
    /*!
        requires
            - data is a tensor that was produced by this->to_tensor()
        ensures
            - Maps from the space of to_tensor()'s output tensor back into the
              image space of its inputs.  That is, for a tensor data produced by
              to_tensor(), this function tells you which rectangle in the input
              image corresponds to a rectangle in data.
            - Note that this function is not always an inverse of
              image_space_to_tensor_space().  image_space_to_tensor_space() can be
              asked about points outside the input image, and such points are
              mapped to locations that have no inverse.  For points that really
              are inside the input image this function performs an approximate
              inverse mapping, i.e. an approximate inverse exists when
              image_contained_point(data,center(r))==true.
    !*/

};
// ----------------------------------------------------------------------------------------
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment