Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
7b006f37
Commit
7b006f37
authored
Sep 10, 2018
by
Davis King
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added an option to do bounding box regression to the loss_mmod layer.
parent
16c96bc5
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
145 additions
and
9 deletions
+145
-9
loss.h
dlib/dnn/loss.h
+130
-9
loss_abstract.h
dlib/dnn/loss_abstract.h
+15
-0
No files found.
dlib/dnn/loss.h
View file @
7b006f37
...
...
@@ -702,6 +702,8 @@ namespace dlib
double
truth_match_iou_threshold
=
0
.
5
;
test_box_overlap
overlaps_nms
=
test_box_overlap
(
0
.
4
);
test_box_overlap
overlaps_ignore
;
bool
use_bounding_box_regression
=
false
;
double
bbr_lambda
=
100
;
use_image_pyramid
assume_image_pyramid
=
use_image_pyramid
::
yes
;
...
...
@@ -937,7 +939,7 @@ namespace dlib
inline
void
serialize
(
const
mmod_options
&
item
,
std
::
ostream
&
out
)
{
int
version
=
3
;
int
version
=
4
;
serialize
(
version
,
out
);
serialize
(
item
.
detector_windows
,
out
);
...
...
@@ -947,13 +949,15 @@ namespace dlib
serialize
(
item
.
overlaps_nms
,
out
);
serialize
(
item
.
overlaps_ignore
,
out
);
serialize
(
static_cast
<
uint8_t
>
(
item
.
assume_image_pyramid
),
out
);
serialize
(
item
.
use_bounding_box_regression
,
out
);
serialize
(
item
.
bbr_lambda
,
out
);
}
inline
void
deserialize
(
mmod_options
&
item
,
std
::
istream
&
in
)
{
int
version
=
0
;
deserialize
(
version
,
in
);
if
(
version
!=
3
&&
version
!=
2
&&
version
!=
1
)
if
(
!
(
1
<=
version
&&
version
<=
4
)
)
throw
serialization_error
(
"Unexpected version found while deserializing dlib::mmod_options"
);
if
(
version
==
1
)
{
...
...
@@ -979,6 +983,13 @@ namespace dlib
deserialize
(
assume_image_pyramid
,
in
);
item
.
assume_image_pyramid
=
static_cast
<
use_image_pyramid
>
(
assume_image_pyramid
);
}
item
.
use_bounding_box_regression
=
mmod_options
().
use_bounding_box_regression
;
// use default value since this wasn't provided
item
.
bbr_lambda
=
mmod_options
().
bbr_lambda
;
// use default value since this wasn't provided
if
(
version
>=
4
)
{
deserialize
(
item
.
use_bounding_box_regression
,
in
);
deserialize
(
item
.
bbr_lambda
,
in
);
}
}
// ----------------------------------------------------------------------------------------
...
...
@@ -991,20 +1002,31 @@ namespace dlib
intermediate_detection
(
rectangle
rect_
)
:
rect
(
rect_
)
{}
)
:
rect
(
rect_
)
,
rect_bbr
(
rect_
)
{}
intermediate_detection
(
rectangle
rect_
,
double
detection_confidence_
,
size_t
tensor_offset_
,
long
channel
)
:
rect
(
rect_
),
detection_confidence
(
detection_confidence_
),
tensor_offset
(
tensor_offset_
),
tensor_channel
(
channel
)
{}
)
:
rect
(
rect_
),
rect_bbr
(
rect_
),
detection_confidence
(
detection_confidence_
),
tensor_offset
(
tensor_offset_
),
tensor_channel
(
channel
)
{}
// rect is the rectangle you get without any bounding box regression. So it's
// the basic sliding window box (aka, the "anchor box").
rectangle
rect
;
double
detection_confidence
=
0
;
size_t
tensor_offset
=
0
;
long
tensor_channel
=
0
;
// rect_bbr = rect + bounding box regression. So it is more accurate. Or if bbr is off then
// this is just rect. The important thing about rect_bbr is that it's the
// rectangle we use for doing NMS.
drectangle
rect_bbr
;
size_t
tensor_offset_dx
=
0
;
size_t
tensor_offset_dy
=
0
;
size_t
tensor_offset_dw
=
0
;
size_t
tensor_offset_dh
=
0
;
bool
operator
<
(
const
intermediate_detection
&
item
)
const
{
return
detection_confidence
<
item
.
detection_confidence
;
}
};
...
...
@@ -1032,7 +1054,14 @@ namespace dlib
)
const
{
const
tensor
&
output_tensor
=
sub
.
get_output
();
if
(
options
.
use_bounding_box_regression
)
{
DLIB_CASSERT
(
output_tensor
.
k
()
==
(
long
)
options
.
detector_windows
.
size
()
*
5
);
}
else
{
DLIB_CASSERT
(
output_tensor
.
k
()
==
(
long
)
options
.
detector_windows
.
size
());
}
DLIB_CASSERT
(
input_tensor
.
num_samples
()
==
output_tensor
.
num_samples
());
DLIB_CASSERT
(
sub
.
sample_expansion_factor
()
==
1
,
sub
.
sample_expansion_factor
());
...
...
@@ -1046,10 +1075,10 @@ namespace dlib
final_dets
.
clear
();
for
(
unsigned
long
i
=
0
;
i
<
dets_accum
.
size
();
++
i
)
{
if
(
overlaps_any_box_nms
(
final_dets
,
dets_accum
[
i
].
rect
))
if
(
overlaps_any_box_nms
(
final_dets
,
dets_accum
[
i
].
rect
_bbr
))
continue
;
final_dets
.
push_back
(
mmod_rect
(
dets_accum
[
i
].
rect
,
final_dets
.
push_back
(
mmod_rect
(
dets_accum
[
i
].
rect
_bbr
,
dets_accum
[
i
].
detection_confidence
,
options
.
detector_windows
[
dets_accum
[
i
].
tensor_channel
].
label
));
}
...
...
@@ -1075,13 +1104,19 @@ namespace dlib
DLIB_CASSERT
(
sub
.
sample_expansion_factor
()
==
1
);
DLIB_CASSERT
(
input_tensor
.
num_samples
()
==
grad
.
num_samples
());
DLIB_CASSERT
(
input_tensor
.
num_samples
()
==
output_tensor
.
num_samples
());
if
(
options
.
use_bounding_box_regression
)
{
DLIB_CASSERT
(
output_tensor
.
k
()
==
(
long
)
options
.
detector_windows
.
size
()
*
5
);
}
else
{
DLIB_CASSERT
(
output_tensor
.
k
()
==
(
long
)
options
.
detector_windows
.
size
());
}
double
det_thresh_speed_adjust
=
0
;
// we will scale the loss so that it doesn't get really huge
const
double
scale
=
1
.
0
/
output_tensor
.
size
(
);
const
double
scale
=
1
.
0
/
(
output_tensor
.
nr
()
*
output_tensor
.
nc
()
*
output_tensor
.
num_samples
()
*
options
.
detector_windows
.
size
()
);
double
loss
=
0
;
float
*
g
=
grad
.
host_write_only
();
...
...
@@ -1230,6 +1265,59 @@ namespace dlib
hit_truth_table
[
hittruth
.
second
]
=
true
;
final_dets
.
push_back
(
dets
[
i
]);
loss
-=
options
.
loss_per_missed_target
;
// Now account for BBR loss and gradient if appropriate.
if
(
options
.
use_bounding_box_regression
)
{
double
dx
=
out_data
[
dets
[
i
].
tensor_offset_dx
];
double
dy
=
out_data
[
dets
[
i
].
tensor_offset_dy
];
double
dw
=
out_data
[
dets
[
i
].
tensor_offset_dw
];
double
dh
=
out_data
[
dets
[
i
].
tensor_offset_dh
];
dpoint
p
=
dcenter
(
dets
[
i
].
rect_bbr
);
double
w
=
dets
[
i
].
rect_bbr
.
width
()
-
1
;
double
h
=
dets
[
i
].
rect_bbr
.
height
()
-
1
;
drectangle
truth_box
=
(
*
truth
)[
hittruth
.
second
].
rect
;
dpoint
p_truth
=
dcenter
(
truth_box
);
DLIB_CASSERT
(
w
>
0
);
DLIB_CASSERT
(
h
>
0
);
double
target_dx
=
(
p_truth
.
x
()
-
p
.
x
())
/
w
;
double
target_dy
=
(
p_truth
.
y
()
-
p
.
y
())
/
h
;
double
target_dw
=
std
::
log
((
truth_box
.
width
()
-
1
)
/
w
);
double
target_dh
=
std
::
log
((
truth_box
.
height
()
-
1
)
/
h
);
// compute smoothed L1 loss on BBR outputs. This loss
// is just the MSE loss when the absolute error is small (< 1) and the L1
// loss when it is large.
dx
=
dx
-
target_dx
;
dy
=
dy
-
target_dy
;
dw
=
dw
-
target_dw
;
dh
=
dh
-
target_dh
;
// use smoothed L1
double
ldx
=
std
::
abs
(
dx
)
<
1
?
0
.
5
*
dx
*
dx
:
std
::
abs
(
dx
)
-
0
.
5
;
double
ldy
=
std
::
abs
(
dy
)
<
1
?
0
.
5
*
dy
*
dy
:
std
::
abs
(
dy
)
-
0
.
5
;
double
ldw
=
std
::
abs
(
dw
)
<
1
?
0
.
5
*
dw
*
dw
:
std
::
abs
(
dw
)
-
0
.
5
;
double
ldh
=
std
::
abs
(
dh
)
<
1
?
0
.
5
*
dh
*
dh
:
std
::
abs
(
dh
)
-
0
.
5
;
loss
+=
options
.
bbr_lambda
*
(
ldx
+
ldy
+
ldw
+
ldh
);
// now compute the derivatives of the smoothed L1 loss
ldx
=
put_in_range
(
-
1
,
1
,
dx
);
ldy
=
put_in_range
(
-
1
,
1
,
dy
);
ldw
=
put_in_range
(
-
1
,
1
,
dw
);
ldh
=
put_in_range
(
-
1
,
1
,
dh
);
// also add the scaled smoothed L1 gradient to the gradient output
g
[
dets
[
i
].
tensor_offset_dx
]
+=
scale
*
options
.
bbr_lambda
*
ldx
;
g
[
dets
[
i
].
tensor_offset_dy
]
+=
scale
*
options
.
bbr_lambda
*
ldy
;
g
[
dets
[
i
].
tensor_offset_dw
]
+=
scale
*
options
.
bbr_lambda
*
ldw
;
g
[
dets
[
i
].
tensor_offset_dh
]
+=
scale
*
options
.
bbr_lambda
*
ldh
;
}
}
else
{
...
...
@@ -1299,6 +1387,9 @@ namespace dlib
out
<<
", loss per FA:"
<<
opts
.
loss_per_false_alarm
;
out
<<
", loss per miss:"
<<
opts
.
loss_per_missed_target
;
out
<<
", truth match IOU thresh:"
<<
opts
.
truth_match_iou_threshold
;
out
<<
", use_bounding_box_regression:"
<<
opts
.
use_bounding_box_regression
;
if
(
opts
.
use_bounding_box_regression
)
out
<<
", bbr_lambda:"
<<
opts
.
bbr_lambda
;
out
<<
", overlaps_nms:("
<<
opts
.
overlaps_nms
.
get_iou_thresh
()
<<
","
<<
opts
.
overlaps_nms
.
get_percent_covered_thresh
()
<<
")"
;
out
<<
", overlaps_ignore:("
<<
opts
.
overlaps_ignore
.
get_iou_thresh
()
<<
","
<<
opts
.
overlaps_ignore
.
get_percent_covered_thresh
()
<<
")"
;
...
...
@@ -1325,11 +1416,19 @@ namespace dlib
)
const
{
DLIB_CASSERT
(
net
.
sample_expansion_factor
()
==
1
,
net
.
sample_expansion_factor
());
if
(
options
.
use_bounding_box_regression
)
{
DLIB_CASSERT
(
output_tensor
.
k
()
==
(
long
)
options
.
detector_windows
.
size
()
*
5
);
}
else
{
DLIB_CASSERT
(
output_tensor
.
k
()
==
(
long
)
options
.
detector_windows
.
size
());
}
const
float
*
out_data
=
output_tensor
.
host
()
+
output_tensor
.
k
()
*
output_tensor
.
nr
()
*
output_tensor
.
nc
()
*
i
;
// scan the final layer and output the positive scoring locations
dets_accum
.
clear
();
for
(
long
k
=
0
;
k
<
o
utput_tensor
.
k
();
++
k
)
for
(
long
k
=
0
;
k
<
o
ptions
.
detector_windows
.
size
();
++
k
)
{
for
(
long
r
=
0
;
r
<
output_tensor
.
nr
();
++
r
)
{
...
...
@@ -1343,6 +1442,28 @@ namespace dlib
rect
=
input_layer
(
net
).
tensor_space_to_image_space
(
input_tensor
,
rect
);
dets_accum
.
push_back
(
intermediate_detection
(
rect
,
score
,
(
k
*
output_tensor
.
nr
()
+
r
)
*
output_tensor
.
nc
()
+
c
,
k
));
if
(
options
.
use_bounding_box_regression
)
{
const
auto
offset
=
options
.
detector_windows
.
size
()
+
k
*
4
;
dets_accum
.
back
().
tensor_offset_dx
=
((
offset
+
0
)
*
output_tensor
.
nr
()
+
r
)
*
output_tensor
.
nc
()
+
c
;
dets_accum
.
back
().
tensor_offset_dy
=
((
offset
+
1
)
*
output_tensor
.
nr
()
+
r
)
*
output_tensor
.
nc
()
+
c
;
dets_accum
.
back
().
tensor_offset_dw
=
((
offset
+
2
)
*
output_tensor
.
nr
()
+
r
)
*
output_tensor
.
nc
()
+
c
;
dets_accum
.
back
().
tensor_offset_dh
=
((
offset
+
3
)
*
output_tensor
.
nr
()
+
r
)
*
output_tensor
.
nc
()
+
c
;
// apply BBR to dets_accum.back()
double
dx
=
out_data
[
dets_accum
.
back
().
tensor_offset_dx
];
double
dy
=
out_data
[
dets_accum
.
back
().
tensor_offset_dy
];
double
dw
=
out_data
[
dets_accum
.
back
().
tensor_offset_dw
];
double
dh
=
out_data
[
dets_accum
.
back
().
tensor_offset_dh
];
dw
=
std
::
exp
(
dw
);
dh
=
std
::
exp
(
dh
);
double
w
=
rect
.
width
()
-
1
;
double
h
=
rect
.
height
()
-
1
;
rect
=
translate_rect
(
rect
,
dpoint
(
dx
*
w
,
dy
*
h
));
rect
=
centered_drect
(
rect
,
w
*
dw
+
1
,
h
*
dh
+
1
);
dets_accum
.
back
().
rect_bbr
=
rect
;
}
}
}
}
...
...
dlib/dnn/loss_abstract.h
View file @
7b006f37
...
...
@@ -652,6 +652,21 @@ namespace dlib
// However, sometimes scale-invariance may not be desired.
use_image_pyramid
assume_image_pyramid
=
use_image_pyramid
::
yes
;
// By default, the mmod loss doesn't train any bounding box regression model. But
// if you set use_bounding_box_regression == true then it expects the network to
// output a tensor with detector_windows.size()*5 channels rather than just
// detector_windows.size() channels. The 4 extra channels per window are trained
// to give a bounding box regression output that improves the positioning of the
// output detection box.
bool
use_bounding_box_regression
=
false
;
// When using bounding box regression, bbr_lambda determines how much you care
// about getting the bounding box shape correct vs just getting the detector to
// find objects. That is, the objective function being optimized is
// basic_mmod_loss + bbr_lambda*bounding_box_regression_loss. So setting
// bbr_lambda to a larger value will cause the overall loss to care more about
// getting the bounding box shape correct.
double
bbr_lambda
=
100
;
mmod_options
(
const
std
::
vector
<
std
::
vector
<
mmod_rect
>>&
boxes
,
const
unsigned
long
target_size
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment