Commit 1fe5aa05 authored 8 years ago by Davis King
Adding more possible metric loss layers.
parent 1c19a80a
Showing 1 changed file with 304 additions and 19 deletions: dlib/dnn/loss.h (+304, -19)
dlib/dnn/loss.h
@@ -870,6 +870,35 @@ namespace dlib
    public:

        typedef unsigned long training_label_type;
        typedef matrix<float,0,1> output_label_type;

        template <
            typename SUB_TYPE,
            typename label_iterator
            >
        void to_label (
            const tensor& input_tensor,
            const SUB_TYPE& sub,
            label_iterator iter
        ) const
        {
            const tensor& output_tensor = sub.get_output();
            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
            DLIB_CASSERT(input_tensor.num_samples() != 0);
            DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0);
            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
            DLIB_CASSERT(output_tensor.nr() == 1 && output_tensor.nc() == 1);

            const float* p = output_tensor.host();
            for (long i = 0; i < output_tensor.num_samples(); ++i)
            {
                *iter = mat(p, output_tensor.k(), 1);
                ++iter;
                p += output_tensor.k();
            }
        }

        template <
@@ -896,23 +925,60 @@ namespace dlib
                          grad.nc() == 1);

-            const float margin = 0.5;
+            const float margin = 0.1;
+            const float dist_thresh = 0.75;

             temp.set_size(output_tensor.num_samples(), output_tensor.num_samples());
             grad_mul.copy_size(temp);

             tt::gemm(0, temp, 1, output_tensor, false, output_tensor, true);

-            const double num_pairs = output_tensor.num_samples()*(output_tensor.num_samples()-1)/2.0;
+            std::vector<double> temp_threshs;
+            const float* d = temp.host();
+            double loss = 0;
+            double num_pos_samps = 0.0001;
+            double num_neg_samps = 0.0001;
+            for (long r = 0; r < temp.num_samples(); ++r)
+            {
+                auto xx = d[r*temp.num_samples()+r];
+                const auto x_label = *(truth+r);
+                for (long c = r+1; c < temp.num_samples(); ++c)
+                {
+                    const auto y_label = *(truth+c);
+                    if (x_label == y_label)
+                    {
+                        ++num_pos_samps;
+                    }
+                    else
+                    {
+                        ++num_neg_samps;
+                        // Figure out what distance threshold, when applied to the negative pairs,
+                        // causes there to be an equal number of positive and negative pairs.
+                        auto yy = d[c*temp.num_samples()+c];
+                        auto xy = d[r*temp.num_samples()+c];
+                        // compute the distance between x and y samples.
+                        auto d2 = xx + yy - 2*xy;
+                        if (d2 < 0)
+                            d2 = 0;
+                        temp_threshs.push_back(d2);
+                    }
+                }
+            }

             // The whole objective function is multiplied by this to scale the loss
             // relative to the number of things in the mini-batch.
-            const double scale = 1.0/num_pairs;
+            const double scale = 0.5/num_pos_samps;
+            DLIB_CASSERT(num_pos_samps >= 1, "Make sure each mini-batch contains both positive pairs and negative pairs");
+            DLIB_CASSERT(num_neg_samps >= 1, "Make sure each mini-batch contains both positive pairs and negative pairs");

-            double loss = 0;
+            std::sort(temp_threshs.begin(), temp_threshs.end());
+            const float neg_thresh = std::sqrt(temp_threshs[std::min(num_pos_samps,num_neg_samps)-1]);

             // loop over all the pairs of training samples and compute the loss and
-            // gradients.
-            const float* d = temp.host();
+            // gradients.  Note that we only use the hardest negative pairs and that in
+            // particular we pick the number of negative pairs equal to the number of
+            // positive pairs so everything is balanced.
             float* gm = grad_mul.host();
             for (long r = 0; r < temp.num_samples(); ++r)
             {
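For reference, the tt::gemm call above stores the Gram matrix of the embeddings in temp (temp = output_tensor * trans(output_tensor)), so the squared distance between samples r and c is recovered as temp(r,r) + temp(c,c) - 2*temp(r,c), which is exactly the xx + yy - 2*xy expression in the counting loop. The following standalone sketch, not part of the diff and with made-up inputs and hypothetical names, illustrates that identity and the way a cutoff like neg_thresh keeps only as many (hardest) negative pairs as there are positive pairs.

    // Illustrative only: recover pairwise squared distances from a Gram matrix
    // G = E*E' and pick a threshold that keeps as many negative pairs as there
    // are positive pairs.  All names here are hypothetical.
    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <vector>

    int main()
    {
        // Three 2D embeddings; samples 0 and 1 share a label.
        std::vector<std::vector<float>> embeddings = {{0,0}, {0.1f,0}, {1,1}};
        std::vector<unsigned long> labels = {7, 7, 3};
        const long n = embeddings.size();

        // Gram matrix G(i,j) = <e_i, e_j>, the role played by temp in the diff.
        std::vector<float> G(n*n);
        for (long i = 0; i < n; ++i)
            for (long j = 0; j < n; ++j)
                G[i*n+j] = embeddings[i][0]*embeddings[j][0] + embeddings[i][1]*embeddings[j][1];

        std::vector<double> neg_d2;   // squared distances of negative pairs
        long num_pos = 0;
        for (long r = 0; r < n; ++r)
        {
            for (long c = r+1; c < n; ++c)
            {
                double d2 = G[r*n+r] + G[c*n+c] - 2*G[r*n+c];   // ||e_r - e_c||^2
                if (labels[r] == labels[c])
                    ++num_pos;
                else
                    neg_d2.push_back(std::max(d2, 0.0));
            }
        }

        // Keep only the num_pos closest (hardest) negative pairs: sort the
        // negative distances and take the num_pos-th smallest as the cutoff,
        // analogous to neg_thresh above.
        std::sort(neg_d2.begin(), neg_d2.end());
        long keep = std::min<long>(num_pos, neg_d2.size());
        if (keep > 0)
            std::cout << "negative-pair cutoff ~ " << std::sqrt(neg_d2[keep-1]) << "\n";
    }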
@@ -936,32 +1002,34 @@ namespace dlib
                     if (x_label == y_label)
                     {
-                        // Things with the same label should have distances < 1 between
+                        // Things with the same label should have distances < dist_thresh between
                         // them.  If not then we experience non-zero loss.
-                        if (d2 > 1-margin)
+                        if (d2 < dist_thresh-margin)
                         {
-                            loss += scale*(d2 - (1-margin));
-                            gm[r*temp.num_samples()+r] += scale/d2;
-                            gm[r*temp.num_samples()+c] = -scale/d2;
+                            gm[r*temp.num_samples()+c] = 0;
                         }
                         else
                         {
-                            gm[r*temp.num_samples()+c] = 0;
+                            loss += scale*(d2 - (dist_thresh-margin));
+                            gm[r*temp.num_samples()+r] += scale/d2;
+                            gm[r*temp.num_samples()+c] = -scale/d2;
                         }
                     }
                     else
                     {
-                        // Things with different labels should have distances > 1 between
+                        // Things with different labels should have distances > dist_thresh between
                         // them.  If not then we experience non-zero loss.
-                        if (d2 < 1+margin)
+                        if (d2 > dist_thresh+margin || d2 > neg_thresh)
                         {
-                            loss += scale*((1+margin) - d2);
-                            gm[r*temp.num_samples()+r] -= scale/d2;
-                            gm[r*temp.num_samples()+c] = scale/d2;
+                            gm[r*temp.num_samples()+c] = 0;
                         }
                         else
                         {
-                            gm[r*temp.num_samples()+c] = 0;
+                            loss += scale*((dist_thresh+margin) - d2);
+                            // don't divide by zero (or a really small number)
+                            d2 = std::max(d2, 0.001f);
+                            gm[r*temp.num_samples()+r] -= scale/d2;
+                            gm[r*temp.num_samples()+c] = scale/d2;
                         }
                     }
                 }
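Written out, the hunk above accumulates two hinge terms per pair: a same-label pair contributes max(0, d - (dist_thresh - margin)), and a different-label pair contributes (dist_thresh + margin) - d, but only if it is both inside the margin and among the hardest negatives (d <= neg_thresh). The sketch below is not from the diff; it just restates those conditions as a pure function, assuming d2 is the Euclidean distance between the two embeddings (consistent with neg_thresh being computed as a square root above).

    // Illustrative only: the per-pair hinge terms of the updated loss_metric_.
    // dist_thresh, margin and neg_thresh mirror the variables in the diff.
    #include <algorithm>

    double pair_loss (
        bool same_label,
        double d2,          // distance between the two embeddings
        double neg_thresh,  // cutoff selecting the hardest negative pairs
        double dist_thresh = 0.75,
        double margin = 0.1
    )
    {
        if (same_label)
        {
            // positive pairs should be closer than dist_thresh-margin
            return std::max(0.0, d2 - (dist_thresh - margin));
        }
        else
        {
            // negative pairs only contribute if they are both inside the margin
            // and among the hardest (closest) negatives
            if (d2 > dist_thresh + margin || d2 > neg_thresh)
                return 0;
            return (dist_thresh + margin) - d2;
        }
    }

In the diff each such term is additionally multiplied by scale = 0.5/num_pos_samps, and the +/-scale/d2 entries written into grad_mul are turned into the embedding gradient by a final gemm against output_tensor.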
@@ -983,7 +1051,7 @@ namespace dlib
             std::string version;
             deserialize(version, in);
             if (version != "loss_metric_")
-                throw serialization_error("Unexpected version found while deserializing dlib::loss_metric_.");
+                throw serialization_error("Unexpected version found while deserializing dlib::loss_metric_.  Instead found " + version);
         }

         friend std::ostream& operator<< (std::ostream& out, const loss_metric_& )
@@ -1007,6 +1075,223 @@ namespace dlib
    template <typename SUBNET>
    using loss_metric = add_loss_layer<loss_metric_, SUBNET>;

// ----------------------------------------------------------------------------------------

    class loss_metric_hardish_
    {
    public:

        typedef unsigned long training_label_type;
        typedef matrix<float,0,1> output_label_type;

        template <
            typename SUB_TYPE,
            typename label_iterator
            >
        void to_label (
            const tensor& input_tensor,
            const SUB_TYPE& sub,
            label_iterator iter
        ) const
        {
            const tensor& output_tensor = sub.get_output();
            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
            DLIB_CASSERT(input_tensor.num_samples() != 0);
            DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0);
            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
            DLIB_CASSERT(output_tensor.nr() == 1 && output_tensor.nc() == 1);

            const float* p = output_tensor.host();
            for (long i = 0; i < output_tensor.num_samples(); ++i)
            {
                *iter = mat(p, output_tensor.k(), 1);
                ++iter;
                p += output_tensor.k();
            }
        }

        template <
            typename const_label_iterator,
            typename SUBNET
            >
        double compute_loss_value_and_gradient (
            const tensor& input_tensor,
            const_label_iterator truth,
            SUBNET& sub
        ) const
        {
            const tensor& output_tensor = sub.get_output();
            tensor& grad = sub.get_gradient_input();

            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
            DLIB_CASSERT(input_tensor.num_samples() != 0);
            DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0);
            DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
            DLIB_CASSERT(output_tensor.nr() == 1 && output_tensor.nc() == 1);
            DLIB_CASSERT(grad.nr() == 1 && grad.nc() == 1);

            const float margin = 0.1;
            const float dist_thresh = 0.75;

            temp.set_size(output_tensor.num_samples(), output_tensor.num_samples());
            grad_mul.copy_size(temp);

            tt::gemm(0, temp, 1, output_tensor, false, output_tensor, true);

            const float* d = temp.host();
            double loss = 0;
            double num_pos_samps = 0.0001;
            double num_active_neg_samps = 0.0001;
            for (long r = 0; r < temp.num_samples(); ++r)
            {
                auto xx = d[r*temp.num_samples()+r];
                const auto x_label = *(truth+r);
                for (long c = r+1; c < temp.num_samples(); ++c)
                {
                    const auto y_label = *(truth+c);
                    if (x_label == y_label)
                    {
                        ++num_pos_samps;
                    }
                    else
                    {
                        // Figure out what distance threshold, when applied to the negative pairs,
                        // causes there to be an equal number of positive and negative pairs.
                        auto yy = d[c*temp.num_samples()+c];
                        auto xy = d[r*temp.num_samples()+c];
                        // compute the distance between x and y samples.
                        auto d2 = xx + yy - 2*xy;
                        if (d2 < 0)
                            d2 = 0;
                        else
                            d2 = std::sqrt(d2);
                        if (d2 < dist_thresh+margin)
                            ++num_active_neg_samps;
                    }
                }
            }

            // The whole objective function is multiplied by this to scale the loss
            // relative to the number of things in the mini-batch.
            const double scale = 0.5/num_pos_samps;
            DLIB_CASSERT(num_pos_samps >= 1, "Make sure each mini-batch contains both positive pairs and negative pairs");

            // We will pick which negative pairs to include in the objective by selecting
            // randomly among the pairs that violate the margin.  This means we should pick
            // this fraction of the margin violators if we want the pos/neg ratio to be
            // balanced.
            const double neg_select_probability = num_pos_samps/num_active_neg_samps;

            // loop over all the pairs of training samples and compute the loss and
            // gradients.  Note that we only use the hardest negative pairs and that in
            // particular we pick the number of negative pairs equal to the number of
            // positive pairs so everything is balanced.
            float* gm = grad_mul.host();
            for (long r = 0; r < temp.num_samples(); ++r)
            {
                gm[r*temp.num_samples()+r] = 0;
                const auto x_label = *(truth+r);
                auto xx = d[r*temp.num_samples()+r];
                for (long c = 0; c < temp.num_samples(); ++c)
                {
                    if (r == c)
                        continue;
                    const auto y_label = *(truth+c);
                    auto yy = d[c*temp.num_samples()+c];
                    auto xy = d[r*temp.num_samples()+c];

                    // compute the distance between x and y samples.
                    auto d2 = xx + yy - 2*xy;
                    if (d2 <= 0)
                        d2 = 0;
                    else
                        d2 = std::sqrt(d2);

                    if (x_label == y_label)
                    {
                        // Things with the same label should have distances < dist_thresh between
                        // them.  If not then we experience non-zero loss.
                        if (d2 < dist_thresh-margin)
                        {
                            gm[r*temp.num_samples()+c] = 0;
                        }
                        else
                        {
                            loss += scale*(d2 - (dist_thresh-margin));
                            gm[r*temp.num_samples()+r] += scale/d2;
                            gm[r*temp.num_samples()+c] = -scale/d2;
                        }
                    }
                    else
                    {
                        // Things with different labels should have distances > dist_thresh between
                        // them.  If not then we experience non-zero loss.
                        if (d2 > dist_thresh+margin || rnd.get_random_double() > neg_select_probability)
                        {
                            gm[r*temp.num_samples()+c] = 0;
                        }
                        else
                        {
                            loss += scale*((dist_thresh+margin) - d2);
                            // don't divide by zero (or a really small number)
                            d2 = std::max(d2, 0.001f);
                            gm[r*temp.num_samples()+r] -= scale/d2;
                            gm[r*temp.num_samples()+c] = scale/d2;
                        }
                    }
                }
            }

            tt::gemm(0, grad, 1, grad_mul, false, output_tensor, false);

            return loss;
        }

        friend void serialize(const loss_metric_hardish_& , std::ostream& out)
        {
            serialize("loss_metric_hardish_", out);
        }

        friend void deserialize(loss_metric_hardish_& , std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "loss_metric_hardish_")
                throw serialization_error("Unexpected version found while deserializing dlib::loss_metric_hardish_. Instead found: " + version);
        }

        friend std::ostream& operator<< (std::ostream& out, const loss_metric_hardish_& )
        {
            out << "loss_metric_hardish";
            return out;
        }

        friend void to_xml(const loss_metric_hardish_& /*item*/, std::ostream& out)
        {
            out << "<loss_metric_hardish/>";
        }

    private:
        // These variables are only here to avoid being reallocated over and over in
        // compute_loss_value_and_gradient()
        mutable resizable_tensor temp, grad_mul;
        mutable dlib::rand rnd;

    };

    template <typename SUBNET>
    using loss_metric_hardish = add_loss_layer<loss_metric_hardish_, SUBNET>;

// ----------------------------------------------------------------------------------------

}
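The random negative selection in loss_metric_hardish_ works as the comment describes: if a mini-batch yields, say, 40 positive pairs and 400 negative pairs that violate the margin, then neg_select_probability is 40/400 = 0.1, so roughly 40 negatives are kept in expectation and the positive and negative contributions stay balanced. Below is a minimal usage sketch, not part of this commit; the embedding architecture, layer sizes, and mini-batch construction are made up for illustration, and only the loss_metric_hardish alias comes from the diff above.

    // Hypothetical usage of the new loss layer; architecture and data are
    // illustrative only.
    #include <dlib/dnn.h>
    #include <vector>

    using namespace dlib;

    // A tiny embedding network: two fully connected layers producing a
    // 32-dimensional embedding, trained with the new hardish metric loss.
    using net_type = loss_metric_hardish<fc<32, relu<fc<64, input<matrix<float,0,1>>>>>>;

    int main()
    {
        net_type net;
        dnn_trainer<net_type> trainer(net);
        trainer.set_learning_rate(0.01);

        // Each mini-batch must contain both positive pairs (same label) and
        // negative pairs (different labels), as the DLIB_CASSERTs above require.
        std::vector<matrix<float,0,1>> samples;
        std::vector<unsigned long> labels;   // training_label_type is unsigned long
        // ... fill samples/labels so that some labels repeat within the batch ...

        if (!samples.empty())
            trainer.train_one_step(samples, labels);
    }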