Commit ebdc064c authored Jul 07, 2017 by Davis King
merged (parents: 917dcad3, 0ed1ce61)
Showing 4 changed files with 337 additions and 7 deletions (+337, -7)

    dlib/dnn/loss.h           +145  -3
    dlib/dnn/loss_abstract.h   +88  -0
    dlib/test/dnn.cpp         +100  -4
    setup.py                    +4  -0
dlib/dnn/loss.h
...

@@ -1549,11 +1549,11 @@ namespace dlib
             typename SUB_TYPE,
             typename label_iterator
             >
-        void to_label (
+        static void to_label (
             const tensor& input_tensor,
             const SUB_TYPE& sub,
             label_iterator iter
-        ) const
+        )
         {
             DLIB_CASSERT(sub.sample_expansion_factor() == 1);
...

@@ -1678,7 +1678,7 @@ namespace dlib
             std::string version;
             deserialize(version, in);
             if (version != "loss_multiclass_log_per_pixel_")
-                throw serialization_error("Unexpected version found while deserializing dlib::loss_multiclass_log_.");
+                throw serialization_error("Unexpected version found while deserializing dlib::loss_multiclass_log_per_pixel_.");
         }

         friend std::ostream& operator<<(std::ostream& out, const loss_multiclass_log_per_pixel_& )
...
@@ -1704,6 +1704,148 @@ namespace dlib
     template <typename SUBNET>
     using loss_multiclass_log_per_pixel = add_loss_layer<loss_multiclass_log_per_pixel_, SUBNET>;

+// ----------------------------------------------------------------------------------------
+
+    class loss_multiclass_log_per_pixel_weighted_
+    {
+    public:
+
+        struct weighted_label
+        {
+            weighted_label()
+            {}
+
+            weighted_label(uint16_t label, float weight = 1.f)
+                : label(label), weight(weight)
+            {}
+
+            // In semantic segmentation, 65536 classes ought to be enough for anybody.
+            uint16_t label = 0;
+            float weight = 1.f;
+        };
+
+        typedef matrix<weighted_label> training_label_type;
+        typedef matrix<uint16_t> output_label_type;
+
+        template <
+            typename SUB_TYPE,
+            typename label_iterator
+            >
+        static void to_label (
+            const tensor& input_tensor,
+            const SUB_TYPE& sub,
+            label_iterator iter
+        )
+        {
+            loss_multiclass_log_per_pixel_::to_label(input_tensor, sub, iter);
+        }
+
+        template <
+            typename const_label_iterator,
+            typename SUBNET
+            >
+        double compute_loss_value_and_gradient (
+            const tensor& input_tensor,
+            const_label_iterator truth,
+            SUBNET& sub
+        ) const
+        {
+            const tensor& output_tensor = sub.get_output();
+            tensor& grad = sub.get_gradient_input();
+
+            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
+            DLIB_CASSERT(input_tensor.num_samples() != 0);
+            DLIB_CASSERT(input_tensor.num_samples() % sub.sample_expansion_factor() == 0);
+            DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
+            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
+            DLIB_CASSERT(output_tensor.k() >= 1);
+            DLIB_CASSERT(output_tensor.k() < std::numeric_limits<uint16_t>::max());
+            DLIB_CASSERT(output_tensor.nr() == grad.nr() &&
+                         output_tensor.nc() == grad.nc() &&
+                         output_tensor.k() == grad.k());
+            for (long idx = 0; idx < output_tensor.num_samples(); ++idx)
+            {
+                const_label_iterator truth_matrix_ptr = (truth + idx);
+                DLIB_CASSERT(truth_matrix_ptr->nr() == output_tensor.nr() &&
+                             truth_matrix_ptr->nc() == output_tensor.nc(),
+                             "truth size = " << truth_matrix_ptr->nr() << " x " << truth_matrix_ptr->nc() << ", "
+                             "output size = " << output_tensor.nr() << " x " << output_tensor.nc());
+            }
+
+            tt::softmax(grad, output_tensor);
+
+            // The loss we output is the weighted average loss over the mini-batch, and also over each element of the matrix output.
+            const double scale = 1.0 / (output_tensor.num_samples() * output_tensor.nr() * output_tensor.nc());
+            double loss = 0;
+            float* const g = grad.host();
+            for (long i = 0; i < output_tensor.num_samples(); ++i, ++truth)
+            {
+                for (long r = 0; r < output_tensor.nr(); ++r)
+                {
+                    for (long c = 0; c < output_tensor.nc(); ++c)
+                    {
+                        const weighted_label& weighted_label = truth->operator()(r, c);
+                        const uint16_t y = weighted_label.label;
+                        const float weight = weighted_label.weight;
+                        // The network must produce a number of outputs that is equal to the number
+                        // of labels when using this type of loss.
+                        DLIB_CASSERT(static_cast<long>(y) < output_tensor.k() || weight == 0.f,
+                                     "y: " << y << ", output_tensor.k(): " << output_tensor.k());
+                        for (long k = 0; k < output_tensor.k(); ++k)
+                        {
+                            const size_t idx = tensor_index(output_tensor, i, r, c, k);
+                            if (k == y)
+                            {
+                                loss += weight*scale*-std::log(g[idx]);
+                                g[idx] = weight*scale*(g[idx] - 1);
+                            }
+                            else
+                            {
+                                g[idx] = weight*scale*g[idx];
+                            }
+                        }
+                    }
+                }
+            }
+            return loss;
+        }
+
+        friend void serialize(const loss_multiclass_log_per_pixel_weighted_& , std::ostream& out)
+        {
+            serialize("loss_multiclass_log_per_pixel_weighted_", out);
+        }
+
+        friend void deserialize(loss_multiclass_log_per_pixel_weighted_& , std::istream& in)
+        {
+            std::string version;
+            deserialize(version, in);
+            if (version != "loss_multiclass_log_per_pixel_weighted_")
+                throw serialization_error("Unexpected version found while deserializing dlib::loss_multiclass_log_per_pixel_weighted_.");
+        }
+
+        friend std::ostream& operator<<(std::ostream& out, const loss_multiclass_log_per_pixel_weighted_& )
+        {
+            out << "loss_multiclass_log_per_pixel_weighted";
+            return out;
+        }
+
+        friend void to_xml(const loss_multiclass_log_per_pixel_weighted_& /*item*/, std::ostream& out)
+        {
+            out << "<loss_multiclass_log_per_pixel_weighted/>";
+        }
+
+    private:
+        static size_t tensor_index(const tensor& t, long sample, long row, long column, long k)
+        {
+            // See: https://github.com/davisking/dlib/blob/4dfeb7e186dd1bf6ac91273509f687293bd4230a/dlib/dnn/tensor_abstract.h#L38
+            return ((sample*t.k() + k)*t.nr() + row)*t.nc() + column;
+        }
+    };
+
+    template <typename SUBNET>
+    using loss_multiclass_log_per_pixel_weighted = add_loss_layer<loss_multiclass_log_per_pixel_weighted_, SUBNET>;
+
+// ----------------------------------------------------------------------------------------
+
 }
...
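For reference, the weighted average that compute_loss_value_and_gradient computes above can be written out as follows (the notation is mine, not part of the patch). With N = output_tensor.num_samples(), R = output_tensor.nr(), C = output_tensor.nc(), K = output_tensor.k(), softmax outputs p_{irck}, truth labels y_{irc}, and per-pixel weights w_{irc}, the returned loss is

    L = \frac{1}{NRC} \sum_{i=1}^{N} \sum_{r=1}^{R} \sum_{c=1}^{C} w_{irc} \, \bigl( -\log p_{irc,\,y_{irc}} \bigr)

and the gradient written into grad, taken with respect to the logits z_{irck} feeding the softmax, is

    \frac{\partial L}{\partial z_{irck}} = \frac{w_{irc}}{NRC} \, \bigl( p_{irck} - \mathbf{1}[k = y_{irc}] \bigr)

so a zero weight removes a pixel from both the loss and the gradient. The private tensor_index() helper maps (i, r, c, k) to the flat row-major offset ((i*K + k)*R + r)*C + c used by these sums.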
dlib/dnn/loss_abstract.h
...

@@ -863,6 +863,94 @@ namespace dlib
     template <typename SUBNET>
     using loss_multiclass_log_per_pixel = add_loss_layer<loss_multiclass_log_per_pixel_, SUBNET>;

+// ----------------------------------------------------------------------------------------
+
+    class loss_multiclass_log_per_pixel_weighted_
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object implements the loss layer interface defined above by
+                EXAMPLE_LOSS_LAYER_.  In particular, it implements the multiclass logistic
+                regression loss (e.g. negative log-likelihood loss), which is appropriate
+                for multiclass classification problems.  It is basically just like
+                loss_multiclass_log_per_pixel_ except that it lets you define per-pixel
+                weights, which may be useful e.g. if you want to emphasize rare classes
+                while training.  (If the classification problem is difficult, a flat weight
+                structure may lead the network to always predict the most common label, in
+                particular if the degree of imbalance is high.  To emphasize a certain
+                class or classes, simply increase the weights of the corresponding pixels,
+                relative to the weights of the other pixels.)
+
+                Note that if you set the weight to 0 whenever a pixel's label is equal to
+                loss_multiclass_log_per_pixel_::label_to_ignore, and to 1 otherwise, then
+                you essentially get loss_multiclass_log_per_pixel_ as a special case.
+        !*/
+    public:
+
+        struct weighted_label
+        {
+            /*!
+                WHAT THIS OBJECT REPRESENTS
+                    This object represents the truth label of a single pixel, together with
+                    an associated weight (the higher the weight, the more emphasis the
+                    corresponding pixel is given during the training).
+            !*/
+            weighted_label();
+            weighted_label(uint16_t label, float weight = 1.f);
+
+            // The ground-truth label.  In semantic segmentation, 65536 classes ought to be
+            // enough for anybody.
+            uint16_t label = 0;
+            // The weight of the corresponding pixel.
+            float weight = 1.f;
+        };
+
+        typedef matrix<weighted_label> training_label_type;
+        typedef matrix<uint16_t> output_label_type;
+
+        template <
+            typename SUB_TYPE,
+            typename label_iterator
+            >
+        void to_label (
+            const tensor& input_tensor,
+            const SUB_TYPE& sub,
+            label_iterator iter
+        ) const;
+        /*!
+            This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
+            it has the additional calling requirements that:
+                - sub.get_output().num_samples() == input_tensor.num_samples()
+                - sub.sample_expansion_factor() == 1
+            and the output label is the predicted class for each classified element.  The
+            number of possible output classes is sub.get_output().k().
+        !*/
+
+        template <
+            typename const_label_iterator,
+            typename SUBNET
+            >
+        double compute_loss_value_and_gradient (
+            const tensor& input_tensor,
+            const_label_iterator truth,
+            SUBNET& sub
+        ) const;
+        /*!
+            This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient()
+            except it has the additional calling requirements that:
+                - sub.get_output().num_samples() == input_tensor.num_samples()
+                - sub.sample_expansion_factor() == 1
+                - all labels pointed to by truth are < sub.get_output().k(), or the
+                  corresponding weight is zero.
+        !*/
+    };
+
+    template <typename SUBNET>
+    using loss_multiclass_log_per_pixel_weighted = add_loss_layer<loss_multiclass_log_per_pixel_weighted_, SUBNET>;
+
+// ----------------------------------------------------------------------------------------
+
 }
...
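To make the intended use concrete, here is a minimal training sketch against the interface documented above. The 1x1-conv network shape, image size, and the 5.0f emphasis weight are illustrative assumptions of mine, not part of the patch; the types and trainer calls mirror the test added below.

    #include <dlib/dnn.h>
    using namespace dlib;

    // Hypothetical toy net: a 1x1 convolution mapping each pixel to 3 class scores.
    using net_type = loss_multiclass_log_per_pixel_weighted<
                         con<3,1,1,1,1, input<matrix<float>>>>;

    int main()
    {
        using weighted_label = loss_multiclass_log_per_pixel_weighted_::weighted_label;

        std::vector<matrix<float>> images(1);
        images[0].set_size(5,5);
        images[0] = 0;

        // One truth matrix per image: every pixel carries a label and a weight.
        std::vector<matrix<weighted_label>> labels(1);
        labels[0].set_size(5,5);
        for (long r = 0; r < labels[0].nr(); ++r)
            for (long c = 0; c < labels[0].nc(); ++c)
                labels[0](r,c) = weighted_label(0);       // label 0, default weight 1.f
        labels[0](2,2) = weighted_label(2, 5.0f);          // emphasize a rarer class here

        net_type net;
        dnn_trainer<net_type> trainer(net);
        trainer.set_max_num_epochs(10);
        trainer.set_mini_batch_size(1);
        trainer.train(images, labels);

        // Inference yields plain matrix<uint16_t> label maps (output_label_type).
        const matrix<uint16_t> prediction = net(images[0]);
        return 0;
    }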
dlib/test/dnn.cpp
...

@@ -2331,7 +2331,102 @@ namespace
 // ----------------------------------------------------------------------------------------

-    void test_tensor_resize_bilienar(long samps, long k, long nr, long nc, long onr, long onc)
+    void test_loss_multiclass_per_pixel_weighted()
     {
+        // Train with pixel-specific weights
+        print_spinner();
+
+        constexpr int input_height = 5;
+        constexpr int input_width = 7;
+        constexpr int output_height = input_height;
+        constexpr int output_width = input_width;
+        const int num_samples = 1000;
+        const int num_classes = 6;
+
+        ::std::default_random_engine generator(16);
+        ::std::uniform_real_distribution<double> u01(0.0, 1.0);
+        ::std::uniform_int_distribution<uint16_t> noisy_label(0, num_classes - 1);
+
+        ::std::vector<matrix<double>> x(num_samples);
+        ::std::vector<matrix<uint16_t>> y(num_samples);
+
+        matrix<double> xtmp(input_height, input_width);
+        matrix<uint16_t> ytmp(output_height, output_width);
+
+        // Generate input data
+        for (int ii = 0; ii < num_samples; ++ii)
+        {
+            for (int jj = 0; jj < input_height; ++jj)
+            {
+                for (int kk = 0; kk < input_width; ++kk)
+                {
+                    xtmp(jj, kk) = u01(generator);
+                    ytmp(jj, kk) = noisy_label(generator);
+                }
+            }
+            x[ii] = xtmp;
+            y[ii] = ytmp;
+        }
+
+        using net_type = loss_multiclass_log_per_pixel_weighted<con<num_classes,1,1,1,1,input<matrix<double>>>>;
+        using weighted_label = loss_multiclass_log_per_pixel_weighted_::weighted_label;
+
+        ::std::vector<matrix<weighted_label>> y_weighted(num_samples);
+
+        for (int weighted_class = 0; weighted_class < num_classes; ++weighted_class)
+        {
+            print_spinner();
+
+            // Assign weights
+            for (int ii = 0; ii < num_samples; ++ii)
+            {
+                if (weighted_class == 0)
+                {
+                    y_weighted[ii].set_size(input_height, input_width);
+                }
+                for (int jj = 0; jj < input_height; ++jj)
+                {
+                    for (int kk = 0; kk < input_width; ++kk)
+                    {
+                        const uint16_t label = y[ii](jj, kk);
+                        const float weight = label == weighted_class ? 1.1f : 0.9f;
+                        y_weighted[ii](jj, kk) = weighted_label(label, weight);
+                    }
+                }
+            }
+
+            net_type net;
+            sgd defsolver(0, 0.9);
+            dnn_trainer<net_type> trainer(net, defsolver);
+            trainer.set_learning_rate(0.1);
+            trainer.set_min_learning_rate(0.01);
+            trainer.set_mini_batch_size(10);
+            trainer.set_max_num_epochs(10);
+            trainer.train(x, y_weighted);
+
+            const ::std::vector<matrix<uint16_t>> predictions = net(x);
+
+            int num_weighted_class = 0;
+            int num_not_weighted_class = 0;
+
+            for (int ii = 0; ii < num_samples; ++ii)
+            {
+                const matrix<uint16_t>& prediction = predictions[ii];
+                DLIB_TEST(prediction.nr() == output_height);
+                DLIB_TEST(prediction.nc() == output_width);
+                for (int jj = 0; jj < output_height; ++jj)
+                    for (int kk = 0; kk < output_width; ++kk)
+                        if (prediction(jj, kk) == weighted_class)
+                            ++num_weighted_class;
+                        else
+                            ++num_not_weighted_class;
+            }
+
+            DLIB_TEST_MSG(num_weighted_class > num_not_weighted_class,
+                          "The weighted class (" << weighted_class << ") does not dominate: "
+                          << num_weighted_class << " <= " << num_not_weighted_class);
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void test_tensor_resize_bilinear(long samps, long k, long nr, long nc, long onr, long onc)
+    {
         resizable_tensor img(samps, k, nr, nc);
         resizable_tensor out(samps, k, onr, onc);
...
@@ -2426,9 +2521,9 @@ namespace
             compare_adam();
             test_copy_tensor_gpu();
 #endif
-            test_tensor_resize_bilienar(2, 3, 6, 6, 11, 11);
-            test_tensor_resize_bilienar(2, 3, 6, 6, 3, 4);
-            test_tensor_resize_bilienar(2, 3, 5, 6, 12, 21);
+            test_tensor_resize_bilinear(2, 3, 6, 6, 11, 11);
+            test_tensor_resize_bilinear(2, 3, 6, 6, 3, 4);
+            test_tensor_resize_bilinear(2, 3, 5, 6, 12, 21);
             test_max_pool(1, 1, 2, 3, 0, 0);
             test_max_pool(3, 3, 1, 1, 0, 0);
             test_max_pool(3, 3, 2, 2, 0, 0);
...
@@ -2469,6 +2564,7 @@ namespace
             test_loss_multiclass_per_pixel_activations_on_trivial_single_pixel_task();
             test_loss_multiclass_per_pixel_outputs_on_trivial_task();
             test_loss_multiclass_per_pixel_with_noise_and_pixels_to_ignore();
+            test_loss_multiclass_per_pixel_weighted();
         }

         void perform_test()
...
setup.py
...

@@ -526,7 +526,11 @@ class build(_build):
                 # this checks the sysconfig and will correctly pick up a brewed python lib
                 # e.g. in /usr/local/Cellar
                 py_ver = get_python_version()
+                # check: in some virtual environments the libpython has the form "libpython_#m.dylib"
                 py_lib = os.path.join(get_config_var('LIBDIR'), 'libpython' + py_ver + '.dylib')
+                if not os.path.isfile(py_lib):
+                    py_lib = os.path.join(get_config_var('LIBDIR'), 'libpython' + py_ver + 'm.dylib')
                 cmake_extra_arch += ['-DPYTHON_LIBRARY={lib}'.format(lib=py_lib)]

             if sys.platform == "win32":
...