Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
4d121e7e
Commit
4d121e7e
authored
Dec 17, 2016
by
Davis King
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added another metric learning example
parent
29047a22
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
262 additions
and
5 deletions
+262
-5
CMakeLists.txt
examples/CMakeLists.txt
+1
-0
dnn_metric_learning_ex.cpp
examples/dnn_metric_learning_ex.cpp
+10
-5
dnn_metric_learning_on_images_ex.cpp
examples/dnn_metric_learning_on_images_ex.cpp
+251
-0
No files found.
examples/CMakeLists.txt
View file @
4d121e7e
...
...
@@ -47,6 +47,7 @@ if (NOT USING_OLD_VISUAL_STUDIO_COMPILER)
add_example
(
dnn_introduction2_ex
)
add_example
(
dnn_inception_ex
)
add_example
(
dnn_metric_learning_ex
)
add_example
(
dnn_metric_learning_on_images_ex
)
add_gui_example
(
dnn_imagenet_ex
)
add_gui_example
(
dnn_mmod_ex
)
add_gui_example
(
dnn_mmod_face_detection_ex
)
...
...
examples/dnn_metric_learning_ex.cpp
View file @
4d121e7e
...
...
@@ -47,12 +47,14 @@ int main() try
trainer
.
train
(
samples
,
labels
);
auto
embedded
=
net
(
samples
);
// Run all the images through the network to get their vector embeddings.
std
::
vector
<
matrix
<
float
,
0
,
1
>>
embedded
=
net
(
images
);
for
(
size_t
i
=
0
;
i
<
embedded
.
size
();
++
i
)
cout
<<
"label: "
<<
labels
[
i
]
<<
"
\t
"
<<
trans
(
embedded
[
i
]);
// now count how many pairs are correctly classified.
// Now, check if the embedding puts things with the same labels near each other and
// things with different labels far apart.
int
num_right
=
0
;
int
num_wrong
=
0
;
for
(
size_t
i
=
0
;
i
<
embedded
.
size
();
++
i
)
...
...
@@ -61,6 +63,9 @@ int main() try
{
if
(
labels
[
i
]
==
labels
[
j
])
{
// The loss_metric layer will cause things with the same label to be less
// than net.loss_details().get_distance_threshold() distance from each
// other. So we can use that distance value as our testing threshold.
if
(
length
(
embedded
[
i
]
-
embedded
[
j
])
<
net
.
loss_details
().
get_distance_threshold
())
++
num_right
;
else
...
...
@@ -68,10 +73,10 @@ int main() try
}
else
{
if
(
length
(
embedded
[
i
]
-
embedded
[
j
])
<
net
.
loss_details
().
get_distance_threshold
())
++
num_wrong
;
else
if
(
length
(
embedded
[
i
]
-
embedded
[
j
])
>=
net
.
loss_details
().
get_distance_threshold
())
++
num_right
;
else
++
num_wrong
;
}
}
}
...
...
examples/dnn_metric_learning_on_images_ex.cpp
0 → 100644
View file @
4d121e7e
#include <dlib/dnn.h>
#include <dlib/image_io.h>
#include <dlib/misc_api.h>
using
namespace
dlib
;
using
namespace
std
;
std
::
vector
<
std
::
vector
<
string
>>
load_objects_list
(
const
string
&
dir
)
{
std
::
vector
<
std
::
vector
<
string
>>
objects
;
for
(
auto
subdir
:
directory
(
dir
).
get_dirs
())
{
std
::
vector
<
string
>
imgs
;
for
(
auto
img
:
subdir
.
get_files
())
imgs
.
push_back
(
img
);
objects
.
push_back
(
imgs
);
}
return
objects
;
}
void
load_mini_batch
(
const
size_t
num_ids
,
const
size_t
samples_per_id
,
dlib
::
rand
&
rnd
,
const
std
::
vector
<
std
::
vector
<
string
>>&
objs
,
std
::
vector
<
matrix
<
rgb_pixel
>>&
images
,
std
::
vector
<
unsigned
long
>&
labels
)
{
images
.
clear
();
labels
.
clear
();
matrix
<
rgb_pixel
>
image
;
for
(
size_t
i
=
0
;
i
<
num_ids
;
++
i
)
{
const
size_t
id
=
rnd
.
get_random_32bit_number
()
%
objs
.
size
();
for
(
size_t
j
=
0
;
j
<
samples_per_id
;
++
j
)
{
const
auto
&
obj
=
objs
[
id
][
rnd
.
get_random_32bit_number
()
%
objs
[
id
].
size
()];
load_image
(
image
,
obj
);
images
.
push_back
(
std
::
move
(
image
));
labels
.
push_back
(
id
);
}
}
// You might want to do some data augmentation at this point. Here we so some simple
// color augmentation.
for
(
auto
&&
crop
:
images
)
disturb_colors
(
crop
,
rnd
);
// All the images going into a mini-batch have to be the same size. And really, all
// the images in your entire training dataset should be the same size for what we are
// doing to make the most sense.
DLIB_CASSERT
(
images
.
size
()
>
0
);
for
(
auto
&&
img
:
images
)
{
DLIB_CASSERT
(
img
.
nr
()
==
images
[
0
].
nr
()
&&
img
.
nc
()
==
images
[
0
].
nc
(),
"All the images in a single mini-batch must be the same size."
);
}
}
// ----------------------------------------------------------------------------------------
template
<
template
<
int
,
template
<
typename
>
class
,
int
,
typename
>
class
block
,
int
N
,
template
<
typename
>
class
BN
,
typename
SUBNET
>
using
residual
=
add_prev1
<
block
<
N
,
BN
,
1
,
tag1
<
SUBNET
>>>
;
template
<
template
<
int
,
template
<
typename
>
class
,
int
,
typename
>
class
block
,
int
N
,
template
<
typename
>
class
BN
,
typename
SUBNET
>
using
residual_down
=
add_prev2
<
avg_pool
<
2
,
2
,
2
,
2
,
skip1
<
tag2
<
block
<
N
,
BN
,
2
,
tag1
<
SUBNET
>>>>>>
;
template
<
int
N
,
template
<
typename
>
class
BN
,
int
stride
,
typename
SUBNET
>
using
block
=
BN
<
con
<
N
,
3
,
3
,
1
,
1
,
relu
<
BN
<
con
<
N
,
3
,
3
,
stride
,
stride
,
SUBNET
>>>>>
;
template
<
int
N
,
typename
SUBNET
>
using
res
=
relu
<
residual
<
block
,
N
,
bn_con
,
SUBNET
>>
;
template
<
int
N
,
typename
SUBNET
>
using
ares
=
relu
<
residual
<
block
,
N
,
affine
,
SUBNET
>>
;
template
<
int
N
,
typename
SUBNET
>
using
res_down
=
relu
<
residual_down
<
block
,
N
,
bn_con
,
SUBNET
>>
;
template
<
int
N
,
typename
SUBNET
>
using
ares_down
=
relu
<
residual_down
<
block
,
N
,
affine
,
SUBNET
>>
;
// ----------------------------------------------------------------------------------------
template
<
typename
SUBNET
>
using
level1
=
res
<
512
,
res
<
512
,
res_down
<
512
,
SUBNET
>>>
;
template
<
typename
SUBNET
>
using
level2
=
res
<
256
,
res
<
256
,
res
<
256
,
res
<
256
,
res
<
256
,
res_down
<
256
,
SUBNET
>>>>>>
;
template
<
typename
SUBNET
>
using
level3
=
res
<
128
,
res
<
128
,
res
<
128
,
res_down
<
128
,
SUBNET
>>>>
;
template
<
typename
SUBNET
>
using
level4
=
res
<
64
,
res
<
64
,
res
<
64
,
SUBNET
>>>
;
template
<
typename
SUBNET
>
using
alevel1
=
ares
<
512
,
ares
<
512
,
ares_down
<
512
,
SUBNET
>>>
;
template
<
typename
SUBNET
>
using
alevel2
=
ares
<
256
,
ares
<
256
,
ares
<
256
,
ares
<
256
,
ares
<
256
,
ares_down
<
256
,
SUBNET
>>>>>>
;
template
<
typename
SUBNET
>
using
alevel3
=
ares
<
128
,
ares
<
128
,
ares
<
128
,
ares_down
<
128
,
SUBNET
>>>>
;
template
<
typename
SUBNET
>
using
alevel4
=
ares
<
64
,
ares
<
64
,
ares
<
64
,
SUBNET
>>>
;
template
<
typename
SUBNET
>
using
final_pooling
=
avg_pool_everything
<
SUBNET
>
;
template
<
typename
SUBNET
>
using
afinal_pooling
=
avg_pool_everything
<
SUBNET
>
;
// training network type
using
net_type
=
loss_metric
<
fc_no_bias
<
128
,
final_pooling
<
level1
<
level2
<
level3
<
level4
<
max_pool
<
3
,
3
,
2
,
2
,
relu
<
bn_con
<
con
<
64
,
7
,
7
,
2
,
2
,
input_rgb_image
>>>>>>>>>>>
;
// testing network type (replaced batch normalization with fixed affine transforms)
using
anet_type
=
loss_metric
<
fc_no_bias
<
128
,
afinal_pooling
<
alevel1
<
alevel2
<
alevel3
<
alevel4
<
max_pool
<
3
,
3
,
2
,
2
,
relu
<
affine
<
con
<
64
,
7
,
7
,
2
,
2
,
input_rgb_image
>>>>>>>>>>>
;
// ----------------------------------------------------------------------------------------
int
main
(
int
argc
,
char
**
argv
)
{
if
(
argc
!=
2
)
{
cout
<<
"Give folder as input. It should contain sub-folders of images and we will "
<<
endl
;
cout
<<
"learn to distinguish these sub-folders with metric learning."
<<
endl
;
return
1
;
}
auto
objs
=
load_objects_list
(
argv
[
1
]);
cout
<<
"objs.size(): "
<<
objs
.
size
()
<<
endl
;
std
::
vector
<
matrix
<
rgb_pixel
>>
images
;
std
::
vector
<
unsigned
long
>
labels
;
net_type
net
;
dnn_trainer
<
net_type
>
trainer
(
net
,
sgd
(
0.0005
,
0.9
));
trainer
.
set_learning_rate
(
0.1
);
trainer
.
be_verbose
();
trainer
.
set_synchronization_file
(
"face_metric_sync"
,
std
::
chrono
::
minutes
(
5
));
trainer
.
set_iterations_without_progress_threshold
(
300
);
// It's important to feed the GPU fast enough to keep it occupied. So here we create a
// bunch of threads that are responsible for creating mini-batches of training data.
dlib
::
pipe
<
std
::
vector
<
matrix
<
rgb_pixel
>>>
qimages
(
4
);
dlib
::
pipe
<
std
::
vector
<
unsigned
long
>>
qlabels
(
4
);
auto
data_loader
=
[
&
qimages
,
&
qlabels
,
&
objs
](
time_t
seed
)
{
dlib
::
rand
rnd
(
time
(
0
)
+
seed
);
std
::
vector
<
matrix
<
rgb_pixel
>>
images
;
std
::
vector
<
unsigned
long
>
labels
;
while
(
qimages
.
is_enabled
())
{
try
{
load_mini_batch
(
15
,
15
,
rnd
,
objs
,
images
,
labels
);
qimages
.
enqueue
(
images
);
qlabels
.
enqueue
(
labels
);
}
catch
(
std
::
exception
&
e
)
{
cout
<<
"EXCEPTION IN LOADING DATA"
<<
endl
;
cout
<<
e
.
what
()
<<
endl
;
}
}
};
std
::
thread
data_loader1
([
data_loader
](){
data_loader
(
1
);
});
std
::
thread
data_loader2
([
data_loader
](){
data_loader
(
2
);
});
std
::
thread
data_loader3
([
data_loader
](){
data_loader
(
3
);
});
std
::
thread
data_loader4
([
data_loader
](){
data_loader
(
4
);
});
std
::
thread
data_loader5
([
data_loader
](){
data_loader
(
5
);
});
// Here we do the training. We keep passing mini-batches to the trainer until the
// learning rate has dropped low enough.
while
(
trainer
.
get_learning_rate
()
>=
1e-4
)
{
qimages
.
dequeue
(
images
);
qlabels
.
dequeue
(
labels
);
trainer
.
train_one_step
(
images
,
labels
);
}
// wait for training threads to stop
trainer
.
get_net
();
cout
<<
"done training"
<<
endl
;
// Save the network to disk
net
.
clean
();
serialize
(
"metric_network_renset.dat"
)
<<
net
;
// stop all the data loading threads and wait for them to terminate.
qimages
.
disable
();
qlabels
.
disable
();
data_loader1
.
join
();
data_loader2
.
join
();
data_loader3
.
join
();
data_loader4
.
join
();
data_loader5
.
join
();
// Now, just to show an example of how you would use the network, lets check how well
// it performs on the training data.
dlib
::
rand
rnd
(
time
(
0
));
load_mini_batch
(
15
,
15
,
rnd
,
objs
,
images
,
labels
);
// Run all the images through the network to get their vector embeddings.
std
::
vector
<
matrix
<
float
,
0
,
1
>>
embedded
=
net
(
images
);
// Now, check if the embedding puts things with the same labels near each other and
// things with different labels far apart.
int
num_right
=
0
;
int
num_wrong
=
0
;
for
(
size_t
i
=
0
;
i
<
embedded
.
size
();
++
i
)
{
for
(
size_t
j
=
i
+
1
;
j
<
embedded
.
size
();
++
j
)
{
if
(
labels
[
i
]
==
labels
[
j
])
{
// The loss_metric layer will cause things with the same label to be less
// than net.loss_details().get_distance_threshold() distance from each
// other. So we can use that distance value as our testing threshold.
if
(
length
(
embedded
[
i
]
-
embedded
[
j
])
<
net
.
loss_details
().
get_distance_threshold
())
++
num_right
;
else
++
num_wrong
;
}
else
{
if
(
length
(
embedded
[
i
]
-
embedded
[
j
])
>=
net
.
loss_details
().
get_distance_threshold
())
++
num_right
;
else
++
num_wrong
;
}
}
}
cout
<<
"num_right: "
<<
num_right
<<
endl
;
cout
<<
"num_wrong: "
<<
num_wrong
<<
endl
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment