Commit a88f1bd8 authored May 24, 2017 by Davis King
Made the converter add zero padding layers when needed by Eltwise to replicate
the behavior of dlib's add_prev layers.
parent 984b6949
Showing 1 changed file with 141 additions and 12 deletions:
tools/convert_dlib_nets_to_caffe/main.cpp (+141, -12)
tools/convert_dlib_nets_to_caffe/main.cpp
@@ -21,6 +21,8 @@ struct layer
     string type; // comp, loss, or input
     int idx;
+
+    matrix<long,4,1> output_tensor_shape; // (N,K,NR,NC)
     string detail_name; // The name of the tag inside the layer tag. e.g. fc, con, max_pool, input_rgb_image.
     std::map<string,double> attributes;
     matrix<double> params;
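For orientation, here is a minimal Python sketch (all numbers hypothetical) of the comparison this new (N,K,NR,NC) shape field enables later in this diff: when an add_prev joins two layers, only a mismatch in K (channels) is treated as fixable by zero padding.

# Hypothetical shapes in dlib's (N, K, NR, NC) order: batch, channels, rows, cols.
in_shape1 = (1, 32, 56, 56)
in_shape2 = (1, 64, 56, 56)

# Mirrors the converter's check: everything but the channel count must match.
fixable = (in_shape1[0] == in_shape2[0] and
           in_shape1[2] == in_shape2[2] and
           in_shape1[3] == in_shape2[3])
pad_channels = abs(in_shape1[1] - in_shape2[1])  # 32 zero channels to pad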
@@ -49,30 +51,32 @@ struct layer
 // ----------------------------------------------------------------------------------------

 std::vector<layer> parse_dlib_xml(
+    const matrix<long,4,1>& input_tensor_shape,
     const string& xml_filename
 );

 // ----------------------------------------------------------------------------------------

 template <typename iterator>
-string find_layer_caffe_name (
+const layer& find_layer (
     iterator i,
     long tag_id
 )
 /*!
     requires
-        - i is an iterator pointing to a layer in the list of layers produced by parse_dlib_xml().
+        - i is a reverse iterator pointing to a layer in the list of layers produced by parse_dlib_xml().
         - i is not an input layer.
     ensures
         - if (tag_id == -1) then
-            - returns the caffe string name for the previous layer to layer i.
+            - returns the previous layer (i.e. closer to the input) to layer i.
         - else
-            - returns the caffe string name for the previous layer to layer i with the given tag_id.
+            - returns the previous layer (i.e. closer to the input) to layer i with the
+              given tag_id.
 !*/
 {
     if (tag_id == -1)
     {
-        return (i-1)->caffe_layer_name();
+        return *(i-1);
     }
     else
     {
@@ -81,7 +85,7 @@ string find_layer_caffe_name (
             i--;
             // if we hit the end of the network before we found what we were looking for
             if (i->tag_id == tag_id)
-                return i->caffe_layer_name();
+                return *i;
             if (i->type == "input")
                 throw dlib::error("Network definition is bad, a layer wanted to skip back to a non-existing layer.");
         }
@@ -89,7 +93,19 @@ string find_layer_caffe_name (
 }

 template <typename iterator>
-string find_input_layer_caffe_name (iterator i) { return find_layer_caffe_name(i, i->skip_id); }
+const layer& find_input_layer (iterator i) { return find_layer(i, i->skip_id); }
+
+template <typename iterator>
+string find_layer_caffe_name (iterator i, long tag_id) { return find_layer(i, tag_id).caffe_layer_name(); }
+
+template <typename iterator>
+string find_input_layer_caffe_name (iterator i) { return find_input_layer(i).caffe_layer_name(); }

 // ----------------------------------------------------------------------------------------
@@ -116,7 +132,8 @@ void convert_dlib_xml_to_caffe_python_code(
     cout << "Writing model to " << out_filename << endl;
     ofstream fout(out_filename);
     fout.precision(9);
-    const auto layers = parse_dlib_xml(xml_filename);
+    const auto layers = parse_dlib_xml({N,K,NR,NC}, xml_filename);

     fout << "#\n";
     fout << "# !!! This file was automatically generated by dlib's tools/convert_dlib_nets_to_caffe utility. !!!\n";
@@ -301,10 +318,59 @@ void convert_dlib_xml_to_caffe_python_code(
         }
         else if (i->detail_name == "add_prev")
         {
-            fout << "    n." << i->caffe_layer_name() << " = L.Eltwise(n." << find_input_layer_caffe_name(i);
-            fout << ", n." << find_layer_caffe_name(i, i->attribute("tag"));
-            fout << ", operation=P.Eltwise.SUM";
-            fout << ");\n";
+            auto in_shape1 = find_input_layer(i).output_tensor_shape;
+            auto in_shape2 = find_layer(i, i->attribute("tag")).output_tensor_shape;
+            if (in_shape1 != in_shape2)
+            {
+                // if only the number of channels differs then we will use a dummy layer to
+                // pad with zeros. But otherwise we will throw an error.
+                if (in_shape1(0) == in_shape2(0) && in_shape1(2) == in_shape2(2) && in_shape1(3) == in_shape2(3))
+                {
+                    fout << "    n." << i->caffe_layer_name() << "_zeropad = L.DummyData(num=" << in_shape1(0);
+                    fout << ", channels=" << std::abs(in_shape1(1)-in_shape2(1));
+                    fout << ", height=" << in_shape1(2);
+                    fout << ", width=" << in_shape1(3);
+                    fout << ");\n";
+
+                    string smaller_layer = find_input_layer_caffe_name(i);
+                    string bigger_layer = find_layer_caffe_name(i, i->attribute("tag"));
+                    if (in_shape1(1) > in_shape2(1))
+                        swap(smaller_layer, bigger_layer);
+
+                    fout << "    n." << i->caffe_layer_name() << "_concat = L.Concat(n." << smaller_layer;
+                    fout << ", n." << i->caffe_layer_name() << "_zeropad";
+                    fout << ");\n";
+
+                    fout << "    n." << i->caffe_layer_name() << " = L.Eltwise(n." << i->caffe_layer_name() << "_concat";
+                    fout << ", n." << bigger_layer;
+                    fout << ", operation=P.Eltwise.SUM";
+                    fout << ");\n";
+                }
+                else
+                {
+                    std::ostringstream sout;
+                    sout << "The dlib network contained an add_prev layer (layer idx " << i->idx << ") that adds two previous ";
+                    sout << "layers with different output tensor dimensions. Caffe's equivalent layer, Eltwise, doesn't support ";
+                    sout << "adding layers together with different dimensions. In the special case where the only difference is ";
+                    sout << "in the number of channels, this converter program will add a dummy layer that outputs a tensor full of zeros ";
+                    sout << "and concat it appropriately so this will work. However, this network you are converting has tensor dimensions ";
+                    sout << "different in values other than the number of channels. In particular, here are the two tensor shapes (batch size, channels, rows, cols): ";
+                    std::ostringstream sout2;
+                    sout2 << wrap_string(sout.str()) << endl;
+                    sout2 << trans(in_shape1);
+                    sout2 << trans(in_shape2);
+                    throw dlib::error(sout2.str());
+                }
+            }
+            else
+            {
+                fout << "    n." << i->caffe_layer_name() << " = L.Eltwise(n." << find_input_layer_caffe_name(i);
+                fout << ", n." << find_layer_caffe_name(i, i->attribute("tag"));
+                fout << ", operation=P.Eltwise.SUM";
+                fout << ");\n";
+            }
         }
         else
         {
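For concreteness, here is roughly what the new code path emits for an add_prev whose two inputs differ only in channel count. The layer names (relu2, conv1, add_prev3) and the shapes, (1,32,56,56) versus (1,64,56,56), are made up for illustration; the pycaffe calls are assembled exactly as the fout statements above write them, into the generated script where n is the caffe NetSpec being built.

# Hypothetical generated output: pad the 32-channel branch with 32 zero
# channels, concat, then do the usual elementwise sum against the 64-channel branch.
n.add_prev3_zeropad = L.DummyData(num=1, channels=32, height=56, width=56);
n.add_prev3_concat = L.Concat(n.relu2, n.add_prev3_zeropad);
n.add_prev3 = L.Eltwise(n.add_prev3_concat, n.conv1, operation=P.Eltwise.SUM);

When the two shapes already agree, only the final Eltwise line is emitted, as in the else branch above.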
@@ -549,7 +615,68 @@ public:

 // ----------------------------------------------------------------------------------------

+void compute_output_tensor_shapes(
+    const matrix<long,4,1>& input_tensor_shape,
+    std::vector<layer>& layers
+)
+{
+    DLIB_CASSERT(layers.back().type == "input");
+    layers.back().output_tensor_shape = input_tensor_shape;
+    for (auto i = ++layers.rbegin(); i != layers.rend(); ++i)
+    {
+        const auto input_shape = find_input_layer(i).output_tensor_shape;
+        if (i->type == "comp")
+        {
+            if (i->detail_name == "fc" || i->detail_name == "fc_no_bias")
+            {
+                long num_outputs = i->attribute("num_outputs");
+                i->output_tensor_shape = {input_shape(0), num_outputs, 1, 1};
+            }
+            else if (i->detail_name == "con")
+            {
+                long num_filters = i->attribute("num_filters");
+                long filter_nc = i->attribute("nc");
+                long filter_nr = i->attribute("nr");
+                long stride_x = i->attribute("stride_x");
+                long stride_y = i->attribute("stride_y");
+                long padding_x = i->attribute("padding_x");
+                long padding_y = i->attribute("padding_y");
+                long nr = 1 + (input_shape(2) + 2*padding_y - filter_nr)/stride_y;
+                long nc = 1 + (input_shape(3) + 2*padding_x - filter_nc)/stride_x;
+                i->output_tensor_shape = {input_shape(0), num_filters, nr, nc};
+            }
+            else if (i->detail_name == "max_pool" || i->detail_name == "avg_pool")
+            {
+                long filter_nc = i->attribute("nc");
+                long filter_nr = i->attribute("nr");
+                long stride_x = i->attribute("stride_x");
+                long stride_y = i->attribute("stride_y");
+                long padding_x = i->attribute("padding_x");
+                long padding_y = i->attribute("padding_y");
+                long nr = 1 + (input_shape(2) + 2*padding_y - filter_nr)/stride_y;
+                long nc = 1 + (input_shape(3) + 2*padding_x - filter_nc)/stride_x;
+                i->output_tensor_shape = {input_shape(0), input_shape(1), nr, nc};
+            }
+            else if (i->detail_name == "add_prev")
+            {
+                auto aux_shape = find_layer(i, i->attribute("tag")).output_tensor_shape;
+                for (long j = 0; j < input_shape.size(); ++j)
+                    i->output_tensor_shape(j) = std::max(input_shape(j), aux_shape(j));
+            }
+            else
+            {
+                i->output_tensor_shape = input_shape;
+            }
+        }
+        else
+        {
+            i->output_tensor_shape = input_shape;
+        }
+    }
+}
+
+// ----------------------------------------------------------------------------------------
+
 std::vector<layer> parse_dlib_xml(
+    const matrix<long,4,1>& input_tensor_shape,
     const string& xml_filename
 )
 {
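As a quick check of the shape arithmetic in compute_output_tensor_shapes, here is a Python sketch with made-up convolution parameters; the integer division mirrors the C++ long division in the two "long nr = ..." lines above.

# Hypothetical "con" layer: 3x3 filters, stride 2, padding 1, on a (1,3,224,224) input.
input_nr, input_nc = 224, 224
filter_nr = filter_nc = 3
stride_y = stride_x = 2
padding_y = padding_x = 1

nr = 1 + (input_nr + 2*padding_y - filter_nr) // stride_y  # 1 + 223//2 = 112
nc = 1 + (input_nc + 2*padding_x - filter_nc) // stride_x  # 112

# With num_filters = 64 the layer's output_tensor_shape becomes (1, 64, 112, 112).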
@@ -561,6 +688,8 @@ std::vector<layer> parse_dlib_xml(
     if (dh.layers.back().type != "input")
         throw dlib::error("The network in the XML file is missing an input layer!");

+    compute_output_tensor_shapes(input_tensor_shape, dh.layers);
+
     return dh.layers;
 }