Commit a29086bf authored by Davis King

Made multiply() more flexible and also fixed a bug in the CPU implementation of
batch_normalize_conv.

parent 29f56b12
@@ -34,14 +34,39 @@ namespace dlib
         const tensor& src2
     )
     {
-        DLIB_CASSERT(dest.size()==src1.size(),"");
-        DLIB_CASSERT(dest.size()==src2.size(),"");
+        DLIB_CASSERT(dest.k() == src1.k() && src1.k() == src2.k() &&
+                     dest.nr() == src1.nr() && src1.nr() == src2.nr() &&
+                     dest.nc() == src1.nc() && src1.nc() == src2.nc() ,"");
+        const long MD = std::max(std::max(dest.num_samples(),src1.num_samples()),src2.num_samples());
+        DLIB_CASSERT((dest.num_samples()==1 || dest.num_samples()==MD) &&
+                     (src1.num_samples()==1 || src1.num_samples()==MD) &&
+                     (src2.num_samples()==1 || src2.num_samples()==MD) ,"");
+
+        if (dest.size() == 0)
+            return;
+
+        const size_t max_size = std::max(std::max(dest.size(),src1.size()),src2.size());
         const auto d = dest.host();
         const auto s1 = src1.host();
         const auto s2 = src2.host();
-        for (size_t i = 0; i < src1.size(); ++i)
-            d[i] = s1[i]*s2[i];
-    }
+        if (dest.size() == src1.size() && src1.size() == src2.size())
+        {
+            for (size_t i = 0; i < src1.size(); ++i)
+                d[i] = s1[i]*s2[i];
+        }
+        else if (dest.num_samples() == 1)
+        {
+            for (size_t i = 0; i < dest.size(); ++i)
+                d[i] = 0;
+            for (size_t i = 0; i < max_size; ++i)
+                d[i%dest.size()] += s1[i%src1.size()]*s2[i%src2.size()];
+        }
+        else
+        {
+            for (size_t i = 0; i < max_size; ++i)
+                d[i] = s1[i%src1.size()]*s2[i%src2.size()];
+        }
     }

 // -----------------------------------------------------------------------------------
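The i % size() indexing in the new CPU code is what implements both broadcasting cases: walking a smaller tensor cyclically replicates it across the larger tensor's samples, and accumulating through i % dest.size() folds the per-sample products together when dest holds a single sample. A standalone sketch of the same indexing on plain arrays (made-up sizes, illustration only, not dlib code):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main()
    {
        // Flattened "tensors": s1 has 2 samples of 3 values, s2 has 1 sample
        // of 3 values, so s2 gets replicated across s1's samples.
        std::vector<float> s1 = {1,2,3, 4,5,6};
        std::vector<float> s2 = {10,20,30};
        const size_t max_size = std::max(s1.size(), s2.size());

        // Case dest.size() == max_size: plain broadcast multiply.
        std::vector<float> d(6);
        for (size_t i = 0; i < max_size; ++i)
            d[i % d.size()] = s1[i % s1.size()] * s2[i % s2.size()];
        // d == {10, 40, 90, 40, 100, 180}

        // Case dest.num_samples() == 1: zero, then sum products over samples.
        std::vector<float> summed(3, 0);
        for (size_t i = 0; i < max_size; ++i)
            summed[i % summed.size()] += s1[i % s1.size()] * s2[i % s2.size()];
        // summed == {50, 140, 270}

        for (float v : d) std::printf("%g ", v);
        std::printf("\n");
        for (float v : summed) std::printf("%g ", v);
        std::printf("\n");
    }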
@@ -422,6 +447,10 @@ namespace dlib
         DLIB_CASSERT(src.k() == beta_grad.size(),"");
         DLIB_CASSERT(have_same_dimensions(gradient_input, src),"");
         DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad),"");
+
+        beta_grad = 0;
+        gamma_grad = 0;
+
         auto p_grad = gradient_input.host();
         auto p_src = src.host();
         const auto p_gamma = gamma.host();
...
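This hunk is the batch_normalize_conv fix called out in the commit message: the gradient loops that follow (elided in the diff) accumulate into beta_grad and gamma_grad with +=, so unless the buffers are zeroed first, whatever values they previously held leak into the result. A minimal sketch of the failure mode, with a hypothetical accumulator standing in for dlib's real loop:

    #include <cstddef>
    #include <vector>

    // Hypothetical stand-in for a per-channel gradient accumulation.
    // Without the explicit reset, the += loop would fold stale buffer
    // contents into the new gradient, which is the bug this commit fixes.
    void accumulate_per_channel(std::vector<float>& grad,
                                const std::vector<float>& gradient_input)
    {
        for (std::size_t i = 0; i < grad.size(); ++i)
            grad[i] = 0;                      // the fix: zero the accumulator
        for (std::size_t i = 0; i < gradient_input.size(); ++i)
            grad[i % grad.size()] += gradient_input[i];
    }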
@@ -116,8 +116,13 @@ namespace dlib { namespace tt
         const tensor& src2
     )
     {
-        DLIB_CASSERT(have_same_dimensions(dest,src1) == true,"");
-        DLIB_CASSERT(have_same_dimensions(dest,src2) == true,"");
+        DLIB_CASSERT(dest.k() == src1.k() && src1.k() == src2.k() &&
+                     dest.nr() == src1.nr() && src1.nr() == src2.nr() &&
+                     dest.nc() == src1.nc() && src1.nc() == src2.nc() ,"");
+        const long MD = std::max(std::max(dest.num_samples(),src1.num_samples()),src2.num_samples());
+        DLIB_CASSERT((dest.num_samples()==1 || dest.num_samples()==MD) &&
+                     (src1.num_samples()==1 || src1.num_samples()==MD) &&
+                     (src2.num_samples()==1 || src2.num_samples()==MD) ,"");
 #ifdef DLIB_USE_CUDA
         cuda::multiply(dest, src1, src2);
 #else
...
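tt::multiply() itself is just a dispatcher: it runs the shape checks once and forwards to whichever backend the library was built with, so both backends get identical argument validation. A condensed sketch of the pattern (the checks and the CPU branch, presumably the cpu::multiply from the first hunk, are abbreviated):

    void multiply(tensor& dest, const tensor& src1, const tensor& src2)
    {
        // ... the DLIB_CASSERT shape checks shown above ...
    #ifdef DLIB_USE_CUDA
        cuda::multiply(dest, src1, src2);   // GPU implementation
    #else
        cpu::multiply(dest, src1, src2);    // CPU implementation
    #endif
    }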
@@ -92,6 +92,7 @@ namespace dlib { namespace tt
 // ----------------------------------------------------------------------------------------
+    // TODO, delete this function
     void multiply (
         tensor& dest,
         const tensor& src
@@ -114,12 +115,23 @@ namespace dlib { namespace tt
     );
     /*!
         requires
-            - have_same_dimensions(dest,src1) == true
-            - have_same_dimensions(dest,src2) == true
+            - dest.k() == src1.k() == src2.k()
+            - dest.nr() == src1.nr() == src2.nr()
+            - dest.nc() == src1.nc() == src2.nc()
+            - dest.num_samples(), src1.num_samples(), and src2.num_samples() must
+              each either be 1 or, for the ones that aren't 1, all have the same value.
         ensures
-            - #dest == src1*src2
-              That is, for all valid i:
-                #dest.host()[i] == src1.host()[i]*src2.host()[i]
+            - let MD = max(dest.num_samples(), src1.num_samples(), src2.num_samples())
+            - This function pointwise multiplies src1 with src2 and stores the result
+              into #dest. However, how the multiplication happens depends on the
+              dimensions of the tensors. First, when src1 and src2 are multiplied
+              together, if either has a num_samples() dimension that is != MD, then it
+              is first replicated across samples to produce a tensor with
+              num_samples()==MD, and then the two are pointwise multiplied together.
+              Second, if dest.num_samples()==1, then after the pointwise multiplication
+              of src1 with src2, the result has its samples summed to produce an output
+              tensor with num_samples()==1, which is then assigned to #dest.
     !*/

 // ----------------------------------------------------------------------------------------
...
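A short usage sketch of the broadcasting rules this spec describes (tensor sizes invented for illustration; assumes a dlib build with the DNN tooling available):

    #include <dlib/dnn.h>
    using namespace dlib;

    int main()
    {
        // A has 5 samples, B has 1, so B is replicated across A's samples.
        resizable_tensor A(5,3,4,4), B(1,3,4,4), out(5,3,4,4), summed(1,3,4,4);
        A = 2;   // tensors support assignment from a scalar
        B = 3;

        tt::multiply(out, A, B);     // every element of out becomes 2*3 == 6

        // dest.num_samples()==1, so the pointwise products are summed over
        // A's 5 samples: every element of summed becomes 5*6 == 30.
        tt::multiply(summed, A, B);
        return 0;
    }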
@@ -380,6 +380,40 @@ namespace
         truth3 += 2;
         DLIB_TEST(mat(at(A,4)) == reshape(truth2,2,2));
         DLIB_TEST(mat(A) == join_cols(truth1,join_cols(truth2,truth3)));
+
+        {
+            resizable_tensor dest(3,4);
+            resizable_tensor A, B;
+            A = dest;
+            B = dest;
+
+            tensor_rand rnd;
+            rnd.fill_uniform(dest);
+            rnd.fill_uniform(A);
+            rnd.fill_uniform(B);
+
+            dest.set_size(1,4);
+            tt::multiply(dest, A, B);
+            DLIB_TEST(max(abs(mat(dest)-sum_rows(pointwise_multiply(mat(A),mat(B))))) < 1e-6);
+
+            A.set_size(1,4);
+            rnd.fill_uniform(A);
+            matrix<float> AA = join_cols(mat(A),mat(A)); AA = join_cols(mat(A),AA);
+            tt::multiply(dest, A, B);
+            DLIB_TEST(max(abs(mat(dest)-sum_rows(pointwise_multiply(AA,mat(B))))) < 1e-6);
+            tt::multiply(dest, B, A);
+            DLIB_TEST(max(abs(mat(dest)-sum_rows(pointwise_multiply(AA,mat(B))))) < 1e-6);
+
+            dest.set_size(3,4);
+            tt::multiply(dest, B, A);
+            DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6);
+            tt::multiply(dest, A, B);
+            DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6);
+        }
     }

 // ----------------------------------------------------------------------------------------
@@ -457,8 +491,40 @@ namespace
         cpu::threshold(src2, 0.5);
         DLIB_TEST(equal(mat(src),mat(src2)));
+
+        {
+            resizable_tensor dest(3,4);
+            resizable_tensor A, B;
+            A = dest;
+            B = dest;
+
+            tensor_rand rnd;
+            rnd.fill_uniform(dest);
+            rnd.fill_uniform(A);
+            rnd.fill_uniform(B);
+
+            dest.set_size(1,4);
+            cuda::multiply(dest, A, B);
+            DLIB_TEST(max(abs(mat(dest)-sum_rows(pointwise_multiply(mat(A),mat(B))))) < 1e-6);
+
+            A.set_size(1,4);
+            rnd.fill_uniform(A);
+            matrix<float> AA = join_cols(mat(A),mat(A)); AA = join_cols(mat(A),AA);
+            cuda::multiply(dest, A, B);
+            DLIB_TEST(max(abs(mat(dest)-sum_rows(pointwise_multiply(AA,mat(B))))) < 1e-6);
+            cuda::multiply(dest, B, A);
+            DLIB_TEST(max(abs(mat(dest)-sum_rows(pointwise_multiply(AA,mat(B))))) < 1e-6);
+
+            dest.set_size(3,4);
+            cuda::multiply(dest, B, A);
+            DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6);
+            cuda::multiply(dest, A, B);
+            DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6);
+        }
     }
+#endif

 // ----------------------------------------------------------------------------------------
@@ -561,6 +627,7 @@ namespace
         DLIB_TEST(max(abs(mat(gamma_grad)-mat(gamma_grad2))) < 1e-4);
         DLIB_TEST(max(abs(mat(beta_grad)-mat(beta_grad2))) < 1e-4);
     }
+#endif

 // ----------------------------------------------------------------------------------------
@@ -642,6 +709,8 @@ namespace
         test_more_ops(4,1);
         test_more_ops(1,4);
         test_more_ops(10000,4);
+        compare_bn_gpu_and_cpu();
+        compare_bn_conv_gpu_and_cpu();
 #endif
         test_tanh();
         test_softmax();
@@ -649,8 +718,6 @@ namespace
         test_batch_normalize();
         test_batch_normalize_conv();
         test_basic_tensor_ops();
-        compare_bn_gpu_and_cpu();
-        compare_bn_conv_gpu_and_cpu();
         test_layers();
     }
 } a;
...