Commit 6c05ff45 authored by Davis King

Added CPU version of batch normalization functions

parent 141b384b
......@@ -135,6 +135,12 @@ if (NOT TARGET dlib)
data_io/image_dataset_metadata.cpp
data_io/mnist.cpp)
if (COMPILER_CAN_DO_CPP_11)
set(source_files ${source_files}
dnn/cpu_dlib.cpp
)
endif()
if (DLIB_ISO_CPP_ONLY)
add_library(dlib STATIC ${source_files} )
if (UNIX AND NOT DLIB_IN_PROJECT_BUILD)
......
......@@ -18,6 +18,11 @@
#include "../data_io/image_dataset_metadata.cpp"
#include "../data_io/mnist.cpp"
// Stuff that requires C++11
#if __cplusplus >= 201103
#include "../dnn/cpu_dlib.cpp"
#endif
#ifndef DLIB_ISO_CPP_ONLY
// Code that depends on OS specific APIs
......
......@@ -10,6 +10,7 @@
#include "dnn/core.h"
#include "dnn/solvers.h"
#include "dnn/trainer.h"
#include "dnn/cpu_dlib.h"
#endif // DLIB_DNn_
......
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNN_CPU_cPP_
#define DLIB_DNN_CPU_cPP_
// This file contains CPU implementations of the GPU based functions in cuda_dlib.h
#include "cpu_dlib.h"
namespace dlib
{
namespace cpu
{
// -----------------------------------------------------------------------------------
// Scalar affine transform -- NOT YET IMPLEMENTED.
// The body is an empty placeholder: dest is left untouched and src, A and B are ignored.
// NOTE(review): presumably meant to compute dest = A*src + B, mirroring the GPU function
// of the same name in cuda_dlib.h -- confirm against that header before implementing.
void affine_transform(
resizable_tensor& dest,
const tensor& src,
const float A,
const float B
)
{
// TODO: implement; currently a no-op.
}
// -----------------------------------------------------------------------------------
// Tensor-coefficient affine transform -- NOT YET IMPLEMENTED.
// The body is an empty placeholder: dest is left untouched and src, A and B are ignored.
// NOTE(review): presumably meant to apply element-wise coefficients A and B to src; the
// required shapes of A and B are not visible here -- confirm against cuda_dlib.h.
void affine_transform(
resizable_tensor& dest,
const tensor& src,
const tensor& A,
const tensor& B
)
{
// TODO: implement; currently a no-op.
}
// -----------------------------------------------------------------------------------
// Batch-normalizes src across its samples, independently for every (k, nr, nc) position.
//
// Outputs:
//   dest  - resized to match src; dest = gamma*(src - mean)/sqrt(var + eps) + beta.
//   means - resized to 1 x k x nr x nc; per-position mean over the samples.
//   vars  - resized to 1 x k x nr x nc; per-position biased variance (divides by the
//           number of samples), never negative.
//
// Requires (checked with DLIB_CASSERT): src holds more than one sample, and gamma and
// beta each hold exactly one sample with the same k, nr and nc as src.
void batch_normalize (
    resizable_tensor& dest,
    resizable_tensor& means,
    resizable_tensor& vars,
    const tensor& src,
    const tensor& gamma,
    const tensor& beta
)
{
    DLIB_CASSERT(
        src.num_samples() > 1 &&
        gamma.num_samples() == 1 &&
        beta.num_samples() == 1 &&
        gamma.nr() == beta.nr() && beta.nr() == src.nr() &&
        gamma.nc() == beta.nc() && beta.nc() == src.nc() &&
        gamma.k() == beta.k() && beta.k() == src.k(),
        "\ngamma.num_samples(): " << gamma.num_samples() <<
        "\ngamma.k(): " << gamma.k() <<
        "\ngamma.nr(): " << gamma.nr() <<
        "\ngamma.nc(): " << gamma.nc() <<
        "\nbeta.num_samples(): " << beta.num_samples() <<
        "\nbeta.k(): " << beta.k() <<
        "\nbeta.nr(): " << beta.nr() <<
        "\nbeta.nc(): " << beta.nc() <<
        "\nsrc.num_samples(): " << src.num_samples() <<
        "\nsrc.k(): " << src.k() <<
        "\nsrc.nr(): " << src.nr() <<
        "\nsrc.nc(): " << src.nc()
    );

    dest.copy_size(src);
    means.set_size(1, src.k(), src.nr(), src.nc());
    vars.set_size(1, src.k(), src.nr(), src.nc());

    // first compute means and vars
    means = 0;
    vars = 0;
    const auto p_vars = vars.host();
    const auto p_means = means.host();
    auto p_src = src.host();
    const long num = src.k()*src.nr()*src.nc();

    // Accumulate the sum and the sum of squares of every position over all samples.
    for (long i = 0; i < num; ++i)
    {
        for (long n = 0; n < src.num_samples(); ++n)
        {
            const float val = p_src[n*num+i];
            p_means[i] += val;
            p_vars[i] += val*val;
        }
    }
    means /= src.num_samples();
    vars /= src.num_samples();

    // copy data back to host (the scaling above went through the tensor operators)
    vars.host(); means.host();

    // Turn E[x^2] into the variance E[x^2] - E[x]^2.  In float arithmetic this difference
    // can round slightly below zero when the spread is tiny relative to the mean; a
    // negative value would inflate the normalization or make sqrt(var + eps) NaN, so it
    // is clamped to 0.  The test is written as "< 0" so that a NaN still propagates.
    for (long i = 0; i < num; ++i)
    {
        const float var = p_vars[i] - p_means[i]*p_means[i];
        p_vars[i] = var < 0 ? 0 : var;
    }

    // TODO, must match eps in batch_normalize_gradient() so make this a shared variable.
    const float eps = 0.00001;
    p_src = src.host();
    auto p_dest = dest.host();
    const auto p_gamma = gamma.host();
    const auto p_beta = beta.host();

    // Normalize each element with the statistics of its position, then scale and shift.
    for (long n = 0; n < src.num_samples(); ++n)
    {
        for (long i = 0; i < num; ++i)
        {
            *p_dest = (*p_src - p_means[i])/std::sqrt(p_vars[i] + eps);
            *p_dest = (*p_dest)*p_gamma[i] + p_beta[i];
            ++p_src;
            ++p_dest;
        }
    }
}
// Backward pass matching batch_normalize().
//
// means and vars are the statistics produced by the forward pass for src, and
// gradient_input is the gradient with respect to the forward output.  The results are
// ACCUMULATED (+=) into src_grad, gamma_grad and beta_grad; nothing is zeroed here.
//
// Requires (checked with DLIB_CASSERT): means, vars, gamma, gamma_grad and beta_grad each
// hold k*nr*nc elements, and gradient_input, src and src_grad share the same dimensions.
void batch_normalize_gradient (
    const tensor& gradient_input,
    const tensor& means,
    const tensor& vars,
    const tensor& src,
    const tensor& gamma,
    tensor& src_grad,
    tensor& gamma_grad,
    tensor& beta_grad
)
{
    // Must stay equal to the eps used by batch_normalize().
    const float eps = 0.00001;
    const long num = src.k()*src.nr()*src.nc();
    DLIB_CASSERT(num == means.size(),"");
    DLIB_CASSERT(num == vars.size(),"");
    DLIB_CASSERT(num == gamma.size(),"");
    DLIB_CASSERT(num == gamma_grad.size(),"");
    DLIB_CASSERT(num == beta_grad.size(),"");
    DLIB_CASSERT(have_same_dimensions(gradient_input, src),"");
    DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad),"");

    const auto grad_in = gradient_input.host();
    const auto x = src.host();
    const auto scale = gamma.host();
    const auto scale_grad = gamma_grad.host();
    const auto shift_grad = beta_grad.host();
    const auto variance = vars.host();
    const auto mean = means.host();

    // Scratch accumulators for the gradients flowing into the variance and the mean.
    resizable_tensor dvars, dmeans;
    dvars.copy_size(vars);
    dmeans.copy_size(means);
    dvars = 0;
    dmeans = 0;
    const auto d_variance = dvars.host();
    const auto d_mean = dmeans.host();

    const auto num_samples = src.num_samples();

    // Pass 1: gradients of beta and gamma, plus the gradient reaching the variance.
    for (long n = 0; n < num_samples; ++n)
    {
        const long offset = n*num;
        for (long i = 0; i < num; ++i)
        {
            const long idx = offset + i;
            const float x_hat = (x[idx] - mean[i])/std::sqrt(variance[i] + eps);
            shift_grad[i] += grad_in[idx];
            scale_grad[i] += (grad_in[idx])*x_hat;

            const float dx = grad_in[idx] * scale[i];
            d_variance[i] += dx*(x[idx] - mean[i])* -0.5*std::pow(variance[i]+eps, -3.0f/2);
        }
    }

    // Pass 2: gradient reaching the mean; this needs the completed variance gradient.
    for (long n = 0; n < num_samples; ++n)
    {
        const long offset = n*num;
        for (long i = 0; i < num; ++i)
        {
            const long idx = offset + i;
            const float dx = grad_in[idx] * scale[i];
            d_mean[i] += dx*-1/std::sqrt(variance[i] + eps) + d_variance[i] * -2*(x[idx] - mean[i])/num_samples;
        }
    }

    // Pass 3: combine the direct, variance and mean paths into the gradient of src.
    const auto out = src_grad.host();
    for (long n = 0; n < num_samples; ++n)
    {
        const long offset = n*num;
        for (long i = 0; i < num; ++i)
        {
            const long idx = offset + i;
            const float dx = grad_in[idx] * scale[i];
            out[idx] += dx/std::sqrt(variance[i] + eps) +
                        d_variance[i] *2*(x[idx] - mean[i])/num_samples +
                        d_mean[i]/num_samples;
        }
    }
}
// ----------------------------------------------------------------------------------------
// Batch normalization for convolutional layers: statistics are shared per channel k and
// are taken over every sample and every (nr, nc) position of that channel.
//
// Outputs:
//   dest  - resized to match src; dest = gamma[k]*(src - mean[k])/sqrt(var[k] + eps) + beta[k].
//   means - resized with set_size(1, src.k()); one mean per channel.
//   vars  - resized with set_size(1, src.k()); one biased variance per channel, never
//           negative.
//
// Requires (checked with DLIB_CASSERT): src holds more than one sample, and gamma and
// beta each have num_samples, nr and nc equal to 1 and the same k as src.
void batch_normalize_conv (
    resizable_tensor& dest,
    resizable_tensor& means,
    resizable_tensor& vars,
    const tensor& src,
    const tensor& gamma,
    const tensor& beta
)
{
    DLIB_CASSERT(
        src.num_samples() > 1 &&
        gamma.num_samples() == 1 &&
        beta.num_samples() == 1 &&
        gamma.nr() == 1 &&
        beta.nr() == 1 &&
        gamma.nc() == 1 &&
        beta.nc() == 1 &&
        gamma.k() == beta.k() && beta.k() == src.k(),
        "\ngamma.num_samples(): " << gamma.num_samples() <<
        "\ngamma.k(): " << gamma.k() <<
        "\ngamma.nr(): " << gamma.nr() <<
        "\ngamma.nc(): " << gamma.nc() <<
        "\nbeta.num_samples(): " << beta.num_samples() <<
        "\nbeta.k(): " << beta.k() <<
        "\nbeta.nr(): " << beta.nr() <<
        "\nbeta.nc(): " << beta.nc() <<
        "\nsrc.num_samples(): " << src.num_samples() <<
        "\nsrc.k(): " << src.k() <<
        "\nsrc.nr(): " << src.nr() <<
        "\nsrc.nc(): " << src.nc()
    );

    dest.copy_size(src);
    means.set_size(1, src.k());
    vars.set_size(1, src.k());

    // first compute means and vars
    means = 0;
    vars = 0;
    const auto p_vars = vars.host();
    const auto p_means = means.host();
    const auto p_gamma = gamma.host();
    const auto p_beta = beta.host();
    auto p_src = src.host();
    const long num = src.nr()*src.nc();

    // Accumulate the per-channel sum and sum of squares; p_src walks src linearly.
    for (long n = 0; n < src.num_samples(); ++n)
    {
        for (long k = 0; k < src.k(); ++k)
        {
            for (long i = 0; i < num; ++i)
            {
                p_means[k] += *p_src;
                p_vars[k] += (*p_src)*(*p_src);
                ++p_src;
            }
        }
    }
    means /= src.num_samples()*num;
    vars /= src.num_samples()*num;

    // copy data back to host (the scaling above went through the tensor operators)
    vars.host(); means.host();

    // Turn E[x^2] into the variance E[x^2] - E[x]^2.  In float arithmetic this difference
    // can round slightly below zero when the spread is tiny relative to the mean; a
    // negative value would inflate the normalization or make sqrt(var + eps) NaN, so it
    // is clamped to 0.  The test is written as "< 0" so that a NaN still propagates.
    for (long k = 0; k < src.k(); ++k)
    {
        const float var = p_vars[k] - p_means[k]*p_means[k];
        p_vars[k] = var < 0 ? 0 : var;
    }

    // TODO, must match eps in batch_normalize_gradient() so make this a shared variable.
    const float eps = 0.00001;
    p_src = src.host();
    auto p_dest = dest.host();

    // Normalize each element with the statistics of its channel, then scale and shift.
    for (long n = 0; n < src.num_samples(); ++n)
    {
        for (long k = 0; k < src.k(); ++k)
        {
            for (long i = 0; i < num; ++i)
            {
                *p_dest = (*p_src - p_means[k])/std::sqrt(p_vars[k] + eps);
                *p_dest = (*p_dest)*p_gamma[k] + p_beta[k];
                ++p_src;
                ++p_dest;
            }
        }
    }
}
void batch_normalize_conv_gradient (
const tensor& gradient_input,
const tensor& means,
const tensor& vars,
const tensor& src,
const tensor& gamma,
tensor& src_grad,
tensor& gamma_grad,
tensor& beta_grad
)
{
const float eps = 0.00001;
const long num = src.nr()*src.nc();
DLIB_CASSERT(src.k() == means.size(),"");
DLIB_CASSERT(src.k() == vars.size(),"");
DLIB_CASSERT(src.k() == gamma.size(),"");
DLIB_CASSERT(src.k() == gamma_grad.size(),"");
DLIB_CASSERT(src.k() == beta_grad.size(),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad),"");
auto p_grad = gradient_input.host();
auto p_src = src.host();
const auto p_gamma = gamma.host();
const auto p_gamma_grad = gamma_grad.host();
const auto p_beta_grad = beta_grad.host();
const auto p_vars = vars.host();
const auto p_means = means.host();
resizable_tensor dvars, dmeans;
dvars.copy_size(vars);
dmeans.copy_size(means);
dvars = 0;
dmeans = 0;
const auto p_dvars = dvars.host();
const auto p_dmeans = dmeans.host();
for (long n = 0; n < src.num_samples(); ++n)
{
for (long k = 0; k < src.k(); ++k)
{
for (long i = 0; i < num; ++i)
{
const float x_hat = (*p_src - p_means[k])/std::sqrt(p_vars[k] + eps);
p_beta_grad[k] += *p_grad;
p_gamma_grad[k] += (*p_grad)*x_hat;
const float dx = *p_grad * p_gamma[k];
p_dvars[k] += dx*(*p_src - p_means[k])* -0.5*std::pow(p_vars[k]+eps, -3.0f/2);
++p_grad;
++p_src;
}
}
}
p_grad = gradient_input.host();
p_src = src.host();
for (long n = 0; n < src.num_samples(); ++n)
{
for (long k = 0; k < src.k(); ++k)
{
for (long i = 0; i < num; ++i)
{
const float dx = *p_grad * p_gamma[k];
p_dmeans[k] += dx*-1/std::sqrt(p_vars[k] + eps) + p_dvars[k] * -2*(*p_src - p_means[k])/src.num_samples()/num;
++p_grad;
++p_src;
}
}
}
p_grad = gradient_input.host();
p_src = src.host();
auto p_src_grad = src_grad.host();
for (long n = 0; n < src.num_samples(); ++n)
{
for (long k = 0; k < src.k(); ++k)
{
for (long i = 0; i < num; ++i)
{
const float dx = *p_grad * p_gamma[k];
*p_src_grad += dx/std::sqrt(p_vars[k] + eps) +
p_dvars[k] *2*(*p_src - p_means[k])/src.num_samples()/num +
p_dmeans[k]/src.num_samples()/num;
++p_grad;
++p_src;
++p_src_grad;
}
}
}
}
// -----------------------------------------------------------------------------------
// Constructs a dropout object from a drop rate -- NOT YET IMPLEMENTED.
// The body is empty, so drop_rate is currently ignored and no state is initialized.
dropout::
dropout(
float drop_rate
)
{
}
// Constructs a dropout object from a drop rate and a seed -- NOT YET IMPLEMENTED.
// The body is empty, so both drop_rate and seed are currently ignored.
// NOTE(review): seed presumably seeds the random mask generator -- confirm when implementing.
dropout::
dropout(
float drop_rate,
int seed
)
{
}
// Forward dropout call -- NOT YET IMPLEMENTED.
// The body is empty: dest and random_mask are left untouched and src is ignored.
void dropout::
operator() (
resizable_tensor& dest,
resizable_tensor& random_mask,
const tensor& src
)
{
}
// Backward dropout call -- NOT YET IMPLEMENTED.
// The body is empty: grad is left untouched and gradient_input and random_mask are ignored.
void dropout::
get_gradient(
const tensor& gradient_input,
const tensor& random_mask,
tensor& grad
)
{
}
// -----------------------------------------------------------------------------------
}
}
#endif // DLIB_DNN_CPU_cPP_
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNN_CPU_H_
#define DLIB_DNN_CPU_H_
// This file contains CPU implementations of the GPU based functions in cuda_dlib.h
#include "tensor.h"
namespace dlib
{
namespace cpu
{
// -----------------------------------------------------------------------------------
// Affine transform of src with scalar coefficients A and B, written to dest.
// NOTE(review): the definition in cpu_dlib.cpp is still an empty TODO stub, so calling
// this currently has no effect; the intended formula should be confirmed against the
// GPU function of the same name in cuda_dlib.h.
void affine_transform(
resizable_tensor& dest,
const tensor& src,
const float A,
const float B
);
// -----------------------------------------------------------------------------------
// Affine transform of src with tensor coefficients A and B, written to dest.
// NOTE(review): the definition in cpu_dlib.cpp is still an empty TODO stub, so calling
// this currently has no effect; the required shapes of A and B should be confirmed
// against the GPU function of the same name in cuda_dlib.h.
void affine_transform(
resizable_tensor& dest,
const tensor& src,
const tensor& A,
const tensor& B
);
// -----------------------------------------------------------------------------------
// Batch-normalizes src across its samples, separately for every (k, nr, nc) position.
//   dest  - resized to match src; receives gamma*(src - mean)/sqrt(var + eps) + beta.
//   means - resized to 1 x k x nr x nc; receives the per-position mean.
//   vars  - resized to 1 x k x nr x nc; receives the per-position variance.
// Requires src.num_samples() > 1, and gamma and beta to hold one sample each with the
// same k, nr and nc as src (asserted in the definition).
void batch_normalize (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& vars,
const tensor& src,
const tensor& gamma,
const tensor& beta
);
// Backward pass for batch_normalize().  means and vars are the statistics computed by
// the forward pass for src; gradient_input is the gradient with respect to its output.
// The definition adds (+=) its results to src_grad, gamma_grad and beta_grad and does not
// zero them first.
// Requires means, vars, gamma, gamma_grad and beta_grad to hold k*nr*nc elements each,
// and gradient_input, src and src_grad to have the same dimensions (asserted).
void batch_normalize_gradient (
const tensor& gradient_input,
const tensor& means,
const tensor& vars,
const tensor& src,
const tensor& gamma,
tensor& src_grad,
tensor& gamma_grad,
tensor& beta_grad
);
// Batch normalization with one mean/variance per channel k, taken over every sample and
// every (nr, nc) position of that channel.
//   dest  - resized to match src; receives gamma[k]*(src - mean[k])/sqrt(var[k] + eps) + beta[k].
//   means - resized to hold src.k() values; receives the per-channel mean.
//   vars  - resized to hold src.k() values; receives the per-channel variance.
// Requires src.num_samples() > 1, and gamma and beta to have num_samples, nr and nc equal
// to 1 and the same k as src (asserted in the definition).
void batch_normalize_conv (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& vars,
const tensor& src,
const tensor& gamma,
const tensor& beta
);
// Backward pass for batch_normalize_conv().  means and vars are the per-channel statistics
// computed by the forward pass for src; gradient_input is the gradient with respect to
// its output.  The definition adds (+=) its results to src_grad, gamma_grad and beta_grad
// and does not zero them first.
// Requires means, vars, gamma, gamma_grad and beta_grad to hold src.k() elements each,
// and gradient_input, src and src_grad to have the same dimensions (asserted).
void batch_normalize_conv_gradient (
const tensor& gradient_input,
const tensor& means,
const tensor& vars,
const tensor& src,
const tensor& gamma,
tensor& src_grad,
tensor& gamma_grad,
tensor& beta_grad
);
// -----------------------------------------------------------------------------------
// CPU dropout object.  Only the interface exists so far: the class has no data members,
// swap() is an empty TODO, and the out-of-line members defined in cpu_dlib.cpp have empty
// bodies.  Instances are move-only.
class dropout
{
public:
// not copyable
dropout(const dropout&) = delete;
dropout& operator=(const dropout&) = delete;
// but is movable
// The move constructor first delegates to dropout(float) using its default argument and
// then swaps with item; move assignment swaps directly.  Because swap() is currently
// empty, neither transfers any state yet.
dropout(dropout&& item) : dropout() { swap(item); }
dropout& operator=(dropout&& item) { swap(item); return *this; }
// Also serves as the default constructor (drop_rate defaults to 0.5).  Not declared
// explicit, so a float converts implicitly to a dropout.
dropout(float drop_rate = 0.5);
dropout(float drop_rate, int seed);
// Exchanges the state of *this and item.  Currently a no-op placeholder.
void swap(dropout& item)
{
// TODO
}
// Forward pass taking src and two output tensors, dest and random_mask (definition is
// currently empty).
void operator() (
resizable_tensor& dest,
resizable_tensor& random_mask,
const tensor& src
);
// Backward pass taking gradient_input and random_mask, with grad as the output
// (definition is currently empty).
void get_gradient(
const tensor& gradient_input,
const tensor& random_mask,
tensor& grad
);
};
// -----------------------------------------------------------------------------------
}
}
#endif // DLIB_DNN_CPU_H_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment