Commit 93e786db authored by Fm

Merge branch 'master' of https://github.com/davisking/dlib into dnn_group_layer

parents 59892409 91163863
@@ -488,6 +488,13 @@ namespace dlib
// ----------------------------------------------------------------------------------------
struct general_ {};
struct special_ : general_ {};
template<typename> struct int_ { typedef int type; };
// ----------------------------------------------------------------------------------------
/*!A is_same_object
This is a templated function which checks if both of its arguments are actually
...
@@ -24,6 +24,38 @@
namespace dlib
{
// ----------------------------------------------------------------------------------------
namespace impl
{
template <typename T, typename int_<decltype(&T::get_learning_rate_multiplier)>::type = 0>
double get_learning_rate_multiplier (
const T& obj,
special_
) { return obj.get_learning_rate_multiplier(); }
template <typename T>
double get_learning_rate_multiplier ( const T& obj, general_) { return 1; }
}
template <typename T>
double get_learning_rate_multiplier(const T& obj) { return impl::get_learning_rate_multiplier(obj, special_()); }
// ----------------------------------------------------------------------------------------
namespace impl
{
template <typename T, typename int_<decltype(&T::get_weight_decay_multiplier)>::type = 0>
double get_weight_decay_multiplier (
const T& obj,
special_
) { return obj.get_weight_decay_multiplier(); }
template <typename T>
double get_weight_decay_multiplier ( const T& obj, general_) { return 1; }
}
template <typename T>
double get_weight_decay_multiplier(const T& obj) { return impl::get_weight_decay_multiplier(obj, special_()); }
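For readers new to this detection idiom: the public wrapper always passes special_(), overload resolution prefers the special_ overload as the exact match, and SFINAE removes that overload whenever T lacks the member, leaving the general_ fallback. A minimal self-contained sketch (with_mult and without_mult are hypothetical types, not part of the patch):

#include <iostream>

struct general_ {};
struct special_ : general_ {};
template <typename> struct int_ { typedef int type; };

struct with_mult    { double get_learning_rate_multiplier() const { return 0.1; } };
struct without_mult {};

namespace impl
{
    // Viable only when T has get_learning_rate_multiplier(); wins because
    // special_ is a better match than its base general_.
    template <typename T, typename int_<decltype(&T::get_learning_rate_multiplier)>::type = 0>
    double get_learning_rate_multiplier(const T& obj, special_) { return obj.get_learning_rate_multiplier(); }

    // Fallback chosen for every other type.
    template <typename T>
    double get_learning_rate_multiplier(const T&, general_) { return 1; }
}

template <typename T>
double get_learning_rate_multiplier(const T& obj)
{ return impl::get_learning_rate_multiplier(obj, special_()); }

int main()
{
    std::cout << get_learning_rate_multiplier(with_mult()) << "\n";    // prints 0.1
    std::cout << get_learning_rate_multiplier(without_mult()) << "\n"; // prints 1
}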
// ----------------------------------------------------------------------------------------
namespace impl
@@ -458,7 +490,7 @@ namespace dlib
sstack pop(size_t num=1)
{
- DLIB_CASSERT(num < size(), "You can't pop more things from the stack than it has in it.");
+ DLIB_CASSERT(num <= size(), "You can't pop more things from the stack than it has in it.");
return sstack(data+num, mysize-num);
}
@@ -849,8 +881,9 @@ namespace dlib
void update_parameters(sstack<solver_type> solvers, double learning_rate)
{
DLIB_CASSERT(solvers.size()>=num_computational_layers,"");
- // Don't try to adjust the parameters if this layer doesn't have any.
- if (params_grad.size() != 0)
+ // Don't try to adjust the parameters if this layer doesn't have any or the
+ // learning rate is disabled for this layer.
+ if (params_grad.size() != 0 && get_learning_rate_multiplier(details) != 0)
{
const tensor& step = solvers.top()(learning_rate, details, static_cast<const tensor&>(params_grad));
tt::add(details.get_layer_params(), details.get_layer_params(), step);
@@ -1200,8 +1233,9 @@ namespace dlib
void update_parameters(sstack<solver_type> solvers, double learning_rate)
{
DLIB_CASSERT(solvers.size()>=num_computational_layers,"");
- // Don't try to adjust the parameters if this layer doesn't have any.
- if (params_grad.size() != 0)
+ // Don't try to adjust the parameters if this layer doesn't have any or the
+ // learning rate is disabled for this layer.
+ if (params_grad.size() != 0 && get_learning_rate_multiplier(details) != 0)
{
const tensor& step = solvers.top()(learning_rate, details, static_cast<const tensor&>(params_grad));
tt::add(details.get_layer_params(), details.get_layer_params(), step);
@@ -1817,9 +1851,7 @@ namespace dlib
public:
typedef INPUT_LAYER subnet_type;
typedef typename subnet_type::input_type input_type;
- // This layer counts as a computational layer because it copies and stores the
- // inputs.
- const static size_t num_computational_layers = 1;
+ const static size_t num_computational_layers = 0;
const static size_t num_layers = 2;
const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;
static_assert(sample_expansion_factor >= 1,
...
@@ -67,6 +67,32 @@ namespace dlib
(except computes it using a numerically accurate method)
!*/
// ----------------------------------------------------------------------------------------
template <typename T>
double get_learning_rate_multiplier(
const T& obj
);
/*!
ensures
- if (obj has a get_learning_rate_multiplier() member function) then
- returns obj.get_learning_rate_multiplier()
- else
- returns 1
!*/
template <typename T>
double get_weight_decay_multiplier(
const T& obj
);
/*!
ensures
- if (obj has a get_weight_decay_multiplier() member function) then
- returns obj.get_weight_decay_multiplier()
- else
- returns 1
!*/
// ----------------------------------------------------------------------------------------
bool dnn_prefer_fastest_algorithms(
@@ -152,7 +178,7 @@
);
/*!
requires
- num <= size()
ensures
- returns a reference to the sub-stack S such that:
- S.size() == size()-num.
...
@@ -385,6 +385,30 @@
d[i] = A*s1[i] + B*s2[i] + C*s3[i] + D;
}
void affine_transform_range(
size_t begin,
size_t end,
tensor& dest,
const tensor& src1,
const tensor& src2,
const tensor& src3,
const float A,
const float B,
const float C
)
{
DLIB_CASSERT(dest.size()==src1.size(),"");
DLIB_CASSERT(dest.size()==src2.size(),"");
DLIB_CASSERT(dest.size()==src3.size(),"");
DLIB_CASSERT(begin <= end && end <= dest.size(),"");
const auto d = dest.host();
const auto s1 = src1.host();
const auto s2 = src2.host();
const auto s3 = src3.host();
for (size_t i = begin; i < end; ++i)
d[i] = A*s1[i] + B*s2[i] + C*s3[i];
}
// -----------------------------------------------------------------------------------
void affine_transform(
@@ -464,6 +488,8 @@
// -----------------------------------------------------------------------------------
void compute_adam_update (
size_t begin,
size_t end,
tensor& s,
tensor& m,
tensor& v,
@@ -480,6 +506,7 @@
s.size() == v.size() &&
s.size() == params.size() &&
s.size() == params_grad.size(),"");
DLIB_CASSERT(begin <= end && end <= params.size(),"");
const float eps = 1e-8;
const float alpha = learning_rate*std::sqrt(1-std::pow(momentum2,t))/(1-std::pow(momentum1, t));
@@ -492,7 +519,7 @@
auto ps = s.host_write_only();
auto pparams = params.host();
auto ppgrad = params_grad.host();
- for (size_t i = 0; i < params.size(); ++i)
+ for (size_t i = begin; i < end; ++i)
{
float g = weight_decay*pparams[i] + ppgrad[i];
pm[i] = momentum1*pm[i] + (1-momentum1)*g;
@@ -504,6 +531,7 @@
// -----------------------------------------------------------------------------------
void batch_normalize_inference (
const double eps,
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
@@ -519,7 +547,8 @@
gamma.k() == src.k() &&
have_same_dimensions(gamma, beta) &&
have_same_dimensions(gamma, running_means) &&
- have_same_dimensions(gamma, running_variances),
+ have_same_dimensions(gamma, running_variances) &&
+ eps > 0,
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
@@ -538,7 +567,8 @@
"\nrunning_variances.nc(): " << running_variances.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
- "\nsrc.nc(): " << src.nc()
+ "\nsrc.nc(): " << src.nc() <<
+ "\neps: " << eps
);
dest.copy_size(src);
@@ -554,7 +584,7 @@
{
for (long k = 0; k < num; ++k)
{
- *d = g[k]*(*s - m[k])/std::sqrt(v[k]+dlib::tt::BATCH_NORM_EPS) + b[k];
+ *d = g[k]*(*s - m[k])/std::sqrt(v[k]+eps) + b[k];
++d;
++s;
}
@@ -562,6 +592,7 @@
}
void batch_normalize (
const double eps,
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
@@ -582,7 +613,8 @@
beta.num_samples() == 1 &&
gamma.nr() == beta.nr() && beta.nr() == src.nr() &&
gamma.nc() == beta.nc() && beta.nc() == src.nc() &&
- gamma.k() == beta.k() && beta.k() == src.k(),
+ gamma.k() == beta.k() && beta.k() == src.k() &&
+ eps > 0,
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
@@ -593,7 +625,8 @@
"\nbeta.nc(): " << beta.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
- "\nsrc.nc(): " << src.nc()
+ "\nsrc.nc(): " << src.nc() <<
+ "\neps: " << eps
);
dest.copy_size(src);
@@ -635,7 +668,7 @@
else
rvar[i] = (1-averaging_factor)*rvar[i] + scale*averaging_factor*actual_var;
- p_invstds[i] = 1.0f/std::sqrt(actual_var + dlib::tt::BATCH_NORM_EPS);
+ p_invstds[i] = 1.0f/std::sqrt(actual_var + eps);
}
p_src = src.host();
@@ -662,6 +695,7 @@
}
void batch_normalize_gradient (
const double eps,
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
@@ -682,6 +716,7 @@
DLIB_CASSERT(num == beta_grad.size(),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad),"");
DLIB_CASSERT(eps > 0,"");
beta_grad = 0;
gamma_grad = 0;
@@ -757,6 +792,7 @@
// ----------------------------------------------------------------------------------------
void batch_normalize_conv_inference (
const double eps,
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
@@ -772,7 +808,8 @@
gamma.k() == src.k() &&
have_same_dimensions(gamma, beta) &&
have_same_dimensions(gamma, running_means) &&
- have_same_dimensions(gamma, running_variances),
+ have_same_dimensions(gamma, running_variances) &&
+ eps > 0,
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
@@ -791,7 +828,8 @@
"\nrunning_variances.nc(): " << running_variances.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
- "\nsrc.nc(): " << src.nc()
+ "\nsrc.nc(): " << src.nc() <<
+ "\neps: " << eps
);
dest.copy_size(src);
@@ -807,7 +845,7 @@
{
for (long k = 0; k < src.k(); ++k)
{
- const float invstd = 1.0f/std::sqrt(v[k] + dlib::tt::BATCH_NORM_EPS);
+ const float invstd = 1.0f/std::sqrt(v[k] + eps);
for (long j = 0; j < num; ++j)
{
*d = g[k]*(*s - m[k])*invstd + b[k];
@@ -819,6 +857,7 @@
}
void batch_normalize_conv (
const double eps,
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
@@ -841,7 +880,8 @@
beta.nr() == 1 &&
gamma.nc() == 1 &&
beta.nc() == 1 &&
- gamma.k() == beta.k() && beta.k() == src.k(),
+ gamma.k() == beta.k() && beta.k() == src.k() &&
+ eps > 0,
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
@@ -852,7 +892,8 @@
"\nbeta.nc(): " << beta.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
- "\nsrc.nc(): " << src.nc()
+ "\nsrc.nc(): " << src.nc() <<
+ "\neps: " << eps
);
dest.copy_size(src);
@@ -900,7 +941,7 @@
else
rvar[k] = (1-averaging_factor)*rvar[k] + scale*averaging_factor*actual_var;
- p_invstds[k] = 1.0f/std::sqrt(actual_var + dlib::tt::BATCH_NORM_EPS);
+ p_invstds[k] = 1.0f/std::sqrt(actual_var + eps);
}
p_src = src.host();
@@ -928,6 +969,7 @@
}
void batch_normalize_conv_gradient(
const double eps,
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
@@ -948,6 +990,7 @@
DLIB_CASSERT(src.k() == beta_grad.size(),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad),"");
DLIB_CASSERT(eps > 0,"");
beta_grad = 0;
gamma_grad = 0;
...
@@ -81,6 +81,18 @@ namespace dlib
const float D
);
void affine_transform_range(
size_t begin,
size_t end,
tensor& dest,
const tensor& src1,
const tensor& src2,
const tensor& src3,
const float A,
const float B,
const float C
);
// -----------------------------------------------------------------------------------
void affine_transform(
@@ -102,6 +114,8 @@
// -----------------------------------------------------------------------------------
void compute_adam_update (
size_t begin,
size_t end,
tensor& s,
tensor& m,
tensor& v,
@@ -117,6 +131,7 @@
// -----------------------------------------------------------------------------------
void batch_normalize_inference (
const double eps,
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
@@ -126,6 +141,7 @@
);
void batch_normalize (
const double eps,
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
@@ -138,6 +154,7 @@
);
void batch_normalize_gradient (
const double eps,
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
@@ -149,6 +166,7 @@
);
void batch_normalize_conv_inference (
const double eps,
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
@@ -158,6 +176,7 @@
);
void batch_normalize_conv (
const double eps,
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
@@ -170,6 +189,7 @@
);
void batch_normalize_conv_gradient (
const double eps,
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
...
@@ -504,6 +504,40 @@
src2.device(), src3.device(), dest.size(), A, B, C, D);
}
// ----------------------------------------------------------------------------------------
__global__ void _cuda_affine_transform_range(
float* d, const float* s1, const float* s2, const float* s3, size_t begin, size_t end, float A, float B, float C
)
{
for (auto i : grid_stride_range(begin, end))
{
d[i] = A*s1[i] + B*s2[i] + C*s3[i];
}
}
void affine_transform_range(
size_t begin,
size_t end,
tensor& dest,
const tensor& src1,
const tensor& src2,
const tensor& src3,
const float A,
const float B,
const float C
)
{
DLIB_CASSERT(dest.size()==src1.size(),"");
DLIB_CASSERT(dest.size()==src2.size(),"");
DLIB_CASSERT(dest.size()==src3.size(),"");
DLIB_CASSERT(begin <= end && end <= dest.size(),"");
launch_kernel(_cuda_affine_transform_range,max_jobs(end-begin),
dest.device(), src1.device(),
src2.device(), src3.device(), begin, end, A, B, C);
}
// -----------------------------------------------------------------------------------
__global__ void _cuda_affine_transform2(float* d, const float* s, size_t n, const float* A, const float* B)
@@ -549,7 +583,8 @@
// ----------------------------------------------------------------------------------------
__global__ void _cuda_compute_adam_update(
- size_t n,
+ size_t begin,
+ size_t end,
float* s,
float* m,
float* v,
@@ -566,7 +601,7 @@
// m = momentum1*m + (1-momentum1) * (weight_decay*params + params_grad);
// v = momentum2*v + (1-momentum2)*squared(weight_decay*params + params_grad);
// s = -alpha*m/(sqrt(v) + eps);
- for (auto i : grid_stride_range(0, n))
+ for (auto i : grid_stride_range(begin, end))
{
float g = (weight_decay*params[i] + params_grad[i]);
m[i] = momentum1*m[i] + (1-momentum1)*g;
@@ -576,6 +611,8 @@
}
void compute_adam_update (
size_t begin,
size_t end,
tensor& s,
tensor& m,
tensor& v,
@@ -592,10 +629,11 @@
s.size() == v.size() &&
s.size() == params.size() &&
s.size() == params_grad.size(),"");
DLIB_CASSERT(begin <= end && end <= params.size(),"");
const float alpha = learning_rate*std::sqrt(1-std::pow(momentum2,t))/(1-std::pow(momentum1, t));
- launch_kernel(_cuda_compute_adam_update,max_jobs(s.size()),
-         s.size(), s.device(), m.device(), v.device(), alpha, weight_decay,
+ launch_kernel(_cuda_compute_adam_update,max_jobs(end-begin),
+         begin, end, s.device(), m.device(), v.device(), alpha, weight_decay,
momentum1, momentum2, params.device(), params_grad.device());
}
...
@@ -164,6 +164,18 @@
const float D
);
void affine_transform_range(
size_t begin,
size_t end,
tensor& dest,
const tensor& src1,
const tensor& src2,
const tensor& src3,
const float A,
const float B,
const float C
);
// Note that this function isn't in the tt:: namespace because add_scaled() is
// called by cuda::add() so we don't need a tt:: version of add_scaled().
void add_scaled(
@@ -193,6 +205,8 @@
// ----------------------------------------------------------------------------------------
void compute_adam_update (
size_t begin,
size_t end,
tensor& s,
tensor& m,
tensor& v,
...
@@ -338,6 +338,7 @@
// ------------------------------------------------------------------------------------
void batch_normalize_inference (
const double eps,
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
@@ -353,7 +354,8 @@
gamma.k() == src.k() &&
have_same_dimensions(gamma, beta) &&
have_same_dimensions(gamma, running_means) &&
- have_same_dimensions(gamma, running_variances),
+ have_same_dimensions(gamma, running_variances) &&
+ eps > 0,
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
@@ -372,7 +374,8 @@
"\nrunning_variances.nc(): " << running_variances.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
- "\nsrc.nc(): " << src.nc()
+ "\nsrc.nc(): " << src.nc() <<
+ "\neps: " << eps
);
const float in_scale = 1;
const float out_scale = 0;
@@ -393,10 +396,11 @@
beta.device(),
running_means.device(),
running_variances.device(),
- dlib::tt::BATCH_NORM_EPS));
+ eps));
}
void batch_normalize (
const double eps,
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
@@ -417,7 +421,8 @@
beta.num_samples() == 1 &&
gamma.nr() == beta.nr() && beta.nr() == src.nr() &&
gamma.nc() == beta.nc() && beta.nc() == src.nc() &&
- gamma.k() == beta.k() && beta.k() == src.k(),
+ gamma.k() == beta.k() && beta.k() == src.k() &&
+ eps > 0,
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
@@ -428,7 +433,8 @@
"\nbeta.nc(): " << beta.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
- "\nsrc.nc(): " << src.nc()
+ "\nsrc.nc(): " << src.nc() <<
+ "\neps: " << eps
);
const float in_scale = 1;
@@ -455,12 +461,13 @@
averaging_factor,
running_means.device(),
running_variances.device(),
- dlib::tt::BATCH_NORM_EPS,
+ eps,
means.device(),
invstds.device()));
}
void batch_normalize_gradient(
const double eps,
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
@@ -480,6 +487,7 @@
DLIB_CASSERT(num == beta_grad.size(),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad),"");
DLIB_CASSERT(eps > 0,"");
const float in_scale = 1;
const float out_scale = 1;
@@ -503,7 +511,7 @@
gamma.device(),
gamma_grad.device(),
beta_grad.device(),
- dlib::tt::BATCH_NORM_EPS,
+ eps,
means.device(),
invstds.device()));
}
@@ -511,6 +519,7 @@
// ------------------------------------------------------------------------------------
void batch_normalize_conv_inference (
const double eps,
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
@@ -526,7 +535,8 @@
gamma.k() == src.k() &&
have_same_dimensions(gamma, beta) &&
have_same_dimensions(gamma, running_means) &&
- have_same_dimensions(gamma, running_variances),
+ have_same_dimensions(gamma, running_variances) &&
+ eps > 0,
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
@@ -545,7 +555,8 @@
"\nrunning_variances.nc(): " << running_variances.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
- "\nsrc.nc(): " << src.nc()
+ "\nsrc.nc(): " << src.nc() <<
+ "\neps: " << eps
);
const float in_scale = 1;
const float out_scale = 0;
@@ -566,10 +577,11 @@
beta.device(),
running_means.device(),
running_variances.device(),
- dlib::tt::BATCH_NORM_EPS));
+ eps));
}
void batch_normalize_conv (
const double eps,
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
@@ -592,7 +604,8 @@
beta.nr() == 1 &&
gamma.nc() == 1 &&
beta.nc() == 1 &&
- gamma.k() == beta.k() && beta.k() == src.k(),
+ gamma.k() == beta.k() && beta.k() == src.k() &&
+ eps > 0,
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
@@ -603,7 +616,8 @@
"\nbeta.nc(): " << beta.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
- "\nsrc.nc(): " << src.nc()
+ "\nsrc.nc(): " << src.nc() <<
+ "\neps: " << eps
);
const float in_scale = 1;
const float out_scale = 0;
@@ -629,12 +643,13 @@
averaging_factor,
running_means.device(),
running_variances.device(),
- dlib::tt::BATCH_NORM_EPS,
+ eps,
means.device(),
invstds.device()));
}
void batch_normalize_conv_gradient(
const double eps,
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
@@ -653,6 +668,7 @@
DLIB_CASSERT(src.k() == beta_grad.size(),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad),"");
DLIB_CASSERT(eps > 0,"");
const float in_scale = 1;
const float out_scale = 1;
@@ -676,7 +692,7 @@
gamma.device(),
gamma_grad.device(),
beta_grad.device(),
- dlib::tt::BATCH_NORM_EPS,
+ eps,
means.device(),
invstds.device()));
}
...
@@ -135,6 +135,7 @@
// ------------------------------------------------------------------------------------
void batch_normalize_inference (
const double eps,
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
@@ -144,6 +145,7 @@
);
void batch_normalize (
const double eps,
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
@@ -156,6 +158,7 @@
);
void batch_normalize_gradient(
const double eps,
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
@@ -169,6 +172,7 @@
// ------------------------------------------------------------------------------------
void batch_normalize_conv_inference (
const double eps,
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
@@ -178,6 +182,7 @@
);
void batch_normalize_conv (
const double eps,
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
@@ -190,6 +195,7 @@
);
void batch_normalize_conv_gradient(
const double eps,
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
...
@@ -6,6 +6,7 @@
#include "solvers_abstract.h"
#include "tensor.h"
#include <iostream>
#include "layers.h"
namespace dlib
{
@@ -49,10 +50,53 @@
v = 0;
}
- //perform: v = momentum*mat(v) - weight_decay*learning_rate*mat(params) - learning_rate*mat(params_grad);
- tt::affine_transform(v, v, params, params_grad,
-         momentum, -weight_decay*learning_rate, -learning_rate, 0);
+ const double lr = learning_rate*get_learning_rate_multiplier(l);
+ const double wd = weight_decay*get_weight_decay_multiplier(l);
+
+ //perform: v = momentum*mat(v) - wd*lr*mat(params) - lr*mat(params_grad);
+ tt::affine_transform(v, v, params, params_grad, momentum, -wd*lr, -lr);
return v;
}
template <unsigned long N>
const tensor& operator() (
const float learning_rate,
const fc_<N,FC_HAS_BIAS>& l,
const tensor& params_grad
)
{
update_considering_bias(learning_rate, l, params_grad, l.get_num_outputs());
return v;
}
template <
long _num_filters,
long _nr,
long _nc,
int _stride_y,
int _stride_x,
int _padding_y,
int _padding_x
>
const tensor& operator() (
const float learning_rate,
const con_<_num_filters,_nr,_nc,_stride_y,_stride_x,_padding_y,_padding_x>& l,
const tensor& params_grad
)
{
update_considering_bias(learning_rate, l, params_grad, l.num_filters());
return v;
}
template < layer_mode mode >
const tensor& operator() (
const float learning_rate,
const bn_<mode>& l,
const tensor& params_grad
)
{
update_considering_bias(learning_rate, l, params_grad, params_grad.size()/2);
return v;
}
@@ -76,9 +120,49 @@
}
private:
template <typename layer_type>
void update_considering_bias(
const float learning_rate,
const layer_type& l,
const tensor& params_grad,
unsigned long bias_offset
)
{
const tensor& params = l.get_layer_params();
DLIB_CASSERT(params.size() != 0,"");
if (v.size() == 0)
{
v.copy_size(params_grad);
v = 0;
}
double lr = learning_rate*get_learning_rate_multiplier(l);
double wd = weight_decay*get_weight_decay_multiplier(l);
//perform: v = momentum*mat(v) - wd*lr*mat(params) - lr*mat(params_grad);
if (l.get_bias_learning_rate_multiplier() == 1 && l.get_bias_weight_decay_multiplier() == 1)
{
tt::affine_transform(v, v, params, params_grad, momentum, -wd*lr, -lr);
}
else
{
tt::affine_transform_range(0, bias_offset, v, v, params, params_grad, momentum, -wd*lr, -lr);
// now update the biases but apply their multipliers
lr *= l.get_bias_learning_rate_multiplier();
wd *= l.get_bias_weight_decay_multiplier();
tt::affine_transform_range(bias_offset, v.size(), v, v, params, params_grad, momentum, -wd*lr, -lr);
}
}
resizable_tensor v;
float weight_decay;
float momentum;
};
// ----------------------------------------------------------------------------------------
@@ -132,11 +216,57 @@
++t;
- tt::compute_adam_update(s, m, v, t, learning_rate, weight_decay, momentum1, momentum2, params, params_grad);
+ tt::compute_adam_update(0, params.size(), s, m, v, t,
+     learning_rate*get_learning_rate_multiplier(l),
+     weight_decay*get_weight_decay_multiplier(l),
+     momentum1, momentum2, params, params_grad);
return s;
}
template <unsigned long N>
const tensor& operator() (
const float learning_rate,
const fc_<N,FC_HAS_BIAS>& l,
const tensor& params_grad
)
{
update_considering_bias(learning_rate, l, params_grad, l.get_num_outputs());
return s;
}
template <
long _num_filters,
long _nr,
long _nc,
int _stride_y,
int _stride_x,
int _padding_y,
int _padding_x
>
const tensor& operator() (
const float learning_rate,
const con_<_num_filters,_nr,_nc,_stride_y,_stride_x,_padding_y,_padding_x>& l,
const tensor& params_grad
)
{
update_considering_bias(learning_rate, l, params_grad, l.num_filters());
return s;
}
template < layer_mode mode >
const tensor& operator() (
const float learning_rate,
const bn_<mode>& l,
const tensor& params_grad
)
{
update_considering_bias(learning_rate, l, params_grad, params_grad.size()/2);
return s;
}
friend void serialize(const adam& item, std::ostream& out)
{
serialize("adam2", out);
@@ -165,6 +295,49 @@
}
private:
template <typename layer_type>
void update_considering_bias(
const float learning_rate,
const layer_type& l,
const tensor& params_grad,
unsigned long bias_offset
)
{
const tensor& params = l.get_layer_params();
DLIB_CASSERT(params.size() != 0,"");
if (v.size() == 0)
{
m.copy_size(params_grad);
m = 0;
v.copy_size(params_grad);
v = 0;
s.copy_size(params_grad);
}
++t;
if (l.get_bias_learning_rate_multiplier() == 1 && l.get_bias_weight_decay_multiplier() == 1)
{
tt::compute_adam_update(0, params.size(), s, m, v, t,
learning_rate*get_learning_rate_multiplier(l),
weight_decay*get_weight_decay_multiplier(l),
momentum1, momentum2, params, params_grad);
}
else
{
tt::compute_adam_update(0, bias_offset, s, m, v, t,
learning_rate*get_learning_rate_multiplier(l),
weight_decay*get_weight_decay_multiplier(l),
momentum1, momentum2, params, params_grad);
tt::compute_adam_update(bias_offset, params.size(), s, m, v, t,
learning_rate*get_learning_rate_multiplier(l)*l.get_bias_learning_rate_multiplier(),
weight_decay*get_weight_decay_multiplier(l)*l.get_bias_weight_decay_multiplier(),
momentum1, momentum2, params, params_grad);
}
}
resizable_tensor m;
resizable_tensor v;
resizable_tensor s;
...
@@ -78,6 +78,15 @@ namespace dlib
V = momentum*V - weight_decay*learning_rate*l.get_layer_params() - learning_rate*params_grad;
Here V is a momentum term that is remembered by the solver from one
invocation of operator() to the next.
Note that the learning rate and weight decay actually used by the solver are
scaled by the per-layer multipliers. That is, the solver calls
get_learning_rate_multiplier(l) and get_weight_decay_multiplier(l) and
multiplies these values by the nominal learning rate and weight decay,
respectively, to determine the values it uses during each step. The solver is
also overloaded for fc_, con_, and bn_ layers so that separate multipliers can
be applied to their bias parameters.
!*/
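A usage sketch of these multipliers (hedged: the toy net_type is hypothetical, and set_learning_rate_multiplier is the layer-side setter that this patch reads back through get_learning_rate_multiplier(); names may differ in other dlib versions):

#include <dlib/dnn.h>
using namespace dlib;

// A toy network: input -> fc 84 -> relu -> fc 10 -> loss.
using net_type = loss_multiclass_log<fc<10, relu<fc<84, input<matrix<float>>>>>>;

int main()
{
    net_type net;
    // Train the inner fc layer ten times slower than the rest. A multiplier
    // of 0 would make update_parameters() skip the layer entirely (see the
    // check added in core.h above).
    layer<2>(net).layer_details().set_learning_rate_multiplier(0.1);

    dnn_trainer<net_type> trainer(net, sgd(0.0005, 0.9)); // weight decay, momentum
    trainer.set_learning_rate(0.1);
    // ... train as usual; the solver picks the multiplier up automatically.
}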
public:
@@ -123,6 +132,15 @@
paper:
Kingma, Diederik P., and Jimmy Ba. "Adam: A method for stochastic
optimization." International Conference on Learning Representations. 2015.
Note that the learning rate and weight decay actually used by the solver are
scaled by the per-layer multipliers. That is, the solver calls
get_learning_rate_multiplier(l) and get_weight_decay_multiplier(l) and
multiplies these values by the nominal learning rate and weight decay,
respectively, to determine the values it uses during each step. The solver is
also overloaded for fc_, con_, and bn_ layers so that separate multipliers can
be applied to their bias parameters.
!*/
public:
...
@@ -240,6 +240,42 @@ namespace dlib { namespace tt
#endif
}
void affine_transform_range(
size_t begin,
size_t end,
tensor& dest,
const tensor& src1,
const tensor& src2,
const tensor& src3,
const float A,
const float B,
const float C
)
{
#ifdef DLIB_USE_CUDA
cuda::affine_transform_range(begin, end, dest,src1,src2,src3,A,B,C);
#else
cpu::affine_transform_range(begin, end, dest,src1,src2,src3,A,B,C);
#endif
}
void affine_transform(
tensor& dest,
const tensor& src1,
const tensor& src2,
const tensor& src3,
const float A,
const float B,
const float C
)
{
#ifdef DLIB_USE_CUDA
cuda::affine_transform_range(0,dest.size(),dest,src1,src2,src3,A,B,C);
#else
cpu::affine_transform_range(0,dest.size(),dest,src1,src2,src3,A,B,C);
#endif
}
// ----------------------------------------------------------------------------------------
void affine_transform(
@@ -275,6 +311,8 @@
// ----------------------------------------------------------------------------------------
void compute_adam_update (
size_t begin,
size_t end,
tensor& s,
tensor& m,
tensor& v,
@@ -288,10 +326,10 @@
)
{
#ifdef DLIB_USE_CUDA
- cuda::compute_adam_update(s, m, v, t, learning_rate, weight_decay, momentum1,
+ cuda::compute_adam_update(begin, end, s, m, v, t, learning_rate, weight_decay, momentum1,
momentum2, params, params_grad);
#else
- cpu::compute_adam_update(s, m, v, t, learning_rate, weight_decay, momentum1,
+ cpu::compute_adam_update(begin, end, s, m, v, t, learning_rate, weight_decay, momentum1,
momentum2, params, params_grad);
#endif
}
@@ -299,6 +337,7 @@
// ----------------------------------------------------------------------------------------
void batch_normalize_inference (
const double eps,
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
@@ -308,13 +347,14 @@
)
{
#ifdef DLIB_USE_CUDA
- cuda::batch_normalize_inference(dest,src,gamma,beta,running_means,running_variances);
+ cuda::batch_normalize_inference(eps,dest,src,gamma,beta,running_means,running_variances);
#else
- cpu::batch_normalize_inference(dest,src,gamma,beta,running_means,running_variances);
+ cpu::batch_normalize_inference(eps,dest,src,gamma,beta,running_means,running_variances);
#endif
}
void batch_normalize (
const double eps,
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& vars,
@@ -327,13 +367,14 @@
)
{
#ifdef DLIB_USE_CUDA
- cuda::batch_normalize(dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
+ cuda::batch_normalize(eps,dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
#else
- cpu::batch_normalize(dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
+ cpu::batch_normalize(eps,dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
#endif
}
void batch_normalize_gradient (
const double eps,
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
@@ -346,15 +387,16 @@
{
#ifdef DLIB_USE_CUDA
- cuda::batch_normalize_gradient(gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
+ cuda::batch_normalize_gradient(eps,gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
#else
- cpu::batch_normalize_gradient(gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
+ cpu::batch_normalize_gradient(eps,gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
#endif
}
// ----------------------------------------------------------------------------------------
void batch_normalize_conv_inference (
const double eps,
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
@@ -364,13 +406,14 @@
)
{
#ifdef DLIB_USE_CUDA
- cuda::batch_normalize_conv_inference(dest,src,gamma,beta,running_means,running_variances);
+ cuda::batch_normalize_conv_inference(eps,dest,src,gamma,beta,running_means,running_variances);
#else
- cpu::batch_normalize_conv_inference(dest,src,gamma,beta,running_means,running_variances);
+ cpu::batch_normalize_conv_inference(eps,dest,src,gamma,beta,running_means,running_variances);
#endif
}
void batch_normalize_conv (
const double eps,
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& vars,
@@ -383,13 +426,14 @@
)
{
#ifdef DLIB_USE_CUDA
- cuda::batch_normalize_conv(dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
+ cuda::batch_normalize_conv(eps,dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
#else
- cpu::batch_normalize_conv(dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
+ cpu::batch_normalize_conv(eps,dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
#endif
}
void batch_normalize_conv_gradient (
const double eps,
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
@@ -402,9 +446,9 @@ namespace dlib { namespace tt
{
#ifdef DLIB_USE_CUDA
- cuda::batch_normalize_conv_gradient(gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
+ cuda::batch_normalize_conv_gradient(eps,gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
#else
- cpu::batch_normalize_conv_gradient(gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
+ cpu::batch_normalize_conv_gradient(eps,gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
#endif
}
...
@@ -229,13 +229,58 @@ namespace dlib { namespace tt
const float D
);
/*!
requires
- dest.size()==src1.size()
- dest.size()==src2.size()
- dest.size()==src3.size()
ensures
- #dest == A*src1 + B*src2 + C*src3 + D
!*/
void affine_transform(
tensor& dest,
const tensor& src1,
const tensor& src2,
const tensor& src3,
const float A,
const float B,
const float C
);
/*!
requires
- dest.size()==src1.size()
- dest.size()==src2.size()
- dest.size()==src3.size()
ensures
- #dest == A*src1 + B*src2 + C*src3
!*/
void affine_transform_range(
size_t begin,
size_t end,
tensor& dest,
const tensor& src1,
const tensor& src2,
const tensor& src3,
const float A,
const float B,
const float C
);
/*!
requires
- dest.size()==src1.size()
- dest.size()==src2.size()
- dest.size()==src3.size()
- begin <= end <= dest.size()
ensures
- This function operates much like
affine_transform(dest,src1,src2,src3,A,B,C,0), except that it runs over only
the half open range [begin,end) rather than processing the entire tensor.
Specifically, it does this:
- for i in the range [begin, end):
- #dest.host()[i] == A*src1.host()[i] + B*src2.host()[i] + C*src3.host()[i]
!*/
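A plain-C++ reference of this contract (a sketch with raw float arrays standing in for tensor memory, not the dlib implementation):

#include <cstddef>

// Computes dest[i] = A*src1[i] + B*src2[i] + C*src3[i] for i in [begin, end),
// leaving every element outside the range untouched.
void affine_transform_range_ref(
    std::size_t begin, std::size_t end,
    float* dest, const float* src1, const float* src2, const float* src3,
    float A, float B, float C
)
{
    for (std::size_t i = begin; i < end; ++i)
        dest[i] = A*src1[i] + B*src2[i] + C*src3[i];
}

The solvers above use exactly this shape: one call over [0, bias_offset) with the layer's multipliers, and a second over [bias_offset, size) with the bias multipliers folded in.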
// ----------------------------------------------------------------------------------------
void affine_transform(
@@ -290,6 +335,8 @@
// ----------------------------------------------------------------------------------------
void compute_adam_update (
size_t begin,
size_t end,
tensor& s,
tensor& m,
tensor& v,
@@ -309,19 +356,22 @@
- weight_decay >= 0
- 0 <= momentum1 < 1
- 0 <= momentum2 < 1
- begin <= end <= params.size()
ensures
- This function implements the ADAM parameter update method described in the paper:
Kingma, Diederik P., and Jimmy Ba. "Adam: A method for stochastic
optimization." International Conference on Learning Representations. 2015.
Specifically, it implements the method shown as Algorithm 1.
- #s is the update vector that should be added to the parameters.
- The function only operates in the half open range [begin,end) of the memory
blocks of each tensor. E.g. to make this function run on the entire tensor
set begin to 0 and end to params.size().
!*/
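A scalar sanity check of the update rule, matching the formulas in the CPU and CUDA implementations above (the constants are illustrative):

#include <cmath>
#include <cstdio>

int main()
{
    float m = 0, v = 0, s = 0, param = 0.5f;
    const float grad = 0.2f, lr = 0.001f, wd = 0.0005f;
    const float m1 = 0.9f, m2 = 0.999f, eps = 1e-8f;
    for (int t = 1; t <= 3; ++t)
    {
        const float alpha = lr*std::sqrt(1 - std::pow(m2, t))/(1 - std::pow(m1, t));
        const float g = wd*param + grad;       // weight decay folded into the gradient
        m = m1*m + (1 - m1)*g;                 // first moment estimate
        v = m2*v + (1 - m2)*g*g;               // second moment estimate
        s = -alpha*m/(std::sqrt(v) + eps);     // the step returned in #s
        param += s;                            // the trainer adds the step to the parameter
        std::printf("t=%d step=%g\n", t, s);
    }
}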
// ----------------------------------------------------------------------------------------
- const double BATCH_NORM_EPS = 0.00001;
void batch_normalize_inference (
const double eps,
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
@@ -331,6 +381,7 @@
);
/*!
requires
- eps > 0
- gamma.num_samples() == 1
- gamma.nr() == src.nr()
- gamma.nc() == src.nc()
@@ -342,11 +393,12 @@
- Linearly transforms src as a call to batch_normalize() would if src had means
and variances as given by running_means and running_variances. That is, this
function performs:
dest = gamma*(src-running_means)/sqrt(running_variances+eps) + beta
Note that it does it in a pointwise fashion over the samples in src.
!*/
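The per-element computation, written out as a sketch (gamma, beta, and the running statistics are the per-channel scalars for the element's channel):

#include <cmath>

// One output element of batch_normalize_inference() for a given channel.
float bn_inference_point(
    float src, float gamma, float beta,
    float running_mean, float running_variance, double eps
)
{
    return gamma*(src - running_mean)/std::sqrt(running_variance + eps) + beta;
}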
void batch_normalize (
const double eps,
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
@@ -359,6 +411,7 @@
);
/*!
requires
- eps > 0
- src.num_samples() > 1 - src.num_samples() > 1
- gamma.num_samples() == 1 - gamma.num_samples() == 1
- beta.num_samples() == 1 - beta.num_samples() == 1
...@@ -384,6 +437,7 @@ namespace dlib { namespace tt ...@@ -384,6 +437,7 @@ namespace dlib { namespace tt
!*/ !*/
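For training, the same operation runs with batch statistics computed on the
fly.  A sketch using the call form quoted by the batch_normalize_gradient()
requires clause below (the running-statistics arguments elided by the diff are
deliberately omitted; the 1e-5 epsilon is an assumption):

    resizable_tensor dest, means, invstds;
    tt::batch_normalize(1e-5, dest, means, invstds, src, gamma, beta);
    // Keep means and invstds around: batch_normalize_gradient() needs them
    // to backpropagate through this exact normalization.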
    void batch_normalize_gradient (
        const double eps,
        const tensor& gradient_input,
        const tensor& means,
        const tensor& invstds,
...@@ -395,8 +449,9 @@ namespace dlib { namespace tt
    );
    /*!
        requires
            - eps > 0
            - invstds and means should be the output of a call to
              batch_normalize(eps,dest,means,invstds,src,gamma,beta)
            - have_same_dimensions(gradient_input, src) == true
            - have_same_dimensions(src, src_grad) == true
            - src.num_samples() > 1
...@@ -410,7 +465,7 @@ namespace dlib { namespace tt
            - have_same_dimensions(invstds, gamma) == true
        ensures
            - Let f(src,gamma,beta) == dot(gradient_input, dest output of
              batch_normalize(eps,dest,means,invstds,src,gamma,beta))
            - Adds the gradient of f() with respect to src to #src_grad.
            - Assigns the gradient of f() with respect to gamma to #gamma_grad.
            - Assigns the gradient of f() with respect to beta to #beta_grad.
...@@ -419,6 +474,7 @@ namespace dlib { namespace tt
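Putting the forward and backward calls together, a backward-pass sketch looks
like this.  gradient_input is assumed to already hold d(loss)/d(dest), and the
trailing argument order and copy_size() allocation are assumptions, since the
diff truncates this signature too:

    resizable_tensor src_grad, gamma_grad, beta_grad;
    src_grad.copy_size(src);
    src_grad = 0;                 // the src gradient is added to, so zero it first
    gamma_grad.copy_size(gamma);
    beta_grad.copy_size(beta);
    tt::batch_normalize_gradient(1e-5, gradient_input, means, invstds,
                                 src, gamma, src_grad, gamma_grad, beta_grad);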
// ----------------------------------------------------------------------------------------
    void batch_normalize_conv_inference (
        const double eps,
        resizable_tensor& dest,
        const tensor& src,
        const tensor& gamma,
...@@ -428,6 +484,7 @@ namespace dlib { namespace tt
    );
    /*!
        requires
            - eps > 0
            - gamma.num_samples() == 1
            - gamma.nr() == 1
            - gamma.nc() == 1
...@@ -439,12 +496,13 @@ namespace dlib { namespace tt
            - Linearly transforms src as a call to batch_normalize_conv() would if src had
              means and variances as given by running_means and running_variances.  That
              is, this function performs:
                dest = gamma*(src-running_means)/sqrt(running_variances+eps) + beta
              Note that it does this in a pointwise fashion over the samples, rows, and
              columns in src.
    !*/
    void batch_normalize_conv (
        const double eps,
        resizable_tensor& dest,
        resizable_tensor& means,
        resizable_tensor& invstds,
...@@ -457,6 +515,7 @@ namespace dlib { namespace tt
    );
    /*!
        requires
            - eps > 0
            - src.num_samples() > 1
            - gamma.num_samples()==gamma.nr()==gamma.nc() == 1
            - beta.num_samples()==beta.nr()==beta.nc() == 1
...@@ -478,6 +537,7 @@ namespace dlib { namespace tt
    !*/
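The conv variants pool statistics over samples, rows, and columns, so gamma
and beta carry one value per channel.  A sketch mirroring the non-conv example
above (shapes, epsilon, and the elided trailing arguments are assumptions):

    resizable_tensor dest, means, invstds;
    // src is NxKxRxC; gamma and beta are 1xKx1x1 per the requires clauses.
    tt::batch_normalize_conv(1e-5, dest, means, invstds, src, gamma, beta);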
    void batch_normalize_conv_gradient (
        const double eps,
        const tensor& gradient_input,
        const tensor& means,
        const tensor& invstds,
...@@ -489,8 +549,9 @@ namespace dlib { namespace tt
    );
    /*!
        requires
            - eps > 0
            - invstds and means should be the output of a call to
              batch_normalize_conv(eps,dest,means,invstds,src,gamma,beta)
            - have_same_dimensions(gradient_input, src) == true
            - have_same_dimensions(src, src_grad) == true
            - src.num_samples() > 1
...@@ -502,7 +563,7 @@ namespace dlib { namespace tt
            - have_same_dimensions(invstds, gamma) == true
        ensures
            - Let f(src,gamma,beta) == dot(gradient_input, dest output of
              batch_normalize_conv(eps,dest,means,invstds,src,gamma,beta))
            - Adds the gradient of f() with respect to src to #src_grad.
            - Assigns the gradient of f() with respect to gamma to #gamma_grad.
            - Assigns the gradient of f() with respect to beta to #beta_grad.
......
...@@ -526,8 +526,7 @@ namespace dlib
        label_type pick_which_run_update;
        job_t next_job;

        std::vector<dlib::future<double>> losses(devices.size());
        std::vector<tt::multi_device_tensor_averager> averagers;

        // An array of all the parameter tensors in the first network.  We will
...@@ -536,6 +535,16 @@ namespace dlib
        std::vector<tensor*> reference_params;
        visit_layer_parameters(devices[0]->net, [&](size_t, tensor& t) { reference_params.push_back(&t); });
        // We make separate thread pools with just one thread in them because we want
        // to make sure each device is always executed on the same thread.  We care
        // about this because there are thread_local context variables for some cuda
        // components and they get regenerated when the current cuda device changes.
        // Recreating them over and over is somewhat expensive so we want to avoid
        // that.
        std::vector<std::shared_ptr<thread_pool>> tp;
        for (size_t i = 0; i < devices.size(); ++i)
            tp.push_back(std::make_shared<thread_pool>(1));

        size_t iteration = 0;
        while(job_pipe.dequeue(next_job))
...@@ -545,7 +554,7 @@ namespace dlib
            // right version for unsupervised or supervised training based on the type
            // of label_type.
            for (size_t i = 0; i < devices.size(); ++i)
                tp[i]->add_task_by_value([&,i](double& loss){ loss = compute_parameter_gradients(i, next_job, pick_which_run_update); }, losses[i]);

            // aggregate loss values from all the network computations.
            double theloss = 0;
            for (auto&& loss : losses)
...@@ -596,10 +605,10 @@ namespace dlib
            // Now apply all the updates to each device.
            for (size_t i = 0; i < devices.size(); ++i)
                tp[i]->add_task_by_value([&,i](){ if (next_job.have_data[i]) update_parameters(i); });

            // and wait for the updates to all happen.
            for (size_t i = 0; i < devices.size(); ++i)
                tp[i]->wait_for_all_tasks();

            // Every now and then force all the parameters to be the same just to make
......
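The pattern introduced here generalizes beyond the trainer: one single-threaded
dlib::thread_pool per device pins all of that device's work to one OS thread,
so thread_local CUDA state is built once per device instead of once per task.
A reduced sketch of the idea (num_devices and work_on_device() are
placeholders, not names from the diff):

    std::vector<std::shared_ptr<dlib::thread_pool>> pools;
    for (size_t i = 0; i < num_devices; ++i)
        pools.push_back(std::make_shared<dlib::thread_pool>(1));

    std::vector<dlib::future<double>> results(num_devices);
    for (size_t i = 0; i < num_devices; ++i)
        pools[i]->add_task_by_value([i](double& out){ out = work_on_device(i); },
                                    results[i]);
    for (size_t i = 0; i < num_devices; ++i)
        pools[i]->wait_for_all_tasks();   // results[i].get() now holds each value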
...@@ -482,7 +482,7 @@ namespace dlib
            << "\n\t x_upper.size(): " << x_upper.size()
        );
        DLIB_ASSERT (
            min(x_upper-x_lower) >= 0,
            "\tdouble find_min_box_constrained()"
            << "\n\t You have to supply proper box constraints to this function."
            << "\n\r min(x_upper-x_lower): " << min(x_upper-x_lower)
...@@ -610,7 +610,7 @@ namespace dlib
            << "\n\t x_upper.size(): " << x_upper.size()
        );
        DLIB_ASSERT (
            min(x_upper-x_lower) >= 0,
            "\tdouble find_max_box_constrained()"
            << "\n\t You have to supply proper box constraints to this function."
            << "\n\r min(x_upper-x_lower): " << min(x_upper-x_lower)
......
...@@ -297,7 +297,7 @@ namespace dlib
            - is_col_vector(x_upper) == true
            - x.size() == x_lower.size() == x_upper.size()
              (i.e. x, x_lower, and x_upper need to all be column vectors of the same dimensionality)
            - min(x_upper-x_lower) >= 0
              (i.e. x_upper must contain upper bounds relative to x_lower)
        ensures
            - Performs a box constrained minimization of the function f() using the given
...@@ -391,7 +391,7 @@ namespace dlib
            - is_col_vector(x_upper) == true
            - x.size() == x_lower.size() == x_upper.size()
              (i.e. x, x_lower, and x_upper need to all be column vectors of the same dimensionality)
            - min(x_upper-x_lower) >= 0
              (i.e. x_upper must contain upper bounds relative to x_lower)
        ensures
            - Performs a box constrained maximization of the function f() using the given
......
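The relaxed precondition means a variable can now be pinned by giving it equal
lower and upper bounds.  A hypothetical two-variable example (f is any
user-supplied objective; none of these values come from the diff):

    matrix<double,0,1> x(2), lo(2), hi(2);
    x  = 0.5, 1.0;
    lo = 0.0, 1.0;
    hi = 1.0, 1.0;   // lo(1) == hi(1) pins x(1) at 1.0; the old requirement
                     // min(x_upper-x_lower) > 0 rejected exactly this case.
    find_min_box_constrained(lbfgs_search_strategy(10),
                             objective_delta_stop_strategy(1e-9),
                             f, derivative(f), x, lo, hi);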
...@@ -42,6 +42,12 @@ int main()
    try
    {
        cv::VideoCapture cap(0);
        if (!cap.isOpened())
        {
            cerr << "Unable to connect to camera" << endl;
            return 1;
        }

        image_window win;

        // Load face detection and pose estimation models.
......
Hi Davis,
thanks for your work on dlib!
I have created a natvis file for nicer debugger visualization of dlib matrices in Visual Studio (2012 - …) and I just wanted to share it with you.
To test it, copy the file into your folder %USERPROFILE%\My Documents\Visual Studio 2015\Visualizers or %VSINSTALLDIR%\Common7\Packages\Debugger\Visualizers, as described here: https://msdn.microsoft.com/en-us/library/jj620914.aspx
It's certainly extendable, especially to include it in Image Watch, but it may already help users debug much faster.
Feel free to share it.
Best,
Johannes Huber
<?xml version="1.0" encoding="utf-8"?>
<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
<!-- dlib matrix debugger visualization in Visual Studio-->
<!-- Johannes Huber, SAFEmine Part of Hexagon -->
<!-- no warranty -->
<!-- general dlib::matrix fixed size-->
<Type Name="dlib::matrix&lt;*,*,*,*&gt;">
<DisplayString>{{ size= &lt;{$T2}&gt; x &lt;{$T3}&gt; }}</DisplayString>
<Expand>
<ArrayItems>
<Size>$T2 * $T3</Size>
<ValuePointer>($T1*)data.data</ValuePointer>
</ArrayItems>
</Expand>
</Type>
<!-- general dlib::matrix fixed rows-->
<Type Name="dlib::matrix&lt;*,0,*,*&gt;">
<DisplayString>{{ size={data.nr_} x &lt;{$T2}&gt; }}</DisplayString>
<Expand>
<ArrayItems Condition="data.data != 0">
<Size>data.nr_ * $T2</Size>
<ValuePointer>($T1*)data.data</ValuePointer>
</ArrayItems>
</Expand>
</Type>
<!-- general dlib::matrix fixed cols-->
<Type Name="dlib::matrix&lt;*,*,0,*&gt;">
<DisplayString>{{ size= &lt;{$T2}&gt; x {data.nc_} }}</DisplayString>
<Expand>
<ArrayItems Condition="data.data != 0">
<Size>$T2 * data.nc_</Size>
<ValuePointer>($T1*)data.data</ValuePointer>
</ArrayItems>
</Expand>
</Type>
<!-- general dlib::matrix dynamic size-->
<Type Name="dlib::matrix&lt;*,0,0,*&gt;">
<DisplayString>{{ size= {data.nr_} x {data.nc_} }}</DisplayString>
<Expand>
<ArrayItems Condition="data.data != 0">
<Size>data.nr_*data.nc_</Size>
<ValuePointer>($T1*)data.data</ValuePointer>
</ArrayItems>
</Expand>
</Type>
</AutoVisualizer>
\ No newline at end of file