Commit 273a21cf authored by Davis King

More activation layer updates. Now the relu and fc layer objects
use either CPU or GPU. Fixed a bug in gemm().
parent db19a781
......@@ -454,7 +454,99 @@ namespace dlib
}
// -----------------------------------------------------------------------------------
// -----------------------------------------------------------------------------------
// -----------------------------------------------------------------------------------
void softmax (
tensor& dest,
const tensor& src
)
{
// TODO
DLIB_CASSERT(false,"");
}
void softmax_gradient (
tensor& grad,
const tensor& dest,
const tensor& gradient_input
)
{
// TODO
DLIB_CASSERT(false,"");
}
// ------------------------------------------------------------------------------------
void sigmoid (
tensor& dest,
const tensor& src
)
{
// TODO
DLIB_CASSERT(false,"");
}
void sigmoid_gradient (
tensor& grad,
const tensor& dest,
const tensor& gradient_input
)
{
// TODO
DLIB_CASSERT(false,"");
}
// ------------------------------------------------------------------------------------
void relu (
tensor& dest,
const tensor& src
)
{
dest = lowerbound(mat(src), 0);
}
void relu_gradient (
tensor& grad,
const tensor& dest,
const tensor& gradient_input
)
{
const float* gi = gradient_input.host();
const float* in = dest.host();
float* out = grad.host();
for (size_t i = 0; i < dest.size(); ++i)
{
if (in[i] > 0)
out[i] = gi[i];
else
out[i] = 0;
}
}
// ------------------------------------------------------------------------------------
void tanh (
tensor& dest,
const tensor& src
)
{
// TODO
DLIB_CASSERT(false,"");
}
void tanh_gradient (
tensor& grad,
const tensor& dest,
const tensor& gradient_input
)
{
// TODO
DLIB_CASSERT(false,"");
}
// ------------------------------------------------------------------------------------
}
}
......
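The softmax, sigmoid, and tanh entries above are still stubbed out with DLIB_CASSERT(false,""). They follow the same element-wise pattern as relu_gradient(); purely as an illustration (not the implementation this commit provides), a CPU sigmoid and its gradient could look like the sketch below, with tanh handled the same way via std::tanh and a gradient of gi[i]*(1 - d[i]*d[i]).

```cpp
// Sketch only, not dlib code as committed.  Element-wise sigmoid over the
// tensor's host memory, mirroring the loop style of relu_gradient() above.
// Requires <cmath> for std::exp.
void sigmoid (
    tensor& dest,
    const tensor& src
)
{
    const float* s = src.host();
    float* d = dest.host();
    for (size_t i = 0; i < src.size(); ++i)
        d[i] = 1/(1 + std::exp(-s[i]));
}

// The gradient is written in terms of the already computed output (dest),
// using d/dx sigmoid(x) == sigmoid(x)*(1 - sigmoid(x)).
void sigmoid_gradient (
    tensor& grad,
    const tensor& dest,
    const tensor& gradient_input
)
{
    const float* gi = gradient_input.host();
    const float* d = dest.host();
    float* g = grad.host();
    for (size_t i = 0; i < dest.size(); ++i)
        g[i] = gi[i]*d[i]*(1 - d[i]);
}
```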
......@@ -4,6 +4,7 @@
#define DLIB_DNN_CPU_H_
// This file contains CPU implementations of the GPU based functions in cuda_dlib.h
// and cudnn_dlibapi.h
#include "tensor.h"
......@@ -86,6 +87,58 @@ namespace dlib
float thresh
);
// -----------------------------------------------------------------------------------
void softmax (
tensor& dest,
const tensor& src
);
void softmax_gradient (
tensor& grad,
const tensor& dest,
const tensor& gradient_input
);
// ------------------------------------------------------------------------------------
void sigmoid (
tensor& dest,
const tensor& src
);
void sigmoid_gradient (
tensor& grad,
const tensor& dest,
const tensor& gradient_input
);
// ------------------------------------------------------------------------------------
void relu (
tensor& dest,
const tensor& src
);
void relu_gradient (
tensor& grad,
const tensor& dest,
const tensor& gradient_input
);
// ------------------------------------------------------------------------------------
void tanh (
tensor& dest,
const tensor& src
);
void tanh_gradient (
tensor& grad,
const tensor& dest,
const tensor& gradient_input
);
// -----------------------------------------------------------------------------------
}
......
......@@ -614,11 +614,11 @@ namespace dlib
// ------------------------------------------------------------------------------------
void softmax (
resizable_tensor& dest,
tensor& dest,
const tensor& src
)
{
dest.copy_size(src);
DLIB_CASSERT(have_same_dimensions(dest,src),"");
if (src.size() == 0)
return;
......@@ -668,11 +668,11 @@ namespace dlib
// ------------------------------------------------------------------------------------
void sigmoid (
resizable_tensor& dest,
tensor& dest,
const tensor& src
)
{
dest.copy_size(src);
DLIB_CASSERT(have_same_dimensions(dest,src),"");
if (src.size() == 0)
return;
......@@ -719,11 +719,11 @@ namespace dlib
// ------------------------------------------------------------------------------------
void relu (
resizable_tensor& dest,
tensor& dest,
const tensor& src
)
{
dest.copy_size(src);
DLIB_CASSERT(have_same_dimensions(dest,src),"");
if (src.size() == 0)
return;
......@@ -770,11 +770,11 @@ namespace dlib
// ------------------------------------------------------------------------------------
void tanh (
resizable_tensor& dest,
tensor& dest,
const tensor& src
)
{
dest.copy_size(src);
DLIB_CASSERT(have_same_dimensions(dest,src),"");
if (src.size() == 0)
return;
......
......@@ -326,12 +326,13 @@ namespace dlib
// ------------------------------------------------------------------------------------
void softmax (
resizable_tensor& dest,
tensor& dest,
const tensor& src
);
/*!
requires
- have_same_dimensions(dest, src) == true
ensures
- have_same_dimensions(#dest, src) == true
- Note that the softmax function is a vector valued function:
s(x) == exp(x)/sum(exp(x))
- Computes the softmax function on src and writes the results to dest. The
......@@ -365,12 +366,13 @@ namespace dlib
// ------------------------------------------------------------------------------------
void sigmoid (
resizable_tensor& dest,
tensor& dest,
const tensor& src
);
/*!
requires
- have_same_dimensions(dest, src) == true
ensures
- have_same_dimensions(#dest, src) == true
- for all valid i:
- #dest.host()[i] == 1/(1+std::exp(-src.host()[i]))
- This function supports in-place operation, i.e. having
......@@ -399,12 +401,13 @@ namespace dlib
// ------------------------------------------------------------------------------------
void relu (
resizable_tensor& dest,
tensor& dest,
const tensor& src
);
/*!
requires
- have_same_dimensions(dest, src) == true
ensures
- have_same_dimensions(#dest, src) == true
- for all valid i:
- #dest.host()[i] == std::max(0,src.host()[i])
- This function supports in-place operation, i.e. having
......@@ -433,12 +436,13 @@ namespace dlib
// ------------------------------------------------------------------------------------
void tanh (
resizable_tensor& dest,
tensor& dest,
const tensor& src
);
/*!
requires
- have_same_dimensions(dest, src) == true
ensures
- have_same_dimensions(#dest, src) == true
- for all valid i:
- #dest.host()[i] == std::tanh(src.host()[i])
- This function supports in-place operation, i.e. having
......
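Note the interface change documented above: dest is now a plain tensor rather than a resizable_tensor, so the caller must allocate the output before the call. A small usage sketch of the new contract, shown with the tt::relu wrapper that appears later in this diff (the same requires clause applies to the cuda:: and cpu:: versions):

```cpp
// The output must satisfy have_same_dimensions(dest, src) before the call;
// the function no longer resizes it internally.
resizable_tensor dest;
dest.copy_size(src);    // give dest the same dimensions as src
tt::relu(dest, src);    // now meets the "requires" clause above
```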
......@@ -10,6 +10,7 @@
#include <string>
#include "../rand.h"
#include "../string.h"
#include "tensor_tools.h"
namespace dlib
......@@ -85,17 +86,17 @@ namespace dlib
{
output.set_size(sub.get_output().num_samples(), num_outputs);
output = mat(sub.get_output())*mat(params);
tt::gemm(0,output, 1,sub.get_output(),false, params,false);
}
template <typename SUBNET>
void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad)
{
// compute the gradient of the parameters.
params_grad = trans(mat(sub.get_output()))*mat(gradient_input);
tt::gemm(0,params_grad, 1,sub.get_output(),true, gradient_input,false);
// compute the gradient for the data
sub.get_gradient_input() += mat(gradient_input)*trans(mat(params));
tt::gemm(1,sub.get_gradient_input(), 1,gradient_input,false, params,true);
}
const tensor& get_layer_params() const { return params; }
......@@ -147,27 +148,17 @@ namespace dlib
void forward_inplace(const tensor& input, tensor& output)
{
output = lowerbound(mat(input), 0);
tt::relu(output, input);
}
void backward_inplace(
const tensor& computed_output,
const tensor& gradient_input,
tensor& data_grad,
tensor& params_grad
tensor&
)
{
const float* grad = gradient_input.host();
const float* in = computed_output.host();
float* out = data_grad.host();
for (unsigned long i = 0; i < computed_output.size(); ++i)
{
if (in[i] > 0)
out[i] = grad[i];
else
out[i] = 0;
}
tt::relu_gradient(data_grad, computed_output, gradient_input);
}
const tensor& get_layer_params() const { return params; }
......
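For reference, tt::gemm(beta, dest, alpha, lhs, trans_lhs, rhs, trans_rhs) computes dest = alpha*op(lhs)*op(rhs) + beta*dest, where op() optionally transposes (see the CPU fallback in tensor_tools.cpp below). The three calls in the fc layer above therefore reproduce the matrix expressions they replace:

```cpp
// forward pass:  output = input*params             (beta == 0 overwrites dest)
tt::gemm(0, output, 1, sub.get_output(), false, params, false);

// backward pass, parameter gradient:  params_grad = trans(input)*gradient_input
tt::gemm(0, params_grad, 1, sub.get_output(), true, gradient_input, false);

// backward pass, data gradient:  data_grad += gradient_input*trans(params)
// (beta == 1 accumulates into the gradient already stored in the subnetwork)
tt::gemm(1, sub.get_gradient_input(), 1, gradient_input, false, params, true);
```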
......@@ -27,9 +27,9 @@ namespace dlib { namespace tt
#else
if (trans_lhs && trans_rhs)
dest = alpha*trans(mat(lhs))*trans(mat(rhs)) + beta*mat(dest);
if (!trans_lhs && trans_rhs)
else if (!trans_lhs && trans_rhs)
dest = alpha*mat(lhs)*trans(mat(rhs)) + beta*mat(dest);
if (trans_lhs && !trans_rhs)
else if (trans_lhs && !trans_rhs)
dest = alpha*trans(mat(lhs))*mat(rhs) + beta*mat(dest);
else
dest = alpha*mat(lhs)*mat(rhs) + beta*mat(dest);
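The gemm() change above fixes a branch-selection bug: the cases were written as independent if statements rather than an else-if chain, so whenever the third condition was false the trailing else also ran and overwrote the result computed by an earlier branch (both trans_rhs cases fell through to the untransposed multiply). A standalone illustration of the pitfall, not dlib code:

```cpp
#include <iostream>

// Buggy pattern: the "else" pairs only with the last "if", so for a == true,
// b == true the first branch runs and is then clobbered by the final branch.
int pick_buggy(bool a, bool b)
{
    int r = -1;
    if (a && b)   r = 0;
    if (!a && b)  r = 1;
    if (a && !b)  r = 2;
    else          r = 3;   // also runs when (a && b), overwriting r
    return r;
}

// Fixed pattern: else-if chaining makes the branches mutually exclusive.
int pick_fixed(bool a, bool b)
{
    if (a && b)        return 0;
    else if (!a && b)  return 1;
    else if (a && !b)  return 2;
    else               return 3;
}

int main()
{
    std::cout << pick_buggy(true, true) << " vs " << pick_fixed(true, true) << "\n";  // prints "3 vs 0"
}
```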
......@@ -407,15 +407,14 @@ namespace dlib { namespace tt
// ----------------------------------------------------------------------------------------
void softmax (
resizable_tensor& dest,
tensor& dest,
const tensor& src
)
{
#ifdef DLIB_USE_CUDA
cuda::softmax(dest,src);
#else
// TODO
DLIB_CASSERT(false,"");
cpu::softmax(dest,src);
#endif
}
......@@ -428,23 +427,21 @@ namespace dlib { namespace tt
#ifdef DLIB_USE_CUDA
cuda::softmax_gradient(grad, dest, gradient_input);
#else
// TODO
DLIB_CASSERT(false,"");
cpu::softmax_gradient(grad, dest, gradient_input);
#endif
}
// ----------------------------------------------------------------------------------------
void sigmoid (
resizable_tensor& dest,
tensor& dest,
const tensor& src
)
{
#ifdef DLIB_USE_CUDA
cuda::sigmoid(dest,src);
#else
// TODO
DLIB_CASSERT(false,"");
cpu::sigmoid(dest,src);
#endif
}
......@@ -457,23 +454,21 @@ namespace dlib { namespace tt
#ifdef DLIB_USE_CUDA
cuda::sigmoid_gradient(grad, dest, gradient_input);
#else
// TODO
DLIB_CASSERT(false,"");
cpu::sigmoid_gradient(grad, dest, gradient_input);
#endif
}
// ----------------------------------------------------------------------------------------
void relu (
resizable_tensor& dest,
tensor& dest,
const tensor& src
)
{
#ifdef DLIB_USE_CUDA
cuda::relu(dest,src);
#else
// TODO
DLIB_CASSERT(false,"");
cpu::relu(dest,src);
#endif
}
......@@ -486,23 +481,21 @@ namespace dlib { namespace tt
#ifdef DLIB_USE_CUDA
cuda::relu_gradient(grad, dest, gradient_input);
#else
// TODO
DLIB_CASSERT(false,"");
cpu::relu_gradient(grad, dest, gradient_input);
#endif
}
// ----------------------------------------------------------------------------------------
void tanh (
resizable_tensor& dest,
tensor& dest,
const tensor& src
)
{
#ifdef DLIB_USE_CUDA
cuda::tanh(dest,src);
#else
// TODO
DLIB_CASSERT(false,"");
cpu::tanh(dest,src);
#endif
}
......@@ -515,8 +508,7 @@ namespace dlib { namespace tt
#ifdef DLIB_USE_CUDA
cuda::tanh_gradient(grad, dest, gradient_input);
#else
// TODO
DLIB_CASSERT(false,"");
cpu::tanh_gradient(grad, dest, gradient_input);
#endif
}
......
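All of the tt:: wrappers above share the same compile-time dispatch: forward to the cuda:: implementation when DLIB_USE_CUDA is defined, otherwise fall back to the cpu:: implementation from cpu_dlib.cpp. A sketch of that shape for a hypothetical new wrapper (my_op is illustrative only and not part of this commit):

```cpp
// Sketch of the dispatch pattern used throughout tensor_tools.cpp.
// "my_op" is a hypothetical name, not a dlib function.
void my_op (
    tensor& dest,
    const tensor& src
)
{
#ifdef DLIB_USE_CUDA
    cuda::my_op(dest,src);   // cuDNN/CUDA path
#else
    cpu::my_op(dest,src);    // portable CPU path
#endif
}
```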
......@@ -499,12 +499,13 @@ namespace dlib { namespace tt
// ----------------------------------------------------------------------------------------
void softmax (
resizable_tensor& dest,
tensor& dest,
const tensor& src
);
/*!
requires
- have_same_dimensions(dest, src) == true
ensures
- have_same_dimensions(#dest, src) == true
- Note that the softmax function is a vector valued function:
s(x) == exp(x)/sum(exp(x))
- Computes the softmax function on src and writes the results to dest. The
......@@ -538,12 +539,13 @@ namespace dlib { namespace tt
// ----------------------------------------------------------------------------------------
void sigmoid (
resizable_tensor& dest,
tensor& dest,
const tensor& src
);
/*!
requires
- have_same_dimensions(dest, src) == true
ensures
- have_same_dimensions(#dest, src) == true
- for all valid i:
- #dest.host()[i] == 1/(1+std::exp(-src.host()[i]))
- This function supports in-place operation, i.e. having
......@@ -572,12 +574,13 @@ namespace dlib { namespace tt
// ----------------------------------------------------------------------------------------
void relu (
resizable_tensor& dest,
tensor& dest,
const tensor& src
);
/*!
requires
- have_same_dimensions(dest, src) == true
ensures
- have_same_dimensions(#dest, src) == true
- for all valid i:
- #dest.host()[i] == std::max(0,src.host()[i])
- This function supports in-place operation, i.e. having
......@@ -606,12 +609,13 @@ namespace dlib { namespace tt
// ----------------------------------------------------------------------------------------
void tanh (
resizable_tensor& dest,
tensor& dest,
const tensor& src
);
/*!
requires
- have_same_dimensions(dest, src) == true
ensures
- have_same_dimensions(#dest, src) == true
- for all valid i:
- #dest.host()[i] == std::tanh(src.host()[i])
- This function supports in-place operation, i.e. having
......