Commit f335ce4f authored by Davis King

Adding a rough initial version of a deep learning API.

parent 16ea6f11
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNn_
#define DLIB_DNn_
#include "dnn/tensor.h"
#include "dnn/input.h"
#include "dnn/layers.h"
#include "dnn/loss.h"
#include "dnn/core.h"
#include "dnn/solvers.h"
#endif // DLIB_DNn_
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNn_INPUT_H_
#define DLIB_DNn_INPUT_H_
#include <dlib/matrix.h>
#include <dlib/pixel.h>
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <typename T>
class input
{
public:
// sample_expansion_factor must be > 0
const static unsigned int sample_expansion_factor = 1;
typedef T input_type;
template <typename input_iterator>
void to_tensor (
input_iterator begin,
input_iterator end,
resizable_tensor& data
) const
/*!
requires
- [begin, end) is an iterator range over input_type objects.
ensures
- Converts the iterator range into a tensor and stores it into #data.
- Normally you would have #data.num_samples() == distance(begin,end) but
you can also expand the output by some integer factor so long as the loss
you use can deal with it correctly.
- #data.num_samples() == distance(begin,end)*sample_expansion_factor.
!*/
{
// TODO: this generic version is still just a stub.  It needs to set data to the
// right size and then copy the contents of the iterator range into data.
for (input_iterator i = begin; i != end; ++i)
{
matrix<rgb_pixel> temp = *i;
// now copy *i into the right part of data.
}
}
};
// ----------------------------------------------------------------------------------------
template <typename T,long NR, typename MM, typename L>
class input<matrix<T,NR,1,MM,L>>
{
public:
// TODO, maybe we should only allow T to be float? Seems kinda pointless to allow
// double. Don't forget to remove the matrix_cast if we enforce just float.
typedef matrix<T,NR,1,MM,L> input_type;
const static unsigned int sample_expansion_factor = 1;
template <typename input_iterator>
void to_tensor (
input_iterator begin,
input_iterator end,
resizable_tensor& data
) const
/*!
requires
- [begin, end) is an iterator range over input_type objects.
ensures
- converts the iterator range into a tensor and stores it into #data.
- Normally you would have #data.num_samples() == distance(begin,end) but
you can also expand the output by some integer factor so long as the loss
you use can deal with it correctly.
- #data.num_samples() == distance(begin,end)*sample_expansion_factor.
!*/
{
// initialize data to the right size to contain the stuff in the iterator range.
data.set_size(std::distance(begin,end), 1, 1, begin->size());
unsigned long idx = 0;
for (input_iterator i = begin; i != end; ++i)
{
data.set_sample(idx++, matrix_cast<float>(*i));
}
}
};
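// A minimal usage sketch for the specialization above (the sizes are just
// example values):
//
//   std::vector<matrix<float,0,1>> samples(32, matrix<float,0,1>(zeros_matrix<float>(10,1)));
//   input<matrix<float,0,1>> inp;
//   resizable_tensor data;
//   inp.to_tensor(samples.begin(), samples.end(), data);
//   // now data.num_samples() == 32 and each sample holds one vector's 10 floats.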
// ----------------------------------------------------------------------------------------
template <typename T>
class input2
{
public:
input2(){}
input2(const input<T>&) {}
typedef T input_type;
const static unsigned int sample_expansion_factor = 1;
template <typename input_iterator>
void to_tensor (
input_iterator begin,
input_iterator end,
resizable_tensor& data
) const
/*!
requires
- [begin, end) is an iterator range over T objects.
ensures
- converts the iterator range into a tensor and stores it into #data.
- Normally you would have #data.num_samples() == distance(begin,end) but
you can also expand the output by some integer factor so long as the loss
you use can deal with it correctly.
- #data.num_samples() == distance(begin,end)*sample_expansion_factor.
!*/
{
// TODO: like the generic input above, this is still just a stub.  It needs to
// set data to the right size and then copy the iterator range into data.
for (input_iterator i = begin; i != end; ++i)
{
matrix<rgb_pixel> temp = *i;
// now copy *i into the right part of data.
}
}
};
// ----------------------------------------------------------------------------------------
}
#endif // #define DLIB_DNn_INPUT_H_
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNn_LAYERS_H_
#define DLIB_DNn_LAYERS_H_
#include "layers_abstract.h"
#include "tensor.h"
#include "core.h"
#include <iostream>
#include <string>
#include <dlib/rand.h>
#include <dlib/string.h>
namespace dlib
{
// ----------------------------------------------------------------------------------------
class con_
{
public:
con_()
{}
template <typename SUB_NET>
void setup (const SUB_NET& sub)
{
// TODO
}
template <typename SUB_NET>
void forward(const SUB_NET& sub, resizable_tensor& output)
{
// TODO
}
template <typename SUB_NET>
void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad)
{
// TODO
}
const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }
private:
resizable_tensor params;
};
template <typename SUB_NET>
using con = add_layer<con_, SUB_NET>;
// ----------------------------------------------------------------------------------------
class fc_
{
public:
fc_() : num_outputs(1)
{
rnd.set_seed("fc_" + cast_to_string(num_outputs));
}
explicit fc_(unsigned long num_outputs_)
{
num_outputs = num_outputs_;
rnd.set_seed("fc_" + cast_to_string(num_outputs));
}
unsigned long get_num_outputs (
) const { return num_outputs; }
template <typename SUB_NET>
void setup (const SUB_NET& sub)
{
num_inputs = sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k();
params.set_size(num_inputs, num_outputs);
std::cout << "fc_::setup() " << params.size() << std::endl;
randomize_parameters(params, num_inputs+num_outputs, rnd);
}
template <typename SUB_NET>
void forward(const SUB_NET& sub, resizable_tensor& output)
{
output.set_size(sub.get_output().num_samples(), num_outputs);
output = mat(sub.get_output())*mat(params);
}
template <typename SUB_NET>
void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad)
{
// forward() computed OUT = DATA*W, where DATA = mat(sub.get_output()) and
// W = mat(params).  So, with GRAD = mat(gradient_input):
//   - the gradient with respect to the parameters is trans(DATA)*GRAD
//   - the gradient with respect to the data is GRAD*trans(W)
// compute the gradient of the parameters.
params_grad += trans(mat(sub.get_output()))*mat(gradient_input);
// compute the gradient for the data
sub.get_gradient_input() += mat(gradient_input)*trans(mat(params));
}
const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }
private:
unsigned long num_outputs;
unsigned long num_inputs;
resizable_tensor params;
dlib::rand rnd;
};
template <typename SUB_NET>
using fc = add_layer<fc_, SUB_NET>;
// ----------------------------------------------------------------------------------------
class relu_
{
public:
relu_()
{
}
template <typename SUB_NET>
void setup (const SUB_NET& sub)
{
}
template <typename SUB_NET>
void forward(const SUB_NET& sub, resizable_tensor& output)
{
output.copy_size(sub.get_output());
output = lowerbound(mat(sub.get_output()), 0);
}
template <typename SUB_NET>
void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad)
{
const float* grad = gradient_input.host();
const float* in = sub.get_output().host();
float* out = sub.get_gradient_input().host();
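// The derivative of max(0,x) is 1 where x > 0 and 0 elsewhere, so only pass the
// gradient through at the positions where this layer's input was positive.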
for (unsigned long i = 0; i < sub.get_output().size(); ++i)
{
if (in[i] > 0)
out[i] += grad[i];
}
}
const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }
private:
resizable_tensor params;
};
template <typename SUB_NET>
using relu = add_layer<relu_, SUB_NET>;
// ----------------------------------------------------------------------------------------
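// The multiply_ layer below scales each element of its input by a separately
// learned weight, i.e. it multiplies each sample by a learned diagonal matrix.
// For example, with params = [2,3] a sample [x1,x2] would map to [2*x1, 3*x2].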
class multiply_
{
public:
multiply_()
{
}
template <typename SUB_NET>
void setup (const SUB_NET& sub)
{
num_inputs = sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k();
params.set_size(1, num_inputs);
std::cout << "multiply_::setup() " << params.size() << std::endl;
const int num_outputs = num_inputs;
randomize_parameters(params, num_inputs+num_outputs, rnd);
}
template <typename SUB_NET>
void forward(const SUB_NET& sub, resizable_tensor& output)
{
DLIB_CASSERT( sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k() == params.size(), "");
DLIB_CASSERT( sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k() == num_inputs, "");
output.copy_size(sub.get_output());
auto indata = sub.get_output().host();
auto outdata = output.host();
auto paramdata = params.host();
for (int i = 0; i < sub.get_output().num_samples(); ++i)
{
for (int j = 0; j < num_inputs; ++j)
{
*outdata++ = *indata++ * paramdata[j];
}
}
}
template <typename SUB_NET>
void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad)
{
params_grad += sum_rows(pointwise_multiply(mat(sub.get_output()),mat(gradient_input)));
for (long i = 0; i < gradient_input.num_samples(); ++i)
{
sub.get_gradient_input().add_to_sample(i,
pointwise_multiply(rowm(mat(gradient_input),i), mat(params)));
}
}
const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }
private:
int num_inputs;
resizable_tensor params;
dlib::rand rnd;
};
template <typename SUB_NET>
using multiply = add_layer<multiply_, SUB_NET>;
// ----------------------------------------------------------------------------------------
}
#endif // #define DLIB_DNn_LAYERS_H_
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_DNn_LAYERS_ABSTRACT_H_
#ifdef DLIB_DNn_LAYERS_ABSTRACT_H_
#include "tensor_abstract.h"
#include "core_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class SUB_NET
{
/*!
WHAT THIS OBJECT REPRESENTS
By "Sub net" we mean the part of the network closer to the input. Whenever
you get a SUB_NET it will always have computed its outputs and they will be
available in get_output().
!*/
public:
const tensor& get_output(
) const;
tensor& get_gradient_input(
);
const NEXT_SUB_NET& sub_net(
) const;
NEXT_SUB_NET& sub_net(
);
};
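// For example, a layer's forward() might use its SUB_NET like this (a sketch,
// not a complete layer):
//
//   template <typename SUB_NET>
//   void forward(const SUB_NET& sub, resizable_tensor& output)
//   {
//       const tensor& below = sub.get_output();            // output of the layer below
//       const tensor& below2 = sub.sub_net().get_output(); // output of the layer below that
//       // ... fill output from below (and optionally below2) ...
//   }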
// ----------------------------------------------------------------------------------------
class EXAMPLE_LAYER_
{
/*!
WHAT THIS OBJECT REPRESENTS
Each layer in a deep neural network can be thought of as a function,
f(data,parameters), that takes in a data tensor, some parameters, and
produces an output tensor. You create an entire deep network by composing
these functions. Importantly, you are able to use a wide range of
different functions to accommodate whatever task you are trying to accomplish.
Dlib includes a number of common layer types but if you want to define your
own then you simply implement a class with the same interface as EXAMPLE_LAYER_.
!*/
public:
EXAMPLE_LAYER_(
);
/*!
ensures
- Default constructs this object. This function is not required to do
anything in particular but it is required that layer objects be default
constructable.
!*/
template <typename SUB_NET>
void setup (
const SUB_NET& sub
);
/*!
requires
- SUB_NET implements the SUB_NET interface defined at the top of this file.
ensures
- performs any necessary initial memory allocations and/or sets parameters
to their initial values prior to learning. Therefore, calling setup
destroys any previously learned parameters.
!*/
template <typename SUB_NET>
void forward(
const SUB_NET& sub,
resizable_tensor& output
);
/*!
requires
- SUB_NET implements the SUB_NET interface defined at the top of this file.
- setup() has been called.
ensures
- Runs the output of the sub-network through this layer and stores the
output into #output. In particular, forward() can use any of the outputs
in sub (e.g. sub.get_output(), sub.sub_net().get_output(), etc.) to
compute whatever it wants.
- #output.num_samples() == sub.get_output().num_samples()
!*/
template <typename SUB_NET>
void backward(
const tensor& gradient_input,
SUB_NET& sub,
tensor& params_grad
);
/*!
requires
- SUB_NET implements the SUB_NET interface defined at the top of this file.
- setup() has been called.
- gradient_input has the same dimensions as the output of forward(sub,output).
- have_same_dimensions(sub.get_gradient_input(), sub.get_output()) == true
- have_same_dimensions(params_grad, get_layer_params()) == true
ensures
- This function outputs the gradients of this layer with respect to the
input data from sub and also with respect to this layer's parameters.
These gradients are stored into #sub and #params_grad, respectively. To be
precise, the gradients are taken of a function f(sub,get_layer_params())
which is defined thusly:
- let OUT be the output of forward(sub,OUT).
- let f(sub,get_layer_params()) == dot(OUT, gradient_input)
Then we define the following gradient vectors:
- PARAMETER_GRADIENT == gradient of f(sub,get_layer_params()) with
respect to get_layer_params().
- for all valid I:
- DATA_GRADIENT_I == gradient of f(sub,get_layer_params()) with
respect to layer<I>(sub).get_output() (recall that forward() can
draw inputs from the immediate sub layer, sub.sub_net(), or
any earlier layer. So you must consider the gradients with
respect to all inputs drawn from sub)
Finally, backward() adds these gradients into #params_grad and #sub by performing:
- params_grad += PARAMETER_GRADIENT
- for all valid I:
- layer<I>(sub).get_gradient_input() += DATA_GRADIENT_I
!*/
const tensor& get_layer_params(
) const;
/*!
ensures
- returns the parameters that define the behavior of forward().
!*/
tensor& get_layer_params(
);
/*!
ensures
- returns the parameters that define the behavior of forward().
!*/
};
// For each layer you define, always define an add_layer template so that layers can be
// easily composed. Moreover, the convention is that the layer class ends with an _
// while the add_layer template has the same name but without the trailing _.
template <typename SUB_NET>
using EXAMPLE_LAYER = add_layer<EXAMPLE_LAYER_, SUB_NET>;
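// So, for example, the relu_ layer declared below gets composed onto a network by
// writing something like relu<fc<input<matrix<float,0,1>>>> rather than by using
// relu_ directly.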
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
class fc_
{
/*!
WHAT THIS OBJECT REPRESENTS
This is an implementation of the EXAMPLE_LAYER_ interface defined above.
In particular, it defines a fully connected layer that takes an input
tensor and multiplies it by a weight matrix and outputs the results.
!*/
public:
fc_(
);
/*!
ensures
- #get_num_outputs() == 1
!*/
explicit fc_(
unsigned long num_outputs
);
/*!
ensures
- #get_num_outputs() == num_outputs
!*/
unsigned long get_num_outputs (
) const;
/*!
ensures
- This layer outputs column vectors that contain get_num_outputs()
elements. That is, the output tensor T from forward() will be such that:
- T.num_samples() == however many samples were given to forward().
- T.nr() == get_num_outputs()
- The rest of the dimensions of T will be 1.
!*/
template <typename SUB_NET> void setup (const SUB_NET& sub);
template <typename SUB_NET> void forward(const SUB_NET& sub, resizable_tensor& output);
template <typename SUB_NET> void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad);
const tensor& get_layer_params() const;
tensor& get_layer_params();
/*!
These functions are implemented as described in the EXAMPLE_LAYER_ interface.
!*/
};
template <typename SUB_NET>
using fc = add_layer<fc_, SUB_NET>;
// ----------------------------------------------------------------------------------------
class relu_
{
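/*!
WHAT THIS OBJECT REPRESENTS
This is an implementation of the EXAMPLE_LAYER_ interface defined above.
In particular, it defines a rectified linear layer.  That is, it applies
the function max(0,x) to each element of its input tensor.
!*/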
public:
relu_(
);
template <typename SUB_NET> void setup (const SUB_NET& sub);
template <typename SUB_NET> void forward(const SUB_NET& sub, resizable_tensor& output);
template <typename SUB_NET> void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad);
const tensor& get_layer_params() const;
tensor& get_layer_params();
/*!
These functions are implemented as described in the EXAMPLE_LAYER_ interface.
!*/
};
template <typename SUB_NET>
using relu = add_layer<relu_, SUB_NET>;
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_DNn_LAYERS_ABSTRACT_H_
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNn_LOSS_H_
#define DLIB_DNn_LOSS_H_
#include "core.h"
#include <dlib/matrix.h>
namespace dlib
{
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
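// The loss layer below implements the usual binary hinge loss.  For a network
// output f(x) and a label y in {-1,+1}, the per-sample loss is max(0, 1 - y*f(x)).
// compute_loss() returns the average of that over the mini-batch and writes the
// corresponding gradient into sub.get_gradient_input().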
class loss_binary_hinge_
{
public:
const static unsigned int sample_expansion_factor = 1;
typedef double label_type;
// Implementing to_label() is optional. If you don't do it then it just means the
// automatic operator() mapping from tensors to outputs is missing from the net object.
template <
typename SUB_TYPE,
typename label_iterator
>
void to_label (
const SUB_TYPE& sub,
label_iterator iter
) const
/*!
requires
- SUB_TYPE implements the SUB_NET interface defined at the top of layers_abstract.h.
- sub.get_output().num_samples() must be a multiple of sample_expansion_factor.
- iter == an iterator pointing to the beginning of a range of
sub.get_output().num_samples()/sample_expansion_factor elements. In
particular, they must be label_type elements.
!*/
{
const tensor& output_tensor = sub.get_output();
DLIB_CASSERT(output_tensor.nr() == 1 &&
output_tensor.nc() == 1 &&
output_tensor.k() == 1,"");
DLIB_CASSERT(output_tensor.num_samples()%sample_expansion_factor == 0,"");
const float* out_data = output_tensor.host();
for (unsigned long i = 0; i < output_tensor.num_samples(); ++i)
{
*iter++ = out_data[i];
}
}
template <
typename label_iterator,
typename SUB_NET
>
double compute_loss (
const tensor& input_tensor,
label_iterator truth, // TODO, this parameter is optional.
SUB_NET& sub
) const
/*!
requires
- SUB_NET implements the SUB_NET interface defined at the top of layers_abstract.h.
- input_tensor was given as input to the network sub and the outputs are now
visible in sub.get_output(), sub.sub_net().get_output(), etc.
- input_tensor.num_samples() must be a multiple of sample_expansion_factor.
- input_tensor.num_samples() == sub.get_output().num_samples() == sub.get_gradient_input().num_samples()
- truth == an iterator pointing to the beginning of a range of
input_tensor.num_samples()/sample_expansion_factor elements. In particular,
they must be label_type elements.
- sub.get_gradient_input() has the same dimensions as sub.get_output().
- for all valid i:
- *(truth+i/sample_expansion_factor) is the label of the ith sample in
sub.get_output().
ensures
- #sub.get_gradient_input() == the gradient of the loss with respect to
sub.get_output().
!*/
{
const tensor& output_tensor = sub.get_output();
tensor& grad = sub.get_gradient_input();
// TODO, throw an exception instead of asserting, probably...
DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples(),"");
DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples(),"");
DLIB_CASSERT(output_tensor.nr() == 1 &&
output_tensor.nc() == 1 &&
output_tensor.k() == 1,"");
// The loss we output is the average loss over the mini-batch.
const double scale = 1.0/output_tensor.num_samples();
double loss = 0;
const float* out_data = output_tensor.host();
float* g = grad.host();
for (unsigned long i = 0; i < output_tensor.num_samples(); ++i)
{
const float y = *truth++;
const float temp = 1-y*out_data[i];
if (temp > 0)
{
loss += scale*temp;
g[i] = -scale*y;
}
else
{
g[i] = 0;
}
}
return loss;
}
};
// ----------------------------------------------------------------------------------------
template <typename SUB_NET>
using loss_binary_hinge = add_loss<loss_binary_hinge_, SUB_NET>;
// ----------------------------------------------------------------------------------------
class loss_no_label_
{
public:
//typedef int label_type;
const static unsigned int sample_expansion_factor = 1;
template <
typename SUB_NET
>
double compute_loss (
const tensor& input_tensor,
SUB_NET& sub
) const
/*!
requires
- SUB_NET implements the SUB_NET interface defined at the top of layers_abstract.h.
- input_tensor was given as input to the network sub and the outputs are now
visible in sub.get_output(), sub.sub_net().get_output(), etc.
- input_tensor.num_samples() must be a multiple of sample_expansion_factor.
- input_tensor.num_samples() == sub.get_output().num_samples()
- sub.get_gradient_input() has the same dimensions as sub.get_output().
ensures
- #sub.get_gradient_input() == the gradient of the loss with respect to
sub.get_output().
!*/
{
return 0;
}
};
// ----------------------------------------------------------------------------------------
template <typename SUB_NET>
using loss_no_label = add_loss<loss_no_label_, SUB_NET>;
// ----------------------------------------------------------------------------------------
}
#endif // #define DLIB_DNn_LOSS_H_
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNn_SOLVERS_H_
#define DLIB_DNn_SOLVERS_H_
#include "tensor.h"
#include <iostream>
namespace dlib
{
/*
class EXAMPLE_SOLVER
{
};
*/
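// The sgd object below implements stochastic gradient descent with momentum and
// weight decay.  Each call to operator() updates one layer's parameters as:
//   v      = momentum*v - eps*(weight_decay*params + params_grad)
//   params += v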
struct sgd
{
matrix<float> v;
float weight_decay;
float eps;
float momentum;
sgd(double eps_ = 0.001)
{
weight_decay = 0.0005;
eps = eps_;
//eps = 0.001;
momentum = 0.9;
}
template <typename layer_type>
void operator() (layer_type& l, const tensor& params_grad)
/*!
requires
- l.get_layer_params().size() != 0
- l.get_layer_params() and params_grad have the same dimensions.
!*/
{
if (v.size() != 0)
v = momentum*v - weight_decay*eps*mat(l.get_layer_params()) - eps*mat(params_grad);
else
v = - weight_decay*eps*mat(l.get_layer_params()) - eps*mat(params_grad);
l.get_layer_params() += v;
}
};
}
#endif // #define DLIB_DNn_SOLVERS_H_