Commit 7d85e33d authored by Davis King's avatar Davis King

- Made the train_probabilistic_decision_function() more general by making it work

   with any kind of trainer object rather than only ones which produce
   dlib::decision_function objects.  I also made it work with trainers that only
   take std::vectors.
 - train_probabilistic_decision_function() no longer accepts column vectors of
   samples and labels.  Now it only accepts std::vectors of samples and labels.
 - Added a new generic_probabilistic_decision_function object which is returned by
   train_probabilistic_decision_function().  The old probabilistic_decision_function
   still exists and can be constructed from a generic_probabilistic_decision_function.
   This way backwards compatibility is maintained with older code so long as that
   code used std::vectors to hold samples and labels.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404072
parent 50eaa497
......@@ -129,6 +129,104 @@ namespace dlib
}
}
// ----------------------------------------------------------------------------------------
template <
typename function_type
>
struct generic_probabilistic_decision_function
{
typedef typename function_type::scalar_type scalar_type;
typedef typename function_type::sample_type sample_type;
typedef typename function_type::mem_manager_type mem_manager_type;
scalar_type alpha;
scalar_type beta;
function_type decision_funct;
generic_probabilistic_decision_function (
) : alpha(0), beta(0), decision_funct(function_type()) {}
generic_probabilistic_decision_function (
const generic_probabilistic_decision_function& d
) :
alpha(d.alpha),
beta(d.beta),
decision_funct(d.decision_funct)
{}
generic_probabilistic_decision_function (
const scalar_type a_,
const scalar_type b_,
const function_type& decision_funct_
) :
alpha(a_),
beta(b_),
decision_funct(decision_funct_)
{}
generic_probabilistic_decision_function& operator= (
const generic_probabilistic_decision_function& d
)
{
if (this != &d)
{
alpha = d.alpha;
beta = d.beta;
decision_funct = d.decision_funct;
}
return *this;
}
scalar_type operator() (
const sample_type& x
) const
{
scalar_type f = decision_funct(x);
return 1/(1 + std::exp(alpha*f + beta));
}
};
template <
typename function_type
>
void serialize (
const generic_probabilistic_decision_function<function_type>& item,
std::ostream& out
)
{
try
{
serialize(item.alpha, out);
serialize(item.beta, out);
serialize(item.decision_funct, out);
}
catch (serialization_error& e)
{
throw serialization_error(e.info + "\n while serializing object of type generic_probabilistic_decision_function");
}
}
template <
typename function_type
>
void deserialize (
generic_probabilistic_decision_function<function_type>& item,
std::istream& in
)
{
try
{
deserialize(item.alpha, in);
deserialize(item.beta, in);
deserialize(item.decision_funct, in);
}
catch (serialization_error& e)
{
throw serialization_error(e.info + "\n while deserializing object of type generic_probabilistic_decision_function");
}
}
// ----------------------------------------------------------------------------------------
template <
......@@ -148,6 +246,14 @@ namespace dlib
probabilistic_decision_function (
) : alpha(0), beta(0), decision_funct(decision_function<K>()) {}
probabilistic_decision_function (
const generic_probabilistic_decision_function<decision_function<K> >& d
) :
alpha(d.alpha),
beta(d.beta),
decision_funct(d.decision_funct)
{}
probabilistic_decision_function (
const probabilistic_decision_function& d
) :
......@@ -216,7 +322,6 @@ namespace dlib
std::istream& in
)
{
typedef typename K::scalar_type scalar_type;
try
{
deserialize(item.alpha, in);
......
......@@ -121,6 +121,112 @@ namespace dlib
provides serialization support for decision_function
!*/
// ----------------------------------------------------------------------------------------
template <
typename function_type
>
struct generic_probabilistic_decision_function
{
/*!
REQUIREMENTS ON function_type
- function_type must be a function object with an overloaded
operator() similar to the other function objects defined in
this file. The operator() should return a scalar type such as
double or float.
WHAT THIS OBJECT REPRESENTS
This object represents a binary decision function that returns an
estimate of the probability that a given sample is in the +1 class.
!*/
typedef typename function_type::scalar_type scalar_type;
typedef typename function_type::sample_type sample_type;
typedef typename function_type::mem_manager_type mem_manager_type;
scalar_type alpha;
scalar_type beta;
function_type decision_funct;
generic_probabilistic_decision_function (
);
/*!
ensures
- #alpha == 0
- #beta == 0
- #decision_funct has its initial value
!*/
generic_probabilistic_decision_function (
const generic_probabilistic_decision_function& f
);
/*!
ensures
- #*this is a copy of f
!*/
generic_probabilistic_decision_function (
const scalar_type a,
const scalar_type b,
const function_type& decision_funct_
) : alpha(a), beta(b), decision_funct(decision_funct_) {}
/*!
ensures
- populates the probabilistic decision function with the given alpha, beta,
and decision function.
!*/
generic_probabilistic_decision_function& operator= (
const generic_probabilistic_decision_function& d
);
/*!
ensures
- #*this is identical to d
- returns *this
!*/
scalar_type operator() (
const sample_type& x
) const
/*!
ensures
- returns a number P such that:
- 0 <= P <= 1
- P represents the probability that sample x is from
the class +1
!*/
{
// Evaluate the normal decision function
scalar_type f = decision_funct(x);
// Now basically normalize the output so that it is a properly
// conditioned probability of x being in the +1 class given
// the output of the decision function.
return 1/(1 + std::exp(alpha*f + beta));
}
};
template <
typename function_type
>
void serialize (
const generic_probabilistic_decision_function<function_type>& item,
std::ostream& out
);
/*!
provides serialization support for generic_probabilistic_decision_function
!*/
template <
typename function_type
>
void deserialize (
generic_probabilistic_decision_function<function_type>& item,
std::istream& in
);
/*!
provides serialization support for generic_probabilistic_decision_function
!*/
// ----------------------------------------------------------------------------------------
template <
......@@ -136,6 +242,12 @@ namespace dlib
WHAT THIS OBJECT REPRESENTS
This object represents a binary decision function that returns an
estimate of the probability that a given sample is in the +1 class.
Note that this object is essentially just a copy of
generic_probabilistic_decision_function but with the template argument
changed from being a function type to a kernel type. Therefore, this
type is just a convenient version of generic_probabilistic_decision_function
for the case where the decision function is a dlib::decision_function<K>.
!*/
typedef K kernel_type;
......@@ -153,7 +265,7 @@ namespace dlib
ensures
- #alpha == 0
- #beta == 0
- #decision_function has its initial value
- #decision_funct has its initial value
!*/
probabilistic_decision_function (
......@@ -164,6 +276,14 @@ namespace dlib
- #*this is a copy of f
!*/
probabilistic_decision_function (
const generic_probabilistic_decision_function<decision_function<K> >& d
);
/*!
ensures
- #*this is a copy of d
!*/
probabilistic_decision_function (
const scalar_type a,
const scalar_type b,
......
......@@ -17,6 +17,7 @@
#include "../enable_if.h"
#include "../optimization.h"
#include "svm_nu_trainer.h"
#include <vector>
namespace dlib
{
......@@ -473,21 +474,19 @@ namespace dlib
template <
typename trainer_type,
typename in_sample_vector_type,
typename in_scalar_vector_type
typename sample_type,
typename scalar_type,
typename alloc_type1,
typename alloc_type2
>
const probabilistic_decision_function<typename trainer_type::kernel_type> train_probabilistic_decision_function_impl (
const generic_probabilistic_decision_function<typename trainer_type::trained_function_type>
train_probabilistic_decision_function (
const trainer_type& trainer,
const in_sample_vector_type& x,
const in_scalar_vector_type& y,
const std::vector<sample_type,alloc_type1>& x,
const std::vector<scalar_type,alloc_type2>& y,
const long folds
)
{
typedef typename trainer_type::sample_type sample_type;
typedef typename trainer_type::scalar_type scalar_type;
typedef typename trainer_type::mem_manager_type mem_manager_type;
typedef typename trainer_type::kernel_type K;
/*
This function fits a sigmoid function to the output of the
......@@ -504,20 +503,18 @@ namespace dlib
// make sure requires clause is not broken
DLIB_ASSERT(is_binary_classification_problem(x,y) == true &&
1 < folds && folds <= x.nr(),
1 < folds && folds <= (long)x.size(),
"\tprobabilistic_decision_function train_probabilistic_decision_function()"
<< "\n\t invalid inputs were given to this function"
<< "\n\t x.nr(): " << x.nr()
<< "\n\t y.nr(): " << y.nr()
<< "\n\t x.nc(): " << x.nc()
<< "\n\t y.nc(): " << y.nc()
<< "\n\t x.size(): " << x.size()
<< "\n\t y.size(): " << y.size()
<< "\n\t folds: " << folds
<< "\n\t is_binary_classification_problem(x,y): " << ((is_binary_classification_problem(x,y))? "true":"false")
<< "\n\t is_binary_classification_problem(x,y): " << is_binary_classification_problem(x,y)
);
// count the number of positive and negative examples
const long num_pos = (long)sum(y > 0);
const long num_neg = (long)sum(y < 0);
const long num_pos = (long)sum(vector_to_matrix(y) > 0);
const long num_neg = (long)sum(vector_to_matrix(y) < 0);
// figure out how many positive and negative examples we will have in each fold
const long num_pos_test_samples = num_pos/folds;
......@@ -525,16 +522,15 @@ namespace dlib
const long num_neg_test_samples = num_neg/folds;
const long num_neg_train_samples = num_neg - num_neg_test_samples;
decision_function<K> d;
typename decision_function<K>::sample_vector_type x_test, x_train;
typename decision_function<K>::scalar_vector_type y_test, y_train;
x_test.set_size (num_pos_test_samples + num_neg_test_samples);
y_test.set_size (num_pos_test_samples + num_neg_test_samples);
x_train.set_size(num_pos_train_samples + num_neg_train_samples);
y_train.set_size(num_pos_train_samples + num_neg_train_samples);
typename trainer_type::trained_function_type d;
std::vector<sample_type,alloc_type1> x_test, x_train;
std::vector<scalar_type,alloc_type2> y_test, y_train;
x_test.resize (num_pos_test_samples + num_neg_test_samples);
y_test.resize (num_pos_test_samples + num_neg_test_samples);
x_train.resize(num_pos_train_samples + num_neg_train_samples);
y_train.resize(num_pos_train_samples + num_neg_train_samples);
typedef std_allocator<scalar_type, mem_manager_type> alloc_scalar_type_vector;
typedef std::vector<scalar_type, alloc_scalar_type_vector > dvector;
typedef std::vector<scalar_type, alloc_type2 > dvector;
dvector out;
dvector target;
......@@ -554,25 +550,25 @@ namespace dlib
// load up our positive test samples
while (cur < num_pos_test_samples)
{
if (y(pos_idx) == +1.0)
if (y[pos_idx] == +1.0)
{
x_test(cur) = x(pos_idx);
y_test(cur) = +1.0;
x_test[cur] = x[pos_idx];
y_test[cur] = +1.0;
++cur;
}
pos_idx = (pos_idx+1)%x.nr();
pos_idx = (pos_idx+1)%x.size();
}
// load up our negative test samples
while (cur < x_test.nr())
while (cur < (long)x_test.size())
{
if (y(neg_idx) == -1.0)
if (y[neg_idx] == -1.0)
{
x_test(cur) = x(neg_idx);
y_test(cur) = -1.0;
x_test[cur] = x[neg_idx];
y_test[cur] = -1.0;
++cur;
}
neg_idx = (neg_idx+1)%x.nr();
neg_idx = (neg_idx+1)%x.size();
}
// load the training data from the data following whatever we loaded
......@@ -584,40 +580,40 @@ namespace dlib
// load up our positive train samples
while (cur < num_pos_train_samples)
{
if (y(train_pos_idx) == +1.0)
if (y[train_pos_idx] == +1.0)
{
x_train(cur) = x(train_pos_idx);
y_train(cur) = +1.0;
x_train[cur] = x[train_pos_idx];
y_train[cur] = +1.0;
++cur;
}
train_pos_idx = (train_pos_idx+1)%x.nr();
train_pos_idx = (train_pos_idx+1)%x.size();
}
// load up our negative train samples
while (cur < x_train.nr())
while (cur < (long)x_train.size())
{
if (y(train_neg_idx) == -1.0)
if (y[train_neg_idx] == -1.0)
{
x_train(cur) = x(train_neg_idx);
y_train(cur) = -1.0;
x_train[cur] = x[train_neg_idx];
y_train[cur] = -1.0;
++cur;
}
train_neg_idx = (train_neg_idx+1)%x.nr();
train_neg_idx = (train_neg_idx+1)%x.size();
}
// do the training
d = trainer.train (x_train,y_train);
// now test this fold
for (long i = 0; i < x_test.nr(); ++i)
for (unsigned long i = 0; i < x_test.size(); ++i)
{
out.push_back(d(x_test(i)));
out.push_back(d(x_test[i]));
// if this was a positive example
if (y_test(i) == +1.0)
if (y_test[i] == +1.0)
{
target.push_back(hi_target);
}
else if (y_test(i) == -1.0)
else if (y_test[i] == -1.0)
{
target.push_back(lo_target);
}
......@@ -647,25 +643,7 @@ namespace dlib
const double A = val(0);
const double B = val(1);
return probabilistic_decision_function<K>( A, B, trainer.train(x,y) );
}
template <
typename trainer_type,
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const probabilistic_decision_function<typename trainer_type::kernel_type> train_probabilistic_decision_function (
const trainer_type& trainer,
const in_sample_vector_type& x,
const in_scalar_vector_type& y,
const long folds
)
{
return train_probabilistic_decision_function_impl(trainer,
vector_to_matrix(x),
vector_to_matrix(y),
folds);
return generic_probabilistic_decision_function<typename trainer_type::trained_function_type>( A, B, trainer.train(x,y) );
}
// ----------------------------------------------------------------------------------------
......
......@@ -68,19 +68,21 @@ namespace dlib
template <
typename trainer_type,
typename in_sample_vector_type,
typename in_scalar_vector_type
typename sample_type,
typename scalar_type,
typename alloc_type1,
typename alloc_type2
>
const probabilistic_decision_function<typename trainer_type::kernel_type>
const generic_probabilistic_decision_function<typename trainer_type::trained_function_type>
train_probabilistic_decision_function (
const trainer_type& trainer,
const in_sample_vector_type& x,
const in_scalar_vector_type& y,
const std::vector<sample_type,alloc_type1>& x,
const std::vector<scalar_type,alloc_type2>& y,
const long folds
)
);
/*!
requires
- 1 < folds <= x.nr()
- 1 < folds <= x.size()
- is_binary_classification_problem(x,y) == true
- trainer_type == some kind of batch trainer object (e.g. svm_nu_trainer)
ensures
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment