Commit 3c7c8fee authored by Davis King's avatar Davis King

Refactored a bunch of the svm training code into a much cleaner form.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402380
parent b51cc5c2
...@@ -19,12 +19,21 @@ ...@@ -19,12 +19,21 @@
namespace dlib namespace dlib
{ {
// ----------------------------------------------------------------------------------------
class invalid_svm_nu_error : public dlib::error
{
public:
invalid_svm_nu_error(const std::string& msg, double nu_) : dlib::error(msg), nu(nu_) {};
const double nu;
};
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template < template <
typename T typename T
> >
typename T::type maximum_nu ( typename T::type maximum_nu_impl (
const T& y const T& y
) )
{ {
...@@ -63,10 +72,67 @@ namespace dlib ...@@ -63,10 +72,67 @@ namespace dlib
return static_cast<scalar_type>(2.0*(scalar_type)std::min(pos_count,neg_count)/(scalar_type)y.nr()); return static_cast<scalar_type>(2.0*(scalar_type)std::min(pos_count,neg_count)/(scalar_type)y.nr());
} }
template <
typename T
>
typename T::type maximum_nu (
const T& y
)
{
return maximum_nu_impl(vector_to_matrix(y));
}
template <
typename T
>
typename T::value_type maximum_nu (
const T& y
)
{
return maximum_nu_impl(vector_to_matrix(y));
}
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template < template <
typename K typename T,
typename U
>
bool is_binary_classification_problem_impl (
const T& x,
const U& x_labels
)
{
if (x.nc() != 1 || x_labels.nc() != 1) return false;
if (x.nr() != x_labels.nr()) return false;
if (x.nr() <= 1) return false;
for (long r = 0; r < x_labels.nr(); ++r)
{
if (x_labels(r) != -1 && x_labels(r) != 1)
return false;
}
return true;
}
template <
typename T,
typename U
>
bool is_binary_classification_problem (
const T& x,
const U& x_labels
)
{
return is_binary_classification_problem_impl(vector_to_matrix(x), vector_to_matrix(x_labels));
}
// ----------------------------------------------------------------------------------------
template <
typename K,
typename sample_vector_type,
typename scalar_vector_type
> >
class kernel_matrix_cache class kernel_matrix_cache
{ {
...@@ -74,8 +140,8 @@ namespace dlib ...@@ -74,8 +140,8 @@ namespace dlib
typedef typename K::sample_type sample_type; typedef typename K::sample_type sample_type;
typedef typename K::mem_manager_type mem_manager_type; typedef typename K::mem_manager_type mem_manager_type;
const matrix<sample_type,0,1,mem_manager_type>& x; const sample_vector_type& x;
const matrix<scalar_type,0,1,mem_manager_type>& y; const scalar_vector_type& y;
K kernel_function; K kernel_function;
mutable matrix<scalar_type,0,0,mem_manager_type> cache; mutable matrix<scalar_type,0,0,mem_manager_type> cache;
...@@ -104,8 +170,8 @@ namespace dlib ...@@ -104,8 +170,8 @@ namespace dlib
public: public:
kernel_matrix_cache ( kernel_matrix_cache (
const matrix<sample_type,0,1,mem_manager_type>& x_, const sample_vector_type& x_,
const matrix<scalar_type,0,1,mem_manager_type>& y_, const scalar_vector_type& y_,
K kernel_function_, K kernel_function_,
long max_size_megabytes long max_size_megabytes
) : x(x_), y(y_), kernel_function(kernel_function_) ) : x(x_), y(y_), kernel_function(kernel_function_)
...@@ -180,397 +246,712 @@ namespace dlib ...@@ -180,397 +246,712 @@ namespace dlib
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template < template <
typename scalar_type, typename trainer_type,
typename scalar_vector_type typename in_sample_vector_type,
typename in_scalar_vector_type
> >
inline void set_initial_alpha ( const matrix<typename trainer_type::scalar_type, 1, 2, typename trainer_type::mem_manager_type>
const scalar_vector_type& y, cross_validate_trainer_impl (
const scalar_type nu, const trainer_type& trainer,
scalar_vector_type& alpha const in_sample_vector_type& x,
const in_scalar_vector_type& y,
const long folds
) )
{ {
set_all_elements(alpha,0); typedef typename trainer_type::scalar_type scalar_type;
const scalar_type l = y.nr(); typedef typename trainer_type::sample_type sample_type;
scalar_type temp = nu*l/2; typedef typename trainer_type::mem_manager_type mem_manager_type;
long num = (long)std::floor(temp); typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type;
long num_total = (long)std::ceil(temp); typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type;
int count = 0; // make sure requires clause is not broken
for (int i = 0; i < alpha.nr(); ++i) DLIB_ASSERT(is_binary_classification_problem(x,y) == true &&
1 < folds && folds <= x.nr(),
"\tmatrix cross_validate_trainer()"
<< "\n\t invalid inputs were given to this function"
<< "\n\t x.nr(): " << x.nr()
<< "\n\t y.nr(): " << y.nr()
<< "\n\t x.nc(): " << x.nc()
<< "\n\t y.nc(): " << y.nc()
<< "\n\t folds: " << folds
<< "\n\t is_binary_classification_problem(x,y): " << ((is_binary_classification_problem(x,y))? "true":"false")
);
// count the number of positive and negative examples
long num_pos = 0;
long num_neg = 0;
for (long r = 0; r < y.nr(); ++r)
{ {
if (y(i) == 1) if (y(r) == +1.0)
++num_pos;
else
++num_neg;
}
// figure out how many positive and negative examples we will have in each fold
const long num_pos_test_samples = num_pos/folds;
const long num_pos_train_samples = num_pos - num_pos_test_samples;
const long num_neg_test_samples = num_neg/folds;
const long num_neg_train_samples = num_neg - num_neg_test_samples;
long num_pos_correct = 0;
long num_neg_correct = 0;
typename trainer_type::trained_function_type d;
sample_vector_type x_test, x_train;
scalar_vector_type y_test, y_train;
x_test.set_size (num_pos_test_samples + num_neg_test_samples);
y_test.set_size (num_pos_test_samples + num_neg_test_samples);
x_train.set_size(num_pos_train_samples + num_neg_train_samples);
y_train.set_size(num_pos_train_samples + num_neg_train_samples);
long pos_idx = 0;
long neg_idx = 0;
for (long i = 0; i < folds; ++i)
{ {
if (count < num) long cur = 0;
// load up our positive test samples
while (cur < num_pos_test_samples)
{ {
++count; if (y(pos_idx) == +1.0)
alpha(i) = 1; {
x_test(cur) = x(pos_idx);
y_test(cur) = +1.0;
++cur;
} }
else pos_idx = (pos_idx+1)%x.nr();
}
// load up our negative test samples
while (cur < x_test.nr())
{ {
if (temp > num) if (y(neg_idx) == -1.0)
{ {
++count; x_test(cur) = x(neg_idx);
alpha(i) = temp - std::floor(temp); y_test(cur) = -1.0;
++cur;
} }
break; neg_idx = (neg_idx+1)%x.nr();
} }
// load the training data from the data following whatever we loaded
// as the testing data
long train_pos_idx = pos_idx;
long train_neg_idx = neg_idx;
cur = 0;
// load up our positive train samples
while (cur < num_pos_train_samples)
{
if (y(train_pos_idx) == +1.0)
{
x_train(cur) = x(train_pos_idx);
y_train(cur) = +1.0;
++cur;
} }
train_pos_idx = (train_pos_idx+1)%x.nr();
} }
if (count != num_total) // load up our negative train samples
while (cur < x_train.nr())
{ {
std::ostringstream sout; if (y(train_neg_idx) == -1.0)
sout << "invalid nu of " << nu << ". Must be between 0 and " << (scalar_type)count/y.nr(); {
throw error(sout.str()); x_train(cur) = x(train_neg_idx);
y_train(cur) = -1.0;
++cur;
}
train_neg_idx = (train_neg_idx+1)%x.nr();
} }
count = 0; // do the training
for (int i = 0; i < alpha.nr(); ++i) d = trainer.train(x_train,y_train);
// now test this fold
for (long i = 0; i < x_test.nr(); ++i)
{ {
if (y(i) == -1) // if this is a positive example
if (y_test(i) == +1.0)
{ {
if (count < num) if (d(x_test(i)) >= 0)
++num_pos_correct;
}
else if (y_test(i) == -1.0)
{ {
++count; if (d(x_test(i)) < 0)
alpha(i) = 1; ++num_neg_correct;
} }
else else
{ {
if (temp > num) throw dlib::error("invalid input labels to the cross_validate_trainer() function");
{
++count;
alpha(i) = temp - std::floor(temp);
}
break;
} }
} }
} // for (long i = 0; i < folds; ++i)
matrix<scalar_type, 1, 2, mem_manager_type> res;
res(0) = (scalar_type)num_pos_correct/(scalar_type)(num_pos_test_samples*folds);
res(1) = (scalar_type)num_neg_correct/(scalar_type)(num_neg_test_samples*folds);
return res;
} }
if (count != num_total) template <
typename trainer_type,
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const matrix<typename trainer_type::scalar_type, 1, 2, typename trainer_type::mem_manager_type>
cross_validate_trainer (
const trainer_type& trainer,
const in_sample_vector_type& x,
const in_scalar_vector_type& y,
const long folds
)
{ {
std::ostringstream sout; return cross_validate_trainer_impl(trainer,
sout << "invalid nu of " << nu << ". Must be between 0 and " << (scalar_type)count/y.nr(); vector_to_matrix(x),
throw error(sout.str()); vector_to_matrix(y),
} folds);
} }
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template < template <
typename K, typename trainer_type,
typename scalar_vector_type, typename in_sample_vector_type,
typename scalar_type typename in_scalar_vector_type
> >
inline bool find_working_group ( const probabilistic_decision_function<typename trainer_type::kernel_type> train_probabilistic_decision_function_impl (
const scalar_vector_type& y, const trainer_type& trainer,
const scalar_vector_type& alpha, const in_sample_vector_type& x,
const kernel_matrix_cache<K>& Q, const in_scalar_vector_type& y,
const scalar_vector_type& df, const long folds
const scalar_type tau,
const scalar_type eps,
long& i_out,
long& j_out
) )
{ {
using namespace std; typedef typename trainer_type::sample_type sample_type;
long ip = -1; typedef typename trainer_type::scalar_type scalar_type;
long jp = -1; typedef typename trainer_type::mem_manager_type mem_manager_type;
long in = -1; typedef typename trainer_type::kernel_type K;
long jn = -1;
scalar_type ip_val = -numeric_limits<scalar_type>::infinity();
scalar_type jp_val = numeric_limits<scalar_type>::infinity();
scalar_type in_val = -numeric_limits<scalar_type>::infinity();
scalar_type jn_val = numeric_limits<scalar_type>::infinity();
// loop over the alphas and find the maximum ip and in indices. /*
for (long i = 0; i < alpha.nr(); ++i) This function fits a sigmoid function to the output of the
{ svm trained by svm_nu_train(). The technique used is the one
if (y(i) == 1) described in the paper:
{
if (alpha(i) < 1.0) Probabilistic Outputs for Support Vector Machines and
{ Comparisons to Regularized Likelihood Methods by
if (-df(i) > ip_val) John C. Platt. Match 26, 1999
*/
// make sure requires clause is not broken
DLIB_ASSERT(is_binary_classification_problem(x,y) == true &&
1 < folds && folds <= x.nr(),
"\tprobabilistic_decision_function train_probabilistic_decision_function()"
<< "\n\t invalid inputs were given to this function"
<< "\n\t x.nr(): " << x.nr()
<< "\n\t y.nr(): " << y.nr()
<< "\n\t x.nc(): " << x.nc()
<< "\n\t y.nc(): " << y.nc()
<< "\n\t folds: " << folds
<< "\n\t is_binary_classification_problem(x,y): " << ((is_binary_classification_problem(x,y))? "true":"false")
);
// count the number of positive and negative examples
long num_pos = 0;
long num_neg = 0;
for (long r = 0; r < y.nr(); ++r)
{ {
ip_val = -df(i); if (y(r) == +1.0)
ip = i; ++num_pos;
}
}
}
else else
{ ++num_neg;
if (alpha(i) > 0.0)
{
if (df(i) > in_val)
{
in_val = df(i);
in = i;
}
}
}
} }
scalar_type Mp = numeric_limits<scalar_type>::infinity(); // figure out how many positive and negative examples we will have in each fold
scalar_type Mn = numeric_limits<scalar_type>::infinity(); const long num_pos_test_samples = num_pos/folds;
scalar_type bp = -numeric_limits<scalar_type>::infinity(); const long num_pos_train_samples = num_pos - num_pos_test_samples;
scalar_type bn = -numeric_limits<scalar_type>::infinity(); const long num_neg_test_samples = num_neg/folds;
const long num_neg_train_samples = num_neg - num_neg_test_samples;
// now we need to find the minimum jp and jn indices
for (long j = 0; j < alpha.nr(); ++j) decision_function<K> d;
{ typename decision_function<K>::sample_vector_type x_test, x_train;
if (y(j) == 1) typename decision_function<K>::scalar_vector_type y_test, y_train;
{ x_test.set_size (num_pos_test_samples + num_neg_test_samples);
if (alpha(j) > 0.0) y_test.set_size (num_pos_test_samples + num_neg_test_samples);
x_train.set_size(num_pos_train_samples + num_neg_train_samples);
y_train.set_size(num_pos_train_samples + num_neg_train_samples);
typedef std_allocator<scalar_type, mem_manager_type> alloc_scalar_type_vector;
typedef std::vector<scalar_type, alloc_scalar_type_vector > dvector;
typedef std_allocator<int, mem_manager_type> alloc_int_vector;
typedef std::vector<int, alloc_int_vector > ivector;
dvector out;
ivector target;
long pos_idx = 0;
long neg_idx = 0;
for (long i = 0; i < folds; ++i)
{ {
scalar_type b = ip_val + df(j); long cur = 0;
if (-df(j) < Mp)
Mp = -df(j);
if (b > 0 && (Q.is_cached(j) || b > bp || jp == -1 )) // load up our positive test samples
while (cur < num_pos_test_samples)
{ {
bp = b; if (y(pos_idx) == +1.0)
scalar_type a = Q(ip,ip) + Q(j,j) - 2*Q(j,ip);
if (a <= 0)
a = tau;
scalar_type temp = -b*b/a;
if (temp < jp_val)
{ {
jp_val = temp; x_test(cur) = x(pos_idx);
jp = j; y_test(cur) = +1.0;
} ++cur;
}
} }
pos_idx = (pos_idx+1)%x.nr();
} }
else
{
if (alpha(j) < 1.0)
{
scalar_type b = in_val - df(j);
if (df(j) < Mn)
Mn = df(j);
if (b > 0 && (Q.is_cached(j) || b > bn || jn == -1 )) // load up our negative test samples
while (cur < x_test.nr())
{ {
bn = b; if (y(neg_idx) == -1.0)
scalar_type a = Q(in,in) + Q(j,j) - 2*Q(j,in);
if (a <= 0)
a = tau;
scalar_type temp = -b*b/a;
if (temp < jn_val)
{ {
jn_val = temp; x_test(cur) = x(neg_idx);
jn = j; y_test(cur) = -1.0;
} ++cur;
}
}
} }
neg_idx = (neg_idx+1)%x.nr();
} }
// if we are at the optimal point then return false so the caller knows // load the training data from the data following whatever we loaded
// to stop optimizing // as the testing data
if (std::max(ip_val - Mp, in_val - Mn) < eps) long train_pos_idx = pos_idx;
return false; long train_neg_idx = neg_idx;
cur = 0;
if (jp_val < jn_val) // load up our positive train samples
while (cur < num_pos_train_samples)
{ {
i_out = ip; if (y(train_pos_idx) == +1.0)
j_out = jp;
}
else
{ {
i_out = in; x_train(cur) = x(train_pos_idx);
j_out = jn; y_train(cur) = +1.0;
++cur;
} }
train_pos_idx = (train_pos_idx+1)%x.nr();
if (j_out >= 0 && i_out >= 0)
return true;
else
return false;
} }
// ---------------------------------------------------------------------------------------- // load up our negative train samples
while (cur < x_train.nr())
template <
typename scalar_vector_type,
typename scalar_type
>
void calculate_rho_and_b(
const scalar_vector_type& y,
const scalar_vector_type& alpha,
const scalar_vector_type& df,
scalar_type& rho,
scalar_type& b
)
{ {
using namespace std; if (y(train_neg_idx) == -1.0)
long num_p_free = 0; {
long num_n_free = 0; x_train(cur) = x(train_neg_idx);
scalar_type sum_p_free = 0; y_train(cur) = -1.0;
scalar_type sum_n_free = 0; ++cur;
}
train_neg_idx = (train_neg_idx+1)%x.nr();
}
scalar_type upper_bound_p = -numeric_limits<scalar_type>::infinity(); // do the training
scalar_type upper_bound_n = -numeric_limits<scalar_type>::infinity(); d = trainer.train (x_train,y_train);
scalar_type lower_bound_p = numeric_limits<scalar_type>::infinity();
scalar_type lower_bound_n = numeric_limits<scalar_type>::infinity();
for(long i = 0; i < alpha.nr(); ++i) // now test this fold
{ for (long i = 0; i < x_test.nr(); ++i)
if(y(i) == 1)
{ {
if(alpha(i) == 1) out.push_back(d(x_test(i)));
// if this was a positive example
if (y_test(i) == +1.0)
{ {
if (df(i) > upper_bound_p) target.push_back(1);
upper_bound_p = df(i);
} }
else if(alpha(i) == 0) else if (y_test(i) == -1.0)
{ {
if (df(i) < lower_bound_p) target.push_back(0);
lower_bound_p = df(i);
} }
else else
{ {
++num_p_free; throw dlib::error("invalid input labels to the train_probabilistic_decision_function() function");
sum_p_free += df(i);
} }
} }
else
} // for (long i = 0; i < folds; ++i)
// Now find the parameters of the sigmoid. Do so using the method from the
// above referenced paper.
scalar_type prior0 = num_pos_test_samples*folds;
scalar_type prior1 = num_neg_test_samples*folds;
scalar_type A = 0;
scalar_type B = std::log((prior0+1)/(prior1+1));
const scalar_type hiTarget = (prior1+1)/(prior1+2);
const scalar_type loTarget = 1.0/(prior0+2);
scalar_type lambda = 1e-3;
scalar_type olderr = std::numeric_limits<scalar_type>::max();;
dvector pp(out.size(),(prior1+1)/(prior1+prior0+2));
const scalar_type min_log = -200.0;
scalar_type t = 0;
int count = 0;
for (int it = 0; it < 100; ++it)
{ {
if(alpha(i) == 1) scalar_type a = 0;
scalar_type b = 0;
scalar_type c = 0;
scalar_type d = 0;
scalar_type e = 0;
// First, compute Hessian & gradient of error function with
// respect to A & B
for (unsigned long i = 0; i < out.size(); ++i)
{ {
if (df(i) > upper_bound_n) if (target[i])
upper_bound_n = df(i); t = hiTarget;
else
t = loTarget;
const scalar_type d1 = pp[i] - t;
const scalar_type d2 = pp[i]*(1-pp[i]);
a += out[i]*out[i]*d2;
b += d2;
c += out[i]*d2;
d += out[i]*d1;
e += d1;
} }
else if(alpha(i) == 0)
// If gradient is really tiny, then stop.
if (std::abs(d) < 1e-9 && std::abs(e) < 1e-9)
break;
scalar_type oldA = A;
scalar_type oldB = B;
scalar_type err = 0;
// Loop until goodness of fit increases
while (true)
{ {
if (df(i) < lower_bound_n) scalar_type det = (a+lambda)*(b+lambda)-c*c;
lower_bound_n = df(i); // if determinant of Hessian is really close to zero then increase stabilizer.
} if (std::abs(det) <= std::numeric_limits<scalar_type>::epsilon())
else
{ {
++num_n_free; lambda *= 10;
sum_n_free += df(i); continue;
}
}
} }
scalar_type r1,r2; A = oldA + ((b+lambda)*d-c*e)/det;
if(num_p_free > 0) B = oldB + ((a+lambda)*e-c*d)/det;
r1 = sum_p_free/num_p_free;
else
r1 = (upper_bound_p+lower_bound_p)/2;
if(num_n_free > 0) // Now, compute the goodness of fit
r2 = sum_n_free/num_n_free; err = 0;
for (unsigned long i = 0; i < out.size(); ++i)
{
if (target[i])
t = hiTarget;
else else
r2 = (upper_bound_n+lower_bound_n)/2; t = loTarget;
scalar_type p = 1.0/(1.0+std::exp(out[i]*A+B));
rho = (r1+r2)/2; pp[i] = p;
b = (r1-r2)/2/rho; // At this step, make sure log(0) returns min_log
err -= t*std::max(std::log(p),min_log) + (1-t)*std::max(std::log(1-p),min_log);
} }
// ---------------------------------------------------------------------------------------- if (err < olderr*(1+1e-7))
{
template < lambda *= 0.1;
typename K, break;
typename scalar_vector_type, }
typename scalar_type
// error did not decrease: increase stabilizer by factor of 10
// & try again
lambda *= 10;
if (lambda >= 1e6) // something is broken. Give up
break;
}
scalar_type diff = err-olderr;
scalar_type scale = 0.5*(err+olderr+1.0);
if (diff > -1e-3*scale && diff < 1e-7*scale)
++count;
else
count = 0;
olderr = err;
if (count == 3)
break;
}
return probabilistic_decision_function<K>( A, B, trainer.train(x,y) );
}
template <
typename trainer_type,
typename in_sample_vector_type,
typename in_scalar_vector_type
> >
inline void optimize_working_pair ( const probabilistic_decision_function<typename trainer_type::kernel_type> train_probabilistic_decision_function (
const scalar_vector_type& y, const trainer_type& trainer,
scalar_vector_type& alpha, const in_sample_vector_type& x,
const kernel_matrix_cache<K>& Q, const in_scalar_vector_type& y,
const scalar_vector_type& df, const long folds
const scalar_type tau,
const long i,
const long j
) )
{ {
scalar_type quad_coef = Q(i,i)+Q(j,j)-2*Q(j,i); return train_probabilistic_decision_function_impl(trainer,
if (quad_coef <= 0) vector_to_matrix(x),
quad_coef = tau; vector_to_matrix(y),
scalar_type delta = (df(i)-df(j))/quad_coef; folds);
scalar_type sum = alpha(i) + alpha(j); }
alpha(i) -= delta;
alpha(j) += delta;
if(sum > 1) // ----------------------------------------------------------------------------------------
template <
typename T,
typename U
>
typename enable_if<is_matrix<T>,void>::type randomize_samples (
T& t,
U& u
)
{ {
if(alpha(i) > 1) rand::kernel_1a r;
long n = t.nr()-1;
while (n > 0)
{ {
alpha(i) = 1; // put a random integer into idx
alpha(j) = sum - 1; unsigned long idx = r.get_random_32bit_number();
// make idx be less than n
idx %= n;
// swap our randomly selected index into the n position
exchange(t(idx), t(n));
exchange(u(idx), u(n));
--n;
} }
else if(alpha(j) > 1) }
// ----------------------------------------------------------------------------------------
template <
typename T,
typename U
>
typename disable_if<is_matrix<T>,void>::type randomize_samples (
T& t,
U& u
)
{ {
alpha(j) = 1; rand::kernel_1a r;
alpha(i) = sum - 1;
long n = t.size()-1;
while (n > 0)
{
// put a random integer into idx
unsigned long idx = r.get_random_32bit_number();
// make idx be less than n
idx %= n;
// swap our randomly selected index into the n position
exchange(t[idx], t[n]);
exchange(u[idx], u[n]);
--n;
} }
} }
else
// ----------------------------------------------------------------------------------------
template <
typename T
>
typename enable_if<is_matrix<T>,void>::type randomize_samples (
T& t
)
{ {
if(alpha(j) < 0) rand::kernel_1a r;
long n = t.nr()-1;
while (n > 0)
{ {
alpha(j) = 0; // put a random integer into idx
alpha(i) = sum; unsigned long idx = r.get_random_32bit_number();
// make idx be less than n
idx %= n;
// swap our randomly selected index into the n position
exchange(t(idx), t(n));
--n;
} }
else if(alpha(i) < 0)
{
alpha(i) = 0;
alpha(j) = sum;
} }
// ----------------------------------------------------------------------------------------
template <
typename T
>
typename disable_if<is_matrix<T>,void>::type randomize_samples (
T& t
)
{
rand::kernel_1a r;
long n = t.size()-1;
while (n > 0)
{
// put a random integer into idx
unsigned long idx = r.get_random_32bit_number();
// make idx be less than n
idx %= n;
// swap our randomly selected index into the n position
exchange(t[idx], t[n]);
--n;
} }
} }
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template < template <
typename K typename K
> >
const decision_function<K> svm_nu_train ( class svm_nu_trainer
const typename decision_function<K>::sample_vector_type& x, {
const typename decision_function<K>::scalar_vector_type& y, public:
const K& kernel_function, typedef K kernel_type;
const typename K::scalar_type nu, typedef typename kernel_type::scalar_type scalar_type;
const long cache_size = 200, typedef typename kernel_type::sample_type sample_type;
const typename K::scalar_type eps = 0.001 typedef typename kernel_type::mem_manager_type mem_manager_type;
typedef decision_function<kernel_type> trained_function_type;
svm_nu_trainer (
) :
nu(0.1),
cache_size(200),
eps(0.001)
{
}
svm_nu_trainer (
const kernel_type& kernel_,
const scalar_type& nu_
) :
kernel_function(kernel_),
nu(nu_),
cache_size(200),
eps(0.001)
{
}
void set_cache_size (
long cache_size_
) )
{ {
typedef typename K::scalar_type scalar_type; cache_size = cache_size_;
typedef typename decision_function<K>::sample_vector_type sample_vector_type; }
typedef typename decision_function<K>::scalar_vector_type scalar_vector_type;
// make sure requires clause is not broken const long get_cache_size (
#ifdef ENABLE_ASSERTS ) const
for (long r = 0; r < y.nr(); ++r)
{ {
DLIB_ASSERT(y(r) == -1.0 || y(r) == 1.0, return cache_size;
"\tdecision_function svm_nu_train()" }
<< "\n\tinvalid inputs were given to this function"
<< "\n\tr: " << r
<< "\n\ty(r): " << y(r)
);
void set_epsilon (
scalar_type eps_
)
{
eps = eps_;
} }
#endif
DLIB_ASSERT(x.nr() > 1 && y.nr() == x.nr(), const scalar_type get_epsilon (
"\tdecision_function svm_nu_train()" ) const
<< "\n\tinvalid inputs were given to this function" {
<< "\n\tx.nr(): " << x.nr() return eps;
<< "\n\ty.nr(): " << y.nr() }
<< "\n\tx.nc(): " << x.nc()
<< "\n\ty.nc(): " << y.nc() void set_kernel (
); const kernel_type& k
)
{
kernel_function = k;
}
const kernel_type& get_kernel (
) const
{
return kernel_function;
}
void set_nu (
scalar_type nu_
)
{
nu = nu_;
}
const scalar_type get_nu (
) const
{
return nu;
}
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y
) const
{
return do_train(vector_to_matrix(x), vector_to_matrix(y));
}
void swap (
svm_nu_trainer& item
)
{
exchange(kernel_function, item.kernel_function);
exchange(nu, item.nu);
exchange(cache_size, item.cache_size);
exchange(eps, item.eps);
}
private:
// ------------------------------------------------------------------------------------
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> do_train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y
) const
{
typedef typename K::scalar_type scalar_type;
typedef typename decision_function<K>::sample_vector_type sample_vector_type;
typedef typename decision_function<K>::scalar_vector_type scalar_vector_type;
DLIB_ASSERT(eps > 0 && // make sure requires clause is not broken
cache_size > 0 && DLIB_ASSERT(is_binary_classification_problem(x,y) == true,
0 < nu && nu < maximum_nu(y), "\tdecision_function svm_nu_trainer::train(x,y)"
"\tdecision_function svm_nu_train()" << "\n\t invalid inputs were given to this function"
<< "\n\tinvalid inputs were given to this function" << "\n\t x.nr(): " << x.nr()
<< "\n\teps: " << eps << "\n\t y.nr(): " << y.nr()
<< "\n\tcache_size: " << cache_size << "\n\t x.nc(): " << x.nc()
<< "\n\tnu: " << nu << "\n\t y.nc(): " << y.nc()
<< "\n\tmaximum_nu(y): " << maximum_nu(y) << "\n\t is_binary_classification_problem(x,y): " << ((is_binary_classification_problem(x,y))? "true":"false")
); );
...@@ -578,7 +959,7 @@ namespace dlib ...@@ -578,7 +959,7 @@ namespace dlib
scalar_vector_type df; // delta f(alpha) scalar_vector_type df; // delta f(alpha)
scalar_vector_type alpha; scalar_vector_type alpha;
kernel_matrix_cache<K> Q(x,y,kernel_function,cache_size); kernel_matrix_cache<K, in_sample_vector_type, in_scalar_vector_type> Q(x,y,kernel_function,cache_size);
alpha.set_size(x.nr()); alpha.set_size(x.nr());
df.set_size(x.nr()); df.set_size(x.nr());
...@@ -650,587 +1031,361 @@ namespace dlib ...@@ -650,587 +1031,361 @@ namespace dlib
return decision_function<K> (sv_alpha, b, kernel_function, support_vectors); return decision_function<K> (sv_alpha, b, kernel_function, support_vectors);
} }
// ---------------------------------------------------------------------------------------- // ------------------------------------------------------------------------------------
template < template <
typename K typename scalar_type,
typename scalar_vector_type,
typename scalar_vector_type2
> >
const matrix<typename K::scalar_type, 1, 2, typename K::mem_manager_type> svm_nu_cross_validate ( inline void set_initial_alpha (
const typename decision_function<K>::sample_vector_type& x, const scalar_vector_type& y,
const typename decision_function<K>::scalar_vector_type& y, const scalar_type nu,
const K& kernel_function, scalar_vector_type2& alpha
const typename K::scalar_type nu, ) const
const long folds,
const long cache_size = 200,
const typename K::scalar_type eps = 0.001
)
{ {
typedef typename K::scalar_type scalar_type; set_all_elements(alpha,0);
typedef typename decision_function<K>::sample_vector_type sample_vector_type; const scalar_type l = y.nr();
typedef typename decision_function<K>::scalar_vector_type scalar_vector_type; scalar_type temp = nu*l/2;
long num = (long)std::floor(temp);
long num_total = (long)std::ceil(temp);
// make sure requires clause is not broken int count = 0;
#ifdef ENABLE_ASSERTS for (int i = 0; i < alpha.nr(); ++i)
for (long r = 0; r < y.nr(); ++r)
{ {
DLIB_ASSERT(y(r) == -1.0 || y(r) == 1.0, if (y(i) == 1)
"\tdecision_function svm_nu_cross_validate()"
<< "\n\tinvalid inputs were given to this function"
<< "\n\tr: " << r
<< "\n\ty(r): " << y(r)
);
}
#endif
DLIB_ASSERT(x.nr() > 1 && y.nr() == x.nr(),
"\tdecision_function svm_nu_cross_validate()"
<< "\n\tinvalid inputs were given to this function"
<< "\n\tx.nr(): " << x.nr()
<< "\n\ty.nr(): " << y.nr()
<< "\n\tx.nc(): " << x.nc()
<< "\n\ty.nc(): " << y.nc()
);
DLIB_ASSERT(eps > 0 &&
folds > 1 && folds <= x.nr() &&
cache_size > 0 &&
0 < nu && nu < maximum_nu(y),
"\tdecision_function svm_nu_cross_validate()"
<< "\n\tinvalid inputs were given to this function"
<< "\n\teps: " << eps
<< "\n\tfolds: " << folds
<< "\n\tcache_size: " << cache_size
<< "\n\tnu: " << nu
<< "\n\tmaximum_nu(y): " << maximum_nu(y)
);
// count the number of positive and negative examples
long num_pos = 0;
long num_neg = 0;
for (long r = 0; r < y.nr(); ++r)
{
if (y(r) == +1.0)
++num_pos;
else
++num_neg;
}
// figure out how many positive and negative examples we will have in each fold
const long num_pos_test_samples = num_pos/folds;
const long num_pos_train_samples = num_pos - num_pos_test_samples;
const long num_neg_test_samples = num_neg/folds;
const long num_neg_train_samples = num_neg - num_neg_test_samples;
long num_pos_correct = 0;
long num_neg_correct = 0;
decision_function<K> d;
typename decision_function<K>::sample_vector_type x_test, x_train;
typename decision_function<K>::scalar_vector_type y_test, y_train;
x_test.set_size (num_pos_test_samples + num_neg_test_samples);
y_test.set_size (num_pos_test_samples + num_neg_test_samples);
x_train.set_size(num_pos_train_samples + num_neg_train_samples);
y_train.set_size(num_pos_train_samples + num_neg_train_samples);
long pos_idx = 0;
long neg_idx = 0;
for (long i = 0; i < folds; ++i)
{
long cur = 0;
// load up our positive test samples
while (cur < num_pos_test_samples)
{ {
if (y(pos_idx) == +1.0) if (count < num)
{ {
x_test(cur) = x(pos_idx); ++count;
y_test(cur) = +1.0; alpha(i) = 1;
++cur;
}
pos_idx = (pos_idx+1)%x.nr();
} }
else
// load up our negative test samples
while (cur < x_test.nr())
{ {
if (y(neg_idx) == -1.0) if (temp > num)
{ {
x_test(cur) = x(neg_idx); ++count;
y_test(cur) = -1.0; alpha(i) = temp - std::floor(temp);
++cur;
} }
neg_idx = (neg_idx+1)%x.nr(); break;
} }
// load the training data from the data following whatever we loaded
// as the testing data
long train_pos_idx = pos_idx;
long train_neg_idx = neg_idx;
cur = 0;
// load up our positive train samples
while (cur < num_pos_train_samples)
{
if (y(train_pos_idx) == +1.0)
{
x_train(cur) = x(train_pos_idx);
y_train(cur) = +1.0;
++cur;
} }
train_pos_idx = (train_pos_idx+1)%x.nr();
} }
// load up our negative train samples if (count != num_total)
while (cur < x_train.nr())
{
if (y(train_neg_idx) == -1.0)
{ {
x_train(cur) = x(train_neg_idx); std::ostringstream sout;
y_train(cur) = -1.0; sout << "invalid nu of " << nu << ". Must be between 0 and " << (scalar_type)count/y.nr();
++cur; throw invalid_svm_nu_error(sout.str(),nu);
}
train_neg_idx = (train_neg_idx+1)%x.nr();
} }
// do the training count = 0;
d = svm_nu_train (x_train,y_train,kernel_function,nu,cache_size,eps); for (int i = 0; i < alpha.nr(); ++i)
// now test this fold
for (long i = 0; i < x_test.nr(); ++i)
{ {
// if this is a positive example if (y(i) == -1)
if (y_test(i) == +1.0)
{ {
if (d(x_test(i)) >= 0) if (count < num)
++num_pos_correct;
}
else if (y_test(i) == -1.0)
{ {
if (d(x_test(i)) < 0) ++count;
++num_neg_correct; alpha(i) = 1;
} }
else else
{ {
throw dlib::error("invalid input labels to the svm_nu_cross_validate() function"); if (temp > num)
{
++count;
alpha(i) = temp - std::floor(temp);
}
break;
}
} }
} }
} // for (long i = 0; i < folds; ++i) if (count != num_total)
{
matrix<typename K::scalar_type, 1, 2, typename K::mem_manager_type> res; std::ostringstream sout;
res(0) = (scalar_type)num_pos_correct/(scalar_type)(num_pos_test_samples*folds); sout << "invalid nu of " << nu << ". Must be between 0 and " << (scalar_type)count/y.nr();
res(1) = (scalar_type)num_neg_correct/(scalar_type)(num_neg_test_samples*folds); throw invalid_svm_nu_error(sout.str(),nu);
return res; }
} }
// ---------------------------------------------------------------------------------------- // ------------------------------------------------------------------------------------
template < template <
typename K typename sample_vector_type,
typename scalar_vector_type,
typename scalar_vector_type2,
typename scalar_type
> >
const probabilistic_decision_function<K> svm_nu_train_prob ( inline bool find_working_group (
const typename decision_function<K>::sample_vector_type& x, const scalar_vector_type2& y,
const typename decision_function<K>::scalar_vector_type& y, const scalar_vector_type& alpha,
const K& kernel_function, const kernel_matrix_cache<K,sample_vector_type, scalar_vector_type2>& Q,
const typename K::scalar_type nu, const scalar_vector_type& df,
const long folds, const scalar_type tau,
const long cache_size = 200, const scalar_type eps,
const typename K::scalar_type eps = 0.001 long& i_out,
) long& j_out
{ ) const
typedef typename K::scalar_type scalar_type;
typedef typename K::mem_manager_type mem_manager_type;
typedef typename decision_function<K>::sample_vector_type sample_vector_type;
typedef typename decision_function<K>::scalar_vector_type scalar_vector_type;
/*
This function fits a sigmoid function to the output of the
svm trained by svm_nu_train(). The technique used is the one
described in the paper:
Probabilistic Outputs for Support Vector Machines and
Comparisons to Regularized Likelihood Methods by
John C. Platt. Match 26, 1999
*/
// make sure requires clause is not broken
#ifdef ENABLE_ASSERTS
for (long r = 0; r < y.nr(); ++r)
{ {
DLIB_ASSERT(y(r) == -1.0 || y(r) == 1.0, using namespace std;
"\tdecision_function svm_nu_train()" long ip = -1;
<< "\n\tinvalid inputs were given to this function" long jp = -1;
<< "\n\tr: " << r long in = -1;
<< "\n\ty(r): " << y(r) long jn = -1;
);
}
#endif
DLIB_ASSERT(x.nr() > 1 && y.nr() == x.nr(),
"\tdecision_function svm_nu_train()"
<< "\n\tinvalid inputs were given to this function"
<< "\n\tx.nr(): " << x.nr()
<< "\n\ty.nr(): " << y.nr()
<< "\n\tx.nc(): " << x.nc()
<< "\n\ty.nc(): " << y.nc()
);
DLIB_ASSERT(eps > 0 && scalar_type ip_val = -numeric_limits<scalar_type>::infinity();
folds > 1 && folds <= x.nr() && scalar_type jp_val = numeric_limits<scalar_type>::infinity();
cache_size > 0 && scalar_type in_val = -numeric_limits<scalar_type>::infinity();
0 < nu && nu < maximum_nu(y), scalar_type jn_val = numeric_limits<scalar_type>::infinity();
"\tdecision_function svm_nu_train()"
<< "\n\tinvalid inputs were given to this function"
<< "\n\teps: " << eps
<< "\n\tfolds: " << folds
<< "\n\tcache_size: " << cache_size
<< "\n\tnu: " << nu
<< "\n\tmaximum_nu(y): " << maximum_nu(y)
);
// count the number of positive and negative examples // loop over the alphas and find the maximum ip and in indices.
long num_pos = 0; for (long i = 0; i < alpha.nr(); ++i)
long num_neg = 0;
for (long r = 0; r < y.nr(); ++r)
{ {
if (y(r) == +1.0) if (y(i) == 1)
++num_pos;
else
++num_neg;
}
// figure out how many positive and negative examples we will have in each fold
const long num_pos_test_samples = num_pos/folds;
const long num_pos_train_samples = num_pos - num_pos_test_samples;
const long num_neg_test_samples = num_neg/folds;
const long num_neg_train_samples = num_neg - num_neg_test_samples;
decision_function<K> d;
typename decision_function<K>::sample_vector_type x_test, x_train;
typename decision_function<K>::scalar_vector_type y_test, y_train;
x_test.set_size (num_pos_test_samples + num_neg_test_samples);
y_test.set_size (num_pos_test_samples + num_neg_test_samples);
x_train.set_size(num_pos_train_samples + num_neg_train_samples);
y_train.set_size(num_pos_train_samples + num_neg_train_samples);
typedef std_allocator<scalar_type, mem_manager_type> alloc_scalar_type_vector;
typedef std::vector<scalar_type, alloc_scalar_type_vector > dvector;
typedef std_allocator<int, mem_manager_type> alloc_int_vector;
typedef std::vector<int, alloc_int_vector > ivector;
dvector out;
ivector target;
long pos_idx = 0;
long neg_idx = 0;
for (long i = 0; i < folds; ++i)
{ {
long cur = 0; if (alpha(i) < 1.0)
// load up our positive test samples
while (cur < num_pos_test_samples)
{ {
if (y(pos_idx) == +1.0) if (-df(i) > ip_val)
{ {
x_test(cur) = x(pos_idx); ip_val = -df(i);
y_test(cur) = +1.0; ip = i;
++cur;
} }
pos_idx = (pos_idx+1)%x.nr();
} }
}
// load up our negative test samples else
while (cur < x_test.nr())
{ {
if (y(neg_idx) == -1.0) if (alpha(i) > 0.0)
{ {
x_test(cur) = x(neg_idx); if (df(i) > in_val)
y_test(cur) = -1.0; {
++cur; in_val = df(i);
in = i;
} }
neg_idx = (neg_idx+1)%x.nr();
} }
// load the training data from the data following whatever we loaded
// as the testing data
long train_pos_idx = pos_idx;
long train_neg_idx = neg_idx;
cur = 0;
// load up our positive train samples
while (cur < num_pos_train_samples)
{
if (y(train_pos_idx) == +1.0)
{
x_train(cur) = x(train_pos_idx);
y_train(cur) = +1.0;
++cur;
} }
train_pos_idx = (train_pos_idx+1)%x.nr();
} }
// load up our negative train samples scalar_type Mp = numeric_limits<scalar_type>::infinity();
while (cur < x_train.nr()) scalar_type Mn = numeric_limits<scalar_type>::infinity();
scalar_type bp = -numeric_limits<scalar_type>::infinity();
scalar_type bn = -numeric_limits<scalar_type>::infinity();
// now we need to find the minimum jp and jn indices
for (long j = 0; j < alpha.nr(); ++j)
{ {
if (y(train_neg_idx) == -1.0) if (y(j) == 1)
{ {
x_train(cur) = x(train_neg_idx); if (alpha(j) > 0.0)
y_train(cur) = -1.0; {
++cur; scalar_type b = ip_val + df(j);
} if (-df(j) < Mp)
train_neg_idx = (train_neg_idx+1)%x.nr(); Mp = -df(j);
}
// do the training
d = svm_nu_train (x_train,y_train,kernel_function,nu,cache_size,eps);
// now test this fold if (b > 0 && (Q.is_cached(j) || b > bp || jp == -1 ))
for (long i = 0; i < x_test.nr(); ++i)
{ {
out.push_back(d(x_test(i))); bp = b;
// if this was a positive example scalar_type a = Q(ip,ip) + Q(j,j) - 2*Q(j,ip);
if (y_test(i) == +1.0) if (a <= 0)
a = tau;
scalar_type temp = -b*b/a;
if (temp < jp_val)
{ {
target.push_back(1); jp_val = temp;
jp = j;
} }
else if (y_test(i) == -1.0)
{
target.push_back(0);
} }
else
{
throw dlib::error("invalid input labels to the svm_nu_train_prob() function");
} }
} }
else
} // for (long i = 0; i < folds; ++i)
// Now find the parameters of the sigmoid. Do so using the method from the
// above referenced paper.
scalar_type prior0 = num_pos_test_samples*folds;
scalar_type prior1 = num_neg_test_samples*folds;
scalar_type A = 0;
scalar_type B = std::log((prior0+1)/(prior1+1));
const scalar_type hiTarget = (prior1+1)/(prior1+2);
const scalar_type loTarget = 1.0/(prior0+2);
scalar_type lambda = 1e-3;
scalar_type olderr = std::numeric_limits<scalar_type>::max();;
dvector pp(out.size(),(prior1+1)/(prior1+prior0+2));
const scalar_type min_log = -200.0;
scalar_type t = 0;
int count = 0;
for (int it = 0; it < 100; ++it)
{ {
scalar_type a = 0; if (alpha(j) < 1.0)
scalar_type b = 0;
scalar_type c = 0;
scalar_type d = 0;
scalar_type e = 0;
// First, compute Hessian & gradient of error function with
// respect to A & B
for (unsigned long i = 0; i < out.size(); ++i)
{ {
if (target[i]) scalar_type b = in_val - df(j);
t = hiTarget; if (df(j) < Mn)
else Mn = df(j);
t = loTarget;
const scalar_type d1 = pp[i] - t;
const scalar_type d2 = pp[i]*(1-pp[i]);
a += out[i]*out[i]*d2;
b += d2;
c += out[i]*d2;
d += out[i]*d1;
e += d1;
}
// If gradient is really tiny, then stop.
if (std::abs(d) < 1e-9 && std::abs(e) < 1e-9)
break;
scalar_type oldA = A;
scalar_type oldB = B;
scalar_type err = 0;
// Loop until goodness of fit increases if (b > 0 && (Q.is_cached(j) || b > bn || jn == -1 ))
while (true)
{ {
scalar_type det = (a+lambda)*(b+lambda)-c*c; bn = b;
// if determinant of Hessian is really close to zero then increase stabilizer. scalar_type a = Q(in,in) + Q(j,j) - 2*Q(j,in);
if (std::abs(det) <= std::numeric_limits<scalar_type>::epsilon()) if (a <= 0)
a = tau;
scalar_type temp = -b*b/a;
if (temp < jn_val)
{ {
lambda *= 10; jn_val = temp;
continue; jn = j;
}
}
}
}
} }
A = oldA + ((b+lambda)*d-c*e)/det; // if we are at the optimal point then return false so the caller knows
B = oldB + ((a+lambda)*e-c*d)/det; // to stop optimizing
if (std::max(ip_val - Mp, in_val - Mn) < eps)
return false;
// Now, compute the goodness of fit if (jp_val < jn_val)
err = 0;
for (unsigned long i = 0; i < out.size(); ++i)
{ {
if (target[i]) i_out = ip;
t = hiTarget; j_out = jp;
else
t = loTarget;
scalar_type p = 1.0/(1.0+std::exp(out[i]*A+B));
pp[i] = p;
// At this step, make sure log(0) returns min_log
err -= t*std::max(std::log(p),min_log) + (1-t)*std::max(std::log(1-p),min_log);
} }
else
if (err < olderr*(1+1e-7))
{ {
lambda *= 0.1; i_out = in;
break; j_out = jn;
}
// error did not decrease: increase stabilizer by factor of 10
// & try again
lambda *= 10;
if (lambda >= 1e6) // something is broken. Give up
break;
} }
scalar_type diff = err-olderr; if (j_out >= 0 && i_out >= 0)
scalar_type scale = 0.5*(err+olderr+1.0); return true;
if (diff > -1e-3*scale && diff < 1e-7*scale)
++count;
else else
count = 0; return false;
olderr = err;
if (count == 3)
break;
}
return probabilistic_decision_function<K>(
A, B,
svm_nu_train (x,y,kernel_function,nu,cache_size,eps) );
} }
// ---------------------------------------------------------------------------------------- // ------------------------------------------------------------------------------------
template < template <
typename T, typename scalar_vector_type,
typename U typename scalar_vector_type2,
typename scalar_type
> >
typename enable_if<is_matrix<T>,void>::type randomize_samples ( void calculate_rho_and_b(
T& t, const scalar_vector_type2& y,
U& u const scalar_vector_type& alpha,
) const scalar_vector_type& df,
{ scalar_type& rho,
rand::kernel_1a r; scalar_type& b
) const
long n = t.nr()-1;
while (n > 0)
{ {
// put a random integer into idx using namespace std;
unsigned long idx = r.get_random_32bit_number(); long num_p_free = 0;
long num_n_free = 0;
// make idx be less than n scalar_type sum_p_free = 0;
idx %= n; scalar_type sum_n_free = 0;
// swap our randomly selected index into the n position scalar_type upper_bound_p = -numeric_limits<scalar_type>::infinity();
exchange(t(idx), t(n)); scalar_type upper_bound_n = -numeric_limits<scalar_type>::infinity();
exchange(u(idx), u(n)); scalar_type lower_bound_p = numeric_limits<scalar_type>::infinity();
scalar_type lower_bound_n = numeric_limits<scalar_type>::infinity();
--n; for(long i = 0; i < alpha.nr(); ++i)
{
if(y(i) == 1)
{
if(alpha(i) == 1)
{
if (df(i) > upper_bound_p)
upper_bound_p = df(i);
} }
else if(alpha(i) == 0)
{
if (df(i) < lower_bound_p)
lower_bound_p = df(i);
} }
else
// ----------------------------------------------------------------------------------------
template <
typename T,
typename U
>
typename disable_if<is_matrix<T>,void>::type randomize_samples (
T& t,
U& u
)
{ {
rand::kernel_1a r; ++num_p_free;
sum_p_free += df(i);
long n = t.size()-1; }
while (n > 0) }
else
{ {
// put a random integer into idx if(alpha(i) == 1)
unsigned long idx = r.get_random_32bit_number(); {
if (df(i) > upper_bound_n)
upper_bound_n = df(i);
}
else if(alpha(i) == 0)
{
if (df(i) < lower_bound_n)
lower_bound_n = df(i);
}
else
{
++num_n_free;
sum_n_free += df(i);
}
}
}
// make idx be less than n scalar_type r1,r2;
idx %= n; if(num_p_free > 0)
r1 = sum_p_free/num_p_free;
else
r1 = (upper_bound_p+lower_bound_p)/2;
// swap our randomly selected index into the n position if(num_n_free > 0)
exchange(t[idx], t[n]); r2 = sum_n_free/num_n_free;
exchange(u[idx], u[n]); else
r2 = (upper_bound_n+lower_bound_n)/2;
--n; rho = (r1+r2)/2;
} b = (r1-r2)/2/rho;
} }
// ---------------------------------------------------------------------------------------- // ------------------------------------------------------------------------------------
template < template <
typename T typename sample_vector_type,
typename scalar_vector_type,
typename scalar_vector_type2,
typename scalar_type
> >
typename enable_if<is_matrix<T>,void>::type randomize_samples ( inline void optimize_working_pair (
T& t const scalar_vector_type2& y,
) scalar_vector_type& alpha,
const kernel_matrix_cache<K, sample_vector_type, scalar_vector_type2>& Q,
const scalar_vector_type& df,
const scalar_type tau,
const long i,
const long j
) const
{ {
rand::kernel_1a r; scalar_type quad_coef = Q(i,i)+Q(j,j)-2*Q(j,i);
if (quad_coef <= 0)
quad_coef = tau;
scalar_type delta = (df(i)-df(j))/quad_coef;
scalar_type sum = alpha(i) + alpha(j);
alpha(i) -= delta;
alpha(j) += delta;
long n = t.nr()-1; if(sum > 1)
while (n > 0)
{ {
// put a random integer into idx if(alpha(i) > 1)
unsigned long idx = r.get_random_32bit_number(); {
alpha(i) = 1;
// make idx be less than n alpha(j) = sum - 1;
idx %= n;
// swap our randomly selected index into the n position
exchange(t(idx), t(n));
--n;
} }
else if(alpha(j) > 1)
{
alpha(j) = 1;
alpha(i) = sum - 1;
} }
}
// ---------------------------------------------------------------------------------------- else
template <
typename T
>
typename disable_if<is_matrix<T>,void>::type randomize_samples (
T& t
)
{ {
rand::kernel_1a r; if(alpha(j) < 0)
long n = t.size()-1;
while (n > 0)
{ {
// put a random integer into idx alpha(j) = 0;
unsigned long idx = r.get_random_32bit_number(); alpha(i) = sum;
// make idx be less than n
idx %= n;
// swap our randomly selected index into the n position
exchange(t[idx], t[n]);
--n;
} }
else if(alpha(i) < 0)
{
alpha(i) = 0;
alpha(j) = sum;
} }
}
}
// ------------------------------------------------------------------------------------
kernel_type kernel_function;
scalar_type nu;
long cache_size;
scalar_type eps;
}; // end of class svm_nu_trainer
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
......
...@@ -17,8 +17,24 @@ namespace dlib ...@@ -17,8 +17,24 @@ namespace dlib
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
// Functions that perform SVM training
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
class invalid_svm_nu_error : public dlib::error
{
/*!
WHAT THIS OBJECT REPRESENTS
This object is an exception class used to indicate that a
value of nu used for svm training is incompatible with a
particular data set.
this->nu will be set to the invalid value of nu used.
!*/
public:
invalid_svm_nu_error(const std::string& msg, double nu_) : dlib::error(msg), nu(nu_) {};
const double nu;
};
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template < template <
...@@ -29,102 +45,242 @@ namespace dlib ...@@ -29,102 +45,242 @@ namespace dlib
); );
/*! /*!
requires requires
- T == a matrix object - T == a matrix object or an object convertible to a matrix via
vector_to_matrix()
- y.nc() == 1 - y.nc() == 1
- y.nr() > 1 - y.nr() > 1
- for all valid i: - for all valid i:
- y(i) == -1 or +1 - y(i) == -1 or +1
ensures ensures
- returns the maximum valid nu that can be used with svm_nu_train(). - returns the maximum valid nu that can be used with the svm_nu_trainer and
the training set labels from the given y vector.
(i.e. 2.0*min(number of +1 examples in y, number of -1 examples in y)/y.nr()) (i.e. 2.0*min(number of +1 examples in y, number of -1 examples in y)/y.nr())
!*/ !*/
template < // ----------------------------------------------------------------------------------------
typename K
bool template <
typename T,
typename U
> >
const decision_function<K> svm_nu_train ( bool is_binary_classification_problem (
const typename decision_function<K>::sample_vector_type& x, const T& x,
const typename decision_function<K>::scalar_vector_type& y, const U& x_labels
const K& kernel_function,
const typename K::scalar_type nu,
const long cache_size = 200,
const typename K::scalar_type eps = 0.001
); );
/*! /*!
requires requires
- eps > 0 - T == a matrix or something convertible to a matrix via vector_to_matrix()
- U == a matrix or something convertible to a matrix via vector_to_matrix()
ensures
- returns true if all of the following are true and false otherwise:
- x.nc() == 1 (i.e. x is a column vector) - x.nc() == 1 (i.e. x is a column vector)
- y.nc() == 1 (i.e. y is a column vector) - x_labels.nc() == 1 (i.e. x_labels is a column vector)
- x.nr() == y.nr() - x.nr() == x_labels.nr()
- x.nr() > 1 - x.nr() > 1
- cache_size > 0
- for all valid i: - for all valid i:
- y(i) == -1 or +1 - x_labels(i) == -1 or +1
- y(i) is the class that should be assigned to training example x(i) !*/
- 0 < nu < maximum_nu(y)
- kernel_function == a kernel function object type as defined at the // ----------------------------------------------------------------------------------------
top of dlib/svm/kernel_abstract.h // ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
typename K
>
class svm_nu_trainer
{
/*!
REQUIREMENTS ON K
is a kernel function object as defined in dlib/svm/kernel_abstract.h
WHAT THIS OBJECT REPRESENTS
This object implements a trainer for a nu support vector machine for
solving binary classification problems.
The implementation of the nu-svm training algorithm used by this object is based
on the following excellent papers:
- Chang and Lin, Training {nu}-Support Vector Classifiers: Theory and Algorithms
- Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector
machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm
!*/
public:
typedef K kernel_type;
typedef typename kernel_type::scalar_type scalar_type;
typedef typename kernel_type::sample_type sample_type;
typedef typename kernel_type::mem_manager_type mem_manager_type;
typedef decision_function<kernel_type> trained_function_type;
svm_nu_trainer (
);
/*!
ensures
- This object is properly initialized and ready to be used
to train a support vector machine.
- #get_kernel() == kernel_type()
- #get_nu() == 0.1
- #get_cache_size() == 200
- #get_epsilon() == 0.001
!*/
svm_nu_trainer (
const kernel_type& kernel,
const scalar_type& nu
);
/*!
requires
- 0 < nu <= 1
ensures
- This object is properly initialized and ready to be used
to train a support vector machine.
- #get_kernel() == kernel
- #get_nu() == nu
- #get_cache_size() == 200
- #get_epsilon() == 0.001
!*/
void set_cache_size (
long cache_size
);
/*!
requires
- cache_size > 0
ensures
- #get_cache_size() == cache_size
!*/
const long get_cache_size (
) const;
/*!
ensures
- returns the number of megabytes of cache this object will use
when it performs training via the this->train() function.
(bigger values of this may make training go faster but doesn't affect
the result. However, too big a value will cause you to run out of
memory obviously.)
!*/
void set_epsilon (
scalar_type eps
);
/*!
requires
- eps > 0
ensures
- #get_epsilon() == eps
!*/
const scalar_type get_epsilon (
) const;
/*!
ensures
- returns the error epsilon that determines when training should stop.
Generally a good value for this is 0.001. Smaller values may result
in a more accurate solution but take longer to execute.
!*/
void set_kernel (
const kernel_type& k
);
/*!
ensures
- #get_kernel() == k
!*/
const kernel_type& get_kernel (
) const;
/*!
ensures
- returns a copy of the kernel function in use by this object
!*/
void set_nu (
scalar_type nu
);
/*!
requires
- 0 < nu <= 1
ensures
- #get_nu() == nu
!*/
const scalar_type get_nu (
) const;
/*!
ensures
- returns the nu svm parameter. This is a value between 0 and
1. It is the parameter that determines the trade off between
trying to fit the training data exactly or allowing more errors
but hopefully improving the generalization ability of the
resulting classifier. For more information you should consult
the papers referenced above.
!*/
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y
) const;
/*!
requires
- is_binary_classification_problem(x,y) == true
ensures ensures
- trains a nu support vector classifier given the training samples in x and - trains a nu support vector classifier given the training samples in x and
labels in y. Training is done when the error is less than eps. labels in y. Training is done when the error is less than get_epsilon().
- caches approximately at most cache_size megabytes of the kernel matrix.
(bigger values of this may make training go faster but doesn't affect the
result. However, too big a value will cause you to run out of memory.)
- returns a decision function F with the following properties: - returns a decision function F with the following properties:
- if (new_x is a sample predicted have +1 label) then - if (new_x is a sample predicted have +1 label) then
- F(new_x) >= 0 - F(new_x) >= 0
- else - else
- F(new_x) < 0 - F(new_x) < 0
throws
- invalid_svm_nu_error
This exception is thrown if get_nu() > maximum_nu(y)
- std::bad_alloc
!*/ !*/
/* void swap (
The implementation of the nu-svm training algorithm used by this library is based svm_nu_trainer& item
on the following excellent papers: );
- Chang and Lin, Training {nu}-Support Vector Classifiers: Theory and Algorithms /*!
- Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector ensures
machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm - swaps *this and item
*/ !*/
};
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template < template <
typename K typename trainer_type,
typename in_sample_vector_type,
typename in_scalar_vector_type
> >
const probabilistic_decision_function<K> svm_nu_train_prob ( const probabilistic_decision_function<typename trainer_type::kernel_type>
const typename decision_function<K>::sample_vector_type& x, train_probabilistic_decision_function (
const typename decision_function<K>::scalar_vector_type& y, const trainer_type& trainer,
const K& kernel_function, const in_sample_vector_type& x,
const typename K::scalar_type nu, const in_scalar_vector_type& y,
const long folds, const long folds
const long cache_size = 200, )
const typename K::scalar_type eps = 0.001
);
/*! /*!
requires requires
- eps > 0
- 1 < folds <= x.nr() - 1 < folds <= x.nr()
- x.nc() == 1 (i.e. x is a column vector) - is_binary_classification_problem(x,y) == true
- y.nc() == 1 (i.e. y is a column vector) - trainer_type == some kind of trainer object (e.g. svm_nu_trainer)
- x.nr() == y.nr()
- x.nr() > 1
- cache_size > 0
- for all valid i:
- y(i) == -1 or +1
- y(i) is the class that should be assigned to training example x(i)
- 0 < nu < maximum_nu(y)
- kernel_function == a kernel function object type as defined at the
top of dlib/svm/kernel_abstract.h
ensures ensures
- trains a nu support vector classifier given the training samples in x and - trains a nu support vector classifier given the training samples in x and
labels in y. Training is done when the error is less than eps. labels in y.
- caches approximately at most cache_size megabytes of the kernel matrix. - returns a probabilistic_decision_function that represents the trained svm.
(bigger values of this may make training go faster but doesn't affect the
result. However, too big a value will cause you to run out of memory.)
- returns a probabilistic_decision_function that represents the trained
svm.
- The parameters of the probability model are estimated by performing k-fold - The parameters of the probability model are estimated by performing k-fold
cross validation. cross validation.
- The number of folds used is given by the folds argument. - The number of folds used is given by the folds argument.
throws
- any exceptions thrown by trainer.train()
- std::bad_alloc
!*/ !*/
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
...@@ -134,46 +290,36 @@ namespace dlib ...@@ -134,46 +290,36 @@ namespace dlib
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template < template <
typename K typename trainer_type,
typename in_sample_vector_type,
typename in_scalar_vector_type
> >
const matrix<typename K::scalar_type, 1, 2, typename K::mem_manager_type> svm_nu_cross_validate ( const matrix<typename trainer_type::scalar_type, 1, 2, typename trainer_type::mem_manager_type>
const typename decision_function<K>::sample_vector_type& x, cross_validate_trainer (
const typename decision_function<K>::scalar_vector_type& y, const trainer_type& trainer,
const K& kernel_function, const in_sample_vector_type& x,
const typename K::scalar_type nu, const in_scalar_vector_type& y,
const long folds, const long folds
const long cache_size = 200,
const typename K::scalar_type eps = 0.001
); );
/*! /*!
requires requires
- eps > 0 - is_binary_classification_problem(x,y) == true
- 1 < folds <= x.nr() - 1 < folds <= x.nr()
- x.nc() == 1 (i.e. x is a column vector) - trainer_type == some kind of trainer object (e.g. svm_nu_trainer)
- y.nc() == 1 (i.e. y is a column vector)
- x.nr() == y.nr()
- x.nr() > 1
- cache_size > 0
- for all valid i:
- y(i) == -1 or +1
- y(i) is the class that should be assigned to training example x(i)
- 0 < nu < maximum_nu(y)
- kernel_function == a kernel function object type as defined at the
top of dlib/svm/kernel_abstract.h
ensures ensures
- performs k-fold cross validation by training a nu-svm using the svm_nu_train() - performs k-fold cross validation by using the given trainer to solve the
function. Each fold is tested using the learned decision_function and the given binary classification problem for the given number of folds.
average accuracy from all folds is returned. The accuracy is returned in Each fold is tested using the output of the trainer and the average
a column vector, let us call it R. Both quantities in R are numbers between classification accuracy from all folds is returned.
0 and 1 which represent the fraction of examples correctly classified. R(0) - The accuracy is returned in a column vector, let us call it R. Both
is the fraction of +1 examples correctly classified and R(1) is the fraction quantities in R are numbers between 0 and 1 which represent the fraction
of -1 examples correctly classified. of examples correctly classified. R(0) is the fraction of +1 examples
correctly classified and R(1) is the fraction of -1 examples correctly
classified.
- The number of folds used is given by the folds argument. - The number of folds used is given by the folds argument.
- in each fold: trains a nu support vector classifier given the training samples throws
in x and labels in y. Training is done when the error is less than eps. - any exceptions thrown by trainer.train()
- caches approximately at most cache_size megabytes of the kernel matrix. - std::bad_alloc
(bigger values of this may make training go faster but doesn't affect the
result. However, too big a value will cause you to run out of memory.)
!*/ !*/
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment