Commit c729e2ea authored by Davis King's avatar Davis King

Adding a copy of the krr_trainer that will be just for use with linear kernels.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404113
parent b2013e68
......@@ -295,15 +295,17 @@ namespace dlib
// a convenient way of dealing with the bias term later on.
if (verbose == false)
{
proj_x(i) = join_cols(ekm.project(x(i)), ones_matrix<scalar_type>(1,1));
proj_x(i) = ekm.project(x(i));
}
else
{
proj_x(i) = join_cols(ekm.project(x(i),err), ones_matrix<scalar_type>(1,1));
proj_x(i) = ekm.project(x(i),err);
rs.add(err);
}
}
const long dims = ekm.out_vector_size();
if (verbose)
{
std::cout << "Mean EKM projection error: " << rs.mean() << std::endl;
......@@ -351,6 +353,12 @@ namespace dlib
leave one out error for sample x(i):
LOOE = loss(y(i), LOOV)
Finally, note that we will pretend there was a 1 appended to the end of each
vector in proj_x. We won't actually do that though because we don't want to
have to make a copy of all the samples. So throughout the following code
I have explicitly dealt with this.
*/
general_matrix_type C, tempm, G;
......@@ -361,8 +369,15 @@ namespace dlib
{
C += proj_x(i)*trans(proj_x(i));
L += y(i)*proj_x(i);
tempv += proj_x(i);
}
// Make C = [C tempv
// tempv' proj_x.size()]
C = join_cols(join_rows(C, tempv),
join_rows(trans(tempv), uniform_matrix<scalar_type>(1,1, proj_x.size())));
L = join_cols(L, uniform_matrix<scalar_type>(1,1, sum(y)));
eigenvalue_decomposition<general_matrix_type> eig(make_symmetric(C));
const general_matrix_type V = eig.get_pseudo_v();
const column_matrix_type D = eig.get_real_eigenvalues();
......@@ -374,10 +389,16 @@ namespace dlib
{
// Save the transpose of V into a temporary because the subsequent matrix
// vector multiplies will be faster (because of better cache locality).
const general_matrix_type transV(trans(V));
const general_matrix_type transV( colm(trans(V),range(0,dims-1)) );
// Remember the pretend 1 at the end of proj_x(*). We want to multiply trans(V)*proj_x(*)
// so to do this we pull the last column off trans(V) and store it separately.
const column_matrix_type lastV = colm(trans(V), dims);
Vx.set_size(proj_x.size());
for (long i = 0; i < proj_x.size(); ++i)
Vx(i) = squared(transV*proj_x(i));
{
Vx(i) = transV*proj_x(i);
Vx(i) = squared(Vx(i) + lastV);
}
}
the_lambda = lambda;
......@@ -392,13 +413,17 @@ namespace dlib
for (long idx = 0; idx < lams.size(); ++idx)
{
// first compute G
tempv = reciprocal(D + uniform_matrix<scalar_type>(D.nr(),D.nc(), lams(idx)));
tempv = 1.0/(D + lams(idx));
tempm = scale_columns(V,tempv);
G = tempm*trans(V);
// compute the solution w for the current lambda
w = G*L;
// make w have the same length as the x_proj vectors.
const scalar_type b = w(dims);
w = colm(w,0,dims);
scalar_type looe = 0;
for (long i = 0; i < proj_x.size(); ++i)
{
......@@ -407,7 +432,7 @@ namespace dlib
const scalar_type temp = (1 - val);
scalar_type loov;
if (temp != 0)
loov = (trans(w)*proj_x(i) - y(i)*val) / temp;
loov = (trans(w)*proj_x(i) + b - y(i)*val) / temp;
else
loov = 0;
......@@ -439,11 +464,15 @@ namespace dlib
// Now perform the main training. That is, find w.
// first, compute G = inv(C + the_lambda*I)
tempv = reciprocal(D + uniform_matrix<scalar_type>(D.nr(),D.nc(), the_lambda));
tempv = 1.0/(D + the_lambda);
tempm = scale_columns(V,tempv);
G = tempm*trans(V);
w = G*L;
// make w have the same length as the x_proj vectors.
const scalar_type b = w(dims);
w = colm(w,0,dims);
// If we haven't done this already and we are supposed to then compute the LOO error rate for
// the current lambda and store the result in best_looe.
......@@ -457,7 +486,7 @@ namespace dlib
const scalar_type temp = (1 - val);
scalar_type loov;
if (temp != 0)
loov = (trans(w)*proj_x(i) - y(i)*val) / temp;
loov = (trans(w)*proj_x(i) + b - y(i)*val) / temp;
else
loov = 0;
......@@ -477,8 +506,8 @@ namespace dlib
// convert w into a proper decision function
decision_function<kernel_type> df;
df = ekm.convert_to_decision_function(colm(w,0,w.size()-1));
df.b = -w(w.size()-1); // don't forget about the bias we stuck onto all the vectors
df = ekm.convert_to_decision_function(w);
df.b = -b; // don't forget about the bias we stuck onto all the vectors
// If we used an automatically derived basis then there isn't any point in
// keeping the ekm around. So free its memory.
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment