Commit c729e2ea authored by Davis King

Adding a copy of the krr_trainer that will be just for use with linear kernels.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404113
parent b2013e68
@@ -295,15 +295,17 @@ namespace dlib
                 // a convenient way of dealing with the bias term later on.
                 if (verbose == false)
                 {
-                    proj_x(i) = join_cols(ekm.project(x(i)), ones_matrix<scalar_type>(1,1));
+                    proj_x(i) = ekm.project(x(i));
                 }
                 else
                 {
-                    proj_x(i) = join_cols(ekm.project(x(i),err), ones_matrix<scalar_type>(1,1));
+                    proj_x(i) = ekm.project(x(i),err);
                     rs.add(err);
                 }
             }
+
+            const long dims = ekm.out_vector_size();

             if (verbose)
             {
                 std::cout << "Mean EKM projection error: " << rs.mean() << std::endl;
@@ -351,6 +353,12 @@ namespace dlib
                 leave one out error for sample x(i):
                     LOOE = loss(y(i), LOOV)
+
+                Finally, note that we will pretend there was a 1 appended to the end of each
+                vector in proj_x.  We won't actually do that though because we don't want to
+                have to make a copy of all the samples.  So throughout the following code
+                I have explicitly dealt with this.
+
             */

             general_matrix_type C, tempm, G;
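
Aside: the "pretend 1" trick the new comment describes is the usual way of folding a bias term into ridge regression. In my notation (not from the patch), each projected sample and the weight vector are augmented as

    \tilde{x}_i = \begin{bmatrix} x_i \\ 1 \end{bmatrix}, \qquad
    \tilde{w} = \begin{bmatrix} w \\ b \end{bmatrix},

and the trainer solves \min_{\tilde{w}} \sum_i (\tilde{w}^\top \tilde{x}_i - y_i)^2 + \lambda \lVert \tilde{w} \rVert^2, whose solution is \tilde{w} = (C + \lambda I)^{-1} L with C = \sum_i \tilde{x}_i \tilde{x}_i^\top and L = \sum_i y_i \tilde{x}_i. The hunks below build C and L in this augmented form without ever copying the samples.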
@@ -361,8 +369,15 @@ namespace dlib
             {
                 C += proj_x(i)*trans(proj_x(i));
                 L += y(i)*proj_x(i);
+                tempv += proj_x(i);
             }

+            // Make C = [ C       tempv
+            //            tempv'  proj_x.size() ]
+            C = join_cols(join_rows(C, tempv),
+                          join_rows(trans(tempv), uniform_matrix<scalar_type>(1,1, proj_x.size())));
+            L = join_cols(L, uniform_matrix<scalar_type>(1,1, sum(y)));
+
             eigenvalue_decomposition<general_matrix_type> eig(make_symmetric(C));
             const general_matrix_type V = eig.get_pseudo_v();
             const column_matrix_type D = eig.get_real_eigenvalues();
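
Why the join_rows/join_cols above produce the right thing: substituting the augmented vectors from the sketch earlier, the sums expand blockwise as

    C = \sum_i \tilde{x}_i \tilde{x}_i^\top
      = \begin{bmatrix} \sum_i x_i x_i^\top & \sum_i x_i \\ \sum_i x_i^\top & n \end{bmatrix},
    \qquad
    L = \begin{bmatrix} \sum_i y_i x_i \\ \sum_i y_i \end{bmatrix},

where tempv holds \sum_i x_i, proj_x.size() supplies n, and sum(y) fills the last entry of L. So the augmented Gram matrix is assembled directly from the unaugmented samples.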
@@ -374,10 +389,16 @@ namespace dlib
             {
                 // Save the transpose of V into a temporary because the subsequent matrix
                 // vector multiplies will be faster (because of better cache locality).
-                const general_matrix_type transV(trans(V));
+                const general_matrix_type transV( colm(trans(V),range(0,dims-1)) );
+                // Remember the pretend 1 at the end of proj_x(*).  We want to multiply trans(V)*proj_x(*)
+                // so to do this we pull the last column off trans(V) and store it separately.
+                const column_matrix_type lastV = colm(trans(V), dims);
                 Vx.set_size(proj_x.size());
                 for (long i = 0; i < proj_x.size(); ++i)
-                    Vx(i) = squared(transV*proj_x(i));
+                {
+                    Vx(i) = transV*proj_x(i);
+                    Vx(i) = squared(Vx(i) + lastV);
+                }
             }

             the_lambda = lambda;
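
The column split on trans(V) is plain block matrix multiplication. Writing V^\top = [\,T \;\; v\,], with T the first dims columns (transV) and v the last column (lastV), in my notation:

    V^\top \tilde{x}_i = \begin{bmatrix} T & v \end{bmatrix} \begin{bmatrix} x_i \\ 1 \end{bmatrix} = T x_i + v,

so squared(transV*proj_x(i) + lastV) is the elementwise square of V^\top \tilde{x}_i, again without materializing augmented copies of the samples.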
@@ -392,13 +413,17 @@ namespace dlib
             for (long idx = 0; idx < lams.size(); ++idx)
             {
                 // first compute G
-                tempv = reciprocal(D + uniform_matrix<scalar_type>(D.nr(),D.nc(), lams(idx)));
+                tempv = 1.0/(D + lams(idx));
                 tempm = scale_columns(V,tempv);
                 G = tempm*trans(V);

                 // compute the solution w for the current lambda
                 w = G*L;
+                // make w have the same length as the x_proj vectors.
+                const scalar_type b = w(dims);
+                w = colm(w,0,dims);

                 scalar_type looe = 0;
                 for (long i = 0; i < proj_x.size(); ++i)
                 {
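
The cheap per-lambda update works because the eigendecomposition C = V \,\mathrm{diag}(d)\, V^\top was computed once up front; for each candidate \lambda,

    G = (C + \lambda I)^{-1} = V \, \mathrm{diag}\!\left( \frac{1}{d_j + \lambda} \right) V^\top,

which is exactly what tempv = 1.0/(D + lams(idx)) followed by scale_columns(V,tempv)*trans(V) builds. Sweeping many lambda values therefore costs one eigendecomposition plus a few matrix multiplies per candidate.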
@@ -407,7 +432,7 @@ namespace dlib
                     const scalar_type temp = (1 - val);
                     scalar_type loov;
                     if (temp != 0)
-                        loov = (trans(w)*proj_x(i) - y(i)*val) / temp;
+                        loov = (trans(w)*proj_x(i) + b - y(i)*val) / temp;
                     else
                         loov = 0;
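
For context, this is the closed-form leave-one-out identity for ridge regression. The lines computing val are collapsed in this view; presumably it is the leverage h_i = \tilde{x}_i^\top G \tilde{x}_i, in which case

    \mathrm{LOOV}_i = \frac{\tilde{w}^\top \tilde{x}_i - y_i h_i}{1 - h_i},

and since \tilde{w}^\top \tilde{x}_i = w^\top x_i + b after w and b are split apart, the new + b term keeps the formula intact.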
@@ -439,11 +464,15 @@ namespace dlib
             // Now perform the main training.  That is, find w.
             // first, compute G = inv(C + the_lambda*I)
-            tempv = reciprocal(D + uniform_matrix<scalar_type>(D.nr(),D.nc(), the_lambda));
+            tempv = 1.0/(D + the_lambda);
             tempm = scale_columns(V,tempv);
             G = tempm*trans(V);
             w = G*L;

+            // make w have the same length as the x_proj vectors.
+            const scalar_type b = w(dims);
+            w = colm(w,0,dims);
+
             // If we haven't done this already and we are supposed to then compute the LOO error rate for
             // the current lambda and store the result in best_looe.
@@ -457,7 +486,7 @@ namespace dlib
                 const scalar_type temp = (1 - val);
                 scalar_type loov;
                 if (temp != 0)
-                    loov = (trans(w)*proj_x(i) - y(i)*val) / temp;
+                    loov = (trans(w)*proj_x(i) + b - y(i)*val) / temp;
                 else
                     loov = 0;
@@ -477,8 +506,8 @@ namespace dlib
             // convert w into a proper decision function
             decision_function<kernel_type> df;
-            df = ekm.convert_to_decision_function(colm(w,0,w.size()-1));
-            df.b = -w(w.size()-1); // don't forget about the bias we stuck onto all the vectors
+            df = ekm.convert_to_decision_function(w);
+            df.b = -b; // don't forget about the bias we stuck onto all the vectors

             // If we used an automatically derived basis then there isn't any point in
             // keeping the ekm around.  So free its memory.
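
For anyone wanting to try the trainer this patch feeds into, here is a minimal usage sketch against dlib's public krr_trainer interface with a linear kernel (the toy data and parameter choices are mine, not from this commit):

    #include <dlib/svm.h>
    #include <iostream>
    #include <vector>

    int main()
    {
        using namespace dlib;
        typedef matrix<double,0,1> sample_type;
        typedef linear_kernel<sample_type> kernel_type;

        // Toy 1-D regression data drawn from y = 2*x + 1.
        std::vector<sample_type> samples;
        std::vector<double> targets;
        for (double x = 0; x < 10; ++x)
        {
            sample_type s(1);
            s(0) = x;
            samples.push_back(s);
            targets.push_back(2*x + 1);
        }

        // With lambda left at its default of 0 the trainer picks the
        // regularization parameter via the leave-one-out machinery
        // shown in the diff above.
        krr_trainer<kernel_type> trainer;
        decision_function<kernel_type> df = trainer.train(samples, targets);

        sample_type s(1);
        s(0) = 3.5;
        std::cout << "prediction at 3.5: " << df(s) << std::endl; // close to 8
        return 0;
    }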