Commit c729e2ea authored by Davis King

Adding a copy of the krr_trainer that will be just for use with linear kernels.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404113
parent b2013e68
@@ -295,15 +295,17 @@ namespace dlib
                 // a convenient way of dealing with the bias term later on.
                 if (verbose == false)
                 {
-                    proj_x(i) = join_cols(ekm.project(x(i)), ones_matrix<scalar_type>(1,1));
+                    proj_x(i) = ekm.project(x(i));
                 }
                 else
                 {
-                    proj_x(i) = join_cols(ekm.project(x(i),err), ones_matrix<scalar_type>(1,1));
+                    proj_x(i) = ekm.project(x(i),err);
                     rs.add(err);
                 }
             }
+
+            const long dims = ekm.out_vector_size();

             if (verbose)
             {
                 std::cout << "Mean EKM projection error: " << rs.mean() << std::endl;
@@ -351,6 +353,12 @@ namespace dlib
                 leave one out error for sample x(i):
                     LOOE = loss(y(i), LOOV)
+
+                Finally, note that we will pretend there was a 1 appended to the end of each
+                vector in proj_x.  We won't actually do that though because we don't want to
+                have to make a copy of all the samples.  So throughout the following code
+                I have explicitly dealt with this.
+
             */

             general_matrix_type C, tempm, G;
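
Aside: the "pretend 1" trick the new comment describes is the usual way of folding a bias term into ridge regression. In my notation (not from the patch), each projected sample and the weight vector are augmented as

    \tilde{x}_i = \begin{bmatrix} x_i \\ 1 \end{bmatrix}, \qquad
    \tilde{w} = \begin{bmatrix} w \\ b \end{bmatrix},

and the trainer solves \min_{\tilde{w}} \sum_i (\tilde{w}^\top \tilde{x}_i - y_i)^2 + \lambda \lVert \tilde{w} \rVert^2, whose solution is \tilde{w} = (C + \lambda I)^{-1} L with C = \sum_i \tilde{x}_i \tilde{x}_i^\top and L = \sum_i y_i \tilde{x}_i. The hunks below build C and L in this augmented form without ever copying the samples.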
@@ -361,8 +369,15 @@ namespace dlib
             {
                 C += proj_x(i)*trans(proj_x(i));
                 L += y(i)*proj_x(i);
+                tempv += proj_x(i);
             }

+            // Make C = [ C       tempv
+            //            tempv'  proj_x.size() ]
+            C = join_cols(join_rows(C, tempv),
+                          join_rows(trans(tempv), uniform_matrix<scalar_type>(1,1, proj_x.size())));
+            L = join_cols(L, uniform_matrix<scalar_type>(1,1, sum(y)));
+
             eigenvalue_decomposition<general_matrix_type> eig(make_symmetric(C));
             const general_matrix_type V = eig.get_pseudo_v();
             const column_matrix_type D = eig.get_real_eigenvalues();
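
Why the join_rows/join_cols above produce the right thing: substituting the augmented vectors from the sketch earlier, the sums expand blockwise as

    C = \sum_i \tilde{x}_i \tilde{x}_i^\top
      = \begin{bmatrix} \sum_i x_i x_i^\top & \sum_i x_i \\ \sum_i x_i^\top & n \end{bmatrix},
    \qquad
    L = \begin{bmatrix} \sum_i y_i x_i \\ \sum_i y_i \end{bmatrix},

where tempv holds \sum_i x_i, proj_x.size() supplies n, and sum(y) fills the last entry of L. So the augmented Gram matrix is assembled directly from the unaugmented samples.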
@@ -374,10 +389,16 @@ namespace dlib
             {
                 // Save the transpose of V into a temporary because the subsequent matrix
                 // vector multiplies will be faster (because of better cache locality).
-                const general_matrix_type transV(trans(V));
+                const general_matrix_type transV( colm(trans(V),range(0,dims-1)) );
+                // Remember the pretend 1 at the end of proj_x(*).  We want to multiply trans(V)*proj_x(*)
+                // so to do this we pull the last column off trans(V) and store it separately.
+                const column_matrix_type lastV = colm(trans(V), dims);
                 Vx.set_size(proj_x.size());
                 for (long i = 0; i < proj_x.size(); ++i)
-                    Vx(i) = squared(transV*proj_x(i));
+                {
+                    Vx(i) = transV*proj_x(i);
+                    Vx(i) = squared(Vx(i) + lastV);
+                }
             }

             the_lambda = lambda;
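
The column split on trans(V) is plain block matrix multiplication. Writing V^\top = [\,T \;\; v\,], with T the first dims columns (transV) and v the last column (lastV), in my notation:

    V^\top \tilde{x}_i = \begin{bmatrix} T & v \end{bmatrix} \begin{bmatrix} x_i \\ 1 \end{bmatrix} = T x_i + v,

so squared(transV*proj_x(i) + lastV) is the elementwise square of V^\top \tilde{x}_i, again without materializing augmented copies of the samples.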
@@ -392,13 +413,17 @@ namespace dlib
             for (long idx = 0; idx < lams.size(); ++idx)
             {
                 // first compute G
-                tempv = reciprocal(D + uniform_matrix<scalar_type>(D.nr(),D.nc(), lams(idx)));
+                tempv = 1.0/(D + lams(idx));
                 tempm = scale_columns(V,tempv);
                 G = tempm*trans(V);

                 // compute the solution w for the current lambda
                 w = G*L;
+                // make w have the same length as the x_proj vectors.
+                const scalar_type b = w(dims);
+                w = colm(w,0,dims);

                 scalar_type looe = 0;
                 for (long i = 0; i < proj_x.size(); ++i)
                 {
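
The cheap per-lambda update works because the eigendecomposition C = V \,\mathrm{diag}(d)\, V^\top was computed once up front; for each candidate \lambda,

    G = (C + \lambda I)^{-1} = V \, \mathrm{diag}\!\left( \frac{1}{d_j + \lambda} \right) V^\top,

which is exactly what tempv = 1.0/(D + lams(idx)) followed by scale_columns(V,tempv)*trans(V) builds. Sweeping many lambda values therefore costs one eigendecomposition plus a few matrix multiplies per candidate.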
@@ -407,7 +432,7 @@ namespace dlib
                     const scalar_type temp = (1 - val);
                     scalar_type loov;
                     if (temp != 0)
-                        loov = (trans(w)*proj_x(i) - y(i)*val) / temp;
+                        loov = (trans(w)*proj_x(i) + b - y(i)*val) / temp;
                     else
                         loov = 0;
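
For context, this is the closed-form leave-one-out identity for ridge regression. The lines computing val are collapsed in this view; presumably it is the leverage h_i = \tilde{x}_i^\top G \tilde{x}_i, in which case

    \mathrm{LOOV}_i = \frac{\tilde{w}^\top \tilde{x}_i - y_i h_i}{1 - h_i},

and since \tilde{w}^\top \tilde{x}_i = w^\top x_i + b after w and b are split apart, the new + b term keeps the formula intact.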
@@ -439,11 +464,15 @@ namespace dlib
             // Now perform the main training.  That is, find w.
             // first, compute G = inv(C + the_lambda*I)
-            tempv = reciprocal(D + uniform_matrix<scalar_type>(D.nr(),D.nc(), the_lambda));
+            tempv = 1.0/(D + the_lambda);
             tempm = scale_columns(V,tempv);
             G = tempm*trans(V);
             w = G*L;

+            // make w have the same length as the x_proj vectors.
+            const scalar_type b = w(dims);
+            w = colm(w,0,dims);
+
             // If we haven't done this already and we are supposed to then compute the LOO error rate for
             // the current lambda and store the result in best_looe.
@@ -457,7 +486,7 @@ namespace dlib
                 const scalar_type temp = (1 - val);
                 scalar_type loov;
                 if (temp != 0)
-                    loov = (trans(w)*proj_x(i) - y(i)*val) / temp;
+                    loov = (trans(w)*proj_x(i) + b - y(i)*val) / temp;
                 else
                     loov = 0;
@@ -477,8 +506,8 @@ namespace dlib
             // convert w into a proper decision function
             decision_function<kernel_type> df;
-            df = ekm.convert_to_decision_function(colm(w,0,w.size()-1));
-            df.b = -w(w.size()-1); // don't forget about the bias we stuck onto all the vectors
+            df = ekm.convert_to_decision_function(w);
+            df.b = -b; // don't forget about the bias we stuck onto all the vectors

             // If we used an automatically derived basis then there isn't any point in
             // keeping the ekm around.  So free its memory.
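
For anyone wanting to try the trainer this patch feeds into, here is a minimal usage sketch against dlib's public krr_trainer interface with a linear kernel (the toy data and parameter choices are mine, not from this commit):

    #include <dlib/svm.h>
    #include <iostream>
    #include <vector>

    int main()
    {
        using namespace dlib;
        typedef matrix<double,0,1> sample_type;
        typedef linear_kernel<sample_type> kernel_type;

        // Toy 1-D regression data drawn from y = 2*x + 1.
        std::vector<sample_type> samples;
        std::vector<double> targets;
        for (double x = 0; x < 10; ++x)
        {
            sample_type s(1);
            s(0) = x;
            samples.push_back(s);
            targets.push_back(2*x + 1);
        }

        // With lambda left at its default of 0 the trainer picks the
        // regularization parameter via the leave-one-out machinery
        // shown in the diff above.
        krr_trainer<kernel_type> trainer;
        decision_function<kernel_type> df = trainer.train(samples, targets);

        sample_type s(1);
        s(0) = 3.5;
        std::cout << "prediction at 3.5: " << df(s) << std::endl; // close to 8
        return 0;
    }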