Commit 67f3f463 authored by Davis King

Refactored the krr_trainer into two objects: an rr_trainer, which just does
linear ridge regression, and the krr_trainer, which uses the empirical_kernel_map
to do non-linear ridge regression.  No changes were made to the behavior of the
krr_trainer.  This update just allows linear ridge regression to be used
without a superfluous empirical_kernel_map running at the beginning of the training
process.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404114
parent c729e2ea
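
For illustration only (this sketch is not part of the commit, and the sample data and kernel
parameter are made up), the split lets a purely linear problem skip the empirical_kernel_map
entirely while kernelized problems keep using krr_trainer exactly as before:

    #include <dlib/svm.h>
    #include <vector>

    using namespace dlib;

    typedef matrix<double,0,1> sample_type;

    void ridge_regression_example(
        const std::vector<sample_type>& samples,
        const std::vector<double>& targets
    )
    {
        // Plain linear ridge regression: no empirical_kernel_map is ever built.
        rr_trainer<linear_kernel<sample_type> > lin_trainer;
        decision_function<linear_kernel<sample_type> > lin_df = lin_trainer.train(samples, targets);

        // Non-linear (kernel) ridge regression still goes through krr_trainer,
        // which projects the samples with an empirical_kernel_map internally.
        krr_trainer<radial_basis_kernel<sample_type> > k_trainer;
        k_trainer.set_kernel(radial_basis_kernel<sample_type>(0.1));
        decision_function<radial_basis_kernel<sample_type> > k_df = k_trainer.train(samples, targets);
    }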
......@@ -9,6 +9,7 @@
#include "empirical_kernel_map.h"
#include "linearly_independent_subset_finder.h"
#include "../statistics.h"
#include "rr_trainer.h"
#include "krr_trainer_abstract.h"
#include <vector>
#include <iostream>
......@@ -31,43 +32,41 @@ namespace dlib
krr_trainer (
) :
verbose(false),
use_regression_loss(true),
lambda(0),
max_basis_size(400),
ekm_stale(true)
{
// default lambda search list
lams = matrix_cast<scalar_type>(logspace(-9, 2, 50));
}
void be_verbose (
)
{
verbose = true;
trainer.be_verbose();
}
void be_quiet (
)
{
verbose = false;
trainer.be_quiet();
}
void use_regression_loss_for_loo_cv (
)
{
use_regression_loss = true;
trainer.use_regression_loss_for_loo_cv();
}
void use_classification_loss_for_loo_cv (
)
{
use_regression_loss = false;
trainer.use_classification_loss_for_loo_cv();
}
bool will_use_regression_loss_for_loo_cv (
) const
{
return use_regression_loss;
return trainer.will_use_regression_loss_for_loo_cv();
}
const kernel_type get_kernel (
......@@ -148,13 +147,13 @@ namespace dlib
<< "\n\t this: " << this
);
lambda = lambda_;
trainer.set_lambda(lambda_);
}
const scalar_type get_lambda (
) const
{
return lambda;
return trainer.get_lambda();
}
template <typename EXP>
......@@ -172,14 +171,13 @@ namespace dlib
<< "\n\t this: " << this
);
lams = matrix_cast<scalar_type>(lambdas);
trainer.set_search_lambdas(lambdas);
}
const matrix<scalar_type,0,0,mem_manager_type>& get_search_lambdas (
) const
{
return lams;
return trainer.get_search_lambdas();
}
template <
......@@ -304,210 +302,24 @@ namespace dlib
}
}
const long dims = ekm.out_vector_size();
if (verbose)
{
std::cout << "Mean EKM projection error: " << rs.mean() << std::endl;
std::cout << "Standard deviation of EKM projection error: " << rs.stddev() << std::endl;
}
/*
Notes on the solution of KRR
Let A = a proj_x.size() by ekm.out_vector_size() matrix which contains
all the projected data samples.
Let I = an identity matrix
Let C = trans(A)*A
Let L = trans(A)*y
Then the optimal w is given by:
w = inv(C + lambda*I) * L
There is a trick to compute leave one out cross validation results for many different
lambda values quickly. The following paper has a detailed discussion of various
approaches:
Notes on Regularized Least Squares by Ryan M. Rifkin and Ross A. Lippert.
In the implementation of the krr_trainer I'm only using two simple equations
from the above paper.
First note that inv(C + lambda*I) can be computed for many different lambda
values in an efficient way by using an eigen decomposition of C. So we use
the fact that:
inv(C + lambda*I) == V*inv(D + lambda*I)*trans(V)
where V*D*trans(V) == C
Also, via some simple linear algebra the above paper works out that the leave one out
value for a sample x(i) is equal to the following (we refer to proj_x(i) as x(i) for brevity):
Let G = inv(C + lambda*I)
let val = trans(x(i))*G*x(i);
leave one out value for sample x(i):
LOOV = (trans(w)*x(i) - y(i)*val) / (1 - val)
leave one out error for sample x(i):
LOOE = loss(y(i), LOOV)
Finally, note that we will pretend there was a 1 appended to the end of each
vector in proj_x. We won't actually do that though because we don't want to
have to make a copy of all the samples. So throughout the following code
I have explicitly dealt with this.
*/
general_matrix_type C, tempm, G;
column_matrix_type L, tempv, w;
// compute C and L
for (long i = 0; i < proj_x.size(); ++i)
{
C += proj_x(i)*trans(proj_x(i));
L += y(i)*proj_x(i);
tempv += proj_x(i);
}
// Make C = [C tempv
// tempv' proj_x.size()]
C = join_cols(join_rows(C, tempv),
join_rows(trans(tempv), uniform_matrix<scalar_type>(1,1, proj_x.size())));
L = join_cols(L, uniform_matrix<scalar_type>(1,1, sum(y)));
eigenvalue_decomposition<general_matrix_type> eig(make_symmetric(C));
const general_matrix_type V = eig.get_pseudo_v();
const column_matrix_type D = eig.get_real_eigenvalues();
// We can save some work by pre-multiplying the proj_x vectors by trans(V)
// and saving the result so we don't have to recompute it over and over later.
matrix<column_matrix_type,0,1,mem_manager_type > Vx;
if (lambda == 0 || output_looe)
{
// Save the transpose of V into a temporary because the subsequent matrix
// vector multiplies will be faster (because of better cache locality).
const general_matrix_type transV( colm(trans(V),range(0,dims-1)) );
// Remember the pretend 1 at the end of proj_x(*). We want to multiply trans(V)*proj_x(*)
// so to do this we pull the last column off trans(V) and store it separately.
const column_matrix_type lastV = colm(trans(V), dims);
Vx.set_size(proj_x.size());
for (long i = 0; i < proj_x.size(); ++i)
{
Vx(i) = transV*proj_x(i);
Vx(i) = squared(Vx(i) + lastV);
}
}
the_lambda = lambda;
// If we need to automatically select a lambda then do so using the LOOE trick described
// above.
if (lambda == 0)
{
best_looe = std::numeric_limits<scalar_type>::max();
// Compute leave one out errors for a bunch of different lambdas and pick the best one.
for (long idx = 0; idx < lams.size(); ++idx)
{
// first compute G
tempv = 1.0/(D + lams(idx));
tempm = scale_columns(V,tempv);
G = tempm*trans(V);
// compute the solution w for the current lambda
w = G*L;
// make w have the same length as the x_proj vectors.
const scalar_type b = w(dims);
w = colm(w,0,dims);
scalar_type looe = 0;
for (long i = 0; i < proj_x.size(); ++i)
{
// perform equivalent of: val = trans(proj_x(i))*G*proj_x(i);
const scalar_type val = dot(tempv, Vx(i));
const scalar_type temp = (1 - val);
scalar_type loov;
if (temp != 0)
loov = (trans(w)*proj_x(i) + b - y(i)*val) / temp;
else
loov = 0;
looe += loss(loov, y(i));
}
// Keep track of the lambda which gave the lowest looe. If two lambdas
// have the same looe then pick the biggest lambda.
if (looe < best_looe || (looe == best_looe && lams(idx) > the_lambda))
{
best_looe = looe;
the_lambda = lams(idx);
}
}
// mark that we saved the looe to best_looe already
output_looe = false;
best_looe /= proj_x.size();
if (verbose)
{
using namespace std;
cout << "Using lambda: " << the_lambda << endl;
cout << "LOO Error: " << best_looe << endl;
}
}
// Now perform the main training. That is, find w.
// first, compute G = inv(C + the_lambda*I)
tempv = 1.0/(D + the_lambda);
tempm = scale_columns(V,tempv);
G = tempm*trans(V);
w = G*L;
// make w have the same length as the x_proj vectors.
const scalar_type b = w(dims);
w = colm(w,0,dims);
decision_function<linear_kernel<matrix<scalar_type,0,0,mem_manager_type> > > lin_df;
// If we haven't done this already and we are supposed to then compute the LOO error rate for
// the current lambda and store the result in best_looe.
if (output_looe)
{
best_looe = 0;
for (long i = 0; i < proj_x.size(); ++i)
{
// perform equivalent of: val = trans(proj_x(i))*G*proj_x(i);
const scalar_type val = dot(tempv, Vx(i));
const scalar_type temp = (1 - val);
scalar_type loov;
if (temp != 0)
loov = (trans(w)*proj_x(i) + b - y(i)*val) / temp;
else
loov = 0;
best_looe += loss(loov, y(i));
}
best_looe /= proj_x.size();
if (verbose)
{
using namespace std;
cout << "Using lambda: " << the_lambda << endl;
cout << "LOO Error: " << best_looe << endl;
}
}
lin_df = trainer.train(proj_x,y, best_looe, the_lambda);
else
lin_df = trainer.train(proj_x,y);
// convert w into a proper decision function
// convert the linear decision function into a kernelized one.
decision_function<kernel_type> df;
df = ekm.convert_to_decision_function(w);
df.b = -b; // don't forget about the bias we stuck onto all the vectors
df = ekm.convert_to_decision_function(lin_df.basis_vectors(0));
df.b = lin_df.b;
// If we used an automatically derived basis then there isn't any point in
// keeping the ekm around. So free its memory.
......@@ -519,25 +331,6 @@ namespace dlib
return df;
}
inline scalar_type loss (
const scalar_type& a,
const scalar_type& b
) const
{
if (use_regression_loss)
{
return (a-b)*(a-b);
}
else
{
// if a and b have the same sign then no loss
if (a*b >= 0)
return 0;
else
return 1;
}
}
/*!
CONVENTION
......@@ -545,19 +338,19 @@ namespace dlib
- kern or basis have changed since the last time
they were loaded into the ekm
- get_lambda() == lambda
- get_lambda() == trainer.get_lambda()
- get_kernel() == kern
- get_max_basis_size() == max_basis_size
- will_use_regression_loss_for_loo_cv() == use_regression_loss
- get_search_lambdas() == lams
- will_use_regression_loss_for_loo_cv() == trainer.will_use_regression_loss_for_loo_cv()
- get_search_lambdas() == trainer.get_search_lambdas()
- basis_loaded() == (basis.size() != 0)
!*/
rr_trainer<linear_kernel<matrix<scalar_type,0,0,mem_manager_type> > > trainer;
bool verbose;
bool use_regression_loss;
scalar_type lambda;
kernel_type kern;
unsigned long max_basis_size;
......@@ -566,7 +359,6 @@ namespace dlib
mutable empirical_kernel_map<kernel_type> ekm;
mutable bool ekm_stale;
matrix<scalar_type,0,0,mem_manager_type> lams;
};
}
......
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_KRR_TRAInER_H__
#define DLIB_KRR_TRAInER_H__
#ifndef DLIB_RR_TRAInER_H__
#define DLIB_RR_TRAInER_H__
#include "../algs.h"
#include "function.h"
......@@ -9,7 +9,7 @@
#include "empirical_kernel_map.h"
#include "linearly_independent_subset_finder.h"
#include "../statistics.h"
#include "krr_trainer_abstract.h"
#include "rr_trainer_abstract.h"
#include <vector>
#include <iostream>
......@@ -18,7 +18,7 @@ namespace dlib
template <
typename K
>
class krr_trainer
class rr_trainer
{
public:
......@@ -28,13 +28,15 @@ namespace dlib
typedef typename kernel_type::mem_manager_type mem_manager_type;
typedef decision_function<kernel_type> trained_function_type;
krr_trainer (
// You are getting a compiler error on this line because you supplied a non-linear or
// sparse kernel to the rr_trainer object. You have to use dlib::linear_kernel with this trainer.
COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value));
rr_trainer (
) :
verbose(false),
use_regression_loss(true),
lambda(0),
max_basis_size(400),
ekm_stale(true)
lambda(0)
{
// default lambda search list
lams = matrix_cast<scalar_type>(logspace(-9, 2, 50));
......@@ -73,67 +75,7 @@ namespace dlib
const kernel_type get_kernel (
) const
{
return kern;
}
void set_kernel (
const kernel_type& k
)
{
kern = k;
}
template <typename T>
void set_basis (
const T& basis_samples
)
{
// make sure requires clause is not broken
DLIB_ASSERT(basis_samples.size() > 0 && is_vector(vector_to_matrix(basis_samples)),
"\tvoid krr_trainer::set_basis(basis_samples)"
<< "\n\t You have to give a non-empty set of basis_samples and it must be a vector"
<< "\n\t basis_samples.size(): " << basis_samples.size()
<< "\n\t is_vector(vector_to_matrix(basis_samples)): " << is_vector(vector_to_matrix(basis_samples))
<< "\n\t this: " << this
);
basis = vector_to_matrix(basis_samples);
ekm_stale = true;
}
bool basis_loaded (
) const
{
return (basis.size() != 0);
}
void clear_basis (
)
{
basis.set_size(0);
ekm.clear();
ekm_stale = true;
}
unsigned long get_max_basis_size (
) const
{
return max_basis_size;
}
void set_max_basis_size (
unsigned long max_basis_size_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(max_basis_size_ > 0,
"\t void krr_trainer::set_max_basis_size()"
<< "\n\t max_basis_size_ must be greater than 0"
<< "\n\t max_basis_size_: " << max_basis_size_
<< "\n\t this: " << this
);
max_basis_size = max_basis_size_;
return kernel_type();
}
void set_lambda (
......@@ -142,7 +84,7 @@ namespace dlib
{
// make sure requires clause is not broken
DLIB_ASSERT(lambda_ >= 0,
"\t void krr_trainer::set_lambda()"
"\t void rr_trainer::set_lambda()"
<< "\n\t lambda must be greater than or equal to 0"
<< "\n\t lambda: " << lambda
<< "\n\t this: " << this
......@@ -164,7 +106,7 @@ namespace dlib
{
// make sure requires clause is not broken
DLIB_ASSERT(is_vector(lambdas) && lambdas.size() > 0 && min(lambdas) > 0,
"\t void krr_trainer::set_search_lambdas()"
"\t void rr_trainer::set_search_lambdas()"
<< "\n\t lambdas must be a non-empty vector of values"
<< "\n\t is_vector(lambdas): " << is_vector(lambdas)
<< "\n\t lambdas.size(): " << lambdas.size()
......@@ -240,7 +182,7 @@ namespace dlib
{
// make sure requires clause is not broken
DLIB_ASSERT(is_learning_problem(x,y),
"\t decision_function krr_trainer::train(x,y)"
"\t decision_function rr_trainer::train(x,y)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t is_vector(x): " << is_vector(x)
<< "\n\t is_vector(y): " << is_vector(y)
......@@ -253,69 +195,21 @@ namespace dlib
{
// make sure requires clause is not broken
DLIB_ASSERT(is_binary_classification_problem(x,y),
"\t decision_function krr_trainer::train(x,y)"
"\t decision_function rr_trainer::train(x,y)"
<< "\n\t invalid inputs were given to this function"
);
}
#endif
// The first thing we do is make sure we have an appropriate ekm ready for use below.
if (basis_loaded())
{
if (ekm_stale)
{
ekm.load(kern, basis);
ekm_stale = false;
}
}
else
{
linearly_independent_subset_finder<kernel_type> lisf(kern, max_basis_size);
fill_lisf(lisf, x);
ekm.load(lisf);
}
if (verbose)
{
std::cout << "\nNumber of basis vectors used: " << ekm.out_vector_size() << std::endl;
}
typedef matrix<scalar_type,0,1,mem_manager_type> column_matrix_type;
typedef matrix<scalar_type,0,0,mem_manager_type> general_matrix_type;
running_stats<scalar_type> rs;
// Now we project all the x samples into kernel space using our EKM
matrix<column_matrix_type,0,1,mem_manager_type > proj_x;
proj_x.set_size(x.size());
for (long i = 0; i < proj_x.size(); ++i)
{
scalar_type err;
// Note that we also append a 1 to the end of the vectors because this is
// a convenient way of dealing with the bias term later on.
if (verbose == false)
{
proj_x(i) = ekm.project(x(i));
}
else
{
proj_x(i) = ekm.project(x(i),err);
rs.add(err);
}
}
const long dims = ekm.out_vector_size();
if (verbose)
{
std::cout << "Mean EKM projection error: " << rs.mean() << std::endl;
std::cout << "Standard deviation of EKM projection error: " << rs.stddev() << std::endl;
}
const long dims = x(0).size();
/*
Notes on the solution of KRR
Notes on the solution of ridge regression
Let A = a proj_x.size() by ekm.out_vector_size() matrix which contains
Let A = an x.size() by dims matrix which contains
all the projected data samples.
Let I = an identity matrix
......@@ -333,7 +227,7 @@ namespace dlib
Notes on Regularized Least Squares by Ryan M. Rifkin and Ross A. Lippert.
In the implementation of the krr_trainer I'm only using two simple equations
In the implementation of the rr_trainer I'm only using two simple equations
from the above paper.
......@@ -344,7 +238,7 @@ namespace dlib
where V*D*trans(V) == C
Also, via some simple linear algebra the above paper works out that the leave one out
value for a sample x(i) is equal to the following (we refer to proj_x(i) as x(i) for brevity):
value for a sample x(i) is equal to the following:
Let G = inv(C + lambda*I)
let val = trans(x(i))*G*x(i);
......@@ -356,7 +250,7 @@ namespace dlib
Finally, note that we will pretend there was a 1 appended to the end of each
vector in proj_x. We won't actually do that though because we don't want to
vector in x. We won't actually do that though because we don't want to
have to make a copy of all the samples. So throughout the following code
I have explicitly dealt with this.
*/
......@@ -365,24 +259,25 @@ namespace dlib
column_matrix_type L, tempv, w;
// compute C and L
for (long i = 0; i < proj_x.size(); ++i)
for (long i = 0; i < x.size(); ++i)
{
C += proj_x(i)*trans(proj_x(i));
L += y(i)*proj_x(i);
tempv += proj_x(i);
C += x(i)*trans(x(i));
L += y(i)*x(i);
tempv += x(i);
}
// Account for the extra 1 that we pretend is appended to x
// Make C = [C tempv
// tempv' proj_x.size()]
// tempv' x.size()]
C = join_cols(join_rows(C, tempv),
join_rows(trans(tempv), uniform_matrix<scalar_type>(1,1, proj_x.size())));
join_rows(trans(tempv), uniform_matrix<scalar_type>(1,1, x.size())));
L = join_cols(L, uniform_matrix<scalar_type>(1,1, sum(y)));
eigenvalue_decomposition<general_matrix_type> eig(make_symmetric(C));
const general_matrix_type V = eig.get_pseudo_v();
const column_matrix_type D = eig.get_real_eigenvalues();
// We can save some work by pre-multiplying the proj_x vectors by trans(V)
// We can save some work by pre-multiplying the x vectors by trans(V)
// and saving the result so we don't have to recompute it over and over later.
matrix<column_matrix_type,0,1,mem_manager_type > Vx;
if (lambda == 0 || output_looe)
......@@ -390,13 +285,13 @@ namespace dlib
// Save the transpose of V into a temporary because the subsequent matrix
// vector multiplies will be faster (because of better cache locality).
const general_matrix_type transV( colm(trans(V),range(0,dims-1)) );
// Remember the pretend 1 at the end of proj_x(*). We want to multiply trans(V)*proj_x(*)
// Remember the pretend 1 at the end of x(*). We want to multiply trans(V)*x(*)
// so to do this we pull the last column off trans(V) and store it separately.
const column_matrix_type lastV = colm(trans(V), dims);
Vx.set_size(proj_x.size());
for (long i = 0; i < proj_x.size(); ++i)
Vx.set_size(x.size());
for (long i = 0; i < x.size(); ++i)
{
Vx(i) = transV*proj_x(i);
Vx(i) = transV*x(i);
Vx(i) = squared(Vx(i) + lastV);
}
}
......@@ -425,14 +320,14 @@ namespace dlib
w = colm(w,0,dims);
scalar_type looe = 0;
for (long i = 0; i < proj_x.size(); ++i)
for (long i = 0; i < x.size(); ++i)
{
// perform equivalent of: val = trans(proj_x(i))*G*proj_x(i);
// perform equivalent of: val = trans(x(i))*G*x(i);
const scalar_type val = dot(tempv, Vx(i));
const scalar_type temp = (1 - val);
scalar_type loov;
if (temp != 0)
loov = (trans(w)*proj_x(i) + b - y(i)*val) / temp;
loov = (trans(w)*x(i) + b - y(i)*val) / temp;
else
loov = 0;
......@@ -450,7 +345,7 @@ namespace dlib
// mark that we saved the looe to best_looe already
output_looe = false;
best_looe /= proj_x.size();
best_looe /= x.size();
if (verbose)
{
......@@ -479,21 +374,21 @@ namespace dlib
if (output_looe)
{
best_looe = 0;
for (long i = 0; i < proj_x.size(); ++i)
for (long i = 0; i < x.size(); ++i)
{
// perform equivalent of: val = trans(proj_x(i))*G*proj_x(i);
// perform equivalent of: val = trans(x(i))*G*x(i);
const scalar_type val = dot(tempv, Vx(i));
const scalar_type temp = (1 - val);
scalar_type loov;
if (temp != 0)
loov = (trans(w)*proj_x(i) + b - y(i)*val) / temp;
loov = (trans(w)*x(i) + b - y(i)*val) / temp;
else
loov = 0;
best_looe += loss(loov, y(i));
}
best_looe /= proj_x.size();
best_looe /= x.size();
if (verbose)
{
......@@ -506,16 +401,12 @@ namespace dlib
// convert w into a proper decision function
decision_function<kernel_type> df;
df = ekm.convert_to_decision_function(w);
df.alpha.set_size(1);
df.alpha = 1;
df.basis_vectors.set_size(1);
df.basis_vectors(0) = w;
df.b = -b; // don't forget about the bias we stuck onto all the vectors
// If we used an automatically derived basis then there isn't any point in
// keeping the ekm around. So free its memory.
if (basis_loaded() == false)
{
ekm.clear();
}
return df;
}
......@@ -541,17 +432,10 @@ namespace dlib
/*!
CONVENTION
- if (ekm_stale) then
- kern or basis have changed since the last time
they were loaded into the ekm
- get_lambda() == lambda
- get_kernel() == kern
- get_max_basis_size() == max_basis_size
- get_kernel() == kernel_type()
- will_use_regression_loss_for_loo_cv() == use_regression_loss
- get_search_lambdas() == lams
- basis_loaded() == (basis.size() != 0)
!*/
bool verbose;
......@@ -559,18 +443,11 @@ namespace dlib
scalar_type lambda;
kernel_type kern;
unsigned long max_basis_size;
matrix<sample_type,0,1,mem_manager_type> basis;
mutable empirical_kernel_map<kernel_type> ekm;
mutable bool ekm_stale;
matrix<scalar_type,0,0,mem_manager_type> lams;
};
}
#endif // DLIB_KRR_TRAInER_H__
#endif // DLIB_RR_TRAInER_H__
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_RR_TRAInER_ABSTRACT_H__
#ifdef DLIB_RR_TRAInER_ABSTRACT_H__
#include "../algs.h"
#include "function_abstract.h"
namespace dlib
{
template <
typename K
>
class rr_trainer
{
/*!
REQUIREMENTS ON K
is the dlib::linear_kernel instantiated with some kind of column vector.
INITIAL VALUE
- get_lambda() == 0
- basis_loaded() == false
- get_max_basis_size() == 400
- will_use_regression_loss_for_loo_cv() == true
- get_search_lambdas() == logspace(-9, 2, 50)
- this object will not be verbose unless be_verbose() is called
WHAT THIS OBJECT REPRESENTS
This object represents a tool for performing linear ridge regression
(This basic algorithm is also known by many other names, e.g. regularized
least squares or least squares SVM).
The exact definition of what this algorithm does is this:
Find w and b that minimize the following (x_i are input samples and y_i are target values):
lambda*dot(w,w) + sum_over_i( (f(x_i) - y_i)^2 )
where f(x) == dot(x,w) - b
So this algorithm is just regular old least squares regression but
with the addition of a regularization term which encourages small w.
It is capable of estimating the lambda parameter using leave-one-out cross-validation.
The leave-one-out cross-validation implementation is based on the techniques
discussed in this paper:
Notes on Regularized Least Squares by Ryan M. Rifkin and Ross A. Lippert.
!*/
public:
typedef K kernel_type;
typedef typename kernel_type::scalar_type scalar_type;
typedef typename kernel_type::sample_type sample_type;
typedef typename kernel_type::mem_manager_type mem_manager_type;
typedef decision_function<kernel_type> trained_function_type;
rr_trainer (
);
/*!
ensures
- This object is properly initialized and ready to be used.
!*/
void be_verbose (
);
/*!
ensures
- This object will print status messages to standard out.
!*/
void be_quiet (
);
/*!
ensures
- this object will not print anything to standard out
!*/
const kernel_type get_kernel (
) const;
/*!
ensures
- returns a copy of the kernel function in use by this object
!*/
void set_lambda (
scalar_type lambda
);
/*!
requires
- lambda >= 0
ensures
- #get_lambda() == lambda
!*/
const scalar_type get_lambda (
) const;
/*!
ensures
- returns the regularization parameter. It is the parameter that
determines the trade off between trying to fit the training data
exactly or allowing more errors but hopefully improving the
generalization ability of the resulting function. Smaller values
encourage exact fitting while larger values of lambda may encourage
better generalization.
Note that a lambda of 0 has a special meaning. It indicates to this
object that it should automatically determine an appropriate lambda
value. This is done using leave-one-out cross-validation.
!*/
void use_regression_loss_for_loo_cv (
);
/*!
ensures
- #will_use_regression_loss_for_loo_cv() == true
!*/
void use_classification_loss_for_loo_cv (
);
/*!
ensures
- #will_use_regression_loss_for_loo_cv() == false
!*/
bool will_use_regression_loss_for_loo_cv (
) const;
/*!
ensures
- returns true if the automatic lambda estimation will attempt to estimate a lambda
appropriate for a regression task. Otherwise it will try and find one which
minimizes the number of classification errors.
!*/
template <typename EXP>
void set_search_lambdas (
const matrix_exp<EXP>& lambdas
);
/*!
requires
- is_vector(lambdas) == true
- lambdas.size() > 0
- min(lambdas) > 0
- lambdas must contain floating point numbers
ensures
- #get_search_lambdas() == lambdas
!*/
const matrix<scalar_type,0,0,mem_manager_type>& get_search_lambdas (
) const;
/*!
ensures
- returns a matrix M such that:
- is_vector(M) == true
- M == a list of all the lambda values which will be tried when performing
LOO cross-validation for determining the best lambda.
!*/
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y
) const;
/*!
requires
- x == a matrix or something convertible to a matrix via vector_to_matrix().
Also, x should contain sample_type objects.
- y == a matrix or something convertible to a matrix via vector_to_matrix().
Also, y should contain scalar_type objects.
- is_learning_problem(x,y) == true
- if (get_lambda() == 0 && will_use_regression_loss_for_loo_cv() == false) then
- is_binary_classification_problem(x,y) == true
(i.e. if you want this algorithm to estimate a lambda appropriate for
classification functions then you had better give a valid classification
problem)
ensures
- performs linear ridge regression given the training samples in x and target values in y.
- returns a decision_function F with the following properties:
- F(new_x) == predicted y value
- F.alpha.size() == 1
- F.basis_vectors.size() == 1
- F.alpha(0) == 1
- if (get_lambda() == 0) then
- This object will perform internal leave-one-out cross-validation to determine an
appropriate lambda automatically. It will compute the LOO error for each lambda
in get_search_lambdas() and select the best one.
- if (will_use_regression_loss_for_loo_cv()) then
- the lambda selected will be the one that minimizes the mean squared error.
- else
- the lambda selected will be the one that minimizes the number of classification
mistakes. We say a point is classified correctly if the output of the
decision_function has the same sign as its label.
- #get_lambda() == 0
(i.e. we don't change the get_lambda() value. If you want to know what the
automatically selected lambda value was then call the version of train()
defined below)
- else
- The user supplied value of get_lambda() will be used to perform the ridge regression.
!*/
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y,
scalar_type& looe
) const;
/*!
requires
- all the requirements for train(x,y) must be satisfied
ensures
- returns train(x,y)
(i.e. executes train(x,y) and returns its result)
- if (will_use_regression_loss_for_loo_cv())
- #looe == the mean squared error as determined by leave-one-out
cross-validation.
- else
- #looe == the fraction of samples misclassified as determined by
leave-one-out cross-validation.
!*/
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y,
scalar_type& looe,
scalar_type& lambda_used
) const;
/*!
requires
- all the requirements for train(x,y) must be satisfied
ensures
- returns train(x,y)
(i.e. executes train(x,y) and returns its result)
- if (will_use_regression_loss_for_loo_cv())
- #looe == the mean squared error as determined by leave-one-out
cross-validation.
- else
- #looe == the fraction of samples misclassified as determined by
leave-one-out cross-validation.
- #lambda_used == the value of lambda used to generate the
decision_function. Note that this lambda value is always
equal to get_lambda() if get_lambda() isn't 0.
!*/
};
}
#endif // DLIB_RR_TRAInER_ABSTRACT_H__
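
As a supplementary note (not part of the commit), the closed-form solution and the eigen
decomposition reuse trick described in the comments inside train() can be checked with a small
standalone program against dlib's matrix API. The toy data and lambda value below are made up,
and the bias handling via the appended 1 is omitted for brevity:

    #include <dlib/matrix.h>
    #include <iostream>

    int main()
    {
        using namespace dlib;

        // Toy data: 4 samples with 2 features each, one sample per row of A.
        matrix<double> A(4,2);
        A = 1, 2,
            2, 1,
            3, 3,
            4, 1;
        matrix<double,4,1> y;
        y = 1, 2, 3, 2;

        const double lambda = 0.1;

        // Closed form from the notes: C = trans(A)*A, L = trans(A)*y,
        // w = inv(C + lambda*I)*L.
        matrix<double> C = trans(A)*A;
        matrix<double> L = trans(A)*y;
        matrix<double> w_direct = inv(C + lambda*identity_matrix<double>(C.nr()))*L;

        // Same solution via the eigen decomposition trick, which lets many lambda
        // values reuse a single decomposition of C:
        //   inv(C + lambda*I) == V*diag(1/(D + lambda))*trans(V), where C == V*diag(D)*trans(V)
        eigenvalue_decomposition<matrix<double> > eig(make_symmetric(C));
        const matrix<double> V = eig.get_pseudo_v();
        const matrix<double,0,1> D = eig.get_real_eigenvalues();
        matrix<double> w_eig = scale_columns(V, 1.0/(D + lambda))*trans(V)*L;

        std::cout << "w (direct inverse): " << trans(w_direct);
        std::cout << "w (eigen reuse):    " << trans(w_eig);
        return 0;
    }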