Commit eff11e27 authored by Davis King

Renamed the support_vectors field of the decision_function and distance_function objects

to basis_vectors.  A long time ago the name support_vectors made sense, but now that these
objects are used by many algorithms that don't technically produce support vectors,
the name is confusing.  The name basis_vectors more accurately reflects how these
objects are actually used.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403278
parent 4ddd0cae
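
For context, a minimal usage sketch of the renamed field (not part of this commit; the kernel choice, gamma, and numbers are illustrative assumptions, but the public fields and operator() it exercises are the ones shown in the diff below):

#include <dlib/svm.h>
#include <iostream>

int main()
{
    typedef dlib::matrix<double,2,1> sample_type;
    typedef dlib::radial_basis_kernel<sample_type> kernel_type;

    dlib::decision_function<kernel_type> df;
    df.kernel_function = kernel_type(0.1);
    df.b = 0;

    // Hand-filled weights and basis vectors; normally a trainer produces these.
    df.alpha.set_size(2);
    df.basis_vectors.set_size(2);
    df.alpha(0) =  0.5;  df.basis_vectors(0) =  1, 2;
    df.alpha(1) = -0.5;  df.basis_vectors(1) = -1, 0;

    sample_type x;
    x = 0.5, 0.5;
    // Evaluates the sum over i of alpha(i)*kernel_function(x, basis_vectors(i)), minus b
    std::cout << df(x) << std::endl;
    return 0;
}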
......@@ -34,7 +34,7 @@ namespace dlib
scalar_vector_type alpha;
scalar_type b;
K kernel_function;
- sample_vector_type support_vectors;
+ sample_vector_type basis_vectors;
decision_function (
) : b(0), kernel_function(K()) {}
......@@ -45,19 +45,19 @@ namespace dlib
alpha(d.alpha),
b(d.b),
kernel_function(d.kernel_function),
- support_vectors(d.support_vectors)
+ basis_vectors(d.basis_vectors)
{}
decision_function (
const scalar_vector_type& alpha_,
const scalar_type& b_,
const K& kernel_function_,
- const sample_vector_type& support_vectors_
+ const sample_vector_type& basis_vectors_
) :
alpha(alpha_),
b(b_),
kernel_function(kernel_function_),
- support_vectors(support_vectors_)
+ basis_vectors(basis_vectors_)
{}
decision_function& operator= (
......@@ -69,7 +69,7 @@ namespace dlib
alpha = d.alpha;
b = d.b;
kernel_function = d.kernel_function;
- support_vectors = d.support_vectors;
+ basis_vectors = d.basis_vectors;
}
return *this;
}
......@@ -80,7 +80,7 @@ namespace dlib
{
scalar_type temp = 0;
for (long i = 0; i < alpha.nr(); ++i)
- temp += alpha(i) * kernel_function(x,support_vectors(i));
+ temp += alpha(i) * kernel_function(x,basis_vectors(i));
return temp - b;
}
......@@ -99,7 +99,7 @@ namespace dlib
serialize(item.alpha, out);
serialize(item.b, out);
serialize(item.kernel_function, out);
- serialize(item.support_vectors, out);
+ serialize(item.basis_vectors, out);
}
catch (serialization_error e)
{
......@@ -120,7 +120,7 @@ namespace dlib
deserialize(item.alpha, in);
deserialize(item.b, in);
deserialize(item.kernel_function, in);
- deserialize(item.support_vectors, in);
+ deserialize(item.basis_vectors, in);
}
catch (serialization_error e)
{
......@@ -246,7 +246,7 @@ namespace dlib
scalar_vector_type alpha;
scalar_type b;
K kernel_function;
- sample_vector_type support_vectors;
+ sample_vector_type basis_vectors;
distance_function (
) : b(0), kernel_function(K()) {}
......@@ -257,19 +257,19 @@ namespace dlib
alpha(d.alpha),
b(d.b),
kernel_function(d.kernel_function),
- support_vectors(d.support_vectors)
+ basis_vectors(d.basis_vectors)
{}
distance_function (
const scalar_vector_type& alpha_,
const scalar_type& b_,
const K& kernel_function_,
- const sample_vector_type& support_vectors_
+ const sample_vector_type& basis_vectors_
) :
alpha(alpha_),
b(b_),
kernel_function(kernel_function_),
- support_vectors(support_vectors_)
+ basis_vectors(basis_vectors_)
{}
distance_function& operator= (
......@@ -281,7 +281,7 @@ namespace dlib
alpha = d.alpha;
b = d.b;
kernel_function = d.kernel_function;
- support_vectors = d.support_vectors;
+ basis_vectors = d.basis_vectors;
}
return *this;
}
......@@ -292,7 +292,7 @@ namespace dlib
{
scalar_type temp = 0;
for (long i = 0; i < alpha.nr(); ++i)
- temp += alpha(i) * kernel_function(x,support_vectors(i));
+ temp += alpha(i) * kernel_function(x,basis_vectors(i));
temp = b + kernel_function(x,x) - 2*temp;
if (temp > 0)
......@@ -308,7 +308,7 @@ namespace dlib
scalar_type temp = 0;
for (long i = 0; i < alpha.nr(); ++i)
for (long j = 0; j < x.alpha.nr(); ++j)
- temp += alpha(i)*x.alpha(j) * kernel_function(support_vectors(i), x.support_vectors(j));
+ temp += alpha(i)*x.alpha(j) * kernel_function(basis_vectors(i), x.basis_vectors(j));
temp = b + x.b - 2*temp;
if (temp > 0)
......@@ -331,7 +331,7 @@ namespace dlib
serialize(item.alpha, out);
serialize(item.b, out);
serialize(item.kernel_function, out);
- serialize(item.support_vectors, out);
+ serialize(item.basis_vectors, out);
}
catch (serialization_error e)
{
......@@ -352,7 +352,7 @@ namespace dlib
deserialize(item.alpha, in);
deserialize(item.b, in);
deserialize(item.kernel_function, in);
- deserialize(item.support_vectors, in);
+ deserialize(item.basis_vectors, in);
}
catch (serialization_error e)
{
......
......@@ -42,7 +42,7 @@ namespace dlib
scalar_vector_type alpha;
scalar_type b;
K kernel_function;
- sample_vector_type support_vectors;
+ sample_vector_type basis_vectors;
decision_function (
);
......@@ -50,7 +50,7 @@ namespace dlib
ensures
- #b == 0
- #alpha.nr() == 0
- - #support_vectors.nr() == 0
+ - #basis_vectors.nr() == 0
!*/
decision_function (
......@@ -65,11 +65,11 @@ namespace dlib
const scalar_vector_type& alpha_,
const scalar_type& b_,
const K& kernel_function_,
- const sample_vector_type& support_vectors_
- ) : alpha(alpha_), b(b_), kernel_function(kernel_function_), support_vectors(support_vectors_) {}
+ const sample_vector_type& basis_vectors_
+ ) : alpha(alpha_), b(b_), kernel_function(kernel_function_), basis_vectors(basis_vectors_) {}
/*!
ensures
- - populates the decision function with the given support vectors, weights(i.e. alphas),
+ - populates the decision function with the given basis vectors, weights (i.e. alphas),
b term, and kernel function.
!*/
......@@ -93,7 +93,7 @@ namespace dlib
{
scalar_type temp = 0;
for (long i = 0; i < alpha.nr(); ++i)
- temp += alpha(i) * kernel_function(x,support_vectors(i));
+ temp += alpha(i) * kernel_function(x,basis_vectors(i));
return temp - b;
}
......@@ -255,7 +255,7 @@ namespace dlib
scalar_vector_type alpha;
scalar_type b;
K kernel_function;
- sample_vector_type support_vectors;
+ sample_vector_type basis_vectors;
distance_function (
);
......@@ -263,7 +263,7 @@ namespace dlib
ensures
- #b == 0
- #alpha.nr() == 0
- - #support_vectors.nr() == 0
+ - #basis_vectors.nr() == 0
!*/
distance_function (
......@@ -278,11 +278,11 @@ namespace dlib
const scalar_vector_type& alpha_,
const scalar_type& b_,
const K& kernel_function_,
- const sample_vector_type& support_vectors_
- ) : alpha(alpha_), b(b_), kernel_function(kernel_function_), support_vectors(support_vectors_) {}
+ const sample_vector_type& basis_vectors_
+ ) : alpha(alpha_), b(b_), kernel_function(kernel_function_), basis_vectors(basis_vectors_) {}
/*!
ensures
- - populates the decision function with the given support vectors, weights(i.e. alphas),
+ - populates the distance function with the given basis vectors, weights (i.e. alphas),
b term, and kernel function.
!*/
......@@ -301,15 +301,15 @@ namespace dlib
/*!
ensures
- Let O(x) represent the point x projected into kernel induced feature space.
- - let c == sum alpha(i)*O(support_vectors(i)) == the point in kernel space that
+ - let c == sum alpha(i)*O(basis_vectors(i)) == the point in kernel space that
this object represents.
- - Then this object returns the distance between the points O(x) and c in kernel
+ - Then this object returns the distance between the point O(x) and c in kernel
space.
!*/
{
scalar_type temp = 0;
for (long i = 0; i < alpha.nr(); ++i)
- temp += alpha(i) * kernel_function(x,support_vectors(i));
+ temp += alpha(i) * kernel_function(x,basis_vectors(i));
temp = b + kernel_function(x,x) - 2*temp;
if (temp > 0)
......@@ -329,7 +329,7 @@ namespace dlib
scalar_type temp = 0;
for (long i = 0; i < alpha.nr(); ++i)
for (long j = 0; j < x.alpha.nr(); ++j)
- temp += alpha(i)*x.alpha(j) * kernel_function(support_vectors(i), x.support_vectors(j));
+ temp += alpha(i)*x.alpha(j) * kernel_function(basis_vectors(i), x.basis_vectors(j));
temp = b + x.b - 2*temp;
if (temp > 0)
......
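
As an aside on the ensures clause above: writing \(\Phi\) for the O(.) in the comment, \(v_i\) for the basis_vectors, and using that b stores the squared norm of c, the quantity being computed expands to

\[ \lVert \Phi(x) - c \rVert^2 \;=\; k(x,x) \;-\; 2\sum_i \alpha_i\, k(x, v_i) \;+\; \underbrace{\textstyle\sum_{i,j} \alpha_i \alpha_j\, k(v_i, v_j)}_{=\,b} \]

which is exactly the temp = b + kernel_function(x,x) - 2*temp line in the code. The elided branch after if (temp > 0) presumably returns the square root of temp (and zero otherwise, since rounding can push temp slightly negative). The two-argument operator() expands the same way, with the cross term alpha(i)*x.alpha(j)*k(v_i, x.v_j) and b + x.b in place of the two squared norms.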
......@@ -93,9 +93,9 @@ namespace dlib
) const;
/*!
ensures
- - returns the maximum number of dictionary vectors (i.e. support
- vectors) this object will use at a time. That is, dictionary_size()
- will never be greater than max_dictionary_size().
+ - returns the maximum number of dictionary vectors this object will
+ use at a time. That is, dictionary_size() will never be greater
+ than max_dictionary_size().
!*/
bool remove_oldest_first (
......@@ -122,7 +122,9 @@ namespace dlib
) const;
/*!
ensures
- returns the number of "support vectors" in the dictionary.
- returns the number of basis vectors in the dictionary. These are
the basis vectors used by this object to represent a point in kernel
feature space.
!*/
scalar_type samples_trained (
......@@ -140,11 +142,11 @@ namespace dlib
test used for sparsification (see the KRLS paper for details). This is
a number which governs how accurately this object will approximate the
centroid it is learning. Smaller values generally result in a more
- accurate estimate while also resulting in a bigger set of support
- vectors in the learned dictionary. Bigger tolerances values result in
- a less accurate estimate but also in less support vectors. (Note
- that in any case, the max_dictionary_size() limits the number
- of support vectors no matter the setting of the tolerance)
+ accurate estimate while also resulting in a bigger set of vectors in
+ the dictionary. Bigger tolerance values result in a less accurate
+ estimate but also in fewer dictionary vectors. (Note that in any case,
+ the max_dictionary_size() limits the number of dictionary vectors no
+ matter the setting of the tolerance.)
!*/
void clear_dictionary (
......
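
To make the tolerance / max_dictionary_size trade-off above concrete, a usage sketch (the kernel, gamma, and the values 0.001 and 16 are illustrative assumptions):

#include <dlib/svm.h>
#include <iostream>

int main()
{
    typedef dlib::matrix<double,2,1> sample_type;
    typedef dlib::radial_basis_kernel<sample_type> kernel_type;

    // A tolerance of 0.001 keeps the approximation tight (bigger dictionary);
    // the dictionary is capped at 16 vectors no matter what the tolerance is.
    dlib::kcentroid<kernel_type> kc(kernel_type(0.1), 0.001, 16);

    sample_type m;
    m = 1, 2;
    kc.train(m);                      // fold the sample into the centroid estimate
    std::cout << kc(m) << std::endl;  // distance from m to the learned centroid
    return 0;
}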
......@@ -253,8 +253,8 @@ namespace dlib
if (samples_seen > 0)
{
temp.b = squared_norm();
- temp.support_vectors.set_size(1);
- temp.support_vectors(0) = w;
+ temp.basis_vectors.set_size(1);
+ temp.basis_vectors(0) = w;
temp.alpha.set_size(1);
temp.alpha(0) = alpha;
}
......@@ -595,20 +595,20 @@ namespace dlib
if (std::abs(w_extra) > std::numeric_limits<scalar_type>::epsilon())
{
scale = (x_extra/w_extra);
- temp.support_vectors.set_size(1);
+ temp.basis_vectors.set_size(1);
temp.alpha.set_size(1);
- temp.support_vectors(0) = w*scale;
+ temp.basis_vectors(0) = w*scale;
temp.alpha(0) = alpha/scale;
}
else
{
// In this case w_extra is zero. So the only way we can get the same
- // thing in the output support vector set is by using two vectors
- temp.support_vectors.set_size(2);
+ // thing in the output basis vector set is by using two vectors
+ temp.basis_vectors.set_size(2);
temp.alpha.set_size(2);
- temp.support_vectors(0) = 2*w;
+ temp.basis_vectors(0) = 2*w;
temp.alpha(0) = alpha;
- temp.support_vectors(1) = w;
+ temp.basis_vectors(1) = w;
temp.alpha(1) = -alpha;
}
......@@ -883,8 +883,8 @@ namespace dlib
if (samples_seen > 0)
{
temp.b = squared_norm();
- temp.support_vectors.set_size(1);
- temp.support_vectors(0) = sample_type(w.begin(), w.end());
+ temp.basis_vectors.set_size(1);
+ temp.basis_vectors(0) = sample_type(w.begin(), w.end());
temp.alpha.set_size(1);
temp.alpha(0) = alpha;
}
......@@ -1220,22 +1220,22 @@ namespace dlib
if (std::abs(w_extra) > std::numeric_limits<scalar_type>::epsilon())
{
scale = (x_extra/w_extra);
- temp.support_vectors.set_size(1);
+ temp.basis_vectors.set_size(1);
temp.alpha.set_size(1);
- temp.support_vectors(0) = sample_type(w.begin(), w.end());
- sparse_vector::scale_by(temp.support_vectors(0), scale);
+ temp.basis_vectors(0) = sample_type(w.begin(), w.end());
+ sparse_vector::scale_by(temp.basis_vectors(0), scale);
temp.alpha(0) = alpha/scale;
}
else
{
// In this case w_extra is zero. So the only way we can get the same
- // thing in the output support vector set is by using two vectors
- temp.support_vectors.set_size(2);
+ // thing in the output basis vector set is by using two vectors
+ temp.basis_vectors.set_size(2);
temp.alpha.set_size(2);
- temp.support_vectors(0) = sample_type(w.begin(), w.end());
- sparse_vector::scale_by(temp.support_vectors(0), 2);
+ temp.basis_vectors(0) = sample_type(w.begin(), w.end());
+ sparse_vector::scale_by(temp.basis_vectors(0), 2);
temp.alpha(0) = alpha;
- temp.support_vectors(1) = sample_type(w.begin(), w.end());
+ temp.basis_vectors(1) = sample_type(w.begin(), w.end());
temp.alpha(1) = -alpha;
}
......
......@@ -72,9 +72,9 @@ namespace dlib
test in the KRLS algorithm. This is a number which governs how
accurately this object will approximate the decision function it is
learning. Smaller values generally result in a more accurate
- estimate while also resulting in a bigger set of support vectors in
+ estimate while also resulting in a bigger set of dictionary vectors in
the learned decision function. Bigger tolerances values result in a
- less accurate decision function but also in less support vectors.
+ less accurate decision function but also in fewer dictionary vectors.
!*/
const kernel_type& get_kernel (
......@@ -98,7 +98,7 @@ namespace dlib
/*!
ensures
- clears out all learned data
- (e.g. #get_decision_function().support_vectors.size() == 0)
+ (e.g. #get_decision_function().basis_vectors.size() == 0)
!*/
scalar_type operator() (
......@@ -135,8 +135,8 @@ namespace dlib
) const;
/*!
ensures
- returns the number of "support vectors" in the dictionary. That is,
returns a number equal to get_decision_function().support_vectors.size()
- returns the number of vectors in the dictionary. That is,
returns a number equal to get_decision_function().basis_vectors.size()
!*/
decision_function<kernel_type> get_decision_function (
......
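
The same trade-off for krls, as a sketch (kernel, gamma, tolerance, and the toy data are illustrative assumptions):

#include <dlib/svm.h>
#include <cmath>
#include <iostream>

int main()
{
    typedef dlib::matrix<double,1,1> sample_type;
    typedef dlib::radial_basis_kernel<sample_type> kernel_type;

    // Shrinking the 0.001 tolerance would grow the dictionary and sharpen the fit.
    dlib::krls<kernel_type> net(kernel_type(0.1), 0.001);

    sample_type m;
    for (double x = -10; x <= 10; x += 0.5)
    {
        m(0) = x;
        net.train(m, std::sin(x));    // learn the noiseless target y = sin(x)
    }

    // The same count as net.get_decision_function().basis_vectors.size()
    std::cout << "dictionary size: " << net.dictionary_size() << std::endl;
    return 0;
}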
......@@ -248,7 +248,7 @@ namespace dlib
) const
{
distance_function<offset_kernel<kernel_type> > df = w.get_distance_function();
- return decision_function<kernel_type>(df.alpha, -tau*sum(df.alpha), kernel, df.support_vectors);
+ return decision_function<kernel_type>(df.alpha, -tau*sum(df.alpha), kernel, df.basis_vectors);
}
void swap (
......@@ -572,7 +572,7 @@ namespace dlib
{
decision_function<kernel_type> df = my_trainer.get_decision_function();
std::cout << "\rbatch_trainer(): Percent complete: 100 " << std::endl;
std::cout << " Num sv: " << df.support_vectors.size() << std::endl;
std::cout << " Num sv: " << df.basis_vectors.size() << std::endl;
std::cout << " bias: " << df.b << std::endl;
return df;
}
......@@ -632,14 +632,14 @@ namespace dlib
cached_df = my_trainer.get_decision_function();
std::cout << "\rbatch_trainer(): Percent complete: 100 " << std::endl;
std::cout << " Num sv: " << cached_df.support_vectors.size() << std::endl;
std::cout << " Num sv: " << cached_df.basis_vectors.size() << std::endl;
std::cout << " bias: " << cached_df.b << std::endl;
return decision_function<kernel_type> (
cached_df.alpha,
cached_df.b,
trainer.get_kernel(),
- rowm(x, cached_df.support_vectors)
+ rowm(x, cached_df.basis_vectors)
);
}
else
......@@ -651,7 +651,7 @@ namespace dlib
cached_df.alpha,
cached_df.b,
trainer.get_kernel(),
- rowm(x, cached_df.support_vectors)
+ rowm(x, cached_df.basis_vectors)
);
}
}
......
......@@ -75,7 +75,7 @@ namespace dlib
) const;
/*!
ensures
- - returns the maximum number of centers (a.k.a. support_vectors in the
+ - returns the maximum number of centers (a.k.a. basis_vectors in the
trained decision_function) you will get when you train this object on data.
!*/
......
......@@ -117,7 +117,7 @@ namespace dlib
{
for (long c = 0; c < K.nc(); ++c)
{
- K(r,c) = kernel(lisf[r], dec_funct.support_vectors(c));
+ K(r,c) = kernel(lisf[r], dec_funct.basis_vectors(c));
}
}
......@@ -255,7 +255,7 @@ namespace dlib
for (long j = 0; j < dec_funct.alpha.size(); ++j)
{
bias += dec_funct.alpha(i)*dec_funct.alpha(j)*
- k(dec_funct.support_vectors(i), dec_funct.support_vectors(j));
+ k(dec_funct.basis_vectors(i), dec_funct.basis_vectors(j));
}
}
}
......@@ -332,9 +332,9 @@ namespace dlib
double temp = 0;
for (long i = 0; i < out_vectors.size(); ++i)
{
- for (long j = 0; j < dec_funct.support_vectors.nr(); ++j)
+ for (long j = 0; j < dec_funct.basis_vectors.nr(); ++j)
{
- temp -= b(i)*dec_funct.alpha(j)*k(out_vectors(i), dec_funct.support_vectors(j));
+ temp -= b(i)*dec_funct.alpha(j)*k(out_vectors(i), dec_funct.basis_vectors(j));
}
}
......@@ -436,9 +436,9 @@ namespace dlib
}
for (long i = 0; i < out_vectors.size(); ++i)
{
- for (long j = 0; j < dec_funct.support_vectors.size(); ++j)
+ for (long j = 0; j < dec_funct.basis_vectors.size(); ++j)
{
- res(i) -= dec_funct.alpha(j)*k(out_vectors(i), dec_funct.support_vectors(j));
+ res(i) -= dec_funct.alpha(j)*k(out_vectors(i), dec_funct.basis_vectors(j));
}
}
......@@ -454,9 +454,9 @@ namespace dlib
{
temp += b(j)*K_der(out_vectors(j), out_vectors(i));
}
- for (long j = 0; j < dec_funct.support_vectors.nr(); ++j)
+ for (long j = 0; j < dec_funct.basis_vectors.nr(); ++j)
{
- temp -= dec_funct.alpha(j)*K_der(dec_funct.support_vectors(j), out_vectors(i) );
+ temp -= dec_funct.alpha(j)*K_der(dec_funct.basis_vectors(j), out_vectors(i) );
}
// store the gradient for out_vectors[i] into result in the proper spot
......@@ -529,7 +529,7 @@ namespace dlib
{
for (long c = 0; c < K.nc(); ++c)
{
- K(r,c) = kernel(lisf[r], dec_funct.support_vectors(c));
+ K(r,c) = kernel(lisf[r], dec_funct.basis_vectors(c));
}
}
......@@ -556,7 +556,7 @@ namespace dlib
// Do a final reoptimization of beta just to make sure it is optimal given the new
- // set of support vectors.
+ // set of basis vectors.
for (long r = 0; r < K_inv.nr(); ++r)
{
for (long c = 0; c < K_inv.nc(); ++c)
......@@ -569,7 +569,7 @@ namespace dlib
{
for (long c = 0; c < K.nc(); ++c)
{
- K(r,c) = kernel(out_vectors(r), dec_funct.support_vectors(c));
+ K(r,c) = kernel(out_vectors(r), dec_funct.basis_vectors(c));
}
}
......
......@@ -378,9 +378,9 @@ namespace
DLIB_TEST_MSG(mean(peg_cv) > 0.9, peg_cv);
DLIB_TEST_MSG(mean(peg_c_cv) > 0.9, peg_c_cv);
- const long num_sv = trainer.train(x,y).support_vectors.size();
+ const long num_sv = trainer.train(x,y).basis_vectors.size();
print_spinner();
- const long num_rv = rvm_trainer.train(x,y).support_vectors.size();
+ const long num_rv = rvm_trainer.train(x,y).basis_vectors.size();
print_spinner();
dlog << LDEBUG << "num sv: " << num_sv;
dlog << LDEBUG << "num rv: " << num_rv;
......@@ -394,14 +394,14 @@ namespace
matrix<scalar_type> svm_reduced_error = test_binary_decision_function(df, x, y);
print_spinner();
dlog << LDEBUG << "svm reduced test error: " << svm_reduced_error;
dlog << LDEBUG << "svm reduced num sv: " << df.support_vectors.size();
dlog << LDEBUG << "svm reduced num sv: " << df.basis_vectors.size();
DLIB_TEST(mean(svm_reduced_error) > 0.9);
svm_cv = cross_validate_trainer(reduced(trainer,30), x,y, 4);
dlog << LDEBUG << "svm reduced cv: " << svm_cv;
DLIB_TEST_MSG(mean(svm_cv) > 0.9, svm_cv);
- DLIB_TEST(df.support_vectors.size() == 19);
+ DLIB_TEST(df.basis_vectors.size() == 19);
dlog << LINFO << " end test_binary_classification()";
}
......