Made set_prior() work with sparse vectors.

721597f2 · Davis King · 05c0b373 · 721597f2 · 721597f2 · 721597f2
Commit 721597f2 authored May 24, 2014 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing with 77 additions and 23 deletions

svm_c_linear_trainer.h dlib/svm/svm_c_linear_trainer.h +25 -10

oca.cpp dlib/test/oca.cpp +13 -13

svm_c_linear.cpp dlib/test/svm_c_linear.cpp +39 -0

No files found.
--- a/dlib/svm/svm_c_linear_trainer.h
+++ b/dlib/svm/svm_c_linear_trainer.h
@@ -44,7 +44,8 @@ namespace dlib
            const in_scalar_vector_type& labels_,
            const bool be_verbose_,
            const scalar_type eps_,
-            const unsigned long max_iter
+            const unsigned long max_iter,
+            const unsigned long dims_
        ) :
            samples(samples_),
            labels(labels_),
@@ -53,7 +54,8 @@ namespace dlib
            Cneg(C_neg/C),
            be_verbose(be_verbose_),
            eps(eps_),
-            max_iterations(max_iter)
+            max_iterations(max_iter),
+            dims(dims_)
        {
            dot_prods.resize(samples.size());
            is_first_call = true;
@@ -69,7 +71,7 @@ namespace dlib
        ) const 
        {
            // plus 1 for the bias term
-            return max_index_plus_one(samples) + 1;
+            return dims + 1;
        }
        virtual bool optimization_status (
@@ -300,6 +302,7 @@ namespace dlib
        const bool be_verbose;
        const scalar_type eps;
        const unsigned long max_iterations;
+        const unsigned long dims;
    };
 // ----------------------------------------------------------------------------------------
@@ -317,11 +320,12 @@ namespace dlib
        const in_scalar_vector_type& labels,
        const bool be_verbose,
        const scalar_type eps,
-        const unsigned long max_iterations
+        const unsigned long max_iterations,
+        const unsigned long dims
    )
    {
        return oca_problem_c_svm<matrix_type, in_sample_vector_type, in_scalar_vector_type>(
-            C_pos, C_neg, samples, labels, be_verbose, eps, max_iterations);
+            C_pos, C_neg, samples, labels, be_verbose, eps, max_iterations, dims);
    }
 // ----------------------------------------------------------------------------------------
@@ -478,7 +482,8 @@ namespace dlib
                << "\n\t this: " << this
                );
-            prior = join_cols(prior_.basis_vectors(0), mat((scalar_type)prior_.b));
+            prior = sparse_to_dense(prior_.basis_vectors(0));
+            prior_b = prior_.b;
            learn_nonnegative_weights = false;
            last_weight_1 = false;
        }
@@ -631,7 +636,7 @@ namespace dlib
                if (is_matrix<sample_type>::value)
                {
                    // make sure requires clause is not broken
-                    DLIB_CASSERT(num_dims+1 == (unsigned long)prior.size(),
+                    DLIB_CASSERT(num_dims == (unsigned long)prior.size(),
                        "\t decision_function svm_c_linear_trainer::train(x,y)"
                        << "\n\t The dimension of the training vectors must match the dimension of\n"
                        << "\n\t those used to create the prior."
@@ -639,15 +644,24 @@ namespace dlib
                        << "\n\t prior.size(): " << prior.size() 
                    );
                }
+                const unsigned long dims = std::max(num_dims, (unsigned long)prior.size());
+                // With sparse sample vectors, the input vectors may have more
+                // dimensions than the prior vector.  When they do, pad the prior
+                // with zeros up to dims before appending the bias term, so it
+                // matches the dimensionality handed to the solver.
+                matrix<scalar_type,0,1> prior_temp = join_cols(join_cols(prior, 
+                                                                         zeros_matrix<scalar_type>(dims-prior.size(),1)),
+                                                                         mat(prior_b));
                svm_objective = solver(
-                    make_oca_problem_c_svm<w_type>(Cpos, Cneg, x, y, verbose, eps, max_iterations), 
+                    make_oca_problem_c_svm<w_type>(Cpos, Cneg, x, y, verbose, eps, max_iterations, dims), 
                    w,
-                    prior);
+                    prior_temp);
            }
            else
            {
                svm_objective = solver(
-                    make_oca_problem_c_svm<w_type>(Cpos, Cneg, x, y, verbose, eps, max_iterations), 
+                    make_oca_problem_c_svm<w_type>(Cpos, Cneg, x, y, verbose, eps, max_iterations, num_dims), 
                    w,
                    num_nonnegative,
                    force_weight_1_idx);
@@ -678,6 +692,7 @@ namespace dlib
        bool learn_nonnegative_weights;
        bool last_weight_1;
        matrix<scalar_type,0,1> prior;
+        scalar_type prior_b;
    }; 
 // ----------------------------------------------------------------------------------------

--- a/dlib/test/oca.cpp
+++ b/dlib/test/oca.cpp
@@ -66,42 +66,42 @@ namespace
            oca solver;
            // test the version without a non-negativity constraint on w.
-            solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, mat(x), mat(y), false, 1e-12, 40), w, 0);
+            solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, mat(x), mat(y), false, 1e-12, 40, max_index_plus_one(x)), w, 0);
            dlog << LINFO << trans(w);
            true_w = -0.5, 0.5, 0;
            dlog << LINFO << "error: "<< max(abs(w-true_w));
            DLIB_TEST(max(abs(w-true_w)) < 1e-10);
            w_type prior = true_w;
-            solver(make_oca_problem_c_svm<w_type>(20.0, 30.0, mat(x), mat(y), false, 1e-12, 40), w, prior);
+            solver(make_oca_problem_c_svm<w_type>(20.0, 30.0, mat(x), mat(y), false, 1e-12, 40, max_index_plus_one(x)), w, prior);
            dlog << LINFO << trans(w);
            true_w = -0.5, 0.5, 0;
            dlog << LINFO << "error: "<< max(abs(w-true_w));
            DLIB_TEST(max(abs(w-true_w)) < 1e-10);
            prior = 0,0,0;
-            solver(make_oca_problem_c_svm<w_type>(20.0, 30.0, mat(x), mat(y), false, 1e-12, 40), w, prior);
+            solver(make_oca_problem_c_svm<w_type>(20.0, 30.0, mat(x), mat(y), false, 1e-12, 40, max_index_plus_one(x)), w, prior);
            dlog << LINFO << trans(w);
            true_w = -0.5, 0.5, 0;
            dlog << LINFO << "error: "<< max(abs(w-true_w));
            DLIB_TEST(max(abs(w-true_w)) < 1e-10);
            prior = -1,1,0;
-            solver(make_oca_problem_c_svm<w_type>(20.0, 30.0, mat(x), mat(y), false, 1e-12, 40), w, prior);
+            solver(make_oca_problem_c_svm<w_type>(20.0, 30.0, mat(x), mat(y), false, 1e-12, 40, max_index_plus_one(x)), w, prior);
            dlog << LINFO << trans(w);
            true_w = -1.0, 1.0, 0;
            dlog << LINFO << "error: "<< max(abs(w-true_w));
            DLIB_TEST(max(abs(w-true_w)) < 1e-10);
            prior = -0.2,0.2,0;
-            solver(make_oca_problem_c_svm<w_type>(20.0, 30.0, mat(x), mat(y), false, 1e-12, 40), w, prior);
+            solver(make_oca_problem_c_svm<w_type>(20.0, 30.0, mat(x), mat(y), false, 1e-12, 40, max_index_plus_one(x)), w, prior);
            dlog << LINFO << trans(w);
            true_w = -0.5, 0.5, 0;
            dlog << LINFO << "error: "<< max(abs(w-true_w));
            DLIB_TEST(max(abs(w-true_w)) < 1e-10);
            prior = -10.2,-1,0;
-            solver(make_oca_problem_c_svm<w_type>(20.0, 30.0, mat(x), mat(y), false, 1e-12, 40), w, prior);
+            solver(make_oca_problem_c_svm<w_type>(20.0, 30.0, mat(x), mat(y), false, 1e-12, 40, max_index_plus_one(x)), w, prior);
            dlog << LINFO << trans(w);
            true_w = -10.2, -1.0, 0;
            dlog << LINFO << "error: "<< max(abs(w-true_w));
@@ -110,7 +110,7 @@ namespace
            print_spinner();
            // test the version with a non-negativity constraint on w.
-            solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, mat(x), mat(y), false, 1e-12, 40), w, 9999);
+            solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, mat(x), mat(y), false, 1e-12, 40, max_index_plus_one(x)), w, 9999);
            dlog << LINFO << trans(w);
            true_w = 0, 1, 0;
            dlog << LINFO << "error: "<< max(abs(w-true_w));
@@ -126,7 +126,7 @@ namespace
            print_spinner();
            // test the version with a non-negativity constraint on w.
-            solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, mat(x), mat(y), false, 1e-12, 40), w, 2);
+            solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, mat(x), mat(y), false, 1e-12, 40, max_index_plus_one(x)), w, 2);
            dlog << LINFO << trans(w);
            true_w = 0, 1, 0;
            dlog << LINFO << "error: "<< max(abs(w-true_w));
@@ -136,7 +136,7 @@ namespace
            // test the version with a non-negativity constraint on w.
-            solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, mat(x), mat(y), false, 1e-12, 40), w, 1);
+            solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, mat(x), mat(y), false, 1e-12, 40, max_index_plus_one(x)), w, 1);
            dlog << LINFO << trans(w);
            true_w = 0, 1, 0;
            dlog << LINFO << "error: "<< max(abs(w-true_w));
@@ -151,7 +151,7 @@ namespace
            y.push_back(+1);
-            solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, mat(x), mat(y), false, 1e-12, 40), w, 0);
+            solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, mat(x), mat(y), false, 1e-12, 40, max_index_plus_one(x)), w, 0);
            dlog << LINFO << trans(w);
            true_w = 0.5, -0.5, 0;
            dlog << LINFO << "error: "<< max(abs(w-true_w));
@@ -159,7 +159,7 @@ namespace
            print_spinner();
-            solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, mat(x), mat(y), false, 1e-12, 40), w, 1);
+            solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, mat(x), mat(y), false, 1e-12, 40, max_index_plus_one(x)), w, 1);
            dlog << LINFO << trans(w);
            true_w = 0.5, -0.5, 0;
            dlog << LINFO << "error: "<< max(abs(w-true_w));
@@ -167,7 +167,7 @@ namespace
            print_spinner();
-            solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, mat(x), mat(y), false, 1e-12, 40), w, 2);
+            solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, mat(x), mat(y), false, 1e-12, 40, max_index_plus_one(x)), w, 2);
            dlog << LINFO << trans(w);
            true_w = 1, 0, 0;
            dlog << LINFO << "error: "<< max(abs(w-true_w));
@@ -175,7 +175,7 @@ namespace
            print_spinner();
-            solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, mat(x), mat(y), false, 1e-12, 40), w, 5);
+            solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, mat(x), mat(y), false, 1e-12, 40, max_index_plus_one(x)), w, 5);
            dlog << LINFO << trans(w);
            true_w = 1, 0, 0;
            dlog << LINFO << "error: "<< max(abs(w-true_w));

--- a/dlib/test/svm_c_linear.cpp
+++ b/dlib/test/svm_c_linear.cpp
@@ -70,6 +70,44 @@ namespace
        DLIB_TEST(df.basis_vectors(0)(2) > 0);
    }
+    void run_prior_sparse_test()  // Trains on sparse samples, installs the result via set_prior(), then retrains on samples that use a new index.
+    {
+        typedef std::map<unsigned long,double> sample_type;  // sparse sample: maps feature index -> value
+        typedef sparse_linear_kernel<sample_type> kernel_type;
+        svm_c_linear_trainer<kernel_type> trainer;
+        std::vector<sample_type> samples;
+        std::vector<double> labels;
+        sample_type samp;
+        samp[0] = 1; samples.push_back(samp); labels.push_back(+1); samp.clear();  // positive example: only index 0 is set
+        samp[1] = 1; samples.push_back(samp); labels.push_back(-1); samp.clear();  // negative example: only index 1 is set
+        trainer.set_c(10);
+        decision_function<kernel_type> df = trainer.train(samples, labels);  // first pass: the data only touches indices 0 and 1
+        trainer.set_prior(df);  // hand the first-pass decision function to the trainer as its prior
+        samples.clear();
+        labels.clear();
+        samp[2] = 1; samples.push_back(samp); labels.push_back(+1); samp.clear();  // new positive example uses index 2, absent from the first training set
+        samp[1] = 1; samples.push_back(samp); labels.push_back(-1); samp.clear();  // same negative example as in the first pass
+        df = trainer.train(samples, labels);  // second pass: the samples reach one index past anything in the first training set
+        matrix<double,1,2> rs = test_binary_decision_function(df, samples, labels);
+        dlog << LINFO << rs;
+        DLIB_TEST(rs(0) == 1);  // NOTE(review): presumably rs holds per-class accuracies - confirm against test_binary_decision_function's docs
+        DLIB_TEST(rs(1) == 1);
+        matrix<double,0,1> w = sparse_to_dense(df.basis_vectors(0));  // densify the learned weights so they can be indexed directly
+        dlog << LINFO << trans(w);
+        DLIB_TEST(w(0) > 0.1);  // index 0 appears only in the first training set; presumably this weight is carried over from the prior
+        DLIB_TEST(w(1) < -0.1);  // index 1 was the negative example in both training sets
+        DLIB_TEST(w(2) > 0.1);  // index 2 appears only in the second training set
+    }
    void get_simple_points (
        std::vector<sample_type>& samples,
        std::vector<double>& labels
@@ -255,6 +293,7 @@ namespace
            test_dense();
            test_sparse();
            run_prior_test();
+            run_prior_sparse_test();
            // test mixed sparse and dense dot products
            {