Added the fix_nonzero_indexing() function.

--HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404172

Added the fix_nonzero_indexing() function.
--HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404172
a6b8cc9b · Davis King · ae79b13a · a6b8cc9b · a6b8cc9b
Commit a6b8cc9b authored Mar 18, 2011 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing with 94 additions and 0 deletions

libsvm_io.h dlib/data_io/libsvm_io.h +76 -0

libsvm_io_abstract.h dlib/data_io/libsvm_io_abstract.h +18 -0

No files found.
--- a/dlib/data_io/libsvm_io.h
+++ b/dlib/data_io/libsvm_io.h
@@ -107,6 +107,82 @@ namespace dlib
    }

 // ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+// This version of fix_nonzero_indexing() is selected when the "first" values in the
+// std::pair objects are const (e.g. when sample_type is a std::map).  Since the keys
+// can't be modified in place, each sample is rebuilt with shifted keys and then swapped
+// back into samples.
+//
+// The smallest index of a sample is read from its first element, so each sample is 
+// expected to store its smallest index first.  Empty samples contribute nothing to the 
+// minimum and are left empty.
+    template <typename sample_type, typename alloc>
+    typename enable_if<is_const_type<typename sample_type::value_type::first_type> >::type 
+    fix_nonzero_indexing (
+        std::vector<sample_type,alloc>& samples
+    )
+    {
+        typedef typename sample_type::value_type pair_type;
+        typedef typename impl::strip_const<typename pair_type::first_type>::type key_type;
+
+        // Figure out the min index value.  Only non-empty samples are inspected since 
+        // begin() of an empty sample equals end() and must not be dereferenced.
+        bool found_idx = false;
+        key_type min_idx = key_type();
+        for (unsigned long i = 0; i < samples.size(); ++i)
+        {
+            if (samples[i].begin() == samples[i].end())
+                continue;
+
+            const key_type idx = samples[i].begin()->first;
+            if (!found_idx || idx < min_idx)
+            {
+                min_idx = idx;
+                found_idx = true;
+            }
+        }
+
+        // Nothing to do if there are no index values at all (samples is empty or every 
+        // sample is empty).
+        if (!found_idx)
+            return;
+
+        // Now adjust all the samples so that their min index value is zero.
+        if (min_idx != 0)
+        {
+            sample_type temp;
+            for (unsigned long i = 0; i < samples.size(); ++i)
+            {
+                // copy samples[i] into temp but make sure it has a min index of zero.
+                temp.clear();
+                typename sample_type::iterator j;
+                for (j = samples[i].begin(); j != samples[i].end(); ++j)
+                {
+                    // Elements are visited in their stored order, so inserting with 
+                    // temp.end() as the hint keeps that order.
+                    temp.insert(temp.end(), std::make_pair(j->first-min_idx, j->second));
+                }
+
+                // replace the current sample with temp.
+                samples[i].swap(temp);
+            }
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+// If the "first" values in the std::pair objects are not const then we can modify them 
+// directly and that is what this version of fix_nonzero_indexing() does.
+//
+// The smallest index of a sample is read from its first element, so each sample is 
+// expected to store its smallest index first.  Empty samples contribute nothing to the 
+// minimum and are left empty.
+    template <typename sample_type, typename alloc>
+    typename disable_if<is_const_type<typename sample_type::value_type::first_type> >::type 
+    fix_nonzero_indexing (
+        std::vector<sample_type,alloc>& samples
+    )
+    {
+        typedef typename sample_type::value_type pair_type;
+        typedef typename impl::strip_const<typename pair_type::first_type>::type key_type;
+
+        // Figure out the min index value.  Only non-empty samples are inspected since 
+        // begin() of an empty sample equals end() and must not be dereferenced.
+        bool found_idx = false;
+        key_type min_idx = key_type();
+        for (unsigned long i = 0; i < samples.size(); ++i)
+        {
+            if (samples[i].begin() == samples[i].end())
+                continue;
+
+            const key_type idx = samples[i].begin()->first;
+            if (!found_idx || idx < min_idx)
+            {
+                min_idx = idx;
+                found_idx = true;
+            }
+        }
+
+        // Nothing to do if there are no index values at all (samples is empty or every 
+        // sample is empty).
+        if (!found_idx)
+            return;
+
+        // Now adjust all the samples so that their min index value is zero.
+        if (min_idx != 0)
+        {
+            for (unsigned long i = 0; i < samples.size(); ++i)
+            {
+                typename sample_type::iterator j;
+                for (j = samples[i].begin(); j != samples[i].end(); ++j)
+                {
+                    j->first -= min_idx;
+                }
+            }
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------

 // This is an overload for sparse vectors
    template <typename sample_type, typename label_type, typename alloc1, typename alloc2>

--- a/dlib/data_io/libsvm_io_abstract.h
+++ b/dlib/data_io/libsvm_io_abstract.h
@@ -129,6 +129,24 @@ namespace dlib
                          vector have a value of 0.
    !*/

+// ----------------------------------------------------------------------------------------
+
+    template <typename sample_type, typename alloc>
+    void fix_nonzero_indexing (
+        std::vector<sample_type,alloc>& samples
+    );
+    /*!
+        requires
+            - samples must only contain valid sparse vectors.  The definition of
+              a sparse vector can be found at the top of dlib/svm/sparse_vector_abstract.h
+        ensures
+            - Adjusts the sparse vectors in samples so that they are zero-indexed.  
+              In other words, if the smallest index value used in any of the sparse 
+              vectors is N, then this function subtracts N from every index value in 
+              samples.  This is useful, for example, if you load a libsvm formatted datafile 
+              with features indexed from 1 rather than 0 and you would like to fix this.
+            - If samples is empty, or if N is already 0, then samples is not modified.
+            - N is determined by looking at the first element of each sparse vector, 
+              so each vector is expected to store its smallest index value first.
+    !*/
+
 // ----------------------------------------------------------------------------------------

 }