Commit 09624611 authored by Davis King

updated the docs

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403601
parent 49dfe071
@@ -30,6 +30,7 @@
<item>running_stats</item>
<item>running_covariance</item>
<item>random_subset_selector</item>
<item>randomly_subsample</item>
<item nolink="true">
<name>Quantum Computing</name>
<sub>
@@ -332,6 +333,19 @@
</component>
<!-- ************************************************************************* -->
<component>
<name>randomly_subsample</name>
<file>dlib/statistics.h</file>
<spec_file>dlib/statistics/random_subset_selector_abstract.h</spec_file>
<description>
This is a pair of convenience functions for
creating <a href="#random_subset_selector">random subsets</a> of data.
</description>
</component>
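A minimal usage sketch (not part of this commit), assuming the overload that takes a sample vector and a subset size as described in random_subset_selector_abstract.h; the toy data is made up:

    #include <iostream>
    #include <vector>
    #include <dlib/statistics.h>

    int main()
    {
        // Some toy data; in practice this would be your training samples.
        std::vector<double> samples;
        for (int i = 0; i < 1000; ++i)
            samples.push_back(i);

        // Pick 50 of the 1000 samples at random.
        std::vector<double> subset = dlib::randomly_subsample(samples, 50);
        std::cout << "subset size: " << subset.size() << std::endl;
    }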
<!-- ************************************************************************* -->
<component>
...
@@ -123,6 +123,10 @@
<name>Empirical_Kernel_Map</name>
<link>empirical_kernel_map_ex.cpp.html</link>
</item>
<item>
<name>Linear_Manifold_Regularizer</name>
<link>linear_manifold_regularizer_ex.cpp.html</link>
</item>
<item>
<name>Kernel_RLS_Regression</name>
<link>krls_ex.cpp.html</link>
...
@@ -67,6 +67,7 @@ Davis E. King. <a href="http://www.jmlr.org/papers/volume10/king09a/king09a.pdf"
<item>krls</item>
<item>kcentroid</item>
<item>linearly_independent_subset_finder</item>
<item>linear_manifold_regularizer</item>
<item>empirical_kernel_map</item>
<item>kkmeans</item>
<item>svm_nu_trainer</item>
@@ -149,6 +150,19 @@ Davis E. King. <a href="http://www.jmlr.org/papers/volume10/king09a/king09a.pdf"
<link>dlib/svm/sparse_vector_abstract.h.html#sparse_vectors</link>
</item>
<item nolink="true">
<name>manifold_regularization_tools</name>
<sub>
<item>sample_pair</item>
<item>find_percent_shortest_edges_randomly</item>
<item>find_k_nearest_neighbors</item>
<item>squared_euclidean_distance</item>
<item>use_weights_of_one</item>
<item>use_gaussian_weights</item>
</sub>
</item>
</section>
</top>
@@ -162,6 +176,104 @@ Davis E. King. <a href="http://www.jmlr.org/papers/volume10/king09a/king09a.pdf"
<!-- ************************************************************************* -->
<component>
<name>use_gaussian_weights</name>
<file>dlib/manifold_regularization.h</file>
<spec_file link="true">dlib/manifold_regularization/function_objects_abstract.h</spec_file>
<description>
This is a simple function object that takes a single argument
which should be an object similar to <a href="#sample_pair">sample_pair</a> and
returns a Gaussian weight computed from the edge's stored distance.
</description>
<examples>
<example>linear_manifold_regularizer_ex.cpp.html</example>
</examples>
</component>
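A small illustrative sketch, assuming (per function_objects_abstract.h) a gamma constructor argument and an exp(-gamma * distance) weighting:

    #include <iostream>
    #include <dlib/manifold_regularization.h>

    int main()
    {
        // An edge between samples 0 and 1 whose stored distance is 0.25.
        dlib::sample_pair edge(0, 1, 0.25);

        // Weight function with gamma = 10; the returned weight is assumed
        // to be exp(-gamma * edge.distance()).
        dlib::use_gaussian_weights weight(10.0);
        std::cout << "edge weight: " << weight(edge) << std::endl;
    }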
<!-- ************************************************************************* -->
<component>
<name>use_weights_of_one</name>
<file>dlib/manifold_regularization.h</file>
<spec_file link="true">dlib/manifold_regularization/function_objects_abstract.h</spec_file>
<description>
This is a simple function object that takes a single argument
and always returns 1.
</description>
</component>
<!-- ************************************************************************* -->
<component>
<name>squared_euclidean_distance</name>
<file>dlib/manifold_regularization.h</file>
<spec_file link="true">dlib/manifold_regularization/function_objects_abstract.h</spec_file>
<description>
This is a simple function object that computes the squared Euclidean distance
between two <a href="containers.html#matrix">matrix</a> objects.
</description>
<examples>
<example>linear_manifold_regularizer_ex.cpp.html</example>
</examples>
</component>
<!-- ************************************************************************* -->
<component>
<name>find_k_nearest_neighbors</name>
<file>dlib/manifold_regularization.h</file>
<spec_file link="true">dlib/manifold_regularization/graph_creation_abstract.h</spec_file>
<description>
This function finds the k nearest neighbors of each point in a set of points and outputs
the result as a vector of <a href="#sample_pair">sample_pair</a> objects. It takes O(n^2) time, where
n is the number of data samples. A faster approximate version is provided by
<a href="#find_percent_shortest_edges_randomly">find_percent_shortest_edges_randomly</a>.
</description>
</component>
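A rough sketch of building the neighbor graph this way; the toy data and the argument order (samples, distance function, k, output vector) are my reading of graph_creation_abstract.h:

    #include <iostream>
    #include <vector>
    #include <dlib/manifold_regularization.h>
    #include <dlib/matrix.h>

    int main()
    {
        typedef dlib::matrix<double, 0, 1> sample_type;

        // Some toy 1-D samples.
        std::vector<sample_type> samples;
        for (int i = 0; i < 20; ++i)
        {
            sample_type s(1);
            s(0) = i;
            samples.push_back(s);
        }

        // Connect each sample to its 2 nearest neighbors under squared Euclidean distance.
        std::vector<dlib::sample_pair> edges;
        dlib::find_k_nearest_neighbors(samples, dlib::squared_euclidean_distance(), 2, edges);

        std::cout << "number of edges: " << edges.size() << std::endl;
    }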
<!-- ************************************************************************* -->
<component>
<name>find_percent_shortest_edges_randomly</name>
<file>dlib/manifold_regularization.h</file>
<spec_file link="true">dlib/manifold_regularization/graph_creation_abstract.h</spec_file>
<description>
This function is a simple approximate version of <a href="#find_k_nearest_neighbors">find_k_nearest_neighbors</a>.
Instead of checking all possible edges, it randomly samples a large number of them and
then returns the shortest ones.
</description>
<examples>
<example>linear_manifold_regularizer_ex.cpp.html</example>
</examples>
</component>
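A hedged sketch of the approximate version; the argument order shown (samples, distance function, fraction of sampled edges to keep, number of random edges to test, output vector) is my assumption from graph_creation_abstract.h, and some versions may also take a random seed argument:

    #include <iostream>
    #include <vector>
    #include <dlib/manifold_regularization.h>
    #include <dlib/matrix.h>

    int main()
    {
        typedef dlib::matrix<double, 0, 1> sample_type;

        std::vector<sample_type> samples;
        for (int i = 0; i < 500; ++i)
        {
            sample_type s(1);
            s(0) = i;
            samples.push_back(s);
        }

        // Randomly test 10000 candidate edges and keep the shortest 5% of them.
        // (Argument order assumed; see graph_creation_abstract.h.)
        std::vector<dlib::sample_pair> edges;
        dlib::find_percent_shortest_edges_randomly(
            samples, dlib::squared_euclidean_distance(), 0.05, 10000, edges);

        std::cout << "number of edges: " << edges.size() << std::endl;
    }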
<!-- ************************************************************************* -->
<component>
<name>sample_pair</name>
<file>dlib/manifold_regularization.h</file>
<spec_file link="true">dlib/manifold_regularization/sample_pair_abstract.h</spec_file>
<description>
This object is intended to represent an edge in an undirected graph
that has data samples at its vertices.
</description>
<examples>
<example>linear_manifold_regularizer_ex.cpp.html</example>
</examples>
</component>
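For illustration, a sample_pair stores the two vertex indices and a distance; the constructor and accessors below follow my reading of sample_pair_abstract.h:

    #include <iostream>
    #include <dlib/manifold_regularization.h>

    int main()
    {
        // An undirected edge between samples 3 and 7 with a stored distance of 0.5.
        dlib::sample_pair edge(3, 7, 0.5);

        std::cout << "edge connects sample " << edge.index1()
                  << " and sample " << edge.index2()
                  << ", distance = " << edge.distance() << std::endl;
    }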
<!-- ************************************************************************* -->
<component>
<name>find_clusters_using_kmeans</name>
<file>dlib/svm.h</file>
@@ -422,6 +534,68 @@ Davis E. King. <a href="http://www.jmlr.org/papers/volume10/king09a/king09a.pdf"
</component>
<!-- ************************************************************************* -->
<component>
<name>linear_manifold_regularizer</name>
<file>dlib/manifold_regularization.h</file>
<spec_file link="true">dlib/manifold_regularization/linear_manifold_regularizer_abstract.h</spec_file>
<description>
<p>
Many learning algorithms attempt to minimize a loss function that,
at a high level, looks like this:
<pre>
loss(w) == complexity + training_set_error
</pre>
</p>
<p>
The idea is to find the set of parameters, w, that gives low error on
your training data but also is not "complex" according to some particular
measure of complexity. This strategy of penalizing complexity is
usually called regularization.
</p>
<p>
In the above setting all the training data consists of labeled samples.
However, it would be nice to be able to benefit from unlabeled data
(see the <a href="linear_manifold_regularizer_ex.cpp.html">example program</a>
for this object for an example where unlabeled
data is useful). The idea of manifold regularization is to extract useful
information from unlabeled data by defining which data samples are "close"
to each other (perhaps by using their 3 <a href="#find_k_nearest_neighbors">nearest neighbors</a>)
and then adding a term to the loss function that penalizes any decision rule which produces
different output on data samples that we have designated as being close.
</p>
<p>
It turns out that it is possible to turn these manifold regularized loss
functions into the normal form shown above by applying a certain kind
of processing to all of our data samples. Once this is done we can use
a normal learning algorithm, such as the <a href="#svm_c_linear_trainer">svm_c_linear_trainer</a>,
on just the labeled data samples and obtain the same output as the manifold regularized
learner would have produced. Therefore, the linear_manifold_regularizer is
a tool for creating this preprocessing transformation. In particular, the
transformation is linear. That is, it is just a matrix you multiply with
all your samples.
</p>
<p>
For a more detailed discussion of this topic you should consult the following
paper. In particular, see section 4.2. This object computes the inverse T
matrix described in that section.
<blockquote>
Linear Manifold Regularization for Large Scale Semi-supervised Learning
by Vikas Sindhwani, Partha Niyogi, and Mikhail Belkin
</blockquote>
</p>
</description>
<examples>
<example>linear_manifold_regularizer_ex.cpp.html</example>
</examples>
</component>
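The overall workflow might look roughly like the condensed sketch below; build(), get_transformation_matrix(), and the regularization strength value are my reading of linear_manifold_regularizer_abstract.h, and the example program linked above is the authoritative version:

    #include <vector>
    #include <dlib/manifold_regularization.h>
    #include <dlib/matrix.h>

    using namespace dlib;

    int main()
    {
        typedef matrix<double, 0, 1> sample_type;

        // Labeled and unlabeled samples together (toy 2-D data here).
        std::vector<sample_type> samples;
        for (int i = 0; i < 200; ++i)
        {
            sample_type s(2);
            s(0) = i;
            s(1) = i % 10;
            samples.push_back(s);
        }

        // Define "closeness" by connecting each sample to its 3 nearest neighbors.
        std::vector<sample_pair> edges;
        find_k_nearest_neighbors(samples, squared_euclidean_distance(), 3, edges);

        // Build the linear transformation that folds the manifold regularization
        // term into the data.  The argument to get_transformation_matrix()
        // controls how strongly "close samples should get similar outputs"
        // is enforced.
        linear_manifold_regularizer<sample_type> lmr;
        lmr.build(samples, edges, use_gaussian_weights(0.1));
        const matrix<double> T = lmr.get_transformation_matrix(10000);

        // Transform every sample.  A normal linear learner (e.g. the
        // svm_c_linear_trainer) trained on the transformed labeled samples
        // then behaves like the manifold regularized learner.
        for (unsigned long i = 0; i < samples.size(); ++i)
            samples[i] = T * samples[i];
    }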
<!-- ************************************************************************* -->
<component>
@@ -457,12 +631,13 @@ Davis E. King. <a href="http://www.jmlr.org/papers/volume10/king09a/king09a.pdf"
few thousand samples then you can just use all of them as basis samples.
Alternatively, the
<a href="#linearly_independent_subset_finder">linearly_independent_subset_finder</a>
often works well for selecting a basis set. I also find that picking a
<a href="algorithms.html#random_subset_selector">random subset</a> typically works well.
</p>
</description>
<examples>
<example>empirical_kernel_map_ex.cpp.html</example>
<example>linear_manifold_regularizer_ex.cpp.html</example>
</examples>
</component>
@@ -753,6 +928,9 @@ Davis E. King. <a href="http://www.jmlr.org/papers/volume10/king09a/king09a.pdf"
this object are created using the
<a href="#empirical_kernel_map">empirical_kernel_map</a>.
</description>
<examples>
<example>linear_manifold_regularizer_ex.cpp.html</example>
</examples>
</component>
...
@@ -15,6 +15,8 @@ New Stuff:
- Added the svm_c_ekm_trainer
- Added the sum_rows(), sum_cols(), join_rows(), join_cols(), reshape(),
  and pointer_to_matrix() functions.
- Added the linear_manifold_regularizer and some supporting tools.
- Added the randomly_subsample() function.
Non-Backwards Compatible Changes:
...
@@ -111,6 +111,7 @@
<term file="algorithms.html" name="running_stats"/>
<term file="algorithms.html" name="running_covariance"/>
<term file="algorithms.html" name="random_subset_selector"/>
<term file="algorithms.html" name="randomly_subsample"/>
<term file="dlib/svm/pegasos_abstract.h.html" name="replicate_settings"/> <term file="dlib/svm/pegasos_abstract.h.html" name="replicate_settings"/>
<term file="ml.html" name="mlp"/> <term file="ml.html" name="mlp"/>
...@@ -121,6 +122,19 @@ ...@@ -121,6 +122,19 @@
<term link="ml.html#mlp" name="neural network"/> <term link="ml.html#mlp" name="neural network"/>
<term file="ml.html" name="svm_pegasos"/> <term file="ml.html" name="svm_pegasos"/>
<term file="ml.html" name="empirical_kernel_map"/> <term file="ml.html" name="empirical_kernel_map"/>
<term file="ml.html" name="sample_pair"/>
<term file="dlib/manifold_regularization/sample_pair_abstract.h.html" name="order_by_index"/>
<term file="dlib/manifold_regularization/sample_pair_abstract.h.html" name="order_by_distance"/>
<term file="ml.html" name="find_percent_shortest_edges_randomly"/>
<term file="ml.html" name="find_k_nearest_neighbors"/>
<term file="dlib/manifold_regularization/graph_creation_abstract.h.html" name="contains_duplicate_pairs"/>
<term file="dlib/manifold_regularization/graph_creation_abstract.h.html" name="max_index_value_plus_one"/>
<term file="ml.html" name="linear_manifold_regularizer"/>
<term file="ml.html" name="squared_euclidean_distance"/>
<term file="ml.html" name="use_weights_of_one"/>
<term file="ml.html" name="use_gaussian_weights"/>
<term file="dlib/svm/empirical_kernel_map_abstract.h.html" name="empirical_kernel_map_error"/> <term file="dlib/svm/empirical_kernel_map_abstract.h.html" name="empirical_kernel_map_error"/>
<term file="dlib/svm/empirical_kernel_map_abstract.h.html" name="convert_to_decision_function"/> <term file="dlib/svm/empirical_kernel_map_abstract.h.html" name="convert_to_decision_function"/>
<term file="ml.html" name="kernel_matrix"/> <term file="ml.html" name="kernel_matrix"/>
......