A bit of code cleanup.

f6f744f7 · Davis King · 07a7995a · f6f744f7 · f6f744f7
Commit f6f744f7 authored Jan 31, 2018 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing with 16 additions and 15 deletions

statistics.h dlib/statistics/statistics.h +10 -11

statistics_abstract.h dlib/statistics/statistics_abstract.h +6 -4

No files found.
--- a/dlib/statistics/statistics.h
+++ b/dlib/statistics/statistics.h
@@ -1840,7 +1840,15 @@ namespace dlib
            return k*std::log(p) + (n-k)*std::log(1-p);
        };
-        return ll(p1,k1,n1) + ll(p2,k2,n2) - ll(p,k1,n1) - ll(p,k2,n2); 
+        auto logll = ll(p1,k1,n1) + ll(p2,k2,n2) - ll(p,k1,n1) - ll(p,k2,n2); 
+        // The basic statistic only tells you whether the random variables differ.  It is
+        // also useful to know the direction of the difference, i.e., which one is bigger,
+        // so encode that in the sign of the return value: negate it when p1 < p2.
+        if (p1>=p2)
+            return logll;
+        else
+            return -logll;
    }
 // ----------------------------------------------------------------------------------------
@@ -1868,19 +1876,10 @@ namespace dlib
        const auto notB_count = total_num_observations - B_count;
        // How likely is it that the odds of A happening is different when conditioned on
        // whether or not B happened?
-        const auto cor =  binomial_random_vars_are_different( 
+        return binomial_random_vars_are_different( 
            AB_count, B_count,      // A conditional on the presence of B
            AnotB_count, notB_count // A conditional on the absence of B 
        );
-        // Check if there are more or less co-occurrences than expected (if A and B were
-        // unrelated) and use that to give the return value its sign.
-        const double expected_AB_count_if_unrelated = (A_count/(double)total_num_observations)*B_count;
-        if (AB_count >= expected_AB_count_if_unrelated)
-            return cor;
-        else
-            return -cor;
    }
 // ----------------------------------------------------------------------------------------

--- a/dlib/statistics/statistics_abstract.h
+++ b/dlib/statistics/statistics_abstract.h
@@ -124,10 +124,12 @@ namespace dlib
                - You observed X1 to give k1 successes out of n1 trials.
                - You observed X2 to give k2 successes out of n2 trials.
            - This function performs a simple likelihood ratio test to determine if X1 and
-              X2 have the same parameter.  The return value of this function will be 0 if
+              X2 have the same parameter.  The return value of this function will be:
-              they are probably the same or it will be some positive number otherwise.
+                - Close to 0 if they are probably the same.
-              Moreover, the larger the return value the more likely it is that X1 and X2
+                - Larger than 0 if X1 probably has a higher "success" rate than X2. 
-              have different distributions.
+                - Smaller than 0 if X2 probably has a higher "success" rate than X1. 
+              Moreover, the larger the magnitude of the return value, the more likely
+              it is that X1 and X2 have different distributions.
            - For a discussion of the technique and applications see:
                  Dunning, Ted. "Accurate methods for the statistics of surprise and
                  coincidence." Computational linguistics 19.1 (1993): 61-74.