Commit f6f744f7 authored by Davis King

A bit of code cleanup.

parent 07a7995a
...@@ -1840,7 +1840,15 @@ namespace dlib ...@@ -1840,7 +1840,15 @@ namespace dlib
return k*std::log(p) + (n-k)*std::log(1-p); return k*std::log(p) + (n-k)*std::log(1-p);
}; };
return ll(p1,k1,n1) + ll(p2,k2,n2) - ll(p,k1,n1) - ll(p,k2,n2); auto logll = ll(p1,k1,n1) + ll(p2,k2,n2) - ll(p,k1,n1) - ll(p,k2,n2);
// The basic statistic only tells you if the random variables are different. But
// it's nice to know which way they are different, i.e., which one is bigger. So
// stuff that information into the sign bit of the return value.
if (p1>=p2)
return logll;
else
return -logll;
} }
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
...@@ -1868,19 +1876,10 @@ namespace dlib ...@@ -1868,19 +1876,10 @@ namespace dlib
const auto notB_count = total_num_observations - B_count; const auto notB_count = total_num_observations - B_count;
// How likely is it that the odds of A happening is different when conditioned on // How likely is it that the odds of A happening is different when conditioned on
// whether or not B happened? // whether or not B happened?
const auto cor = binomial_random_vars_are_different( return binomial_random_vars_are_different(
AB_count, B_count, // A conditional on the presence of B AB_count, B_count, // A conditional on the presence of B
AnotB_count, notB_count // A conditional on the absence of B AnotB_count, notB_count // A conditional on the absence of B
); );
// Check if there are more or less co-occurrences than expected (if A and B were
// unrelated) and use that to give the return value its sign.
const double expected_AB_count_if_unrelated = (A_count/(double)total_num_observations)*B_count;
if (AB_count >= expected_AB_count_if_unrelated)
return cor;
else
return -cor;
} }
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
......
...@@ -124,10 +124,12 @@ namespace dlib ...@@ -124,10 +124,12 @@ namespace dlib
- You observed X1 to give k1 successes out of n1 trials. - You observed X1 to give k1 successes out of n1 trials.
- You observed X2 to give k2 successes out of n2 trials. - You observed X2 to give k2 successes out of n2 trials.
- This function performs a simple likelihood ratio test to determine if X1 and - This function performs a simple likelihood ratio test to determine if X1 and
X2 have the same parameter. The return value of this function will be 0 if X2 have the same parameter. The return value of this function will be:
they are probably the same or it will be some positive number otherwise. - Close to 0 if they are probably the same.
Moreover, the larger the return value the more likely it is that X1 and X2 - Larger than 0 if X1 probably has a higher "success" rate than X2.
have different distributions. - Smaller than 0 if X2 probably has a higher "success" rate than X1.
Moreover, the larger the absolute magnitude of the return value the more
likely it is that X1 and X2 have different distributions.
- For a discussion of the technique and applications see: - For a discussion of the technique and applications see:
Dunning, Ted. "Accurate methods for the statistics of surprise and Dunning, Ted. "Accurate methods for the statistics of surprise and
coincidence." Computational linguistics 19.1 (1993): 61-74. coincidence." Computational linguistics 19.1 (1993): 61-74.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment