Commit a75645b1 authored by Steve Taylor's avatar Steve Taylor

Added a numerical constants file numeric_constants.h. Expanded the

running_stats object in statistics.h by including two functions that
compute the unbiased empirical skewness and kurtosis of a set
of real numbers.  Added unit tests for these functions in statistics.cpp.
parent 5977b0b8
//Copyright (C) 2013 Steve Taylor (steve98654@gmail.com), Davis E. King
//License: Boost Software License. See LICENSE.txt for full license.
#ifndef DLIB_NUMERIC_CONSTANTs_H_
#define DLIB_NUMERIC_CONSTANTs_H_
namespace dlib
{
// pi -- Pi
const double pi = 3.1415926535897932385;
// e -- Euler's Constant
const double e = 2.7182818284590452354;
// sqrt_2 -- the square root of 2
const double sqrt_2 = 1.4142135623730950488;
// sqrt_3 -- the square root of 3
const double sqrt_3 = 1.7320508075688772935;
// light_spd -- the speed of light in vacuum in meters per second
const double light_spd = 2.99792458e8;
// newton_G -- Newton's gravitational constant (in metric units of m^3/(kg*s^2))
const double newton_G = 6.67384e-11;
// planck_cst -- Planck's constant (in units of Joules * seconds)
const double planck_cst = 6.62606957e-34;
}
#endif //DLIB_NUMERIC_CONSTANTs_H_
// Copyright (C) 2008 Davis E. King (davis@dlib.net) // Copyright (C) 2008 Davis E. King (davis@dlib.net), Steve Taylor
// License: Boost Software License See LICENSE.txt for the full license. // License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_STATISTICs_ #ifndef DLIB_STATISTICs_
#define DLIB_STATISTICs_ #define DLIB_STATISTICs_
...@@ -36,6 +36,9 @@ namespace dlib ...@@ -36,6 +36,9 @@ namespace dlib
{ {
sum = 0; sum = 0;
sum_sqr = 0; sum_sqr = 0;
sum_cub = 0;
sum_four = 0;
n = 0; n = 0;
maximum_n = std::numeric_limits<T>::max(); maximum_n = std::numeric_limits<T>::max();
min_value = std::numeric_limits<T>::infinity(); min_value = std::numeric_limits<T>::infinity();
...@@ -55,6 +58,8 @@ namespace dlib ...@@ -55,6 +58,8 @@ namespace dlib
{ {
sum += val; sum += val;
sum_sqr += val*val; sum_sqr += val*val;
sum_cub += cubed(val);
sum_four += quaded(val);
if (val < min_value) if (val < min_value)
min_value = val; min_value = val;
...@@ -145,6 +150,43 @@ namespace dlib ...@@ -145,6 +150,43 @@ namespace dlib
return std::sqrt(variance()); return std::sqrt(variance());
} }
T skewness (
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(current_n() > 2,
"\tT running_stats::skewness"
<< "\n\tyou have to add some numbers to this object first"
<< "\n\tthis: " << this
);
T temp = 1/n;
T temp1 = std::sqrt(n*(n-1))/(n-2);
temp = temp1*temp*(sum_cub - 3*sum_sqr*sum*temp + 2*cubed(sum)*temp*temp)/
(std::sqrt(std::pow(temp*(sum_sqr-sum*sum*temp),3)));
return temp;
}
T ex_kurtosis (
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(current_n() > 3,
"\tT running_stats::kurtosis"
<< "\n\tyou have to add some numbers to this object first"
<< "\n\tthis: " << this
);
T temp = 1/n;
T m4 = temp*(sum_four - 4*sum_cub*sum*temp+6*sum_sqr*sum*sum*temp*temp
-3*quaded(sum)*cubed(temp));
T m2 = temp*(sum_sqr-sum*sum*temp);
temp = (n-1)*((n+1)*m4/(m2*m2)-3*(n-1))/((n-2)*(n-3));
return temp;
}
T scale ( T scale (
const T& val const T& val
) const ) const
...@@ -175,6 +217,8 @@ namespace dlib ...@@ -175,6 +217,8 @@ namespace dlib
temp.sum += rhs.sum; temp.sum += rhs.sum;
temp.sum_sqr += rhs.sum_sqr; temp.sum_sqr += rhs.sum_sqr;
temp.sum_cub += rhs.sum_cub;
temp.sum_four += rhs.sum_four;
temp.n += rhs.n; temp.n += rhs.n;
temp.min_value = std::min(rhs.min_value, min_value); temp.min_value = std::min(rhs.min_value, min_value);
temp.max_value = std::max(rhs.max_value, max_value); temp.max_value = std::max(rhs.max_value, max_value);
...@@ -196,10 +240,15 @@ namespace dlib ...@@ -196,10 +240,15 @@ namespace dlib
private: private:
T sum; T sum;
T sum_sqr; T sum_sqr;
T sum_cub;
T sum_four;
T n; T n;
T maximum_n; T maximum_n;
T min_value; T min_value;
T max_value; T max_value;
T cubed (const T& val) const {return val*val*val; }
T quaded (const T& val) const {return val*val*val*val; }
}; };
template <typename T> template <typename T>
...@@ -208,8 +257,13 @@ namespace dlib ...@@ -208,8 +257,13 @@ namespace dlib
std::ostream& out std::ostream& out
) )
{ {
int version = 2;
serialize(version, out);
serialize(item.sum, out); serialize(item.sum, out);
serialize(item.sum_sqr, out); serialize(item.sum_sqr, out);
serialize(item.sum_cub, out);
serialize(item.sum_four, out);
serialize(item.n, out); serialize(item.n, out);
serialize(item.maximum_n, out); serialize(item.maximum_n, out);
serialize(item.min_value, out); serialize(item.min_value, out);
...@@ -222,8 +276,15 @@ namespace dlib ...@@ -222,8 +276,15 @@ namespace dlib
std::istream& in std::istream& in
) )
{ {
int version = 0;
deserialize(version, in);
if (version != 2)
throw dlib::serialization_error("Unexpected version number found while deserializing dlib::running_stats object.");
deserialize(item.sum, in); deserialize(item.sum, in);
deserialize(item.sum_sqr, in); deserialize(item.sum_sqr, in);
deserialize(item.sum_cub, in);
deserialize(item.sum_four, in);
deserialize(item.n, in); deserialize(item.n, in);
deserialize(item.maximum_n, in); deserialize(item.maximum_n, in);
deserialize(item.min_value, in); deserialize(item.min_value, in);
......
// Copyright (C) 2008 Davis E. King (davis@dlib.net) // Copyright (C) 2008 Davis E. King (davis@dlib.net), Steve Taylor
// License: Boost Software License See LICENSE.txt for the full license. // License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_STATISTICs_ABSTRACT_ #undef DLIB_STATISTICs_ABSTRACT_
#ifdef DLIB_STATISTICs_ABSTRACT_ #ifdef DLIB_STATISTICs_ABSTRACT_
...@@ -121,8 +121,8 @@ namespace dlib ...@@ -121,8 +121,8 @@ namespace dlib
- current_n() == 0 - current_n() == 0
WHAT THIS OBJECT REPRESENTS WHAT THIS OBJECT REPRESENTS
This object represents something that can compute the running mean and This object represents something that can compute the running mean,
variance of a stream of real numbers. variance, skewness, and excess kurtosis of a stream of real numbers.
As this object accumulates more and more numbers it will be the case As this object accumulates more and more numbers it will be the case
that each new number impacts the current mean and variance estimate that each new number impacts the current mean and variance estimate
...@@ -184,10 +184,13 @@ namespace dlib ...@@ -184,10 +184,13 @@ namespace dlib
); );
/*! /*!
ensures ensures
- updates the mean and variance stored in this object so that - updates the mean, variance, skewness, and kurtosis stored in this object
the new value is factored into them so that the new value is factored into them.
- #mean() == mean()*current_n()/(current_n()+1) + val/(current_n()+1) - #mean() == mean()*current_n()/(current_n()+1) + val/(current_n()+1).
- #variance() == the updated variance that takes this new value into account (i.e. the updated mean value that takes the new value into account)
- #variance() == the updated variance that takes this new value into account.
- #skewness() == the updated skewness that takes this new value into account.
- #ex_kurtosis() == the updated kurtosis that takes this new value into account.
- if (current_n() < max_n()) then - if (current_n() < max_n()) then
- #current_n() == current_n() + 1 - #current_n() == current_n() + 1
- else - else
...@@ -222,6 +225,26 @@ namespace dlib ...@@ -222,6 +225,26 @@ namespace dlib
presented to this object so far. presented to this object so far.
!*/ !*/
T skewness (
) const;
/*!
requires
- current_n() > 2
ensures
- returns the unbiased sample skewness of all the values presented
to this object so far.
!*/
T ex_kurtosis(
) const;
/*!
requires
- current_n() > 3
ensures
- returns the unbiased sample kurtosis of all the values presented
to this object so far.
!*/
T max ( T max (
) const; ) const;
/*! /*!
......
...@@ -10,6 +10,8 @@ ...@@ -10,6 +10,8 @@
#include <dlib/rand.h> #include <dlib/rand.h>
#include <dlib/svm.h> #include <dlib/svm.h>
#include <algorithm> #include <algorithm>
#include <dlib/matrix.h>
#include <cmath>
#include "tester.h" #include "tester.h"
...@@ -239,6 +241,90 @@ namespace ...@@ -239,6 +241,90 @@ namespace
} }
void test_skewness_and_kurtosis_1()
{
dlib::rand rnum;
running_stats<double> rs1;
double tp = 0;
rnum.set_seed("DlibRocks");
for(int i = 0; i< 1000000; i++)
{
tp = rnum.get_random_gaussian();
rs1.add(tp);
}
// check the unbiased skewness and excess kurtosis of one million Gaussian
// draws are both near zero.
DLIB_TEST(abs(rs1.skewness()) < 0.1);
DLIB_TEST(abs(rs1.ex_kurtosis()) < 0.1);
}
void test_skewness_and_kurtosis_2()
{
string str = "DlibRocks";
for(int j = 0; j<5 ; j++)
{
matrix<double,1,100000> dat;
dlib::rand rnum;
running_stats<double> rs1;
double tp = 0;
double n = 100000;
double xb = 0;
double sknum = 0;
double skdenom = 0;
double unbi_skew = 0;
double exkurnum = 0;
double exkurdenom = 0;
double unbi_exkur = 0;
random_shuffle(str.begin(), str.end());
rnum.set_seed(str);
for(int i = 0; i<n; i++)
{
tp = rnum.get_random_gaussian();
rs1.add(tp);
dat(i)=tp;
xb += dat(i);
}
xb = xb/n;
for(int i = 0; i < n; i++ )
{
sknum += pow(dat(i) - xb,3);
skdenom += pow(dat(i) - xb,2);
exkurnum += pow(dat(i) - xb,4);
exkurdenom += pow(dat(i)-xb,2);
}
sknum = sknum/n;
skdenom = pow(skdenom/n,1.5);
exkurnum = exkurnum/n;
exkurdenom = pow(exkurdenom/n,2);
unbi_skew = sqrt(n*(n-1))/(n-2)*sknum/skdenom;
unbi_exkur = (n-1)*((n+1)*(exkurnum/exkurdenom-3)+6)/((n-2)*(n-3));
dlog << LINFO << "Skew Diff: " << unbi_skew - rs1.skewness();
dlog << LINFO << "Kur Diff: " << unbi_exkur - rs1.ex_kurtosis();
// Test an alternative implementation of the unbiased skewness and excess
// kurtosis against the one in running_stats.
DLIB_TEST(abs(unbi_skew - rs1.skewness()) < 1e-10);
DLIB_TEST(abs(unbi_exkur - rs1.ex_kurtosis()) < 1e-10);
}
}
void test_randomize_samples() void test_randomize_samples()
{ {
std::vector<unsigned int> t(15),u(15),v(15); std::vector<unsigned int> t(15),u(15),v(15);
...@@ -371,6 +457,8 @@ namespace ...@@ -371,6 +457,8 @@ namespace
test_random_subset_selector2(); test_random_subset_selector2();
test_running_covariance(); test_running_covariance();
test_running_stats(); test_running_stats();
test_skewness_and_kurtosis_1();
test_skewness_and_kurtosis_2();
test_randomize_samples(); test_randomize_samples();
test_randomize_samples2(); test_randomize_samples2();
another_test(); another_test();
......
...@@ -70,6 +70,7 @@ add_example(server_http_ex) ...@@ -70,6 +70,7 @@ add_example(server_http_ex)
add_example(server_iostream_ex) add_example(server_iostream_ex)
add_example(sockets_ex) add_example(sockets_ex)
add_example(sockstreambuf_ex) add_example(sockstreambuf_ex)
add_example(running_stats_ex)
add_example(std_allocator_ex) add_example(std_allocator_ex)
add_example(surf_ex) add_example(surf_ex)
add_example(svm_ex) add_example(svm_ex)
......
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
This is an example illustrating the use of the running_stats object from the dlib C++
Library. It is a simple tool for computing basic statistics on a stream of numbers.
In this example, we sample 100 points from the sinc function and then then compute the
unbiased sample mean, variance, skewness, and excess kurtosis.
*/
#include <iostream>
#include <vector>
#include <dlib/statistics.h>
#include <dlib/numeric_constants.h> // for pi
using namespace std;
using namespace dlib;
// Here we define the sinc function so that we may generate sample data. We compute the mean,
// variance, skewness, and excess kurtosis of this sample data.
double sinc(double x)
{
if (x == 0)
return 1;
return sin(x)/x;
}
int main()
{
running_stats<double> rs;
double tp1 = 0;
double tp2 = 0;
// We first generate the data and add it sequentially to our running_stats object. We
// then print every fifth data point.
for (int x = 1; x <= 100; x++)
{
tp1 = x/100.0;
tp2 = sinc(pi*x/100.0);
rs.add(tp2);
if(x % 5 == 0)
{
cout << " x = " << tp1 << " sinc(x) = " << tp2 << endl;
}
}
// Finally, we compute and print the mean, variance, skewness, and excess kurtosis of
// our data.
cout << endl;
cout << "Mean: " << rs.mean() << endl;
cout << "Variance: " << rs.variance() << endl;
cout << "Skewness: " << rs.skewness() << endl;
cout << "Excess Kurtosis " << rs.ex_kurtosis() << endl;
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment