Commit a75645b1 authored by Steve Taylor's avatar Steve Taylor

Added a numerical constants file numeric_constants.h. Expanded the

running_stats object in statistics.h by including two functions that
compute the unbiased empirical skewness and kurtosis of a set
of real numbers.  Added unit tests for these functions in statistics.cpp.
parent 5977b0b8
//Copyright (C) 2013 Steve Taylor (steve98654@gmail.com), Davis E. King
//License: Boost Software License. See LICENSE.txt for full license.
#ifndef DLIB_NUMERIC_CONSTANTs_H_
#define DLIB_NUMERIC_CONSTANTs_H_
namespace dlib
{
// pi -- Pi
const double pi = 3.1415926535897932385;
// e -- Euler's Constant
const double e = 2.7182818284590452354;
// sqrt_2 -- the square root of 2
const double sqrt_2 = 1.4142135623730950488;
// sqrt_3 -- the square root of 3
const double sqrt_3 = 1.7320508075688772935;
// light_spd -- the speed of light in vacuum in meters per second
const double light_spd = 2.99792458e8;
// newton_G -- Newton's gravitational constant (in metric units of m^3/(kg*s^2))
const double newton_G = 6.67384e-11;
// planck_cst -- Planck's constant (in units of Joules * seconds)
const double planck_cst = 6.62606957e-34;
}
#endif //DLIB_NUMERIC_CONSTANTs_H_
// Copyright (C) 2008 Davis E. King (davis@dlib.net)
// Copyright (C) 2008 Davis E. King (davis@dlib.net), Steve Taylor
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_STATISTICs_
#define DLIB_STATISTICs_
......@@ -35,7 +35,10 @@ namespace dlib
void clear()
{
sum = 0;
sum_sqr = 0;
sum_sqr = 0;
sum_cub = 0;
sum_four = 0;
n = 0;
maximum_n = std::numeric_limits<T>::max();
min_value = std::numeric_limits<T>::infinity();
......@@ -53,8 +56,10 @@ namespace dlib
const T& val
)
{
sum += val;
sum_sqr += val*val;
sum += val;
sum_sqr += val*val;
sum_cub += cubed(val);
sum_four += quaded(val);
if (val < min_value)
min_value = val;
......@@ -145,6 +150,43 @@ namespace dlib
return std::sqrt(variance());
}
T skewness (
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(current_n() > 2,
"\tT running_stats::skewness"
<< "\n\tyou have to add some numbers to this object first"
<< "\n\tthis: " << this
);
T temp = 1/n;
T temp1 = std::sqrt(n*(n-1))/(n-2);
temp = temp1*temp*(sum_cub - 3*sum_sqr*sum*temp + 2*cubed(sum)*temp*temp)/
(std::sqrt(std::pow(temp*(sum_sqr-sum*sum*temp),3)));
return temp;
}
T ex_kurtosis (
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(current_n() > 3,
"\tT running_stats::kurtosis"
<< "\n\tyou have to add some numbers to this object first"
<< "\n\tthis: " << this
);
T temp = 1/n;
T m4 = temp*(sum_four - 4*sum_cub*sum*temp+6*sum_sqr*sum*sum*temp*temp
-3*quaded(sum)*cubed(temp));
T m2 = temp*(sum_sqr-sum*sum*temp);
temp = (n-1)*((n+1)*m4/(m2*m2)-3*(n-1))/((n-2)*(n-3));
return temp;
}
T scale (
const T& val
) const
......@@ -175,6 +217,8 @@ namespace dlib
temp.sum += rhs.sum;
temp.sum_sqr += rhs.sum_sqr;
temp.sum_cub += rhs.sum_cub;
temp.sum_four += rhs.sum_four;
temp.n += rhs.n;
temp.min_value = std::min(rhs.min_value, min_value);
temp.max_value = std::max(rhs.max_value, max_value);
......@@ -196,10 +240,15 @@ namespace dlib
private:
T sum;
T sum_sqr;
T sum_cub;
T sum_four;
T n;
T maximum_n;
T min_value;
T max_value;
T cubed (const T& val) const {return val*val*val; }
T quaded (const T& val) const {return val*val*val*val; }
};
template <typename T>
......@@ -208,8 +257,13 @@ namespace dlib
std::ostream& out
)
{
int version = 2;
serialize(version, out);
serialize(item.sum, out);
serialize(item.sum_sqr, out);
serialize(item.sum_cub, out);
serialize(item.sum_four, out);
serialize(item.n, out);
serialize(item.maximum_n, out);
serialize(item.min_value, out);
......@@ -222,8 +276,15 @@ namespace dlib
std::istream& in
)
{
int version = 0;
deserialize(version, in);
if (version != 2)
throw dlib::serialization_error("Unexpected version number found while deserializing dlib::running_stats object.");
deserialize(item.sum, in);
deserialize(item.sum_sqr, in);
deserialize(item.sum_cub, in);
deserialize(item.sum_four, in);
deserialize(item.n, in);
deserialize(item.maximum_n, in);
deserialize(item.min_value, in);
......
// Copyright (C) 2008 Davis E. King (davis@dlib.net)
// Copyright (C) 2008 Davis E. King (davis@dlib.net), Steve Taylor
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_STATISTICs_ABSTRACT_
#ifdef DLIB_STATISTICs_ABSTRACT_
......@@ -121,8 +121,8 @@ namespace dlib
- current_n() == 0
WHAT THIS OBJECT REPRESENTS
This object represents something that can compute the running mean and
variance of a stream of real numbers.
This object represents something that can compute the running mean,
variance, skewness, and excess kurtosis of a stream of real numbers.
As this object accumulates more and more numbers it will be the case
that each new number impacts the current mean and variance estimate
......@@ -184,10 +184,13 @@ namespace dlib
);
/*!
ensures
- updates the mean and variance stored in this object so that
the new value is factored into them
- #mean() == mean()*current_n()/(current_n()+1) + val/(current_n()+1)
- #variance() == the updated variance that takes this new value into account
- updates the mean, variance, skewness, and kurtosis stored in this object
so that the new value is factored into them.
- #mean() == mean()*current_n()/(current_n()+1) + val/(current_n()+1).
(i.e. the updated mean value that takes the new value into account)
- #variance() == the updated variance that takes this new value into account.
- #skewness() == the updated skewness that takes this new value into account.
- #ex_kurtosis() == the updated kurtosis that takes this new value into account.
- if (current_n() < max_n()) then
- #current_n() == current_n() + 1
- else
......@@ -222,6 +225,26 @@ namespace dlib
presented to this object so far.
!*/
T skewness (
) const;
/*!
requires
- current_n() > 2
ensures
- returns the unbiased sample skewness of all the values presented
to this object so far.
!*/
T ex_kurtosis(
) const;
/*!
requires
- current_n() > 3
ensures
- returns the unbiased sample kurtosis of all the values presented
to this object so far.
!*/
T max (
) const;
/*!
......
......@@ -10,6 +10,8 @@
#include <dlib/rand.h>
#include <dlib/svm.h>
#include <algorithm>
#include <dlib/matrix.h>
#include <cmath>
#include "tester.h"
......@@ -239,6 +241,90 @@ namespace
}
void test_skewness_and_kurtosis_1()
{
dlib::rand rnum;
running_stats<double> rs1;
double tp = 0;
rnum.set_seed("DlibRocks");
for(int i = 0; i< 1000000; i++)
{
tp = rnum.get_random_gaussian();
rs1.add(tp);
}
// check the unbiased skewness and excess kurtosis of one million Gaussian
// draws are both near zero.
DLIB_TEST(abs(rs1.skewness()) < 0.1);
DLIB_TEST(abs(rs1.ex_kurtosis()) < 0.1);
}
void test_skewness_and_kurtosis_2()
{
string str = "DlibRocks";
for(int j = 0; j<5 ; j++)
{
matrix<double,1,100000> dat;
dlib::rand rnum;
running_stats<double> rs1;
double tp = 0;
double n = 100000;
double xb = 0;
double sknum = 0;
double skdenom = 0;
double unbi_skew = 0;
double exkurnum = 0;
double exkurdenom = 0;
double unbi_exkur = 0;
random_shuffle(str.begin(), str.end());
rnum.set_seed(str);
for(int i = 0; i<n; i++)
{
tp = rnum.get_random_gaussian();
rs1.add(tp);
dat(i)=tp;
xb += dat(i);
}
xb = xb/n;
for(int i = 0; i < n; i++ )
{
sknum += pow(dat(i) - xb,3);
skdenom += pow(dat(i) - xb,2);
exkurnum += pow(dat(i) - xb,4);
exkurdenom += pow(dat(i)-xb,2);
}
sknum = sknum/n;
skdenom = pow(skdenom/n,1.5);
exkurnum = exkurnum/n;
exkurdenom = pow(exkurdenom/n,2);
unbi_skew = sqrt(n*(n-1))/(n-2)*sknum/skdenom;
unbi_exkur = (n-1)*((n+1)*(exkurnum/exkurdenom-3)+6)/((n-2)*(n-3));
dlog << LINFO << "Skew Diff: " << unbi_skew - rs1.skewness();
dlog << LINFO << "Kur Diff: " << unbi_exkur - rs1.ex_kurtosis();
// Test an alternative implementation of the unbiased skewness and excess
// kurtosis against the one in running_stats.
DLIB_TEST(abs(unbi_skew - rs1.skewness()) < 1e-10);
DLIB_TEST(abs(unbi_exkur - rs1.ex_kurtosis()) < 1e-10);
}
}
void test_randomize_samples()
{
std::vector<unsigned int> t(15),u(15),v(15);
......@@ -371,6 +457,8 @@ namespace
test_random_subset_selector2();
test_running_covariance();
test_running_stats();
test_skewness_and_kurtosis_1();
test_skewness_and_kurtosis_2();
test_randomize_samples();
test_randomize_samples2();
another_test();
......
......@@ -70,6 +70,7 @@ add_example(server_http_ex)
add_example(server_iostream_ex)
add_example(sockets_ex)
add_example(sockstreambuf_ex)
add_example(running_stats_ex)
add_example(std_allocator_ex)
add_example(surf_ex)
add_example(svm_ex)
......
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
This is an example illustrating the use of the running_stats object from the dlib C++
Library. It is a simple tool for computing basic statistics on a stream of numbers.
In this example, we sample 100 points from the sinc function and then then compute the
unbiased sample mean, variance, skewness, and excess kurtosis.
*/
#include <iostream>
#include <vector>
#include <dlib/statistics.h>
#include <dlib/numeric_constants.h> // for pi
using namespace std;
using namespace dlib;
// Here we define the sinc function so that we may generate sample data. We compute the mean,
// variance, skewness, and excess kurtosis of this sample data.
double sinc(double x)
{
if (x == 0)
return 1;
return sin(x)/x;
}
int main()
{
running_stats<double> rs;
double tp1 = 0;
double tp2 = 0;
// We first generate the data and add it sequentially to our running_stats object. We
// then print every fifth data point.
for (int x = 1; x <= 100; x++)
{
tp1 = x/100.0;
tp2 = sinc(pi*x/100.0);
rs.add(tp2);
if(x % 5 == 0)
{
cout << " x = " << tp1 << " sinc(x) = " << tp2 << endl;
}
}
// Finally, we compute and print the mean, variance, skewness, and excess kurtosis of
// our data.
cout << endl;
cout << "Mean: " << rs.mean() << endl;
cout << "Variance: " << rs.variance() << endl;
cout << "Skewness: " << rs.skewness() << endl;
cout << "Excess Kurtosis " << rs.ex_kurtosis() << endl;
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment