Added a numerical constants file numeric_constants.h. Expanded the

running_stats object in statistics.h by including two functions that compute the unbiased empirical skewness and kurtosis of a set of real numbers. Added unit tests for these functions in statistics.cpp.

Added a numerical constants file numeric_constants.h. Expanded the
running_stats object in statistics.h by including two functions that compute the unbiased empirical skewness and kurtosis of a set of real numbers. Added unit tests for these functions in statistics.cpp.
a75645b1 · Steve Taylor · 5977b0b8 · a75645b1 · a75645b1 · a75645b1
Commit a75645b1 authored Mar 17, 2013 by Steve Taylor
6 changed files
--- a/dlib/numeric_constants.h
+++ b/dlib/numeric_constants.h
+//Copyright (C) 2013 Steve Taylor (steve98654@gmail.com), Davis E. King
+//License: Boost Software License.  See LICENSE.txt for full license.
+#ifndef DLIB_NUMERIC_CONSTANTs_H_
+#define DLIB_NUMERIC_CONSTANTs_H_
+
+namespace dlib 
+{
+
+    // pi -- Pi
+    const double pi = 3.1415926535897932385;
+
+    // e  -- Euler's Constant
+    const double e = 2.7182818284590452354;
+
+    // sqrt_2 -- the square root of 2
+    const double sqrt_2 = 1.4142135623730950488;
+
+    // sqrt_3 -- the square root of 3
+    const double sqrt_3 = 1.7320508075688772935;
+
+    // light_spd -- the speed of light in vacuum in meters per second
+    const double light_spd = 2.99792458e8;
+
+    // newton_G  -- Newton's gravitational constant (in metric units of m^3/(kg*s^2))
+    const double newton_G = 6.67384e-11;
+
+    // planck_cst -- Planck's constant (in units of Joules * seconds) 
+    const double planck_cst = 6.62606957e-34;
+
+}
+
+#endif //DLIB_NUMERIC_CONSTANTs_H_
+
--- a/dlib/statistics/statistics.h
+++ b/dlib/statistics/statistics.h
-// Copyright (C) 2008  Davis E. King (davis@dlib.net)
+// Copyright (C) 2008  Davis E. King (davis@dlib.net), Steve Taylor
 // License: Boost Software License   See LICENSE.txt for the full license.
 #ifndef DLIB_STATISTICs_
 #define DLIB_STATISTICs_
@@ -35,7 +35,10 @@ namespace dlib
        void clear()
        {
            sum = 0;
-            sum_sqr = 0;
+            sum_sqr  = 0;
+            sum_cub  = 0;
+            sum_four = 0;
+
            n = 0;
            maximum_n = std::numeric_limits<T>::max();
            min_value = std::numeric_limits<T>::infinity();
@@ -53,8 +56,10 @@ namespace dlib
            const T& val
        )
        {
-            sum     += val;
-            sum_sqr += val*val;
+            sum      += val;
+            sum_sqr  += val*val;
+            sum_cub  += cubed(val);
+            sum_four += quaded(val);

            if (val < min_value)
                min_value = val;
@@ -145,6 +150,43 @@ namespace dlib
            return std::sqrt(variance());
        }

+        T skewness (
+        ) const
+        {  
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 2,
+                "\tT running_stats::skewness"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+            );
+
+            T temp  = 1/n;
+            T temp1 = std::sqrt(n*(n-1))/(n-2); 
+            temp    = temp1*temp*(sum_cub - 3*sum_sqr*sum*temp + 2*cubed(sum)*temp*temp)/
+                      (std::sqrt(std::pow(temp*(sum_sqr-sum*sum*temp),3)));
+
+            return temp; 
+        }
+
+        T ex_kurtosis (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 3,
+                "\tT running_stats::kurtosis"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+            );
+
+            T temp = 1/n;
+            T m4   = temp*(sum_four - 4*sum_cub*sum*temp+6*sum_sqr*sum*sum*temp*temp
+                     -3*quaded(sum)*cubed(temp));
+            T m2   = temp*(sum_sqr-sum*sum*temp);
+            temp   = (n-1)*((n+1)*m4/(m2*m2)-3*(n-1))/((n-2)*(n-3));
+
+            return temp; 
+        }
+
        T scale (
            const T& val
        ) const
@@ -175,6 +217,8 @@ namespace dlib

            temp.sum += rhs.sum;
            temp.sum_sqr += rhs.sum_sqr;
+            temp.sum_cub += rhs.sum_cub;
+            temp.sum_four += rhs.sum_four;
            temp.n += rhs.n;
            temp.min_value = std::min(rhs.min_value, min_value);
            temp.max_value = std::max(rhs.max_value, max_value);
@@ -196,10 +240,15 @@ namespace dlib
    private:
        T sum;
        T sum_sqr;
+        T sum_cub;
+        T sum_four;
        T n;
        T maximum_n;
        T min_value;
        T max_value;
+    
+        T cubed  (const T& val) const {return val*val*val; }
+        T quaded (const T& val) const {return val*val*val*val; }
    };

    template <typename T>
@@ -208,8 +257,13 @@ namespace dlib
        std::ostream& out 
    )
    {
+        int version = 2;
+        serialize(version, out);
+
        serialize(item.sum, out);
        serialize(item.sum_sqr, out);
+        serialize(item.sum_cub, out);
+        serialize(item.sum_four, out);
        serialize(item.n, out);
        serialize(item.maximum_n, out);
        serialize(item.min_value, out);
@@ -222,8 +276,15 @@ namespace dlib
        std::istream& in
    ) 
    {
+        int version = 0;
+        deserialize(version, in);
+        if (version != 2)
+            throw dlib::serialization_error("Unexpected version number found while deserializing dlib::running_stats object.");
+
        deserialize(item.sum, in);
        deserialize(item.sum_sqr, in);
+        deserialize(item.sum_cub, in);
+        deserialize(item.sum_four, in);
        deserialize(item.n, in);
        deserialize(item.maximum_n, in);
        deserialize(item.min_value, in);

--- a/dlib/statistics/statistics_abstract.h
+++ b/dlib/statistics/statistics_abstract.h
-// Copyright (C) 2008  Davis E. King (davis@dlib.net)
+// Copyright (C) 2008  Davis E. King (davis@dlib.net), Steve Taylor
 // License: Boost Software License   See LICENSE.txt for the full license.
 #undef DLIB_STATISTICs_ABSTRACT_
 #ifdef DLIB_STATISTICs_ABSTRACT_
@@ -121,8 +121,8 @@ namespace dlib
                - current_n() == 0

            WHAT THIS OBJECT REPRESENTS
-                This object represents something that can compute the running mean and
-                variance of a stream of real numbers.  
+                This object represents something that can compute the running mean, 
+                variance, skewness, and excess kurtosis of a stream of real numbers.  

                As this object accumulates more and more numbers it will be the case
                that each new number impacts the current mean and variance estimate
@@ -184,10 +184,13 @@ namespace dlib
        );
        /*!
            ensures
-                - updates the mean and variance stored in this object so that
-                  the new value is factored into them
-                - #mean() == mean()*current_n()/(current_n()+1) + val/(current_n()+1)
-                - #variance() == the updated variance that takes this new value into account
+                - updates the mean, variance, skewness, and kurtosis stored in this object
+                  so that the new value is factored into them.
+                - #mean() == mean()*current_n()/(current_n()+1) + val/(current_n()+1).
+                  (i.e. the updated mean value that takes the new value into account)
+                - #variance() == the updated variance that takes this new value into account.
+                - #skewness() == the updated skewness that takes this new value into account.
+                - #ex_kurtosis() == the updated kurtosis that takes this new value into account.
                - if (current_n() < max_n()) then
                    - #current_n() == current_n() + 1
                - else
@@ -222,6 +225,26 @@ namespace dlib
                  presented to this object so far.
        !*/

+        T skewness (
+        ) const;
+        /*!
+            requires
+                - current_n() > 2
+            ensures
+                - returns the unbiased sample skewness of all the values presented 
+                  to this object so far.
+        !*/
+
+        T ex_kurtosis(
+        ) const;
+        /*!
+            requires
+                - current_n() > 3
+            ensures
+                - returns the unbiased sample kurtosis of all the values presented 
+                  to this object so far.
+        !*/
+
        T max (
        ) const;
        /*!

--- a/dlib/test/statistics.cpp
+++ b/dlib/test/statistics.cpp
@@ -10,6 +10,8 @@
 #include <dlib/rand.h>
 #include <dlib/svm.h>
 #include <algorithm>
+#include <dlib/matrix.h>
+#include <cmath>

 #include "tester.h"

@@ -239,6 +241,90 @@ namespace

        }

+        void test_skewness_and_kurtosis_1()
+        {
+
+            dlib::rand rnum;
+            running_stats<double> rs1;
+
+            double tp = 0;
+
+            rnum.set_seed("DlibRocks");
+
+            for(int i = 0; i< 1000000; i++)
+            {
+                tp = rnum.get_random_gaussian();
+                rs1.add(tp);
+            }   
+
+            // check the unbiased skewness and excess kurtosis of one million Gaussian
+            // draws are both near zero.
+            DLIB_TEST(abs(rs1.skewness()) < 0.1);
+            DLIB_TEST(abs(rs1.ex_kurtosis()) < 0.1);
+        }
+
+        void test_skewness_and_kurtosis_2()
+        {
+
+            string str = "DlibRocks";
+
+            for(int j = 0; j<5 ; j++)
+            {
+                matrix<double,1,100000> dat;
+                dlib::rand rnum;
+                running_stats<double> rs1;
+     
+                double tp = 0;
+                double n = 100000;
+                double xb = 0;
+    
+                double sknum = 0;
+                double skdenom = 0;
+                double unbi_skew = 0;
+    
+                double exkurnum = 0;
+                double exkurdenom = 0;
+                double unbi_exkur = 0;
+
+                random_shuffle(str.begin(), str.end());
+                rnum.set_seed(str);
+
+                for(int i = 0; i<n; i++)
+                {
+                    tp = rnum.get_random_gaussian();
+                    rs1.add(tp);
+                    dat(i)=tp;
+                    xb += dat(i);
+                }   
+    
+                xb = xb/n;
+
+                for(int i = 0; i < n; i++ )
+                { 
+                    sknum += pow(dat(i) - xb,3);
+                    skdenom += pow(dat(i) - xb,2);
+                    exkurnum += pow(dat(i) - xb,4);
+                    exkurdenom += pow(dat(i)-xb,2);
+                }
+
+                sknum = sknum/n;
+                skdenom = pow(skdenom/n,1.5);
+                exkurnum = exkurnum/n;
+                exkurdenom = pow(exkurdenom/n,2);
+    
+                unbi_skew = sqrt(n*(n-1))/(n-2)*sknum/skdenom;
+                unbi_exkur = (n-1)*((n+1)*(exkurnum/exkurdenom-3)+6)/((n-2)*(n-3));
+
+                dlog << LINFO << "Skew Diff: " <<  unbi_skew - rs1.skewness();
+                dlog << LINFO << "Kur Diff: " << unbi_exkur - rs1.ex_kurtosis();
+                
+                // Test an alternative implementation of the unbiased skewness and excess
+                // kurtosis against the one in running_stats.
+                DLIB_TEST(abs(unbi_skew - rs1.skewness()) < 1e-10);
+                DLIB_TEST(abs(unbi_exkur - rs1.ex_kurtosis()) < 1e-10);
+            }
+        }
+
        void test_randomize_samples()
        {
            std::vector<unsigned int> t(15),u(15),v(15);
@@ -371,6 +457,8 @@ namespace
            test_random_subset_selector2();
            test_running_covariance();
            test_running_stats();
+            test_skewness_and_kurtosis_1();
+            test_skewness_and_kurtosis_2();
            test_randomize_samples();
            test_randomize_samples2();
            another_test();

--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -70,6 +70,7 @@ add_example(server_http_ex)
 add_example(server_iostream_ex)
 add_example(sockets_ex)
 add_example(sockstreambuf_ex)
+add_example(running_stats_ex)
 add_example(std_allocator_ex)
 add_example(surf_ex)
 add_example(svm_ex)

--- a/examples/running_stats_ex.cpp
+++ b/examples/running_stats_ex.cpp
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+    This is an example illustrating the use of the running_stats object from the dlib C++
+    Library.  It is a simple tool for computing basic statistics on a stream of numbers.
+    In this example, we sample 100 points from the sinc function and then then compute the
+    unbiased sample mean, variance, skewness, and excess kurtosis.
+
+*/    
+#include <iostream>
+#include <vector>
+#include <dlib/statistics.h>
+#include <dlib/numeric_constants.h> // for pi
+
+using namespace std;
+using namespace dlib;
+
+// Here we define the sinc function so that we may generate sample data. We compute the mean,
+// variance, skewness, and excess kurtosis of this sample data.
+
+double sinc(double x)
+{
+    if (x == 0)
+        return 1;
+    return sin(x)/x;
+}
+
+int main()
+{
+    running_stats<double> rs;
+
+    double tp1 = 0;
+    double tp2 = 0;
+
+    // We first generate the data and add it sequentially to our running_stats object.  We
+    // then print every fifth data point.
+    for (int x = 1; x <= 100; x++)
+    {
+        tp1 = x/100.0;
+        tp2 = sinc(pi*x/100.0);
+        rs.add(tp2);
+
+        if(x % 5 == 0)
+        {
+            cout << " x = " << tp1 << " sinc(x) = " << tp2 << endl;
+        }
+    }
+
+    // Finally, we compute and print the mean, variance, skewness, and excess kurtosis of
+    // our data.
+
+    cout << endl;
+    cout << "Mean:           " << rs.mean() << endl;
+    cout << "Variance:       " << rs.variance() << endl;
+    cout << "Skewness:       " << rs.skewness() << endl;
+    cout << "Excess Kurtosis " << rs.ex_kurtosis() << endl;
+
+    return 0;
+}
+