#include <algorithm>
#include <cassert>
#include <cmath>
#include <functional>
#include <iterator>
#include <iostream>
#include <numeric>
#include <ostream>
#include <vector>
#include <boost/math/distributions/chi_squared.hpp>
//From http://www.richelbilderbeek.nl/CppGetCumulativeDensityNormal.htm
//Adapted from http://www.ma.ic.ac.uk/~mdavis/course_material/MOP/CumNormDist.txt
double GetCumulativeDensityNormal(const double x)
{
const double c0 = 0.2316419;
const double c1 = 1.330274429;
const double c2 = 1.821255978;
const double c3 = 1.781477937;
const double c4 = 0.356563782;
const double c5 = 0.319381530;
const double c6 = 0.398942280401;
const double negative = (x < 0 ? 1.0 : 0.0);
const double xPos = (x < 0.0 ? -x : x);
const double k = 1.0 / ( 1.0 + (c0 * xPos));
const double y1 = (((((((c1*k-c2)*k)+c3)*k)-c4)*k)+c5)*k;
const double y2 = 1.0 - (c6*std::exp(-0.5*xPos*xPos)*y1);
return ((1.0-negative)*y2) + (negative*(1.0-y2));
}
//From http://www.richelbilderbeek.nl/CppGetCumulativeDensityNormal.htm
double GetCumulativeDensityNormal(
const double x,
const double mean,
const double stddev)
{
return GetCumulativeDensityNormal(
(x-mean) / stddev);
}
//From htpp://www.richelbilderbeek.nl/CppGetMean.htm
double GetMean(const std::vector<double>& v)
{
return std::accumulate(v.begin(),v.end(),0.0) / static_cast<double>(v.size());
}
template <class T> struct SquareAccumulator
: public std::binary_function<T,T,T>
{
const T operator()(const T& sum, const T& x) const
{ return sum+(x*x); }
};
//From http://www.richelbilderbeek.nl/CppGetStdDev.htm
double GetStdDev(const std::vector<double>& v)
{
assert(v.size() > 1
&& "Can only calculate standard deviations from "
"data sets with size 2 or larger");
const double sum_x
= std::accumulate(v.begin(),v.end(),0.0);
const double sum_x_squared
= std::accumulate(v.begin(),v.end(),
0.0,SquareAccumulator<double>());
const double sz = static_cast<double>(v.size());
return std::sqrt(((sz*sum_x_squared)-(sum_x*sum_x))
/ (sz*(sz-1.0)));
}
//Data from:
// * David Heath. An introduction to experimental
// design and statistics for biology. 1995.
// ISBN: 1-85728-132-2 PB. Page 149, box 6.9:
// Chi-squared goodness-of-fit test to a normal
// distribution: body lengths of the
// isopod (Sphaeroma Rugicauda)
//Isopod sizes in millimetres
const std::vector<double> GetIsopodSizes()
{
std::vector<double> v;
for (int i=0; i!= 5; ++i) v.push_back(3.65);
for (int i=0; i!= 9; ++i) v.push_back(3.85);
for (int i=0; i!=20; ++i) v.push_back(4.05);
for (int i=0; i!=30; ++i) v.push_back(4.25);
for (int i=0; i!=29; ++i) v.push_back(4.45);
for (int i=0; i!=44; ++i) v.push_back(4.65);
for (int i=0; i!=44; ++i) v.push_back(4.85);
for (int i=0; i!=69; ++i) v.push_back(5.05);
for (int i=0; i!=50; ++i) v.push_back(5.25);
for (int i=0; i!=50; ++i) v.push_back(5.45);
for (int i=0; i!=29; ++i) v.push_back(5.65);
for (int i=0; i!=14; ++i) v.push_back(5.85);
for (int i=0; i!= 4; ++i) v.push_back(6.05);
for (int i=0; i!= 1; ++i) v.push_back(6.25);
for (int i=0; i!= 2; ++i) v.push_back(6.45);
//For fun, let's shuffle these values
std::random_shuffle(v.begin(),v.end());
return v;
}
const std::vector<std::pair<double,double> > GetRanges(
const std::size_t n_categories)
{
std::vector<std::pair<double,double> > v;
double lower_limit = 3.45;
double range_bandwidth = 0.2;
for (std::size_t i=0; i!=n_categories; ++i)
{
v.push_back(std::make_pair(
lower_limit,lower_limit+range_bandwidth));
lower_limit+=range_bandwidth;
}
return v;
}
const std::vector<int> Tally(
const std::vector<double>& values,
const std::vector<std::pair<double,double> >& ranges)
{
std::vector<int> tally(ranges.size());
const std::size_t n_values = values.size();
const std::size_t n_ranges = ranges.size();
for (std::size_t value_index = 0; value_index!=n_values; ++value_index)
{
assert(value_index < values.size());
const double value = values[value_index];
for (std::size_t range_index = 0; range_index!=n_ranges; ++range_index)
{
const std::pair<double,double> &range = ranges[range_index];
if ( range.first < value && value < range.second)
{
++tally[range_index];
break;
}
}
}
assert( std::accumulate(tally.begin(),tally.end(),0)
== static_cast<int>(values.size())
&& "Assume all values can be tallied in existing ranges");
return tally;
}
const std::vector<double> GetTallyExpected(
const std::vector<std::pair<double,double> >& ranges,
const double mean,
const double stdDev,
const std::size_t n)
{
const std::size_t sz = ranges.size();
std::vector<double> v(sz,0.0);
for (std::size_t i = 0; i!=sz; ++i)
{
const double x_left = ranges[i].first;
const double x_right = ranges[i].second;
assert(x_left < x_right);
const double surface_left
= GetCumulativeDensityNormal(x_left ,mean,stdDev)
* static_cast<double>(n);
const double surface_right
= GetCumulativeDensityNormal(x_right,mean,stdDev)
* static_cast<double>(n);
assert(surface_left < surface_right);
v[i] = surface_right - surface_left;
}
return v;
}
const std::vector<double> CalculateRelativeError(
const std::vector<int>& tally_measured,
const std::vector<double>& tally_expected)
{
assert(tally_measured.size() == tally_expected.size());
const std::size_t sz = tally_measured.size();
std::vector<double> v(sz);
for (std::size_t i = 0; i!=sz; ++i)
{
const double obs = static_cast<double>(tally_measured[i]);
const double exp = tally_expected[i];
assert(exp!=0.0);
v[i] = ((obs-exp)*(obs-exp))/exp;
}
return v;
}
int main()
{
const std::size_t n_categories = 15;
const double degrees_of_freedom
= static_cast<double>(n_categories)
- 1.0 //We need to calculate the mean ourselves
- 1.0 //We need to calculate the standard deviation ourselves
- 1.0; //We need to calculate the sample size ourselves
const std::vector<double> isopod_sizes = GetIsopodSizes();
const double mean = GetMean(isopod_sizes);
const double stdDev = GetStdDev(isopod_sizes);
const std::vector<std::pair<double,double> > ranges = GetRanges(n_categories);
assert(n_categories == ranges.size());
const std::vector<int> tally_measured = Tally(isopod_sizes,ranges);
const std::vector<double> tally_expected
= GetTallyExpected(ranges,mean,stdDev,isopod_sizes.size());
const std::vector<double> rel_error
= CalculateRelativeError(tally_measured,tally_expected);
for (std::size_t i=0; i!=n_categories; ++i)
{
std::cout
<< ranges[i].first << "-" << ranges[i].second << ":\t"
<< tally_measured[i] << "\t"
<< tally_expected[i] << "\t"
<< rel_error[i] << "\n";
}
const double significance_level = 0.05;
const double chi_squared_value
= std::accumulate(rel_error.begin(),rel_error.end(),0.0);
boost::math::chi_squared_distribution<double> distribution(degrees_of_freedom);
const double critical_value
= boost::math::quantile(boost::math::complement(distribution, significance_level));
std::cout
<< "Mean size: " << mean
<< "\nStdDev size: " << stdDev
<< "\nSUM observer: "
<< std::accumulate(tally_measured.begin(),tally_measured.end(), 0)
<< "\nSUM expected: "
<< std::accumulate(tally_expected.begin(),tally_expected.end(),0.0)
<< "\nChi-square value: " << chi_squared_value
<< "\nSignificance level: " << significance_level
<< "\nDegrees of freedom: " << degrees_of_freedom
<< "\nCritical value: " << critical_value << '\n';
if (chi_squared_value < critical_value)
{
std::cout
<< "Cannot reject null hypothesis that the measured values "
"do follow a normal distribution" << std::endl;
}
else
{
std::cout
<< "Reject null hypothesis that the measured values "
"do follow a normal distribution" << std::endl;
}
}
|