Go back to Richel Bilderbeek's homepage.

Go back to Richel Bilderbeek's C++ page.

 

 

 

 

 

(C++) SimpleLinearRegression

 

Technical facts

 

 

 

 

 

 

./CppSimpleLinearRegression/CppSimpleLinearRegression.pri

 

INCLUDEPATH += \
    ../../Classes/CppSimpleLinearRegression

SOURCES += \
    ../../Classes/CppSimpleLinearRegression/simplelinearregression.cpp

HEADERS  += \
    ../../Classes/CppSimpleLinearRegression/simplelinearregression.h

OTHER_FILES += \
    ../../Classes/CppSimpleLinearRegression/Licence.txt

 

 

 

 

 

./CppSimpleLinearRegression/simplelinearregression.h

 

#ifndef SIMPLELINEARREGRESSION_H
#define SIMPLELINEARREGRESSION_H

#include <cassert>
#include <numeric>
#include <string>
#include <vector>

namespace ribi {

template <typename T>
T GetZero() { return T{}; }

//template <>
//double GetZero() { return 0.0; }

//template <typename T> void GetZero(T& t) { t = T(); }
//template <> void GetZero(double& d) { d = 0.0; }


///Class to obtain the best fitting line through all (x,y) pairs
///The best fitting line has form 'y = a.x + b', where
///- a: std::pair<double,double>::first
///- b: std::pair<double,double>::second
struct SimpleLinearRegression
{
  ///Obtain the best fitting line through all (x,y) pairs
  ///The best fitting line has form 'y = a.x + b', where
  ///- a: std::pair<double,double>::first
  ///- b: std::pair<double,double>::second
  template <class X, class Y>
  static auto CalculateBestFit(
    const std::vector<X>& xs,
    const std::vector<Y>& ys) -> const std::pair<decltype(ys[0]/xs[0]),Y>;

  ///Obtain Anscombe's Quartet its x values, for index 1 to (and including) 4
  static std::vector<double> GetAnscombesQuartetX(const int index);

  ///Obtain Anscombe's Quartet its y values, for index 1 to (and including) 4
  static std::vector<double> GetAnscombesQuartetY(const int index);

  static std::string GetVersion() noexcept;
  static std::vector<std::string> GetVersionHistory() noexcept;

private:

  ///From htpp://www.richelbilderbeek.nl/CppCalculateCovariance.htm
  static double CalculateCovariance(
    const std::vector<double>& xs,
    const std::vector<double>& ys);

  ///From htpp://www.richelbilderbeek.nl/CppGetMean.htm
  template <class T>
  static T CalculateMean(
    const std::vector<T>& v);

  template <class X, class Y>
  static auto CalculateSlope(
    const std::vector<X>& xs,
    const std::vector<Y>& ys) -> decltype(ys[0]/xs[0]);

  ///From htpp://www.richelbilderbeek.nl/CppCalculateVariance.htm
  static double CalculateVariance(
    const std::vector<double>& v);

  #ifndef NDEBUG
  static void Test() noexcept;
  #endif
};

template <class X, class Y>
auto SimpleLinearRegression::CalculateBestFit(
  const std::vector<X>& xs,
  const std::vector<Y>& ys) -> const std::pair<decltype(ys[0]/xs[0]),Y>
{
  #ifndef NDEBUG
  Test();
  #endif

  assert(xs.size() == ys.size()
    && "Every coordinat must have exactly one x and one y value");
  const X mean_x { CalculateMean(xs) };
  const Y mean_y { CalculateMean(ys) };
  //const double covariance_xy = CalculateCovariance(xs,ys);
  //const double variance_x = CalculateVariance(xs);
  //const double slope = covariance_xy / variance_x;
  typedef decltype(ys[0]/xs[0]) Slope;
  const Slope slope { CalculateSlope(xs,ys) };
  const Y intercept { mean_y - (slope * mean_x) };
  return std::make_pair(slope,intercept);
}

template <class T>
T SimpleLinearRegression::CalculateMean(
  const std::vector<T>& v)
{
  //static_assert(!std::is_integral<T>(),"Cannot calculate the mean of integers");
  assert(!v.empty());
  return std::accumulate(v.begin(),v.end(),T()) / static_cast<double>(v.size());
}

template <class X, class Y>
auto SimpleLinearRegression::CalculateSlope(
  const std::vector<X>& xs,
  const std::vector<Y>& ys) -> decltype(ys[0]/xs[0])
{
  assert(xs.size() == ys.size());
  typedef decltype(xs[0] * ys[0]) Numerator;
  typedef decltype(xs[0] * xs[0]) Denominator;
  Numerator numerator { GetZero<X>() * GetZero<Y>() };
  Denominator denominator { GetZero<X>() * GetZero<X>() };
  const X x_mean { CalculateMean(xs) };
  const Y y_mean { CalculateMean(ys) };
  const std::size_t sz { xs.size() };
  for (std::size_t i=0; i!=sz; ++i)
  {
    const X x { xs[i] };
    const Y y { ys[i] };
    numerator   += ((x - x_mean) * (y - y_mean));
    denominator += ((x - x_mean) * (x - x_mean));
  }
  return numerator / denominator;
}

//template <> double SimpleLinearRegression::GetZero() { return 0.0; }
//template <class T> T SimpleLinearRegression::GetZero() { return T(); }

} //~namespace ribi

#endif // SIMPLELINEARREGRESSION_H

 

 

 

 

 

./CppSimpleLinearRegression/simplelinearregression.cpp

 

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Weffc++"
#include "simplelinearregression.h"

#include <cassert>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <sstream>

#include "testtimer.h"
#include "trace.h"
#pragma GCC diagnostic pop

double ribi::SimpleLinearRegression::CalculateCovariance(
  const std::vector<double>& xs,
  const std::vector<double>& ys)
{
  assert(xs.size() == ys.size());
  const std::size_t sz { xs.size() };
  std::vector<double> ps(sz);
  for (std::size_t i=0; i!=sz; ++i)
  {
    ps[i] = xs[i] * ys[i];
  }

  const double mean_x { CalculateMean(xs) };
  const double mean_y { CalculateMean(ys) };
  const double mean_p { CalculateMean(ps) };
  return mean_p - (mean_x * mean_y);
}

double ribi::SimpleLinearRegression::CalculateVariance(
  const std::vector<double>& v)
{
  assert(!v.empty());
  const double mean { CalculateMean(v) };


  const double mean_of_squares {
    std::accumulate(v.begin(),v.end(),0.0,
      [](const double init, const double x)
      {
        return init + (x * x);
      }
    ) / static_cast<double>(v.size())
  };

  return mean_of_squares - (mean * mean);
}

std::vector<double> ribi::SimpleLinearRegression::GetAnscombesQuartetX(const int index)
{
  switch (index)
  {
    case 1:
    case 2:
    case 3:
      return { 10.0,8.0,13.0,9.0,11.0,14.0,6.0,4.0,12.0,7.0,5.0 };
    case 4:
      return { 8.0,8.0,8.0,8.0,8.0,8.0,8.0,19.0,8.0,8.0,8.0 };
    default:
    {
      std::stringstream s;
      s << "ERROR ("
        << __func__
        << "): Anscombe's Quartet has index 1 to and including 4, given index is: "
        << index;
      std::cerr << s << std::endl;
      assert(!"Should not get here: ");
      throw std::logic_error(s.str());
    }
  }
}

std::vector<double>
  ribi::SimpleLinearRegression::GetAnscombesQuartetY(const int index)
{
  switch (index)
  {
    case 1:
      return { 8.04,6.95,7.58,8.81,8.33,9.96,7.24,4.26,10.84,4.82,5.68 };
    case 2:
      return { 9.14,8.14,8.74,8.77,9.26,8.1,6.13,3.1,9.13,7.26,4.74 };
    case 3:
      return { 7.46,6.77,12.74,7.11,7.81,8.84,6.08,5.39,8.15,6.42,5.73 };
    case 4:
      return { 6.58,5.76,7.71,8.84,8.47,7.04,5.25,12.5,5.56,7.91,6.89 };
    default:
    {
      std::stringstream s;
      s << "ERROR ("
        << __func__
        << "): Anscombe's Quartet has index 1 to and including 4, given index is: "
        << index;
      std::cerr << s << std::endl;
      assert(!"Should not get here: ");
      throw std::logic_error(s.str());
    }
  }
}

std::string ribi::SimpleLinearRegression::GetVersion() noexcept
{
  return "1.1";
}

std::vector<std::string> ribi::SimpleLinearRegression::GetVersionHistory() noexcept
{
  std::vector<std::string> v {
    "2013-08-27: version 1.0: initial version, as ToolTestSimpleLinearRegressionMainDialog",
    "2013-08-28: version 1.1: renamed to SimpleLinearRegression, templated CalculateBestFit",
  };
  return v;
}

#ifndef NDEBUG
void ribi::SimpleLinearRegression::Test() noexcept
{
  {
    static bool is_tested { false };
    if (is_tested) return;
    is_tested = true;
  }
  const TestTimer test_timer(__func__,__FILE__,1.0);
  {
    const std::vector<double> v { 75.0, 83.0, 96.0, 100.0, 121.0, 125.0 };
    const double variance { CalculateVariance(v) };
    const double expected { 332.666667 };
    assert(std::abs(variance - expected) < 0.0001);
  }
  {
    const std::vector<double> v { 0.23, 0.37, 0.45, 0.49, 0.56, 0.63, 0.63, 0.70, 0.72, 0.82 };
    const double variance { CalculateVariance(v) };
    const double expected { 0.02846 };
    assert(std::abs(variance - expected) < 0.0001);
  }
  for (int i=1; i!=5; ++i) //Human-based counting, following the Ansombe's Quartet indices
  {
    const std::vector<double> xs { GetAnscombesQuartetX(i) };
    const std::vector<double> ys { GetAnscombesQuartetY(i) };
    const std::pair<double,double> p { CalculateBestFit(xs,ys) };

    const double mean_x { CalculateMean(xs) };
    const double mean_y { CalculateMean(ys) };
    const double slope { p.first };
    const double intercept { p.second };

    //const double variance_x = CalculateVariance(xs);
    //const double variance_y = CalculateVariance(ys);


    const double expected_mean_x { 9.0 };
    const double expected_mean_y { 7.5 };     //to 2 decimal places
    const double expected_slope  { 0.500 };   //to 3 decimal places
    const double expected_intercept { 3.00 }; //to 2 decimal places

    const double e { 0.01 };
    assert(std::abs(expected_mean_x - mean_x) < e);
    assert(std::abs(expected_mean_y - mean_y) < e);

    //const double expected_variance_x = 11.0;
    //const double expected_variance_y = 4.125;        //4.122 or 4.127 (to 3 decimal places)
    //const double expected_correlation = 0.816;       //to 3 decimal places)
    //assert(std::abs(expected_variance_x - variance_x) < e);
    //assert(std::abs(expected_variance_y - variance_y) < e);
    assert(std::abs(expected_slope - slope) < e);
    assert(std::abs(expected_intercept - intercept) < e);
  }
}
#endif

 

 

 

 

 

Go back to Richel Bilderbeek's C++ page.

Go back to Richel Bilderbeek's homepage.

 

Valid XHTML 1.0 Strict

This page has been created by the tool CodeToHtml