Go back to Richel Bilderbeek's homepage.

Go back to Richel Bilderbeek's C++ page.

 

 

 

 

 

(C++) ManyDigitNewick

 

STLQt CreatorLubuntu

 

A Newick class.

Technical facts

 

 

 

 

 

 

./CppManyDigitNewick/CppManyDigitNewick.pri

 

DEFINES += DEBUG_SKIP_SAZ_AND_SAO

INCLUDEPATH += \
    ../../Classes/CppManyDigitNewick

SOURCES += \
    ../../Classes/CppManyDigitNewick/manydigitnewick.cpp \
    ../../Classes/CppManyDigitNewick/manydigitnewickcoordinat.cpp \
    ../../Classes/CppManyDigitNewick/manydigitnewickderivative.cpp \
    ../../Classes/CppManyDigitNewick/manydigitnewickindexer.cpp \
    ../../Classes/CppManyDigitNewick/manydigitnewickindextable.cpp \
    ../../Classes/CppManyDigitNewick/manydigitnewicks.cpp

HEADERS  += \
    ../../Classes/CppManyDigitNewick/manydigitnewick.h \
    ../../Classes/CppManyDigitNewick/manydigitnewickcoordinat.h \
    ../../Classes/CppManyDigitNewick/manydigitnewickderivative.h \
    ../../Classes/CppManyDigitNewick/manydigitnewickindexer.h \
    ../../Classes/CppManyDigitNewick/manydigitnewickindextable.h \
    ../../Classes/CppManyDigitNewick/manydigitnewicks.h \

OTHER_FILES += \
    ../../Classes/CppManyDigitNewick/Licence.txt

 

 

 

 

 

./CppManyDigitNewick/manydigitnewick.h

 

//---------------------------------------------------------------------------
/*
ManyDigitNewick, Newick class
Copyright (C) 2011-2015 Richel Bilderbeek

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program.If not, see <http://www.gnu.org/licenses/>.
*/
//---------------------------------------------------------------------------
//From http://www.richelbilderbeek.nl/CppManyDigitNewick.htm
//---------------------------------------------------------------------------
#ifndef MANYDIGITNEWICK_H
#define MANYDIGITNEWICK_H

#include <string>
#include <vector>

#include "manydigitnewickderivative.h"

namespace ribi {

///ManyDigitNewick contains all
///ManyDigitDerivative that can be
///constructed from a phylogeny. For example,
///if from a certain phylogeny three derived
///phylogenies can be constructed, ManyDigitDerivativesData
///will hold three elements.
///The algorithm is described at
///http://www.richelbilderbeek.nl/CppTwoDigitNewickAlgorithm.htm
///
///The meat of the code is in ManyDigitNewick::CalculateProbability
struct ManyDigitNewick
{
  typedef ManyDigitNewickDerivative Derivative;

  //An empty ManyDigitNewick
  ManyDigitNewick();

  ///A ManyDigitNewick cannot be created without
  ///its derivatives: a ManyDigitNewick IS its
  ///derivatives in a way.
  ///sum_above_zero and sum_above_one are needed to
  ///calculate its denominator
  explicit ManyDigitNewick(
    const std::vector<Derivative>& derivatives,
    const int sum_above_zero,
    const int sum_above_one);

  ///Empty returns !IsComplete
  bool Empty() const noexcept;

  const std::vector<Derivative>& GetDerivatives() const noexcept;
  ///IsComplete determines if the ManyDigitNewick is
  ///initialized completely
  bool IsComplete() const;
  bool IsProbabilityKnown() const noexcept;
  double GetDenominator() const;
  double GetProbability() const;
  int GetSumTermsAboveOne() const;
  int GetSumTermsAboveZero() const;
  void SetProbability(const double p);
  static void SetTheta(const double theta);

  static std::string GetVersion() noexcept;
  static std::vector<std::string> GetVersionHistory() noexcept;

  private:
  ///m_derivatives contains all the information
  ///needed to get to this Newick's derivatives
  std::vector<Derivative> m_derivatives;

  ///m_probability denotes the probability
  ///a Newick exists.
  ///A negative value of m_probability denotes
  ///that it is not yet calculated.
  double m_probability;

  ///m_denominator constant for a Newick.
  double m_denominator;

  int m_sum_terms_above_zero;
  int m_sum_terms_above_one;

  static double sm_theta;

  double CalculateDenominator(
    const int sum_above_zero,
    const int sum_above_one) const;

  #ifndef NDEBUG
  static void Test() noexcept;
  #endif

  public:

  static double CalculateProbability(
    const std::string& newick,
    const double theta);


};

} //~namespace ribi

#endif // TWODIGITNEWICK_H

 

 

 

 

 

./CppManyDigitNewick/manydigitnewick.cpp

 

//---------------------------------------------------------------------------
/*
ManyDigitNewick, Newick class
Copyright (C) 2011-2015 Richel Bilderbeek

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program.If not, see <http://www.gnu.org/licenses/>.
*/
//---------------------------------------------------------------------------
//From http://www.richelbilderbeek.nl/CppManyDigitNewick.htm
//---------------------------------------------------------------------------
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Weffc++"
#pragma GCC diagnostic ignored "-Wunused-local-typedefs"

#include "manydigitnewick.h"

#include <cassert>
#include <cmath>

#include <iostream>


#include <boost/numeric/conversion/cast.hpp>

#include "newickvector.h"
#include "newick.h"
#include "manydigitnewickindexer.h"
#include "testtimer.h"
#pragma GCC diagnostic pop

double ribi::ManyDigitNewick::sm_theta = -1.0;

///Create an empty MyDigitNewick, to have a
///default contructor
ribi::ManyDigitNewick::ManyDigitNewick()
  : m_probability(-1.0),
    m_denominator(-1.0),
    m_sum_terms_above_zero(-1),
    m_sum_terms_above_one(-1)
{
  #ifndef NDEBUG
  Test();
  #endif
  assert(this->Empty());
}

ribi::ManyDigitNewick::ManyDigitNewick(
  const std::vector<ManyDigitNewickDerivative>& derivatives,
  const int sum_above_zero,
  const int sum_above_one)
  : m_derivatives(derivatives),
    m_probability(-1.0),
    m_denominator(
      CalculateDenominator(sum_above_zero,sum_above_one)),
    m_sum_terms_above_zero(sum_above_zero),
    m_sum_terms_above_one(sum_above_one)
{

}

double ribi::ManyDigitNewick::CalculateDenominator(
  const int sum_above_zero,
  const int sum_above_one) const
{
  assert(sm_theta >= 0.0);
  assert(sum_above_zero >= 0);
  #ifndef NDEBUG
  if (sum_above_one < 0)
  {
    std::cerr << "Invalid sum_above_one: " << sum_above_one << '\n';
  }
  #endif
  assert(sum_above_one >= 0);
  const double d
    = boost::numeric_cast<double>(
      sum_above_zero * (sum_above_zero - 1))
    + (boost::numeric_cast<double>(sum_above_one)
       * sm_theta);
  return d;
}

///Calculates the probability for a certain Newick
///with a certain theta. This is the main (helper)
///function.
double ribi::ManyDigitNewick::CalculateProbability(
  const std::string& newick_str,
  const double theta)
{
  ribi::ManyDigitNewick::SetTheta(theta);
  const NewickVector n(newick_str);
  const ManyDigitNewickIndexer i(n,theta);
  return i.GetProbability();
}

bool ribi::ManyDigitNewick::Empty() const noexcept
{
  return m_derivatives.empty();
}

double ribi::ManyDigitNewick::GetDenominator() const
{
  assert(IsComplete());
  return m_denominator;
}

const std::vector<ribi::ManyDigitNewickDerivative>&
  ribi::ManyDigitNewick::GetDerivatives() const noexcept
{
  return m_derivatives;
}

double ribi::ManyDigitNewick::GetProbability() const
{
  assert(IsProbabilityKnown());
  return m_probability;
}

int ribi::ManyDigitNewick::GetSumTermsAboveOne() const
{
  assert(m_sum_terms_above_one >= 0);
  return m_sum_terms_above_one;
}

int ribi::ManyDigitNewick::GetSumTermsAboveZero() const
{
  if (!(m_sum_terms_above_zero >= 0))
  {
    std::clog << "BREAKPOINT";
  }
  assert(m_sum_terms_above_zero >= 0);
  return m_sum_terms_above_zero;
}

std::string ribi::ManyDigitNewick::GetVersion() noexcept
{
  return "1.1";
}

std::vector<std::string> ribi::ManyDigitNewick::GetVersionHistory() noexcept
{
  return {
    "2010-08-22: version 1.0: initial version",
    "2011-02-20: version 1.1: added version history"
  };
}

///A complete ManyDigitNewick has (hopefully) all its
///derivatives present, as well as its sums of terms
///above zero and one.
bool ribi::ManyDigitNewick::IsComplete() const
{
  return (!m_derivatives.empty()
    && m_sum_terms_above_zero >= 0
    && m_sum_terms_above_one  >= 0);
}

bool ribi::ManyDigitNewick::IsProbabilityKnown() const noexcept
{
  return m_probability >= 0.0;
}

void ribi::ManyDigitNewick::SetProbability(const double p)
{
  assert(p >= 0.0);
  assert(p <= 1.0000001);
  m_probability = p;
}

void ribi::ManyDigitNewick::SetTheta(const double theta)
{
  assert(theta >= 0.0);
  sm_theta = theta;
}

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
#ifndef NDEBUG
void ribi::ManyDigitNewick::Test() noexcept
{
  {
    static bool is_tested{false};
    if (is_tested) return;
    is_tested = true;
  }
  {
    ribi::MultiVector<int>();
  }
  const TestTimer test_timer(__func__,__FILE__,1.0);
  const double theta = 10.0;
  ribi::ManyDigitNewick::SetTheta(theta);
  const std::vector<std::string> v = Newick().CreateValidNewicks();
  for(const std::string& s: v)
  {
    if ( Newick().CalcComplexity(Newick().StringToNewick(s))
      >  BigInteger(10000) )
    {
      continue;
    }
    ribi::ManyDigitNewick::CalculateProbability(s,theta);
  }
}
#endif // NDEBUG
#pragma GCC diagnostic pop

 

 

 

 

 

./CppManyDigitNewick/manydigitnewickcoordinat.h

 

//---------------------------------------------------------------------------
/*
ManyDigitNewick, Newick class
Copyright (C) 2011-2015 Richel Bilderbeek

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program.If not, see <http://www.gnu.org/licenses/>.
*/
//---------------------------------------------------------------------------
//From http://www.richelbilderbeek.nl/CppManyDigitNewick.htm
//---------------------------------------------------------------------------
#ifndef MANYDIGITNEWICKCOORDINAT_H
#define MANYDIGITNEWICKCOORDINAT_H

#include <vector>

namespace ribi {

///ManyDigitNewickCoordinat is an any-dimensional
///coordinat with its indices sorted.
struct ManyDigitNewickCoordinat
{
  ManyDigitNewickCoordinat(const std::vector<int>& v);
  const std::vector<int>& ToVector() const { return m_v; }
  bool AllSimple(const int threshold_index) const;

  bool IsSorted() const;
  bool IsValid() const;

  private:
  const std::vector<int> m_v;

  static const std::vector<int> Sort(std::vector<int> v);

};

} //~namespace ribi

#endif // MANYDIGITNEWICKCOORDINAT_H

 

 

 

 

 

./CppManyDigitNewick/manydigitnewickcoordinat.cpp

 

//---------------------------------------------------------------------------
/*
ManyDigitNewick, Newick class
Copyright (C) 2011-2015 Richel Bilderbeek

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program.If not, see <http://www.gnu.org/licenses/>.
*/
//---------------------------------------------------------------------------
//From http://www.richelbilderbeek.nl/CppManyDigitNewick.htm
//---------------------------------------------------------------------------
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Weffc++"
#include "manydigitnewickcoordinat.h"

#include <algorithm>
#include <cassert>
#include <functional>

#include <boost/numeric/conversion/cast.hpp>
#pragma GCC diagnostic pop

ribi::ManyDigitNewickCoordinat::ManyDigitNewickCoordinat(
  const std::vector<int>& v)
  : m_v(Sort(v))
{
  assert(IsValid()  && "Assume all indices in a ManyDigitCoordinat are positive");
  assert(IsSorted() && "Assume that all indices are sorted");
}

///AllSimple returns whether it contains simple indices only.
///ManyDigitNewickIndexer::m_reserved determines this:
///a value of '7' might be the Newick frequency of '7' if
///threshold_index equals '9'. If the threshold_index equals '5', however,
///'7' might be a summarization of '(2,2)'.
bool ribi::ManyDigitNewickCoordinat::AllSimple(const int threshold_index) const
{
  return
    std::count_if(
      m_v.begin(),
      m_v.end(),
      std::bind2nd(std::less<int>(),threshold_index))
    == boost::numeric_cast<int>(m_v.size());

}

///IsSorted checks if a std::vector is sorted.
///From http://www.richelbilderbeek.nl/CppIsSorted.htm
bool ribi::ManyDigitNewickCoordinat::IsSorted() const
{
  return std::adjacent_find(
    m_v.begin(),
    m_v.end(),
    std::greater<int>()) == m_v.end();
}

///IsValid returns if all indices are valid, that is, positive and
///non-zero.
bool ribi::ManyDigitNewickCoordinat::IsValid() const
{
  return
    std::count_if(
      m_v.begin(),
      m_v.begin(),
      std::bind2nd(std::less_equal<int>(),0))
    == 0;

}

const std::vector<int> ribi::ManyDigitNewickCoordinat::Sort(std::vector<int> v)
{
  std::sort(std::begin(v),std::end(v));
  return v;
}

 

 

 

 

 

./CppManyDigitNewick/manydigitnewickderivative.h

 

//---------------------------------------------------------------------------
/*
ManyDigitNewick, Newick class
Copyright (C) 2011-2015 Richel Bilderbeek

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program.If not, see <http://www.gnu.org/licenses/>.
*/
//---------------------------------------------------------------------------
//From http://www.richelbilderbeek.nl/CppManyDigitNewick.htm
//---------------------------------------------------------------------------
#ifndef MANYDIGITDERIVATIVE_H
#define MANYDIGITDERIVATIVE_H

namespace ribi {

///ManyDigitDerivative contains
///the index of the derived phylogeny
///and the value that must be changed
///to get there. For example, if for a
///complex phylogeny (of unknown index)
///a '3' must be changed to a '2' to get
///to the phylogeny with index 42,
///ManyDigitDerivatives has an
///m_derived_index of 42 and an
///m_value_changed of 3.
///For calculations it is important to know which value
///has changed, sometimes with another.
///If no other value changes, set m_other_value_changed to zero.
///Some examples of using m_value_changed and m_other_value_changed:\n
///(1,1) ->   (2), then m_value_changed == 1 && m_other_value_changed == 1\n
///(1,2) ->   (3), then m_value_changed == 1 && m_other_value_changed == 2\n
///(1,2) -> (1,1), then m_value_changed == 2 && m_other_value_changed == 0\n
///(1,3) ->   (4), then m_value_changed == 1 && m_other_value_changed == 3\n
///(1,3) -> (1,2), then m_value_changed == 3 && m_other_value_changed == 0\n
///(2,3) -> (1,3), then m_value_changed == 2 && m_other_value_changed == 0\n
///(2,3) -> (2,2), then m_value_changed == 3 && m_other_value_changed == 0\n
///A valid ManyDigitDerivative has either:\n
///- m_value_changed == 1 && m_other_value_changed  > 0\n
///- m_value_changed  > 1 && m_other_value_changed == 0\n
///This is checked at the constructor.\n
///Because there are at highest two values that change, for both a\n
///TwoDigitNewick and ManyDigitNewick, the same Derivative can be used.
struct ManyDigitNewickDerivative
{
  ManyDigitNewickDerivative(
    const int derived_index,
    const int value_changed,
    const int other_value);

  int m_derived_index;
  int m_value_changed;
  int m_other_value_changed;
};

} //~namespace ribi

#endif // MANYDIGITDERIVATIVE_H

 

 

 

 

 

./CppManyDigitNewick/manydigitnewickderivative.cpp

 

//---------------------------------------------------------------------------
/*
ManyDigitNewick, Newick class
Copyright (C) 2011-2015 Richel Bilderbeek

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program.If not, see <http://www.gnu.org/licenses/>.
*/
//---------------------------------------------------------------------------
//From http://www.richelbilderbeek.nl/CppManyDigitNewick.htm
//---------------------------------------------------------------------------


#include "manydigitnewickderivative.h"

#include <cassert>

ribi::ManyDigitNewickDerivative::ManyDigitNewickDerivative(
  const int derived_index,
  const int value_changed,
  const int other_value_changed)
  : m_derived_index(derived_index),
    m_value_changed(value_changed),
    m_other_value_changed(other_value_changed)
{
  assert(m_derived_index > 0);
  assert(m_value_changed > 0);
  assert( (m_value_changed == 1 && m_other_value_changed  > 0)
       || (m_value_changed  > 1 && m_other_value_changed == 0) );
}

 

 

 

 

 

./CppManyDigitNewick/manydigitnewickindexer.h

 

//---------------------------------------------------------------------------
/*
ManyDigitNewick, Newick class
Copyright (C) 2011-2015 Richel Bilderbeek

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program.If not, see <http://www.gnu.org/licenses/>.
*/
//---------------------------------------------------------------------------
//From http://www.richelbilderbeek.nl/CppManyDigitNewick.htm
//---------------------------------------------------------------------------
#ifndef MANYDIGITNEWICKINDEXER_H
#define MANYDIGITNEWICKINDEXER_H

#include <cassert>
#include <vector>

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Weffc++"
#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
#include "manydigitnewick.h"
//#include "manydigitnewickcoordinat.h"
#include "manydigitnewickderivative.h"
#include "manydigitnewickindextable.h"
#include "manydigitnewicks.h"
#include "multivector.h"
#include "newickvector.h"
#pragma GCC diagnostic pop

namespace ribi {

///ManyDigitNewickIndexer converts any newick to (X,Y,...)
struct ManyDigitNewickIndexer
{
  typedef ManyDigitNewickDerivative Derivative;

  //ManyDigitNewickIndexer constructor does all the work
  ManyDigitNewickIndexer(const NewickVector& n, const double theta);

  ///ConstructNewick constructs a full NewickVector from
  ///the ManyDigitNewick at index i.
  ///ConstructNewick is for debugging purposes only,
  ///as it's the idea to work with non-full determined
  ///(that is: two-digit) Newicks
  const NewickVector ConstructNewick(const int i) const;

  int GetCalculatedToIndex() const { return m_calculated_to_index; }
  ///GetData allows a peek at the data
  const ManyDigitIndexTable& GetIndexTable() const
  {
    return m_index_table;
  }
  const ManyDigitNewick& GetNewick(const int i) const
  {
    return m_newicks.GetNewick(i);
  }
  const ManyDigitNewick& GetNewick(const std::vector<int>& indices) const;
  const ManyDigitNewicks& GetNewicks() const
  {
    return m_newicks;
  }
  ///GetProbability returns the probability of the NewickVector
  ///given at the constructor
  double GetProbability() const;

  int GetReserved() const { return m_reserved; }


  ///After a leaf has been cut, then
  void TryToCalculateNewNewicks();
  void UpdateCalculatedFromIndex();


  private:
  ManyDigitIndexTable m_index_table;
  ManyDigitNewicks m_newicks;
  const double m_theta;
  const int m_reserved;
  int m_current_index;
  int m_calculated_to_index;

  ///m_probability is the probability of the given Newick.
  ///m_probability is calculated in the constructor of ManyDigitNewickIndexer
  ///and can be obtained by GetProbability.
  double m_probability;

  ///Calculate the Newick probability of Newick (a,b).
  ///Both a and b are simple, that is: no index
  ///for a complex Newick
  //double CalculateEwensProbability(
  //  const int a,const int b) const;

  ///CalculateReserved calculates the index
  ///that must be reserver
  int CalculateReserved(const NewickVector& n) const;

  const ManyDigitNewick CreateManyDigitDerivatives(
    const ManyDigitNewickCoordinat& coordinat,
    const int sum_above_zero,
    const int sum_above_one);

  ///GetDeltaSumAboveZero calculates the delta in the
  ///ManyDigitNewick::m_sum_above_zero of a new Newick
  ///when an old_value is changed.
  int GetDeltaSumAboveZero(const int old_value) const;

  ///GetDeltaSumAboveOne calculates the delta in the
  ///ManyDigitNewick::m_sum_above_one of a new Newick
  ///when an old_value is changed.
  int GetDeltaSumAboveOne(const Derivative& d) const;
  //int GetDeltaSumAboveOne(const int old_value) const;


  ///IsSimple determines if a Newick at index i is simple.
  ///A Newick is simple if F=(X,Y) where X and Y are
  ///less than the reserved index
  bool IsSimple(const int i) const;

  ///Feed the (x,y) indices of a Newick and obtain the
  ///summarized index.
  ///Feed also obtains the derived Newicks.
  ///Note that Feed must be fed simple Newicks first
  ///int Feed(const int x, const int y)
  int SummarizeNewick(
    const ManyDigitNewickCoordinat& coordinat,
    const int sum_above_zero,
    const int sum_above_one);

  ///TryToCalculateNewNewick tries to calculate the probability
  ///of Newick with index i
  void TryToCalculateNewNewick(const int i);

  //static bool IsSorted(const std::vector<int>& v);
};

} //~namespace ribi

#endif //MANYDIGITNEWICKINDEXER_H

 

 

 

 

 

./CppManyDigitNewick/manydigitnewickindexer.cpp

 

//---------------------------------------------------------------------------
/*
ManyDigitNewick, Newick class
Copyright (C) 2011-2015 Richel Bilderbeek

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program.If not, see <http://www.gnu.org/licenses/>.
*/
//---------------------------------------------------------------------------
//From http://www.richelbilderbeek.nl/CppManyDigitNewick.htm
//---------------------------------------------------------------------------
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Weffc++"
#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
#include "manydigitnewickindexer.h"

#include <algorithm>
#include <cassert>
#include <iostream>
#include <stack>


#include <boost/numeric/conversion/cast.hpp>

#include "newick.h"
#include "newickvector.h"

#pragma GCC diagnostic pop

//ManyDigitNewickIndexer constructor does all the work
ribi::ManyDigitNewickIndexer::ManyDigitNewickIndexer(
  const NewickVector& n,
  const double theta)
  : m_newicks(CalculateReserved(n),theta),
    m_theta(theta),
    m_reserved(CalculateReserved(n)),
    m_probability(-1.0)
{
  m_current_index = m_reserved;
  m_calculated_to_index = m_reserved;

  assert(m_reserved == m_newicks.Size());
  assert(m_current_index == m_newicks.Size());

  //If the Newick is simple
  if (Newick().IsSimple(n.Peek()))
  {
    //Calculate the Ewens probability only
    m_probability = Newick().CalcProbabilitySimpleNewick(n.Peek(),theta);
    return;
  }

  //Assume all reserved Newicks from index 2 are complete
  #ifndef NDEBUG
  {
    const int sz = m_newicks.Size();
    for (int i=2; i!=sz; ++i)
    {
      assert(GetNewick(i).IsComplete());
    }
  }
  #endif

  //Create all Newicks and derivatives, but do not calculate their
  //probabilities yet
  std::vector<int> v = n.Peek();
  #define DEBUG_DO_THIS_ONLY_ONE
  #ifndef DEBUG_DO_THIS_ONLY_ONE
  while(v.size()>2) //Find a leaf and cut it until the string is empty
  #else
  //Only once!
  #endif
  {
    //Find a leaf
    //Find index i (starting opening bracket) and j (closing bracket)
    const std::size_t sz = v.size();
    std::size_t i = 0;
    std::size_t j = 0;
    for (i=0 ; i!=sz; ++i) //Index of opening bracket
    {
      if (v[i]!=Newick::bracket_open) continue;
      for (j=i+1; j!=sz; ++j)
      {
        if (v[j]==Newick::bracket_open) { j = 0; break; }
        if (v[j]!=Newick::bracket_close) continue;
        break;
      }
      if (j == 0) continue; //j cannot be 0 after previous for loop
      break;
    }
    //Find simplest leaf
    assert(v[i]==Newick::bracket_open);
    assert(v[j]==Newick::bracket_close);
    std::vector<int> v_new(v.begin(),v.begin() + i);
    //assert(Newick::IsNewick(v_new));
    //const int x = v[i+1];
    //const int y = v[i+2];

    const std::vector<int> values(
      v.begin() + i + 1,
      v.begin() + j);
    int saz = 0;
    int sao = 0;
    #ifndef DEBUG_SKIP_SAZ_AND_SAO
    for(const int value: values)
    {
      TRACE(boost::lexical_cast<std::string>(value));
      assert(value < 2 || this->GetNewick(value).IsComplete());
      //assert(!this->GetNewick(value).Empty());
      saz+=this->GetNewick(value).GetSumTermsAboveZero();
      sao+=this->GetNewick(value).GetSumTermsAboveOne();
    }
    //Feed it and obtain simpler index
    //Indices 0 and 1 cannot be complete, because
    //they don't have sensible derivates to point to
    assert(saz >  0);
    assert(sao >= 0);
    #endif
    v_new.push_back(SummarizeNewick(values,saz,sao));
    //Replace leaf with simpler index
    std::copy(v.begin() + j + 1, v.end(),std::back_inserter(v_new));
    v = v_new;
  }

  #define DEBUG_ONLY_INITIAL_INDICES_FIRST
  #ifndef DEBUG_ONLY_INITIAL_INDICES_FIRST
  //Now all Newicks are created, but do not have their probabilities calculated
  const int sz = m_newicks.Size();
  //m_calculated_to_index == sz denotes that all Newicks'
  //probabilities are calculated
  while (m_calculated_to_index != sz)
  {
    //Try to calculate if new Newicks can be solved
    this->TryToCalculateNewNewicks();
    //If all is well, there will be new Newicks known
    this->UpdateCalculatedFromIndex();
    //When no new probabilities are calculated,
    //all Newicks are solved
  }
  #endif
}

int ribi::ManyDigitNewickIndexer::CalculateReserved(const NewickVector& n) const
{
  const std::vector<int>& v = n.Peek();
  //Count the number of elements
  const int n_elements
    = std::count_if(v.begin(),v.end(),
      std::bind2nd(std::greater<int>(),0));
  const int max_element = *std::max_element(std::begin(v),std::end(v));
  //\todo: +1 needed?
  return n_elements + max_element + 1;
}

///ConstructNewick constructs a full NewickVector from
///the ManyDigitNewick at index i.
///ConstructNewick is for debugging purposes only,
///as it's the idea to work with non-full determined
///(that is: two-digit) Newicks
const ribi::NewickVector ribi::ManyDigitNewickIndexer::ConstructNewick(const int i) const
{
  std::vector<int> v;

  if (i < m_reserved)
  {
    v.push_back(Newick::bracket_open);
    //Newick '(0)' is not valid, so fake it as '(1)'
    v.push_back(i == 0 ? 1 : i);
    v.push_back(Newick::bracket_close);
    NewickVector n(v);
    return n;
  }

  //Search for index i in Indextable to get two digits
  {
    const std::vector<int> p = m_index_table.Find(i).ToVector();
    v.push_back(Newick::bracket_open);
    std::copy(p.begin(),p.end(),std::back_inserter(v));
    v.push_back(Newick::bracket_close);
  }
  assert(Newick().IsNewick(v));

  //As long as there are not only reserved (that is: simple)
  //values in v, replace those by their simplers
  while (1)
  {
    const std::vector<int>::iterator i
      = std::find_if(v.begin(),v.end(),
          std::bind2nd(std::greater_equal<int>(),m_reserved));
    if (i== v.end()) break;
    assert(*i >= m_reserved);
    //Create a new std::vector from the v's begin to i
    std::vector<int> v_new( v.begin(),i);
    v_new.push_back(Newick::bracket_open);
    const std::vector<int> p = m_index_table.Find(*i).ToVector();
    assert(p.size() == 2 && "For now...");
    v_new.push_back(p[0]);
    v_new.push_back(p[1]);
    v_new.push_back(Newick::bracket_close);
    //Copy the remainder of v (from after i) to v_new
    std::copy(i+1,v.end(),std::back_inserter(v_new));
    std::swap(v,v_new);
    assert(Newick().IsNewick(v));
  }
  NewickVector n(v);
  return n;
}

///Create a ManyDigitNewick (to be stored at 'coordinat')
///by constructing its derivatives, SAZ and SAO.
///
///Note that 'coordinat' serves two
///purposes in this context: the coordinat { 5,6,7 } denotes a
///trinary Newick, consisting of the simple or summarized indices
///'5', '6' and '7'. If all three indices are simple (that is, below m_reserved),
///the coordinat { 5,6,7 } denotes the Newick '(5,6,7)'. If, for example all three
///indices are complex (that is, equal of greater than m_reserved), the coordinat
///{ 5,6,7 } might denote the Newick '((1,1),(1,2),(1,3))'.
///
///Allow for recursion
const ribi::ManyDigitNewick ribi::ManyDigitNewickIndexer::CreateManyDigitDerivatives(
  const ManyDigitNewickCoordinat& coordinat, //two purposes!
  const int sum_above_zero,
  const int sum_above_one)
{
  assert(sum_above_zero >= 0);
  assert(sum_above_one >= 0);
  assert(coordinat.IsSorted());
  assert(coordinat.IsValid());

  const std::vector<int>& indices = coordinat.ToVector();
  const int n_indices = boost::numeric_cast<int>(indices.size());

  std::vector<ManyDigitNewickDerivative> ds;

  for(int i=0;i!=n_indices;++i)
  {
    #ifndef NDEBUG
    const int f = indices[i];
    #endif

    assert((f==1 || !m_newicks.Empty(f)) && "Assume each single/loose value/frequency exists");

    //Obtain this single/loose Newick
    const ManyDigitNewick n = m_newicks.GetNewick( indices[i]);
    //Of this single/loose Newick, obtain its derivatives
    //and chain those with the other members of the complete Newick/Coordinat
    for(const ManyDigitNewickDerivative& d: n.GetDerivatives())
    {
      #ifndef DEBUG_SKIP_SAZ_AND_SAO
      //dsaz = delta sum above zero
      const int dsaz = GetDeltaSumAboveZero(d.m_value_changed);
      //dsao = delta sum above one
      const int dsao = GetDeltaSumAboveOne(d);
      //saz = sum above zero
      const int saz = sum_above_zero + dsaz;
      //sao = sum above one
      const int sao = sum_above_one + dsao;

      assert(saz >  0);
      assert(sao >= 0);
      #else
      const int saz = 0;
      const int sao = 0;
      #endif

      std::vector<int> new_indices = indices;
      new_indices[i] = d.m_derived_index;

      const int d_i = SummarizeNewick(new_indices,saz,sao);

      const int value_changed = d.m_value_changed;
      ///\todo: check that guess is right, that using
      ///i.m_other_value_changed is better than '0'
      const int other_value_changed = d.m_other_value_changed;

      ds.push_back(ManyDigitNewickDerivative(d_i,value_changed,other_value_changed));
    }
  }
  return ManyDigitNewick(ds,sum_above_zero,sum_above_one);

  /*
  if (IsSimple(x))
  {
    if (IsSimple(y))
    {
      return
        CreateManyDigitDerivativesSimpleSimple(x,y);
    }
    return
      CreateManyDigitDerivativesSimpleComplex(
        x,y,sum_above_zero,sum_above_one);
  }
  if (IsSimple(y))
  {
    return
      CreateManyDigitDerivativesSimpleComplex(
        y,x,sum_above_zero,sum_above_one);
  }
  return
    CreateManyDigitDerivativesComplexComplex(
      x,y,sum_above_zero,sum_above_one);
  */
}

/*
const ManyDigitNewick
  ribi::ManyDigitNewickIndexer::CreateManyDigitDerivativesSimpleSimple(
  const int x, const int y)
{
  assert(x <= y);
  assert(x <= m_reserved && y <= m_reserved);
  assert(IsSimple(x) && IsSimple(y));


  if (x==1 && y==1)
  {
    //(1,1) -> 2
    //'2' has reserved index 2
    std::vector<ManyDigitDerivative> v;
    const int derived_index = 2;
    const int value_changed = 1;
    const int other_value_changed = 1;
    //Add this derivative twice, because there are two ways
    //to change (1,1) -> 2
    v.push_back(
      ManyDigitDerivative(
        derived_index,value_changed,other_value_changed));
    v.push_back(
      ManyDigitDerivative(
        derived_index,value_changed,other_value_changed));
    //saz = sum above zero
    const int saz = 2;
    //sao = sum above one
    const int sao = 0;
    ManyDigitNewick n(v,saz,sao);
    return n;
  }
  else if (x==1)
  {
    //(1,y) -> { (1,y-1), (y+1) }
    //'1','y' and 'y-1' are reserved indices
    assert(x == 1);
    assert(y > 1 && y < m_reserved);
    std::vector<ManyDigitDerivative> v;
    //Create (1,y-1)
    {
      //saz = sum above zero
      const int saz = x + y - 1;
      //sao = sum above one
      const int sao = (y - 1 == 1 ? 0 : y - 1);
      assert(saz >  0);
      assert(sao >= 0);
      const int d_i = SummarizeNewick( { x,y-1 } ,saz,sao);
      const int value_changed = y;
      const int other_value_changed = 0; //<Only y changed
      v.push_back(ManyDigitDerivative(d_i,value_changed,other_value_changed));
    }
    //Create (y+1)
    {
      ///\todo: this
      const int d_i = y + 1;
      const int value_changed = 1;       //Note the reversal of 1 and y
      const int other_value_changed = y; //Note the reversal of 1 and y
      v.push_back(ManyDigitDerivative(d_i,value_changed,other_value_changed));
    }
    ManyDigitNewick n( v,x+y,y);
    return n;
  }
  else
  {
    //(x,y) -> { (x-1,y), (x,y-1) }
    //'y','y-1','x' and 'x-1' are reserved indices
    assert(x > 1 && x < m_reserved);
    assert(y > 1 && y < m_reserved);

    std::vector<ManyDigitDerivative> v;

    //saz = sum above zero
    const int saz = x + y - 1;
    //sao = sum above one
    const int sao_left  = (x - 1 == 1 ? x + y - 2 : x + y - 1);
    const int sao_right = (y - 1 == 1 ? x + y - 2 : x + y - 1);

    assert(saz       >  0);
    assert(sao_left  >= 0);
    assert(sao_right >= 0);
    //Derive (x-1,y)
    {
      const int d_i_left  = SummarizeNewick( { x-1,y } ,saz,sao_left );
      const int value_changed_left = x;
      const int other_value_changed_left = 0; //<Only x changed
      v.push_back(
        ManyDigitDerivative(
          d_i_left,value_changed_left,other_value_changed_left));
    }
    //Derive (x,y-1)
    {
      const int d_i_right = SummarizeNewick( { x,y-1 } ,saz,sao_right);
      const int value_changed_right = y;
      const int other_value_changed_right = 0; //<Only y changed
      v.push_back(
        ManyDigitDerivative(
          d_i_right,value_changed_right,other_value_changed_right));
      ManyDigitNewick n(v,x+y,x+y);
      return n;
    }
  }
}
*/

/*
const ManyDigitNewick ribi::ManyDigitNewickIndexer::CreateManyDigitDerivativesSimpleComplex(
  const int x, const int y,
  const int sum_above_zero,
  const int sum_above_one)
{
  assert(IsSimple(x) && !IsSimple(y));

  std::vector<ManyDigitDerivative> v;
  assert(x>=0);
  if (x>1)
  {
    assert(x > 1);
    assert(y >= m_reserved && "So cannot work with y-1");
    //saz = sum above zero
    const int saz = sum_above_zero - 1;
    assert(saz >= 0);
    //sao = sum above one
    const int sao = (x - 1 == 1 ? sum_above_one - 2 : sum_above_one - 1);

    assert(saz >  0);
    assert(sao >= 0);

    const int i = SummarizeNewick( { x-1,y } ,saz,sao);
    assert(i < m_newicks.Size());

    const int value_changed = x;
    const int other_value_changed = 0; //<Only x changed
    v.push_back(ManyDigitDerivative(i,value_changed,other_value_changed));
  }
  assert(y < m_newicks.Size());
  assert(!m_newicks.Empty(y));
  //\todo: Find out why 'const ManyDigitDerivativesData&' does not work
  const ManyDigitNewick v_derived = m_newicks.GetNewick(y);
  for(const ManyDigitDerivative& i: v_derived.GetDerivatives())
  {
    assert(i.m_derived_index < m_newicks.Size()
      && "ManyDigitDerivative index must be smaller than the number of derivatives");

    //dsaz = delta sum above zero
    const int dsaz = GetDeltaSumAboveZero(i.m_value_changed);
    //dsao = delta sum above one
    const int dsao = GetDeltaSumAboveOne(i);
    //saz = sum above zero
    const int saz = sum_above_zero + dsaz;
    //sao = sum above one
    const int sao = sum_above_one + dsao;

    assert(saz >  0);
    assert(sao >= 0);

    const int d_i = SummarizeNewick( { x,i.m_derived_index } ,saz,sao);

    const int value_changed = i.m_value_changed;
    ///\todo: check that guess is right, that using
    ///i.m_other_value_changed is better than '0'
    const int other_value_changed = i.m_other_value_changed;
    v.push_back(ManyDigitDerivative(d_i,value_changed,other_value_changed));
  }
  ManyDigitNewick n( v,sum_above_zero,sum_above_one);
  return n;
}
*/

/*
const ManyDigitNewick
  ribi::ManyDigitNewickIndexer::CreateManyDigitDerivativesComplexComplex(
  const int x, const int y,
  const int sum_above_zero,
  const int sum_above_one)
{
  assert(!IsSimple(x) && !IsSimple(y));
  std::vector<ManyDigitDerivative> v;

  //Get (X',Y)
  {
    assert(x < boost::numeric_cast<int>(m_newicks.Size()));
    assert(!m_newicks.Empty(x));
    //\todo: Find out why 'const ManyDigitDerivativesData&' does not work
    const ManyDigitNewick v_derived = m_newicks.GetNewick(x);
    for(const ManyDigitDerivative& i: v_derived.GetDerivatives())
    {
      //dsaz = delta sum above zero
      const int dsaz = GetDeltaSumAboveZero(i.m_value_changed);
      //dsao = delta sum above one
      const int dsao = GetDeltaSumAboveOne(i);
      //saz = sum above zero
      const int saz = sum_above_zero + dsaz;
      //sao = sum above one
      const int sao = sum_above_one + dsao;

      assert(saz >  0);
      assert(sao >= 0);

      const int d_i = SummarizeNewick( { y,i.m_derived_index } ,saz,sao);

      const int value_changed = i.m_value_changed;
      ///\todo: check that guess is right, that using
      ///i.m_other_value_changed is better than '0'
      const int other_value_changed = i.m_other_value_changed;

      v.push_back(ManyDigitDerivative(d_i,value_changed,other_value_changed));
    }
  }
  //Get (X,Y')
  {
    assert(y < boost::numeric_cast<int>(m_newicks.Size()));
    assert(!m_newicks.Empty(y));
    //\todo: Find out why 'const ManyDigitDerivativesData&' does not work
    const ManyDigitNewick v_derived = m_newicks.GetNewick(y);
    for(const ManyDigitDerivative& i: v_derived.GetDerivatives())
    {
      //dsaz = delta sum above zero
      const int dsaz = GetDeltaSumAboveZero(i.m_value_changed);
      //dsao = delta sum above one
      const int dsao = GetDeltaSumAboveOne(i);
      //saz = sum above zero
      const int saz = sum_above_zero + dsaz;
      //sao = sum above one
      const int sao = sum_above_one + dsao;

      assert(saz >  0);
      assert(sao >= 0);

      const int d_i = SummarizeNewick( { x,i.m_derived_index } ,saz,sao);
      const int value_changed = i.m_value_changed;
      ///\todo: check that guess is right, that using
      ///i.m_other_value_changed is better than '0'
      const int other_value_changed = i.m_other_value_changed;

      v.push_back(ManyDigitDerivative(d_i,value_changed,other_value_changed));
    }
  }
  ManyDigitNewick n(v,sum_above_zero,sum_above_one);
  return n;
}
*/

///GetDeltaSumAboveZero calculates the delta in the
///ManyDigitNewick::m_sum_above_zero of a new Newick
///when an old_value is changed.
int ribi::ManyDigitNewickIndexer::GetDeltaSumAboveZero(const int old_value) const
{
  assert(old_value > 0);
  return (old_value == 1 ? 0 : -1);
}

///GetDeltaSumAboveOne calculates the delta in the
///ManyDigitNewick::m_sum_above_one of a new Newick
///when an old_value is changed.
int ribi::ManyDigitNewickIndexer::GetDeltaSumAboveOne(
  const ManyDigitNewickDerivative& d) const
{
  const int x = d.m_value_changed;
  //std::clog << "GetDeltaSumAboveOne for x " << x << '\n';
  assert(x > 0);
  if (x  > 2) return -1;
  if (x == 2) return -2;
  assert(x == 1);
  return (d.m_other_value_changed == 1 ? 2 : 1);
}

const ribi::ManyDigitNewick& ribi::ManyDigitNewickIndexer::GetNewick(const std::vector<int>& indices) const
{
  assert(m_index_table.CanGetIndex(indices));
  const int i = m_index_table.GetIndex(indices);
  return GetNewick(i);
}

double ribi::ManyDigitNewickIndexer::GetProbability() const
{
  assert(m_probability >= 0.0);
  assert(m_probability <= 1.00001);

  #ifndef DEBUG_SKIP_SAZ_AND_SAO
  assert(m_calculated_to_index == m_newicks.Size()
    && "Assume calculation is completed");
  #endif
  return m_probability;
}

///IsSimple determines if an index is the index of
///a simple Newick
bool ribi::ManyDigitNewickIndexer::IsSimple(const int i) const
{
  return i < m_reserved;
}

///SummarizeNewick summarizes a Newick of any arity
///to a single index. Note that 'coordinat' serves two
///purposes in this context: the coordinat { 5,6,7 } denotes a
///trinary Newick, consisting of the simple or summarized indices
///'5', '6' and '7'. If all three indices are simple (that is, below m_reserved),
///the coordinat { 5,6,7 } denotes the Newick '(5,6,7)'. If, for example all three
///indices are complex (that is, equal of greater than m_reserved), the coordinat
///{ 5,6,7 } might denote the Newick '((1,1),(1,2),(1,3))'.
///Allow for recursion
int ribi::ManyDigitNewickIndexer::SummarizeNewick(
  const ManyDigitNewickCoordinat& coordinat, //two purposes!
  const int sum_above_zero,
  const int sum_above_one)
{
  #ifndef DEBUG_SKIP_SAZ_AND_SAO
  assert(sum_above_zero >  0);
  assert(sum_above_one  >= 0);
  #endif

  //Coordinat already assume all values are non-zero
  //and positive.
  assert(coordinat.IsSorted()
    && "To prevent duplicates, Coordinat is sorted");

  //If index is known, return the index
  if (m_index_table.CanGetIndex(coordinat))
  {
    const int i = m_index_table.GetIndex(coordinat);
    if (i) return i;
  }

  //A new coordinat is fed
  const int i = m_current_index;
  m_index_table.SetIndex(coordinat,i);

  ++m_current_index;

  assert(sum_above_zero >= 0);
  assert(sum_above_one  >= 0);

  ManyDigitNewick n = CreateManyDigitDerivatives(
    coordinat,sum_above_zero,sum_above_one);

  //Check if the Newick is simple
  //and its probability can be calculated with
  //the Ewens formula
  //This can be done if all indices are reserved
  //OLD: if (x < m_reserved && y < m_reserved)
  if (coordinat.AllSimple(m_reserved))
  {
    std::vector<int> v;
    v.push_back(Newick::bracket_open);
    std::copy(coordinat.ToVector().begin(),coordinat.ToVector().end(),std::back_inserter(v));
    v.push_back(Newick::bracket_close);
    n.SetProbability(Newick().CalcProbabilitySimpleNewick(v,m_theta));
    //If the user requests simple Newicks to be solved,
    //perhaps the requested probability has just been calculated
    m_probability = n.GetProbability();
  }
  m_newicks.SetNewick(i,n);
  return i;
}

///TryToCalculateNewNewick tries to calculate the probability
///of Newick with index i
void ribi::ManyDigitNewickIndexer::TryToCalculateNewNewick(const int i)
{
  assert(i >= m_calculated_to_index);
  assert(i  < m_newicks.Size());
  ///\todo: why cannot use 'const ManyDigitNewick&'?
  ///\bug: this seems to give memory problems (and funny output)
  const ManyDigitNewick n = this->GetNewick(i);
  //Remember: m_calculated_to_index is not increased
  if (n.IsProbabilityKnown())
  {
    //Already know the probability
    return;
  }
  const std::vector<Derivative> derivatives = n.GetDerivatives();
  //Check if of all derivates the probability is known
  for(const Derivative& derivative: derivatives)
  {
    if (!GetNewick(derivative.m_derived_index).IsProbabilityKnown())
    {
      //Too bad, derived Newick's probability is unknown
      return;
    }
  }
  ///Calculate constants and probability
  double p = 0.0;
  //The denominator is that of the focal Newick,
  //and not its derivative(s)
  const double denominator = GetNewick(i).GetDenominator();
  for(const Derivative& derivative: derivatives)
  {
    assert(GetNewick(derivative.m_derived_index).IsProbabilityKnown());

    const double c
      = (derivative.m_value_changed == 1
      ? m_theta
      : boost::numeric_cast<double>(derivative.m_value_changed) * boost::numeric_cast<double>(derivative.m_value_changed - 1))
      / denominator;
    const double p_this
      = c * GetNewick(derivative.m_derived_index).GetProbability();
    p+=p_this;
  }
  //std::clog << "Index " << i
  //  << " has been calculated to have the probability of "
  //  << p << '\n';
  m_probability = p;
  this->m_newicks.SetNewickProbability(i,p);
}

///TryToCalculateNewNewicks tries to calculate new Newick probabilities
///\warning: m_calculated_to_index is not increased
void ribi::ManyDigitNewickIndexer::TryToCalculateNewNewicks()
{
  const int sz = m_newicks.Size();
  //m_calculated_to_index == sz denotes that all Newicks'
  //probabilities are calculated
  assert(m_calculated_to_index <= sz);
  for (int i = m_calculated_to_index; i!=sz; ++i)
  {
    TryToCalculateNewNewick(i);
  }
}

void ribi::ManyDigitNewickIndexer::UpdateCalculatedFromIndex()
{
  const int sz = m_newicks.Size();
  while (m_calculated_to_index < sz)
  {
    assert(m_calculated_to_index < m_newicks.Size());
    ///\todo: find out why 'const ManyDigitNewick&' does not work
    const ManyDigitNewick n
      = m_newicks.GetNewick(m_calculated_to_index);
    if (n.IsProbabilityKnown())
    {
      //std::clog << "At index " << m_calculated_to_index
      //  << " the probability is known\n";
      ++m_calculated_to_index;
    }
    else
    {
      break;
    }
  }
}

 

 

 

 

 

./CppManyDigitNewick/manydigitnewickindextable.h

 

//---------------------------------------------------------------------------
/*
ManyDigitNewick, Newick class
Copyright (C) 2011-2015 Richel Bilderbeek

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program.If not, see <http://www.gnu.org/licenses/>.
*/
//---------------------------------------------------------------------------
//From http://www.richelbilderbeek.nl/CppManyDigitNewick.htm
//---------------------------------------------------------------------------
#ifndef MANYDIGITINDEXTABLE_H
#define MANYDIGITINDEXTABLE_H

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
#pragma GCC diagnostic ignored "-Weffc++"
#include <vector>
#include "multivector.h"
#include "manydigitnewickcoordinat.h"
#pragma GCC diagnostic push

namespace ribi {

///ManyDigitIndexTable manages (X,Y) -> index
struct ManyDigitIndexTable
{
  typedef ManyDigitNewickCoordinat Coordinat;

  ///CanGetIndex returns if GetIndex can be called with success
  bool CanGetIndex(const Coordinat& indices) const;

  ///Find returns the x-y-coordinats of the Newick with index i
  ///This is a linear (in this case, relatively time-intensive) member function.
  const Coordinat Find(const int i) const;

  ///Get returns the internals of ManyDigitIndexTable
  const MultiVector<int>& Get() const { return m_v; }

  ///GetIndex returns m_index_table[x][y]
  int GetIndex(const Coordinat& indices) const;

  ///GetNumAllocated calculates the number of indices allocated
  //int GetNumAllocated() const;
  ///GetNumUsed calculates the number of indices used
  //int GetNumUsed() const;
  ///Set (x,y) to index z
  void SetIndex(const Coordinat& coordinat, const int value);
  private:
  ///m_index_table is the index table that maps (x,y) to a value,
  ///so that m_index_table[x][y] equals that value
  MultiVector<int> m_v;


  static const std::vector<int> FindInternal(
    const std::vector<int>& indices,
    const MultiVector<int>& v,
    const int value);
};

} //~namespace ribi

#endif // MANYDIGITINDEXTABLE_H

 

 

 

 

 

./CppManyDigitNewick/manydigitnewickindextable.cpp

 

//---------------------------------------------------------------------------
/*
ManyDigitNewick, Newick class
Copyright (C) 2011-2015 Richel Bilderbeek

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program.If not, see <http://www.gnu.org/licenses/>.
*/
//---------------------------------------------------------------------------
//From http://www.richelbilderbeek.nl/CppManyDigitNewick.htm
//---------------------------------------------------------------------------


#include "manydigitnewickindextable.h"

#include <algorithm>
#include <cassert>
#include <stdexcept>
#include <iostream>
#include <sstream>


#include <boost/numeric/conversion/cast.hpp>

///CanGetData returns is GetData can be called with success
bool ribi::ManyDigitIndexTable::CanGetIndex(const Coordinat& coordinat) const
{
  return m_v.CanRetrieve(coordinat.ToVector());
}

const ribi::ManyDigitNewickCoordinat ribi::ManyDigitIndexTable::Find(
  const int value) const
{
  //Check indices
  {
    const auto begin = m_v.PeekIndices().begin();
    const auto end   = m_v.PeekIndices().end();
    const auto x = std::find(begin,end,value);
    if (x!=end)
    {
      const int index_found = std::distance(begin,x);
      assert(index_found >= 0);
      assert(index_found < boost::numeric_cast<int>(m_v.PeekIndices().size()));
      return Coordinat( { index_found } );
    }
  }

  //Check MultiVector
  const std::vector<MultiVector<int> >& mvs
    = m_v.PeekMultiVectors();
  const int size = mvs.size();
  for (int i=0; i!=size; ++i)
  {

    const auto result
      = FindInternal(
          std::vector<int>(1,i),
          mvs[i],
          value);
    if (!result.empty())
    {
      return Coordinat(result);
    }
  }

  assert(!"Should not get here");
  std::stringstream s;
  s << "Value " << value << " not found in ribi::ManyDigitIndexTable::Find";
  throw std::logic_error(s.str());
}

const std::vector<int> ribi::ManyDigitIndexTable::FindInternal(
  const std::vector<int>& indices,
  const MultiVector<int>& v,
  const int value)
{
  //Check indices
  {
    const auto begin = v.PeekIndices().begin();
    const auto end   = v.PeekIndices().end();
    const auto x = std::find(begin,end,value);
    if (x!=end)
    {
      const int index_found = std::distance(begin,x);
      assert(index_found >= 0);
      assert(index_found < boost::numeric_cast<int>(v.PeekIndices().size()));
      std::vector<int> result = indices;
      result.push_back(index_found);
      return result;
    }
  }

  //Check MultiVector
  const auto& mvs = v.PeekMultiVectors();
  const int size = mvs.size();
  for (int i=0; i!=size; ++i)
  {
    std::vector<int> indices_deeper = indices;
    indices_deeper.push_back(i);
    const auto result
      = FindInternal(
        indices_deeper,mvs[i],value);
    if (!result.empty()) return result;
  }
  return std::vector<int>();
}

///GetIndex returns m_index_table[x][y]
int ribi::ManyDigitIndexTable::GetIndex(const ManyDigitNewickCoordinat& indices) const
{
  assert(CanGetIndex(indices));
  return m_v.Retrieve(indices.ToVector());
}

///GetNumAllocated calculates the number of indices allocated
//int ribi::ManyDigitIndexTable::GetNumAllocated() const
//{
//  int n_allocated = 0;
//  for(const MultiVector<int>& v: m_v.PeekMultiVectors())
//  {
//    n_allocated+=boost::numeric_cast<int>(v.size());
//  }
//  return n_allocated;
//}

///GetNumUsed calculates the number of indices used
//int ribi::ManyDigitIndexTable::GetNumUsed() const
//{
//  int n_non_zero = 0;
//  for(const MultiVector<int>& v: m_v.PeekMultiVectors())
//  {
//    n_non_zero +=
//      std::count_if(
//        v.PeekIndices().begin(),
//        v.PeekIndices().end(),
//        std::bind2nd(std::greater<int>(),0));
//  }
//  return n_non_zero;
//}

///SetIndex sets m_index_table[x][y] == z
///and resized the m_index_table is necessary
void ribi::ManyDigitIndexTable::SetIndex(
  const ManyDigitNewickCoordinat& indices,
  const int value)
{
  assert(value > 0);
  assert(
    !m_v.CanRetrieve(indices.ToVector())
    || m_v.Retrieve(indices.ToVector()) == 0);
  m_v.Store(indices.ToVector(),value);
  assert(m_v.Retrieve(indices.ToVector()) == value);
}

 

 

 

 

 

./CppManyDigitNewick/manydigitnewicks.h

 

//---------------------------------------------------------------------------
/*
ManyDigitNewick, Newick class
Copyright (C) 2011-2015 Richel Bilderbeek

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program.If not, see <http://www.gnu.org/licenses/>.
*/
//---------------------------------------------------------------------------
//From http://www.richelbilderbeek.nl/CppManyDigitNewick.htm
//---------------------------------------------------------------------------
#ifndef MANYDIGITNEWICKS_H
#define MANYDIGITNEWICKS_H

#include <vector>

#include "manydigitnewick.h"

namespace ribi {

///ManyDigitNewicks manages ManyDigitManyDigitNewicks.
///ManyDigitManyDigitNewicks manage F'(index) -> { indices }
struct ManyDigitNewicks
{
  typedef ManyDigitNewickDerivative Derivative;

  ManyDigitNewicks(const int n_reserved, const double theta);
  ///Empty returns if m_v is empty
  ///Set a Newick at index i
  ///\warning: do not use push_back, because of recursive calls
  void SetNewick(const int i, const ManyDigitNewick& v);
  bool Empty() const { return m_v.empty(); }
  ///Empty returns if an index is empty
  bool Empty(const int i) const;
  ///Get returns the internals of ManyDigitNewicks
  const std::vector<ManyDigitNewick>& Get() const { return m_v; }
  ///GetNewick returns the Newick at index i.
  ///i is checked for its range.
  const ManyDigitNewick& GetNewick(const int i) const;
  ///Sets the derivatives of F(i) to { v }
  //void SetDerivatives(const int i,const ManyDigitNewick& v);
  ///Obtain the number of Derivatives

  ///SetNewickProbability sets the probability of the Newick
  ///at index i to p
  void SetNewickProbability(const int i,const double p);

  int Size() const;
private:
  ///m_derivatives contains for every indexed Newick
  ///its derived indexed ManyDigitNewicks
  std::vector<ManyDigitNewick> m_v;
};

} //~namespace ribi

#endif // MANYDIGITNEWICKS_H

 

 

 

 

 

./CppManyDigitNewick/manydigitnewicks.cpp

 

//---------------------------------------------------------------------------
/*
ManyDigitNewick, Newick class
Copyright (C) 2011-2015 Richel Bilderbeek

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program.If not, see <http://www.gnu.org/licenses/>.
*/
//---------------------------------------------------------------------------
//From http://www.richelbilderbeek.nl/CppManyDigitNewick.htm
//---------------------------------------------------------------------------
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Weffc++"
#include "manydigitnewicks.h"

#include <cassert>


#include <boost/numeric/conversion/cast.hpp>

#include "newick.h"
#include "manydigitnewickderivative.h"
#include "newickvector.h"

#pragma GCC diagnostic pop

ribi::ManyDigitNewicks::ManyDigitNewicks(const int n_reserved, const double theta)
  : m_v{}
{
  //Create derivatives of simplest and reserved ManyDigitNewicks

  //Create the Newick at index 0
  //this->SetNewick(0,ManyDigitNewick(std::vector<ManyDigitDerivative>(ManyDigitDerivative(),0,0));
  //Create the Newick at index 1
  this->SetNewick(1,ManyDigitNewick(std::vector<Derivative>(),1,0));

  //Let i start at 1, because '(0)' is no valid Newick
  for (int i=2; i!=n_reserved; ++i)
  {
    std::vector<Derivative> v;
    const int value_changed = i;
    const int other_value_changed = 0; //<There is no other value
    v.push_back(Derivative(i-1,value_changed,other_value_changed));

    //saz = sum above zero
    const int saz = i;
    //sao = sum above one
    const int sao = (i > 1 ? i : 0);

    ManyDigitNewick n(v,saz,sao);

    n.SetProbability(Newick().CalcProbabilitySimpleNewick(
      {
        static_cast<int>(Newick::bracket_open),
        i,
        static_cast<int>(Newick::bracket_close)
      },
      theta)
    );
    this->SetNewick(i,n);

    assert( (i < 2 || this->GetNewick(i).IsComplete())
     && "All newick with index >= 2 must be complete");
  }

  assert(Empty(1));
}

///Empty returns if an index is empty
bool ribi::ManyDigitNewicks::Empty(const int i) const
{
  assert(i >= 0);
  assert(i < this->Size());
  return m_v[i].Empty();
}

const ribi::ManyDigitNewick& ribi::ManyDigitNewicks::GetNewick(
    const int i) const
{
  //Check if i is in range
  assert(i>=0);
  assert(i < this->Size());
  //Check if returned indices are okay
  #ifndef NDEBUG
  const ManyDigitNewick& v = m_v[i];
  for(const Derivative& j: v.GetDerivatives())
  {
    assert(j.m_derived_index >= 0);
    assert(j.m_derived_index < this->Size() );
  }
  #endif
  return m_v[i];
}

void ribi::ManyDigitNewicks::SetNewick(const int i, const ManyDigitNewick& v)
{
  //Allocate storage
  //TODO: replace by push_back
  if (i >= boost::numeric_cast<int>(m_v.size()))
  {
    m_v.resize(i + 1);
    assert(m_v[m_v.size()-1].Empty());
  }
  //
  //std::clog << "Adding a Newick at index " << i << '\n';
  assert(m_v[i].Empty());

  #ifndef NDEBUG
  for(const Derivative& j: v.GetDerivatives())
  {
    assert(j.m_derived_index > 0);
    assert(j.m_derived_index < boost::numeric_cast<int>(m_v.size())
      && "Cannot set a derivative index "
         "bigger than the number of derivatives");
  }
  #endif

  m_v[i] = v;
}

void ribi::ManyDigitNewicks::SetNewickProbability(
  const int i,const double p)
{
  assert(i >= 0);
  assert(i  < Size());
  assert(p >= 0.0);
  assert(p <= 1.0);
  m_v[i].SetProbability(p);
}

int ribi::ManyDigitNewicks::Size() const
{
  return boost::numeric_cast<int>(m_v.size());
}

 

 

 

 

 

Go back to Richel Bilderbeek's C++ page.

Go back to Richel Bilderbeek's homepage.

 

Valid XHTML 1.0 Strict

This page has been created by the tool CodeToHtml