Go back to Richel Bilderbeek's homepage.

Go back to Richel Bilderbeek's C++ page.

 

 

 

 

 

(C++) HugeVector

 

HugeVector is a class similar to std::vector that, in theory can contain 18,446,744,073,709,551,616 elements. In practice, HugeVector is limited by the available memory. On my computer, a std::vector is already limited by the available memory, so I keep using std::vector.

 

 

 

 

 

Idea behind HugeVector

 

Assuming that memory is not a limiting factor, the size of a std::vector is limited by the maximum (unsigned) integer value: 2^32 = 4,294,967,296 = 4 billion. This integer value is used in the std::vector methods resize and the index operator. Perhaps a std::vector can become of a bigger size (using push_back), but one cannot retrieve the, for example, 5th billionth index directly (that is, without using iterators), because there is no integer that can hold such a value.

 

HugeVector takes the maximum size to 2^64 = 18,446,744,073,709,551,616 = 18 billion billion, by storing elements in a 2D std::vector: there is a std::vector that stores as much std::vectors as it can store (which is 2^32 = 4,294,967,296 = 4 billion). Each of these std::vectors stores as much elements as they can store (which is 2^32 = 4,294,967,296 = 4 billion).

 

To overcome the maximum integer value, the CLN data type cln::cl_I is used, which can store near-infinite-size integers.

 

 

 

 

 

System specifications

 

Operating system: Ubuntu 10.04 LTS Lucid Lynx

IDE: Qt Creator 2.0.0

Project type: GUI application

Compiler: G++ 4.4.1

Libraries used:

 

 

 

 

 

hugevector.h

 

#ifndef HUGEVECTOR_H
#define HUGEVECTOR_H
//---------------------------------------------------------------------------
#include <cassert>
#include <vector>
#include <boost/lexical_cast.hpp>
#include <boost/numeric/conversion/bounds.hpp>
#include <boost/numeric/conversion/cast.hpp>
#include <cln/cln.h>
//---------------------------------------------------------------------------
///IntToStrWithSep converts an integer to std::string
///and adds thousands seperators.
///From http://www.richelbilderbeek.nl/CppIntToStrWithSep.htm
const std::string IntToStrWithSep(cln::cl_I i);
//---------------------------------------------------------------------------
///SafeIntToCli converts an int to cln::cl_I safely.
///From http://www.richelbilderbeek.nl/CppSafeIntToCli.htm
const cln::cl_I SafeIntToCli(const int i);
//---------------------------------------------------------------------------
///HugeVector is a class for storing
template <class T>
struct HugeVector
{
  HugeVector(const cln::cl_I& sz)
    : m_max_index(cln::isqrt(sz)+1),
      m_cur_index(0)
  {
    std::clog << "HugeVector(" << sz << ")"
      << ": m_max_index(" << m_max_index << ")\n";
    assert(this->max_size() >= sz);
    //Only add an empty std::vector,
    //for HugeVector::push_back to work
    //(because it needs an existing m_v.back())
    m_v.push_back(std::vector<T>());

    assert(m_max_index < SafeIntToCli(m_v.max_size()));
    //std::clog << "m_v.max_size: " << m_v.max_size() << '\n';
  }
  const T& operator[](const cln::cl_I& i) const
  {
    assert(i >= 0);
    assert(i < m_max_index * m_max_index);

    //cln::cl_I_to_int will throw if cln::cl_I is bigger then the maximum
    //integer value. This method should not throw, however.
    //x = i / m_max_index
    const int x = cln::cl_I_to_int(cln::floor1(i,m_max_index));
    //y = i % m_max_index
    const int y = cln::cl_I_to_int(cln::mod(i,m_max_index));
    //std::clog << "const T& operator[" << i << "], "
    //  << "retrieves at (x,y) = (" << x << ',' << y << ")\n";
    assert(x < boost::numeric_cast<int>(m_v.size()));
    assert(y < boost::numeric_cast<int>(m_v[x].size()));
    return m_v[x][y];
  }

  T& operator[](const cln::cl_I& i)
  {
    //Calls the const version of operator[]
    //To avoid duplication in const and non-const member functions [1]
    return const_cast<T&>(const_cast<const HugeVector<T>& >(*this)[i]);
  }
  void push_back(const T& t)
  {
    //std::clog << "HugeVector::push_back\n";
    const int cur_sz = boost::numeric_cast<int>(m_v[m_cur_index].size());
    //std::clog << "Current size: " << cur_sz << '\n';
    assert(cur_sz < boost::numeric_cast<int>(m_v[m_cur_index].max_size()));
    if (cln::cl_I(cur_sz) == m_max_index)
    {
      //std::clog << "Add a std::vector\n";
      m_v.push_back(std::vector<T>());
      ++m_cur_index;
    }
    //const int index = boost::numeric_cast<int>(m_v.size()) - 1;
    assert(m_cur_index >= 0);
    //std::clog << "Push_back data at m_v["<< m_cur_index << "]\n";
    m_v[m_cur_index].push_back(t);

  }
  ///HugeVector<T>::reserve reserves at least i elements of space
  void reserve_all()
  {
    this->reserve(max_size());
  }
  void reserve(const cln::cl_I& i)
  {
    assert(i >= 0);
    std::clog << "HugeVector<T>::reserve(" << i << ")\n";
    //how many 1D std::vectors to reserve in?
    //j = 1 + (i / m_max_index)
    const int j = 1 + cln::cl_I_to_int(cln::floor1(i,m_max_index));
    std::clog << "Number of 1D std::vectors: " << j << '\n';
    //create space for those 1D std::vectors
    m_v.resize(j);
    //reserve in all those 1D std::vectors
    const int reserve_sz = cln::cl_I_to_int(m_max_index);
    //std::clog << "Reserved size in 1D std::vectors: " << reserve_sz << '\n';
    for (int i=0; i!=j; ++i)
    {
      //std::clog << "Reserving " << reserve_sz << " spaces in std::vector[" << i << "]\n";
      assert(i < boost::numeric_cast<int>(m_v.size()));
      assert(reserve_sz < boost::numeric_cast<int>(m_v[i].max_size()));
      //reserve maximum space
      m_v[i].reserve(reserve_sz);
    }
  }
  const cln::cl_I max_size() const
  {
    const cln::cl_I x = (m_max_index * m_max_index);
    const cln::cl_I y = x - cln::cl_I(1);
    std::clog << "HugeVector<T>::max_size() = " << y << '\n';
    return y;
  }

  private:
  //The maximum index a std::vector will/can be called from
  const cln::cl_I m_max_index;
  //m_cur_index is the index in m_v currently working on
  //(m_v.size() does not work, because in HugeVector::reserve
  // the 1D std::vectors must exist, to be able to reserve
  // memory for them).
  int m_cur_index;
  //The 2D std::vector containing all data
  std::vector<std::vector<T> > m_v;
};
//---------------------------------------------------------------------------
///m_max_index = 2 ^ 27 = 134217728
///m_max_index = 2 ^ 28 = 268435456
///m_max_index must be less than std::vector<T>::max_size
///Measured was 357913941, which equals 2 ^ 28.415
///template <class T>
///const cln::cl_I HugeVector<T>::m_max_index("134217728");
//---------------------------------------------------------------------------
///IntToStrWithSep converts an integer to std::string
///and adds thousands seperators.
///From http://www.richelbilderbeek.nl/CppIntToStrWithSep.htm
const std::string IntToStrWithSep(cln::cl_I i)
{
  std::string s
    = boost::lexical_cast<std::string>(cln::mod(i,10));
  i = cln::floor1(i,10);
  int d = 1;
  while (!cln::zerop(i))
  {
    s = boost::lexical_cast<std::string>(cln::mod(i,10))
      + (d % 3 == 0 ? "," : "")
      + s;
    i = cln::floor1(i,10);
    ++d;
  }
  return s;
}
//---------------------------------------------------------------------------
///SafeIntToCli converts an int to cln::cl_I safely.
///From http://www.richelbilderbeek.nl/CppSafeIntToCli.htm
const cln::cl_I SafeIntToCli(const int i)
{
  //A cln::cl_I can handle integer values to 2^29 in its constructor
  if (i < 536870912)
  {
    return cln::cl_I(i);
  }
  const std::string s = boost::lexical_cast<std::string>(i);
  return cln::cl_I(s.c_str());
}
//---------------------------------------------------------------------------
#endif // HUGEVECTOR_H

 

 

 

 

 

Go back to Richel Bilderbeek's C++ page.

Go back to Richel Bilderbeek's homepage.

 

Valid XHTML 1.0 Strict