Go back to Richel Bilderbeek's homepage.

Go back to Richel Bilderbeek's C++ page.

 

 

 

 

 

(C++) RemoveDuplicates

 

RemoveDuplicates is a std::vector code snippet to remove duplicates from a std::vector.

 

#include <algorithm>
#include <vector>

//From http://learningcppisfun.blogspot.com/2008/04/remove-duplicates-from-vector.html
//Assumes order of elements is not important
//Sorts order of container
template<typename T>
void RemoveDuplicates(std::vector<T>& vec)
{
  std::sort(vec.begin(), vec.end());
  vec.erase(std::unique(vec.begin(), vec.end()), vec.end());
}

 

 

 

 

 

Use of RemoveDuplicates

 

#include <algorithm>
#include <cassert>
#include <iostream>
#include <iterator>
#include <vector>
//---------------------------------------------------------------------------
//Assumes order of elements is not important
//Sorts order of container
//http://learningcppisfun.blogspot.com/2008/04/remove-duplicates-from-vector.html
template<typename T>
void RemoveDuplicates(std::vector<T>& vec)
{
  std::sort(vec.begin(), vec.end());
  vec.erase(std::unique(vec.begin(), vec.end()), vec.end());
}
//---------------------------------------------------------------------------
const std::vector<int> CreateVector(const std::size_t sz, const int max)
{
  std::vector<int> v(sz,0);
  for (std::size_t i=0; i!=sz; ++i)
  {
    v[i] = std::rand() % max;
  }
  return v;
}
//---------------------------------------------------------------------------
int main()
{
  std::vector<int> v = CreateVector(20,4);
  std::copy(v.begin(),v.end(),std::ostream_iterator<int>(std::cout," "));
  std::cout << '\n';
  RemoveDuplicates(v);
  std::copy(v.begin(),v.end(),std::ostream_iterator<int>(std::cout," "));
  std::cout << '\n';
}

 

Screen output:

 

3 2 1 3 1 3 2 0 1 1 2 3 2 3 3 2 0 2 0 0
0 1 2 3

 

 

 

 

 

RemoveDuplicates benchmark

 

#include <algorithm>
#include <cassert>
#include <iostream>
#include <iterator>
#include <set>
#include <vector>
#include <boost/timer.hpp>
//---------------------------------------------------------------------------
struct Function
{
  typedef void (*FunctionPointer)(std::vector<int>&);
  Function(FunctionPointer anyFunction, const std::string& s)
    : function(anyFunction), name(s), sumTime(0.0) {}
  FunctionPointer function;
  std::string name;
  double sumTime;
};
//---------------------------------------------------------------------------
//From http://www.richelbilderbeek.nl/CppExerciseAddOneAnswer.htm
struct SpeedMeasurer : std::unary_function <void,Function>
{
  SpeedMeasurer(std::vector<int>& v) : mV(v) {}
  void operator()(Function& function)
  {
    boost::timer t;
    function.function(mV); //Perform the function
    const double time = t.elapsed();
    function.sumTime += time;
    assert(time >= t.elapsed_min() * 10
      && "Measurement must be above 10x minimal timer interval!");
  }
  std::vector<int>& mV;
};
//---------------------------------------------------------------------------
//From http://www.richelbilderbeek.nl/CppRemoveDuplicates.htm
//Assumes order of elements is not important
template<typename T>
void RemoveDuplicates0(std::vector<T>& v)
{
  std::set<T> s;
  for (std::size_t i=0; i<v.size(); ++i)
  {
    if (s.count(v[i])!=0)
    {
      std::swap(v.back(),v[i]);
      v.pop_back();
      --i;
    }
    else
    {
      s.insert(v[i]);
    }
  }
}
//---------------------------------------------------------------------------
//From http://www.richelbilderbeek.nl/CppRemoveDuplicates.htm
//Assumes order of elements is not important
//Sorts order of container
template<typename T>
void RemoveDuplicates1(std::vector<T>& v)
{
  std::sort(v.begin(), v.end());
  const std::size_t sz = std::distance(v.begin(),std::unique(v.begin(), v.end()));
  v.resize(sz);
}
//---------------------------------------------------------------------------
//From http://learningcppisfun.blogspot.com/2008/04/remove-duplicates-from-vector.html
//Assumes order of elements is not important
//Sorts order of container
template<typename T>
void RemoveDuplicates2(std::vector<T>& vec)
{
  std::sort(vec.begin(), vec.end());
  vec.erase(std::unique(vec.begin(), vec.end()), vec.end());
}
//---------------------------------------------------------------------------
const std::vector<int> CreateVector(const std::size_t sz, const int max)
{
  std::vector<int> v(sz,0);
  for (std::size_t i=0; i!=sz; ++i)
  {
    v[i] = std::rand() % max;
  }
  return v;
}
//---------------------------------------------------------------------------
std::vector<Function> GetFunctions()
{
  std::vector<Function> v;
  v.push_back(Function(RemoveDuplicates0,"RemoveDuplicates0"));
  v.push_back(Function(RemoveDuplicates1,"RemoveDuplicates1"));
  v.push_back(Function(RemoveDuplicates2,"RemoveDuplicates2"));
  return v;
}
//---------------------------------------------------------------------------
std::ostream& operator<<(std::ostream& os, const Function& function)
{
  os << function.sumTime << '\t' << function.name;
  return os;
}
//---------------------------------------------------------------------------
bool LowestSumTime(const Function& lhs, const Function& rhs)
{
  return (lhs.sumTime < rhs.sumTime);
}
//---------------------------------------------------------------------------
//From http://www.richelbilderbeek.nl/CppRemoveDuplicates.htm
int main()
{
  boost::timer t;

  const std::size_t vector_sz = 1000000;
  const std::size_t n_times = 100;
  const int max_rand = static_cast<int>(vector_sz) / 10;
  std::vector<Function> functions = GetFunctions();
  const std::size_t n_tests = functions.size();

  for (std::size_t time=0; time!=n_times; ++time)
  {
    std::cout << time << " / " << n_times << std::endl;
    std::random_shuffle(functions.begin(), functions.end());
    for (std::size_t test=0; test!=n_tests; ++test)
    {
      std::vector<int> v = CreateVector(vector_sz, max_rand);
      SpeedMeasurer s(v);
      s.operator ()(functions[test]);
    }
  }

  std::cout << "Done.\n";
  std::cout << "Size std::vector: " << vector_sz << '\n';
  std::cout << "Repeats: " << n_times << '\n';
  std::cout << "Timer minimal interval: " << t.elapsed_min() << '\n';
  std::sort(functions.begin(),functions.end(),LowestSumTime);
  std::copy(functions.begin(),functions.end(), std::ostream_iterator<Function>(std::cout,"\n"));
  std::cout << "Total running time: " << t.elapsed() << " seconds.\n";
  std::cin.get();
}

 

Screen output:

 

0 / 100
1 / 100
[Intermediate values]
98 / 100
99 / 100
Done.
Size std::vector: 1000000
Repeats: 100
Timer minimal interval: 1e-06
39.45 RemoveDuplicates2
39.47 RemoveDuplicates1
59.24 RemoveDuplicates0
Total running time: 147.24 seconds.

 

 

 

 

 

Go back to Richel Bilderbeek's C++ page.

Go back to Richel Bilderbeek's homepage.

 

Valid XHTML 1.0 Strict