struct BinaryNewickVector
{
enum { bracket_open = -1 };
enum { bracket_close = -2 };
enum { comma = -3 };
enum { new_line = -4 };
enum { null = -5 };
};
///CheckNewick checks if a std::string is a valid Newick.
///If this std::string is not a valid Newick,
///CheckNewick throws an exception with a detailed description
///From http://www.richelbilderbeek.nl/CppCheckNewick.htm
void CheckNewick(const std::string& s)
{
if (s.size()<3)
throw std::invalid_argument(
"The Newick std::string must have a size of at least three characters");
if (s[0]!='(')
throw std::invalid_argument(
"The Newick std::string must start with an opening bracket ('(').");
if (s[s.size()-1]!=')')
throw std::invalid_argument(
"The Newick std::string must end with a closing bracket (')').");
if (std::count(s.begin(),s.end(),'(')
!=std::count(s.begin(),s.end(),')'))
throw std::invalid_argument(
"The Newick std::string must have as much opening "
"as closing brackets #1");
std::string s_copy = s;
while(s_copy.size()>2) //Find a leaf and cut it until the string is empty
{
//Find a leaf
//Find index i (starting opening bracket) and j (closing bracket)
const std::size_t sz = s_copy.size();
std::size_t i = 0;
std::size_t j = 0;
for (i=0 ; i!=sz; ++i) //Index of opening bracket
{
if (s_copy[i]!='(') continue;
if (s_copy[i+1]==')')
throw std::invalid_argument(
"The Newick std::string cannot have "
"a consecutive opening and closing bracket");
for (j=i+1; j!=sz; ++j)
{
if (s_copy[j]=='(') { j = 0; break; }
if (s_copy[j]!=')') continue;
break;
}
if (j == 0) continue; //j cannot be 0 after previous for loop
if (j == sz)
throw std::invalid_argument(
"The Newick std::string must have as much opening as closing brackets #2");
break;
}
if (s_copy[i]!='(')
throw std::invalid_argument(
"The Newick std::string must have as much opening as closing brackets #3");
//Indices i and j found
//Is range between i and j valid?
if (s_copy[i]!='(')
throw std::logic_error(
"Bilderbikkel incorrectly assumes that s_copy[i]=='('");
if (s_copy[j]!=')')
throw std::logic_error(
"Bilderbikkel incorrectly assumes that s_copy[j]==')'");
if (i + 1 != sz && s_copy[i+1]==',')
throw std::invalid_argument(
"After bracket open, there can be no comma");
if (j != 0 && s_copy[j-1]==',')
throw std::invalid_argument(
"Before bracket close, there can be no comma");
//Check the range
for (size_t k=i+1; k!=j; ++k)
{
if ( s_copy[k]!='0'
&& s_copy[k]!='1'
&& s_copy[k]!='2'
&& s_copy[k]!='3'
&& s_copy[k]!='4'
&& s_copy[k]!='5'
&& s_copy[k]!='6'
&& s_copy[k]!='7'
&& s_copy[k]!='8'
&& s_copy[k]!='9'
&& s_copy[k]!='0'
&& s_copy[k]!=',')
{
std::stringstream err_msg;
err_msg << "Invalid non-number character in input: '" << s_copy[k] << "'";
throw std::invalid_argument(err_msg.str().c_str());
}
if (k + 1 != sz && s_copy[k] == ',' && s_copy[k+1] == ',')
throw std::invalid_argument(
"There can be no consecutive comma's");
}
if (i > 0 && s_copy[i-1]=='(' &
j +1 < sz && s_copy[j + 1] == ')')
{
throw std::invalid_argument(
"Newicks must not have the form ((X))");
}
//Range is assumed valid
//Cut the leaf (turns '(1,2)' to (9) )
assert(s_copy[i]=='(');
assert(s_copy[j]==')');
const std::string s_new_1 = s_copy.substr(0,i);
const std::string s_new_2 = s_copy.substr(j+1,sz-j-1); //OK
const std::string s_new = s_new_1 + "9" + s_new_2;
s_copy = s_new;
}
}
///CheckNewick checks if a std::vector<int> is a valid Newick.
///If this std::vector<int> is not a valid Newick,
///CheckNewick throws an exception with a detailed description
///From http://www.richelbilderbeek.nl/CppCheckNewick.htm
void CheckNewick(const std::vector<int>& v)
{
if (v.size()<3)
throw std::invalid_argument(
"The Newick std::vector<int> must have a size of at least three characters");
if (v[0]!=BinaryNewickVector::bracket_open)
throw std::invalid_argument(
"The Newick std::vector<int> must start with an opening bracket ('(').");
if (v[v.size()-1]!=BinaryNewickVector::bracket_close)
throw std::invalid_argument(
"The Newick std::vector<int> must end with a closing bracket (')').");
if (std::count(v.begin(),v.end(),static_cast<int>(BinaryNewickVector::bracket_open))
!=std::count(v.begin(),v.end(),static_cast<int>(BinaryNewickVector::bracket_close)))
throw std::invalid_argument(
"The Newick std::string must have as much opening "
"as closing brackets #1");
std::vector<int> v_copy = v;
while(v_copy.size()>2) //Find a leaf and cut it until the string is empty
{
//Find a leaf
//Find index i (starting opening bracket) and j (closing bracket)
const std::size_t sz = v_copy.size();
std::size_t i = 0;
std::size_t j = 0;
for (i=0 ; i!=sz; ++i) //Index of opening bracket
{
if (v_copy[i]!=BinaryNewickVector::bracket_open) continue;
if (v_copy[i+1]==BinaryNewickVector::bracket_close)
throw std::invalid_argument(
"The Newick std::vector<int> cannot have "
"a consecutive opening and closing bracket");
for (j=i+1; j!=sz; ++j)
{
if (v_copy[j]==BinaryNewickVector::bracket_open) { j = 0; break; }
if (v_copy[j]!=BinaryNewickVector::bracket_close) continue;
break;
}
if (j == 0) continue; //j cannot be 0 after previous for loop
if (j == sz)
throw std::invalid_argument(
"The Newick std::vector<int> must have as much opening "
"as closing brackets #2");
break;
}
if (v_copy[i]!=BinaryNewickVector::bracket_open)
throw std::invalid_argument(
"The Newick std::vector<int> must have as much opening "
"as closing brackets #3");
//Indices i and j found
//Is range between i and j valid?
if (v_copy[i]!=BinaryNewickVector::bracket_open)
throw std::logic_error(
"Bilderbikkel incorrectly assumes that s_copy[i]=='('");
if (v_copy[j]!=BinaryNewickVector::bracket_close)
throw std::logic_error(
"Bilderbikkel incorrectly assumes that s_copy[j]==')'");
//Check the range
for (size_t k=i+1; k!=j; ++k)
{
if (v_copy[k] < 0)
{
std::ostringstream err_msg;
err_msg << "Invalid non-number in input: '" << v_copy[k] << "'";
throw std::invalid_argument(err_msg.str().c_str());
}
}
//Range is assumed valid
//Cut the leaf
//Changes '(1,2)' to '(999)'
assert(v_copy[i]==BinaryNewickVector::bracket_open);
assert(v_copy[j]==BinaryNewickVector::bracket_close);
std::vector<int> v_new(v_copy.begin(),v_copy.begin() + i);
v_new.push_back(999);
std::copy(v_copy.begin() + j + 1, v_copy.end(),std::back_inserter(v_new));
v_copy = v_new;
}
}
|