Regular Expressions in C++
by John Maddock 


Listing One
// Checks whether a string has the shape of a card number: the whole of
// s must consist of 15 or 16 decimal digits and nothing else.
//
// s: the candidate card number; taken by const reference so that the
//    caller's string is not copied on every call.
// Returns true if the entire string matches, false otherwise.
bool validate_card_format(const std::string& s) 
{ 
   // compiled once, on the first call, and reused afterwards:
   static const boost::regex e("\\d{15,16}"); 
   // regex_match only succeeds when the expression matches all of s:
   return regex_match(s, e); 
}

Listing Two
// Wide-character version of the card format check: the whole of s must
// consist of 15 or 16 decimal digits and nothing else.
//
// s: the candidate card number; taken by const reference so that the
//    caller's string is not copied on every call.
// Returns true if the entire string matches, false otherwise.
bool validate_card_format(const std::wstring& s) 
{ 
   // compiled once, on the first call, and reused afterwards:
   static const boost::wregex e(L"\\d{15,16}"); 
   // regex_match only succeeds when the expression matches all of s:
   return regex_match(s, e); 
}

Listing Three
// Expression matching a card number written as four groups of digits
// (3 or 4 digits, then 4, 4 and 4), where each group may be followed by
// a single '-' or ' ' separator. Each group is captured so that the
// format strings below can refer to it:
const boost::regex e("\\A"              // asserts start of string
                     "(\\d{3,4})[- ]?"  // first group of digits
                     "(\\d{4})[- ]?"    // second group of digits
                     "(\\d{4})[- ]?"    // third group of digits
                     "(\\d{4})"         // fourth group of digits
                     "\\z");            // asserts end of string

// format strings using sed syntax (\N stands for captured group N):
// machine_format joins the four groups with no separators,
// human_format joins them with '-' between the groups.
const std::string machine_format("\\1\\2\\3\\4");
const std::string human_format("\\1-\\2-\\3-\\4");

// Rewrites a card number using machine_format (the digit groups run
// together with no separators).
//
// s: the card number in any layout accepted by the expression e.
// Returns the reformatted number.
// Throws std::runtime_error if the reformatted text comes back empty,
// which is how a string that is not a card number shows up here.
std::string 
machine_readable_card_number(const std::string& s)
{
   // format_no_copy asks for the unmatched parts of s to be left out of
   // the output, so an input with no match produces an empty string:
   const std::string formatted(
      regex_merge(s, e, machine_format,
                  boost::match_default | boost::format_sed | boost::format_no_copy));
   if(formatted.empty())
   {
      throw std::runtime_error("String is not a credit card number");
   }
   return formatted;
}
// Rewrites a card number using human_format (the digit groups
// separated by '-').
//
// s: the card number in any layout accepted by the expression e.
// Returns the reformatted number.
// Throws std::runtime_error if the reformatted text comes back empty,
// which is how a string that is not a card number shows up here.
std::string 
human_readable_card_number(const std::string& s)
{
   // format_no_copy asks for the unmatched parts of s to be left out of
   // the output, so an input with no match produces an empty string:
   const std::string formatted(
      regex_merge(s, e, human_format,
                  boost::match_default | boost::format_sed | boost::format_no_copy));
   if(formatted.empty())
   {
      throw std::runtime_error("String is not a credit card number");
   }
   return formatted;
}

Listing Four
#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <boost/regex.hpp>

// Expression describing a <datamerge ...> tag. The tag may carry
// table="...", item="..." and field="..." attributes in any order; the
// quoted value of each attribute is captured in its own group.
const char* expression = 
   "<\\s*datamerge"                      // tag prefix
   "(?:"                                 // non-marking grouping
      "\\s+table\\s*=\\s*\"([^\"]*)\""   // $1 = table name
      "|\\s+item\\s*=\\s*\"([^\"]*)\""   // $2 = item name
      "|\\s+field\\s*=\\s*\"([^\"]*)\""  // $3 = field name
   "){1,3}"                              // grouping repeated 1, 2 or 3 times
   "\\s*>";                              // tag suffix
// compiled form of the expression above:
const boost::regex e(expression);
// position just past the most recent match; set to the start of the
// text by main() and advanced by grep_callback() after every match, so
// that main() can copy whatever follows the last match to the output:
std::string::const_iterator endp;
// Produces the replacement text for one datamerge tag.
//
// table, item, field: the names taken from the tag's attributes.
// Returns the three names joined as "table#item#field".
std::string lookup_datamerge_string(const std::string& table, const 
std::string& item, const std::string& field)
{
   // this should carry out a database lookup, 
   // for now just concatenate the names together:
   static const char separator[] = "#";
   std::string merged;
   merged.reserve(table.size() + item.size() + field.size() + 2);
   merged.append(table).append(separator);
   merged.append(item).append(separator);
   merged.append(field);
   return merged;
}
// Called for every datamerge tag that is found. Writes the text lying
// between the previous match and this one to std::cout, followed by
// the replacement for the tag, and records where this match ended.
//
// in: the match results for the tag; groups 1, 2 and 3 hold the table,
//     item and field names.
// Returns true so that the search carries on with the next match.
// Throws std::runtime_error if the tag has no item name.
bool grep_callback(const boost::match_results<std::string::const_iterator>& in)
{
   // the item name is required and has no default:
   std::string item_name = in[2];
   if(item_name.empty())
   {
      throw std::runtime_error("Incomplete datamerge field found");
   }
   // table and field names fall back to defaults when left out:
   std::string table_name = in[1];
   if(table_name.empty())
   {
      table_name = "default_table_name";
   }
   std::string field_name = in[3];
   if(field_name.empty())
   {
      field_name = "default_field_name";
   }
   // output $` (everything from the end of the last match up to the
   // start of this one), then the merged text for this tag:
   const std::string skipped_text = in[-1];
   std::cout << skipped_text;
   std::cout << lookup_datamerge_string(table_name, item_name, field_name);
   // remember where this match stopped, for use later on:
   endp = in[0].second;
   // keep grepping:
   return true;
}
// Reads everything that remains in a stream into a string.
//
// s:  receives the text; whatever it held before is discarded.
// is: the stream to read from; it is read one character at a time
//     until a read fails (normally at end of input).
void load_file(std::string& s, std::istream& is)
{
   s.erase();
   // in_avail() is only a hint: it may report 0, or -1 when the buffer
   // cannot tell how much input is left, and the stream may have no
   // buffer at all. Only a positive count is safe to pass to reserve();
   // a negative one would turn into an enormous size and make
   // reserve() throw std::length_error.
   std::streambuf* buf = is.rdbuf();
   const std::streamsize hint = buf ? buf->in_avail() : 0;
   if(hint > 0)
      s.reserve(static_cast<std::string::size_type>(hint));
   // the string grows its own storage as needed while appending:
   char c;
   while(is.get(c))
      s.append(1, c);
}
namespace {

// Swaps a stream's buffer for another one and puts the original buffer
// back when the guard is destroyed, whether the enclosing scope is left
// normally or because an exception was thrown.
class rdbuf_guard
{
public:
   explicit rdbuf_guard(std::ios& stream) : m_stream(stream), m_saved(0) {}
   ~rdbuf_guard() { if(m_saved) m_stream.rdbuf(m_saved); }
   // make the stream use replacement, remembering the buffer it had:
   void redirect(std::streambuf* replacement)
   { m_saved = m_stream.rdbuf(replacement); }
private:
   rdbuf_guard(const rdbuf_guard&);            // not copyable
   rdbuf_guard& operator=(const rdbuf_guard&); // not assignable
   std::ios& m_stream;
   std::streambuf* m_saved;
};

} // namespace

// Copies input to output, replacing every datamerge tag with the text
// returned by lookup_datamerge_string().
//
// argv[1] (optional): file to read instead of standard input.
// argv[2] (optional): file to write instead of standard output.
// Returns 0 on success, 1 if an exception was reported on std::cerr.
int main(int argc, char * argv[])
{
   try
   {
      // The file buffers are declared before the guards so that the
      // guards are destroyed first: std::cin and std::cout get their
      // original buffers back before the file buffers go away, even
      // when an exception is thrown part way through the merge.
      std::filebuf ifs;
      std::filebuf ofs;
      rdbuf_guard cin_guard(std::cin);
      rdbuf_guard cout_guard(std::cout);
      if(argc > 1)
      {
         // redirect cin:
         if(!ifs.open(argv[1], std::ios_base::in))
            throw std::runtime_error(
               std::string("Unable to open input file: ") + argv[1]);
         cin_guard.redirect(&ifs);
      }
      if(argc > 2)
      {
         // redirect cout:
         if(!ofs.open(argv[2], std::ios_base::out))
            throw std::runtime_error(
               std::string("Unable to open output file: ") + argv[2]);
         cout_guard.redirect(&ofs);
      }
      std::string s;
      load_file(s, std::cin);
      endp = s.begin();
      // perform search and replace with lookup:
      boost::regex_grep(&grep_callback, s, e);
      // copy tail of file to output:
      std::string::const_iterator end = s.end();
      std::copy(endp, end, std::ostream_iterator<char>(std::cout));
   }
   catch(const std::exception& e)
   {
      std::cerr << "Exception thrown during merge: \"" 
                << e.what() << "\"" << std::endl;
      // let the caller see that the merge did not complete:
      return 1;
   }
   return 0;
}






3

