libcsdbg  1.28
C++ exception (and generic) stack trace debug library
chain< string > * csdbg::string::split ( const string exp,
bool  imatch = true,
bool  icase = false 
) const
virtual

Tokenize using a POSIX extended regular expression.

Parameters
[in] exp: the delimiter expression
[in] imatch: false to include the actual matches in the result
[in] icase: true to ignore case (case-insensitive matching)
Returns
the list of tokens (heap allocated)
Exceptions
std::bad_alloc
csdbg::exception

Definition at line 642 of file string.cpp.

References csdbg::chain< T >::add(), cstr(), likely, m_data, m_length, match(), string(), and unlikely.

Referenced by csdbg::parser::parse(), and csdbg::plugin::resolve().

643 {
644  chain<string> *tokens = NULL;
645  string *word = NULL;
646  regex_t regexp;
647 
648  /* If an exception occurs, release resources and rethrow it */
649  try {
650  tokens = new chain<string>;
651 
652  /* Compile the regular expression */
653  i32 flags = REG_EXTENDED;
654  if ( unlikely(icase) )
655  flags |= REG_ICASE;
656 
657  i32 retval = regcomp(&regexp, exp.cstr(), flags);
658  if ( unlikely(retval != 0) ) {
659  i32 len = regerror(retval, &regexp, NULL, 0);
660  i8 errbuf[len];
661  regerror(retval, &regexp, errbuf, len);
662 
663  throw exception(
664  "failed to compile regexp '%s' (regex errno %d - %s)",
665  exp.cstr(),
666  retval,
667  errbuf
668  );
669  }
670 
671  regmatch_t match;
672  regoff_t offset = 0;
673  i32 len = m_length;
674  do {
675  bool found = !regexec(&regexp, m_data + offset, 1, &match, 0);
676 
677  /*
678  * The delimiter pattern is found. The left token is from the beginning of
679  * the text plus an offset, to the beginning of the matched text. The
680  * right token is from the end of the matched text to the end of the text
681  * or to the beginning of the next matched text. This will be evaluated on
682  * the next loop pass
683  */
684  if ( likely(found) ) {
685  i32 bgn = match.rm_so;
686  i32 end = match.rm_eo;
687  if ( unlikely(end == 0) )
688  throw exception("logic error in regular expression '%s'", exp.cstr());
689 
690  word = new string("%.*s", bgn, m_data + offset);
691  tokens->add(word);
692  word = NULL;
693 
694  if ( unlikely(!imatch) ) {
695  word = new string("%.*s", end - bgn, m_data + offset + bgn);
696  tokens->add(word);
697  word = NULL;
698  }
699 
700  offset += end;
701  if ( unlikely(offset > len) )
702  break;
703  }
704 
705  /*
706  * The pattern isn't found. That means that either the delimiter was never
707  * in the text, so the whole text is the one and only token, or there is
708  * some text after the last delimiter. In that case this trailing text is
709  * the last token
710  */
711  else if ( likely(offset <= len) ) {
712  word = new string(m_data + offset);
713  tokens->add(word);
714  word = NULL;
715  break;
716  }
717 
718  /* No more tokens */
719  else
720  break;
721  }
722 
723  while ( likely(true) );
724 
725  regfree(&regexp);
726  return tokens;
727  }
728 
729  catch (...) {
730  delete tokens;
731  delete word;
732  regfree(&regexp);
733  throw;
734  }
735 }
char i8
8-bit signed integer
Definition: config.hpp:72
string(u32=0)
Object constructor.
Definition: string.cpp:127
#define likely(expr)
Offer a hint (positive) to the pipeline branch predictor.
Definition: config.hpp:344
virtual bool match(const string &, bool=false) const
Match against a POSIX extended regular expression.
Definition: string.cpp:490
i8 * m_data
String data.
Definition: string.hpp:42
int i32
32-bit signed integer
Definition: config.hpp:82
#define unlikely(expr)
Offer a hint (negative) to the pipeline branch predictor.
Definition: config.hpp:349
u32 m_length
Character count.
Definition: string.hpp:44

+ Here is the call graph for this function:

+ Here is the caller graph for this function: