nuttx/libs/libc/regex/regexec.c

/****************************************************************************
 * libs/libc/regex/regexec.c
 *
 * regexec.c - TRE POSIX compatible matching functions (and more).
 *
 *  Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
 *  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ****************************************************************************/

/****************************************************************************
 * Included Files
 ****************************************************************************/

#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
#include <limits.h>

#include <regex.h>

#include "tre.h"

#include <assert.h>

/****************************************************************************
 * Private Functions
 ****************************************************************************/

static void tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
                            const tre_tnfa_t *tnfa, int *tags, int match_eo);

/* from tre-match-utils.h
 */

#define GET_NEXT_WCHAR()    do {                                        \
      prev_c = next_c;pos += pos_add_next;                              \
      if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0){ \
          if (pos_add_next < 0){ ret = REG_NOMATCH;goto error_exit; }   \
          else pos_add_next++;                                          \
        }                                                               \
      str_byte += pos_add_next;                                         \
  } while (0)

#define IS_WORD_CHAR(c)     ((c) == L'_' || tre_isalnum(c))

#define CHECK_ASSERTIONS(assertions)                         \
  (((assertions & ASSERT_AT_BOL)                             \
    && (pos > 0 || reg_notbol)                               \
    && (prev_c != L'\n' || !reg_newline))                    \
   || ((assertions & ASSERT_AT_EOL)                          \
       && (next_c != L'\0' || reg_noteol)                    \
       && (next_c != L'\n' || !reg_newline))                 \
   || ((assertions & ASSERT_AT_BOW)                          \
       && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))   \
   || ((assertions & ASSERT_AT_EOW)                          \
       && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c)))   \
   || ((assertions & ASSERT_AT_WB)                           \
       && (pos != 0 && next_c != L'\0'                       \
           && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
   || ((assertions & ASSERT_AT_WB_NEG)                       \
       && (pos == 0 || next_c == L'\0'                       \
           || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))

#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)                              \
  (((trans_i->assertions & ASSERT_CHAR_CLASS)                                  \
    && !(tnfa->cflags & REG_ICASE)                                             \
    && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class))                     \
   || ((trans_i->assertions & ASSERT_CHAR_CLASS)                               \
       && (tnfa->cflags & REG_ICASE)                                           \
       && !tre_isctype(tre_tolower((tre_cint_t)prev_c), trans_i->u.class)      \
       && !tre_isctype(tre_toupper((tre_cint_t)prev_c), trans_i->u.class))     \
   || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG)                           \
       && tre_neg_char_classes_match(trans_i->neg_classes, (tre_cint_t)prev_c, \
                                     tnfa->cflags & REG_ICASE)))

/* Returns 1 if `t1' wins `t2', 0 otherwise. */

static int tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
                         int *t1, int *t2)
{
  int i;

  for (i = 0; i < num_tags; i++)
    {
      if (tag_directions[i] == TRE_TAG_MINIMIZE)
        {
          if (t1[i] < t2[i])
            {
              return 1;
            }

          if (t1[i] > t2[i])
            {
              return 0;
            }
        }
      else
        {
          if (t1[i] > t2[i])
            {
              return 1;
            }

          if (t1[i] < t2[i])
            {
              return 0;
            }
        }
    }

  /*  ASSERT(0); */

  return 0;
}

static int tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc,
                                      int icase)
{
  while (*classes != (tre_ctype_t)0)
    {
      if ((!icase &&
           tre_isctype(wc,
                       *classes)) ||
          (icase &&
           (tre_isctype(tre_toupper(wc),
                        *classes) || tre_isctype(tre_tolower(wc),
                        *classes))))
        {
          return 1; /* Match. */
        }
      else
        {
          classes++;
        }
    }

  return 0; /* No match. */
}

/* from tre-match-parallel.c
 */

/*  This algorithm searches for matches basically by reading characters
 *  in the searched string one by one, starting at the beginning. All
 *  matching paths in the TNFA are traversed in parallel. When two or
 *  more paths reach the same state, exactly one is chosen according to
 *  tag ordering rules; if returning submatches is not required it does
 *  not matter which path is chosen.
 *
 *  The worst case time required for finding the leftmost and longest
 *  match, or determining that there is no match, is always linearly
 *  dependent on the length of the text being searched.
 *
 *  This algorithm cannot handle TNFAs with back referencing nodes.
 *  See `tre-match-backtrack.c'.
 */

typedef struct
{
  tre_tnfa_transition_t *state;
  int *tags;
} tre_tnfa_reach_t;

typedef struct
{
  int pos;
  int **tags;
} tre_reach_pos_t;

static reg_errcode_t tre_tnfa_run_parallel(const tre_tnfa_t *tnfa,
                                           const void *string,
                                           int *match_tags, int eflags,
                                           int *match_end_ofs)
{
  /* State variables required by GET_NEXT_WCHAR. */

  tre_char_t    prev_c = 0, next_c = 0;
  const char    *str_byte       = string;
  int           pos             = -1;
  int           pos_add_next    = 1;

#ifdef TRE_MBSTATE
  mbstate_t             mbstate;
#endif /* TRE_MBSTATE */
  int                   reg_notbol  = eflags & REG_NOTBOL;
  int                   reg_noteol  = eflags & REG_NOTEOL;
  int                   reg_newline = tnfa->cflags & REG_NEWLINE;
  reg_errcode_t         ret;

  char                  *buf;
  tre_tnfa_transition_t *trans_i;
  tre_tnfa_reach_t      *reach, *reach_next, *reach_i, *reach_next_i;
  tre_reach_pos_t       *reach_pos;
  int                   *tag_i;
  int                   num_tags;
  int                   i;

  int                   match_eo    = -1; /* end offset of match (-1 if no
                                           * match found yet) */
  int                   new_match   = 0;
  int                   *tmp_tags   = NULL;
  int                   *tmp_iptr;

#ifdef TRE_MBSTATE
  memset(&mbstate, '\0', sizeof(mbstate));
#endif /* TRE_MBSTATE */

  if (!match_tags)
    {
      num_tags = 0;
    }
  else
    {
      num_tags = tnfa->num_tags;
    }

  /* Allocate memory for temporary data required for matching. This needs to
   *  be done for every matching operation to be thread safe.  This allocates
   *  everything in a single large block from the stack frame using alloca()
   *  or with malloc() if alloca is unavailable.
   */

    {
      int     tbytes;
      int     rbytes;
      int     pbytes;
      int     xbytes;
      int     total_bytes;
      char    *tmp_buf;

      /* Compute the length of the block we need. */

      tbytes      = sizeof(*tmp_tags) * num_tags;
      rbytes      = sizeof(*reach_next) * (tnfa->num_states + 1);
      pbytes      = sizeof(*reach_pos) * tnfa->num_states;
      xbytes      = sizeof(int) * num_tags;
      total_bytes = (sizeof(long) - 1) * 4 /* for alignment paddings */
                  + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes +
                  pbytes;

      /* Allocate the memory. */

      buf = xmalloc((unsigned)total_bytes);
      if (buf == NULL)
        {
          return REG_ESPACE;
        }

      memset(buf, 0, (size_t)total_bytes);

      /* Get the various pointers within tmp_buf (properly aligned). */

      tmp_tags    = (void *)buf;
      tmp_buf     = buf + tbytes;
      tmp_buf     += ALIGN(tmp_buf, long);
      reach_next  = (void *)tmp_buf;
      tmp_buf     += rbytes;
      tmp_buf     += ALIGN(tmp_buf, long);
      reach       = (void *)tmp_buf;
      tmp_buf     += rbytes;
      tmp_buf     += ALIGN(tmp_buf, long);
      reach_pos   = (void *)tmp_buf;
      tmp_buf     += pbytes;
      tmp_buf     += ALIGN(tmp_buf, long);
      for (i = 0; i < tnfa->num_states; i++)
        {
          reach[i].tags       = (void *)tmp_buf;
          tmp_buf             += xbytes;
          reach_next[i].tags  = (void *)tmp_buf;
          tmp_buf             += xbytes;
        }
    }

  for (i = 0; i < tnfa->num_states; i++)
    {
      reach_pos[i].pos = -1;
    }

  GET_NEXT_WCHAR();
  pos = 0;

  reach_next_i = reach_next;
  while (1)
    {
      /* If no match found yet, add the initial states to `reach_next'. */

      if (match_eo < 0)
        {
          trans_i = tnfa->initial;
          while (trans_i->state != NULL)
            {
              if (reach_pos[trans_i->state_id].pos < pos)
                {
                  if (trans_i->assertions &&
                      CHECK_ASSERTIONS(trans_i->assertions))
                    {
                      trans_i++;
                      continue;
                    }

                  reach_next_i->state = trans_i->state;
                  for (i = 0; i < num_tags; i++)
                    {
                      reach_next_i->tags[i] = -1;
                    }

                  tag_i = trans_i->tags;
                  if (tag_i)
                    {
                      while (*tag_i >= 0)
                        {
                          if (*tag_i < num_tags)
                            {
                              reach_next_i->tags[*tag_i] = pos;
                            }

                          tag_i++;
                        }
                    }

                  if (reach_next_i->state == tnfa->final)
                    {
                      match_eo  = pos;
                      new_match = 1;
                      for (i = 0; i < num_tags; i++)
                        {
                          match_tags[i] = reach_next_i->tags[i];
                        }
                    }

                  reach_pos[trans_i->state_id].pos  = pos;
                  reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
                  reach_next_i++;
                }

              trans_i++;
            }

          reach_next_i->state = NULL;
        }
      else
        {
          if (num_tags == 0 || reach_next_i == reach_next)
            {
              /* We have found a match. */

              break;
            }
        }

      /* Check for end of string. */

      if (!next_c)
        {
          break;
        }

      GET_NEXT_WCHAR();

      /* Swap `reach' and `reach_next'. */

      reach_i       = reach;
      reach         = reach_next;
      reach_next    = reach_i;

      /* For each state in `reach', weed out states that don't fulfill the
       *  minimal matching conditions.
       */

      if (tnfa->num_minimals && new_match)
        {
          new_match     = 0;
          reach_next_i  = reach_next;
          for (reach_i = reach; reach_i->state; reach_i++)
            {
              int skip = 0;
              for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2)
                {
                  int   end     = tnfa->minimal_tags[i];
                  int   start   = tnfa->minimal_tags[i + 1];
                  if (end >= num_tags)
                    {
                      skip = 1;
                      break;
                    }
                  else if (reach_i->tags[start] == match_tags[start] &&
                           reach_i->tags[end] < match_tags[end])
                    {
                      skip = 1;
                      break;
                    }
                }

              if (!skip)
                {
                  reach_next_i->state   = reach_i->state;
                  tmp_iptr              = reach_next_i->tags;
                  reach_next_i->tags    = reach_i->tags;
                  reach_i->tags         = tmp_iptr;
                  reach_next_i++;
                }
            }

          reach_next_i->state = NULL;

          /* Swap `reach' and `reach_next'. */

          reach_i       = reach;
          reach         = reach_next;
          reach_next    = reach_i;
        }

      /* For each state in `reach' see if there is a transition leaving with
       *  the current input symbol to a state not yet in `reach_next', and
       *  add the destination states to `reach_next'.
       */

      reach_next_i = reach_next;
      for (reach_i = reach; reach_i->state; reach_i++)
        {
          for (trans_i = reach_i->state; trans_i->state; trans_i++)
            {
              /* Does this transition match the input symbol? */

              if (trans_i->code_min <= (tre_cint_t)prev_c &&
                  trans_i->code_max >= (tre_cint_t)prev_c)
                {
                  if (trans_i->assertions &&
                      (CHECK_ASSERTIONS(trans_i->assertions) ||
                       CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
                    {
                      continue;
                    }

                  /* Compute the tags after this transition. */

                  for (i = 0; i < num_tags; i++)
                    {
                      tmp_tags[i] = reach_i->tags[i];
                    }

                  tag_i = trans_i->tags;
                  if (tag_i != NULL)
                    {
                      while (*tag_i >= 0)
                        {
                          if (*tag_i < num_tags)
                            {
                              tmp_tags[*tag_i] = pos;
                            }

                          tag_i++;
                        }
                    }

                  if (reach_pos[trans_i->state_id].pos < pos)
                    {
                      /* Found an unvisited node. */

                      reach_next_i->state               = trans_i->state;
                      tmp_iptr                          = reach_next_i->tags;
                      reach_next_i->tags                = tmp_tags;
                      tmp_tags                          = tmp_iptr;
                      reach_pos[trans_i->state_id].pos  = pos;
                      reach_pos[trans_i->state_id].tags =
                                                &reach_next_i->tags;

                      if (reach_next_i->state == tnfa->final &&
                          (match_eo == -1 ||
                           (num_tags > 0 &&
                            reach_next_i->tags[0] <= match_tags[0])))
                        {
                          match_eo  = pos;
                          new_match = 1;
                          for (i = 0; i < num_tags; i++)
                            {
                              match_tags[i] = reach_next_i->tags[i];
                            }
                        }

                      reach_next_i++;
                    }

                  else
                    {
                      ASSERT(reach_pos[trans_i->state_id].pos == pos);

                      /* Another path has also reached this state.  We choose
                       *  the winner by examining the tag values for both
                       *  paths.
                       */

                      if (tre_tag_order(num_tags, tnfa->tag_directions,
                                        tmp_tags,
                                        *reach_pos[trans_i->state_id].tags))
                        {
                          /* The new path wins. */

                          tmp_iptr =
                            *reach_pos[trans_i->state_id].tags;
                          *reach_pos[trans_i->state_id].tags = tmp_tags;
                          if (trans_i->state == tnfa->final)
                            {
                              match_eo  = pos;
                              new_match = 1;
                              for (i = 0; i < num_tags; i++)
                                {
                                  match_tags[i] = tmp_tags[i];
                                }
                            }

                          tmp_tags = tmp_iptr;
                        }
                    }
                }
            }
        }

      reach_next_i->state = NULL;
    }

  *match_end_ofs    = match_eo;
  ret               = match_eo >= 0 ? REG_OK : REG_NOMATCH;
error_exit:
  xfree(buf);
  return ret;
}

/* from tre-match-backtrack.c
 */

/*  This matcher is for regexps that use back referencing.  Regexp matching
 *  with back referencing is an NP-complete problem on the number of back
 *  references.  The easiest way to match them is to use a backtracking
 *  routine which basically goes through all possible paths in the TNFA
 *  and chooses the one which results in the best (leftmost and longest)
 *  match.  This can be spectacularly expensive and may run out of stack
 *  space, but there really is no better known generic algorithm. Quoting
 *  Henry Spencer from comp.compilers:
 *  <URL: http://compilers.iecc.com/comparch/article/93-03-102>
 *
 *   POSIX.2 REs require longest match, which is really exciting to
 *   implement since the obsolete ("basic") variant also includes
 * \<digit>.  I haven't found a better way of tackling this than doing
 *   a preliminary match using a DFA (or simulation) on a modified RE
 *   that just replicates subREs for \<digit>, and then doing a
 *   backtracking match to determine whether the subRE matches were
 *   right.  This can be rather slow, but I console myself with the
 *   thought that people who use \<digit> deserve very slow execution.
 *   (Pun unintentional but very appropriate.)
 *
 */

typedef struct
{
  int pos;
  const char *str_byte;
  tre_tnfa_transition_t *state;
  int state_id;
  int next_c;
  int *tags;
#ifdef TRE_MBSTATE
  mbstate_t mbstate;
#endif /* TRE_MBSTATE */
} tre_backtrack_item_t;

struct tre_backtrack_struct
{
  tre_backtrack_item_t item;
  struct tre_backtrack_struct *prev;
  struct tre_backtrack_struct *next;
};

typedef struct tre_backtrack_struct *tre_backtrack_t;

#ifdef TRE_MBSTATE
#define BT_STACK_MBSTATE_IN     stack->item.mbstate = (mbstate)
#define BT_STACK_MBSTATE_OUT    (mbstate)           = stack->item.mbstate
#else /* !TRE_MBSTATE */
#define BT_STACK_MBSTATE_IN
#define BT_STACK_MBSTATE_OUT
#endif /* !TRE_MBSTATE */

#define tre_bt_mem_new          tre_mem_new
#define tre_bt_mem_alloc        tre_mem_alloc
#define tre_bt_mem_destroy      tre_mem_destroy

#define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, \
                      _tags, _mbstate)                                        \
  do                                                                          \
    {                                                                         \
      int i;                                                                  \
      if (!stack->next)                                                       \
        {                                                                     \
          tre_backtrack_t s;                                                  \
          s = tre_bt_mem_alloc(mem, sizeof(*s));                              \
          if (!s)                                                             \
            {                                                                 \
              tre_bt_mem_destroy(mem);                                        \
              if (tags)                                                       \
                {                                                             \
                  xfree (tags);                                               \
                }                                                             \
              if (pmatch)                                                     \
                {                                                             \
                  xfree (pmatch);                                             \
                }                                                             \
              if (states_seen)                                                \
                {                                                             \
                  xfree (states_seen);                                        \
                }                                                             \
              return REG_ESPACE;                                              \
            }                                                                 \
          s->prev       = stack;                                              \
          s->next       = NULL;                                               \
          s->item.tags  = tre_bt_mem_alloc(mem,                               \
                                           sizeof(*tags) * tnfa->num_tags);   \
          if (!s->item.tags)                                                  \
            {                                                                 \
              tre_bt_mem_destroy(mem);                                        \
              if (tags)                                                       \
                {                                                             \
                  xfree (tags);                                               \
                }                                                             \
              if (pmatch)                                                     \
                {                                                             \
                  xfree (pmatch);                                             \
                }                                                             \
              if (states_seen)                                                \
                {                                                             \
                  xfree (states_seen);                                        \
                }                                                             \
              return REG_ESPACE;                                              \
            }                                                                 \
          stack->next   = s;                                                  \
          stack         = s;                                                  \
        }                                                                     \
      else                                                                    \
        {                                                                     \
          stack             = stack->next;                                    \
        }                                                                     \
      stack->item.pos       = (_pos);                                         \
      stack->item.str_byte  = (_str_byte);                                    \
      stack->item.state     = (_state);                                       \
      stack->item.state_id  = (_state_id);                                    \
      stack->item.next_c    = (_next_c);                                      \
      for (i = 0; i < tnfa->num_tags; i++)                                    \
        {                                                                     \
          stack->item.tags[i] = (_tags)[i];                                   \
        }                                                                     \
      BT_STACK_MBSTATE_IN;                                                    \
    }                                                                         \
  while (0)

#define BT_STACK_POP()                     \
  do                                       \
    {                                      \
      int i;                               \
      ASSERT(stack->prev);                 \
      pos       = stack->item.pos;         \
      str_byte  = stack->item.str_byte;    \
      state     = stack->item.state;       \
      next_c    = stack->item.next_c;      \
      for (i = 0; i < tnfa->num_tags; i++) \
        {                                  \
          tags[i] = stack->item.tags[i];   \
        }                                  \
      BT_STACK_MBSTATE_OUT;                \
      stack = stack->prev;                 \
    }                                      \
  while (0)

#undef MIN
#define MIN(a, b)  ((a) <= (b) ? (a) : (b))

static reg_errcode_t tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa,
                                            const void *string,
                                            int *match_tags, int eflags,
                                            int *match_end_ofs)
{
  /* State variables required by GET_NEXT_WCHAR. */

  tre_char_t    prev_c = 0, next_c = 0;
  const char    *str_byte       = string;
  int           pos             = 0;
  int           pos_add_next    = 1;

#ifdef TRE_MBSTATE
  mbstate_t mbstate;
#endif /* TRE_MBSTATE */
  int       reg_notbol  = eflags & REG_NOTBOL;
  int       reg_noteol  = eflags & REG_NOTEOL;
  int       reg_newline = tnfa->cflags & REG_NEWLINE;

  /* These are used to remember the necessary values of the above
   *  variables to return to the position where the current search
   *  started from.
   */

  int           next_c_start;
  const char    *str_byte_start;
  int           pos_start = -1;
#ifdef TRE_MBSTATE
  mbstate_t     mbstate_start;
#endif /* TRE_MBSTATE */

  /* End offset of best match so far, or -1 if no match found yet. */

  int match_eo = -1;

  /* Tag arrays. */

  int *next_tags;
  int *tags = NULL;

  /* Current TNFA state. */

  tre_tnfa_transition_t *state;
  int                   *states_seen = NULL;

  /* Memory allocator to for allocating the backtracking stack. */

  tre_mem_t mem = tre_bt_mem_new();

  /* The backtracking stack. */

  tre_backtrack_t       stack;

  tre_tnfa_transition_t *trans_i;
  regmatch_t            *pmatch = NULL;
  int                   ret;

#ifdef TRE_MBSTATE
  memset(&mbstate, '\0', sizeof(mbstate));
#endif /* TRE_MBSTATE */

  if (!mem)
    {
      return REG_ESPACE;
    }

  stack = tre_bt_mem_alloc(mem, sizeof(*stack));
  if (!stack)
    {
      ret = REG_ESPACE;
      goto error_exit;
    }

  stack->prev   = NULL;
  stack->next   = NULL;

  if (tnfa->num_tags)
    {
      tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
      if (!tags)
        {
          ret = REG_ESPACE;
          goto error_exit;
        }
    }

  if (tnfa->num_submatches)
    {
      pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches);
      if (!pmatch)
        {
          ret = REG_ESPACE;
          goto error_exit;
        }
    }

  if (tnfa->num_states)
    {
      states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states);
      if (!states_seen)
        {
          ret = REG_ESPACE;
          goto error_exit;
        }
    }

retry:
    {
      int i;
      for (i = 0; i < tnfa->num_tags; i++)
        {
          tags[i] = -1;
          if (match_tags)
            {
              match_tags[i] = -1;
            }
        }

      for (i = 0; i < tnfa->num_states; i++)
        {
          states_seen[i] = 0;
        }
    }

  state = NULL;
  pos   = pos_start;
  GET_NEXT_WCHAR();
  pos_start         = pos;
  next_c_start      = next_c;
  str_byte_start    = str_byte;
#ifdef TRE_MBSTATE
  mbstate_start = mbstate;
#endif /* TRE_MBSTATE */

  /* Handle initial states. */

  next_tags = NULL;
  for (trans_i = tnfa->initial; trans_i->state; trans_i++)
    {
      if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions))
        {
          continue;
        }

      if (state == NULL)
        {
          /* Start from this state. */

          state     = trans_i->state;
          next_tags = trans_i->tags;
        }
      else
        {
          /* Backtrack to this state. */

          BT_STACK_PUSH(pos, str_byte, 0, trans_i->state, trans_i->state_id,
                        next_c, tags, mbstate);
            {
              int *tmp = trans_i->tags;
              if (tmp)
                {
                  while (*tmp >= 0)
                    {
                      stack->item.tags[*tmp++] = pos;
                    }
                }
            }
        }
    }

  if (next_tags)
    {
      for (; *next_tags >= 0; next_tags++)
        {
          tags[*next_tags] = pos;
        }
    }

  if (state == NULL)
    {
      goto backtrack;
    }

  while (1)
    {
      tre_tnfa_transition_t *next_state;
      int                   empty_br_match;

      if (state == tnfa->final)
        {
          if (match_eo < pos ||
              (match_eo == pos && match_tags &&
               tre_tag_order(tnfa->num_tags, tnfa->tag_directions, tags,
                             match_tags)))
            {
              int i;

              /* This match wins the previous match. */

              match_eo = pos;
              if (match_tags)
                {
                  for (i = 0; i < tnfa->num_tags; i++)
                    {
                      match_tags[i] = tags[i];
                    }
                }
            }

          /* Our TNFAs never have transitions leaving from the final state,
           *  so we jump right to backtracking.
           */

          goto backtrack;
        }

      /* Go to the next character in the input string. */

      empty_br_match    = 0;
      trans_i           = state;
      if (trans_i->state && trans_i->assertions & ASSERT_BACKREF)
        {
          /* This is a back reference state.  All transitions leaving from
           *  this state have the same back reference "assertion".  Instead
           *  of reading the next character, we match the back reference.
           */

          int   so;
          int   eo;
          int   bt = trans_i->u.backref;
          int   bt_len;
          int   result;

          /* Get the substring we need to match against.  Remember to
           *  turn off REG_NOSUB temporarily.
           */

          tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB, tnfa,
                          tags, pos);
          so        = pmatch[bt].rm_so;
          eo        = pmatch[bt].rm_eo;
          bt_len    = eo - so;

          result = strncmp((const char *) string + so, str_byte - 1,
                           (size_t) bt_len);

          if (result == 0)
            {
              /* Back reference matched.  Check for infinite loop. */

              if (bt_len == 0)
                {
                  empty_br_match = 1;
                }

              if (empty_br_match && states_seen[trans_i->state_id])
                {
                  goto backtrack;
                }

              states_seen[trans_i->state_id] = empty_br_match;

              /* Advance in input string and resync `prev_c', `next_c'
               *  and pos.
               */

              str_byte  += bt_len - 1;
              pos       += bt_len - 1;
              GET_NEXT_WCHAR();
            }
          else
            {
              goto backtrack;
            }
        }
      else
        {
          /* Check for end of string. */

          if (next_c == L'\0')
            {
              goto backtrack;
            }

          /* Read the next character. */

          GET_NEXT_WCHAR();
        }

      next_state = NULL;
      for (trans_i = state; trans_i->state; trans_i++)
        {
          if (trans_i->code_min <= (tre_cint_t)prev_c &&
              trans_i->code_max >= (tre_cint_t)prev_c)
            {
              if (trans_i->assertions &&
                  (CHECK_ASSERTIONS(trans_i->assertions) ||
                   CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
                {
                  continue;
                }

              if (next_state == NULL)
                {
                  /* First matching transition. */

                  next_state    = trans_i->state;
                  next_tags     = trans_i->tags;
                }
              else
                {
                  /* Second matching transition.  We may need to backtrack
                   * here
                   * to take this transition instead of the first one, so we
                   * push this transition in the backtracking stack so we can
                   * jump back here if needed.
                   */

                  BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
                                trans_i->state_id, next_c, tags, mbstate);
                    {
                      int *tmp;
                      for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++)
                        {
                          stack->item.tags[*tmp] = pos;
                        }
                    }

#if 0

      /* XXX - it's important not to look at all transitions here to keep
       *  the stack small!
       */

                  break;
#endif
                }
            }
        }

      if (next_state != NULL)
        {
          /* Matching transitions were found.  Take the first one. */

          state = next_state;

          /* Update the tag values. */

          if (next_tags)
            {
              while (*next_tags >= 0)
                {
                  tags[*next_tags++] = pos;
                }
            }
        }
      else
        {
backtrack:

          /* A matching transition was not found.  Try to backtrack. */

          if (stack->prev)
            {
              if (stack->item.state->assertions & ASSERT_BACKREF)
                {
                  states_seen[stack->item.state_id] = 0;
                }

              BT_STACK_POP();
            }
          else if (match_eo < 0)
            {
              /* Try starting from a later position in the input string.
               * Check for end of string.
               */

              if (next_c == L'\0')
                {
                  break;
                }

              next_c = next_c_start;
#ifdef TRE_MBSTATE
              mbstate = mbstate_start;
#endif /* TRE_MBSTATE */
              str_byte = str_byte_start;
              goto retry;
            }
          else
            {
              break;
            }
        }
    }

  ret               = match_eo >= 0 ? REG_OK : REG_NOMATCH;
  *match_end_ofs    = match_eo;

error_exit:
  tre_bt_mem_destroy(mem);
#ifndef TRE_USE_ALLOCA
  if (tags)
    {
      xfree(tags);
    }

  if (pmatch)
    {
      xfree(pmatch);
    }

  if (states_seen)
    {
      xfree(states_seen);
    }
#endif /* !TRE_USE_ALLOCA */

  return ret;
}

/* from regexec.c
 */

/* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
 * endpoint values.
 */

static void tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
                            const tre_tnfa_t *tnfa, int *tags, int match_eo)
{
  tre_submatch_data_t   *submatch_data;
  unsigned int          i;
  unsigned int          j;
  int                   *parents;

  i = 0;
  if (match_eo >= 0 && !(cflags & REG_NOSUB))
    {
      /* Construct submatch offsets from the tags. */

      submatch_data = tnfa->submatch_data;
      while (i < tnfa->num_submatches && i < nmatch)
        {
          if (submatch_data[i].so_tag == tnfa->end_tag)
            {
              pmatch[i].rm_so = match_eo;
            }
          else
            {
              pmatch[i].rm_so = tags[submatch_data[i].so_tag];
            }

          if (submatch_data[i].eo_tag == tnfa->end_tag)
            {
              pmatch[i].rm_eo = match_eo;
            }
          else
            {
              pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
            }

          /* If either of the endpoints were not used, this submatch
           * was not part of the match.
           */

          if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
            {
              pmatch[i].rm_so = pmatch[i].rm_eo = -1;
            }

          i++;
        }

      /* Reset all submatches that are not within all of their parent
       * submatches.
       */

      i = 0;
      while (i < tnfa->num_submatches && i < nmatch)
        {
          if (pmatch[i].rm_eo == -1)
            {
              ASSERT(pmatch[i].rm_so == -1);
            }

          ASSERT(pmatch[i].rm_so <= pmatch[i].rm_eo);

          parents = submatch_data[i].parents;
          if (parents != NULL)
            {
              for (j = 0; parents[j] >= 0; j++)
                {
                  if (pmatch[i].rm_so < pmatch[parents[j]].rm_so ||
                      pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
                    {
                      pmatch[i].rm_so = pmatch[i].rm_eo = -1;
                    }
                }
            }

          i++;
        }
    }

  while (i < nmatch)
    {
      pmatch[i].rm_so   = -1;
      pmatch[i].rm_eo   = -1;
      i++;
    }
}

/* Wrapper functions for POSIX compatible regexp matching.
 */

int regexec(const regex_t *restrict preg, const char *restrict string,
            size_t nmatch, regmatch_t pmatch[restrict], int eflags)
{
  tre_tnfa_t    *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
  reg_errcode_t status;
  int           *tags = NULL;
  int           eo;

  if (tnfa->cflags & REG_NOSUB)
    {
      nmatch = 0;
    }

  if (tnfa->num_tags > 0 && nmatch > 0)
    {
      tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
      if (tags == NULL)
        {
          return REG_ESPACE;
        }
    }

  /* Dispatch to the appropriate matcher. */

  if (tnfa->have_backrefs)
    {
      /* The regex has back references, use the backtracking matcher. */

      status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo);
    }
  else
    {
      /* Exact matching, no back references, use the parallel matcher. */

      status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo);
    }

  if (status == REG_OK)
    {
      /* A match was found, so fill the submatch registers. */

      tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
    }

  if (tags)
    {
      xfree(tags);
    }

  return status;
}