/**************************************************************************** * libs/libc/regex/regexec.c * * regexec.c - TRE POSIX compatible matching functions (and more). * * Copyright (c) 2001-2009 Ville Laurikari * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ****************************************************************************/ /**************************************************************************** * Included Files ****************************************************************************/ #include #include #include #include #include #include #include "tre.h" #include /**************************************************************************** * Private Functions ****************************************************************************/ static void tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, const tre_tnfa_t *tnfa, int *tags, int match_eo); /* from tre-match-utils.h */ #define GET_NEXT_WCHAR() do { \ prev_c = next_c;pos += pos_add_next; \ if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0){ \ if (pos_add_next < 0){ ret = REG_NOMATCH;goto error_exit; } \ else pos_add_next++; \ } \ str_byte += pos_add_next; \ } while (0) #define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c)) #define CHECK_ASSERTIONS(assertions) \ (((assertions & ASSERT_AT_BOL) \ && (pos > 0 || reg_notbol) \ && (prev_c != L'\n' || !reg_newline)) \ || ((assertions & ASSERT_AT_EOL) \ && (next_c != L'\0' || reg_noteol) \ && (next_c != L'\n' || !reg_newline)) \ || ((assertions & ASSERT_AT_BOW) \ && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \ || ((assertions & ASSERT_AT_EOW) \ && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \ || ((assertions & ASSERT_AT_WB) \ && (pos != 0 && next_c != L'\0' \ && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \ || ((assertions & ASSERT_AT_WB_NEG) \ && (pos == 0 || next_c == L'\0' \ || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c)))) #define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \ (((trans_i->assertions & ASSERT_CHAR_CLASS) \ && !(tnfa->cflags & REG_ICASE) \ && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class)) \ || ((trans_i->assertions & ASSERT_CHAR_CLASS) \ && (tnfa->cflags & REG_ICASE) \ && !tre_isctype(tre_tolower((tre_cint_t)prev_c), trans_i->u.class) \ 
&& !tre_isctype(tre_toupper((tre_cint_t)prev_c), trans_i->u.class)) \
   || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG) \
       && tre_neg_char_classes_match(trans_i->neg_classes, (tre_cint_t)prev_c, \
                                     tnfa->cflags & REG_ICASE)))

/* Returns 1 if `t1' wins `t2', 0 otherwise.
 *
 * The two tag arrays are compared entry by entry over the first `num_tags'
 * entries; the first index at which they differ decides the result. For a
 * tag whose direction is TRE_TAG_MINIMIZE the smaller value wins, for any
 * other direction the larger value wins. If all compared entries are equal
 * the function returns 0, i.e. `t1' does not win.
 */

static int tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
                         int *t1, int *t2)
{
  int i;

  for (i = 0; i < num_tags; i++) {
    if (tag_directions[i] == TRE_TAG_MINIMIZE) {
      if (t1[i] < t2[i]) {
        return 1;
      }

      if (t1[i] > t2[i]) {
        return 0;
      }
    } else {
      if (t1[i] > t2[i]) {
        return 1;
      }

      if (t1[i] < t2[i]) {
        return 0;
      }
    }
  }

  /* assert(0); */

  return 0;
}

/* Tests `wc' against the list `classes', which is terminated by a zero
 * entry. Returns 1 as soon as `wc' belongs to one of the listed classes and
 * 0 if it belongs to none of them. When `icase' is nonzero both the upper
 * case and the lower case form of `wc' are tried against each class.
 *
 * Used by CHECK_CHAR_CLASSES for the ASSERT_CHAR_CLASS_NEG assertion.
 */

static int tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc,
                                      int icase)
{
  while (*classes != (tre_ctype_t)0) {
    if ((!icase && tre_isctype(wc, *classes)) ||
        (icase && (tre_isctype(tre_toupper(wc), *classes) ||
                   tre_isctype(tre_tolower(wc), *classes)))) {
      return 1; /* Match. */
    } else {
      classes++;
    }
  }

  return 0; /* No match. */
}

/* from tre-match-parallel.c */

/* This algorithm searches for matches basically by reading characters
 * in the searched string one by one, starting at the beginning. All
 * matching paths in the TNFA are traversed in parallel. When two or
 * more paths reach the same state, exactly one is chosen according to
 * tag ordering rules; if returning submatches is not required it does
 * not matter which path is chosen.
 *
 * The worst case time required for finding the leftmost and longest
 * match, or determining that there is no match, is always linearly
 * dependent on the length of the text being searched.
 *
 * This algorithm cannot handle TNFAs with back referencing nodes.
 * See `tre-match-backtrack.c'.
 */

/* One entry of a reach list. `state' points at the transitions leaving the
 * reached state (the matcher walks it as an array that ends at the entry
 * whose own `state' member is NULL); `tags' holds the tag values of the path
 * that reached it. A reach list itself ends at the entry whose `state' is
 * NULL.
 */

typedef struct {
  tre_tnfa_transition_t *state;
  int *tags;
} tre_tnfa_reach_t;

/* Per-state bookkeeping, indexed by a transition's `state_id'. `pos' is the
 * input position at which the state was last entered into `reach_next'
 * (-1 initially); `tags' points at the `tags' member of that reach entry so
 * a better path can replace the tag array in place.
 */

typedef struct {
  int pos;
  int **tags;
} tre_reach_pos_t;

/* Runs the parallel matcher over the NUL-terminated multibyte string
 * `string'.
 *
 * tnfa          - compiled automaton to run.
 * string        - input text; decoded with mbtowc() by GET_NEXT_WCHAR.
 * match_tags    - receives the tag values of the winning match; may be NULL,
 *                 in which case no tags are tracked (num_tags is forced to
 *                 0) and the search stops at the first match found.
 * eflags        - REG_NOTBOL / REG_NOTEOL are consulted here.
 * match_end_ofs - receives the end offset of the match, or -1 if there is
 *                 none. Not written when the function leaves through an
 *                 allocation or decoding failure.
 *
 * Returns REG_OK on a match, REG_NOMATCH if nothing matched or mbtowc()
 * reported an invalid sequence, and REG_ESPACE if the work buffer could not
 * be allocated.
 */

static reg_errcode_t tre_tnfa_run_parallel(const tre_tnfa_t *tnfa,
                                           const void *string,
                                           int *match_tags, int eflags,
                                           int *match_end_ofs)
{
  /* State variables required by GET_NEXT_WCHAR. `pos' is advanced by the
   * byte count mbtowc() returned for the previous character.
   */

  tre_char_t prev_c = 0, next_c = 0;
  const char *str_byte = string;
  int pos = -1;
  int pos_add_next = 1;
#ifdef TRE_MBSTATE
  mbstate_t mbstate;
#endif /* TRE_MBSTATE */
  int reg_notbol = eflags & REG_NOTBOL;
  int reg_noteol = eflags & REG_NOTEOL;
  int reg_newline = tnfa->cflags & REG_NEWLINE;
  reg_errcode_t ret;

  char *buf;
  tre_tnfa_transition_t *trans_i;
  tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
  tre_reach_pos_t *reach_pos;
  int *tag_i;
  int num_tags;
  int i;

  int match_eo = -1; /* end offset of match (-1 if no
                      * match found yet) */
  int new_match = 0;
  int *tmp_tags = NULL;
  int *tmp_iptr;

#ifdef TRE_MBSTATE
  memset(&mbstate, '\0', sizeof(mbstate));
#endif /* TRE_MBSTATE */

  /* Without a destination for the tags there is nothing to track. */

  if (!match_tags) {
    num_tags = 0;
  } else {
    num_tags = tnfa->num_tags;
  }

  /* Allocate memory for temporary data required for matching. This needs to
   * be done for every matching operation to be thread safe. Everything is
   * carved out of a single block obtained with xmalloc() and released with
   * xfree() at `error_exit'.
   */

  {
    int tbytes;
    int rbytes;
    int pbytes;
    int xbytes;
    int total_bytes;
    char *tmp_buf;

    /* Compute the length of the block we need: one scratch tag array
     * (tbytes), two reach lists of num_states entries plus a terminator
     * (rbytes each), the per-state position table (pbytes) and one tag array
     * per reach entry (xbytes each).
     *
     * NOTE(review): all sizes are computed in `int' and no overflow check is
     * visible here -- confirm num_states / num_tags are bounded elsewhere.
     */

    tbytes = sizeof(*tmp_tags) * num_tags;
    rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
    pbytes = sizeof(*reach_pos) * tnfa->num_states;
    xbytes = sizeof(int) * num_tags;
    total_bytes = (sizeof(long) - 1) * 4 /* for alignment paddings */
                  + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;

    /* Allocate the memory. */

    buf = xmalloc((unsigned)total_bytes);
    if (buf == NULL) {
      return REG_ESPACE;
    }

    memset(buf, 0, (size_t)total_bytes);

    /* Get the various pointers within tmp_buf (properly aligned). */

    tmp_tags = (void *)buf;
    tmp_buf = buf + tbytes;
    tmp_buf += ALIGN(tmp_buf, long);
    reach_next = (void *)tmp_buf;
    tmp_buf += rbytes;
    tmp_buf += ALIGN(tmp_buf, long);
    reach = (void *)tmp_buf;
    tmp_buf += rbytes;
    tmp_buf += ALIGN(tmp_buf, long);
    reach_pos = (void *)tmp_buf;
    tmp_buf += pbytes;
    tmp_buf += ALIGN(tmp_buf, long);
    for (i = 0; i < tnfa->num_states; i++) {
      reach[i].tags = (void *)tmp_buf;
      tmp_buf += xbytes;
      reach_next[i].tags = (void *)tmp_buf;
      tmp_buf += xbytes;
    }
  }

  /* Mark every state as not yet reached at any position. */

  for (i = 0; i < tnfa->num_states; i++) {
    reach_pos[i].pos = -1;
  }

  /* Prime the one-character look-ahead: after this call `next_c' holds the
   * first character of the input.
   */

  GET_NEXT_WCHAR();
  pos = 0;

  reach_next_i = reach_next;
  while (1) {
    /* If no match found yet, add the initial states to `reach_next'. */

    if (match_eo < 0) {
      trans_i = tnfa->initial;
      while (trans_i->state != NULL) {
        /* Skip states already entered at this position. */

        if (reach_pos[trans_i->state_id].pos < pos) {
          /* CHECK_ASSERTIONS is nonzero when an assertion fails. */

          if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)) {
            trans_i++;
            continue;
          }

          reach_next_i->state = trans_i->state;
          for (i = 0; i < num_tags; i++) {
            reach_next_i->tags[i] = -1;
          }

          /* Record the current position in every tag this transition sets;
           * the tag list ends at the first negative entry.
           */

          tag_i = trans_i->tags;
          if (tag_i) {
            while (*tag_i >= 0) {
              if (*tag_i < num_tags) {
                reach_next_i->tags[*tag_i] = pos;
              }

              tag_i++;
            }
          }

          /* An initial transition leading straight to the final state is a
           * match ending at the current position.
           */

          if (reach_next_i->state == tnfa->final) {
            match_eo = pos;
            new_match = 1;
            for (i = 0; i < num_tags; i++) {
              match_tags[i] = reach_next_i->tags[i];
            }
          }

          reach_pos[trans_i->state_id].pos = pos;
          reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
          reach_next_i++;
        }

        trans_i++;
      }

      reach_next_i->state = NULL;
    } else {
      /* A match is already recorded. Stop if no tags are tracked or if no
       * live paths remain that could still change the result.
       */

      if (num_tags == 0 || reach_next_i == reach_next) {
        /* We have found a match. */

        break;
      }
    }

    /* Check for end of string. */

    if (!next_c) {
      break;
    }

    GET_NEXT_WCHAR();

    /* Swap `reach' and `reach_next'. */

    reach_i = reach;
    reach = reach_next;
    reach_next = reach_i;

    /* For each state in `reach', weed out states that don't fulfill the
     * minimal matching conditions. `minimal_tags' is read as pairs and ends
     * at the first negative entry.
     */

    if (tnfa->num_minimals && new_match) {
      new_match = 0;
      reach_next_i = reach_next;
      for (reach_i = reach; reach_i->state; reach_i++) {
        int skip = 0;
        for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2) {
          int end = tnfa->minimal_tags[i];
          int start = tnfa->minimal_tags[i + 1];
          if (end >= num_tags) {
            skip = 1;
            break;
          } else if (reach_i->tags[start] == match_tags[start] &&
                     reach_i->tags[end] < match_tags[end]) {
            skip = 1;
            break;
          }
        }

        if (!skip) {
          /* Keep this path: move it to `reach_next' by exchanging the tag
           * array pointers instead of copying the tags.
           */

          reach_next_i->state = reach_i->state;
          tmp_iptr = reach_next_i->tags;
          reach_next_i->tags = reach_i->tags;
          reach_i->tags = tmp_iptr;
          reach_next_i++;
        }
      }

      reach_next_i->state = NULL;

      /* Swap `reach' and `reach_next'. */

      reach_i = reach;
      reach = reach_next;
      reach_next = reach_i;
    }

    /* For each state in `reach' see if there is a transition leaving with
     * the current input symbol to a state not yet in `reach_next', and
     * add the destination states to `reach_next'.
     */

    reach_next_i = reach_next;
    for (reach_i = reach; reach_i->state; reach_i++) {
      for (trans_i = reach_i->state; trans_i->state; trans_i++) {
        /* Does this transition match the input symbol? */

        if (trans_i->code_min <= (tre_cint_t)prev_c &&
            trans_i->code_max >= (tre_cint_t)prev_c) {
          if (trans_i->assertions &&
              (CHECK_ASSERTIONS(trans_i->assertions) ||
               CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) {
            continue;
          }

          /* Compute the tags after this transition. */

          for (i = 0; i < num_tags; i++) {
            tmp_tags[i] = reach_i->tags[i];
          }

          tag_i = trans_i->tags;
          if (tag_i != NULL) {
            while (*tag_i >= 0) {
              if (*tag_i < num_tags) {
                tmp_tags[*tag_i] = pos;
              }

              tag_i++;
            }
          }

          if (reach_pos[trans_i->state_id].pos < pos) {
            /* Found an unvisited node. The scratch tag array becomes the
             * entry's tag array and the entry's old array becomes the new
             * scratch array.
             */

            reach_next_i->state = trans_i->state;
            tmp_iptr = reach_next_i->tags;
            reach_next_i->tags = tmp_tags;
            tmp_tags = tmp_iptr;
            reach_pos[trans_i->state_id].pos = pos;
            reach_pos[trans_i->state_id].tags = &reach_next_i->tags;

            /* Record a match if this is the first one, or if tags are
             * tracked and its tag 0 value is not greater than that of the
             * recorded match.
             */

            if (reach_next_i->state == tnfa->final &&
                (match_eo == -1 ||
                 (num_tags > 0 &&
                  reach_next_i->tags[0] <= match_tags[0]))) {
              match_eo = pos;
              new_match = 1;
              for (i = 0; i < num_tags; i++) {
                match_tags[i] = reach_next_i->tags[i];
              }
            }

            reach_next_i++;
          } else {
            assert(reach_pos[trans_i->state_id].pos == pos);

            /* Another path has also reached this state. We choose
             * the winner by examining the tag values for both
             * paths.
             */

            if (tre_tag_order(num_tags, tnfa->tag_directions, tmp_tags,
                              *reach_pos[trans_i->state_id].tags)) {
              /* The new path wins. */

              tmp_iptr = *reach_pos[trans_i->state_id].tags;
              *reach_pos[trans_i->state_id].tags = tmp_tags;
              if (trans_i->state == tnfa->final) {
                match_eo = pos;
                new_match = 1;
                for (i = 0; i < num_tags; i++) {
                  match_tags[i] = tmp_tags[i];
                }
              }

              tmp_tags = tmp_iptr;
            }
          }
        }
      }
    }

    reach_next_i->state = NULL;
  }

  *match_end_ofs = match_eo;
  ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;

  /* GET_NEXT_WCHAR jumps here with `ret' already set when mbtowc() fails. */

error_exit:
  xfree(buf);
  return ret;
}

/* from tre-match-backtrack.c */

/* This matcher is for regexps that use back referencing. Regexp matching
 * with back referencing is an NP-complete problem on the number of back
 * references. The easiest way to match them is to use a backtracking
 * routine which basically goes through all possible paths in the TNFA
 * and chooses the one which results in the best (leftmost and longest)
 * match. This can be spectacularly expensive and may run out of stack
 * space, but there really is no better known generic algorithm. Quoting
 * Henry Spencer from comp.compilers:
 *
 *
 * POSIX.2 REs require longest match, which is really exciting to
 * implement since the obsolete ("basic") variant also includes
 * \<digit>.
I haven't found a better way of tackling this than doing
 * a preliminary match using a DFA (or simulation) on a modified RE
 * that just replicates subREs for \<digit>, and then doing a
 * backtracking match to determine whether the subRE matches were
 * right. This can be rather slow, but I console myself with the
 * thought that people who use \<digit> deserve very slow execution.
 * (Pun unintentional but very appropriate.)
 *
 */

/* Matcher state saved for one backtracking point: the input position and
 * byte pointer, the one-character look-ahead, the TNFA state to resume in
 * (with its id) and a private copy of the tag values.
 */

typedef struct {
  int pos;
  const char *str_byte;
  tre_tnfa_transition_t *state;
  int state_id;
  int next_c;
  int *tags;
#ifdef TRE_MBSTATE
  mbstate_t mbstate;
#endif /* TRE_MBSTATE */
} tre_backtrack_item_t;

/* Node of the backtracking stack, kept as a doubly linked list. Nodes are
 * never unlinked on pop; BT_STACK_PUSH reuses `next' when it already exists
 * and only allocates a node when it does not.
 */

struct tre_backtrack_struct {
  tre_backtrack_item_t item;
  struct tre_backtrack_struct *prev;
  struct tre_backtrack_struct *next;
};

typedef struct tre_backtrack_struct *tre_backtrack_t;

/* Save / restore the multibyte conversion state together with a stack item;
 * both expand to nothing when TRE_MBSTATE is not defined.
 */

#ifdef TRE_MBSTATE
#define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate)
#define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate
#else /* !TRE_MBSTATE */
#define BT_STACK_MBSTATE_IN
#define BT_STACK_MBSTATE_OUT
#endif /* !TRE_MBSTATE */

/* The backtracking stack is allocated from a tre_mem_t allocator. */

#define tre_bt_mem_new tre_mem_new
#define tre_bt_mem_alloc tre_mem_alloc
#define tre_bt_mem_destroy tre_mem_destroy

/* Pushes a backtracking point. The macro relies on these names being in
 * scope at the expansion site: `stack', `mem', `tnfa', `tags', `pmatch' and
 * `states_seen' (plus `mbstate' when TRE_MBSTATE is defined).
 *
 * If a new node or its tag array cannot be allocated, the macro destroys
 * `mem', frees `tags', `pmatch' and `states_seen', and makes the ENCLOSING
 * FUNCTION return REG_ESPACE.
 *
 * The `_str_wide' and `_mbstate' arguments are not referenced by the
 * expansion.
 */

#define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, \
                      _tags, _mbstate) \
  do \
    { \
      int i; \
      if (!stack->next) \
        { \
          tre_backtrack_t s; \
          s = tre_bt_mem_alloc(mem, sizeof(*s)); \
          if (!s) \
            { \
              tre_bt_mem_destroy(mem); \
              if (tags) \
                { \
                  xfree (tags); \
                } \
              if (pmatch) \
                { \
                  xfree (pmatch); \
                } \
              if (states_seen) \
                { \
                  xfree (states_seen); \
                } \
              return REG_ESPACE; \
            } \
          s->prev = stack; \
          s->next = NULL; \
          s->item.tags = tre_bt_mem_alloc(mem, \
                                          sizeof(*tags) * tnfa->num_tags); \
          if (!s->item.tags) \
            { \
              tre_bt_mem_destroy(mem); \
              if (tags) \
                { \
                  xfree (tags); \
                } \
              if (pmatch) \
                { \
                  xfree (pmatch); \
                } \
              if (states_seen) \
                { \
                  xfree (states_seen); \
                } \
              return REG_ESPACE; \
            } \
          stack->next = s; \
          stack = s; \
        } \
      else \
        { \
          stack = stack->next; \
        } \
      stack->item.pos = (_pos); \
      stack->item.str_byte = (_str_byte); \
      stack->item.state = (_state); \
      stack->item.state_id = (_state_id); \
      stack->item.next_c = (_next_c); \
      for (i = 0; i < tnfa->num_tags; i++) \
        { \
          stack->item.tags[i] = (_tags)[i]; \
        } \
      BT_STACK_MBSTATE_IN; \
    } \
  while (0)

/* Pops the top backtracking point into the enclosing function's `pos',
 * `str_byte', `state', `next_c' and `tags' (and `mbstate' when TRE_MBSTATE
 * is defined), then steps `stack' back to the previous node. The bottom
 * node (the one whose `prev' is NULL) must never be popped.
 */

#define BT_STACK_POP() \
  do \
    { \
      int i; \
      assert(stack->prev); \
      pos = stack->item.pos; \
      str_byte = stack->item.str_byte; \
      state = stack->item.state; \
      next_c = stack->item.next_c; \
      for (i = 0; i < tnfa->num_tags; i++) \
        { \
          tags[i] = stack->item.tags[i]; \
        } \
      BT_STACK_MBSTATE_OUT; \
      stack = stack->prev; \
    } \
  while (0)

/* NOTE(review): MIN is not referenced by the code visible in this section. */

#undef MIN
#define MIN(a, b) ((a) <= (b) ? (a) : (b))

/* Runs the backtracking matcher over the NUL-terminated multibyte string
 * `string'. regexec() selects this matcher when the automaton has back
 * references.
 *
 * tnfa          - compiled automaton to run.
 * string        - input text; decoded with mbtowc() by GET_NEXT_WCHAR.
 * match_tags    - receives the tag values of the best match; may be NULL.
 * eflags        - REG_NOTBOL / REG_NOTEOL are consulted here.
 * match_end_ofs - receives the end offset of the best match, or -1 if there
 *                 is none. Not written when the function leaves through an
 *                 allocation or decoding failure.
 *
 * Returns REG_OK on a match, REG_NOMATCH if nothing matched or mbtowc()
 * reported an invalid sequence, and REG_ESPACE on allocation failure.
 */

static reg_errcode_t tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa,
                                            const void *string,
                                            int *match_tags, int eflags,
                                            int *match_end_ofs)
{
  /* State variables required by GET_NEXT_WCHAR. */

  tre_char_t prev_c = 0, next_c = 0;
  const char *str_byte = string;
  int pos = 0;
  int pos_add_next = 1;
#ifdef TRE_MBSTATE
  mbstate_t mbstate;
#endif /* TRE_MBSTATE */
  int reg_notbol = eflags & REG_NOTBOL;
  int reg_noteol = eflags & REG_NOTEOL;
  int reg_newline = tnfa->cflags & REG_NEWLINE;

  /* These are used to remember the necessary values of the above
   * variables to return to the position where the current search
   * started from.
   */

  int next_c_start;
  const char *str_byte_start;
  int pos_start = -1;
#ifdef TRE_MBSTATE
  mbstate_t mbstate_start;
#endif /* TRE_MBSTATE */

  /* End offset of best match so far, or -1 if no match found yet. */

  int match_eo = -1;

  /* Tag arrays. */

  int *next_tags;
  int *tags = NULL;

  /* Current TNFA state. */

  tre_tnfa_transition_t *state;
  int *states_seen = NULL;

  /* Memory allocator for allocating the backtracking stack. */

  tre_mem_t mem = tre_bt_mem_new();

  /* The backtracking stack. */

  tre_backtrack_t stack;

  tre_tnfa_transition_t *trans_i;
  regmatch_t *pmatch = NULL;
  int ret;

#ifdef TRE_MBSTATE
  memset(&mbstate, '\0', sizeof(mbstate));
#endif /* TRE_MBSTATE */

  if (!mem) {
    return REG_ESPACE;
  }

  /* Bottom-of-stack sentinel: its `prev' stays NULL and its item is never
   * filled in or popped.
   */

  stack = tre_bt_mem_alloc(mem, sizeof(*stack));
  if (!stack) {
    ret = REG_ESPACE;
    goto error_exit;
  }

  stack->prev = NULL;
  stack->next = NULL;

  if (tnfa->num_tags) {
    tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
    if (!tags) {
      ret = REG_ESPACE;
      goto error_exit;
    }
  }

  /* Scratch submatch array used to resolve back references. */

  if (tnfa->num_submatches) {
    pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches);
    if (!pmatch) {
      ret = REG_ESPACE;
      goto error_exit;
    }
  }

  /* One flag per state, used to detect loops through empty back
   * references.
   */

  if (tnfa->num_states) {
    states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states);
    if (!states_seen) {
      ret = REG_ESPACE;
      goto error_exit;
    }
  }

  /* Each pass through `retry' starts a fresh search one character later
   * than the previous one.
   */

retry:
  {
    int i;
    for (i = 0; i < tnfa->num_tags; i++) {
      tags[i] = -1;
      if (match_tags) {
        match_tags[i] = -1;
      }
    }

    for (i = 0; i < tnfa->num_states; i++) {
      states_seen[i] = 0;
    }
  }

  /* Step to the next start position and remember it so the following retry
   * can resume from here.
   */

  state = NULL;
  pos = pos_start;
  GET_NEXT_WCHAR();
  pos_start = pos;
  next_c_start = next_c;
  str_byte_start = str_byte;
#ifdef TRE_MBSTATE
  mbstate_start = mbstate;
#endif /* TRE_MBSTATE */

  /* Handle initial states. */

  next_tags = NULL;
  for (trans_i = tnfa->initial; trans_i->state; trans_i++) {
    /* CHECK_ASSERTIONS is nonzero when an assertion fails. */

    if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)) {
      continue;
    }

    if (state == NULL) {
      /* Start from this state. */

      state = trans_i->state;
      next_tags = trans_i->tags;
    } else {
      /* Backtrack to this state. */

      BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
                    trans_i->state_id, next_c, tags, mbstate);
      {
        /* Apply this transition's tags to the saved copy only. */

        int *tmp = trans_i->tags;
        if (tmp) {
          while (*tmp >= 0) {
            stack->item.tags[*tmp++] = pos;
          }
        }
      }
    }
  }

  /* Apply the tags of the initial transition that was actually taken. */

  if (next_tags) {
    for (; *next_tags >= 0; next_tags++) {
      tags[*next_tags] = pos;
    }
  }

  if (state == NULL) {
    goto backtrack;
  }

  while (1) {
    tre_tnfa_transition_t *next_state;
    int empty_br_match;

    if (state == tnfa->final) {
      /* Prefer the longer match; on equal length let the tag ordering
       * decide (only possible when tags are being recorded).
       */

      if (match_eo < pos ||
          (match_eo == pos && match_tags &&
           tre_tag_order(tnfa->num_tags, tnfa->tag_directions, tags,
                         match_tags))) {
        int i;

        /* This match wins the previous match. */

        match_eo = pos;
        if (match_tags) {
          for (i = 0; i < tnfa->num_tags; i++) {
            match_tags[i] = tags[i];
          }
        }
      }

      /* Our TNFAs never have transitions leaving from the final state,
       * so we jump right to backtracking.
       */

      goto backtrack;
    }

    /* Go to the next character in the input string. */

    empty_br_match = 0;
    trans_i = state;
    if (trans_i->state && trans_i->assertions & ASSERT_BACKREF) {
      /* This is a back reference state. All transitions leaving from
       * this state have the same back reference "assertion". Instead
       * of reading the next character, we match the back reference.
       */

      int so;
      int eo;
      int bt = trans_i->u.backref;
      int bt_len;
      int result;

      /* Get the substring we need to match against. Remember to
       * turn off REG_NOSUB temporarily.
       */

      tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB,
                      tnfa, tags, pos);
      so = pmatch[bt].rm_so;
      eo = pmatch[bt].rm_eo;
      bt_len = eo - so;

      /* NOTE(review): `str_byte - 1' assumes the look-ahead character
       * occupied exactly one byte, and `so' is used without checking for
       * -1 (unset submatch) -- confirm both against the callers/compiler.
       */

      result = strncmp((const char *) string + so, str_byte - 1,
                       (size_t) bt_len);

      if (result == 0) {
        /* Back reference matched. Check for infinite loop. */

        if (bt_len == 0) {
          empty_br_match = 1;
        }

        if (empty_br_match && states_seen[trans_i->state_id]) {
          goto backtrack;
        }

        states_seen[trans_i->state_id] = empty_br_match;

        /* Advance in input string and resync `prev_c', `next_c'
         * and pos.
         */

        str_byte += bt_len - 1;
        pos += bt_len - 1;
        GET_NEXT_WCHAR();
      } else {
        goto backtrack;
      }
    } else {
      /* Check for end of string. */

      if (next_c == L'\0') {
        goto backtrack;
      }

      /* Read the next character. */

      GET_NEXT_WCHAR();
    }

    next_state = NULL;
    for (trans_i = state; trans_i->state; trans_i++) {
      if (trans_i->code_min <= (tre_cint_t)prev_c &&
          trans_i->code_max >= (tre_cint_t)prev_c) {
        if (trans_i->assertions &&
            (CHECK_ASSERTIONS(trans_i->assertions) ||
             CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) {
          continue;
        }

        if (next_state == NULL) {
          /* First matching transition. */

          next_state = trans_i->state;
          next_tags = trans_i->tags;
        } else {
          /* Second matching transition. We may need to backtrack here
           * to take this transition instead of the first one, so we
           * push this transition in the backtracking stack so we can
           * jump back here if needed.
           */

          BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
                        trans_i->state_id, next_c, tags, mbstate);
          {
            int *tmp;
            for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++) {
              stack->item.tags[*tmp] = pos;
            }
          }

#if 0 /* XXX - it's important not to look at all transitions here to keep
       * the stack small! */
          break;
#endif
        }
      }
    }

    if (next_state != NULL) {
      /* Matching transitions were found. Take the first one. */

      state = next_state;

      /* Update the tag values. */

      if (next_tags) {
        while (*next_tags >= 0) {
          tags[*next_tags++] = pos;
        }
      }
    } else {
backtrack:

      /* A matching transition was not found. Try to backtrack. */

      if (stack->prev) {
        /* Leaving a back reference state: allow it to be entered again. */

        if (stack->item.state->assertions & ASSERT_BACKREF) {
          states_seen[stack->item.state_id] = 0;
        }

        BT_STACK_POP();
      } else if (match_eo < 0) {
        /* Try starting from a later position in the input string.
         * Check for end of string.
         */

        if (next_c == L'\0') {
          break;
        }

        next_c = next_c_start;
#ifdef TRE_MBSTATE
        mbstate = mbstate_start;
#endif /* TRE_MBSTATE */
        str_byte = str_byte_start;
        goto retry;
      } else {
        /* Stack exhausted and a match is recorded: we are done. */

        break;
      }
    }
  }

  ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
  *match_end_ofs = match_eo;

  /* Also reached with `ret' already set on allocation failure above and
   * when GET_NEXT_WCHAR hits an invalid multibyte sequence.
   */

error_exit:
  tre_bt_mem_destroy(mem);
#ifndef TRE_USE_ALLOCA
  if (tags) {
    xfree(tags);
  }

  if (pmatch) {
    xfree(pmatch);
  }

  if (states_seen) {
    xfree(states_seen);
  }
#endif /* !TRE_USE_ALLOCA */

  return ret;
}

/* from regexec.c */

/* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
 * endpoint values.
 *
 * nmatch   - number of entries in `pmatch' to fill.
 * pmatch   - output array; every one of the `nmatch' entries is written.
 * cflags   - compile flags; with REG_NOSUB set all entries become -1/-1.
 * tnfa     - automaton providing `submatch_data', `num_submatches' and
 *            `end_tag'.
 * tags     - tag values of the match; only read when `match_eo' >= 0 and
 *            REG_NOSUB is clear.
 * match_eo - end offset of the match; substituted wherever a submatch
 *            endpoint refers to `end_tag'. A negative value makes all
 *            entries -1/-1.
 */

static void tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
                            const tre_tnfa_t *tnfa, int *tags, int match_eo)
{
  tre_submatch_data_t *submatch_data;
  unsigned int i;
  unsigned int j;
  int *parents;

  i = 0;
  if (match_eo >= 0 && !(cflags & REG_NOSUB)) {
    /* Construct submatch offsets from the tags. */

    submatch_data = tnfa->submatch_data;
    while (i < tnfa->num_submatches && i < nmatch) {
      if (submatch_data[i].so_tag == tnfa->end_tag) {
        pmatch[i].rm_so = match_eo;
      } else {
        pmatch[i].rm_so = tags[submatch_data[i].so_tag];
      }

      if (submatch_data[i].eo_tag == tnfa->end_tag) {
        pmatch[i].rm_eo = match_eo;
      } else {
        pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
      }

      /* If either of the endpoints were not used, this submatch
       * was not part of the match.
       */

      if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1) {
        pmatch[i].rm_so = pmatch[i].rm_eo = -1;
      }

      i++;
    }

    /* Reset all submatches that are not within all of their parent
     * submatches. The `parents' list ends at the first negative entry.
     */

    i = 0;
    while (i < tnfa->num_submatches && i < nmatch) {
      if (pmatch[i].rm_eo == -1) {
        assert(pmatch[i].rm_so == -1);
      }

      assert(pmatch[i].rm_so <= pmatch[i].rm_eo);

      parents = submatch_data[i].parents;
      if (parents != NULL) {
        for (j = 0; parents[j] >= 0; j++) {
          if (pmatch[i].rm_so < pmatch[parents[j]].rm_so ||
              pmatch[i].rm_eo > pmatch[parents[j]].rm_eo) {
            pmatch[i].rm_so = pmatch[i].rm_eo = -1;
          }
        }
      }

      i++;
    }
  }

  /* Mark every remaining entry (all of them if the block above was
   * skipped) as unused.
   */

  while (i < nmatch) {
    pmatch[i].rm_so = -1;
    pmatch[i].rm_eo = -1;
    i++;
  }
}

/* Wrapper functions for POSIX compatible regexp matching.
*/

/* Matches `string' against the compiled pattern held in `preg'.
 *
 * preg   - compiled regex; its TRE_REGEX_T_FIELD member holds the TNFA.
 * string - text to search.
 * nmatch - number of entries available in `pmatch'; treated as 0 when the
 *          pattern was compiled with REG_NOSUB.
 * pmatch - receives the submatch offsets when a match is found.
 * eflags - execution flags, handed unchanged to the matcher.
 *
 * Returns REG_ESPACE if the tag array cannot be allocated, otherwise the
 * status produced by the selected matcher.
 */

int regexec(const regex_t *restrict preg, const char *restrict string,
            size_t nmatch, regmatch_t pmatch[restrict], int eflags)
{
  tre_tnfa_t *automaton = (void *)preg->TRE_REGEX_T_FIELD;
  reg_errcode_t rc;
  int *tag_values = NULL;
  int match_end;

  /* A pattern compiled with REG_NOSUB never reports submatches. */

  if ((automaton->cflags & REG_NOSUB) != 0) {
    nmatch = 0;
  }

  /* Tag storage is only needed when offsets were requested and the
   * automaton actually carries tags.
   */

  if (nmatch > 0 && automaton->num_tags > 0) {
    tag_values = xmalloc(sizeof(*tag_values) * automaton->num_tags);
    if (!tag_values) {
      return REG_ESPACE;
    }
  }

  /* Back references need the backtracking matcher; everything else goes
   * through the parallel matcher.
   */

  rc = automaton->have_backrefs
         ? tre_tnfa_run_backtrack(automaton, string, tag_values, eflags,
                                  &match_end)
         : tre_tnfa_run_parallel(automaton, string, tag_values, eflags,
                                 &match_end);

  /* On success translate the tag values into the caller's registers. */

  if (rc == REG_OK) {
    tre_fill_pmatch(nmatch, pmatch, automaton->cflags, automaton,
                    tag_values, match_end);
  }

  if (tag_values != NULL) {
    xfree(tag_values);
  }

  return rc;
}