From 1d9a2e966ddb5242d79e4b6e238124c4be81f6dd Mon Sep 17 00:00:00 2001 From: Karl Dahlke Date: Thu, 30 Sep 2021 20:21:39 -0400 Subject: [PATCH] Upgrade from pcre1 to pcre2. Patch courtesy of Sylvain BERTRAND. I only had to tweak it in a couple places. --- src/buffers.c | 74 ++++++++++++++++++++++++++++++++------------------- src/main.c | 5 ---- src/makefile | 2 +- 5 files changed, 54 insertions(+), 37 deletions(-) diff --git a/src/buffers.c b/src/buffers.c index 4324b4a6..b18a7674 100644 --- a/src/buffers.c +++ b/src/buffers.c @@ -11,7 +11,8 @@ /* If this include file is missing, you need the pcre package, * and the pcre-devel package. */ -#include <pcre.h> +#define PCRE2_CODE_UNIT_WIDTH 8 +#include <pcre2.h> static bool pcre_utf8_error_stop = false; #include @@ -3147,22 +3148,23 @@ regexpCheck(const char *line, bool isleft, bool ebmuck, /* regexp variables */ static int re_count; -static int re_vector[11 * 3]; -static pcre *re_cc; /* compiled */ +static PCRE2_SIZE *re_vector; +static pcre2_match_data *match_data; +static pcre2_code *re_cc; /* compiled */ static bool re_utf8 = true; static void regexpCompile(const char *re, bool ci) { static signed char try8 = 0; /* 1 is utf8 on, -1 is utf8 off */ - const char *re_error; - int re_offset; + int re_error; + PCRE2_SIZE re_offset; int re_opt; top: /* Do we need PCRE_NO_AUTO_CAPTURE? 
*/ re_opt = 0; if (ci) - re_opt |= PCRE_CASELESS; + re_opt |= PCRE2_CASELESS; if (re_utf8) { if (cons_utf8 && !cw->binMode && try8 >= 0) { @@ -3174,23 +3176,26 @@ static void regexpCompile(const char *re, bool ci) } } try8 = 1; - re_opt |= PCRE_UTF8; + re_opt |= PCRE2_UTF; } } - re_cc = pcre_compile(re, re_opt, &re_error, &re_offset, 0); - if (!re_cc && try8 > 0 && strstr(re_error, "PCRE_UTF8 support")) { + re_cc = pcre2_compile((uchar*)re, PCRE2_ZERO_TERMINATED, re_opt, &re_error, &re_offset, 0); + if (!re_cc && try8 > 0 && re_error == PCRE2_ERROR_UTF_IS_DISABLED) { i_puts(MSG_PcreUtf8); try8 = -1; goto top; } - if (!re_cc && try8 > 0 && strstr(re_error, "invalid UTF-8 string")) { + if (!re_cc && try8 > 0 && (PCRE2_ERROR_UTF32_ERR2 <= re_error && re_error <= PCRE2_ERROR_UTF8_ERR1)) { i_puts(MSG_BadUtf8String); } if (!re_cc) - setError(MSG_RexpError, re_error); -} /* regexpCompile */ + setError(MSG_RexpError, "ERROR"); + else +// re_cc and match_data rise and fall together. + match_data = pcre2_match_data_create_from_pattern(re_cc, NULL); +} /* Get the start or end of a range. * Pass the line containing the address. */ @@ -3246,7 +3251,8 @@ const char **split) char *subject; ln += incr; if (!searchWrap && (ln == 0 || ln > cw->dol)) { - pcre_free(re_cc); + pcre2_match_data_free(match_data); + pcre2_code_free(re_cc); setError(MSG_NotFound); return false; } @@ -3256,26 +3262,31 @@ const char **split) ln = cw->dol; subject = (char *)fetchLine(ln, 1); re_count = - pcre_exec(re_cc, 0, subject, + pcre2_match(re_cc, (uchar*)subject, pstLength((pst) subject) - 1, 0, 0, - re_vector, 33); + match_data, NULL); +// {uchar snork[300]; pcre2_get_error_message(re_count, snork, 300); puts(snork); } + re_vector = pcre2_get_ovector_pointer(match_data); free(subject); // An error in evaluation is treated like text not found. // This usually happens because this particular line has bad binary, not utf8. 
if (re_count < -1 && pcre_utf8_error_stop) { - pcre_free(re_cc); + pcre2_match_data_free(match_data); + pcre2_code_free(re_cc); setError(MSG_RexpError2, ln); return (globSub = false); } if (re_count >= 0) break; if (ln == cw->dot) { - pcre_free(re_cc); + pcre2_match_data_free(match_data); + pcre2_code_free(re_cc); setError(MSG_NotFound); return false; } } /* loop over lines */ - pcre_free(re_cc); + pcre2_match_data_free(match_data); + pcre2_code_free(re_cc); /* and ln is the line that matches */ } /* Now add or subtract from this number */ @@ -3363,11 +3374,14 @@ static bool doGlobal(const char *line) for (i = startRange; i <= endRange; ++i) { char *subject = (char *)fetchLine(i, 1); re_count = - pcre_exec(re_cc, 0, subject, pstLength((pst) subject) - 1, - 0, 0, re_vector, 33); + pcre2_match(re_cc, (uchar*)subject, pstLength((pst) subject) - 1, + 0, 0, match_data, NULL); + re_vector = pcre2_get_ovector_pointer(match_data); + free(subject); if (re_count < -1 && pcre_utf8_error_stop) { - pcre_free(re_cc); + pcre2_match_data_free(match_data); + pcre2_code_free(re_cc); setError(MSG_RexpError2, i); return false; } @@ -3377,7 +3391,8 @@ static bool doGlobal(const char *line) cw->map[i].gflag = true; } } /* loop over line */ - pcre_free(re_cc); + pcre2_match_data_free(match_data); + pcre2_code_free(re_cc); if (!gcnt) { setError((cmd == 'v') + MSG_NoMatchG); @@ -3477,7 +3492,8 @@ replaceText(const char *line, int len, const char *rhs, while (true) { /* find the next match */ re_count = - pcre_exec(re_cc, 0, line, len, offset, 0, re_vector, 33); + pcre2_match(re_cc, (uchar*)line, len, offset, 0, match_data, NULL); + re_vector = pcre2_get_ovector_pointer(match_data); if (re_count < -1 && (pcre_utf8_error_stop || startRange == endRange)) { setError(MSG_RexpError2, ln); @@ -4092,8 +4108,10 @@ static int substituteText(const char *line) breakLineResult = 0; } /* loop over lines in the range */ - if (re_cc) - pcre_free(re_cc); + if (re_cc) { + pcre2_match_data_free(match_data); 
+ pcre2_code_free(re_cc); + } if (intFlag) { setError(MSG_Interrupted); @@ -4113,8 +4131,10 @@ static int substituteText(const char *line) return true; abort: - if (re_cc) - pcre_free(re_cc); + if (re_cc) { + pcre2_match_data_free(match_data); + pcre2_code_free(re_cc); + } nzFree(replaceString); /* we may have just freed the result of a breakline command */ breakLineResult = 0; diff --git a/src/main.c b/src/main.c index 862e5dc3..838b0ce0 100644 --- a/src/main.c +++ b/src/main.c @@ -6,7 +6,6 @@ #include "eb.h" #include -#include <pcre.h> #include /* Define the globals that are declared in eb.h. */ @@ -577,10 +576,6 @@ int main(int argc, char **argv) ttySaveSettings(); initializeReadline(); -/* Let's everybody use my malloc and free routines */ - pcre_malloc = allocMem; - pcre_free = nzFree; - loadReplacements(); if (argc && stringEqual(argv[0], "-c")) { diff --git a/src/makefile b/src/makefile index e30866e9..82076114 100644 --- a/src/makefile +++ b/src/makefile @@ -11,7 +11,7 @@ CFLAGS += $(PLATFORM_CFLAGS) PERL != which perl TIDY_LIBS != pkg-config --libs tidy CURL_LIBS != pkg-config --libs libcurl -PCRE_LIBS != pkg-config --libs libpcre +PCRE_LIBS != pkg-config --libs libpcre2-8 EDBR_ODBC_OBJS != ./make-helper.sh --odbc-objs "${BUILD_EDBR_ODBC}" EDBR_ODBC_LIBS != ./make-helper.sh --odbc-libs "${BUILD_EDBR_ODBC}" # Set EBDEMIN to a nonempty string to support dynamic js deminimization