237 lines
7.0 KiB
Diff
237 lines
7.0 KiB
Diff
|
From 1d9a2e966ddb5242d79e4b6e238124c4be81f6dd Mon Sep 17 00:00:00 2001
|
||
|
From: Karl Dahlke <eklhad@comcast.net>
|
||
|
Date: Thu, 30 Sep 2021 20:21:39 -0400
|
||
|
Subject: [PATCH] Upgrade from pcre1 to pcre2. Patch courtesy of Sylvain
|
||
|
BERTRAND.
|
||
|
|
||
|
I only had to tweak it in a couple places.
|
||
|
---
|
||
|
src/buffers.c | 74 ++++++++++++++++++++++++++++++++-------------------
|
||
|
src/main.c | 5 ----
|
||
|
src/makefile | 2 +-
|
||
|
5 files changed, 54 insertions(+), 37 deletions(-)
|
||
|
|
||
|
diff --git a/src/buffers.c b/src/buffers.c
|
||
|
index 4324b4a6..b18a7674 100644
|
||
|
--- a/src/buffers.c
|
||
|
+++ b/src/buffers.c
|
||
|
@@ -11,7 +11,8 @@
|
||
|
|
||
|
/* If this include file is missing, you need the pcre package,
|
||
|
* and the pcre-devel package. */
|
||
|
-#include <pcre.h>
|
||
|
+#define PCRE2_CODE_UNIT_WIDTH 8
|
||
|
+#include <pcre2.h>
|
||
|
static bool pcre_utf8_error_stop = false;
|
||
|
|
||
|
#include <readline/readline.h>
|
||
|
@@ -3147,22 +3148,23 @@ regexpCheck(const char *line, bool isleft, bool ebmuck,
|
||
|
|
||
|
/* regexp variables */
|
||
|
static int re_count;
|
||
|
-static int re_vector[11 * 3];
|
||
|
-static pcre *re_cc; /* compiled */
|
||
|
+static PCRE2_SIZE *re_vector;
|
||
|
+static pcre2_match_data *match_data;
|
||
|
+static pcre2_code *re_cc; /* compiled */
|
||
|
static bool re_utf8 = true;
|
||
|
|
||
|
static void regexpCompile(const char *re, bool ci)
|
||
|
{
|
||
|
static signed char try8 = 0; /* 1 is utf8 on, -1 is utf8 off */
|
||
|
- const char *re_error;
|
||
|
- int re_offset;
|
||
|
+ int re_error;
|
||
|
+ PCRE2_SIZE re_offset;
|
||
|
int re_opt;
|
||
|
|
||
|
top:
|
||
|
/* Do we need PCRE_NO_AUTO_CAPTURE? */
|
||
|
re_opt = 0;
|
||
|
if (ci)
|
||
|
- re_opt |= PCRE_CASELESS;
|
||
|
+ re_opt |= PCRE2_CASELESS;
|
||
|
|
||
|
if (re_utf8) {
|
||
|
if (cons_utf8 && !cw->binMode && try8 >= 0) {
|
||
|
@@ -3174,23 +3176,26 @@ static void regexpCompile(const char *re, bool ci)
|
||
|
}
|
||
|
}
|
||
|
try8 = 1;
|
||
|
- re_opt |= PCRE_UTF8;
|
||
|
+ re_opt |= PCRE2_UTF;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
- re_cc = pcre_compile(re, re_opt, &re_error, &re_offset, 0);
|
||
|
- if (!re_cc && try8 > 0 && strstr(re_error, "PCRE_UTF8 support")) {
|
||
|
+ re_cc = pcre2_compile((uchar*)re, PCRE2_ZERO_TERMINATED, re_opt, &re_error, &re_offset, 0);
|
||
|
+ if (!re_cc && try8 > 0 && re_error == PCRE2_ERROR_UTF_IS_DISABLED) {
|
||
|
i_puts(MSG_PcreUtf8);
|
||
|
try8 = -1;
|
||
|
goto top;
|
||
|
}
|
||
|
- if (!re_cc && try8 > 0 && strstr(re_error, "invalid UTF-8 string")) {
|
||
|
+ if (!re_cc && try8 > 0 && (PCRE2_ERROR_UTF32_ERR2 <= re_error && re_error <= PCRE2_ERROR_UTF8_ERR1)) {
|
||
|
i_puts(MSG_BadUtf8String);
|
||
|
}
|
||
|
|
||
|
if (!re_cc)
|
||
|
- setError(MSG_RexpError, re_error);
|
||
|
-} /* regexpCompile */
|
||
|
+ setError(MSG_RexpError, "ERROR");
|
||
|
+ else
|
||
|
+// re_cc and match_data rise and fall together.
|
||
|
+ match_data = pcre2_match_data_create_from_pattern(re_cc, NULL);
|
||
|
+}
|
||
|
|
||
|
/* Get the start or end of a range.
|
||
|
* Pass the line containing the address. */
|
||
|
@@ -3246,7 +3251,8 @@ const char **split)
|
||
|
char *subject;
|
||
|
ln += incr;
|
||
|
if (!searchWrap && (ln == 0 || ln > cw->dol)) {
|
||
|
- pcre_free(re_cc);
|
||
|
+ pcre2_match_data_free(match_data);
|
||
|
+ pcre2_code_free(re_cc);
|
||
|
setError(MSG_NotFound);
|
||
|
return false;
|
||
|
}
|
||
|
@@ -3256,26 +3262,31 @@ const char **split)
|
||
|
ln = cw->dol;
|
||
|
subject = (char *)fetchLine(ln, 1);
|
||
|
re_count =
|
||
|
- pcre_exec(re_cc, 0, subject,
|
||
|
+ pcre2_match(re_cc, (uchar*)subject,
|
||
|
pstLength((pst) subject) - 1, 0, 0,
|
||
|
- re_vector, 33);
|
||
|
+ match_data, NULL);
|
||
|
+// {uchar snork[300]; pcre2_get_error_message(re_count, snork, 300); puts(snork); }
|
||
|
+ re_vector = pcre2_get_ovector_pointer(match_data);
|
||
|
free(subject);
|
||
|
// An error in evaluation is treated like text not found.
|
||
|
// This usually happens because this particular line has bad binary, not utf8.
|
||
|
if (re_count < -1 && pcre_utf8_error_stop) {
|
||
|
- pcre_free(re_cc);
|
||
|
+ pcre2_match_data_free(match_data);
|
||
|
+ pcre2_code_free(re_cc);
|
||
|
setError(MSG_RexpError2, ln);
|
||
|
return (globSub = false);
|
||
|
}
|
||
|
if (re_count >= 0)
|
||
|
break;
|
||
|
if (ln == cw->dot) {
|
||
|
- pcre_free(re_cc);
|
||
|
+ pcre2_match_data_free(match_data);
|
||
|
+ pcre2_code_free(re_cc);
|
||
|
setError(MSG_NotFound);
|
||
|
return false;
|
||
|
}
|
||
|
} /* loop over lines */
|
||
|
- pcre_free(re_cc);
|
||
|
+ pcre2_match_data_free(match_data);
|
||
|
+ pcre2_code_free(re_cc);
|
||
|
/* and ln is the line that matches */
|
||
|
}
|
||
|
/* Now add or subtract from this number */
|
||
|
@@ -3363,11 +3374,14 @@ static bool doGlobal(const char *line)
|
||
|
for (i = startRange; i <= endRange; ++i) {
|
||
|
char *subject = (char *)fetchLine(i, 1);
|
||
|
re_count =
|
||
|
- pcre_exec(re_cc, 0, subject, pstLength((pst) subject) - 1,
|
||
|
- 0, 0, re_vector, 33);
|
||
|
+ pcre2_match(re_cc, (uchar*)subject, pstLength((pst) subject) - 1,
|
||
|
+ 0, 0, match_data, NULL);
|
||
|
+ re_vector = pcre2_get_ovector_pointer(match_data);
|
||
|
+
|
||
|
free(subject);
|
||
|
if (re_count < -1 && pcre_utf8_error_stop) {
|
||
|
- pcre_free(re_cc);
|
||
|
+ pcre2_match_data_free(match_data);
|
||
|
+ pcre2_code_free(re_cc);
|
||
|
setError(MSG_RexpError2, i);
|
||
|
return false;
|
||
|
}
|
||
|
@@ -3377,7 +3391,8 @@ static bool doGlobal(const char *line)
|
||
|
cw->map[i].gflag = true;
|
||
|
}
|
||
|
} /* loop over line */
|
||
|
- pcre_free(re_cc);
|
||
|
+ pcre2_match_data_free(match_data);
|
||
|
+ pcre2_code_free(re_cc);
|
||
|
|
||
|
if (!gcnt) {
|
||
|
setError((cmd == 'v') + MSG_NoMatchG);
|
||
|
@@ -3477,7 +3492,8 @@ replaceText(const char *line, int len, const char *rhs,
|
||
|
while (true) {
|
||
|
/* find the next match */
|
||
|
re_count =
|
||
|
- pcre_exec(re_cc, 0, line, len, offset, 0, re_vector, 33);
|
||
|
+ pcre2_match(re_cc, (uchar*)line, len, offset, 0, match_data, NULL);
|
||
|
+ re_vector = pcre2_get_ovector_pointer(match_data);
|
||
|
if (re_count < -1 &&
|
||
|
(pcre_utf8_error_stop || startRange == endRange)) {
|
||
|
setError(MSG_RexpError2, ln);
|
||
|
@@ -4092,8 +4108,10 @@ static int substituteText(const char *line)
|
||
|
breakLineResult = 0;
|
||
|
} /* loop over lines in the range */
|
||
|
|
||
|
- if (re_cc)
|
||
|
- pcre_free(re_cc);
|
||
|
+ if (re_cc) {
|
||
|
+ pcre2_match_data_free(match_data);
|
||
|
+ pcre2_code_free(re_cc);
|
||
|
+ }
|
||
|
|
||
|
if (intFlag) {
|
||
|
setError(MSG_Interrupted);
|
||
|
@@ -4113,8 +4131,10 @@ static int substituteText(const char *line)
|
||
|
return true;
|
||
|
|
||
|
abort:
|
||
|
- if (re_cc)
|
||
|
- pcre_free(re_cc);
|
||
|
+ if (re_cc) {
|
||
|
+ pcre2_match_data_free(match_data);
|
||
|
+ pcre2_code_free(re_cc);
|
||
|
+ }
|
||
|
nzFree(replaceString);
|
||
|
/* we may have just freed the result of a breakline command */
|
||
|
breakLineResult = 0;
|
||
|
diff --git a/src/main.c b/src/main.c
|
||
|
index 862e5dc3..838b0ce0 100644
|
||
|
--- a/src/main.c
|
||
|
+++ b/src/main.c
|
||
|
@@ -6,7 +6,6 @@
|
||
|
#include "eb.h"
|
||
|
|
||
|
#include <pthread.h>
|
||
|
-#include <pcre.h>
|
||
|
#include <signal.h>
|
||
|
|
||
|
/* Define the globals that are declared in eb.h. */
|
||
|
@@ -577,10 +576,6 @@ int main(int argc, char **argv)
|
||
|
ttySaveSettings();
|
||
|
initializeReadline();
|
||
|
|
||
|
-/* Let's everybody use my malloc and free routines */
|
||
|
- pcre_malloc = allocMem;
|
||
|
- pcre_free = nzFree;
|
||
|
-
|
||
|
loadReplacements();
|
||
|
|
||
|
if (argc && stringEqual(argv[0], "-c")) {
|
||
|
diff --git a/src/makefile b/src/makefile
|
||
|
index e30866e9..82076114 100644
|
||
|
--- a/src/makefile
|
||
|
+++ b/src/makefile
|
||
|
@@ -11,7 +11,7 @@ CFLAGS += $(PLATFORM_CFLAGS)
|
||
|
PERL != which perl
|
||
|
TIDY_LIBS != pkg-config --libs tidy
|
||
|
CURL_LIBS != pkg-config --libs libcurl
|
||
|
-PCRE_LIBS != pkg-config --libs libpcre
|
||
|
+PCRE_LIBS != pkg-config --libs libpcre2-8
|
||
|
EDBR_ODBC_OBJS != ./make-helper.sh --odbc-objs "${BUILD_EDBR_ODBC}"
|
||
|
EDBR_ODBC_LIBS != ./make-helper.sh --odbc-libs "${BUILD_EDBR_ODBC}"
|
||
|
# Set EBDEMIN to a nonempty string to support dynamic js deminimization
|