>From e707dbe7c6da9dd8300cb3d60141f144a7a5d5b1 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Wed, 14 Jul 2021 23:55:30 -0500 Subject: [PATCH 2/2] regex: modernize to newer regex bugset Problem reported by Hiroo Hayashi in: https://lists.gnu.org/r/bug-gnulib/2021-07/msg00024.html * m4/regex.m4 (gl_REGEX): Allow newer glibc behavior for ()0|\1, behavior where the regex compiles but does not match. Test for glibc bug 11053. * tests/test-regex.c (bug_regex11, main): Add casts needed for printf portability. (main): Allow newer glibc behavior for ()0|\1. --- ChangeLog | 10 ++++++++++ m4/regex.m4 | 40 ++++++++++++++++++++++++++++++++++++++-- tests/test-regex.c | 11 ++++++----- 3 files changed, 54 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index 00d31cdc7..78590feae 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,15 @@ 2021-07-14 Paul Eggert + regex: modernize to newer regex bugset + Problem reported by Hiroo Hayashi in: + https://lists.gnu.org/r/bug-gnulib/2021-07/msg00024.html + * m4/regex.m4 (gl_REGEX): Allow newer glibc behavior for ()0|\1, + behavior where the regex compiles but does not match. + Test for glibc bug 11053. + * tests/test-regex.c (bug_regex11, main): Add casts needed + for printf portability. + (main): Allow newer glibc behavior for ()0|\1. + regex: fix shell quoting problem in configuration * m4/regex.m4 (gl_REGEX): Fix quoting problems. These C programs are put into unquoted here-documents, diff --git a/m4/regex.m4 b/m4/regex.m4 index 0e1bafef2..1c7e562f6 100644 --- a/m4/regex.m4 +++ b/m4/regex.m4 @@ -1,4 +1,4 @@ -# serial 72 +# serial 73 # Copyright (C) 1996-2001, 2003-2021 Free Software Foundation, Inc. 
# @@ -266,12 +266,48 @@ AC_DEFUN([gl_REGEX], memset (&regex, 0, sizeof regex); s = re_compile_pattern ("0|()0|\\\\1|0", 10, &regex); if (!s) - result |= 64; + { + memset (&regs, 0, sizeof regs); + i = re_search (&regex, "x", 1, 0, 1, &regs); + if (i != -1) + result |= 64; + if (0 <= i) + { + free (regs.start); + free (regs.end); + } + regfree (&regex); + } else { if (strcmp (s, "Invalid back reference")) result |= 64; + } + + /* glibc bug 11053. */ + re_set_syntax (RE_SYNTAX_POSIX_BASIC); + memset (&regex, 0, sizeof regex); + static char const pat_sub2[] = "\\\\(a*\\\\)*a*\\\\1"; + s = re_compile_pattern (pat_sub2, sizeof pat_sub2 - 1, &regex); + if (s) + result |= 64; + else + { + memset (&regs, 0, sizeof regs); + static char const data[] = "a"; + int datalen = sizeof data - 1; + i = re_search (&regex, data, datalen, 0, datalen, &regs); + if (i != 0) + result |= 64; + else if (regs.num_regs < 2) + result |= 64; + else if (! (regs.start[0] == 0 && regs.end[0] == 1)) + result |= 64; + else if (! (regs.start[1] == 0 && regs.end[1] == 0)) + result |= 64; regfree (&regex); + free (regs.start); + free (regs.end); } #if 0 diff --git a/tests/test-regex.c b/tests/test-regex.c index 7ea73cfb6..ed4ca64c0 100644 --- a/tests/test-regex.c +++ b/tests/test-regex.c @@ -155,7 +155,8 @@ bug_regex11 (void) if (tests[i].rm[n].rm_so == -1 && tests[i].rm[n].rm_eo == -1) break; report_error ("%s: regexec %zd match failure rm[%d] %d..%d", - tests[i].pattern, i, n, rm[n].rm_so, rm[n].rm_eo); + tests[i].pattern, i, n, + (int) rm[n].rm_so, (int) rm[n].rm_eo); break; } @@ -433,7 +434,7 @@ main (void) pat_sub2, data, (int) regs.start[0], (int) regs.end[0]); else if (! 
(regs.start[1] == 0 && regs.end[1] == 0)) report_error ("re_search '%s' on '%s' returned wrong submatch [%d,%d)", - pat_sub2, data, regs.start[1], regs.end[1]); + pat_sub2, data, (int) regs.start[1], (int) regs.end[1]); regfree (&regex); free (regs.start); free (regs.end); @@ -466,9 +467,9 @@ main (void) memset (&regex, 0, sizeof regex); static char const pat_badback[] = "0|()0|\\1|0"; s = re_compile_pattern (pat_badback, sizeof pat_badback, &regex); - if (!s) - s = "failed to report invalid back reference"; - if (strcmp (s, "Invalid back reference") != 0) + if (!s && re_search (&regex, "x", 1, 0, 1, &regs) != -1) + s = "mishandled invalid back reference"; + if (s && strcmp (s, "Invalid back reference") != 0) report_error ("%s: %s", pat_badback, s); #if 0 -- 2.25.1