From dd83dfb3f2d2e5139ea7d00240b5441daa0b3a56 Mon Sep 17 00:00:00 2001
From: Paul Eggert
Date: Thu, 21 Apr 2022 18:56:12 -0700
Subject: [PATCH] regex: match [...---...] like V7 grep

Problem reported by Arnold Robbins in:
https://bugs.gnu.org/20657
https://lists.gnu.org/r/bug-gnulib/2022-04/msg00053.html
* lib/regcomp.c (peek_token_bracket): Let [...---...] match '-'.
This is an extension to POSIX, and matches V7 Unix grep.
---
 ChangeLog     |  9 +++++++++
 lib/regcomp.c | 16 +++++++++++++---
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index cd16bbe0cd..ddd4826bcf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2022-04-21  Paul Eggert
+
+	regex: match [...---...] like V7 grep
+	Problem reported by Arnold Robbins in:
+	https://bugs.gnu.org/20657
+	https://lists.gnu.org/r/bug-gnulib/2022-04/msg00053.html
+	* lib/regcomp.c (peek_token_bracket): Let [...---...] match '-'.
+	This is an extension to POSIX, and matches V7 Unix grep.
+
 2022-04-20  Paul Eggert
 
 	backupfile: fix bug when renaming simple backups
diff --git a/lib/regcomp.c b/lib/regcomp.c
index b607c85320..122c3de58c 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -2038,15 +2038,25 @@ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
     }
   switch (c)
     {
-    case '-':
-      token->type = OP_CHARSET_RANGE;
-      break;
     case ']':
       token->type = OP_CLOSE_BRACKET;
       break;
     case '^':
       token->type = OP_NON_MATCH_LIST;
       break;
+    case '-':
+      /* In V7 Unix grep and Unix awk and mawk, [...---...]
+         (3 adjacent minus signs) stands for a single minus sign.
+         Support that without breaking anything else.  */
+      if (! (re_string_cur_idx (input) + 2 < re_string_length (input)
+             && re_string_peek_byte (input, 1) == '-'
+             && re_string_peek_byte (input, 2) == '-'))
+        {
+          token->type = OP_CHARSET_RANGE;
+          break;
+        }
+      re_string_skip_bytes (input, 2);
+      FALLTHROUGH;
     default:
       token->type = CHARACTER;
     }
-- 
2.35.1