bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: new module 'regex-quote'


From: Bruno Haible
Subject: Re: new module 'regex-quote'
Date: Sun, 19 Sep 2010 00:05:54 +0200
User-agent: KMail/1.9.9

Reuben Thomas wrote:
> > it's a function which transforms a string to a string.
> > I'm sure this function has been implemented many times already. The one I
> > wrote is called 'regexp-quote' [1][2].
> 
> Great, can we have it in gnulib? Does it work for all syntaxes?

It needs a flag to distinguish the syntax, since BRE and ERE have different
syntaxes. Here's a suggested module:


2010-09-18  Bruno Haible  <address@hidden>

        New module 'regex-quote'.
        * lib/regex-quote.h: New file.
        * lib/regex-quote.c: New file.
        * modules/regex-quote: New file.
        Suggested by Reuben Thomas <address@hidden>.

============================== lib/regex-quote.h ==============================
/* Construct a regular expression from a literal string.
   Copyright (C) 1995, 2010 Free Software Foundation, Inc.
   Written by Bruno Haible <address@hidden>, 2010.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#ifndef REGEX_QUOTE_H
#define REGEX_QUOTE_H

#include <stddef.h>

/* regex_quote converts a literal string to a regular expression that will
   look for this literal string.
   cflags can be 0 or REG_EXTENDED.
   If it is 0, the result is a Basic Regular Expression (BRE)
   <http://www.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03>.
   If it is REG_EXTENDED, the result is an Extended Regular Expression (ERE)
   <http://www.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04>.
   The result is not anchored; if you want it to match only complete lines,
   you need to add "^" at the beginning of the result and "$" at the end of the
   result.
 */

/* Returns the number of bytes needed for the quoted string, not counting a
   terminating NUL byte.  */
extern size_t regex_quote_length (const char *string, int cflags);

/* Copies the quoted string to p and returns the incremented p.
   There must be room for regex_quote_length (string, cflags) + 1 bytes at p.
   No terminating NUL byte is stored; the extra byte is there so that the
   caller can store one at the returned position.
 */
extern char * regex_quote_copy (char *p, const char *string, int cflags);

/* Returns the freshly allocated, NUL-terminated quoted string.  */
extern char * regex_quote (const char *string, int cflags);

#endif /* REGEX_QUOTE_H */
============================== lib/regex-quote.c ==============================
/* Construct a regular expression from a literal string.
   Copyright (C) 1995, 2010 Free Software Foundation, Inc.
   Written by Bruno Haible <address@hidden>, 2010.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include <config.h>

/* Specification.  */
#include "regex-quote.h"

#include <string.h>

#include "mbuiter.h"
#include "xalloc.h"

/* Characters that are special in a BRE.  Each of them gets a backslash
   prepended when a literal string is quoted.  */
static const char bre_special[] = "$^.*[]\\";

/* Characters that are special in an ERE.  In addition to the BRE ones, this
   covers the repetition operators '+' and '?', the interval braces '{' and
   '}', the grouping parentheses, and the alternation operator '|'.  Leaving
   any of them unquoted would change the meaning of the resulting ERE.  */
static const char ere_special[] = "$^.*[]\\+?{}()|";

/* Computes how many bytes the quoted form of STRING occupies, without a
   terminating NUL.  A nonzero CFLAGS selects the ERE set of special
   characters, zero selects the BRE set.  */
size_t
regex_quote_length (const char *string, int cflags)
{
  const char *special;
  size_t total = 0;
  mbui_iterator_t it;

  if (cflags != 0)
    special = ere_special;
  else
    special = bre_special;

  mbui_init (it, string);
  while (mbui_avail (it))
    {
      size_t nbytes = mb_len (mbui_cur (it));

      /* Every character contributes its own bytes ...  */
      total += nbytes;
      /* ... and a special one additionally needs a backslash.  The special
         sets contain only ASCII characters, so only single-byte characters
         have to be looked up.  */
      if (nbytes == 1 && strchr (special, * mbui_cur_ptr (it)) != NULL)
        total += 1;

      mbui_advance (it);
    }
  return total;
}

/* Stores the quoted form of STRING at P and returns the position just after
   the last byte stored.  No terminating NUL is written here.
   The caller must provide regex_quote_length (string, cflags) + 1 bytes
   at P.  A nonzero CFLAGS selects the ERE set of special characters, zero
   selects the BRE set.
 */
char *
regex_quote_copy (char *p, const char *string, int cflags)
{
  const char *special = bre_special;
  mbui_iterator_t it;

  if (cflags != 0)
    special = ere_special;

  mbui_init (it, string);
  while (mbui_avail (it))
    {
      const char *src = mbui_cur_ptr (it);
      size_t nbytes = mb_len (mbui_cur (it));

      /* The special sets contain only ASCII characters, so a multibyte
         character never needs a backslash.  */
      if (nbytes == 1 && strchr (special, *src) != NULL)
        *p++ = '\\';

      memcpy (p, src, nbytes);
      p += nbytes;

      mbui_advance (it);
    }
  return p;
}

/* Allocates a buffer of exactly the required size, fills it with the quoted
   form of STRING, NUL-terminates it and returns it.  */
char *
regex_quote (const char *string, int cflags)
{
  /* One byte more than the quoted text, for the terminating NUL.  */
  size_t needed = regex_quote_length (string, cflags) + 1;
  char *buffer = XNMALLOC (needed, char);
  char *end = regex_quote_copy (buffer, string, cflags);

  *end = '\0';
  return buffer;
}
============================= modules/regex-quote =============================
Description:
Construct a regular expression from a literal string.

Files:
lib/regex-quote.h
lib/regex-quote.c

Depends-on:
xalloc
mbuiter

configure.ac:

Makefile.am:
lib_SOURCES += regex-quote.c

Include:
"regex-quote.h"

License:
GPL

Maintainer:
Bruno Haible



reply via email to

[Prev in Thread] Current Thread [Next in Thread]