[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Changes to m4/m4/syntax.c,v
From: Eric Blake
Subject: Changes to m4/m4/syntax.c,v
Date: Sat, 23 Dec 2006 00:02:23 +0000
CVSROOT: /sources/m4
Module name: m4
Changes by: Eric Blake <ericb> 06/12/23 00:02:21
Index: m4/syntax.c
===================================================================
RCS file: /sources/m4/m4/m4/syntax.c,v
retrieving revision 1.18
retrieving revision 1.19
diff -u -b -r1.18 -r1.19
--- m4/syntax.c 11 Nov 2006 16:21:25 -0000 1.18
+++ m4/syntax.c 23 Dec 2006 00:02:20 -0000 1.19
@@ -35,14 +35,13 @@
M4_SYNTAX_OPEN Open list of macro arguments
M4_SYNTAX_CLOSE Close list of macro arguments
M4_SYNTAX_COMMA Separates macro arguments
- M4_SYNTAX_DOLLAR *Indicates macro argument in user macros
+ M4_SYNTAX_DOLLAR Indicates macro argument in user macros
M4_SYNTAX_ACTIVE This character is a macro name by itself
M4_SYNTAX_ESCAPE Use this character to prefix all macro names
- M4_SYNTAX_ASSIGN Used to assign defaults in parameter lists
+ M4_SYNTAX_ASSIGN *Used to assign defaults in parameter lists
M4_SYNTAX_ALPHA Alphabetic characters (can start macro names)
- M4_SYNTAX_NUM Numeric characters
- M4_SYNTAX_ALNUM Alphanumeric characters (can form macro names)
+ M4_SYNTAX_NUM Numeric characters (can form macro names)
M4_SYNTAX_LQUOTE A single characters left quote
M4_SYNTAX_BCOMM A single characters begin comment delimiter
@@ -76,10 +75,10 @@
The precedence as implemented by next_token () is:
M4_SYNTAX_IGNORE *Filtered out below next_token ()
- M4_SYNTAX_BCOMM Reads all until M4_SYNTAX_ECOMM
M4_SYNTAX_ESCAPE Reads macro name iff set, else next
M4_SYNTAX_ALPHA Reads macro name
M4_SYNTAX_LQUOTE Reads all until balanced M4_SYNTAX_RQUOTE
+ M4_SYNTAX_BCOMM Reads all until M4_SYNTAX_ECOMM
M4_SYNTAX_OTHER } Reads all M4_SYNTAX_OTHER, M4_SYNTAX_NUM
M4_SYNTAX_NUM } and M4_SYNTAX_DOLLAR
@@ -93,9 +92,11 @@
string is parsed equally whether there is a $ or not. The character
$ is used by convention in user macros. */
-static bool check_is_macro_escaped (m4_syntax_table *syntax);
-static int add_syntax_attribute (m4_syntax_table *syntax, int ch,
int code);
-static int remove_syntax_attribute (m4_syntax_table *syntax, int ch, int code);
+static bool check_is_single_quotes (m4_syntax_table *);
+static bool check_is_single_comments (m4_syntax_table *);
+static bool check_is_macro_escaped (m4_syntax_table *);
+static int add_syntax_attribute (m4_syntax_table *, int, int);
+static int remove_syntax_attribute (m4_syntax_table *, int, int);
m4_syntax_table *
m4_syntax_create (void)
@@ -103,52 +104,49 @@
m4_syntax_table *syntax = xzalloc (sizeof *syntax);
int ch;
- for (ch = 256; --ch > 0;)
+ /* Set up default table. This table never changes during operation. */
+ for (ch = 256; --ch >= 0;)
+ switch (ch)
{
- if (ch == '(')
- add_syntax_attribute (syntax, ch, M4_SYNTAX_OPEN);
- else if (ch == ')')
- add_syntax_attribute (syntax, ch, M4_SYNTAX_CLOSE);
- else if (ch == ',')
- add_syntax_attribute (syntax, ch, M4_SYNTAX_COMMA);
- else if (ch == '$')
- add_syntax_attribute (syntax, ch, M4_SYNTAX_DOLLAR);
- else if (ch == '=')
- add_syntax_attribute (syntax, ch, M4_SYNTAX_ASSIGN);
- else if (isspace (ch))
- add_syntax_attribute (syntax, ch, M4_SYNTAX_SPACE);
+ case '(':
+ syntax->orig[ch] = M4_SYNTAX_OPEN;
+ break;
+ case ')':
+ syntax->orig[ch] = M4_SYNTAX_CLOSE;
+ break;
+ case ',':
+ syntax->orig[ch] = M4_SYNTAX_COMMA;
+ break;
+ case '$':
+ syntax->orig[ch] = M4_SYNTAX_DOLLAR;
+ break;
+ case '`':
+ syntax->orig[ch] = M4_SYNTAX_LQUOTE;
+ break;
+ case '#':
+ syntax->orig[ch] = M4_SYNTAX_BCOMM;
+ break;
+ case '=':
+ /* FIXME -revisit the assign syntax attribute. */
+ /* syntax->orig[ch] = M4_SYNTAX_ASSIGN; */
+ /* break; */
+ case '\0':
+ /* FIXME - revisit the ignore syntax attribute. */
+ /* syntax->orig[ch] = M4_SYNTAX_IGNORE; */
+ /* break; */
+ default:
+ if (isspace (ch))
+ syntax->orig[ch] = M4_SYNTAX_SPACE;
else if (isalpha (ch) || ch == '_')
- add_syntax_attribute (syntax, ch, M4_SYNTAX_ALPHA);
+ syntax->orig[ch] = M4_SYNTAX_ALPHA;
else if (isdigit (ch))
- add_syntax_attribute (syntax, ch, M4_SYNTAX_NUM);
+ syntax->orig[ch] = M4_SYNTAX_NUM;
else
- add_syntax_attribute (syntax, ch, M4_SYNTAX_OTHER);
+ syntax->orig[ch] = M4_SYNTAX_OTHER;
}
- /* add_syntax_attribute(syntax, 0, M4_SYNTAX_IGNORE); */
-
- /* Default quotes and comment delimiters are always one char */
- syntax->lquote.string = xstrdup (DEF_LQUOTE);
- syntax->lquote.length = strlen (syntax->lquote.string);
- syntax->rquote.string = xstrdup (DEF_RQUOTE);
- syntax->rquote.length = strlen (syntax->rquote.string);
- syntax->bcomm.string = xstrdup (DEF_BCOMM);
- syntax->bcomm.length = strlen (syntax->bcomm.string);
- syntax->ecomm.string = xstrdup (DEF_ECOMM);
- syntax->ecomm.length = strlen (syntax->ecomm.string);
-
- syntax->is_single_quotes = true;
- syntax->is_single_comments = true;
- syntax->is_macro_escaped = false;
-
- add_syntax_attribute (syntax, to_uchar (syntax->lquote.string[0]),
- M4_SYNTAX_LQUOTE);
- add_syntax_attribute (syntax, to_uchar (syntax->rquote.string[0]),
- M4_SYNTAX_RQUOTE);
- add_syntax_attribute (syntax, to_uchar (syntax->bcomm.string[0]),
- M4_SYNTAX_BCOMM);
- add_syntax_attribute (syntax, to_uchar (syntax->ecomm.string[0]),
- M4_SYNTAX_ECOMM);
+ /* Set up current table to match default. */
+ m4_set_syntax (syntax, '\0', '\0', NULL);
return syntax;
}
@@ -171,6 +169,7 @@
switch (ch)
{
+ /* FIXME - revisit the ignore syntax attribute. */
case 'I': case 'i': code = M4_SYNTAX_IGNORE; break;
case 'O': case 'o': code = M4_SYNTAX_OTHER; break;
case 'S': case 's': code = M4_SYNTAX_SPACE; break;
@@ -180,6 +179,7 @@
case '(': code = M4_SYNTAX_OPEN; break;
case ')': code = M4_SYNTAX_CLOSE; break;
case ',': code = M4_SYNTAX_COMMA; break;
+ /* FIXME - revisit the assign syntax attribute. */
case '=': code = M4_SYNTAX_ASSIGN; break;
case '@': code = M4_SYNTAX_ESCAPE; break;
case '$': code = M4_SYNTAX_DOLLAR; break;
@@ -205,7 +205,7 @@
if (code & M4_SYNTAX_MASKS)
syntax->table[ch] |= code;
else
- syntax->table[ch] = code;
+ syntax->table[ch] = (syntax->table[ch] & M4_SYNTAX_MASKS) | code;
#ifdef DEBUG_SYNTAX
fprintf(stderr, "Set syntax %o %c = %04X\n",
@@ -219,7 +219,7 @@
static int
remove_syntax_attribute (m4_syntax_table *syntax, int ch, int code)
{
- if (code & M4_SYNTAX_MASKS)
+ assert (code & M4_SYNTAX_MASKS);
syntax->table[ch] &= ~code;
#ifdef DEBUG_SYNTAX
@@ -231,34 +231,294 @@
return syntax->table[ch];
}
-int
-m4_set_syntax (m4_syntax_table *syntax, const char key, const char *chars)
+static void
+add_syntax_set (m4_syntax_table *syntax, const char *chars, int code)
{
- int ch, code;
+ int ch;
- assert (syntax);
+ if (*chars == '\0')
+ return;
- code = m4_syntax_code (key);
+ if (code == M4_SYNTAX_ESCAPE)
+ syntax->is_macro_escaped = true;
+
+ /* Adding doesn't affect single-quote or single-comment. */
+
+ while ((ch = to_uchar (*chars++)))
+ add_syntax_attribute (syntax, ch, code);
+}
+
+static void
+subtract_syntax_set (m4_syntax_table *syntax, const char *chars, int code)
+{
+ int ch;
+
+ if (*chars == '\0')
+ return;
- if ((code < 0) && (key != '\0'))
+ while ((ch = to_uchar (*chars++)))
{
- return -1;
+ if ((code & M4_SYNTAX_MASKS) != 0)
+ remove_syntax_attribute (syntax, ch, code);
+ else if (m4_has_syntax (syntax, ch, code))
+ add_syntax_attribute (syntax, ch, M4_SYNTAX_OTHER);
+ }
+
+ /* Check for any cleanup needed. */
+ switch (code)
+ {
+ case M4_SYNTAX_ESCAPE:
+ if (syntax->is_macro_escaped)
+ check_is_macro_escaped (syntax);
+ break;
+
+ case M4_SYNTAX_LQUOTE:
+ case M4_SYNTAX_RQUOTE:
+ if (syntax->is_single_quotes)
+ check_is_single_quotes (syntax);
+ break;
+
+ case M4_SYNTAX_BCOMM:
+ case M4_SYNTAX_ECOMM:
+ if (syntax->is_single_comments)
+ check_is_single_comments (syntax);
+ break;
+
+ default:
+ break;
}
+}
- if (*chars != '\0')
+static void
+set_syntax_set (m4_syntax_table *syntax, const char *chars, int code)
+{
+ int ch;
+ /* Explicit set of characters to install with this category; all
+ other characters that used to have the category get reset to
+ OTHER. */
+ for (ch = 256; --ch >= 0; )
+ {
+ if (code == M4_SYNTAX_RQUOTE || code == M4_SYNTAX_ECOMM)
+ remove_syntax_attribute (syntax, ch, code);
+ else if (m4_has_syntax (syntax, ch, code))
+ add_syntax_attribute (syntax, ch, M4_SYNTAX_OTHER);
+ }
while ((ch = to_uchar (*chars++)))
add_syntax_attribute (syntax, ch, code);
+
+ /* Check for any cleanup needed. */
+ check_is_macro_escaped (syntax);
+ check_is_single_quotes (syntax);
+ check_is_single_comments (syntax);
+}
+
+static void
+reset_syntax_set (m4_syntax_table *syntax, int code)
+{
+ int ch;
+ for (ch = 256; --ch >= 0; )
+ {
+ /* Reset the category back to its default state. All other
+ characters that used to have this category get reset to
+ their default state as well. */
+ if (code == M4_SYNTAX_RQUOTE)
+ {
+ if (ch == '\'')
+ add_syntax_attribute (syntax, ch, code);
else
- for (ch = 256; --ch > 0; )
+ remove_syntax_attribute (syntax, ch, code);
+ }
+ else if (code == M4_SYNTAX_ECOMM)
+ {
+ if (ch == '\n')
add_syntax_attribute (syntax, ch, code);
-
- if (syntax->is_macro_escaped || code == M4_SYNTAX_ESCAPE)
+ else
+ remove_syntax_attribute (syntax, ch, code);
+ }
+ else if (syntax->orig[ch] == code || m4_has_syntax (syntax, ch, code))
+ add_syntax_attribute (syntax, ch, syntax->orig[ch]);
+ }
check_is_macro_escaped (syntax);
+ check_is_single_quotes (syntax);
+ check_is_single_comments (syntax);
+}
+int
+m4_set_syntax (m4_syntax_table *syntax, char key, char action,
+ const char *chars)
+{
+ int code;
+
+ assert (syntax);
+ assert (chars || key == '\0');
+
+ if (key == '\0')
+ {
+ /* Restore the default syntax, which has known quote and comment
+ properties. */
+ memcpy (syntax->table, syntax->orig, sizeof syntax->orig);
+
+ free (syntax->lquote.string);
+ free (syntax->rquote.string);
+ free (syntax->bcomm.string);
+ free (syntax->ecomm.string);
+
+ syntax->lquote.string = xstrdup (DEF_LQUOTE);
+ syntax->lquote.length = strlen (syntax->lquote.string);
+ syntax->rquote.string = xstrdup (DEF_RQUOTE);
+ syntax->rquote.length = strlen (syntax->rquote.string);
+ syntax->bcomm.string = xstrdup (DEF_BCOMM);
+ syntax->bcomm.length = strlen (syntax->bcomm.string);
+ syntax->ecomm.string = xstrdup (DEF_ECOMM);
+ syntax->ecomm.length = strlen (syntax->ecomm.string);
+
+ add_syntax_attribute (syntax, to_uchar (syntax->rquote.string[0]),
+ M4_SYNTAX_RQUOTE);
+ add_syntax_attribute (syntax, to_uchar (syntax->ecomm.string[0]),
+ M4_SYNTAX_ECOMM);
+
+ syntax->is_single_quotes = true;
+ syntax->is_single_comments = true;
+ syntax->is_macro_escaped = false;
+ return 0;
+ }
+
+ code = m4_syntax_code (key);
+ if (code < 0)
+ {
+ return -1;
+ }
+ switch (action)
+ {
+ case '+':
+ add_syntax_set (syntax, chars, code);
+ break;
+ case '-':
+ subtract_syntax_set (syntax, chars, code);
+ break;
+ case '=':
+ set_syntax_set (syntax, chars, code);
+ break;
+ case '\0':
+ reset_syntax_set (syntax, code);
+ break;
+ default:
+ assert (false);
+ }
return code;
}
static bool
+check_is_single_quotes (m4_syntax_table *syntax)
+{
+ int ch;
+ int lquote = -1;
+ int rquote = -1;
+
+ if (! syntax->is_single_quotes)
+ return false;
+ assert (syntax->lquote.length == 1 && syntax->rquote.length == 1);
+
+ if (m4_has_syntax (syntax, to_uchar (*syntax->lquote.string),
+ M4_SYNTAX_LQUOTE)
+ && m4_has_syntax (syntax, to_uchar (*syntax->rquote.string),
+ M4_SYNTAX_RQUOTE))
+ return true;
+
+ /* The most recent action invalidated our current lquote/rquote. If
+ we still have exactly one character performing those roles based
+ on the syntax table, then update lquote/rquote accordingly.
+ Otherwise, keep lquote/rquote, but we no longer have single
+ quotes. */
+ for (ch = 256; --ch >= 0; )
+ {
+ if (m4_has_syntax (syntax, ch, M4_SYNTAX_LQUOTE))
+ {
+ if (lquote == -1)
+ lquote = ch;
+ else
+ {
+ syntax->is_single_quotes = false;
+ break;
+ }
+ }
+ if (m4_has_syntax (syntax, ch, M4_SYNTAX_RQUOTE))
+ {
+ if (rquote == -1)
+ rquote = ch;
+ else
+ {
+ syntax->is_single_quotes = false;
+ break;
+ }
+ }
+ }
+ if (lquote == -1 || rquote == -1)
+ syntax->is_single_quotes = false;
+ else if (syntax->is_single_quotes)
+ {
+ *syntax->lquote.string = lquote;
+ *syntax->rquote.string = rquote;
+ }
+ return syntax->is_single_quotes;
+}
+
+static bool
+check_is_single_comments (m4_syntax_table *syntax)
+{
+ int ch;
+ int bcomm = -1;
+ int ecomm = -1;
+
+ if (! syntax->is_single_comments)
+ return false;
+ assert (syntax->bcomm.length == 1 && syntax->ecomm.length == 1);
+
+ if (m4_has_syntax (syntax, to_uchar (*syntax->bcomm.string),
+ M4_SYNTAX_BCOMM)
+ && m4_has_syntax (syntax, to_uchar (*syntax->ecomm.string),
+ M4_SYNTAX_ECOMM))
+ return true;
+
+ /* The most recent action invalidated our current bcomm/ecomm. If
+ we still have exactly one character performing those roles based
+ on the syntax table, then update bcomm/ecomm accordingly.
+ Otherwise, keep bcomm/ecomm, but we no longer have single
+ comments. */
+ for (ch = 256; --ch >= 0; )
+ {
+ if (m4_has_syntax (syntax, ch, M4_SYNTAX_BCOMM))
+ {
+ if (bcomm == -1)
+ bcomm = ch;
+ else
+ {
+ syntax->is_single_comments = false;
+ break;
+ }
+ }
+ if (m4_has_syntax (syntax, ch, M4_SYNTAX_ECOMM))
+ {
+ if (ecomm == -1)
+ ecomm = ch;
+ else
+ {
+ syntax->is_single_comments = false;
+ break;
+ }
+ }
+ }
+ if (bcomm == -1 || ecomm == -1)
+ syntax->is_single_comments = false;
+ else if (syntax->is_single_comments)
+ {
+ *syntax->bcomm.string = bcomm;
+ *syntax->ecomm.string = ecomm;
+ }
+ return syntax->is_single_comments;
+}
+
+static bool
check_is_macro_escaped (m4_syntax_table *syntax)
{
int ch;
@@ -277,7 +537,7 @@
/* Functions for setting quotes and comment delimiters. Used by
- m4_changecom () and m4_changequote (). Both functions overrides the
+ m4_changecom () and m4_changequote (). Both functions override the
syntax table to maintain compatibility. */
void
m4_set_quotes (m4_syntax_table *syntax, const char *lq, const char *rq)
@@ -286,20 +546,48 @@
assert (syntax);
- for (ch = 256; --ch >= 0;) /* changequote overrides syntax_table */
- if (m4_has_syntax (syntax, ch, M4_SYNTAX_LQUOTE|M4_SYNTAX_RQUOTE))
- remove_syntax_attribute (syntax, ch, M4_SYNTAX_LQUOTE|M4_SYNTAX_RQUOTE);
-
free (syntax->lquote.string);
free (syntax->rquote.string);
- syntax->lquote.string = xstrdup (lq ? lq : DEF_LQUOTE);
+ /* POSIX states that with 0 arguments, the default quotes are used.
+ POSIX XCU ERN 112 states that behavior is implementation-defined
+ if there was only one argument, or if there is an empty string in
+ either position when there are two arguments. We allow an empty
+ left quote to disable quoting, but a non-empty left quote will
+ always create a non-empty right quote. See the texinfo for what
+ some other implementations do. */
+ if (!lq)
+ {
+ lq = DEF_LQUOTE;
+ rq = DEF_RQUOTE;
+ }
+ else if (!rq || (*lq && !*rq))
+ rq = DEF_RQUOTE;
+
+ syntax->lquote.string = xstrdup (lq);
syntax->lquote.length = strlen (syntax->lquote.string);
- syntax->rquote.string = xstrdup (rq ? rq : DEF_RQUOTE);
+ syntax->rquote.string = xstrdup (rq);
syntax->rquote.length = strlen (syntax->rquote.string);
- syntax->is_single_quotes = (syntax->lquote.length == 1
- && syntax->rquote.length == 1);
+ /* changequote overrides syntax_table, but be careful when it is
+ used to select a start-quote sequence that is effectively
+ disabled. */
+
+ syntax->is_single_quotes
+ = (syntax->lquote.length == 1 && syntax->rquote.length == 1
+ && !m4_has_syntax (syntax, to_uchar (*syntax->lquote.string),
+ (M4_SYNTAX_IGNORE | M4_SYNTAX_ESCAPE
+ | M4_SYNTAX_ALPHA | M4_SYNTAX_NUM)));
+
+ for (ch = 256; --ch >= 0;)
+ {
+ if (m4_has_syntax (syntax, ch, M4_SYNTAX_LQUOTE))
+ add_syntax_attribute (syntax, ch,
+ (syntax->orig[ch] == M4_SYNTAX_LQUOTE
+ ? M4_SYNTAX_OTHER : syntax->orig[ch]));
+ if (m4_has_syntax (syntax, ch, M4_SYNTAX_RQUOTE))
+ remove_syntax_attribute (syntax, ch, M4_SYNTAX_RQUOTE);
+ }
if (syntax->is_single_quotes)
{
@@ -320,21 +608,46 @@
assert (syntax);
- for (ch = 256; --ch >= 0;) /* changecom overrides syntax_table */
- if (m4_has_syntax (syntax, ch, M4_SYNTAX_BCOMM|M4_SYNTAX_ECOMM))
- remove_syntax_attribute (syntax, ch, M4_SYNTAX_BCOMM|M4_SYNTAX_ECOMM);
-
free (syntax->bcomm.string);
free (syntax->ecomm.string);
- syntax->bcomm.string = xstrdup (bc ? bc : DEF_BCOMM);
+ /* POSIX requires no arguments to disable comments. It requires
+ empty arguments to be used as-is, but this is counter to
+ traditional behavior, because a non-null begin and null end makes
+ it impossible to end a comment. An aardvark has been filed:
+ http://www.opengroup.org/austin/mailarchives/ag-review/msg02168.html
+ This implementation assumes the aardvark will be approved. See
+ the texinfo for what some other implementations do. */
+ if (!bc)
+ bc = ec = "";
+ else if (!ec || (*bc && !*ec))
+ ec = DEF_ECOMM;
+
+ syntax->bcomm.string = xstrdup (bc);
syntax->bcomm.length = strlen (syntax->bcomm.string);
- syntax->ecomm.string = xstrdup (ec ? ec : DEF_ECOMM);
+ syntax->ecomm.string = xstrdup (ec);
syntax->ecomm.length = strlen (syntax->ecomm.string);
- syntax->is_single_comments = (syntax->bcomm.length == 1
- && syntax->ecomm.length == 1);
+ /* changecom overrides syntax_table, but be careful when it is used
+ to select a start-comment sequence that is effectively
+ disabled. */
+
+ syntax->is_single_comments
+ = (syntax->bcomm.length == 1 && syntax->ecomm.length == 1
+ && !m4_has_syntax (syntax, to_uchar (*syntax->bcomm.string),
+ (M4_SYNTAX_IGNORE | M4_SYNTAX_ESCAPE
+ | M4_SYNTAX_ALPHA | M4_SYNTAX_NUM
+ | M4_SYNTAX_LQUOTE)));
+ for (ch = 256; --ch >= 0;)
+ {
+ if (m4_has_syntax (syntax, ch, M4_SYNTAX_BCOMM))
+ add_syntax_attribute (syntax, ch,
+ (syntax->orig[ch] == M4_SYNTAX_BCOMM
+ ? M4_SYNTAX_OTHER : syntax->orig[ch]));
+ if (m4_has_syntax (syntax, ch, M4_SYNTAX_ECOMM))
+ remove_syntax_attribute (syntax, ch, M4_SYNTAX_ECOMM);
+ }
if (syntax->is_single_comments)
{
add_syntax_attribute (syntax, to_uchar (syntax->bcomm.string[0]),
[Prev in Thread] Current Thread [Next in Thread]
- Changes to m4/m4/syntax.c,v, Eric Blake <=