[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[VAR_NAME_LEN 11/17] variable-parser: Rewrite parse_DATA_LIST_vars().
From: |
Ben Pfaff |
Subject: |
[VAR_NAME_LEN 11/17] variable-parser: Rewrite parse_DATA_LIST_vars(). |
Date: |
Sat, 5 Feb 2011 13:25:53 -0800 |
This rewrite was prompted by getting rid of the VAR_NAME_LEN limit inside
parse_DATA_LIST_vars(), but then I noticed that the variable naming and
coding style was dated, and that duplicate variable names were only
detected for variables named using TO, not for individual names, so I
rewrote much of the code instead.
---
src/language/lexer/variable-parser.c | 207 +++++++++++++++++++---------------
1 files changed, 115 insertions(+), 92 deletions(-)
diff --git a/src/language/lexer/variable-parser.c
b/src/language/lexer/variable-parser.c
index 5b698bb..0cefb5d 100644
--- a/src/language/lexer/variable-parser.c
+++ b/src/language/lexer/variable-parser.c
@@ -1,5 +1,5 @@
/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2009, 2010 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2009, 2010, 2011 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,6 +19,7 @@
#include "language/lexer/variable-parser.h"
#include <ctype.h>
+#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
@@ -36,6 +37,7 @@
#include "libpspp/str.h"
#include "libpspp/stringi-set.h"
+#include "gl/c-ctype.h"
#include "gl/xalloc.h"
#include "gettext.h"
@@ -355,86 +357,98 @@ fail:
return 0;
}
-/* Extracts a numeric suffix from variable name S, copying it
- into string R. Sets *D to the length of R and *N to its
- value. */
+/* Attempts to break UTF-8 encoded NAME into a root (whose contents are
+ arbitrary except that it does not end in a digit) followed by an integer
+ numeric suffix. On success, stores the value of the suffix into *NUMBERP,
+ the number of digits in the suffix into *N_DIGITSP, and returns the number
+ of bytes in the root. On failure, returns 0. */
static int
-extract_num (char *s, char *r, int *n, int *d)
+extract_numeric_suffix (const char *name,
+ unsigned long int *numberp, int *n_digitsp)
{
- char *cp;
-
- /* Find first digit. */
- cp = s + strlen (s) - 1;
- while (isdigit ((unsigned char) *cp) && cp > s)
- cp--;
- cp++;
+ size_t root_len, n_digits;
+ size_t i;
- /* Extract root. */
- strncpy (r, s, cp - s);
- r[cp - s] = 0;
+ /* Count length of root. */
+ root_len = 1; /* Valid identifier never starts with digit. */
+ for (i = 1; name[i] != '\0'; i++)
+ if (!c_isdigit (name[i]))
+ root_len = i + 1;
+ n_digits = i - root_len;
- /* Count initial zeros. */
- *n = *d = 0;
- while (*cp == '0')
+ if (n_digits == 0)
{
- (*d)++;
- cp++;
+ msg (SE, _("`%s' cannot be used with TO because it does not end in "
+ "a digit."), name);
+ return 0;
}
- /* Extract value. */
- while (isdigit ((unsigned char) *cp))
+ *numberp = strtoull (name + root_len, NULL, 10);
+ if (*numberp == ULONG_MAX)
{
- (*d)++;
- *n = (*n * 10) + (*cp - '0');
- cp++;
+ msg (SE, _("Numeric suffix on `%s' is larger than supported with TO."),
+ name);
+ return 0;
}
+ *n_digitsp = n_digits;
+ return root_len;
+}
- /* Sanity check. */
- if (*n == 0 && *d == 0)
+static bool
+add_var_name (char *name,
+ char ***names, size_t *n_vars, size_t *allocated_vars,
+ struct stringi_set *set, int pv_opts)
+{
+ if (pv_opts & PV_NO_DUPLICATE && !stringi_set_insert (set, name))
{
- msg (SE, _("incorrect use of TO convention"));
- return 0;
+ msg (SE, _("Variable %s appears twice in variable list."),
+ name);
+ return false;
}
- return 1;
+
+ if (*n_vars >= *allocated_vars)
+ *names = x2nrealloc (*names, allocated_vars, sizeof **names);
+ (*names)[(*n_vars)++] = name;
+ return true;
}
/* Parses a list of variable names according to the DATA LIST version
of the TO convention. */
bool
-parse_DATA_LIST_vars (struct lexer *lexer, char ***names,
- size_t *nnames, int pv_opts)
+parse_DATA_LIST_vars (struct lexer *lexer, char ***namesp,
+ size_t *n_varsp, int pv_opts)
{
- int n1, n2;
- int d1, d2;
- int n;
- size_t nvar, mvar;
- char name1[VAR_NAME_LEN + 1], name2[VAR_NAME_LEN + 1];
- char root1[VAR_NAME_LEN + 1], root2[VAR_NAME_LEN + 1];
+ char **names;
+ size_t n_vars;
+ size_t allocated_vars;
+
struct stringi_set set;
- int success = 0;
- assert (names != NULL);
- assert (nnames != NULL);
+ char *name1 = NULL;
+ char *name2 = NULL;
+ bool ok = false;
+
assert ((pv_opts & ~(PV_APPEND | PV_SINGLE
| PV_NO_SCRATCH | PV_NO_DUPLICATE)) == 0);
stringi_set_init (&set);
if (pv_opts & PV_APPEND)
{
- nvar = mvar = *nnames;
+ n_vars = allocated_vars = *n_varsp;
+ names = *namesp;
if (pv_opts & PV_NO_DUPLICATE)
{
size_t i;
- for (i = 0; i < nvar; i++)
- stringi_set_insert (&set, (*names)[i]);
+ for (i = 0; i < n_vars; i++)
+ stringi_set_insert (&set, names[i]);
}
}
else
{
- nvar = mvar = 0;
- *names = NULL;
+ n_vars = allocated_vars = 0;
+ names = NULL;
}
do
@@ -442,73 +456,75 @@ parse_DATA_LIST_vars (struct lexer *lexer, char ***names,
if (lex_token (lexer) != T_ID)
{
lex_error (lexer, "expecting variable name");
- goto fail;
+ goto exit;
}
if (dict_class_from_id (lex_tokcstr (lexer)) == DC_SCRATCH
&& (pv_opts & PV_NO_SCRATCH))
{
msg (SE, _("Scratch variables not allowed here."));
- goto fail;
+ goto exit;
}
- strcpy (name1, lex_tokcstr (lexer));
+ name1 = xstrdup (lex_tokcstr (lexer));
lex_get (lexer);
if (lex_token (lexer) == T_TO)
{
+ unsigned long int num1, num2;
+ int n_digits1, n_digits2;
+ int root_len1, root_len2;
+ unsigned long int number;
+
lex_get (lexer);
if (lex_token (lexer) != T_ID)
{
lex_error (lexer, "expecting variable name");
- goto fail;
+ goto exit;
}
- strcpy (name2, lex_tokcstr (lexer));
+ name2 = xstrdup (lex_tokcstr (lexer));
lex_get (lexer);
- if (!extract_num (name1, root1, &n1, &d1)
- || !extract_num (name2, root2, &n2, &d2))
- goto fail;
+ root_len1 = extract_numeric_suffix (name1, &num1, &n_digits1);
+ if (root_len1 == 0)
+ goto exit;
+
+ root_len2 = extract_numeric_suffix (name2, &num2, &n_digits2);
+ if (root_len2 == 0)
+ goto exit;
- if (strcasecmp (root1, root2))
+ if (root_len1 != root_len2 || memcasecmp (name1, name2, root_len1))
{
msg (SE, _("Prefixes don't match in use of TO convention."));
- goto fail;
+ goto exit;
}
- if (n1 > n2)
+ if (num1 > num2)
{
msg (SE, _("Bad bounds in use of TO convention."));
- goto fail;
- }
- if (d2 > d1)
- d2 = d1;
-
- if (mvar < nvar + (n2 - n1 + 1))
- {
- mvar += ROUND_UP (n2 - n1 + 1, 16);
- *names = xnrealloc (*names, mvar, sizeof **names);
+ goto exit;
}
- for (n = n1; n <= n2; n++)
+ for (number = num1; number <= num2; number++)
{
- char name[VAR_NAME_LEN + 1];
- sprintf (name, "%s%0*d", root1, d1, n);
-
- if (pv_opts & PV_NO_DUPLICATE && !stringi_set_insert (&set,
name))
+ char *name = xasprintf ("%.*s%0*lu",
+ root_len1, name1,
+ n_digits1, number);
+ if (!add_var_name (name, &names, &n_vars, &allocated_vars,
+ &set, pv_opts))
{
- msg (SE, _("Variable %s appears twice in variable list."),
- name);
- goto fail;
+ free (name);
+ goto exit;
}
- (*names)[nvar] = xstrdup (name);
- nvar++;
}
+
+ free (name1);
+ name1 = NULL;
+ free (name2);
+ name2 = NULL;
}
else
{
- if (nvar >= mvar)
- {
- mvar += 16;
- *names = xnrealloc (*names, mvar, sizeof **names);
- }
- (*names)[nvar++] = xstrdup (name1);
+ if (!add_var_name (name1, &names, &n_vars, &allocated_vars,
+ &set, pv_opts))
+ goto exit;
+ name1 = NULL;
}
lex_match (lexer, T_COMMA);
@@ -517,21 +533,28 @@ parse_DATA_LIST_vars (struct lexer *lexer, char ***names,
break;
}
while (lex_token (lexer) == T_ID);
- success = 1;
+ ok = true;
-fail:
- *nnames = nvar;
+exit:
stringi_set_destroy (&set);
- if (!success)
+ if (ok)
+ {
+ *namesp = names;
+ *n_varsp = n_vars;
+ }
+ else
{
int i;
- for (i = 0; i < nvar; i++)
- free ((*names)[i]);
- free (*names);
- *names = NULL;
- *nnames = 0;
+ for (i = 0; i < n_vars; i++)
+ free (names[i]);
+ free (names);
+ *namesp = NULL;
+ *n_varsp = 0;
+
+ free (name1);
+ free (name2);
}
- return success;
+ return ok;
}
/* Registers each of the NAMES[0...NNAMES - 1] in POOL, as well
--
1.7.1
- [VAR_NAME_LEN 07/17] DATAFILE ATTRIBUTE, VARIABLE ATTRIBUTE: Eliminate VAR_NAME_LEN limit., (continued)
- [VAR_NAME_LEN 07/17] DATAFILE ATTRIBUTE, VARIABLE ATTRIBUTE: Eliminate VAR_NAME_LEN limit., Ben Pfaff, 2011/02/05
- [VAR_NAME_LEN 06/17] GET DATA /TYPE=TXT: Get rid of VAR_NAME_LEN limit on variable names., Ben Pfaff, 2011/02/05
- [VAR_NAME_LEN 09/17] VECTOR: Eliminate VAR_NAME_LEN limit for variable names., Ben Pfaff, 2011/02/05
- [VAR_NAME_LEN 17/17] por-file-reader: Remove dependency on VAR_NAME_LEN., Ben Pfaff, 2011/02/05
- [VAR_NAME_LEN 02/17] variable: Remove VAR_NAME_LEN limit for internal representation of name., Ben Pfaff, 2011/02/05
- [VAR_NAME_LEN 10/17] DEBUG EVALUATE: Eliminate VAR_NAME_LEN limit., Ben Pfaff, 2011/02/05
- [VAR_NAME_LEN 04/17] combine-files: Eliminate VAR_NAME_LEN restriction from combine_files()., Ben Pfaff, 2011/02/05
- [VAR_NAME_LEN 15/17] REGRESSION: Eliminate restriction to VAR_NAME_LEN in reg_get_name()., Ben Pfaff, 2011/02/05
- [VAR_NAME_LEN 13/17] DESCRIPTIVES: Eliminate main restriction on Z-score variable name length., Ben Pfaff, 2011/02/05
- [VAR_NAME_LEN 16/17] text-data-import-dialog: Eliminate VAR_NAME_LEN restriction., Ben Pfaff, 2011/02/05
- [VAR_NAME_LEN 11/17] variable-parser: Rewrite parse_DATA_LIST_vars().,
Ben Pfaff <=
- [VAR_NAME_LEN 12/17] variable-parser: Drop VAR_NAME_LEN restriction from var_set_lookup_var_idx()., Ben Pfaff, 2011/02/05
- Re: [VAR_NAME_LEN 00/17] Eliminate uses of VAR_NAME_LEN, John Darrington, 2011/02/08