[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 3/4] dfa: simplify charclass by assuming C99
From: Paul Eggert
Subject: [PATCH 3/4] dfa: simplify charclass by assuming C99
Date: Tue, 17 Dec 2019 21:47:23 -0800
* lib/dfa.c (CHARCLASS_WORD_BITS): Now always 64.
(charclass_word): Now always uint_fast64_t.
(CHARCLASS_PAIR): Remove.
(CHARCLASS_INIT): Take 4 arguments instead of 8. All uses changed.
---
ChangeLog | 6 ++++++
lib/dfa.c | 35 +++++++++++------------------------
2 files changed, 17 insertions(+), 24 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index f22770294..8d0595bbc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
2019-12-17 Paul Eggert <address@hidden>
+ dfa: simplify charclass by assuming C99
+ * lib/dfa.c (CHARCLASS_WORD_BITS): Now always 64.
+ (charclass_word): Now always uint_fast64_t.
+ (CHARCLASS_PAIR): Remove.
+ (CHARCLASS_INIT): Take 4 arguments instead of 8. All uses changed.
+
fts: tune via calloc
* lib/fts.c (fts_open): Prefer calloc to malloc + memset.
diff --git a/lib/dfa.c b/lib/dfa.c
index dd2fa84fd..385125f52 100644
--- a/lib/dfa.c
+++ b/lib/dfa.c
@@ -82,28 +82,15 @@ isasciidigit (char c)
/* First integer value that is greater than any character code. */
enum { NOTCHAR = 1 << CHAR_BIT };
+/* Number of bits used in a charclass word. */
+enum { CHARCLASS_WORD_BITS = 64 };
+
/* This represents part of a character class. It must be unsigned and
at least CHARCLASS_WORD_BITS wide. Any excess bits are zero. */
-typedef unsigned long int charclass_word;
-
-/* CHARCLASS_WORD_BITS is the number of bits used in a charclass word.
- CHARCLASS_PAIR (LO, HI) is part of a charclass initializer, and
- represents 64 bits' worth of a charclass, where LO and HI are the
- low and high-order 32 bits of the 64-bit quantity. */
-#if ULONG_MAX >> 31 >> 31 < 3
-enum { CHARCLASS_WORD_BITS = 32 };
-# define CHARCLASS_PAIR(lo, hi) lo, hi
-#else
-enum { CHARCLASS_WORD_BITS = 64 };
-# define CHARCLASS_PAIR(lo, hi) (((charclass_word) (hi) << 32) + (lo))
-#endif
+typedef uint_fast64_t charclass_word;
-/* An initializer for a charclass whose 32-bit words are A through H. */
-#define CHARCLASS_INIT(a, b, c, d, e, f, g, h) \
- {{ \
- CHARCLASS_PAIR (a, b), CHARCLASS_PAIR (c, d), \
- CHARCLASS_PAIR (e, f), CHARCLASS_PAIR (g, h) \
- }}
+/* An initializer for a charclass whose 64-bit words are A through D. */
+#define CHARCLASS_INIT(a, b, c, d) {{a, b, c, d}}
/* The maximum useful value of a charclass_word; all used bits are 1. */
static charclass_word const CHARCLASS_WORD_MASK
@@ -1684,19 +1671,19 @@ add_utf8_anychar (struct dfa *dfa)
{
static charclass const utf8_classes[5] = {
/* 80-bf: non-leading bytes. */
- CHARCLASS_INIT (0, 0, 0, 0, 0xffffffff, 0xffffffff, 0, 0),
+ CHARCLASS_INIT (0, 0, 0xffffffffffffffff, 0),
/* 00-7f: 1-byte sequence. */
- CHARCLASS_INIT (0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0, 0, 0, 0),
+ CHARCLASS_INIT (0xffffffffffffffff, 0xffffffffffffffff, 0, 0),
/* c2-df: 2-byte sequence. */
- CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0xfffffffc, 0),
+ CHARCLASS_INIT (0, 0, 0, 0x00000000fffffffc),
/* e0-ef: 3-byte sequence. */
- CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0, 0xffff),
+ CHARCLASS_INIT (0, 0, 0, 0x0000ffff00000000),
/* f0-f7: 4-byte sequence. */
- CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0, 0xff0000)
+ CHARCLASS_INIT (0, 0, 0, 0x00ff000000000000)
};
int n = sizeof utf8_classes / sizeof *utf8_classes;
--
2.17.1