[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] Add !HAVE_MBRTOWC fallbacks for mbchar, mbiter and mbuiter
From: |
Miloslav Trmac |
Subject: |
[PATCH] Add !HAVE_MBRTOWC fallbacks for mbchar, mbiter and mbuiter |
Date: |
Wed, 14 Feb 2007 18:13:00 +0100 |
User-agent: |
Thunderbird 1.5.0.9 (X11/20070212) |
Hello,
I'm adding multibyte character support to the info viewer using the
mbiter and mbuiter modules. Those modules currently depend on
mbrtowc () and friends.
The mbs* functions in gnulib currently have two separate implementations
of most algorithms, one for HAVE_MBRTOWC using mbchar, and one for
!HAVE_MBRTOWC. To avoid duplicating the large amount of text-processing
code in the info viewer in a similar way, the attached patches add a
!HAVE_MBRTOWC implementation of the mbiter, mbuiter and mbchar interfaces.
* lib/mbchar.h: #include <stddef.h> for ptrdiff_t.
* lib/mbchar.h
* lib/mbiter.h
* lib/mbuiter.h: Add an implementation for !HAVE_MBRTOWC.
* m4/mbchar.m4: Require gl_FUNC_MBRTOWC.
* m4/mbiter.m4: Remove an obsolete comment.
* modules/mbchar: Include m4/mbrtowc.m4.
* modules/mbiter
* modules/mbuiter: Don't suggest #if HAVE_MBRTOWC around header
#includes.
Thanks,
Mirek
Index: lib/mbchar.h
===================================================================
RCS file: /sources/gnulib/gnulib/lib/mbchar.h,v
retrieving revision 1.10
diff -u -r1.10 mbchar.h
--- lib/mbchar.h 27 Dec 2006 19:54:25 -0000 1.10
+++ lib/mbchar.h 14 Feb 2007 16:59:55 -0000
@@ -146,8 +146,15 @@
#define _MBCHAR_H 1
#include <stdbool.h>
+#include <stddef.h>
#include <string.h>
+/* Multibyte characters could in principle be handled without mbrtowc (), but
+ all current users of mbchar (mbfile, mbiter and mbuiter) need mbrtowc (),
+ so their !HAVE_MBRTOWC fallbacks need a non-multibyte mbchar
+ implementation. */
+#if HAVE_MBRTOWC
+
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
<wchar.h>.
BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before
@@ -274,6 +281,110 @@
new_mbc->wc = old_mbc->wc;
}
+#else /* not HAVE_MBRTOWC */
+
+#include <ctype.h>
+
+struct mbchar
+{
+ const char *ptr; /* pointer to current character */
+ bool c_valid; /* true if c is a valid character */
+ unsigned char c; /* if c_valid: the current character */
+ char buf[1]; /* room for the character, used for file input only */
+};
+
+/* EOF (not a real character) is represented with c_valid = false. */
+
+typedef struct mbchar mbchar_t;
+
+/* Access the current character. */
+#define mb_ptr(mbc) ((mbc).ptr)
+#define mb_len(mbc) ((mbc).c_valid ? 1 : 0)
+
+/* Comparison of characters. */
+#define mb_iseq(mbc, sc) ((mbc).c_valid && (mbc).c == (sc))
+#define mb_isnul(mbc) ((mbc).c_valid && (mbc).c == 0)
+#define mb_cmp(mbc1, mbc2) \
+ ((mbc1).c_valid \
+ ? ((mbc2).c_valid \
+ ? (int) (mbc1).c - (int) (mbc2).c \
+ : -1) \
+ : ((mbc2).c_valid \
+ ? 1 \
+ : 0))
+#define mb_casecmp(mbc1, mbc2) \
+ ((mbc1).c_valid \
+ ? ((mbc2).c_valid \
+ ? ((int) (unsigned char) tolower ((mbc1).c) \
+ - (int) (unsigned char) tolower ((mbc2).c)) \
+ : -1) \
+ : ((mbc2).c_valid \
+ ? 1 \
+ : 0))
+#define mb_equal(mbc1, mbc2) \
+ ((mbc1).c_valid && (mbc2).c_valid \
+ ? (mbc1).c == (mbc2).c \
+ : !(mbc1).c_valid && !(mbc2).c_valid)
+#define mb_caseequal(mbc1, mbc2) \
+ ((mbc1).c_valid && (mbc2).c_valid \
+ ? tolower ((mbc1).c) == tolower ((mbc2).c) \
+ : !(mbc1).c_valid && !(mbc2).c_valid)
+
+/* <ctype.h>, <wctype.h> classification. */
+#define mb_isascii(mbc) ((mbc).c_valid && (mbc).c <= 127)
+#define mb_isalnum(mbc) ((mbc).c_valid && isalnum ((mbc).c))
+#define mb_isalpha(mbc) ((mbc).c_valid && isalpha ((mbc).c))
+#define mb_isblank(mbc) ((mbc).c_valid && isblank ((mbc).c))
+#define mb_iscntrl(mbc) ((mbc).c_valid && iscntrl ((mbc).c))
+#define mb_isdigit(mbc) ((mbc).c_valid && isdigit ((mbc).c))
+#define mb_isgraph(mbc) ((mbc).c_valid && isgraph ((mbc).c))
+#define mb_islower(mbc) ((mbc).c_valid && islower ((mbc).c))
+#define mb_isprint(mbc) ((mbc).c_valid && isprint ((mbc).c))
+#define mb_ispunct(mbc) ((mbc).c_valid && ispunct ((mbc).c))
+#define mb_isspace(mbc) ((mbc).c_valid && isspace ((mbc).c))
+#define mb_isupper(mbc) ((mbc).c_valid && isupper ((mbc).c))
+#define mb_isxdigit(mbc) ((mbc).c_valid && isxdigit ((mbc).c))
+
+/* Extra <wchar.h> function. */
+
+/* Unprintable characters appear as a small box of width 1. */
+#define MB_UNPRINTABLE_WIDTH 1
+
+static inline int
+mb_width_aux (int c)
+{
+ /* For unprintable characters, arbitrarily return 0 for control characters
+ and MB_UNPRINTABLE_WIDTH otherwise. */
+ return isprint (c) ? 1 : iscntrl (c) ? 0 : MB_UNPRINTABLE_WIDTH;
+}
+
+#define mb_width(mbc) \
+ ((mbc).c_valid ? mb_width_aux ((mbc).c) : MB_UNPRINTABLE_WIDTH)
+
+/* Output. */
+#define mb_putc(mbc, stream) fwrite ((mbc).ptr, 1, mb_len (mbc), (stream))
+
+/* Assignment. */
+#define mb_setascii(mbc, sc) \
+ ((mbc)->ptr = (mbc)->buf, (mbc)->c_valid = 1, \
+ (mbc)->c = (mbc)->buf[0] = (sc))
+
+/* Copying a character. */
+static inline void
+mb_copy (mbchar_t *new_mbc, const mbchar_t *old_mbc)
+{
+ if (old_mbc->ptr == &old_mbc->buf[0])
+ {
+ new_mbc->buf[0] = old_mbc->buf[0];
+ new_mbc->ptr = &new_mbc->buf[0];
+ }
+ else
+ new_mbc->ptr = old_mbc->ptr;
+ if ((new_mbc->c_valid = old_mbc->c_valid))
+ new_mbc->c = old_mbc->c;
+}
+
+#endif /* not HAVE_MBRTOWC */
/* is_basic(c) tests whether the single-byte character c is in the
ISO C "basic character set".
Index: lib/mbiter.h
===================================================================
RCS file: /sources/gnulib/gnulib/lib/mbiter.h,v
retrieving revision 1.3
diff -u -r1.3 mbiter.h
--- lib/mbiter.h 11 Feb 2007 17:17:09 -0000 1.3
+++ lib/mbiter.h 14 Feb 2007 16:59:55 -0000
@@ -87,6 +87,8 @@
#include <stdbool.h>
#include <string.h>
+#ifdef HAVE_MBRTOWC
+
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
<wchar.h>.
BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before
@@ -173,13 +175,6 @@
}
static inline void
-mbiter_multi_reloc (struct mbiter_multi *iter, ptrdiff_t ptrdiff)
-{
- iter->cur.ptr += ptrdiff;
- iter->limit += ptrdiff;
-}
-
-static inline void
mbiter_multi_copy (struct mbiter_multi *new_iter, const struct mbiter_multi *old_iter)
{
new_iter->limit = old_iter->limit;
@@ -202,6 +197,59 @@
#define mbi_advance(iter) \
((iter).cur.ptr += (iter).cur.bytes, (iter).next_done = false)
+#else /* not HAVE_MBRTOWC */
+
+#include "mbchar.h"
+
+struct mbiter_multi
+{
+ const char *limit; /* pointer to end of string */
+ bool next_done; /* true if mbi_avail has already filled the following */
+ struct mbchar cur; /* the current character:
+ const char *cur.ptr pointer to current character
+ The following are only valid after mbi_avail.
+ bool cur.c_valid true if c is a valid character
+ unsigned char cur.c if c_valid: the current character
+ */
+};
+
+static inline void
+mbiter_multi_next (struct mbiter_multi *iter)
+{
+ if (iter->next_done)
+ return;
+ iter->cur.c = *iter->cur.ptr;
+ iter->cur.c_valid = true;
+ iter->next_done = true;
+}
+
+static inline void
+mbiter_multi_copy (struct mbiter_multi *new_iter, const struct mbiter_multi *old_iter)
+{
+ new_iter->limit = old_iter->limit;
+ new_iter->next_done = old_iter->next_done;
+ mb_copy (&new_iter->cur, &old_iter->cur);
+}
+
+/* Iteration macros. */
+typedef struct mbiter_multi mbi_iterator_t;
+#define mbi_init(iter, startptr, length) \
+ ((iter).cur.ptr = (startptr), (iter).limit = (iter).cur.ptr + (length), \
+ (iter).next_done = false)
+#define mbi_avail(iter) \
+ ((iter).cur.ptr < (iter).limit && (mbiter_multi_next (&(iter)), true))
+#define mbi_advance(iter) \
+ ((iter).cur.ptr++, (iter).next_done = false)
+
+#endif /* not HAVE_MBRTOWC */
+
+static inline void
+mbiter_multi_reloc (struct mbiter_multi *iter, ptrdiff_t ptrdiff)
+{
+ iter->cur.ptr += ptrdiff;
+ iter->limit += ptrdiff;
+}
+
/* Access to the current character. */
#define mbi_cur(iter) (iter).cur
#define mbi_cur_ptr(iter) (iter).cur.ptr
Index: lib/mbuiter.h
===================================================================
RCS file: /sources/gnulib/gnulib/lib/mbuiter.h,v
retrieving revision 1.2
diff -u -r1.2 mbuiter.h
--- lib/mbuiter.h 11 Feb 2007 17:17:09 -0000 1.2
+++ lib/mbuiter.h 14 Feb 2007 16:59:55 -0000
@@ -95,6 +95,8 @@
#include <stdlib.h>
#include <string.h>
+#ifdef HAVE_MBRTOWC
+
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
<wchar.h>.
BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before
@@ -182,12 +184,6 @@
}
static inline void
-mbuiter_multi_reloc (struct mbuiter_multi *iter, ptrdiff_t ptrdiff)
-{
- iter->cur.ptr += ptrdiff;
-}
-
-static inline void
mbuiter_multi_copy (struct mbuiter_multi *new_iter, const struct mbuiter_multi *old_iter)
{
if ((new_iter->in_shift = old_iter->in_shift))
@@ -209,6 +205,56 @@
#define mbui_advance(iter) \
((iter).cur.ptr += (iter).cur.bytes, (iter).next_done = false)
+#else /* not HAVE_MBRTOWC */
+
+#include "mbchar.h"
+
+struct mbuiter_multi
+{
+ bool next_done; /* true if mbui_avail has already filled the following */
+ struct mbchar cur; /* the current character:
+ const char *cur.ptr pointer to current character
+ The following are only valid after mbui_avail.
+ bool cur.c_valid true if c is a valid character
+ unsigned char cur.c if c_valid: the current character
+ */
+};
+
+static inline void
+mbuiter_multi_next (struct mbuiter_multi *iter)
+{
+ if (iter->next_done)
+ return;
+ iter->cur.c = *iter->cur.ptr;
+ iter->cur.c_valid = true;
+ iter->next_done = true;
+}
+
+static inline void
+mbuiter_multi_copy (struct mbuiter_multi *new_iter, const struct mbuiter_multi *old_iter)
+{
+ new_iter->next_done = old_iter->next_done;
+ mb_copy (&new_iter->cur, &old_iter->cur);
+}
+
+/* Iteration macros. */
+typedef struct mbuiter_multi mbui_iterator_t;
+#define mbui_init(iter, startptr) \
+ ((iter).cur.ptr = (startptr), \
+ (iter).next_done = false)
+#define mbui_avail(iter) \
+ (mbuiter_multi_next (&(iter)), !mb_isnul ((iter).cur))
+#define mbui_advance(iter) \
+ ((iter).cur.ptr++, (iter).next_done = false)
+
+#endif /* not HAVE_MBRTOWC */
+
+static inline void
+mbuiter_multi_reloc (struct mbuiter_multi *iter, ptrdiff_t ptrdiff)
+{
+ iter->cur.ptr += ptrdiff;
+}
+
/* Access to the current character. */
#define mbui_cur(iter) (iter).cur
#define mbui_cur_ptr(iter) (iter).cur.ptr
Index: m4/mbchar.m4
===================================================================
RCS file: /sources/gnulib/gnulib/m4/mbchar.m4,v
retrieving revision 1.7
diff -u -r1.7 mbchar.m4
--- m4/mbchar.m4 28 Jan 2007 16:00:03 -0000 1.7
+++ m4/mbchar.m4 14 Feb 2007 16:59:55 -0000
@@ -10,4 +10,5 @@
AC_DEFUN([gl_MBCHAR],
[
AC_REQUIRE([AC_GNU_SOURCE])
+ AC_REQUIRE([gl_FUNC_MBRTOWC])
])
Index: m4/mbiter.m4
===================================================================
RCS file: /sources/gnulib/gnulib/m4/mbiter.m4,v
retrieving revision 1.2
diff -u -r1.2 mbiter.m4
--- m4/mbiter.m4 26 Sep 2005 13:58:51 -0000 1.2
+++ m4/mbiter.m4 14 Feb 2007 16:59:55 -0000
@@ -10,8 +10,6 @@
AC_DEFUN([gl_MBITER],
[
AC_REQUIRE([AC_TYPE_MBSTATE_T])
- dnl The following line is that so the user can test HAVE_MBRTOWC before
- dnl #include "mbiter.h" or "mbuiter.h".
AC_REQUIRE([gl_FUNC_MBRTOWC])
:
])
Index: modules/mbchar
===================================================================
RCS file: /sources/gnulib/gnulib/modules/mbchar,v
retrieving revision 1.10
diff -u -r1.10 mbchar
--- modules/mbchar 28 Jan 2007 16:00:02 -0000 1.10
+++ modules/mbchar 14 Feb 2007 16:59:55 -0000
@@ -5,6 +5,7 @@
lib/mbchar.h
lib/mbchar.c
m4/mbchar.m4
+m4/mbrtowc.m4
Depends-on:
stdbool
Index: modules/mbiter
===================================================================
RCS file: /sources/gnulib/gnulib/modules/mbiter,v
retrieving revision 1.2
diff -u -r1.2 mbiter
--- modules/mbiter 26 Sep 2005 13:58:51 -0000 1.2
+++ modules/mbiter 14 Feb 2007 16:59:55 -0000
@@ -17,9 +17,7 @@
lib_SOURCES += mbiter.h
Include:
-#if HAVE_MBRTOWC
#include "mbiter.h"
-#endif
License:
LGPL
Index: modules/mbuiter
===================================================================
RCS file: /sources/gnulib/gnulib/modules/mbuiter,v
retrieving revision 1.2
diff -u -r1.2 mbuiter
--- modules/mbuiter 26 Sep 2005 13:58:51 -0000 1.2
+++ modules/mbuiter 14 Feb 2007 16:59:55 -0000
@@ -18,9 +18,7 @@
lib_SOURCES += mbuiter.h
Include:
-#if HAVE_MBRTOWC
#include "mbuiter.h"
-#endif
License:
LGPL
- [PATCH] Add !HAVE_MBRTOWC fallbacks for mbchar, mbiter and mbuiter,
Miloslav Trmac <=