[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemacs-commit] qemacs charset.c charsetjis.c extras.c qe.c qe....
From: Charlie Gordon
Subject: [Qemacs-commit] qemacs charset.c charsetjis.c extras.c qe.c qe....
Date: Wed, 15 Mar 2017 19:42:23 -0400 (EDT)
CVSROOT: /sources/qemacs
Module name: qemacs
Changes by: Charlie Gordon <chqrlie> 17/03/15 19:42:23
Modified files:
. : charset.c charsetjis.c extras.c qe.c qe.h tty.c unihex.c
Log message:
basic: charset improvements
- add QECharset.encode_table as a default for CharsetDecodeState.table
- make CharsetDecodeState.table const
- use static initializers for utf8 tables
- add al32utf8 charset name (used in Oracle for true utf-8 encoding)
- use direct pointers in unicode_glyph_range_index
- initialize unicode_glyph_range_index at run time
- rename unicode_glyph_tty_width as unicode_tty_glyph_width
- changed unihex display to space out single width characters
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/qemacs/charset.c?cvsroot=qemacs&r1=1.45&r2=1.46
http://cvs.savannah.gnu.org/viewcvs/qemacs/charsetjis.c?cvsroot=qemacs&r1=1.9&r2=1.10
http://cvs.savannah.gnu.org/viewcvs/qemacs/extras.c?cvsroot=qemacs&r1=1.55&r2=1.56
http://cvs.savannah.gnu.org/viewcvs/qemacs/qe.c?cvsroot=qemacs&r1=1.251&r2=1.252
http://cvs.savannah.gnu.org/viewcvs/qemacs/qe.h?cvsroot=qemacs&r1=1.236&r2=1.237
http://cvs.savannah.gnu.org/viewcvs/qemacs/tty.c?cvsroot=qemacs&r1=1.71&r2=1.72
http://cvs.savannah.gnu.org/viewcvs/qemacs/unihex.c?cvsroot=qemacs&r1=1.35&r2=1.36
Patches:
Index: charset.c
===================================================================
RCS file: /sources/qemacs/qemacs/charset.c,v
retrieving revision 1.45
retrieving revision 1.46
diff -u -b -r1.45 -r1.46
--- charset.c 15 Mar 2017 23:17:58 -0000 1.45
+++ charset.c 15 Mar 2017 23:42:23 -0000 1.46
@@ -51,31 +51,13 @@
UINT_MAX, 1, /* 26: catchall */
};
-static unsigned int const unicode_glyph_range_index[16] = {
- 2 * 0, /* 0000-0FFF */
- 2 * 0, /* 1000-1FFF */
- 2 * 2, /* 2000-2FFF */
- 2 * 7, /* 3000-3FFF */
- 2 * 9, /* 4000-4FFF */
- 2 * 11, /* 5000-5FFF */
- 2 * 11, /* 6000-6FFF */
- 2 * 11, /* 7000-7FFF */
- 2 * 11, /* 8000-8FFF */
- 2 * 11, /* 9000-9FFF */
- 2 * 11, /* A000-AFFF */
- 2 * 13, /* B000-BFFF */
- 2 * 13, /* C000-CFFF */
- 2 * 13, /* D000-DFFF */
- 2 * 14, /* E000-EFFF */
- 2 * 14, /* F000-FFFF */
-};
+static const unsigned int *unicode_glyph_range_index[0x20];
-int unicode_glyph_tty_width(unsigned int ucs)
+int unicode_tty_glyph_width(unsigned int ucs)
{
- unsigned int const *ip;
-
/* Iterative lookup with fast initial jump, no boundary test needed */
- ip = unicode_glyph_ranges + unicode_glyph_range_index[(ucs >> 12) & 0xF];
+ /* Very efficient for BMP and SMP code-points */
+ unsigned int const *ip = unicode_glyph_range_index[(ucs >> 12) & 0x1F];
while (ucs > ip[0]) {
ip += 2;
@@ -85,28 +67,62 @@
/* utf-8 specific tables */
-static unsigned short table_idem[256];
-static unsigned short table_utf8[256];
-static unsigned short table_none[256];
+#define REP2(x) x, x
+#define REP4(x) x, x, x, x
+#define REP8(x) REP4(x), REP4(x)
+#define REP16(x) REP4(x), REP4(x), REP4(x), REP4(x)
+#define REP32(x) REP16(x), REP16(x)
+#define REP64(x) REP16(x), REP16(x), REP16(x), REP16(x)
+#define REP128(x) REP64(x), REP64(x)
+#define REP256(x) REP64(x), REP64(x), REP64(x), REP64(x)
+
+#define RUN2(x) (x)+0, (x)+1
+#define RUN4(x) (x)+0, (x)+1, (x)+2, (x)+3
+#define RUN8(x) RUN4(x), RUN4((x)+4)
+#define RUN16(x) RUN4(x), RUN4((x)+4), RUN4((x)+8), RUN4((x)+12)
+#define RUN32(x) RUN16(x), RUN16((x)+16)
+#define RUN64(x) RUN16(x), RUN16((x)+16), RUN16((x)+32), RUN16((x)+48)
+#define RUN128(x) RUN64(x), RUN64((x)+64)
+#define RUN256(x) RUN64(x), RUN64((x)+64), RUN64((x)+128), RUN64((x)+192)
+
+static unsigned short const table_idem[256] = { RUN256(0) };
+static unsigned short const table_none[256] = { REP256(ESCAPE_CHAR) };
+
+static unsigned short const table_utf8[256] = {
+ RUN128(0), /* [0x00...0x80] are self-encoding ASCII bytes */
+ REP64(INVALID_CHAR), /* [0x80...0xC0] are invalid prefix bytes */
+ REP32(ESCAPE_CHAR), /* [0xC0...0xE0] leading bytes of 2 byte sequences
*/
+ REP16(ESCAPE_CHAR), /* [0xE0...0xF0] leading bytes of 3 byte sequences
*/
+ REP8(ESCAPE_CHAR), /* [0xF0...0xF8] leading bytes of 4 byte sequences
*/
+ REP4(ESCAPE_CHAR), /* [0xF8...0xFC] leading bytes of 5 byte sequences
*/
+ REP2(ESCAPE_CHAR), /* [0xFC...0xFE] leading bytes of byte sequences
*/
+ INVALID_CHAR, /* 0xFE is invalid in UTF-8 encoding */
+ INVALID_CHAR, /* 0xFF is invalid in UTF-8 encoding */
+};
-unsigned char utf8_length[256];
+unsigned char const utf8_length[256] = {
+ REP128(1), /* [0x00...0x80] are self-encoding ASCII bytes */
+ REP64(1), /* [0x80...0xC0] are invalid prefix bytes, could use 0 */
+ REP32(2), /* [0xC0...0xE0] leading bytes of 2 byte sequences */
+ REP16(3), /* [0xE0...0xF0] leading bytes of 3 byte sequences */
+ REP8(4), /* [0xF0...0xF8] leading bytes of 4 byte sequences */
+ REP4(5), /* [0xF8...0xFC] leading bytes of 5 byte sequences */
+ REP2(6), /* [0xFC...0xFE] leading bytes of byte sequences */
+ 1, /* 0xFE is invalid in UTF-8 encoding */
+ 1, /* 0xFF is invalid in UTF-8 encoding */
+};
-static const unsigned int utf8_min_code[7] = {
+static unsigned int const utf8_min_code[7] = {
0, 0, 0x80, 0x800, 0x10000, 0x00200000, 0x04000000,
};
-static const unsigned char utf8_first_code_mask[7] = {
+static unsigned char const utf8_first_code_mask[7] = {
0, 0, 0x1f, 0xf, 0x7, 0x3, 0x1,
};
/********************************************************/
/* raw */
-static void decode_raw_init(CharsetDecodeState *s)
-{
- s->table = table_idem;
-}
-
static u8 *encode_raw(qe__unused__ QECharset *charset, u8 *p, int c)
{
if (c <= 0xff) {
@@ -121,14 +137,14 @@
"raw",
"binary|none",
NULL,
- decode_raw_init,
+ NULL,
decode_8bit,
encode_raw,
charset_get_pos_8bit,
charset_get_chars_8bit,
charset_goto_char_8bit,
charset_goto_line_8bit,
- 1, 0, 0, 10, 0, 0, NULL, NULL,
+ 1, 0, 0, 10, 0, 0, table_idem, NULL, NULL,
};
/********************************************************/
@@ -174,11 +190,6 @@
return 0;
}
-static void decode_8859_1_init(CharsetDecodeState *s)
-{
- s->table = table_idem;
-}
-
static u8 *encode_8859_1(qe__unused__ QECharset *charset, u8 *p, int c)
{
if (c <= 0xff) {
@@ -193,24 +204,19 @@
"8859-1",
"ISO-8859-1|iso-ir-100|latin1|l1|819",
probe_8859_1,
- decode_8859_1_init,
+ NULL,
decode_8bit,
encode_8859_1,
charset_get_pos_8bit,
charset_get_chars_8bit,
charset_goto_char_8bit,
charset_goto_line_8bit,
- 1, 0, 0, 10, 0, 0, NULL, NULL,
+ 1, 0, 0, 10, 0, 0, table_idem, NULL, NULL,
};
/********************************************************/
/* vt100 */
-static void decode_vt100_init(CharsetDecodeState *s)
-{
- s->table = table_idem;
-}
-
static u8 *encode_vt100(qe__unused__ QECharset *charset, u8 *p, int c)
{
if (c <= 0xff) {
@@ -225,14 +231,14 @@
"vt100",
NULL,
NULL,
- decode_vt100_init,
+ NULL,
decode_8bit,
encode_vt100,
charset_get_pos_8bit,
charset_get_chars_8bit,
charset_goto_char_8bit,
charset_goto_line_8bit,
- 1, 0, 0, 10, 0, 0, NULL, NULL,
+ 1, 0, 0, 10, 0, 0, table_idem, NULL, NULL,
};
/********************************************************/
@@ -252,14 +258,14 @@
"7bit",
"us-ascii|ascii|7-bit|iso-ir-6|ANSI_X3.4|646",
NULL,
- decode_8859_1_init,
+ NULL,
decode_8bit,
encode_7bit,
charset_get_pos_8bit,
charset_get_chars_8bit,
charset_goto_char_8bit,
charset_goto_line_8bit,
- 1, 0, 0, 10, 0, 0, NULL, NULL,
+ 1, 0, 0, 10, 0, 0, table_idem, NULL, NULL,
};
/********************************************************/
@@ -435,11 +441,6 @@
return 0;
}
-static void decode_utf8_init(CharsetDecodeState *s)
-{
- s->table = table_utf8;
-}
-
static int decode_utf8_func(CharsetDecodeState *s)
{
return utf8_decode((const char **)(void *)&s->p);
@@ -550,16 +551,16 @@
struct QECharset charset_utf8 = {
"utf-8",
- "utf8",
+ "utf8|al32utf8",
probe_utf8,
- decode_utf8_init,
+ NULL,
decode_utf8_func,
encode_utf8,
charset_get_pos_utf8,
charset_get_chars_utf8,
charset_goto_char_utf8,
charset_goto_line_8bit,
- 1, 1, 0, 10, 0, 0, NULL, NULL,
+ 1, 1, 0, 10, 0, 0, table_utf8, NULL, NULL,
};
/********************************************************/
@@ -603,11 +604,6 @@
return 0;
}
-static void decode_ucs_init(CharsetDecodeState *s)
-{
- s->table = table_none;
-}
-
static int decode_ucs2le(CharsetDecodeState *s)
{
/* XXX: should handle surrogates */
@@ -821,28 +817,28 @@
"ucs2le",
"utf16le|utf-16le",
probe_ucs2le,
- decode_ucs_init,
+ NULL,
decode_ucs2le,
encode_ucs2le,
charset_get_pos_ucs2,
charset_get_chars_ucs2,
charset_goto_char_ucs2,
charset_goto_line_ucs2,
- 2, 0, 0, 10, 0, 0, NULL, NULL,
+ 2, 0, 0, 10, 0, 0, table_none, NULL, NULL,
};
struct QECharset charset_ucs2be = {
"ucs2be",
"ucs2|utf16|utf-16|utf16be|utf-16be",
probe_ucs2be,
- decode_ucs_init,
+ NULL,
decode_ucs2be,
encode_ucs2be,
charset_get_pos_ucs2,
charset_get_chars_ucs2,
charset_goto_char_ucs2,
charset_goto_line_ucs2,
- 2, 0, 0, 10, 0, 0, NULL, NULL,
+ 2, 0, 0, 10, 0, 0, table_none, NULL, NULL,
};
static int probe_ucs4le(qe__unused__ QECharset *charset, const u8 *buf, int
size)
@@ -1093,28 +1089,28 @@
"ucs4le",
"utf32le|utf-32le",
probe_ucs4le,
- decode_ucs_init,
+ NULL,
decode_ucs4le,
encode_ucs4le,
charset_get_pos_ucs4,
charset_get_chars_ucs4,
charset_goto_char_ucs4,
charset_goto_line_ucs4,
- 4, 0, 0, 10, 0, 0, NULL, NULL,
+ 4, 0, 0, 10, 0, 0, table_none, NULL, NULL,
};
struct QECharset charset_ucs4be = {
"ucs4be",
"ucs4|utf32|utf-32|utf32be|utf-32be",
probe_ucs4be,
- decode_ucs_init,
+ NULL,
decode_ucs4be,
encode_ucs4be,
charset_get_pos_ucs4,
charset_get_chars_ucs4,
charset_goto_char_ucs4,
charset_goto_line_ucs4,
- 4, 0, 0, 10, 0, 0, NULL, NULL,
+ 4, 0, 0, 10, 0, 0, table_none, NULL, NULL,
};
/********************************************************/
@@ -1181,11 +1177,12 @@
void charset_decode_init(CharsetDecodeState *s, QECharset *charset,
EOLType eol_type)
{
- s->table = NULL; /* fail safe */
+ s->table = charset->encode_table; /* default encode table */
if (charset->table_alloc) {
s->table = qe_malloc_array(unsigned short, 256);
if (!s->table) {
charset = &charset_8859_1;
+ s->table = charset->encode_table;
}
}
s->charset = charset;
@@ -1202,8 +1199,10 @@
void charset_decode_close(CharsetDecodeState *s)
{
- if (s->charset->table_alloc)
- qe_free(&s->table);
+ if (s->charset->table_alloc) {
+ /* remove the const qualifier */
+ qe_free((unsigned short **)&s->table);
+ }
/* safety */
memset(s, 0, sizeof(CharsetDecodeState));
}
@@ -1551,7 +1550,7 @@
unsigned short *table;
int i, n;
- table = s->table;
+ table = (unsigned short *)s->table; /* remove const qualifier */
for (i = 0; i < charset->min_char; i++)
*table++ = i;
n = charset->max_char - charset->min_char + 1;
@@ -1706,36 +1705,16 @@
void charset_init(void)
{
- int l, i, n;
-
- for (i = 0; i < 256; i++) {
- table_idem[i] = i;
- table_none[i] = ESCAPE_CHAR;
- }
-
- /* utf8 tables */
+ /* initialize unicode_glyph_range_index[] */
+ unsigned int const *ip = unicode_glyph_ranges;
+ unsigned int ucs;
- // could set utf8_length[128...0xc0] to 0 as invalid bytes
- memset(utf8_length, 1, 256);
-
- i = 0xc0;
- l = 2;
- while (l <= 6) {
- n = utf8_first_code_mask[l] + 1;
- while (n > 0) {
- utf8_length[i++] = l;
- n--;
- }
- l++;
+ for (ucs = 0; ucs < 0x20000; ucs += 0x1000) {
+ while (ucs > ip[0])
+ ip += 2;
+ unicode_glyph_range_index[ucs >> 12] = ip;
}
- for (i = 0; i < 256; i++)
- table_utf8[i] = INVALID_CHAR;
- for (i = 0; i < 0x80; i++)
- table_utf8[i] = i;
- for (i = 0xc0; i < 0xfe; i++)
- table_utf8[i] = ESCAPE_CHAR;
-
qe_register_charset(&charset_raw);
qe_register_charset(&charset_8859_1);
qe_register_charset(&charset_vt100);
Index: charsetjis.c
===================================================================
RCS file: /sources/qemacs/qemacs/charsetjis.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -b -r1.9 -r1.10
--- charsetjis.c 16 Sep 2015 22:18:23 -0000 1.9
+++ charsetjis.c 15 Mar 2017 23:42:23 -0000 1.10
@@ -2,6 +2,7 @@
* JIS Charset handling for QEmacs
*
* Copyright (c) 2002 Fabrice Bellard.
+ * Copyright (c) 2002-2017 Charlie Gordon.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -78,7 +79,8 @@
static void decode_euc_jp_init(CharsetDecodeState *s)
{
- unsigned short *table = s->table;
+ /* XXX: should use static table instead of removing const qualifier */
+ unsigned short *table = (unsigned short *)s->table;
int i;
for (i = 0; i < 256; i++)
@@ -162,7 +164,8 @@
static void decode_sjis_init(CharsetDecodeState *s)
{
- unsigned short *table = s->table;
+ /* XXX: should use static table instead of removing const qualifier */
+ unsigned short *table = (unsigned short *)s->table;
int i;
for (i = 0; i < 256; i++)
Index: extras.c
===================================================================
RCS file: /sources/qemacs/qemacs/extras.c,v
retrieving revision 1.55
retrieving revision 1.56
diff -u -b -r1.55 -r1.56
--- extras.c 15 Mar 2017 07:24:31 -0000 1.55
+++ extras.c 15 Mar 2017 23:42:23 -0000 1.56
@@ -275,13 +275,13 @@
col += tw - col % tw;
continue;
}
- col += unicode_glyph_tty_width(c);
+ col += unicode_tty_glyph_width(c);
if (c != ' ' || offset < start || col % tw == 0)
continue;
while (offset1 < stop) {
c = eb_nextc(b, offset1, &offset2);
if (c == ' ') {
- col += unicode_glyph_tty_width(c);
+ col += unicode_tty_glyph_width(c);
offset1 = offset2;
if (col % tw == 0) {
delta = eb_delete_range(b, offset, offset1);
@@ -342,7 +342,7 @@
continue;
}
if (c != '\t') {
- col += unicode_glyph_tty_width(c);
+ col += unicode_tty_glyph_width(c);
continue;
}
col0 = col;
Index: qe.c
===================================================================
RCS file: /sources/qemacs/qemacs/qe.c,v
retrieving revision 1.251
retrieving revision 1.252
diff -u -b -r1.251 -r1.252
--- qe.c 15 Mar 2017 07:32:48 -0000 1.251
+++ qe.c 15 Mar 2017 23:42:23 -0000 1.252
@@ -1642,7 +1642,7 @@
if (c == '\t') {
col += tw - col % tw;
} else {
- col += unicode_glyph_tty_width(c);
+ col += unicode_tty_glyph_width(c);
}
}
if (argval == NO_ARG)
@@ -8070,7 +8070,7 @@
const char str_version[] = "QEmacs version " QE_VERSION;
const char str_credits[] = "Copyright (c) 2000-2003 Fabrice Bellard\n"
- "Copyright (c) 2000-2016 Charlie Gordon\n";
+ "Copyright (c) 2000-2017 Charlie Gordon\n";
static void show_version(void)
{
Index: qe.h
===================================================================
RCS file: /sources/qemacs/qemacs/qe.h,v
retrieving revision 1.236
retrieving revision 1.237
diff -u -b -r1.236 -r1.237
--- qe.h 15 Mar 2017 07:32:48 -0000 1.236
+++ qe.h 15 Mar 2017 23:42:23 -0000 1.237
@@ -569,6 +569,7 @@
/* private data for some charsets */
u8 eol_char; /* 0x0A for ASCII, 0x25 for EBCDIC */
u8 min_char, max_char;
+ const unsigned short *encode_table;
const unsigned short *private_table;
struct QECharset *next;
};
@@ -591,7 +592,7 @@
struct CharsetDecodeState {
/* 256 ushort table for hyper fast decoding */
- unsigned short *table;
+ const unsigned short *table;
int char_size;
EOLType eol_type;
int eol_char;
@@ -612,7 +613,7 @@
void qe_register_charset(struct QECharset *charset);
-extern unsigned char utf8_length[256];
+extern unsigned char const utf8_length[256];
static inline int utf8_is_trailing_byte(int c) { return (c & 0xC0) == 0x80; }
int utf8_encode(char *q, int c);
int utf8_decode(const char **pp);
@@ -635,7 +636,7 @@
int decode_8bit(CharsetDecodeState *s);
u8 *encode_8bit(QECharset *charset, u8 *q, int c);
-int unicode_glyph_tty_width(unsigned int ucs);
+int unicode_tty_glyph_width(unsigned int ucs);
/* arabic.c */
int arab_join(unsigned int *line, unsigned int *ctog, int len);
Index: tty.c
===================================================================
RCS file: /sources/qemacs/qemacs/tty.c,v
retrieving revision 1.71
retrieving revision 1.72
diff -u -b -r1.71 -r1.72
--- tty.c 15 Mar 2017 07:24:31 -0000 1.71
+++ tty.c 15 Mar 2017 23:42:23 -0000 1.72
@@ -1033,7 +1033,7 @@
if (ucs < 0x1100)
return 1;
- return unicode_glyph_tty_width(ucs);
+ return unicode_tty_glyph_width(ucs);
}
static void tty_term_text_metrics(QEditScreen *s, qe__unused__ QEFont *font,
@@ -1140,6 +1140,7 @@
/* We cannot print anything on the bottom right screen cell,
* pretend it's OK: */
ts->screen[shadow - 1] = ts->screen[2 * shadow - 1];
+
for (y = 0; y < s->height; y++) {
if (ts->line_updated[y]) {
ts->line_updated[y] = 0;
Index: unihex.c
===================================================================
RCS file: /sources/qemacs/qemacs/unihex.c,v
retrieving revision 1.35
retrieving revision 1.36
diff -u -b -r1.35 -r1.36
--- unihex.c 15 Mar 2017 07:38:58 -0000 1.35
+++ unihex.c 15 Mar 2017 23:42:23 -0000 1.36
@@ -146,11 +146,10 @@
}
}
display_char(ds, offset1, offset2, b);
-#if 0
- /* CG: spacing out single width glyphs is less readable */
- if (unicode_glyph_tty_width(b) == 1)
+ /* spacing out single width glyphs may be less readable */
+ if (unicode_tty_glyph_width(b) < 2) {
display_char(ds, -1, -1, ' ');
-#endif
+ }
}
display_eol(ds, -1, -1);
[Prev in Thread] | Current Thread | [Next in Thread]
- [Qemacs-commit] qemacs charset.c charsetjis.c extras.c qe.c qe...., Charlie Gordon <=