[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[formatting i18n 10/14] format: Count prefix and suffix width in terms of display columns.
From: Ben Pfaff
Subject: [formatting i18n 10/14] format: Count prefix and suffix width in terms of display columns.
Date: Sat, 19 Feb 2011 17:42:22 -0800
Until now, the prefixes and suffixes for custom currency formats
(CCA, etc.) have been considered to occupy one display column per
byte. This is fine for prefixes and suffixes like "$" or "%", but
falls down badly with U+00A5 (¥) or U+20AC (€), which occupy two
or three bytes, respectively, in UTF-8, while occupying only a
single display column.
This commit fixes the problem. It doesn't add a test yet because
there are still some higher-level issues, but that will come in
a later commit when those remaining issues are resolved.
---
Smake | 1 +
src/data/data-out.c | 26 +++++++++++++++-----------
src/data/format.c | 26 ++++++++++++++++++--------
src/data/format.h | 12 +++++++++++-
4 files changed, 45 insertions(+), 20 deletions(-)
diff --git a/Smake b/Smake
index 6d54f5b..2210e66 100644
--- a/Smake
+++ b/Smake
@@ -78,6 +78,7 @@ GNULIB_MODULES = \
unistr/u8-mbtouc \
unistr/u8-strlen \
unistr/u8-strncat \
+ uniwidth/u8-strwidth \
unitypes \
unlocked-io \
vasprintf-posix \
diff --git a/src/data/data-out.c b/src/data/data-out.c
index a30e7e7..bb77437 100644
--- a/src/data/data-out.c
+++ b/src/data/data-out.c
@@ -131,11 +131,12 @@ char *
data_out_pool (const union value *input, const char *encoding,
const struct fmt_spec *format, struct pool *pool)
{
+ const struct fmt_number_style *style = settings_get_style (format->type);
char *output;
char *t ;
assert (fmt_check_output (format));
- output = xmalloc (format->w + 1);
+ output = xmalloc (format->w + style->extra_bytes + 1);
converters[format->type] (input, format, output);
@@ -602,9 +603,9 @@ output_decimal (const struct rounder *r, const struct
fmt_spec *format,
the negative suffix, plus (if negative) the negative
prefix. */
width = rounder_width (r, decimals, &integer_digits, &add_neg_prefix);
- width += strlen (style->neg_suffix.s);
+ width += style->neg_suffix.width;
if (add_neg_prefix)
- width += strlen (style->neg_prefix.s);
+ width += style->neg_prefix.width;
if (width > format->w)
continue;
@@ -659,8 +660,11 @@ output_decimal (const struct rounder *r, const struct
fmt_spec *format,
if (add_neg_prefix)
p = stpcpy (p, style->neg_suffix.s);
else
- p = mempset (p, ' ', strlen (style->neg_suffix.s));
- assert (p == output + format->w);
+ p = mempset (p, ' ', style->neg_suffix.width);
+
+ assert (p >= output + format->w);
+ assert (p <= output + format->w + style->extra_bytes);
+ *p = '\0';
return true;
}
@@ -681,9 +685,9 @@ output_scientific (double number, const struct fmt_spec
*format,
char buf[64], *p;
/* Allocate minimum required space. */
- width = 6 + strlen (style->neg_suffix.s);
+ width = 6 + style->neg_suffix.width;
if (number < 0)
- width += strlen (style->neg_prefix.s);
+ width += style->neg_prefix.width;
if (width > format->w)
return false;
@@ -739,11 +743,11 @@ output_scientific (double number, const struct fmt_spec
*format,
if (number < 0)
p = stpcpy (p, style->neg_suffix.s);
else
- p = mempset (p, ' ', strlen (style->neg_suffix.s));
+ p = mempset (p, ' ', style->neg_suffix.width);
- assert (p == buf + format->w);
- memcpy (output, buf, format->w);
- output[format->w] = '\0';
+ assert (p >= output + format->w);
+ assert (p <= output + format->w + style->extra_bytes);
+ *p = '\0';
return true;
}
diff --git a/src/data/format.c b/src/data/format.c
index d3c6880..95e87a0 100644
--- a/src/data/format.c
+++ b/src/data/format.c
@@ -20,6 +20,7 @@
#include <ctype.h>
#include <stdlib.h>
+#include <uniwidth.h>
#include "data/identifier.h"
#include "data/settings.h"
@@ -113,7 +114,7 @@ fmt_settings_get_style (const struct fmt_settings *settings,
/* Sets the number style for TYPE to have the given DECIMAL and GROUPING
characters, negative prefix NEG_PREFIX, prefix PREFIX, suffix SUFFIX, and
- negative suffix NEG_SUFFIX. */
+ negative suffix NEG_SUFFIX. All of the strings are UTF-8 encoded. */
void
fmt_settings_set_style (struct fmt_settings *settings, enum fmt_type type,
char decimal, char grouping,
@@ -121,6 +122,7 @@ fmt_settings_set_style (struct fmt_settings *settings, enum
fmt_type type,
const char *suffix, const char *neg_suffix)
{
struct fmt_number_style *style = &settings->styles[type];
+ int total_bytes, total_width;
assert (grouping == '.' || grouping == ',' || grouping == 0);
assert (decimal == '.' || decimal == ',');
@@ -134,6 +136,12 @@ fmt_settings_set_style (struct fmt_settings *settings,
enum fmt_type type,
fmt_affix_set (&style->neg_suffix, neg_suffix);
style->decimal = decimal;
style->grouping = grouping;
+
+ total_bytes = (strlen (neg_prefix) + strlen (prefix)
+ + strlen (suffix) + strlen (neg_suffix));
+ total_width = (style->neg_prefix.width + style->prefix.width
+ + style->suffix.width + style->neg_suffix.width);
+ style->extra_bytes = MAX (0, total_bytes - total_width);
}
/* Sets the decimal point character for the settings in S to DECIMAL.
@@ -934,11 +942,12 @@ max_digits_for_bytes (int bytes)
return map[bytes - 1];
}
-/* Sets AFFIX's string value to S. */
+/* Sets AFFIX's string value to S, a UTF-8 encoded string. */
static void
fmt_affix_set (struct fmt_affix *affix, const char *s)
{
affix->s = s[0] == '\0' ? CONST_CAST (char *, "") : xstrdup (s);
+ affix->width = u8_strwidth (CHAR_CAST (const uint8_t *, s), "UTF-8");
}
/* Frees data in AFFIX. */
@@ -970,6 +979,7 @@ fmt_number_style_clone (struct fmt_number_style *new,
fmt_affix_set (&new->neg_suffix, old->neg_suffix.s);
new->decimal = old->decimal;
new->grouping = old->grouping;
+ new->extra_bytes = old->extra_bytes;
}
/* Destroys a struct fmt_number_style. */
@@ -985,20 +995,20 @@ fmt_number_style_destroy (struct fmt_number_style *style)
}
}
-/* Returns the total width of the standard prefix and suffix for
- STYLE. */
+/* Returns the total width of the standard prefix and suffix for STYLE, in
+ display columns (e.g. as returned by u8_strwidth()). */
int
fmt_affix_width (const struct fmt_number_style *style)
{
- return strlen (style->prefix.s) + strlen (style->suffix.s);
+ return style->prefix.width + style->suffix.width;
}
-/* Returns the total width of the negative prefix and suffix for
- STYLE. */
+/* Returns the total width of the negative prefix and suffix for STYLE, in
+ display columns (e.g. as returned by u8_strwidth()). */
int
fmt_neg_affix_width (const struct fmt_number_style *style)
{
- return strlen (style->neg_prefix.s) + strlen (style->neg_suffix.s);
+ return style->neg_prefix.width + style->neg_suffix.width;
}
/* Returns the struct fmt_desc for the given format TYPE. */
diff --git a/src/data/format.h b/src/data/format.h
index 55643ab..7df3744 100644
--- a/src/data/format.h
+++ b/src/data/format.h
@@ -146,7 +146,8 @@ void fmt_settings_set_style (struct fmt_settings *, enum
fmt_type,
/* A prefix or suffix for a numeric output format. */
struct fmt_affix
{
- char *s; /* String contents of affix. */
+ char *s; /* String contents of affix, in UTF-8. */
+ int width; /* Display width in columns (see wcwidth()). */
};
/* A numeric output style. */
@@ -158,6 +159,15 @@ struct fmt_number_style
struct fmt_affix neg_suffix; /* Negative suffix. */
char decimal; /* Decimal point: '.' or ','. */
char grouping; /* Grouping character: ',', '.', or 0. */
+
+ /* A fmt_affix may require more bytes than its display width; for example,
U+00A5 (¥) is 2 bytes in UTF-8 but occupies only one display column.
+ This member is the sum of the number of bytes required by all of the
+ fmt_affix members in this struct, minus their display widths. Thus, it
+ can be used to size memory allocations: for example, the formatted
+ result of CCA20.5 requires no more than (20 + extra_bytes) bytes in
+ UTF-8. */
+ int extra_bytes;
};
int fmt_affix_width (const struct fmt_number_style *);
--
1.7.2.3
- [formatting i18n 00/14] Fix i18n of formatted data, Ben Pfaff, 2011/02/19
- [formatting i18n 03/14] PRINT: Use UTF-8 encoding for output to the output subsystem., Ben Pfaff, 2011/02/19
- [formatting i18n 01/14] Use new Gnulib function dtoastr() to format short, accurate real numbers., Ben Pfaff, 2011/02/19
- [formatting i18n 04/14] i18n: New function recode_byte()., Ben Pfaff, 2011/02/19
- [formatting i18n 10/14] format: Count prefix and suffix width in terms of display columns.,
Ben Pfaff <=
- [formatting i18n 08/14] data-out: Make each converter responsible for storing null terminator., Ben Pfaff, 2011/02/19
- [formatting i18n 12/14] pool: Support NULL pool argument to pool_alloc_unaligned()., Ben Pfaff, 2011/02/19
- [formatting i18n 14/14] data-out: Add test for non-ASCII custom currency formats., Ben Pfaff, 2011/02/19
- [formatting i18n 11/14] data-out: Reorganize output_Z() to be more easily understood., Ben Pfaff, 2011/02/19
- [formatting i18n 02/14] CROSSTABS: Eliminate redundant data copying., Ben Pfaff, 2011/02/19
- [formatting i18n 06/14] legacy-encoding: Remove., Ben Pfaff, 2011/02/19
- [formatting i18n 07/14] format: Increase abstraction of fmt_number_style., Ben Pfaff, 2011/02/19
- [formatting i18n 09/14] format: Create a new "struct fmt_affix" for prefix and suffix strings., Ben Pfaff, 2011/02/19
- [formatting i18n 05/14] i18n: Introduce C_ENCODING as replacement for LEGACY_NATIVE., Ben Pfaff, 2011/02/19
- [formatting i18n 13/14] data-out: Optimize and fix some bad assumptions., Ben Pfaff, 2011/02/19