pspp-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[formatting i18n 10/14] format: Count prefix and suffix width in terms of display columns.


From: Ben Pfaff
Subject: [formatting i18n 10/14] format: Count prefix and suffix width in terms of display columns.
Date: Sat, 19 Feb 2011 17:42:22 -0800

Until now, the prefixes and suffixes for custom currency formats
(CCA, etc.) have been considered to occupy one display column per
byte.  This is fine for prefixes and suffixes like "$" or "%", but
falls down badly with U+00A5 (¥) or U+20AC (€), which occupy two
or three bytes, respectively, in UTF-8, while occupying only a
single display column.

This commit fixes the problem.  It doesn't add a test yet because
there are still some higher-level issues, but that will come in
a later commit when those remaining issues are resolved.
---
 Smake               |    1 +
 src/data/data-out.c |   26 +++++++++++++++-----------
 src/data/format.c   |   26 ++++++++++++++++++--------
 src/data/format.h   |   12 +++++++++++-
 4 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/Smake b/Smake
index 6d54f5b..2210e66 100644
--- a/Smake
+++ b/Smake
@@ -78,6 +78,7 @@ GNULIB_MODULES = \
        unistr/u8-mbtouc \
        unistr/u8-strlen \
        unistr/u8-strncat \
+       uniwidth/u8-strwidth \
        unitypes \
        unlocked-io \
        vasprintf-posix \
diff --git a/src/data/data-out.c b/src/data/data-out.c
index a30e7e7..bb77437 100644
--- a/src/data/data-out.c
+++ b/src/data/data-out.c
@@ -131,11 +131,12 @@ char *
 data_out_pool (const union value *input, const char *encoding,
               const struct fmt_spec *format, struct pool *pool)
 {
+  const struct fmt_number_style *style = settings_get_style (format->type);
   char *output;
   char *t ;
   assert (fmt_check_output (format));
 
-  output = xmalloc (format->w + 1);
+  output = xmalloc (format->w + style->extra_bytes + 1);
 
   converters[format->type] (input, format, output);
 
@@ -602,9 +603,9 @@ output_decimal (const struct rounder *r, const struct fmt_spec *format,
          the negative suffix, plus (if negative) the negative
          prefix. */
       width = rounder_width (r, decimals, &integer_digits, &add_neg_prefix);
-      width += strlen (style->neg_suffix.s);
+      width += style->neg_suffix.width;
       if (add_neg_prefix)
-        width += strlen (style->neg_prefix.s);
+        width += style->neg_prefix.width;
       if (width > format->w)
         continue;
 
@@ -659,8 +660,11 @@ output_decimal (const struct rounder *r, const struct fmt_spec *format,
       if (add_neg_prefix)
         p = stpcpy (p, style->neg_suffix.s);
       else
-        p = mempset (p, ' ', strlen (style->neg_suffix.s));
-      assert (p == output + format->w);
+        p = mempset (p, ' ', style->neg_suffix.width);
+
+      assert (p >= output + format->w);
+      assert (p <= output + format->w + style->extra_bytes);
+      *p = '\0';
 
       return true;
     }
@@ -681,9 +685,9 @@ output_scientific (double number, const struct fmt_spec *format,
   char buf[64], *p;
 
   /* Allocate minimum required space. */
-  width = 6 + strlen (style->neg_suffix.s);
+  width = 6 + style->neg_suffix.width;
   if (number < 0)
-    width += strlen (style->neg_prefix.s);
+    width += style->neg_prefix.width;
   if (width > format->w)
     return false;
 
@@ -739,11 +743,11 @@ output_scientific (double number, const struct fmt_spec *format,
   if (number < 0)
     p = stpcpy (p, style->neg_suffix.s);
   else
-    p = mempset (p, ' ', strlen (style->neg_suffix.s));
+    p = mempset (p, ' ', style->neg_suffix.width);
 
-  assert (p == buf + format->w);
-  memcpy (output, buf, format->w);
-  output[format->w] = '\0';
+  assert (p >= output + format->w);
+  assert (p <= output + format->w + style->extra_bytes);
+  *p = '\0';
 
   return true;
 }
diff --git a/src/data/format.c b/src/data/format.c
index d3c6880..95e87a0 100644
--- a/src/data/format.c
+++ b/src/data/format.c
@@ -20,6 +20,7 @@
 
 #include <ctype.h>
 #include <stdlib.h>
+#include <uniwidth.h>
 
 #include "data/identifier.h"
 #include "data/settings.h"
@@ -113,7 +114,7 @@ fmt_settings_get_style (const struct fmt_settings *settings,
 
 /* Sets the number style for TYPE to have the given DECIMAL and GROUPING
    characters, negative prefix NEG_PREFIX, prefix PREFIX, suffix SUFFIX, and
-   negative suffix NEG_SUFFIX. */
+   negative suffix NEG_SUFFIX.  All of the strings are UTF-8 encoded. */
 void
 fmt_settings_set_style (struct fmt_settings *settings, enum fmt_type type,
                         char decimal, char grouping,
@@ -121,6 +122,7 @@ fmt_settings_set_style (struct fmt_settings *settings, enum fmt_type type,
                         const char *suffix, const char *neg_suffix)
 {
   struct fmt_number_style *style = &settings->styles[type];
+  int total_bytes, total_width;
 
   assert (grouping == '.' || grouping == ',' || grouping == 0);
   assert (decimal == '.' || decimal == ',');
@@ -134,6 +136,12 @@ fmt_settings_set_style (struct fmt_settings *settings, enum fmt_type type,
   fmt_affix_set (&style->neg_suffix, neg_suffix);
   style->decimal = decimal;
   style->grouping = grouping;
+
+  total_bytes = (strlen (neg_prefix) + strlen (prefix)
+                 + strlen (suffix) + strlen (neg_suffix));
+  total_width = (style->neg_prefix.width + style->prefix.width
+                 + style->suffix.width + style->neg_suffix.width);
+  style->extra_bytes = MAX (0, total_bytes - total_width);
 }
 
 /* Sets the decimal point character for the settings in S to DECIMAL.
@@ -934,11 +942,12 @@ max_digits_for_bytes (int bytes)
   return map[bytes - 1];
 }
 
-/* Sets AFFIX's string value to S. */
+/* Sets AFFIX's string value to S, a UTF-8 encoded string. */
 static void
 fmt_affix_set (struct fmt_affix *affix, const char *s)
 {
   affix->s = s[0] == '\0' ? CONST_CAST (char *, "") : xstrdup (s);
+  affix->width = u8_strwidth (CHAR_CAST (const uint8_t *, s), "UTF-8");
 }
 
 /* Frees data in AFFIX. */
@@ -970,6 +979,7 @@ fmt_number_style_clone (struct fmt_number_style *new,
   fmt_affix_set (&new->neg_suffix, old->neg_suffix.s);
   new->decimal = old->decimal;
   new->grouping = old->grouping;
+  new->extra_bytes = old->extra_bytes;
 }
 
 /* Destroys a struct fmt_number_style. */
@@ -985,20 +995,20 @@ fmt_number_style_destroy (struct fmt_number_style *style)
     }
 }
 
-/* Returns the total width of the standard prefix and suffix for
-   STYLE. */
+/* Returns the total width of the standard prefix and suffix for STYLE, in
+   display columns (e.g. as returned by u8_strwidth()). */
 int
 fmt_affix_width (const struct fmt_number_style *style)
 {
-  return strlen (style->prefix.s) + strlen (style->suffix.s);
+  return style->prefix.width + style->suffix.width;
 }
 
-/* Returns the total width of the negative prefix and suffix for
-   STYLE. */
+/* Returns the total width of the negative prefix and suffix for STYLE, in
+   display columns (e.g. as returned by u8_strwidth()). */
 int
 fmt_neg_affix_width (const struct fmt_number_style *style)
 {
-  return strlen (style->neg_prefix.s) + strlen (style->neg_suffix.s);
+  return style->neg_prefix.width + style->neg_suffix.width;
 }
 
 /* Returns the struct fmt_desc for the given format TYPE. */
diff --git a/src/data/format.h b/src/data/format.h
index 55643ab..7df3744 100644
--- a/src/data/format.h
+++ b/src/data/format.h
@@ -146,7 +146,8 @@ void fmt_settings_set_style (struct fmt_settings *, enum fmt_type,
 /* A prefix or suffix for a numeric output format. */
 struct fmt_affix
   {
-    char *s;                    /* String contents of affix. */
+    char *s;                    /* String contents of affix, in UTF-8. */
+    int width;                  /* Display width in columns (see wcwidth()). */
   };
 
 /* A numeric output style. */
@@ -158,6 +159,15 @@ struct fmt_number_style
     struct fmt_affix neg_suffix; /* Negative suffix. */
     char decimal;                /* Decimal point: '.' or ','. */
     char grouping;               /* Grouping character: ',', '.', or 0. */
+
+    /* A fmt_affix may require more bytes than its display width; for example,
+       U+00A5 (¥) is 2 bytes in UTF-8 but occupies only one display column.
+       This member is the sum of the number of bytes required by all of the
+       fmt_affix members in this struct, minus their display widths.  Thus, it
+       can be used to size memory allocations: for example, the formatted
+       result of CCA20.5 requires no more than (20 + extra_bytes) bytes in
+       UTF-8. */
+    int extra_bytes;
   };
 
 int fmt_affix_width (const struct fmt_number_style *);
-- 
1.7.2.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]