bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: strtod bugs


From: Eric Blake
Subject: Re: strtod bugs
Date: Sat, 29 Mar 2008 21:35:00 -0600
User-agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.12) Gecko/20080213 Thunderbird/2.0.0.12 Mnenhy/0.7.5.666

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

According to Eric Blake on 3/29/2008 3:29 PM:
| Therefore, I'm asking for opinions - should I apply this patch as is, to
| at least guarantee 'inf' and 'nan' parsing on all platforms, even though
| it can cause regressions on the accuracy of the parse and loses hex-nan
| for platforms like cygwin 1.5.24 or glibc 2.3.x?  Does anyone feel up to
| the task of writing a hex-nan parser?

I meant hex-float, not hex-nan.  At any rate, I want to release M4 1.4.11
with C99 parsing capabilities (the 1.4.10 testsuite failed on mingw,
OpenBSD, and any other platform where strtod can't parse "inf"), so here's
the followon that implements hex-float parsing.

|  Should we pull in glibc's or gmp's
| algorithm for accurately parsing decimal into floating point, rather than
| relying on the inaccuracy of pow (and in the process, avoid pulling in
| -lm)?

At this point, I'm okay with the inaccuracies in gnulib's implementation;
for platforms like glibc 2.3.6, it becomes a tradeoff of common-case C99
parsing improvements vs. regressions in corner cases of rounding.  And
hopefully, it won't be too long before a perfectly working glibc version
is available in distros.  We can improve gnulib later, but this is a good
first start.

Therefore, I'm going ahead and committing this series.

- --
Don't work too hard, make some time for fun as well!

Eric Blake             address@hidden
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.8 (Cygwin)
Comment: Public key at home.comcast.net/~ericblake/eblake.gpg
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iEYEARECAAYFAkfvCmQACgkQ84KuGfSFAYDGqwCfS4RYsM4Ew7L2Oq4armMSnpwJ
niQAnjlPnedGRzmrTH0CgDhzXOVr4XK3
=ldVo
-----END PGP SIGNATURE-----
>From bdc599c7bcdbb9b5c1682c3af8bc22c54325dff9 Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Sat, 29 Mar 2008 21:24:07 -0600
Subject: [PATCH] Add hex float support.

* modules/strtod (Depends-on): Add c-ctype.
(Link): Mention POW_LIB.
* lib/strtod.c (strtod): Recognize hex floats.  Don't allow
whitespace between 'e' and exponent.
* tests/test-strtod.c (main): Enable hex float tests.
* doc/posix-functions/strtod.texi (strtod): Document what gnulib
now provides.

Signed-off-by: Eric Blake <address@hidden>
---
 ChangeLog                       |    9 +++
 doc/posix-functions/strtod.texi |   19 +++--
 lib/strtod.c                    |  139 +++++++++++++++++++++++++++------------
 modules/strtod                  |    4 +
 tests/test-strtod.c             |   12 +++-
 5 files changed, 130 insertions(+), 53 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 1e0254d..f7072a6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,14 @@
 2008-03-29  Eric Blake  <address@hidden>
 
+       Add hex float support.
+       * modules/strtod (Depends-on): Add c-ctype.
+       (Link): Mention POW_LIB.
+       * lib/strtod.c (strtod): Recognize hex floats.  Don't allow
+       whitespace between 'e' and exponent.
+       * tests/test-strtod.c (main): Enable hex float tests.
+       * doc/posix-functions/strtod.texi (strtod): Document what gnulib
+       now provides.
+
        Document various strtod bugs, with some fixes.
        * doc/posix-functions/strtod.texi (strtod): Document bugs with
        "-0x", "inf", "nan", and hex constants.
diff --git a/doc/posix-functions/strtod.texi b/doc/posix-functions/strtod.texi
index 349802a..fecade3 100644
--- a/doc/posix-functions/strtod.texi
+++ b/doc/posix-functions/strtod.texi
@@ -18,14 +18,6 @@ Old versions of Linux.
 @item
 This function returns a wrong end pointer on some platforms:
 Solaris 2.4.
address@hidden itemize
-
-Portability problems not fixed by Gnulib:
address@hidden
address@hidden
-This function returns a positive value for negative underflow on some
-platforms:
-glibc 2.4, Mingw, Cygwin.
 
 @item
 This function fails to do a valid parse of @samp{-0x} on some
@@ -53,9 +45,20 @@ glibc 2.4.
 This function fails to parse C99 hexadecimal floating point on some
 platforms:
 Solaris 8, Mingw, OpenBSD 4.0.
address@hidden itemize
+
+Portability problems not fixed by Gnulib:
address@hidden
address@hidden
+This function returns a positive value for negative underflow on some
+platforms:
+glibc 2.4, Mingw, Cygwin.
 
 @item
 This function fails to correctly parse very long strings on some
 platforms:
 Mingw, Cygwin.
+
address@hidden
+The replacement function does not always return correctly rounded results.
 @end itemize
diff --git a/lib/strtod.c b/lib/strtod.c
index 147a5bf..0ebbb33 100644
--- a/lib/strtod.c
+++ b/lib/strtod.c
@@ -25,12 +25,14 @@
 #include <stdbool.h>
 #include <string.h>
 
+#include "c-ctype.h"
+
 /* Convert NPTR to a double.  If ENDPTR is not NULL, a pointer to the
    character after the last one used in the number is put in *ENDPTR.  */
 double
 strtod (const char *nptr, char **endptr)
 {
-  const char *s;
+  const unsigned char *s;
   bool negative = false;
 
   /* The number so far.  */
@@ -38,6 +40,7 @@ strtod (const char *nptr, char **endptr)
 
   bool got_dot;                        /* Found a decimal point.  */
   bool got_digit;              /* Seen any digits.  */
+  bool hex = false;            /* Look for hex float exponent.  */
 
   /* The exponent of the number.  */
   long int exponent;
@@ -51,7 +54,7 @@ strtod (const char *nptr, char **endptr)
   s = nptr;
 
   /* Eat whitespace.  */
-  while (isspace ((unsigned char) *s))
+  while (isspace (*s))
     ++s;
 
   /* Get the sign.  */
@@ -63,59 +66,104 @@ strtod (const char *nptr, char **endptr)
   got_dot = false;
   got_digit = false;
   exponent = 0;
-  for (;; ++s)
+
+  /* Check for hex float.  */
+  if (*s == '0' && c_tolower (s[1]) == 'x'
+      && (c_isxdigit (s[2]) || ('.' == s[2] && c_isxdigit (s[3]))))
     {
-      if ('0' <= *s && *s <= '9')
+      hex = true;
+      s += 2;
+      for (;; ++s)
        {
-         got_digit = true;
-
-         /* Make sure that multiplication by 10 will not overflow.  */
-         if (num > DBL_MAX * 0.1)
-           /* The value of the digit doesn't matter, since we have already
-              gotten as many digits as can be represented in a `double'.
-              This doesn't necessarily mean the result will overflow.
-              The exponent may reduce it to within range.
-
-              We just need to record that there was another
-              digit so that we can multiply by 10 later.  */
-           ++exponent;
+         if (c_isxdigit (*s))
+           {
+             got_digit = true;
+
+             /* Make sure that multiplication by 16 will not overflow.  */
+             if (num > DBL_MAX / 16)
+               /* The value of the digit doesn't matter, since we have already
+                  gotten as many digits as can be represented in a `double'.
+                  This doesn't necessarily mean the result will overflow.
+                  The exponent may reduce it to within range.
+
+                  We just need to record that there was another
+                  digit so that we can multiply by 16 later.  */
+               ++exponent;
+             else
+               num = ((num * 16.0)
+                      + (c_tolower (*s) - (c_isdigit (*s) ? '0' : 'a' - 10)));
+
+             /* Keep track of the number of digits after the decimal point.
+                If we just divided by 16 here, we would lose precision.  */
+             if (got_dot)
+               --exponent;
+           }
+         else if (!got_dot && *s == '.')
+           /* Record that we have found the decimal point.  */
+           got_dot = true;
          else
-           num = (num * 10.0) + (*s - '0');
+           /* Any other character terminates the number.  */
+           break;
+       }
+    }
+
+  /* Not a hex float.  */
+  else
+    {
+      for (;; ++s)
+       {
+         if (c_isdigit (*s))
+           {
+             got_digit = true;
+
+             /* Make sure that multiplication by 10 will not overflow.  */
+             if (num > DBL_MAX * 0.1)
+               /* The value of the digit doesn't matter, since we have already
+                  gotten as many digits as can be represented in a `double'.
+                  This doesn't necessarily mean the result will overflow.
+                  The exponent may reduce it to within range.
+
+                  We just need to record that there was another
+                  digit so that we can multiply by 10 later.  */
+               ++exponent;
+             else
+               num = (num * 10.0) + (*s - '0');
 
-         /* Keep track of the number of digits after the decimal point.
-            If we just divided by 10 here, we would lose precision.  */
-         if (got_dot)
-           --exponent;
+             /* Keep track of the number of digits after the decimal point.
+                If we just divided by 10 here, we would lose precision.  */
+             if (got_dot)
+               --exponent;
+           }
+         else if (!got_dot && *s == '.')
+           /* Record that we have found the decimal point.  */
+           got_dot = true;
+         else
+           /* Any other character terminates the number.  */
+           break;
        }
-      else if (!got_dot && *s == '.')
-       /* Record that we have found the decimal point.  */
-       got_dot = true;
-      else
-       /* Any other character terminates the number.  */
-       break;
     }
 
   if (!got_digit)
     {
       /* Check for infinities and NaNs.  */
-      if (tolower ((unsigned char) *s) == 'i'
-         && tolower ((unsigned char) s[1]) == 'n'
-         && tolower ((unsigned char) s[2]) == 'f')
+      if (c_tolower (*s) == 'i'
+         && c_tolower (s[1]) == 'n'
+         && c_tolower (s[2]) == 'f')
        {
          s += 3;
          num = HUGE_VAL;
-         if (tolower ((unsigned char) *s) == 'i'
-             && tolower ((unsigned char) s[1]) == 'n'
-             && tolower ((unsigned char) s[2]) == 'i'
-             && tolower ((unsigned char) s[3]) == 't'
-             && tolower ((unsigned char) s[4]) == 'y')
+         if (c_tolower (*s) == 'i'
+             && c_tolower (s[1]) == 'n'
+             && c_tolower (s[2]) == 'i'
+             && c_tolower (s[3]) == 't'
+             && c_tolower (s[4]) == 'y')
            s += 5;
          goto valid;
        }
 #ifdef NAN
-      else if (tolower ((unsigned char) *s) == 'n'
-              && tolower ((unsigned char) s[1]) == 'a'
-              && tolower ((unsigned char) s[2]) == 'n')
+      else if (c_tolower (*s) == 'n'
+              && c_tolower (s[1]) == 'a'
+              && c_tolower (s[2]) == 'n')
        {
          s += 3;
          num = NAN;
@@ -126,8 +174,8 @@ strtod (const char *nptr, char **endptr)
             hexadecimal number, but the result is still a NaN.  */
          if (*s == '(')
            {
-             const char *p = s + 1;
-             while (isalnum ((unsigned char) *p))
+             const unsigned char *p = s + 1;
+             while (c_isalnum (*p))
                p++;
              if (*p == ')')
                s = p + 1;
@@ -138,7 +186,7 @@ strtod (const char *nptr, char **endptr)
       goto noconv;
     }
 
-  if (tolower ((unsigned char) *s) == 'e')
+  if (c_tolower (*s) == (hex ? 'p' : 'e') && !isspace (s[1]))
     {
       /* Get the exponent specified after the `e' or `E'.  */
       int save = errno;
@@ -160,7 +208,7 @@ strtod (const char *nptr, char **endptr)
          else
            goto overflow;
        }
-      else if (end == s)
+      else if (end == (char *) s)
        /* There was no exponent.  Reset END to point to
           the 'e' or 'E', so *ENDPTR will be set there.  */
        end = (char *) s - 1;
@@ -172,6 +220,13 @@ strtod (const char *nptr, char **endptr)
   if (num == 0.0)
     goto valid;
 
+  if (hex)
+    {
+      /* ldexp takes care of range errors.  */
+      num = ldexp (num, exponent);
+      goto valid;
+    }
+
   /* Multiply NUM by 10 to the EXPONENT power,
      checking for overflow and underflow.  */
 
diff --git a/modules/strtod b/modules/strtod
index 5f2dce5..9a4a0f3 100644
--- a/modules/strtod
+++ b/modules/strtod
@@ -6,6 +6,7 @@ lib/strtod.c
 m4/strtod.m4
 
 Depends-on:
+c-ctype
 stdbool
 stdlib
 
@@ -19,6 +20,9 @@ LIBS += $(POW_LIB)
 Include:
 <stdlib.h>
 
+Link:
+$(POW_LIB)
+
 License:
 LGPL
 
diff --git a/tests/test-strtod.c b/tests/test-strtod.c
index c54a8fd..59fe7e7 100644
--- a/tests/test-strtod.c
+++ b/tests/test-strtod.c
@@ -333,6 +333,15 @@ main ()
   }
   {
     errno = 0;
+    const char input[] = "1E 2";
+    char *ptr;
+    double result = strtod (input, &ptr);
+    ASSERT (result == 1.0);
+    ASSERT (ptr == input + 1);
+    ASSERT (errno == 0);
+  }
+  {
+    errno = 0;
     const char input[] = "0x";
     char *ptr;
     double result = strtod (input, &ptr);
@@ -637,8 +646,6 @@ main ()
   }
 
   /* Hex.  */
-#if 0
-  /* TODO - gnulib doesn't implement this yet.  */
   {
     errno = 0;
     const char input[] = "0xa";
@@ -693,7 +700,6 @@ main ()
     ASSERT (ptr == input + 6);
     ASSERT (errno == 0);
   }
-#endif
 
   /* Large buffers.  */
   {
-- 
1.5.4


reply via email to

[Prev in Thread] Current Thread [Next in Thread]