[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
fmod, fmodl: rewrite
From: |
Bruno Haible |
Subject: |
fmod, fmodl: rewrite |
Date: |
Sun, 04 Mar 2012 20:59:25 +0100 |
User-agent: |
KMail/4.7.4 (Linux/3.1.0-1.2-desktop; KDE/4.7.4; x86_64; ; ) |
The additional unit tests for fmod() and fmodl() revealed that gnulib's
replacement algorithm returned wrong results when the quotient of the
two arguments was large. For example:
fmod (1e30, 1.5)
In this case the result was not only wrong but also out-of-range.
A complete rewrite is necessary to fix it.
2012-03-04 Bruno Haible <address@hidden>
fmod, fmodl: Fix computation for large quotients x / y.
* lib/fmod.c: Completely rewritten.
* lib/fmodl.c (fmodl): Use implementation of fmod.c with
USE_LONG_DOUBLE.
* modules/fmod (Depends-on): Add isfinite, signbit, fabs, frexp, ldexp,
isnand. Remove fma.
* modules/fmodl (Depends-on): Add float, isfinite, signbit, fabsl,
frexpl, ldexpl, isnanl. Remove fma.
* m4/fmod.m4 (gl_FUNC_FMOD): Update computation of FMOD_LIBM.
* m4/fmodl.m4 (gl_FUNC_FMODL): Update computation of FMODL_LIBM.
================================= lib/fmod.c =================================
/* Remainder.
Copyright (C) 2011-2012 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#if ! defined USE_LONG_DOUBLE
# include <config.h>
#endif
/* Specification. */
#include <math.h>
#include <float.h>
#include <stdlib.h>
#ifdef USE_LONG_DOUBLE
# define FMOD fmodl
# define DOUBLE long double
# define MANT_DIG LDBL_MANT_DIG
# define L_(literal) literal##L
# define FREXP frexpl
# define LDEXP ldexpl
# define FABS fabsl
# define TRUNC truncl
# define ISNAN isnanl
#else
# define FMOD fmod
# define DOUBLE double
# define MANT_DIG DBL_MANT_DIG
# define L_(literal) literal
# define FREXP frexp
# define LDEXP ldexp
# define FABS fabs
# define TRUNC trunc
# define ISNAN isnand
#endif
/* MSVC with option -fp:strict refuses to compile constant initializers that
contain floating-point operations. Pacify this compiler. */
#ifdef _MSC_VER
# pragma fenv_access (off)
#endif
#undef NAN
#if defined _MSC_VER
static DOUBLE zero;
# define NAN (zero / zero)
#else
# define NAN (L_(0.0) / L_(0.0))
#endif
/* To avoid rounding errors during the computation of x - q * y,
there are three possibilities:
- Use fma (- q, y, x).
- Have q be a single bit at a time, and compute x - q * y
through a subtraction.
- Have q be at most MANT_DIG/2 bits at a time, and compute
x - q * y by splitting y into two halves:
y = y1 * 2^(MANT_DIG/2) + y0
x - q * y = (x - q * y1 * 2^MANT_DIG/2) - q * y0.
The latter is probably the most efficient. */
/* Number of bits in a limb. */
#define LIMB_BITS (MANT_DIG / 2)
/* 2^LIMB_BITS. */
static const DOUBLE TWO_LIMB_BITS =
/* Assume LIMB_BITS <= 3 * 31.
Use the identity
n = floor(n/3) + floor((n+1)/3) + floor((n+2)/3). */
(DOUBLE) (1U << (LIMB_BITS / 3))
* (DOUBLE) (1U << ((LIMB_BITS + 1) / 3))
* (DOUBLE) (1U << ((LIMB_BITS + 2) / 3));
/* 2^-LIMB_BITS. */
static const DOUBLE TWO_LIMB_BITS_INVERSE =
/* Assume LIMB_BITS <= 3 * 31.
Use the identity
n = floor(n/3) + floor((n+1)/3) + floor((n+2)/3). */
L_(1.0) / ((DOUBLE) (1U << (LIMB_BITS / 3))
* (DOUBLE) (1U << ((LIMB_BITS + 1) / 3))
* (DOUBLE) (1U << ((LIMB_BITS + 2) / 3)));
DOUBLE
FMOD (DOUBLE x, DOUBLE y)
{
if (isfinite (x) && isfinite (y) && y != L_(0.0))
{
if (x == L_(0.0))
/* Return x, regardless of the sign of y. */
return x;
{
int negate = ((!signbit (x)) ^ (!signbit (y)));
/* Take the absolute value of x and y. */
x = FABS (x);
y = FABS (y);
/* Trivial case that requires no computation. */
if (x < y)
return (negate ? - x : x);
{
int yexp;
DOUBLE ym;
DOUBLE y1;
DOUBLE y0;
int k;
DOUBLE x2;
DOUBLE x1;
DOUBLE x0;
/* Write y = 2^yexp * (y1 * 2^-LIMB_BITS + y0 * 2^-(2*LIMB_BITS))
where y1 is an integer, 2^(LIMB_BITS-1) <= y1 < 2^LIMB_BITS,
y1 has at most LIMB_BITS bits,
0 <= y0 < 2^LIMB_BITS,
y0 has at most (MANT_DIG + 1) / 2 bits. */
ym = FREXP (y, &yexp);
ym = ym * TWO_LIMB_BITS;
y1 = TRUNC (ym);
y0 = (ym - y1) * TWO_LIMB_BITS;
/* Write
x = 2^(yexp+(k-3)*LIMB_BITS)
* (x2 * 2^(2*LIMB_BITS) + x1 * 2^LIMB_BITS + x0)
where x2, x1, x0 are each integers >= 0, < 2^LIMB_BITS. */
{
int xexp;
DOUBLE xm = FREXP (x, &xexp);
/* Since we know x >= y, we know xexp >= yexp. */
xexp -= yexp;
/* Compute k = ceil(xexp / LIMB_BITS). */
k = (xexp + LIMB_BITS - 1) / LIMB_BITS;
/* Note: (k - 1) * LIMB_BITS + 1 <= xexp <= k * LIMB_BITS. */
/* Note: 0.5 <= xm < 1.0. */
xm = LDEXP (xm, xexp - (k - 1) * LIMB_BITS);
/* Note: Now xm < 2^(xexp - (k - 1) * LIMB_BITS) <= 2^LIMB_BITS
and xm >= 0.5 * 2^(xexp - (k - 1) * LIMB_BITS) >= 1.0
and xm has at most MANT_DIG <= 2*LIMB_BITS+1 bits. */
x2 = TRUNC (xm);
x1 = (xm - x2) * TWO_LIMB_BITS;
/* Split off x0 from x1 later. */
}
/* Test whether [x2,x1,0] >= 2^LIMB_BITS * [y1,y0]. */
if (x2 > y1 || (x2 == y1 && x1 >= y0))
{
/* Subtract 2^LIMB_BITS * [y1,y0] from [x2,x1,0]. */
x2 -= y1;
x1 -= y0;
if (x1 < L_(0.0))
{
if (!(x2 >= L_(1.0)))
abort ();
x2 -= L_(1.0);
x1 += TWO_LIMB_BITS;
}
}
/* Split off x0 from x1. */
{
DOUBLE x1int = TRUNC (x1);
x0 = TRUNC ((x1 - x1int) * TWO_LIMB_BITS);
x1 = x1int;
}
for (; k > 0; k--)
{
/* Multiprecision division of the limb sequence [x2,x1,x0]
by [y1,y0]. */
/* Here [x2,x1,x0] < 2^LIMB_BITS * [y1,y0]. */
/* The first guess takes into account only [x2,x1] and [y1].
By Knuth's theorem, we know that
q* = min (floor ([x2,x1] / [y1]), 2^LIMB_BITS - 1)
and
q = floor ([x2,x1,x0] / [y1,y0])
are not far away:
q* - 2 <= q <= q* + 1.
Proof:
a) q* * y1 <= floor ([x2,x1] / [y1]) * y1 <= [x2,x1].
Hence
[x2,x1,x0] - q* * [y1,y0]
= 2^LIMB_BITS * ([x2,x1] - q* * [y1]) + x0 - q* * y0
>= x0 - q* * y0
>= - q* * y0
> - 2^(2*LIMB_BITS)
>= - 2 * [y1,y0]
So
[x2,x1,x0] > (q* - 2) * [y1,y0].
b) If q* = floor ([x2,x1] / [y1]), then
[x2,x1] < (q* + 1) * y1
Hence
[x2,x1,x0] - q* * [y1,y0]
= 2^LIMB_BITS * ([x2,x1] - q* * [y1]) + x0 - q* * y0
<= 2^LIMB_BITS * (y1 - 1) + x0 - q* * y0
<= 2^LIMB_BITS * (2^LIMB_BITS-2) + (2^LIMB_BITS-1) - 0
< 2^(2*LIMB_BITS)
<= 2 * [y1,y0]
So
[x2,x1,x0] < (q* + 2) * [y1,y0].
and so
q < q* + 2
which implies
q <= q* + 1.
In the other case, q* = 2^LIMB_BITS - 1. Then trivially
q < 2^LIMB_BITS = q* + 1.
We know that floor ([x2,x1] / [y1]) >= 2^LIMB_BITS if and
only if x2 >= y1. */
DOUBLE q =
(x2 >= y1
? TWO_LIMB_BITS - L_(1.0)
: TRUNC ((x2 * TWO_LIMB_BITS + x1) / y1));
if (q > L_(0.0))
{
/* Compute
[x2,x1,x0] - q* * [y1,y0]
= 2^LIMB_BITS * ([x2,x1] - q* * [y1]) + x0 - q* * y0. */
DOUBLE q_y1 = q * y1; /* exact, at most 2*LIMB_BITS bits */
DOUBLE q_y1_1 = TRUNC (q_y1 * TWO_LIMB_BITS_INVERSE);
DOUBLE q_y1_0 = q_y1 - q_y1_1 * TWO_LIMB_BITS;
DOUBLE q_y0 = q * y0; /* exact, at most MANT_DIG bits */
DOUBLE q_y0_1 = TRUNC (q_y0 * TWO_LIMB_BITS_INVERSE);
DOUBLE q_y0_0 = q_y0 - q_y0_1 * TWO_LIMB_BITS;
x2 -= q_y1_1;
x1 -= q_y1_0;
x1 -= q_y0_1;
x0 -= q_y0_0;
/* Move negative carry from x0 to x1 and from x1 to x2. */
if (x0 < L_(0.0))
{
x0 += TWO_LIMB_BITS;
x1 -= L_(1.0);
}
if (x1 < L_(0.0))
{
x1 += TWO_LIMB_BITS;
x2 -= L_(1.0);
if (x1 < L_(0.0)) /* not sure this can happen */
{
x1 += TWO_LIMB_BITS;
x2 -= L_(1.0);
}
}
if (x2 < L_(0.0))
{
/* Reduce q by 1. */
x1 += y1;
x0 += y0;
/* Move overflow from x0 to x1 and from x1 to x0. */
if (x0 >= TWO_LIMB_BITS)
{
x0 -= TWO_LIMB_BITS;
x1 += L_(1.0);
}
if (x1 >= TWO_LIMB_BITS)
{
x1 -= TWO_LIMB_BITS;
x2 += L_(1.0);
}
if (x2 < L_(0.0))
{
/* Reduce q by 1 again. */
x1 += y1;
x0 += y0;
/* Move overflow from x0 to x1 and from x1 to x0. */
if (x0 >= TWO_LIMB_BITS)
{
x0 -= TWO_LIMB_BITS;
x1 += L_(1.0);
}
if (x1 >= TWO_LIMB_BITS)
{
x1 -= TWO_LIMB_BITS;
x2 += L_(1.0);
}
if (x2 < L_(0.0))
/* Shouldn't happen, because we proved that
q >= q* - 2. */
abort ();
}
}
}
if (x2 > L_(0.0)
|| x1 > y1
|| (x1 == y1 && x0 >= y0))
{
/* Increase q by 1. */
x1 -= y1;
x0 -= y0;
/* Move negative carry from x0 to x1 and from x1 to x2. */
if (x0 < L_(0.0))
{
x0 += TWO_LIMB_BITS;
x1 -= L_(1.0);
}
if (x1 < L_(0.0))
{
x1 += TWO_LIMB_BITS;
x2 -= L_(1.0);
}
if (x2 < L_(0.0))
abort ();
if (x2 > L_(0.0)
|| x1 > y1
|| (x1 == y1 && x0 >= y0))
/* Shouldn't happen, because we proved that
q <= q* + 1. */
abort ();
}
/* Here [x2,x1,x0] < [y1,y0]. */
/* Next round. */
x2 = x1;
#if (MANT_DIG + 1) / 2 > LIMB_BITS /* y0 can have a fractional bit */
x1 = TRUNC (x0);
x0 = (x0 - x1) * TWO_LIMB_BITS;
#else
x1 = x0;
x0 = L_(0.0);
#endif
/* Here [x2,x1,x0] < 2^LIMB_BITS * [y1,y0]. */
}
/* Here k = 0.
The result is
2^(yexp-3*LIMB_BITS)
* (x2 * 2^(2*LIMB_BITS) + x1 * 2^LIMB_BITS + x0). */
{
DOUBLE r =
LDEXP ((x2 * TWO_LIMB_BITS + x1) * TWO_LIMB_BITS + x0,
yexp - 3 * LIMB_BITS);
return (negate ? - r : r);
}
}
}
}
else
{
if (ISNAN (x) || ISNAN (y))
return x + y; /* NaN */
else if (isinf (y))
return x;
else
/* x infinite or y zero */
return NAN;
}
}
==============================================================================
--- lib/fmodl.c.orig Sun Mar 4 20:46:31 2012
+++ lib/fmodl.c Sun Mar 4 13:28:51 2012
@@ -29,56 +29,7 @@
#else
-long double
-fmodl (long double x, long double y)
-{
- if (isinf (y))
- return x;
- else
- {
- long double q = - truncl (x / y);
- long double r = fmal (q, y, x); /* = x + q * y, computed in one step */
- /* Correct possible rounding errors in the quotient x / y. */
- if (y >= 0)
- {
- if (x >= 0)
- {
- /* Expect 0 <= r < y. */
- if (r < 0)
- q += 1, r = fmal (q, y, x);
- else if (r >= y)
- q -= 1, r = fmal (q, y, x);
- }
- else
- {
- /* Expect - y < r <= 0. */
- if (r > 0)
- q -= 1, r = fmal (q, y, x);
- else if (r <= - y)
- q += 1, r = fmal (q, y, x);
- }
- }
- else
- {
- if (x >= 0)
- {
- /* Expect 0 <= r < - y. */
- if (r < 0)
- q -= 1, r = fmal (q, y, x);
- else if (r >= - y)
- q += 1, r = fmal (q, y, x);
- }
- else
- {
- /* Expect y < r <= 0. */
- if (r > 0)
- q += 1, r = fmal (q, y, x);
- else if (r <= y)
- q -= 1, r = fmal (q, y, x);
- }
- }
- return r;
- }
-}
+# define USE_LONG_DOUBLE
+# include "fmod.c"
#endif
--- m4/fmod.m4.orig Sun Mar 4 20:46:31 2012
+++ m4/fmod.m4 Sun Mar 4 18:22:03 2012
@@ -1,4 +1,4 @@
-# fmod.m4 serial 2
+# fmod.m4 serial 3
dnl Copyright (C) 2011-2012 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
@@ -67,18 +67,36 @@
])
if test $REPLACE_FMOD = 1; then
dnl Find libraries needed to link lib/fmod.c.
+ AC_REQUIRE([gl_FUNC_FABS])
+ AC_REQUIRE([gl_FUNC_FREXP])
AC_REQUIRE([gl_FUNC_TRUNC])
- AC_REQUIRE([gl_FUNC_FMA])
+ AC_REQUIRE([gl_FUNC_LDEXP])
+ AC_REQUIRE([gl_FUNC_ISNAND])
FMOD_LIBM=
+ dnl Append $FABS_LIBM to FMOD_LIBM, avoiding gratuitous duplicates.
+ case " $FMOD_LIBM " in
+ *" $FABS_LIBM "*) ;;
+ *) FMOD_LIBM="$FMOD_LIBM $FABS_LIBM" ;;
+ esac
+ dnl Append $FREXP_LIBM to FMOD_LIBM, avoiding gratuitous duplicates.
+ case " $FMOD_LIBM " in
+ *" $FREXP_LIBM "*) ;;
+ *) FMOD_LIBM="$FMOD_LIBM $FREXP_LIBM" ;;
+ esac
dnl Append $TRUNC_LIBM to FMOD_LIBM, avoiding gratuitous duplicates.
case " $FMOD_LIBM " in
*" $TRUNC_LIBM "*) ;;
*) FMOD_LIBM="$FMOD_LIBM $TRUNC_LIBM" ;;
esac
- dnl Append $FMA_LIBM to FMOD_LIBM, avoiding gratuitous duplicates.
+ dnl Append $LDEXP_LIBM to FMOD_LIBM, avoiding gratuitous duplicates.
+ case " $FMOD_LIBM " in
+ *" $LDEXP_LIBM "*) ;;
+ *) FMOD_LIBM="$FMOD_LIBM $LDEXP_LIBM" ;;
+ esac
+ dnl Append $ISNAND_LIBM to FMOD_LIBM, avoiding gratuitous duplicates.
case " $FMOD_LIBM " in
- *" $FMA_LIBM "*) ;;
- *) FMOD_LIBM="$FMOD_LIBM $FMA_LIBM" ;;
+ *" $ISNAND_LIBM "*) ;;
+ *) FMOD_LIBM="$FMOD_LIBM $ISNAND_LIBM" ;;
esac
fi
])
--- m4/fmodl.m4.orig Sun Mar 4 20:46:31 2012
+++ m4/fmodl.m4 Sun Mar 4 18:22:01 2012
@@ -1,4 +1,4 @@
-# fmodl.m4 serial 2
+# fmodl.m4 serial 3
dnl Copyright (C) 2011-2012 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
@@ -82,18 +82,36 @@
if test $HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 1; then
FMODL_LIBM="$FMOD_LIBM"
else
+ AC_REQUIRE([gl_FUNC_FABSL])
+ AC_REQUIRE([gl_FUNC_FREXPL])
AC_REQUIRE([gl_FUNC_TRUNCL])
- AC_REQUIRE([gl_FUNC_FMAL])
+ AC_REQUIRE([gl_FUNC_LDEXPL])
+ AC_REQUIRE([gl_FUNC_ISNANL])
FMODL_LIBM=
+ dnl Append $FABSL_LIBM to FMODL_LIBM, avoiding gratuitous duplicates.
+ case " $FMODL_LIBM " in
+ *" $FABSL_LIBM "*) ;;
+ *) FMODL_LIBM="$FMODL_LIBM $FABSL_LIBM" ;;
+ esac
+ dnl Append $FREXPL_LIBM to FMODL_LIBM, avoiding gratuitous duplicates.
+ case " $FMODL_LIBM " in
+ *" $FREXPL_LIBM "*) ;;
+ *) FMODL_LIBM="$FMODL_LIBM $FREXPL_LIBM" ;;
+ esac
dnl Append $TRUNCL_LIBM to FMODL_LIBM, avoiding gratuitous duplicates.
case " $FMODL_LIBM " in
*" $TRUNCL_LIBM "*) ;;
*) FMODL_LIBM="$FMODL_LIBM $TRUNCL_LIBM" ;;
esac
- dnl Append $FMAL_LIBM to FMODL_LIBM, avoiding gratuitous duplicates.
+ dnl Append $LDEXPL_LIBM to FMODL_LIBM, avoiding gratuitous duplicates.
+ case " $FMODL_LIBM " in
+ *" $LDEXPL_LIBM "*) ;;
+ *) FMODL_LIBM="$FMODL_LIBM $LDEXPL_LIBM" ;;
+ esac
+ dnl Append $ISNANL_LIBM to FMODL_LIBM, avoiding gratuitous duplicates.
case " $FMODL_LIBM " in
- *" $FMAL_LIBM "*) ;;
- *) FMODL_LIBM="$FMODL_LIBM $FMAL_LIBM" ;;
+ *" $ISNANL_LIBM "*) ;;
+ *) FMODL_LIBM="$FMODL_LIBM $ISNANL_LIBM" ;;
esac
fi
fi
--- modules/fmod.orig Sun Mar 4 20:46:31 2012
+++ modules/fmod Sun Mar 4 18:10:10 2012
@@ -8,9 +8,14 @@
Depends-on:
math
-isinf [test $REPLACE_FMOD = 1]
+isfinite [test $REPLACE_FMOD = 1]
+signbit [test $REPLACE_FMOD = 1]
+fabs [test $REPLACE_FMOD = 1]
+frexp [test $REPLACE_FMOD = 1]
trunc [test $REPLACE_FMOD = 1]
-fma [test $REPLACE_FMOD = 1]
+ldexp [test $REPLACE_FMOD = 1]
+isnand [test $REPLACE_FMOD = 1]
+isinf [test $REPLACE_FMOD = 1]
configure.ac:
gl_FUNC_FMOD
--- modules/fmodl.orig Sun Mar 4 20:46:31 2012
+++ modules/fmodl Sun Mar 4 18:27:21 2012
@@ -9,9 +9,15 @@
Depends-on:
math
fmod [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test
$HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 1]
-isinf [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test
$HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0]
+float [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test
$HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0]
+isfinite [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test
$HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0]
+signbit [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test
$HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0]
+fabsl [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test
$HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0]
+frexpl [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test
$HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0]
truncl [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test
$HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0]
-fmal [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test
$HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0]
+ldexpl [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test
$HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0]
+isnanl [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test
$HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0]
+isinf [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test
$HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0]
configure.ac:
gl_FUNC_FMODL
- fmod, fmodl: rewrite,
Bruno Haible <=