new module fma

bug-gnulib
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
new module fma

From:	Bruno Haible
Subject:	new module fma
Date:	Sun, 6 Nov 2011 02:39:10 +0100
User-agent:	KMail/1.13.6 (Linux/2.6.37.6-0.5-desktop; KDE/4.6.0; x86_64; ; )
The function fma() [1] - shorthand for "fused multiply-add" [2] - computes

  fma(x,y,z) = x * y + z

without intermediate rounding of the product. Its applications include:

  - computation of all the bits (not only the high bits but also the low
    bits) of a product,
  - evaluation of polynomials and transcendental functions with higher
    accuracy,
  - computation of remainders.

Unfortunately, only AIX, Solaris, Cygwin, and the very newest glibc
implement this function correctly.

I'm adding a portable implementation of it, and likewise for fmaf() and
fmal().

[1] http://pubs.opengroup.org/onlinepubs/9699919799/functions/fma.html
[2] http://en.wikipedia.org/wiki/Multiply–accumulate_operation


2011-11-05  Bruno Haible  <address@hidden>

        New module 'fma'.
        * lib/math.in.h (fma): New declaration.
        * lib/fma.c: New file.
        * m4/fma.m4: New file.
        * m4/fegetround.m4: New file.
        * m4/math_h.m4 (gl_MATH_H): Test whethern fma is declared.
        (gl_MATH_H_DEFAULTS): Initialize GNULIB_FMA, HAVE_FMA, REPLACE_FMA.
        * modules/math (Makefile.am): Substitute GNULIB_FMA, HAVE_FMA,
        REPLACE_FMA.
        * modules/fma: New file.
        * doc/posix-functions/fma.texi: Mention the new module and the various
        bugs.

================================== lib/fma.c ==================================
/* Fused multiply-add.
   Copyright (C) 2007, 2009, 2011 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* Written by Bruno Haible <address@hidden>, 2011.  */

#if ! defined USE_LONG_DOUBLE
# include <config.h>
#endif

/* Specification.  */
#include <math.h>

#include <stdbool.h>
#include <stdlib.h>

#if HAVE_FEGETROUND
# include <fenv.h>
#endif

#include "float+.h"
#include "integer_length.h"
#include "verify.h"

#ifdef USE_LONG_DOUBLE
# define FUNC fmal
# define DOUBLE long double
# define FREXP frexpl
# define LDEXP ldexpl
# define MIN_EXP LDBL_MIN_EXP
# define MANT_BIT LDBL_MANT_BIT
# define DECL_ROUNDING DECL_LONG_DOUBLE_ROUNDING
# define BEGIN_ROUNDING() BEGIN_LONG_DOUBLE_ROUNDING ()
# define END_ROUNDING() END_LONG_DOUBLE_ROUNDING ()
# define L_(literal) literal##L
#elif ! defined USE_FLOAT
# define FUNC fma
# define DOUBLE double
# define FREXP frexp
# define LDEXP ldexp
# define MIN_EXP DBL_MIN_EXP
# define MANT_BIT DBL_MANT_BIT
# define DECL_ROUNDING
# define BEGIN_ROUNDING()
# define END_ROUNDING()
# define L_(literal) literal
#else /* defined USE_FLOAT */
# define FUNC fmaf
# define DOUBLE float
# define FREXP frexpf
# define LDEXP ldexpf
# define MIN_EXP FLT_MIN_EXP
# define MANT_BIT FLT_MANT_BIT
# define DECL_ROUNDING
# define BEGIN_ROUNDING()
# define END_ROUNDING()
# define L_(literal) literal##f
#endif

#undef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))

#undef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))

/* It is possible to write an implementation of fused multiply-add with
   floating-point operations alone.  See
     Sylvie Boldo, Guillaume Melquiond:
     Emulation of FMA and correctly-rounded sums: proved algorithms using
     rounding to odd.
     <http://www.lri.fr/~melquion/doc/08-tc.pdf>
   But is it complicated.
   Here we take the simpler (and probably slower) approach of doing
   multi-precision arithmetic.  */

/* We use the naming conventions of GNU gmp, but vastly simpler (and slower)
   algorithms.  */

typedef unsigned int mp_limb_t;
#define GMP_LIMB_BITS 32
verify (sizeof (mp_limb_t) * CHAR_BIT == GMP_LIMB_BITS);

typedef unsigned long long mp_twolimb_t;
#define GMP_TWOLIMB_BITS 64
verify (sizeof (mp_twolimb_t) * CHAR_BIT == GMP_TWOLIMB_BITS);

/* Number of limbs needed for a single DOUBLE.  */
#define NLIMBS1 ((MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS)

/* Number of limbs needed for the accumulator.  */
#define NLIMBS3 (3 * NLIMBS1 + 1)

/* Assuming 0.5 <= x < 1.0:
   Convert the mantissa (x * 2^DBL_MANT_BIT) to a sequence of limbs.  */
static void
decode (DOUBLE x, mp_limb_t limbs[NLIMBS1])
{
  /* I'm not sure whether it's safe to cast a 'double' value between
     2^31 and 2^32 to 'unsigned int', therefore play safe and cast only
     'double' values between 0 and 2^31 (to 'unsigned int' or 'int',
     doesn't matter).
     So, we split the MANT_BIT bits of x into a number of chunks of
     at most 31 bits.  */
  enum { chunk_count = (MANT_BIT - 1) / 31 + 1 };
  /* Variables used for storing the bits limb after limb.  */
  mp_limb_t *p = limbs + NLIMBS1 - 1;
  mp_limb_t accu = 0;
  unsigned int bits_needed = MANT_BIT - (NLIMBS1 - 1) * GMP_LIMB_BITS;
  /* The bits bits_needed-1...0 need to be ORed into the accu.
     1 <= bits_needed <= GMP_LIMB_BITS.  */
  /* Unroll the first 4 loop rounds.  */
  if (chunk_count > 0)
    {
      /* Here we still have MANT_BIT-0*31 bits to extract from x.  */
      enum { chunk_bits = MIN (31, MANT_BIT - 0 * 31) }; /* > 0, <= 31 */
      mp_limb_t d;
      x *= (mp_limb_t) 1 << chunk_bits;
      d = (int) x; /* 0 <= d < 2^chunk_bits.  */
      x -= d;
      if (!(x >= L_(0.0) && x < L_(1.0)))
        abort ();
      if (bits_needed < chunk_bits)
        {
          /* store bits_needed bits */
          accu |= d >> (chunk_bits - bits_needed);
          *p = accu;
          if (p == limbs)
            goto done;
          p--;
          /* hold (chunk_bits - bits_needed) bits */
          accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
          bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
        }
      else
        {
          /* store chunk_bits bits */
          accu |= d << (bits_needed - chunk_bits);
          bits_needed -= chunk_bits;
          if (bits_needed == 0)
            {
              *p = accu;
              if (p == limbs)
                goto done;
              p--;
              accu = 0;
              bits_needed = GMP_LIMB_BITS;
            }
        }
    }
  if (chunk_count > 1)
    {
      /* Here we still have MANT_BIT-1*31 bits to extract from x.  */
      enum { chunk_bits = MIN (31, MAX (MANT_BIT - 1 * 31, 0)) }; /* > 0, <= 31 
*/
      mp_limb_t d;
      x *= (mp_limb_t) 1 << chunk_bits;
      d = (int) x; /* 0 <= d < 2^chunk_bits.  */
      x -= d;
      if (!(x >= L_(0.0) && x < L_(1.0)))
        abort ();
      if (bits_needed < chunk_bits)
        {
          /* store bits_needed bits */
          accu |= d >> (chunk_bits - bits_needed);
          *p = accu;
          if (p == limbs)
            goto done;
          p--;
          /* hold (chunk_bits - bits_needed) bits */
          accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
          bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
        }
      else
        {
          /* store chunk_bits bits */
          accu |= d << (bits_needed - chunk_bits);
          bits_needed -= chunk_bits;
          if (bits_needed == 0)
            {
              *p = accu;
              if (p == limbs)
                goto done;
              p--;
              accu = 0;
              bits_needed = GMP_LIMB_BITS;
            }
        }
    }
  if (chunk_count > 2)
    {
      /* Here we still have MANT_BIT-2*31 bits to extract from x.  */
      enum { chunk_bits = MIN (31, MAX (MANT_BIT - 2 * 31, 0)) }; /* > 0, <= 31 
*/
      mp_limb_t d;
      x *= (mp_limb_t) 1 << chunk_bits;
      d = (int) x; /* 0 <= d < 2^chunk_bits.  */
      x -= d;
      if (!(x >= L_(0.0) && x < L_(1.0)))
        abort ();
      if (bits_needed < chunk_bits)
        {
          /* store bits_needed bits */
          accu |= d >> (chunk_bits - bits_needed);
          *p = accu;
          if (p == limbs)
            goto done;
          p--;
          /* hold (chunk_bits - bits_needed) bits */
          accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
          bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
        }
      else
        {
          /* store chunk_bits bits */
          accu |= d << (bits_needed - chunk_bits);
          bits_needed -= chunk_bits;
          if (bits_needed == 0)
            {
              *p = accu;
              if (p == limbs)
                goto done;
              p--;
              accu = 0;
              bits_needed = GMP_LIMB_BITS;
            }
        }
    }
  if (chunk_count > 3)
    {
      /* Here we still have MANT_BIT-3*31 bits to extract from x.  */
      enum { chunk_bits = MIN (31, MAX (MANT_BIT - 3 * 31, 0)) }; /* > 0, <= 31 
*/
      mp_limb_t d;
      x *= (mp_limb_t) 1 << chunk_bits;
      d = (int) x; /* 0 <= d < 2^chunk_bits.  */
      x -= d;
      if (!(x >= L_(0.0) && x < L_(1.0)))
        abort ();
      if (bits_needed < chunk_bits)
        {
          /* store bits_needed bits */
          accu |= d >> (chunk_bits - bits_needed);
          *p = accu;
          if (p == limbs)
            goto done;
          p--;
          /* hold (chunk_bits - bits_needed) bits */
          accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
          bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
        }
      else
        {
          /* store chunk_bits bits */
          accu |= d << (bits_needed - chunk_bits);
          bits_needed -= chunk_bits;
          if (bits_needed == 0)
            {
              *p = accu;
              if (p == limbs)
                goto done;
              p--;
              accu = 0;
              bits_needed = GMP_LIMB_BITS;
            }
        }
    }
  if (chunk_count > 4)
    {
      /* Here we still have MANT_BIT-4*31 bits to extract from x.  */
      /* Generic loop.  */
      size_t k;
      for (k = 4; k < chunk_count; k++)
        {
          size_t chunk_bits = MIN (31, MANT_BIT - k * 31); /* > 0, <= 31 */
          mp_limb_t d;
          x *= (mp_limb_t) 1 << chunk_bits;
          d = (int) x; /* 0 <= d < 2^chunk_bits.  */
          x -= d;
          if (!(x >= L_(0.0) && x < L_(1.0)))
            abort ();
          if (bits_needed < chunk_bits)
            {
              /* store bits_needed bits */
              accu |= d >> (chunk_bits - bits_needed);
              *p = accu;
              if (p == limbs)
                goto done;
              p--;
              /* hold (chunk_bits - bits_needed) bits */
              accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
              bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
            }
          else
            {
              /* store chunk_bits bits */
              accu |= d << (bits_needed - chunk_bits);
              bits_needed -= chunk_bits;
              if (bits_needed == 0)
                {
                  *p = accu;
                  if (p == limbs)
                    goto done;
                  p--;
                  accu = 0;
                  bits_needed = GMP_LIMB_BITS;
                }
            }
        }
    }
  /* We shouldn't get here.  */
  abort ();

 done: ;
#ifndef USE_LONG_DOUBLE /* On FreeBSD 6.1/x86, 'long double' numbers sometimes
                           have excess precision.  */
  if (!(x == L_(0.0)))
    abort ();
#endif
}

/* Multiply two sequences of limbs.  */
static void
multiply (mp_limb_t xlimbs[NLIMBS1], mp_limb_t ylimbs[NLIMBS1],
          mp_limb_t prod_limbs[2 * NLIMBS1])
{
  size_t k, i, j;
  enum { len1 = NLIMBS1 };
  enum { len2 = NLIMBS1 };

  for (k = len2; k > 0; )
    prod_limbs[--k] = 0;
  for (i = 0; i < len1; i++)
    {
      mp_limb_t digit1 = xlimbs[i];
      mp_twolimb_t carry = 0;
      for (j = 0; j < len2; j++)
        {
          mp_limb_t digit2 = ylimbs[j];
          carry += (mp_twolimb_t) digit1 * (mp_twolimb_t) digit2;
          carry += prod_limbs[i + j];
          prod_limbs[i + j] = (mp_limb_t) carry;
          carry = carry >> GMP_LIMB_BITS;
        }
      prod_limbs[i + len2] = (mp_limb_t) carry;
    }
}

DOUBLE
FUNC (DOUBLE x, DOUBLE y, DOUBLE z)
{
  if (isfinite (x) && isfinite (y))
    {
      if (isfinite (z))
        {
          /* x, y, z are all finite.  */
          if (x == L_(0.0) || y == L_(0.0))
            return z;
          if (z == L_(0.0))
            return x * y;
          /* x, y, z are all non-zero.
             The result is x * y + z.  */
          {
            int e;                  /* exponent of x * y + z */
            int sign;
            mp_limb_t sum[NLIMBS3];
            size_t sum_len;

            {
              int xys;                /* sign of x * y */
              int zs;                 /* sign of z */
              int xye;                /* sum of exponents of x and y */
              int ze;                 /* exponent of z */
              mp_limb_t summand1[NLIMBS3];
              size_t summand1_len;
              mp_limb_t summand2[NLIMBS3];
              size_t summand2_len;

              {
                mp_limb_t zlimbs[NLIMBS1];
                mp_limb_t xylimbs[2 * NLIMBS1];

                {
                  DOUBLE xn;              /* normalized part of x */
                  DOUBLE yn;              /* normalized part of y */
                  DOUBLE zn;              /* normalized part of z */
                  int xe;                 /* exponent of x */
                  int ye;                 /* exponent of y */
                  mp_limb_t xlimbs[NLIMBS1];
                  mp_limb_t ylimbs[NLIMBS1];

                  xys = 0;
                  xn = x;
                  if (x < 0)
                    {
                      xys = 1;
                      xn = - x;
                    }
                  yn = y;
                  if (y < 0)
                    {
                      xys = 1 - xys;
                      yn = - y;
                    }

                  zs = 0;
                  zn = z;
                  if (z < 0)
                    {
                      zs = 1;
                      zn = - z;
                    }

                  /* xn, yn, zn are all positive.
                     The result is (-1)^xys * xn * yn + (-1)^zs * zn.  */
                  xn = FREXP (xn, &xe);
                  yn = FREXP (yn, &ye);
                  zn = FREXP (zn, &ze);
                  xye = xe + ye;
                  /* xn, yn, zn are all < 1.0 and >= 0.5.
                     The result is
                       (-1)^xys * 2^xye * xn * yn + (-1)^zs * 2^ze * zn.  */
                  if (xye < ze - MANT_BIT)
                    {
                      /* 2^xye * xn * yn < 2^xye <= 2^(ze-MANT_BIT-1)  */
                      return z;
                    }
                  if (xye - 2 * MANT_BIT > ze)
                    {
                      /* 2^ze * zn < 2^ze <= 2^(xye-2*MANT_BIT-1).
                         We cannot simply do
                           return x * y;
                         because it would round differently: A round-to-even
                         in the multiplication can be a round-up or round-down
                         here, due to z.  So replace z with a value that doesn't
                         require the use of long bignums but that rounds the
                         same way.  */
                      zn = L_(0.5);
                      ze = xye - 2 * MANT_BIT - 1;
                    }
                  /* Convert mantissas of xn, yn, zn to limb sequences:
                     xlimbs = 2^MANT_BIT * xn
                     ylimbs = 2^MANT_BIT * yn
                     zlimbs = 2^MANT_BIT * zn  */
                  decode (xn, xlimbs);
                  decode (yn, ylimbs);
                  decode (zn, zlimbs);
                  /* Multiply the mantissas of xn and yn:
                     xylimbs = xlimbs * ylimbs  */
                  multiply (xlimbs, ylimbs, xylimbs);
                }
                /* The result is
                     (-1)^xys * 2^(xye-2*MANT_BIT) * xylimbs
                     + (-1)^zs * 2^(ze-MANT_BIT) * zlimbs.
                   Compute
                     e = min (xye-2*MANT_BIT, ze-MANT_BIT)
                   then
                     summand1 = 2^(xye-2*MANT_BIT-e) * xylimbs
                     summand2 = 2^(ze-MANT_BIT-e) * zlimbs  */
                e = MIN (xye - 2 * MANT_BIT, ze - MANT_BIT);
                if (e == xye - 2 * MANT_BIT)
                  {
                    /* Simply copy the limbs of xylimbs.  */
                    size_t i;
                    for (i = 0; i < 2 * NLIMBS1; i++)
                      summand1[i] = xylimbs[i];
                    summand1_len = 2 * NLIMBS1;
                  }
                else
                  {
                    size_t ediff = xye - 2 * MANT_BIT - e;
                    /* Left shift the limbs of xylimbs by ediff bits.  */
                    size_t ldiff = ediff / GMP_LIMB_BITS;
                    size_t shift = ediff % GMP_LIMB_BITS;
                    size_t i;
                    for (i = 0; i < ldiff; i++)
                      summand1[i] = 0;
                    if (shift > 0)
                      {
                        mp_limb_t carry = 0;
                        for (i = 0; i < 2 * NLIMBS1; i++)
                          {
                            summand1[ldiff + i] = (xylimbs[i] << shift) | carry;
                            carry = xylimbs[i] >> (GMP_LIMB_BITS - shift);
                          }
                        summand1[ldiff + 2 * NLIMBS1] = carry;
                        summand1_len = ldiff + 2 * NLIMBS1 + 1;
                      }
                    else
                      {
                        for (i = 0; i < 2 * NLIMBS1; i++)
                          summand1[ldiff + i] = xylimbs[i];
                        summand1_len = ldiff + 2 * NLIMBS1;
                      }
                    /* Estimation of needed array size:
                       ediff = (xye - 2 * MANT_BIT) - (ze - MANT_BIT) <= 
MANT_BIT + 1
                       therefore
                       summand1_len
                         = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + 2 * 
NLIMBS1
                         <= (MANT_BIT + GMP_LIMB_BITS) / GMP_LIMB_BITS + 2 * 
NLIMBS1
                         <= 3 * NLIMBS1 + 1
                         = NLIMBS3  */
                    if (!(summand1_len <= NLIMBS3))
                      abort ();
                  }
                if (e == ze - MANT_BIT)
                  {
                    /* Simply copy the limbs of zlimbs.  */
                    size_t i;
                    for (i = 0; i < NLIMBS1; i++)
                      summand2[i] = zlimbs[i];
                    summand2_len = NLIMBS1;
                  }
                else
                  {
                    size_t ediff = ze - MANT_BIT - e;
                    /* Left shift the limbs of zlimbs by ediff bits.  */
                    size_t ldiff = ediff / GMP_LIMB_BITS;
                    size_t shift = ediff % GMP_LIMB_BITS;
                    size_t i;
                    for (i = 0; i < ldiff; i++)
                      summand2[i] = 0;
                    if (shift > 0)
                      {
                        mp_limb_t carry = 0;
                        for (i = 0; i < NLIMBS1; i++)
                          {
                            summand2[ldiff + i] = (zlimbs[i] << shift) | carry;
                            carry = zlimbs[i] >> (GMP_LIMB_BITS - shift);
                          }
                        summand2[ldiff + NLIMBS1] = carry;
                        summand2_len = ldiff + NLIMBS1 + 1;
                      }
                    else
                      {
                        for (i = 0; i < NLIMBS1; i++)
                          summand2[ldiff + i] = zlimbs[i];
                        summand2_len = ldiff + NLIMBS1;
                      }
                    /* Estimation of needed array size:
                       ediff = (ze - MANT_BIT) - (xye - 2 * MANT_BIT) <= 2 * 
MANT_BIT
                       therefore
                       summand2_len
                         = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1
                         <= (2 * MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS 
+ NLIMBS1
                         <= 3 * NLIMBS1 + 1
                         = NLIMBS3  */
                    if (!(summand2_len <= NLIMBS3))
                      abort ();
                  }
              }
              /* The result is
                   (-1)^xys * 2^e * summand1 + (-1)^zs * 2^e * summand2.  */
              if (xys == zs)
                {
                  /* Perform an addition.  */
                  size_t i;
                  mp_limb_t carry;

                  sign = xys;
                  carry = 0;
                  for (i = 0; i < MIN (summand1_len, summand2_len); i++)
                    {
                      mp_limb_t digit1 = summand1[i];
                      mp_limb_t digit2 = summand2[i];
                      sum[i] = digit1 + digit2 + carry;
                      carry = (carry
                               ? digit1 >= (mp_limb_t)-1 - digit2
                               : digit1 > (mp_limb_t)-1 - digit2);
                    }
                  if (summand1_len > summand2_len)
                    for (; i < summand1_len; i++)
                      {
                        mp_limb_t digit1 = summand1[i];
                        sum[i] = carry + digit1;
                        carry = carry && digit1 == (mp_limb_t)-1;
                      }
                  else
                    for (; i < summand2_len; i++)
                      {
                        mp_limb_t digit2 = summand2[i];
                        sum[i] = carry + digit2;
                        carry = carry && digit2 == (mp_limb_t)-1;
                      }
                  if (carry)
                    sum[i++] = carry;
                  sum_len = i;
                }
              else
                {
                  /* Perform a subtraction.  */
                  /* Compare summand1 and summand2 by magnitude.  */
                  while (summand1[summand1_len - 1] == 0)
                    summand1_len--;
                  while (summand2[summand2_len - 1] == 0)
                    summand2_len--;
                  if (summand1_len > summand2_len)
                    sign = xys;
                  else if (summand1_len < summand2_len)
                    sign = zs;
                  else
                    {
                      size_t i = summand1_len;
                      for (;;)
                        {
                          --i;
                          if (summand1[i] > summand2[i])
                            {
                              sign = xys;
                              break;
                            }
                          if (summand1[i] < summand2[i])
                            {
                              sign = zs;
                              break;
                            }
                          if (i == 0)
                            /* summand1 and summand2 are equal.  */
                            return L_(0.0);
                        }
                    }
                  if (sign == xys)
                    {
                      /* Compute summand1 - summand2.  */
                      size_t i;
                      mp_limb_t carry;

                      carry = 0;
                      for (i = 0; i < summand2_len; i++)
                        {
                          mp_limb_t digit1 = summand1[i];
                          mp_limb_t digit2 = summand2[i];
                          sum[i] = digit1 - digit2 - carry;
                          carry = (carry ? digit1 <= digit2 : digit1 < digit2);
                        }
                      for (; i < summand1_len; i++)
                        {
                          mp_limb_t digit1 = summand1[i];
                          sum[i] = digit1 - carry;
                          carry = carry && digit1 == 0;
                        }
                      if (carry)
                        abort ();
                      sum_len = summand1_len;
                    }
                  else
                    {
                      /* Compute summand2 - summand1.  */
                      size_t i;
                      mp_limb_t carry;

                      carry = 0;
                      for (i = 0; i < summand1_len; i++)
                        {
                          mp_limb_t digit1 = summand1[i];
                          mp_limb_t digit2 = summand2[i];
                          sum[i] = digit2 - digit1 - carry;
                          carry = (carry ? digit2 <= digit1 : digit2 < digit1);
                        }
                      for (; i < summand2_len; i++)
                        {
                          mp_limb_t digit2 = summand2[i];
                          sum[i] = digit2 - carry;
                          carry = carry && digit2 == 0;
                        }
                      if (carry)
                        abort ();
                      sum_len = summand2_len;
                    }
                }
            }
            /* The result is
                 (-1)^sign * 2^e * sum.  */
            /* Now perform the rounding to MANT_BIT mantissa bits.  */
            while (sum[sum_len - 1] == 0)
              sum_len--;
            /* Here we know that the most significant limb, sum[sum_len - 1], is
               non-zero.  */
            {
              /* How many bits the sum has.  */
              unsigned int sum_bits =
                integer_length (sum[sum_len - 1]) + (sum_len - 1) * 
GMP_LIMB_BITS;
              /* How many bits to keep when rounding.  */
              unsigned int keep_bits;
              /* How many bits to round off.  */
              unsigned int roundoff_bits;
              if (e + (int) sum_bits >= MIN_EXP)
                /* 2^e * sum >= 2^(MIN_EXP-1).
                   result will be a normalized number.  */
                keep_bits = MANT_BIT;
              else if (e + (int) sum_bits >= MIN_EXP - MANT_BIT)
                /* 2^e * sum >= 2^(MIN_EXP-MANT_BIT-1).
                   result will be a denormalized number or rounded to zero.  */
                keep_bits = e + (int) sum_bits - (MIN_EXP + MANT_BIT);
              else
                /* 2^e * sum < 2^(MIN_EXP-MANT_BIT-1).  Round to zero.  */
                return L_(0.0);
              /* Note: 0 <= keep_bits <= MANT_BIT.  */
              if (sum_bits <= keep_bits)
                {
                  /* Nothing to do.  */
                  roundoff_bits = 0;
                  keep_bits = sum_bits;
                }
              else
                {
                  int round_up;
                  roundoff_bits = sum_bits - keep_bits; /* > 0, <= sum_bits */
                  {
#if HAVE_FEGETROUND && defined FE_TOWARDZERO
                    /* Cf. 
<http://pubs.opengroup.org/onlinepubs/9699919799/functions/fegetround.html> */
                    int rounding_mode = fegetround ();
                    if (rounding_mode == FE_TOWARDZERO)
                      round_up = 0;
                    else if (rounding_mode == FE_DOWNWARD)
                      round_up = sign;
                    else if (rounding_mode == FE_UPWARD)
                      round_up = !sign;
#else
                    /* Cf. 
<http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/float.h.html> */
                    int rounding_mode = FLT_ROUNDS;
                    if (rounding_mode == 0) /* toward zero */
                      round_up = 0;
                    else if (rounding_mode == 3) /* toward negative infinity */
                      round_up = sign;
                    else if (rounding_mode == 2) /* toward positive infinity */
                      round_up = !sign;
#endif
                    else
                      {
                        /* Round to nearest.  */
                        round_up = 0;
                        /* Test bit (roundoff_bits-1).  */
                        if ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS]
                             >> ((roundoff_bits - 1) % GMP_LIMB_BITS)) & 1)
                          {
                            /* Test bits roundoff_bits-1 .. 0.  */
                            bool halfway =
                              ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS]
                                & (((mp_limb_t) 1 << ((roundoff_bits - 1) % 
GMP_LIMB_BITS)) - 1))
                               == 0);
                            if (halfway)
                              {
                                int i;
                                for (i = (roundoff_bits - 1) / GMP_LIMB_BITS - 
1; i >= 0; i--)
                                  if (sum[i] != 0)
                                    {
                                      halfway = false;
                                      break;
                                    }
                              }
                            if (halfway)
                              /* Round to even.  Test bit roundoff_bits.  */
                              round_up = ((sum[roundoff_bits / GMP_LIMB_BITS]
                                          >> (roundoff_bits % GMP_LIMB_BITS)) & 
1);
                            else
                              /* Round up.  */
                              round_up = 1;
                          }
                      }
                  }
                  /* Perform the rounding.  */
                  {
                    size_t i = roundoff_bits / GMP_LIMB_BITS;
                    {
                      size_t j = i;
                      while (j > 0)
                        sum[--j] = 0;
                    }
                    if (round_up)
                      {
                        /* Round up.  */
                        sum[i] =
                          (sum[i]
                           | (((mp_limb_t) 1 << (roundoff_bits % 
GMP_LIMB_BITS)) - 1))
                          + 1;
                        if (sum[i] == 0)
                          {
                            /* Propagate carry.  */
                            while (i < sum_len - 1)
                              {
                                ++i;
                                sum[i] += 1;
                                if (sum[i] != 0)
                                  break;
                              }
                          }
                        /* sum[i] is the most significant limb that was
                           incremented.  */
                        if (i == sum_len - 1 && (sum[i] & (sum[i] - 1)) == 0)
                          {
                            /* Through the carry, one more bit is needed.  */
                            if (sum[i] != 0)
                              sum_bits += 1;
                            else
                              {
                                /* Instead of requiring one more limb of memory,
                                   perform a shift by one bit, and adjust the
                                   exponent.  */
                                sum[i] = (mp_limb_t) 1 << (GMP_LIMB_BITS - 1);
                                e += 1;
                              }
                            /* The bit sequence has the form 1000...000.  */
                            keep_bits = 1;
                          }
                      }
                    else
                      {
                        /* Round down.  */
                        sum[i] &= ((mp_limb_t) -1 << (roundoff_bits % 
GMP_LIMB_BITS));
                        if (i == sum_len - 1 && sum[i] == 0)
                          /* The entire sum has become zero.  */
                          return L_(0.0);
                      }
                  }
                }
              /* The result is
                   (-1)^sign * 2^e * sum
                 and here we know that
                   2^(sum_bits-1) <= sum < 2^sum_bits,
                 and sum is a multiple of 2^(sum_bits-keep_bits), where
                   0 < keep_bits <= MANT_BIT  and  keep_bits <= sum_bits.
                 (If keep_bits was initially 0, the rounding either returned 
zero
                 or produced a bit sequence of the form 1000...000, setting
                 keep_bits to 1.)  */
              {
                /* Split the keep_bits bits into chunks of at most 32 bits.  */
                unsigned int chunk_count = (keep_bits - 1) / GMP_LIMB_BITS + 1;
                /* 1 <= chunk_count <= ceil (sum_bits / GMP_LIMB_BITS) = 
sum_len. */
                static const DOUBLE chunk_multiplier = /* 2^-GMP_LIMB_BITS */
                  L_(1.0) / ((DOUBLE) (1 << (GMP_LIMB_BITS / 2))
                             * (DOUBLE) (1 << ((GMP_LIMB_BITS + 1) / 2)));
                unsigned int shift = sum_bits % GMP_LIMB_BITS;
                DOUBLE fsum;
                if (MANT_BIT <= GMP_LIMB_BITS)
                  {
                    /* Since keep_bits <= MANT_BIT <= GMP_LIMB_BITS,
                       chunk_count is 1.  No need for a loop.  */
                    if (shift == 0)
                      fsum = (DOUBLE) sum[sum_len - 1];
                    else
                      fsum = (DOUBLE)
                             ((sum[sum_len - 1] << (GMP_LIMB_BITS - shift))
                              | (sum_len >= 2 ? sum[sum_len - 2] >> shift : 0));
                  }
                else
                  {
                    int k;
                    k = chunk_count - 1;
                    if (shift == 0)
                      {
                        /* First loop round.  */
                        fsum = (DOUBLE) sum[sum_len - k - 1];
                        /* General loop.  */
                        while (--k >= 0)
                          {
                            fsum *= chunk_multiplier;
                            fsum += (DOUBLE) sum[sum_len - k - 1];
                          }
                      }
                    else
                      {
                        /* First loop round.  */
                        fsum = (DOUBLE)
                               ((sum[sum_len - k - 1] << (GMP_LIMB_BITS - 
shift))
                                | (sum_len >= k + 2 ? sum[sum_len - k - 2] >> 
shift : 0));
                        /* General loop.  */
                        while (--k >= 0)
                          {
                            fsum *= chunk_multiplier;
                            fsum += (DOUBLE)
                                    ((sum[sum_len - k - 1] << (GMP_LIMB_BITS - 
shift))
                                     | (sum[sum_len - k - 2] >> shift));
                          }
                      }
                  }
                fsum = LDEXP (fsum, e + (int) sum_bits - GMP_LIMB_BITS);
                return (sign ? - fsum : fsum);
              }
            }
          }
        }
      else
        return z;
    }
  else
    return (x * y) + z;
}
================================== m4/fma.m4 ==================================
# fma.m4 serial 1
dnl Copyright (C) 2011 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.

AC_DEFUN([gl_FUNC_FMA],
[
  AC_REQUIRE([gl_MATH_H_DEFAULTS])

  dnl Determine FMA_LIBM.
  gl_MATHFUNC([fma], [double], [(double, double, double)])
  if test $gl_cv_func_fma_no_libm = yes \
     || test $gl_cv_func_fma_in_libm = yes; then
    gl_FUNC_FMA_WORKS
    case "$gl_cv_func_fma_works" in
      *no) REPLACE_FMA=1 ;;
    esac
  else
    HAVE_FMA=0
  fi
  if test $HAVE_FMA = 0 || test $REPLACE_FMA = 1; then
    dnl Find libraries needed to link lib/fmal.c.
    AC_REQUIRE([gl_FUNC_FREXP])
    AC_REQUIRE([gl_FUNC_LDEXP])
    AC_REQUIRE([gl_FUNC_FEGETROUND])
    FMA_LIBM=
    dnl Append $FREXP_LIBM to FMA_LIBM, avoiding gratuitous duplicates.
    case " $FMA_LIBM " in
      *" $FREXP_LIBM "*) ;;
      *) FMA_LIBM="$FMA_LIBM $FREXP_LIBM" ;;
    esac
    dnl Append $LDEXP_LIBM to FMA_LIBM, avoiding gratuitous duplicates.
    case " $FMA_LIBM " in
      *" $LDEXP_LIBM "*) ;;
      *) FMA_LIBM="$FMA_LIBM $LDEXP_LIBM" ;;
    esac
    dnl Append $FEGETROUND_LIBM to FMA_LIBM, avoiding gratuitous duplicates.
    case " $FMA_LIBM " in
      *" $FEGETROUND_LIBM "*) ;;
      *) FMA_LIBM="$FMA_LIBM $FEGETROUND_LIBM" ;;
    esac
  fi
  AC_SUBST([FMA_LIBM])
])

dnl Test whether fma() has any of the 7 known bugs of glibc 2.11.3 on x86_64.
AC_DEFUN([gl_FUNC_FMA_WORKS],
[
  AC_REQUIRE([AC_PROG_CC])
  AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
  AC_REQUIRE([gl_FUNC_LDEXP])
  save_LIBS="$LIBS"
  LIBS="$LIBS $FMA_LIBM $LDEXP_LIBM"
  AC_CACHE_CHECK([whether fma works], [gl_cv_func_fma_works],
    [
      AC_RUN_IFELSE(
        [AC_LANG_SOURCE([[
#include <float.h>
#include <math.h>
double p0 = 0.0;
int main()
{
  int failed_tests = 0;
  /* These tests fail with glibc 2.11.3 on x86_64.  */
  {
    volatile double x = 1.5; /* 3 * 2^-1 */
    volatile double y = x;
    volatile double z = ldexp (1.0, DBL_MANT_DIG + 1); /* 2^54 */
    /* x * y + z with infinite precision: 2^54 + 9 * 2^-2.
       Lies between (2^52 + 0) * 2^2 and (2^52 + 1) * 2^2
       and is closer to (2^52 + 1) * 2^2, therefore the rounding
       must round up and produce (2^52 + 1) * 2^2.  */
    volatile double expected = z + 4.0;
    volatile double result = fma (x, y, z);
    if (result != expected)
      failed_tests |= 1;
  }
  {
    volatile double x = 1.25; /* 2^0 + 2^-2 */
    volatile double y = - x;
    volatile double z = ldexp (1.0, DBL_MANT_DIG + 1); /* 2^54 */
    /* x * y + z with infinite precision: 2^54 - 2^0 - 2^-1 - 2^-4.
       Lies between (2^53 - 1) * 2^1 and 2^53 * 2^1
       and is closer to (2^53 - 1) * 2^1, therefore the rounding
       must round down and produce (2^53 - 1) * 2^1.  */
    volatile double expected = (ldexp (1.0, DBL_MANT_DIG) - 1.0) * 2.0;
    volatile double result = fma (x, y, z);
    if (result != expected)
      failed_tests |= 2;
  }
  {
    volatile double x = 1.0 + ldexp (1.0, 1 - DBL_MANT_DIG); /* 2^0 + 2^-52 */
    volatile double y = x;
    volatile double z = 4.0; /* 2^2 */
    /* x * y + z with infinite precision: 2^2 + 2^0 + 2^-51 + 2^-104.
       Lies between (2^52 + 2^50) * 2^-50 and (2^52 + 2^50 + 1) * 2^-50
       and is closer to (2^52 + 2^50 + 1) * 2^-50, therefore the rounding
       must round up and produce (2^52 + 2^50 + 1) * 2^-50.  */
    volatile double expected = 4.0 + 1.0 + ldexp (1.0, 3 - DBL_MANT_DIG);
    volatile double result = fma (x, y, z);
    if (result != expected)
      failed_tests |= 4;
  }
  {
    volatile double x = 1.0 + ldexp (1.0, 1 - DBL_MANT_DIG); /* 2^0 + 2^-52 */
    volatile double y = - x;
    volatile double z = 8.0; /* 2^3 */
    /* x * y + z with infinite precision: 2^2 + 2^1 + 2^0 - 2^-51 - 2^-104.
       Lies between (2^52 + 2^51 + 2^50 - 1) * 2^-50 and
       (2^52 + 2^51 + 2^50) * 2^-50 and is closer to
       (2^52 + 2^51 + 2^50 - 1) * 2^-50, therefore the rounding
       must round down and produce (2^52 + 2^51 + 2^50 - 1) * 2^-50.  */
    volatile double expected = 7.0 - ldexp (1.0, 3 - DBL_MANT_DIG);
    volatile double result = fma (x, y, z);
    if (result != expected)
      failed_tests |= 8;
  }
  {
    volatile double x = 1.25; /* 2^0 + 2^-2 */
    volatile double y = - 0.75; /* - 2^0 + 2^-2 */
    volatile double z = ldexp (1.0, DBL_MANT_DIG); /* 2^53 */
    /* x * y + z with infinite precision: 2^53 - 2^0 + 2^-4.
       Lies between (2^53 - 2^0) and 2^53 and is closer to (2^53 - 2^0),
       therefore the rounding must round down and produce (2^53 - 2^0).  */
    volatile double expected = ldexp (1.0, DBL_MANT_DIG) - 1.0;
    volatile double result = fma (x, y, z);
    if (result != expected)
      failed_tests |= 16;
  }
  if ((DBL_MANT_DIG % 2) == 1)
    {
      volatile double x = 1.0 + ldexp (1.0, - (DBL_MANT_DIG + 1) / 2); /* 2^0 + 
2^-27 */
      volatile double y = 1.0 - ldexp (1.0, - (DBL_MANT_DIG + 1) / 2); /* 2^0 - 
2^-27 */
      volatile double z = - ldexp (1.0, DBL_MIN_EXP - DBL_MANT_DIG); /* - 
2^-1074 */
      /* x * y + z with infinite precision: 2^0 - 2^-54 - 2^-1074.
         Lies between (2^53 - 1) * 2^-53 and 2^53 * 2^-53 and is closer to
         (2^53 - 1) * 2^-53, therefore the rounding must round down and
         produce (2^53 - 1) * 2^-53.  */
      volatile double expected = 1.0 - ldexp (1.0, - DBL_MANT_DIG);
      volatile double result = fma (x, y, z);
      if (result != expected)
        failed_tests |= 32;
    }
  {
    double minus_inf = -1.0 / p0;
    volatile double x = ldexp (1.0, DBL_MAX_EXP - 1);
    volatile double y = ldexp (1.0, DBL_MAX_EXP - 1);
    volatile double z = minus_inf;
    volatile double result = fma (x, y, z);
    if (!(result == minus_inf))
      failed_tests |= 64;
  }
  return failed_tests;
}]])],
        [gl_cv_func_fma_works=yes],
        [gl_cv_func_fma_works=no],
        [dnl Guess no, even on glibc systems.
         gl_cv_func_fma_works="guessing no"
        ])
    ])
  LIBS="$save_LIBS"
])

# Prerequisites of lib/fma.c.
AC_DEFUN([gl_PREREQ_FMA], [:])
============================== m4/fegetround.m4 ===============================
# fegetround.m4 serial 1
dnl Copyright (C) 2011 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.

AC_DEFUN([gl_FUNC_FEGETROUND],
[
  dnl Determine FEGETROUND_LIBM.
  gl_MATHFUNC([fegetround], [int], [(void)], [#include <fenv.h>])
  if test $gl_cv_func_fegetround_no_libm = no \
     && test $gl_cv_func_fegetround_in_libm = no; then
    HAVE_FEGETROUND=0
  else
    HAVE_FEGETROUND=1
    AC_DEFINE([HAVE_FEGETROUND], [1],
      [Define to 1 if you have the 'fegetround' function.])
  fi
  AC_SUBST([FEGETROUND_LIBM])
])
================================= modules/fma =================================
Description:
fma() function: fused multiply-add.

Files:
lib/fma.c
lib/float+.h
m4/fma.m4
m4/fegetround.m4
m4/mathfunc.m4

Depends-on:
math
float           [test $HAVE_FMA = 0 || test $REPLACE_FMA = 1]
stdbool         [test $HAVE_FMA = 0 || test $REPLACE_FMA = 1]
verify          [test $HAVE_FMA = 0 || test $REPLACE_FMA = 1]
isfinite        [test $HAVE_FMA = 0 || test $REPLACE_FMA = 1]
integer_length  [test $HAVE_FMA = 0 || test $REPLACE_FMA = 1]
frexp           [test $HAVE_FMA = 0 || test $REPLACE_FMA = 1]
ldexp           [test $HAVE_FMA = 0 || test $REPLACE_FMA = 1]

configure.ac:
gl_FUNC_FMA
if test $HAVE_FMA = 0 || test $REPLACE_FMA = 1; then
  AC_LIBOBJ([fma])
  gl_PREREQ_FMA
fi
gl_MATH_MODULE_INDICATOR([fma])

Makefile.am:

Include:
<math.h>

Link:
$(FMA_LIBM)

License:
LGPL

Maintainer:
Bruno Haible
===============================================================================
--- lib/math.in.h
+++ lib/math.in.h
@@ -507,6 +507,30 @@ _GL_WARN_ON_USE (floorl, "floorl is unportable - "
 #endif
 
 
+#if @GNULIB_FMA@
+# if @REPLACE_FMA@
+#  if !(defined __cplusplus && defined GNULIB_NAMESPACE)
+#   undef fma
+#   define fma rpl_fma
+#  endif
+_GL_FUNCDECL_RPL (fma, double, (double x, double y, double z));
+_GL_CXXALIAS_RPL (fma, double, (double x, double y, double z));
+# else
+#  if address@hidden@
+_GL_FUNCDECL_SYS (fma, double, (double x, double y, double z));
+#  endif
+_GL_CXXALIAS_SYS (fma, double, (double x, double y, double z));
+# endif
+_GL_CXXALIASWARN (fma);
+#elif defined GNULIB_POSIXCHECK
+# undef fma
+# if HAVE_RAW_DECL_FMA
+_GL_WARN_ON_USE (fma, "fma is unportable - "
+                 "use gnulib module fma for portability");
+# endif
+#endif
+
+
 #if @GNULIB_FMODF@
 # if address@hidden@
 #  undef fmodf
--- m4/math_h.m4
+++ m4/math_h.m4
@@ -1,4 +1,4 @@
-# math_h.m4 serial 53
+# math_h.m4 serial 54
 dnl Copyright (C) 2007-2011 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -41,7 +41,7 @@ AC_DEFUN([gl_MATH_H],
   gl_WARN_ON_USE_PREPARE([[#include <math.h>]],
     [acosf acosl asinf asinl atanf atanl
      ceilf ceill copysign copysignf copysignl cosf cosl coshf
-     expf expl fabsf floorf floorl fmodf frexpf frexpl
+     expf expl fabsf floorf floorl fma fmodf frexpf frexpl
      ldexpf ldexpl logb logf logl log10f modff powf
      rint rintf rintl round roundf roundl sinf sinl sinhf sqrtf sqrtl
      tanf tanl tanhf trunc truncf truncl])
@@ -80,6 +80,7 @@ AC_DEFUN([gl_MATH_H_DEFAULTS],
   GNULIB_FLOOR=0;     AC_SUBST([GNULIB_FLOOR])
   GNULIB_FLOORF=0;    AC_SUBST([GNULIB_FLOORF])
   GNULIB_FLOORL=0;    AC_SUBST([GNULIB_FLOORL])
+  GNULIB_FMA=0;       AC_SUBST([GNULIB_FMA])
   GNULIB_FMODF=0;     AC_SUBST([GNULIB_FMODF])
   GNULIB_FREXPF=0;    AC_SUBST([GNULIB_FREXPF])
   GNULIB_FREXP=0;     AC_SUBST([GNULIB_FREXP])
@@ -133,6 +134,7 @@ AC_DEFUN([gl_MATH_H_DEFAULTS],
   HAVE_EXPF=1;                 AC_SUBST([HAVE_EXPF])
   HAVE_EXPL=1;                 AC_SUBST([HAVE_EXPL])
   HAVE_FABSF=1;                AC_SUBST([HAVE_FABSF])
+  HAVE_FMA=1;                  AC_SUBST([HAVE_FMA])
   HAVE_FMODF=1;                AC_SUBST([HAVE_FMODF])
   HAVE_FREXPF=1;               AC_SUBST([HAVE_FREXPF])
   HAVE_ISNANF=1;               AC_SUBST([HAVE_ISNANF])
@@ -183,6 +185,7 @@ AC_DEFUN([gl_MATH_H_DEFAULTS],
   REPLACE_FLOOR=0;             AC_SUBST([REPLACE_FLOOR])
   REPLACE_FLOORF=0;            AC_SUBST([REPLACE_FLOORF])
   REPLACE_FLOORL=0;            AC_SUBST([REPLACE_FLOORL])
+  REPLACE_FMA=0;               AC_SUBST([REPLACE_FMA])
   REPLACE_FREXPF=0;            AC_SUBST([REPLACE_FREXPF])
   REPLACE_FREXP=0;             AC_SUBST([REPLACE_FREXP])
   REPLACE_FREXPL=0;            AC_SUBST([REPLACE_FREXPL])
--- modules/math
+++ modules/math
@@ -50,6 +50,7 @@ math.h: math.in.h $(top_builddir)/config.status $(CXXDEFS_H) 
$(ARG_NONNULL_H) $(
              -e 's/@''GNULIB_FLOOR''@/$(GNULIB_FLOOR)/g' \
              -e 's/@''GNULIB_FLOORF''@/$(GNULIB_FLOORF)/g' \
              -e 's/@''GNULIB_FLOORL''@/$(GNULIB_FLOORL)/g' \
+             -e 's/@''GNULIB_FMA''@/$(GNULIB_FMA)/g' \
              -e 's/@''GNULIB_FMODF''@/$(GNULIB_FMODF)/g' \
              -e 's/@''GNULIB_FREXPF''@/$(GNULIB_FREXPF)/g' \
              -e 's/@''GNULIB_FREXP''@/$(GNULIB_FREXP)/g' \
@@ -103,6 +104,7 @@ math.h: math.in.h $(top_builddir)/config.status 
$(CXXDEFS_H) $(ARG_NONNULL_H) $(
              -e 's|@''HAVE_EXPF''@|$(HAVE_EXPF)|g' \
              -e 's|@''HAVE_EXPL''@|$(HAVE_EXPL)|g' \
              -e 's|@''HAVE_FABSF''@|$(HAVE_FABSF)|g' \
+             -e 's|@''HAVE_FMA''@|$(HAVE_FMA)|g' \
              -e 's|@''HAVE_FMODF''@|$(HAVE_FMODF)|g' \
              -e 's|@''HAVE_FREXPF''@|$(HAVE_FREXPF)|g' \
              -e 's|@''HAVE_ISNANF''@|$(HAVE_ISNANF)|g' \
@@ -154,6 +156,7 @@ math.h: math.in.h $(top_builddir)/config.status 
$(CXXDEFS_H) $(ARG_NONNULL_H) $(
              -e 's|@''REPLACE_FLOOR''@|$(REPLACE_FLOOR)|g' \
              -e 's|@''REPLACE_FLOORF''@|$(REPLACE_FLOORF)|g' \
              -e 's|@''REPLACE_FLOORL''@|$(REPLACE_FLOORL)|g' \
+             -e 's|@''REPLACE_FMA''@|$(REPLACE_FMA)|g' \
              -e 's|@''REPLACE_FREXPF''@|$(REPLACE_FREXPF)|g' \
              -e 's|@''REPLACE_FREXP''@|$(REPLACE_FREXP)|g' \
              -e 's|@''REPLACE_FREXPL''@|$(REPLACE_FREXPL)|g' \
--- doc/posix-functions/fma.texi.orig   Sun Nov  6 02:24:28 2011
+++ doc/posix-functions/fma.texi        Sun Nov  6 01:56:46 2011
@@ -4,15 +4,18 @@
 
 POSIX specification:@* 
@url{http://www.opengroup.org/onlinepubs/9699919799/functions/fma.html}
 
-Gnulib module: ---
+Gnulib module: fma
 
 Portability problems fixed by Gnulib:
 @itemize
address@hidden
+This function is missing on some platforms:
+FreeBSD 5.2.1, NetBSD 5.0, OpenBSD 3.8, Minix 3.1.8, AIX 5.1, HP-UX 11, IRIX 
6.5, OSF/1 4.0, Solaris 9, MSVC 9, Interix 3.5.
address@hidden
+This function produces wrong results on some platforms:
+glibc 2.11, MacOS X 10.5, FreeBSD 6.4/x86, OSF/1 5.1, Cygwin 1.5, mingw.
 @end itemize
 
 Portability problems not fixed by Gnulib:
 @itemize
address@hidden
-This function is missing on some platforms:
-FreeBSD 5.2.1, NetBSD 5.0, OpenBSD 3.8, Minix 3.1.8, AIX 5.1, HP-UX 11, IRIX 
6.5, OSF/1 4.0, Solaris 9, MSVC 9, Interix 3.5.
 @end itemize

-- 
In memoriam Bernhard Lichtenberg 
<http://en.wikipedia.org/wiki/Bernhard_Lichtenberg>
[Prev in Thread]
Current Thread
[Next in Thread]
new module fma, Bruno Haible <=
Prev by Date: extend gl_MATHFUNC
Next by Date: Add "java_quoting_style" to quotearg
Previous by thread: extend gl_MATHFUNC
Next by thread: Add "java_quoting_style" to quotearg
Index(es):
- Date
- Thread