[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH 6/8] softfloat: Implement float128_muladd
From: |
David Hildenbrand |
Subject: |
Re: [PATCH 6/8] softfloat: Implement float128_muladd |
Date: |
Thu, 24 Sep 2020 09:56:42 +0200 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Thunderbird/68.11.0 |
[...]
>
> /*----------------------------------------------------------------------------
> | Packs the sign `zSign', the exponent `zExp', and the significand formed
> | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
> @@ -7205,6 +7253,312 @@ float128 float128_mul(float128 a, float128 b,
> float_status *status)
>
> }
>
I do wonder if a type for Int256 would make sense - instead of manually
passing these arrays.
> +static void shortShift256Left(uint64_t p[4], unsigned count)
> +{
> + int negcount = -count & 63;
That's the same as "64 - count", right? (which I find easier to get)
> +
> + if (count == 0) {
> + return;
> + }
> + g_assert(count < 64);
> + p[0] = (p[0] << count) | (p[1] >> negcount);
> + p[1] = (p[1] << count) | (p[2] >> negcount);
> + p[2] = (p[2] << count) | (p[3] >> negcount);
> + p[3] = (p[3] << count);
> +}
> +
> +static void shift256RightJamming(uint64_t p[4], int count)
> +{
> + uint64_t in = 0;
> +
> + g_assert(count >= 0);
> +
> + count = MIN(count, 256);
> + for (; count >= 64; count -= 64) {
> + in |= p[3];
> + p[3] = p[2];
> + p[2] = p[1];
> + p[1] = p[0];
> + p[0] = 0;
> + }
> +
> + if (count) {
> + int negcount = -count & 63;
ditto
> +
> + in |= p[3] << negcount;
> + p[3] = (p[2] << negcount) | (p[3] >> count);
> + p[2] = (p[1] << negcount) | (p[2] >> count);
> + p[1] = (p[0] << negcount) | (p[1] >> count);
> + p[0] = p[0] >> count;
> + }
> + p[3] |= (in != 0);
Took me a bit longer to understand, but now I know why the function name
has "Jamming" in it :)
[...]
> +
> +float128 float128_muladd(float128 a_f, float128 b_f, float128 c_f,
> + int flags, float_status *status)
> +{
> + bool inf_zero, p_sign, sign_flip;
> + uint64_t p_frac[4];
> + FloatParts128 a, b, c;
> + int p_exp, exp_diff, shift, ab_mask, abc_mask;
> + FloatClass p_cls;
> +
> + float128_unpack(&a, a_f, status);
> + float128_unpack(&b, b_f, status);
> + float128_unpack(&c, c_f, status);
> +
> + ab_mask = float_cmask(a.cls) | float_cmask(b.cls);
> + abc_mask = float_cmask(c.cls) | ab_mask;
> + inf_zero = ab_mask == float_cmask_infzero;
> +
> + /* If any input is a NaN, select the required result. */
> + if (unlikely(abc_mask & float_cmask_anynan)) {
> + if (unlikely(abc_mask & float_cmask_snan)) {
> + float_raise(float_flag_invalid, status);
> + }
> +
> + int which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, status);
> + if (status->default_nan_mode) {
> + which = 3;
> + }
> + switch (which) {
> + case 0:
> + break;
> + case 1:
> + a_f = b_f;
> + a.cls = b.cls;
> + break;
> + case 2:
> + a_f = c_f;
> + a.cls = c.cls;
> + break;
> + case 3:
> + return float128_default_nan(status);
> + }
> + if (is_snan(a.cls)) {
> + return float128_silence_nan(a_f, status);
> + }
> + return a_f;
> + }
> +
> + /* After dealing with input NaNs, look for Inf * Zero. */
> + if (unlikely(inf_zero)) {
> + float_raise(float_flag_invalid, status);
> + return float128_default_nan(status);
> + }
> +
> + p_sign = a.sign ^ b.sign;
> +
> + if (flags & float_muladd_negate_c) {
> + c.sign ^= 1;
> + }
> + if (flags & float_muladd_negate_product) {
> + p_sign ^= 1;
> + }
> + sign_flip = (flags & float_muladd_negate_result);
> +
> + if (ab_mask & float_cmask_inf) {
> + p_cls = float_class_inf;
> + } else if (ab_mask & float_cmask_zero) {
> + p_cls = float_class_zero;
> + } else {
> + p_cls = float_class_normal;
> + }
> +
> + if (c.cls == float_class_inf) {
> + if (p_cls == float_class_inf && p_sign != c.sign) {
> + /* +Inf + -Inf = NaN */
> + float_raise(float_flag_invalid, status);
> + return float128_default_nan(status);
> + }
> + /* Inf + Inf = Inf of the proper sign; reuse the return below. */
> + p_cls = float_class_inf;
> + p_sign = c.sign;
> + }
> +
> + if (p_cls == float_class_inf) {
> + return packFloat128(p_sign ^ sign_flip, 0x7fff, 0, 0);
> + }
> +
> + if (p_cls == float_class_zero) {
> + if (c.cls == float_class_zero) {
> + if (p_sign != c.sign) {
> + p_sign = status->float_rounding_mode == float_round_down;
> + }
> + return packFloat128(p_sign ^ sign_flip, 0, 0, 0);
> + }
> +
> + if (flags & float_muladd_halve_result) {
> + c.exp -= 1;
> + }
> + return roundAndPackFloat128(c.sign ^ sign_flip,
> + c.exp + 0x3fff - 1,
> + c.frac0, c.frac1, 0, status);
> + }
> +
> + /* a & b should be normals now... */
> + assert(a.cls == float_class_normal && b.cls == float_class_normal);
> +
> + /* Multiply of 2 113-bit numbers produces a 226-bit result. */
> + mul128To256(a.frac0, a.frac1, b.frac0, b.frac1,
> + &p_frac[0], &p_frac[1], &p_frac[2], &p_frac[3]);
> +
> + /* Realign the binary point at bit 48 of p_frac[0]. */
> + shift = clz64(p_frac[0]) - 15;
> + g_assert(shift == 15 || shift == 16);
> + shortShift256Left(p_frac, shift);
> + p_exp = a.exp + b.exp - (shift - 16);
> + exp_diff = p_exp - c.exp;
> +
> + uint64_t c_frac[4] = { c.frac0, c.frac1, 0, 0 };
> +
> + /* Add or subtract C from the intermediate product. */
> + if (c.cls == float_class_zero) {
> + /* Fall through to rounding after addition (with zero). */
> + } else if (p_sign != c.sign) {
> + /* Subtraction */
> + if (exp_diff < 0) {
> + shift256RightJamming(p_frac, -exp_diff);
> + sub256(p_frac, c_frac, p_frac);
> + p_exp = c.exp;
> + p_sign ^= 1;
> + } else if (exp_diff > 0) {
> + shift256RightJamming(c_frac, exp_diff);
> + sub256(p_frac, p_frac, c_frac);
> + } else {
> + /* Low 128 bits of C are known to be zero. */
> + sub128(p_frac[0], p_frac[1], c_frac[0], c_frac[1],
> + &p_frac[0], &p_frac[1]);
> + /*
> + * Since we have normalized to bit 48 of p_frac[0],
> + * a negative result means C > P and we need to invert.
> + */
> + if ((int64_t)p_frac[0] < 0) {
> + neg256(p_frac);
> + p_sign ^= 1;
> + }
> + }
> +
> + /*
> + * Gross normalization of the 256-bit subtraction result.
> + * Fine tuning below shared with addition.
> + */
> + if (p_frac[0] != 0) {
> + /* nothing to do */
> + } else if (p_frac[1] != 0) {
> + p_exp -= 64;
> + p_frac[0] = p_frac[1];
> + p_frac[1] = p_frac[2];
> + p_frac[2] = p_frac[3];
> + p_frac[3] = 0;
> + } else if (p_frac[2] != 0) {
> + p_exp -= 128;
> + p_frac[0] = p_frac[2];
> + p_frac[1] = p_frac[3];
> + p_frac[2] = 0;
> + p_frac[3] = 0;
> + } else if (p_frac[3] != 0) {
> + p_exp -= 192;
> + p_frac[0] = p_frac[3];
> + p_frac[1] = 0;
> + p_frac[2] = 0;
> + p_frac[3] = 0;
> + } else {
> + /* Subtraction was exact: result is zero. */
> + p_sign = status->float_rounding_mode == float_round_down;
> + return packFloat128(p_sign ^ sign_flip, 0, 0, 0);
> + }
> + } else {
> + /* Addition */
> + if (exp_diff <= 0) {
> + shift256RightJamming(p_frac, -exp_diff);
> + /* Low 128 bits of C are known to be zero. */
> + add128(p_frac[0], p_frac[1], c_frac[0], c_frac[1],
> + &p_frac[0], &p_frac[1]);
> + p_exp = c.exp;
> + } else {
> + shift256RightJamming(c_frac, exp_diff);
> + add256(p_frac, c_frac);
> + }
> + }
> +
> + /* Fine normalization of the 256-bit result: p_frac[0] != 0. */
> + shift = clz64(p_frac[0]) - 15;
> + if (shift < 0) {
> + shift256RightJamming(p_frac, -shift);
> + } else if (shift > 0) {
> + shortShift256Left(p_frac, shift);
> + }
> + p_exp -= shift;
> +
> + if (flags & float_muladd_halve_result) {
> + p_exp -= 1;
> + }
> + return roundAndPackFloat128(p_sign ^ sign_flip,
> + p_exp + 0x3fff - 1,
> + p_frac[0], p_frac[1],
> + p_frac[2] | (p_frac[3] != 0),
> + status);
> +}
Wow, that's a beast :)
--
Thanks,
David / dhildenb
- Re: [PATCH 1/8] softfloat: Use mulu64 for mul64To128, (continued)
- [PATCH 2/8] softfloat: Use int128.h for some operations, Richard Henderson, 2020/09/23
- [PATCH 3/8] softfloat: Tidy a * b + inf return, Richard Henderson, 2020/09/23
- [PATCH 4/8] softfloat: Add float_cmask and constants, Richard Henderson, 2020/09/23
- [PATCH 5/8] softfloat: Inline pick_nan_muladd into its caller, Richard Henderson, 2020/09/23
- [PATCH 6/8] softfloat: Implement float128_muladd, Richard Henderson, 2020/09/23
- Re: [PATCH 6/8] softfloat: Implement float128_muladd,
David Hildenbrand <=
- [PATCH 7/8] softfloat: Use x86_64 assembly for {add,sub}{192,256}, Richard Henderson, 2020/09/23
- [PATCH 8/8] softfloat: Use aarch64 assembly for {add,sub}{192,256}, Richard Henderson, 2020/09/23
- Re: [PATCH 0/8] softfloat: Implement float128_muladd, David Hildenbrand, 2020/09/24