--- lib/sha1.c.orig 2011-09-12 15:22:57.104202004 +0200 +++ lib/sha1.c 2011-09-12 16:32:41.714201999 +0200 @@ -29,6 +29,7 @@ #include <stddef.h> #include <stdlib.h> #include <string.h> +#include <arpa/inet.h> #if USE_UNLOCKED_IO # include "unlocked-io.h" @@ -316,112 +317,138 @@ #define rol(x, n) (((x) << (n)) | ((uint32_t) (x) >> (32 - (n)))) -#define M(I) ( tm = x[I&0x0f] ^ x[(I-14)&0x0f] \ - ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \ - , (x[I&0x0f] = rol(tm, 1)) ) +#define X(I) ntohl(words[I]) -#define R(A,B,C,D,E,F,K,M) do { E += rol( A, 5 ) \ +/* + * If you have 32 registers or more, the compiler can (and should) + * try to change the array[] accesses into registers. However, on + * machines with fewer than ~25 registers, that won't really work, + * and at least gcc will make an unholy mess of it. + * + * So to avoid that mess, which just slows things down, we force + * the stores to memory to actually happen (we might be better off + * with an 'x[i]=(val);asm("":"+m" (x[i]))' there instead, as + * suggested by Artur Skawina - that will also make gcc unable to + * try to do the silly "optimize away loads" part because it won't + * see what the value will be). + * + * Ben Herrenschmidt reports that on PPC, the C version comes close + * to the optimized asm with this (i.e. on PPC you don't want that + * 'volatile', since there are lots of registers). + * + * On ARM we get the best code generation by forcing a full memory barrier + * between rounds; otherwise gcc happily goes wild with spilling, + * the stack frame size simply explodes, and performance goes down the drain. 
+ */ + +#if defined(__i386__) || defined(__x86_64__) + #define setX(I, val) (*(volatile uint32_t *)&x[(I)&0x0f] = (val)) +#elif defined(__GNUC__) && defined(__arm__) + #define setX(I, val) do { x[(I)&0x0f] = (val); __asm__("":::"memory"); } while (0) +#else + #define setX(I, val) (x[(I)&0x0f] = (val)) +#endif + +#define M(I) rol( x[I&0x0f] ^ x[(I-14)&0x0f] \ + ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f], 1) + +#define R(A,B,C,D,E,F,K,M,I) do { uint32_t TEMP = M(I); \ + setX(I,TEMP);\ + E += rol( A, 5 ) \ + F( B, C, D ) \ + K \ - + M; \ + + TEMP; \ B = rol( B, 30 ); \ } while(0) while (words < endp) { - uint32_t tm; - int t; - for (t = 0; t < 16; t++) - { - x[t] = SWAP (*words); - words++; - } - - R( a, b, c, d, e, F1, K1, x[ 0] ); - R( e, a, b, c, d, F1, K1, x[ 1] ); - R( d, e, a, b, c, F1, K1, x[ 2] ); - R( c, d, e, a, b, F1, K1, x[ 3] ); - R( b, c, d, e, a, F1, K1, x[ 4] ); - R( a, b, c, d, e, F1, K1, x[ 5] ); - R( e, a, b, c, d, F1, K1, x[ 6] ); - R( d, e, a, b, c, F1, K1, x[ 7] ); - R( c, d, e, a, b, F1, K1, x[ 8] ); - R( b, c, d, e, a, F1, K1, x[ 9] ); - R( a, b, c, d, e, F1, K1, x[10] ); - R( e, a, b, c, d, F1, K1, x[11] ); - R( d, e, a, b, c, F1, K1, x[12] ); - R( c, d, e, a, b, F1, K1, x[13] ); - R( b, c, d, e, a, F1, K1, x[14] ); - R( a, b, c, d, e, F1, K1, x[15] ); - R( e, a, b, c, d, F1, K1, M(16) ); - R( d, e, a, b, c, F1, K1, M(17) ); - R( c, d, e, a, b, F1, K1, M(18) ); - R( b, c, d, e, a, F1, K1, M(19) ); - R( a, b, c, d, e, F2, K2, M(20) ); - R( e, a, b, c, d, F2, K2, M(21) ); - R( d, e, a, b, c, F2, K2, M(22) ); - R( c, d, e, a, b, F2, K2, M(23) ); - R( b, c, d, e, a, F2, K2, M(24) ); - R( a, b, c, d, e, F2, K2, M(25) ); - R( e, a, b, c, d, F2, K2, M(26) ); - R( d, e, a, b, c, F2, K2, M(27) ); - R( c, d, e, a, b, F2, K2, M(28) ); - R( b, c, d, e, a, F2, K2, M(29) ); - R( a, b, c, d, e, F2, K2, M(30) ); - R( e, a, b, c, d, F2, K2, M(31) ); - R( d, e, a, b, c, F2, K2, M(32) ); - R( c, d, e, a, b, F2, K2, M(33) ); - R( b, c, d, e, a, F2, K2, M(34) ); - 
R( a, b, c, d, e, F2, K2, M(35) ); - R( e, a, b, c, d, F2, K2, M(36) ); - R( d, e, a, b, c, F2, K2, M(37) ); - R( c, d, e, a, b, F2, K2, M(38) ); - R( b, c, d, e, a, F2, K2, M(39) ); - R( a, b, c, d, e, F3, K3, M(40) ); - R( e, a, b, c, d, F3, K3, M(41) ); - R( d, e, a, b, c, F3, K3, M(42) ); - R( c, d, e, a, b, F3, K3, M(43) ); - R( b, c, d, e, a, F3, K3, M(44) ); - R( a, b, c, d, e, F3, K3, M(45) ); - R( e, a, b, c, d, F3, K3, M(46) ); - R( d, e, a, b, c, F3, K3, M(47) ); - R( c, d, e, a, b, F3, K3, M(48) ); - R( b, c, d, e, a, F3, K3, M(49) ); - R( a, b, c, d, e, F3, K3, M(50) ); - R( e, a, b, c, d, F3, K3, M(51) ); - R( d, e, a, b, c, F3, K3, M(52) ); - R( c, d, e, a, b, F3, K3, M(53) ); - R( b, c, d, e, a, F3, K3, M(54) ); - R( a, b, c, d, e, F3, K3, M(55) ); - R( e, a, b, c, d, F3, K3, M(56) ); - R( d, e, a, b, c, F3, K3, M(57) ); - R( c, d, e, a, b, F3, K3, M(58) ); - R( b, c, d, e, a, F3, K3, M(59) ); - R( a, b, c, d, e, F4, K4, M(60) ); - R( e, a, b, c, d, F4, K4, M(61) ); - R( d, e, a, b, c, F4, K4, M(62) ); - R( c, d, e, a, b, F4, K4, M(63) ); - R( b, c, d, e, a, F4, K4, M(64) ); - R( a, b, c, d, e, F4, K4, M(65) ); - R( e, a, b, c, d, F4, K4, M(66) ); - R( d, e, a, b, c, F4, K4, M(67) ); - R( c, d, e, a, b, F4, K4, M(68) ); - R( b, c, d, e, a, F4, K4, M(69) ); - R( a, b, c, d, e, F4, K4, M(70) ); - R( e, a, b, c, d, F4, K4, M(71) ); - R( d, e, a, b, c, F4, K4, M(72) ); - R( c, d, e, a, b, F4, K4, M(73) ); - R( b, c, d, e, a, F4, K4, M(74) ); - R( a, b, c, d, e, F4, K4, M(75) ); - R( e, a, b, c, d, F4, K4, M(76) ); - R( d, e, a, b, c, F4, K4, M(77) ); - R( c, d, e, a, b, F4, K4, M(78) ); - R( b, c, d, e, a, F4, K4, M(79) ); + R( a, b, c, d, e, F1, K1, X, 0); + R( e, a, b, c, d, F1, K1, X, 1); + R( d, e, a, b, c, F1, K1, X, 2); + R( c, d, e, a, b, F1, K1, X, 3); + R( b, c, d, e, a, F1, K1, X, 4); + R( a, b, c, d, e, F1, K1, X, 5); + R( e, a, b, c, d, F1, K1, X, 6); + R( d, e, a, b, c, F1, K1, X, 7); + R( c, d, e, a, b, F1, K1, X, 8); + R( b, c, d, e, a, 
F1, K1, X, 9); + R( a, b, c, d, e, F1, K1, X, 10); + R( e, a, b, c, d, F1, K1, X, 11); + R( d, e, a, b, c, F1, K1, X, 12); + R( c, d, e, a, b, F1, K1, X, 13); + R( b, c, d, e, a, F1, K1, X, 14); + R( a, b, c, d, e, F1, K1, X, 15); + R( e, a, b, c, d, F1, K1, M, 16); + R( d, e, a, b, c, F1, K1, M, 17); + R( c, d, e, a, b, F1, K1, M, 18); + R( b, c, d, e, a, F1, K1, M, 19); + R( a, b, c, d, e, F2, K2, M, 20); + R( e, a, b, c, d, F2, K2, M, 21); + R( d, e, a, b, c, F2, K2, M, 22); + R( c, d, e, a, b, F2, K2, M, 23); + R( b, c, d, e, a, F2, K2, M, 24); + R( a, b, c, d, e, F2, K2, M, 25); + R( e, a, b, c, d, F2, K2, M, 26); + R( d, e, a, b, c, F2, K2, M, 27); + R( c, d, e, a, b, F2, K2, M, 28); + R( b, c, d, e, a, F2, K2, M, 29); + R( a, b, c, d, e, F2, K2, M, 30); + R( e, a, b, c, d, F2, K2, M, 31); + R( d, e, a, b, c, F2, K2, M, 32); + R( c, d, e, a, b, F2, K2, M, 33); + R( b, c, d, e, a, F2, K2, M, 34); + R( a, b, c, d, e, F2, K2, M, 35); + R( e, a, b, c, d, F2, K2, M, 36); + R( d, e, a, b, c, F2, K2, M, 37); + R( c, d, e, a, b, F2, K2, M, 38); + R( b, c, d, e, a, F2, K2, M, 39); + R( a, b, c, d, e, F3, K3, M, 40); + R( e, a, b, c, d, F3, K3, M, 41); + R( d, e, a, b, c, F3, K3, M, 42); + R( c, d, e, a, b, F3, K3, M, 43); + R( b, c, d, e, a, F3, K3, M, 44); + R( a, b, c, d, e, F3, K3, M, 45); + R( e, a, b, c, d, F3, K3, M, 46); + R( d, e, a, b, c, F3, K3, M, 47); + R( c, d, e, a, b, F3, K3, M, 48); + R( b, c, d, e, a, F3, K3, M, 49); + R( a, b, c, d, e, F3, K3, M, 50); + R( e, a, b, c, d, F3, K3, M, 51); + R( d, e, a, b, c, F3, K3, M, 52); + R( c, d, e, a, b, F3, K3, M, 53); + R( b, c, d, e, a, F3, K3, M, 54); + R( a, b, c, d, e, F3, K3, M, 55); + R( e, a, b, c, d, F3, K3, M, 56); + R( d, e, a, b, c, F3, K3, M, 57); + R( c, d, e, a, b, F3, K3, M, 58); + R( b, c, d, e, a, F3, K3, M, 59); + R( a, b, c, d, e, F4, K4, M, 60); + R( e, a, b, c, d, F4, K4, M, 61); + R( d, e, a, b, c, F4, K4, M, 62); + R( c, d, e, a, b, F4, K4, M, 63); + R( b, c, d, e, a, F4, K4, M, 64); + R( 
a, b, c, d, e, F4, K4, M, 65); + R( e, a, b, c, d, F4, K4, M, 66); + R( d, e, a, b, c, F4, K4, M, 67); + R( c, d, e, a, b, F4, K4, M, 68); + R( b, c, d, e, a, F4, K4, M, 69); + R( a, b, c, d, e, F4, K4, M, 70); + R( e, a, b, c, d, F4, K4, M, 71); + R( d, e, a, b, c, F4, K4, M, 72); + R( c, d, e, a, b, F4, K4, M, 73); + R( b, c, d, e, a, F4, K4, M, 74); + R( a, b, c, d, e, F4, K4, M, 75); + R( e, a, b, c, d, F4, K4, M, 76); + R( d, e, a, b, c, F4, K4, M, 77); + R( c, d, e, a, b, F4, K4, M, 78); + R( b, c, d, e, a, F4, K4, M, 79); a = ctx->A += a; b = ctx->B += b; c = ctx->C += c; d = ctx->D += d; e = ctx->E += e; + words += 16; } } --- lib/sha256.c.orig 2011-09-09 23:08:04.521357998 +0200 +++ lib/sha256.c 2011-09-12 16:32:42.184202007 +0200 @@ -27,6 +27,7 @@ #include <stddef.h> #include <stdlib.h> #include <string.h> +#include <arpa/inet.h> #if USE_UNLOCKED_IO # include "unlocked-io.h" @@ -468,94 +469,88 @@ #define SS0(x) (rol(x,30)^rol(x,19)^rol(x,10)) #define SS1(x) (rol(x,26)^rol(x,21)^rol(x,7)) -#define M(I) ( tm = S1(x[(I-2)&0x0f]) + x[(I-7)&0x0f] \ - + S0(x[(I-15)&0x0f]) + x[I&0x0f] \ - , x[I&0x0f] = tm ) - -#define R(A,B,C,D,E,F,G,H,K,M) do { t0 = SS0(A) + F2(A,B,C); \ - t1 = H + SS1(E) \ - + F1(E,F,G) \ - + K \ - + M; \ - D += t1; H = t0 + t1; \ - } while(0) - +#define X(I) (ntohl(words[I])) +#define M(I) ( S1(x[(I-2)&0x0f]) + x[(I-7)&0x0f] \ + + S0(x[(I-15)&0x0f]) + x[I&0x0f] \ + ) + +#define R(A,B,C,D,E,F,G,H,M,I) do { \ + uint32_t t1; \ + uint32_t tm = M(I); \ + x[I&0x0f] = tm; \ + t1 = H + SS1(E) \ + + F1(E,F,G) \ + + K(I) \ + + tm; \ + D += t1; \ + H = SS0(A) + F2(A,B,C) + t1; \ + } while(0) while (words < endp) { - uint32_t tm; - uint32_t t0, t1; - int t; - /* FIXME: see sha1.c for a better implementation. 
*/ - for (t = 0; t < 16; t++) - { - x[t] = SWAP (*words); - words++; - } - - R( a, b, c, d, e, f, g, h, K( 0), x[ 0] ); - R( h, a, b, c, d, e, f, g, K( 1), x[ 1] ); - R( g, h, a, b, c, d, e, f, K( 2), x[ 2] ); - R( f, g, h, a, b, c, d, e, K( 3), x[ 3] ); - R( e, f, g, h, a, b, c, d, K( 4), x[ 4] ); - R( d, e, f, g, h, a, b, c, K( 5), x[ 5] ); - R( c, d, e, f, g, h, a, b, K( 6), x[ 6] ); - R( b, c, d, e, f, g, h, a, K( 7), x[ 7] ); - R( a, b, c, d, e, f, g, h, K( 8), x[ 8] ); - R( h, a, b, c, d, e, f, g, K( 9), x[ 9] ); - R( g, h, a, b, c, d, e, f, K(10), x[10] ); - R( f, g, h, a, b, c, d, e, K(11), x[11] ); - R( e, f, g, h, a, b, c, d, K(12), x[12] ); - R( d, e, f, g, h, a, b, c, K(13), x[13] ); - R( c, d, e, f, g, h, a, b, K(14), x[14] ); - R( b, c, d, e, f, g, h, a, K(15), x[15] ); - R( a, b, c, d, e, f, g, h, K(16), M(16) ); - R( h, a, b, c, d, e, f, g, K(17), M(17) ); - R( g, h, a, b, c, d, e, f, K(18), M(18) ); - R( f, g, h, a, b, c, d, e, K(19), M(19) ); - R( e, f, g, h, a, b, c, d, K(20), M(20) ); - R( d, e, f, g, h, a, b, c, K(21), M(21) ); - R( c, d, e, f, g, h, a, b, K(22), M(22) ); - R( b, c, d, e, f, g, h, a, K(23), M(23) ); - R( a, b, c, d, e, f, g, h, K(24), M(24) ); - R( h, a, b, c, d, e, f, g, K(25), M(25) ); - R( g, h, a, b, c, d, e, f, K(26), M(26) ); - R( f, g, h, a, b, c, d, e, K(27), M(27) ); - R( e, f, g, h, a, b, c, d, K(28), M(28) ); - R( d, e, f, g, h, a, b, c, K(29), M(29) ); - R( c, d, e, f, g, h, a, b, K(30), M(30) ); - R( b, c, d, e, f, g, h, a, K(31), M(31) ); - R( a, b, c, d, e, f, g, h, K(32), M(32) ); - R( h, a, b, c, d, e, f, g, K(33), M(33) ); - R( g, h, a, b, c, d, e, f, K(34), M(34) ); - R( f, g, h, a, b, c, d, e, K(35), M(35) ); - R( e, f, g, h, a, b, c, d, K(36), M(36) ); - R( d, e, f, g, h, a, b, c, K(37), M(37) ); - R( c, d, e, f, g, h, a, b, K(38), M(38) ); - R( b, c, d, e, f, g, h, a, K(39), M(39) ); - R( a, b, c, d, e, f, g, h, K(40), M(40) ); - R( h, a, b, c, d, e, f, g, K(41), M(41) ); - R( g, h, a, b, c, d, e, f, 
K(42), M(42) ); - R( f, g, h, a, b, c, d, e, K(43), M(43) ); - R( e, f, g, h, a, b, c, d, K(44), M(44) ); - R( d, e, f, g, h, a, b, c, K(45), M(45) ); - R( c, d, e, f, g, h, a, b, K(46), M(46) ); - R( b, c, d, e, f, g, h, a, K(47), M(47) ); - R( a, b, c, d, e, f, g, h, K(48), M(48) ); - R( h, a, b, c, d, e, f, g, K(49), M(49) ); - R( g, h, a, b, c, d, e, f, K(50), M(50) ); - R( f, g, h, a, b, c, d, e, K(51), M(51) ); - R( e, f, g, h, a, b, c, d, K(52), M(52) ); - R( d, e, f, g, h, a, b, c, K(53), M(53) ); - R( c, d, e, f, g, h, a, b, K(54), M(54) ); - R( b, c, d, e, f, g, h, a, K(55), M(55) ); - R( a, b, c, d, e, f, g, h, K(56), M(56) ); - R( h, a, b, c, d, e, f, g, K(57), M(57) ); - R( g, h, a, b, c, d, e, f, K(58), M(58) ); - R( f, g, h, a, b, c, d, e, K(59), M(59) ); - R( e, f, g, h, a, b, c, d, K(60), M(60) ); - R( d, e, f, g, h, a, b, c, K(61), M(61) ); - R( c, d, e, f, g, h, a, b, K(62), M(62) ); - R( b, c, d, e, f, g, h, a, K(63), M(63) ); + R( a, b, c, d, e, f, g, h, X, 0 ); + R( h, a, b, c, d, e, f, g, X, 1 ); + R( g, h, a, b, c, d, e, f, X, 2 ); + R( f, g, h, a, b, c, d, e, X, 3 ); + R( e, f, g, h, a, b, c, d, X, 4 ); + R( d, e, f, g, h, a, b, c, X, 5 ); + R( c, d, e, f, g, h, a, b, X, 6 ); + R( b, c, d, e, f, g, h, a, X, 7 ); + R( a, b, c, d, e, f, g, h, X, 8 ); + R( h, a, b, c, d, e, f, g, X, 9 ); + R( g, h, a, b, c, d, e, f, X, 10 ); + R( f, g, h, a, b, c, d, e, X, 11 ); + R( e, f, g, h, a, b, c, d, X, 12 ); + R( d, e, f, g, h, a, b, c, X, 13 ); + R( c, d, e, f, g, h, a, b, X, 14 ); + R( b, c, d, e, f, g, h, a, X, 15 ); + R( a, b, c, d, e, f, g, h, M, 16 ); + R( h, a, b, c, d, e, f, g, M, 17 ); + R( g, h, a, b, c, d, e, f, M, 18 ); + R( f, g, h, a, b, c, d, e, M, 19 ); + R( e, f, g, h, a, b, c, d, M, 20 ); + R( d, e, f, g, h, a, b, c, M, 21 ); + R( c, d, e, f, g, h, a, b, M, 22 ); + R( b, c, d, e, f, g, h, a, M, 23 ); + R( a, b, c, d, e, f, g, h, M, 24 ); + R( h, a, b, c, d, e, f, g, M, 25 ); + R( g, h, a, b, c, d, e, f, M, 26 ); + R( f, g, h, a, b, c, 
d, e, M, 27 ); + R( e, f, g, h, a, b, c, d, M, 28 ); + R( d, e, f, g, h, a, b, c, M, 29 ); + R( c, d, e, f, g, h, a, b, M, 30 ); + R( b, c, d, e, f, g, h, a, M, 31 ); + R( a, b, c, d, e, f, g, h, M, 32 ); + R( h, a, b, c, d, e, f, g, M, 33 ); + R( g, h, a, b, c, d, e, f, M, 34 ); + R( f, g, h, a, b, c, d, e, M, 35 ); + R( e, f, g, h, a, b, c, d, M, 36 ); + R( d, e, f, g, h, a, b, c, M, 37 ); + R( c, d, e, f, g, h, a, b, M, 38 ); + R( b, c, d, e, f, g, h, a, M, 39 ); + R( a, b, c, d, e, f, g, h, M, 40 ); + R( h, a, b, c, d, e, f, g, M, 41 ); + R( g, h, a, b, c, d, e, f, M, 42 ); + R( f, g, h, a, b, c, d, e, M, 43 ); + R( e, f, g, h, a, b, c, d, M, 44 ); + R( d, e, f, g, h, a, b, c, M, 45 ); + R( c, d, e, f, g, h, a, b, M, 46 ); + R( b, c, d, e, f, g, h, a, M, 47 ); + R( a, b, c, d, e, f, g, h, M, 48 ); + R( h, a, b, c, d, e, f, g, M, 49 ); + R( g, h, a, b, c, d, e, f, M, 50 ); + R( f, g, h, a, b, c, d, e, M, 51 ); + R( e, f, g, h, a, b, c, d, M, 52 ); + R( d, e, f, g, h, a, b, c, M, 53 ); + R( c, d, e, f, g, h, a, b, M, 54 ); + R( b, c, d, e, f, g, h, a, M, 55 ); + R( a, b, c, d, e, f, g, h, M, 56 ); + R( h, a, b, c, d, e, f, g, M, 57 ); + R( g, h, a, b, c, d, e, f, M, 58 ); + R( f, g, h, a, b, c, d, e, M, 59 ); + R( e, f, g, h, a, b, c, d, M, 60 ); + R( d, e, f, g, h, a, b, c, M, 61 ); + R( c, d, e, f, g, h, a, b, M, 62 ); + R( b, c, d, e, f, g, h, a, M, 63 ); a = ctx->state[0] += a; b = ctx->state[1] += b; @@ -565,5 +560,6 @@ f = ctx->state[5] += f; g = ctx->state[6] += g; h = ctx->state[7] += h; + words += 16; } } --- lib/sha512.c.orig 2011-09-06 15:24:17.320209997 +0200 +++ lib/sha512.c 2011-09-12 16:32:42.604202003 +0200 @@ -27,6 +27,7 @@ #include <stddef.h> #include <stdlib.h> #include <string.h> +#include <endian.h> #if USE_UNLOCKED_IO # include "unlocked-io.h" @@ -383,6 +384,7 @@ #if !_STRING_ARCH_unaligned # define alignof(type) offsetof (struct { char c; type x; }, x) # define UNALIGNED_P(p) (((size_t) p) % alignof (u64) != 0) + /* Buffers not aligned for u64 go through the loop below. */ if (UNALIGNED_P (buffer)) while (len > 128) { @@ 
-498,19 +500,29 @@ #define SS0(x) u64xor (u64rol (x, 36), u64xor (u64rol (x, 30), u64rol (x, 25))) #define SS1(x) u64xor (u64rol(x, 50), u64xor (u64rol (x, 46), u64rol (x, 23))) -#define M(I) (x[(I) & 15] \ - = u64plus (x[(I) & 15], \ - u64plus (S1 (x[((I) - 2) & 15]), \ - u64plus (x[((I) - 7) & 15], \ - S0 (x[((I) - 15) & 15]))))) +#if __WORDSIZE == 64 +#define setX(I, val) (*(volatile u64 *)&x[(I)&15] = (val)) +/* be64toh is defined in <endian.h>. */ +#define X(I) (be64toh(words[I])) +#else +#define setX(I, val) (x[(I)&15] = (val)) +#define X(I) (SWAP(words[I])) +#endif -#define R(A, B, C, D, E, F, G, H, K, M) \ +#define M(I) (u64plus (x[(I) & 15], \ + u64plus (S1 (x[((I) - 2) & 15]), \ + u64plus (x[((I) - 7) & 15], \ + S0 (x[((I) - 15) & 15]))))) +#define R(A, B, C, D, E, F, G, H, M, I) \ do \ { \ + u64 temp = M(I); \ u64 t0 = u64plus (SS0 (A), F2 (A, B, C)); \ u64 t1 = \ u64plus (H, u64plus (SS1 (E), \ - u64plus (F1 (E, F, G), u64plus (K, M)))); \ + u64plus (F1 (E, F, G), \ + u64plus (K(I), temp)))); \ + setX(I, temp); \ D = u64plus (D, t1); \ H = u64plus (t0, t1); \ } \ @@ -518,94 +530,86 @@ while (words < endp) { - int t; - /* FIXME: see sha1.c for a better implementation. 
*/ - for (t = 0; t < 16; t++) - { - x[t] = SWAP (*words); - words++; - } - - R( a, b, c, d, e, f, g, h, K( 0), x[ 0] ); - R( h, a, b, c, d, e, f, g, K( 1), x[ 1] ); - R( g, h, a, b, c, d, e, f, K( 2), x[ 2] ); - R( f, g, h, a, b, c, d, e, K( 3), x[ 3] ); - R( e, f, g, h, a, b, c, d, K( 4), x[ 4] ); - R( d, e, f, g, h, a, b, c, K( 5), x[ 5] ); - R( c, d, e, f, g, h, a, b, K( 6), x[ 6] ); - R( b, c, d, e, f, g, h, a, K( 7), x[ 7] ); - R( a, b, c, d, e, f, g, h, K( 8), x[ 8] ); - R( h, a, b, c, d, e, f, g, K( 9), x[ 9] ); - R( g, h, a, b, c, d, e, f, K(10), x[10] ); - R( f, g, h, a, b, c, d, e, K(11), x[11] ); - R( e, f, g, h, a, b, c, d, K(12), x[12] ); - R( d, e, f, g, h, a, b, c, K(13), x[13] ); - R( c, d, e, f, g, h, a, b, K(14), x[14] ); - R( b, c, d, e, f, g, h, a, K(15), x[15] ); - R( a, b, c, d, e, f, g, h, K(16), M(16) ); - R( h, a, b, c, d, e, f, g, K(17), M(17) ); - R( g, h, a, b, c, d, e, f, K(18), M(18) ); - R( f, g, h, a, b, c, d, e, K(19), M(19) ); - R( e, f, g, h, a, b, c, d, K(20), M(20) ); - R( d, e, f, g, h, a, b, c, K(21), M(21) ); - R( c, d, e, f, g, h, a, b, K(22), M(22) ); - R( b, c, d, e, f, g, h, a, K(23), M(23) ); - R( a, b, c, d, e, f, g, h, K(24), M(24) ); - R( h, a, b, c, d, e, f, g, K(25), M(25) ); - R( g, h, a, b, c, d, e, f, K(26), M(26) ); - R( f, g, h, a, b, c, d, e, K(27), M(27) ); - R( e, f, g, h, a, b, c, d, K(28), M(28) ); - R( d, e, f, g, h, a, b, c, K(29), M(29) ); - R( c, d, e, f, g, h, a, b, K(30), M(30) ); - R( b, c, d, e, f, g, h, a, K(31), M(31) ); - R( a, b, c, d, e, f, g, h, K(32), M(32) ); - R( h, a, b, c, d, e, f, g, K(33), M(33) ); - R( g, h, a, b, c, d, e, f, K(34), M(34) ); - R( f, g, h, a, b, c, d, e, K(35), M(35) ); - R( e, f, g, h, a, b, c, d, K(36), M(36) ); - R( d, e, f, g, h, a, b, c, K(37), M(37) ); - R( c, d, e, f, g, h, a, b, K(38), M(38) ); - R( b, c, d, e, f, g, h, a, K(39), M(39) ); - R( a, b, c, d, e, f, g, h, K(40), M(40) ); - R( h, a, b, c, d, e, f, g, K(41), M(41) ); - R( g, h, a, b, c, d, e, f, 
K(42), M(42) ); - R( f, g, h, a, b, c, d, e, K(43), M(43) ); - R( e, f, g, h, a, b, c, d, K(44), M(44) ); - R( d, e, f, g, h, a, b, c, K(45), M(45) ); - R( c, d, e, f, g, h, a, b, K(46), M(46) ); - R( b, c, d, e, f, g, h, a, K(47), M(47) ); - R( a, b, c, d, e, f, g, h, K(48), M(48) ); - R( h, a, b, c, d, e, f, g, K(49), M(49) ); - R( g, h, a, b, c, d, e, f, K(50), M(50) ); - R( f, g, h, a, b, c, d, e, K(51), M(51) ); - R( e, f, g, h, a, b, c, d, K(52), M(52) ); - R( d, e, f, g, h, a, b, c, K(53), M(53) ); - R( c, d, e, f, g, h, a, b, K(54), M(54) ); - R( b, c, d, e, f, g, h, a, K(55), M(55) ); - R( a, b, c, d, e, f, g, h, K(56), M(56) ); - R( h, a, b, c, d, e, f, g, K(57), M(57) ); - R( g, h, a, b, c, d, e, f, K(58), M(58) ); - R( f, g, h, a, b, c, d, e, K(59), M(59) ); - R( e, f, g, h, a, b, c, d, K(60), M(60) ); - R( d, e, f, g, h, a, b, c, K(61), M(61) ); - R( c, d, e, f, g, h, a, b, K(62), M(62) ); - R( b, c, d, e, f, g, h, a, K(63), M(63) ); - R( a, b, c, d, e, f, g, h, K(64), M(64) ); - R( h, a, b, c, d, e, f, g, K(65), M(65) ); - R( g, h, a, b, c, d, e, f, K(66), M(66) ); - R( f, g, h, a, b, c, d, e, K(67), M(67) ); - R( e, f, g, h, a, b, c, d, K(68), M(68) ); - R( d, e, f, g, h, a, b, c, K(69), M(69) ); - R( c, d, e, f, g, h, a, b, K(70), M(70) ); - R( b, c, d, e, f, g, h, a, K(71), M(71) ); - R( a, b, c, d, e, f, g, h, K(72), M(72) ); - R( h, a, b, c, d, e, f, g, K(73), M(73) ); - R( g, h, a, b, c, d, e, f, K(74), M(74) ); - R( f, g, h, a, b, c, d, e, K(75), M(75) ); - R( e, f, g, h, a, b, c, d, K(76), M(76) ); - R( d, e, f, g, h, a, b, c, K(77), M(77) ); - R( c, d, e, f, g, h, a, b, K(78), M(78) ); - R( b, c, d, e, f, g, h, a, K(79), M(79) ); + R( a, b, c, d, e, f, g, h, X, 0 ); + R( h, a, b, c, d, e, f, g, X, 1 ); + R( g, h, a, b, c, d, e, f, X, 2 ); + R( f, g, h, a, b, c, d, e, X, 3 ); + R( e, f, g, h, a, b, c, d, X, 4 ); + R( d, e, f, g, h, a, b, c, X, 5 ); + R( c, d, e, f, g, h, a, b, X, 6 ); + R( b, c, d, e, f, g, h, a, X, 7 ); + R( a, b, c, d, e, f, 
g, h, X, 8 ); + R( h, a, b, c, d, e, f, g, X, 9 ); + R( g, h, a, b, c, d, e, f, X, 10 ); + R( f, g, h, a, b, c, d, e, X, 11 ); + R( e, f, g, h, a, b, c, d, X, 12 ); + R( d, e, f, g, h, a, b, c, X, 13 ); + R( c, d, e, f, g, h, a, b, X, 14 ); + R( b, c, d, e, f, g, h, a, X, 15 ); + R( a, b, c, d, e, f, g, h, M, 16 ); + R( h, a, b, c, d, e, f, g, M, 17 ); + R( g, h, a, b, c, d, e, f, M, 18 ); + R( f, g, h, a, b, c, d, e, M, 19 ); + R( e, f, g, h, a, b, c, d, M, 20 ); + R( d, e, f, g, h, a, b, c, M, 21 ); + R( c, d, e, f, g, h, a, b, M, 22 ); + R( b, c, d, e, f, g, h, a, M, 23 ); + R( a, b, c, d, e, f, g, h, M, 24 ); + R( h, a, b, c, d, e, f, g, M, 25 ); + R( g, h, a, b, c, d, e, f, M, 26 ); + R( f, g, h, a, b, c, d, e, M, 27 ); + R( e, f, g, h, a, b, c, d, M, 28 ); + R( d, e, f, g, h, a, b, c, M, 29 ); + R( c, d, e, f, g, h, a, b, M, 30 ); + R( b, c, d, e, f, g, h, a, M, 31 ); + R( a, b, c, d, e, f, g, h, M, 32 ); + R( h, a, b, c, d, e, f, g, M, 33 ); + R( g, h, a, b, c, d, e, f, M, 34 ); + R( f, g, h, a, b, c, d, e, M, 35 ); + R( e, f, g, h, a, b, c, d, M, 36 ); + R( d, e, f, g, h, a, b, c, M, 37 ); + R( c, d, e, f, g, h, a, b, M, 38 ); + R( b, c, d, e, f, g, h, a, M, 39 ); + R( a, b, c, d, e, f, g, h, M, 40 ); + R( h, a, b, c, d, e, f, g, M, 41 ); + R( g, h, a, b, c, d, e, f, M, 42 ); + R( f, g, h, a, b, c, d, e, M, 43 ); + R( e, f, g, h, a, b, c, d, M, 44 ); + R( d, e, f, g, h, a, b, c, M, 45 ); + R( c, d, e, f, g, h, a, b, M, 46 ); + R( b, c, d, e, f, g, h, a, M, 47 ); + R( a, b, c, d, e, f, g, h, M, 48 ); + R( h, a, b, c, d, e, f, g, M, 49 ); + R( g, h, a, b, c, d, e, f, M, 50 ); + R( f, g, h, a, b, c, d, e, M, 51 ); + R( e, f, g, h, a, b, c, d, M, 52 ); + R( d, e, f, g, h, a, b, c, M, 53 ); + R( c, d, e, f, g, h, a, b, M, 54 ); + R( b, c, d, e, f, g, h, a, M, 55 ); + R( a, b, c, d, e, f, g, h, M, 56 ); + R( h, a, b, c, d, e, f, g, M, 57 ); + R( g, h, a, b, c, d, e, f, M, 58 ); + R( f, g, h, a, b, c, d, e, M, 59 ); + R( e, f, g, h, a, b, c, d, M, 60 ); + R( d, e, 
f, g, h, a, b, c, M, 61 ); + R( c, d, e, f, g, h, a, b, M, 62 ); + R( b, c, d, e, f, g, h, a, M, 63 ); + R( a, b, c, d, e, f, g, h, M, 64 ); + R( h, a, b, c, d, e, f, g, M, 65 ); + R( g, h, a, b, c, d, e, f, M, 66 ); + R( f, g, h, a, b, c, d, e, M, 67 ); + R( e, f, g, h, a, b, c, d, M, 68 ); + R( d, e, f, g, h, a, b, c, M, 69 ); + R( c, d, e, f, g, h, a, b, M, 70 ); + R( b, c, d, e, f, g, h, a, M, 71 ); + R( a, b, c, d, e, f, g, h, M, 72 ); + R( h, a, b, c, d, e, f, g, M, 73 ); + R( g, h, a, b, c, d, e, f, M, 74 ); + R( f, g, h, a, b, c, d, e, M, 75 ); + R( e, f, g, h, a, b, c, d, M, 76 ); + R( d, e, f, g, h, a, b, c, M, 77 ); + R( c, d, e, f, g, h, a, b, M, 78 ); + R( b, c, d, e, f, g, h, a, M, 79 ); a = ctx->state[0] = u64plus (ctx->state[0], a); b = ctx->state[1] = u64plus (ctx->state[1], b); @@ -615,5 +619,6 @@ f = ctx->state[5] = u64plus (ctx->state[5], f); g = ctx->state[6] = u64plus (ctx->state[6], g); h = ctx->state[7] = u64plus (ctx->state[7], h); + words += 16; } }