From: Jens Steube Date: Sat, 23 Apr 2016 18:52:16 +0000 (+0200) Subject: Fix -m 200 cracking performance X-Git-Tag: v3.00-beta~77 X-Git-Url: https://www.flypig.org.uk/git/?p=hashcat.git;a=commitdiff_plain;h=7ab180c868fe61fa6b90812a176ddafdadb8880f Fix -m 200 cracking performance --- diff --git a/OpenCL/m00200_a3.cl b/OpenCL/m00200_a3.cl index bdcadac..af3a745 100644 --- a/OpenCL/m00200_a3.cl +++ b/OpenCL/m00200_a3.cl @@ -20,6 +20,81 @@ #include "OpenCL/common.c" #include "OpenCL/simd.c" +#define ROUND(v) \ +{ \ + a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \ + b += (b << 8) ^ a; \ + add += v; \ +} + +#define CODE_PRE \ +{ \ + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) \ + { \ + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; \ + \ + const u32x w0 = w0l | w0r; \ + \ + u32x a = MYSQL323_A; \ + u32x b = MYSQL323_B; \ + u32x c = 0; \ + u32x d = 0; \ + \ + u32x add = 7; \ + +#define CODE_LOOP(rest) \ + \ + int i; \ + int j; \ + \ + for (i = 0, j = 1; i <= (int) (rest) - 4; i += 4, j += 1) \ + { \ + const u32 wj = w[j]; \ + \ + ROUND ((wj >> 0) & 0xff); \ + ROUND ((wj >> 8) & 0xff); \ + ROUND ((wj >> 16) & 0xff); \ + ROUND ((wj >> 24) & 0xff); \ + } \ + \ + const u32 wj = w[j]; \ + \ + const u32 left = (rest) - i; \ + \ + if (left == 3) \ + { \ + ROUND ((wj >> 0) & 0xff); \ + ROUND ((wj >> 8) & 0xff); \ + ROUND ((wj >> 16) & 0xff); \ + } \ + else if (left == 2) \ + { \ + ROUND ((wj >> 0) & 0xff); \ + ROUND ((wj >> 8) & 0xff); \ + } \ + else if (left == 1) \ + { \ + ROUND ((wj >> 0) & 0xff); \ + } + +#define CODE_POST_M \ + \ + a &= 0x7fffffff; \ + b &= 0x7fffffff; \ + \ + COMPARE_M_SIMD (a, b, c, d); \ + } \ +} + +#define CODE_POST_S \ + \ + a &= 0x7fffffff; \ + b &= 0x7fffffff; \ + \ + COMPARE_S_SIMD (a, b, c, d); \ + } \ +} + static void m00200m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) { /** @@ -30,87 +105,151 @@ static void m00200m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k const u32 lid = get_local_id (0); /** - * loop + * digest */ - u32 w0l = w[0]; - - for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + const u32 search[4] = { - const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; - - const u32x w0 = w0l | w0r; - - u32x w_t[16]; - - w_t[ 0] = w0; - w_t[ 1] = w[ 1]; - w_t[ 2] = w[ 2]; - w_t[ 3] = w[ 3]; - w_t[ 4] = w[ 4]; - w_t[ 5] = w[ 5]; - w_t[ 6] = w[ 6]; - w_t[ 7] = w[ 7]; - w_t[ 8] = w[ 8]; - w_t[ 9] = w[ 9]; - w_t[10] = w[10]; - w_t[11] = w[11]; - w_t[12] = w[12]; - w_t[13] = w[13]; - w_t[14] = w[14]; - w_t[15] = w[15]; - - u32x a = MYSQL323_A; - u32x b = MYSQL323_B; - u32x c = 0; - u32x d = 0; - - u32x add = 7; - - #define ROUND(v) \ - { \ - a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \ - b += (b << 8) ^ a; \ - add += v; \ - } - - int i; - int j; - - for (i = 0, j = 0; i <= (int) pw_len - 4; i += 4, j += 1) - { - const u32x wj = w_t[j]; - - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - ROUND ((wj >> 16) & 0xff); - ROUND ((wj >> 24) & 0xff); - } - - const u32x wj = w_t[j]; - - const u32 left = pw_len - i; + digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[digests_offset].digest_buf[DGST_R2], + digests_buf[digests_offset].digest_buf[DGST_R3] + }; - if (left == 3) - { - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - ROUND ((wj >> 16) & 0xff); - } - else if (left == 2) - { - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - } - else if (left == 1) - { - ROUND ((wj >> 0) & 0xff); - } + /** + * loop + */ - a &= 0x7fffffff; - b &= 0x7fffffff; + u32 w0l = w[0]; - COMPARE_M_SIMD (a, b, c, d); + switch (pw_len) + { + case 1: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); + CODE_POST_M; + break; + + case 2: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); + CODE_POST_M; + break; + + case 3: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); + CODE_POST_M; + break; + + case 4: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + CODE_POST_M; + break; + + case 5: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); + CODE_POST_M; + break; + + case 6: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); + CODE_POST_M; + break; + + case 7: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); + CODE_POST_M; + break; + + case 8: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + CODE_POST_M; + break; + + case 9: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + ROUND ((w[2] >> 0) & 0xff); + CODE_POST_M; + break; + + case 10: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); + CODE_POST_M; + break; + + case 11: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); + CODE_POST_M; + break; + + case 12: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff); + CODE_POST_M; + break; + + case 13: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff); + ROUND ((w[3] >> 0) & 0xff); + CODE_POST_M; + break; + + case 14: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff); + ROUND ((w[3] >> 0) & 0xff); ROUND ((w[3] >> 8) & 0xff); + CODE_POST_M; + break; + + case 15: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff); + ROUND ((w[3] >> 0) & 0xff); ROUND ((w[3] >> 8) & 0xff); ROUND ((w[3] >> 16) & 0xff); + CODE_POST_M; + break; + + case 16: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff); + ROUND ((w[3] >> 0) & 0xff); ROUND ((w[3] >> 8) & 0xff); ROUND ((w[3] >> 16) & 0xff); ROUND ((w[3] >> 24) & 0xff); + CODE_POST_M; + break; + + default: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + CODE_LOOP (pw_len - 4); + CODE_POST_M; + break; } } @@ -141,82 +280,134 @@ static void m00200s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + switch (pw_len) { - const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; - - const u32x w0 = w0l | w0r; - - u32x w_t[16]; - - w_t[ 0] = w0; - w_t[ 1] = w[ 1]; - w_t[ 2] = w[ 2]; - w_t[ 3] = w[ 3]; - w_t[ 4] = w[ 4]; - w_t[ 5] = w[ 5]; - w_t[ 6] = w[ 6]; - w_t[ 7] = w[ 7]; - w_t[ 8] = w[ 8]; - w_t[ 9] = w[ 9]; - w_t[10] = w[10]; - w_t[11] = w[11]; - w_t[12] = w[12]; - w_t[13] = w[13]; - w_t[14] = w[14]; - w_t[15] = w[15]; - - u32x a = MYSQL323_A; - u32x b = MYSQL323_B; - u32x c = 0; - u32x d = 0; - - u32x add = 7; - - #define ROUND(v) \ - { \ - a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \ - b += (b << 8) ^ a; \ - add += v; \ - } - - int i; - int j; - - for (i = 0, j = 0; i <= (int) pw_len - 4; i += 4, j += 1) - { - const u32x wj = w_t[j]; - - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - ROUND ((wj >> 16) & 0xff); - ROUND ((wj >> 24) & 0xff); - } - - const u32x wj = w_t[j]; - - const u32 left = pw_len - i; - - if (left == 3) - { - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - ROUND ((wj >> 16) & 0xff); - } - else if (left == 2) - { - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - } - else if (left == 1) - { - ROUND ((wj >> 0) & 0xff); - } - - a &= 0x7fffffff; - b &= 0x7fffffff; - - COMPARE_S_SIMD (a, b, c, d); + case 1: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); + CODE_POST_S; + break; + + case 2: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); + CODE_POST_S; + break; + + case 3: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); + CODE_POST_S; + break; + + case 4: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + CODE_POST_S; + break; + + case 5: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); + CODE_POST_S; + break; + + case 6: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); + CODE_POST_S; + break; + + case 7: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); + CODE_POST_S; + break; + + case 8: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + CODE_POST_S; + break; + + case 9: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + ROUND ((w[2] >> 0) & 0xff); + CODE_POST_S; + break; + + case 10: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); + CODE_POST_S; + break; + + case 11: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); + CODE_POST_S; + break; + + case 12: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff); + CODE_POST_S; + break; + + case 13: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff); + ROUND ((w[3] >> 0) & 0xff); + CODE_POST_S; + break; + + case 14: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff); + ROUND ((w[3] >> 0) & 0xff); ROUND ((w[3] >> 8) & 0xff); + CODE_POST_S; + break; + + case 15: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff); + ROUND ((w[3] >> 0) & 0xff); ROUND ((w[3] >> 8) & 0xff); ROUND ((w[3] >> 16) & 0xff); + CODE_POST_S; + break; + + case 16: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff); + ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff); + ROUND ((w[3] >> 0) & 0xff); ROUND ((w[3] >> 8) & 0xff); ROUND ((w[3] >> 16) & 0xff); ROUND ((w[3] >> 24) & 0xff); + CODE_POST_S; + break; + + default: + CODE_PRE; + ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff); + CODE_LOOP (pw_len - 4); + CODE_POST_S; + break; } }