X-Git-Url: https://www.flypig.org.uk/git/?a=blobdiff_plain;f=OpenCL%2Fm11400_a0.cl;h=b774ef2fc6bafb6c5051983357a65a8736e85b44;hb=161a6eb4bc643d8e636e96eda613f5137d30da59;hp=8b43a902bfbda0464d7a708150dad8d88edcddcf;hpb=5c07a412ec247a9e2b32c4d79996c4db5768560c;p=hashcat.git diff --git a/OpenCL/m11400_a0.cl b/OpenCL/m11400_a0.cl index 8b43a90..b774ef2 100644 --- a/OpenCL/m11400_a0.cl +++ b/OpenCL/m11400_a0.cl @@ -1,30 +1,37 @@ /** - * Author......: Jens Steube + * Authors.....: Jens Steube + * magnum + * * License.....: MIT */ #define _MD5_ -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "OpenCL/types_ocl.c" -#include "OpenCL/common.c" -#include "include/rp_kernel.h" -#include "OpenCL/rp.c" - -#define COMPARE_S "OpenCL/check_single_comp4.c" -#define COMPARE_M "OpenCL/check_multi_comp4.c" - -#define uint_to_hex_lower8(i) l_bin2asc[(i)] - -static u32 memcat32 (u32 block0[16], u32 block1[16], const u32 block_len, const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 append3[4], const u32 append_len) +//incompatible because of brances +//#define NEW_SIMD_CODE + +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" +#include "inc_types.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_simd.cl" + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +u32 memcat32 (u32x block0[16], u32x block1[16], const u32 block_len, const u32x append0[4], const u32x append1[4], const u32x append2[4], const u32x append3[4], const u32 append_len) { const u32 mod = block_len & 3; const u32 div = block_len / 4; @@ -32,35 +39,35 @@ static u32 memcat32 (u32 block0[16], u32 block1[16], const u32 block_len, const #if defined IS_AMD || defined IS_GENERIC const int offset_minus_4 = 4 - mod; - u32 append0_t[4]; + u32x append0_t[4]; append0_t[0] = amd_bytealign (append0[0], 0, offset_minus_4); append0_t[1] = amd_bytealign (append0[1], append0[0], offset_minus_4); append0_t[2] = amd_bytealign (append0[2], append0[1], offset_minus_4); append0_t[3] = amd_bytealign (append0[3], append0[2], offset_minus_4); - u32 append1_t[4]; + u32x append1_t[4]; append1_t[0] = amd_bytealign (append1[0], append0[3], offset_minus_4); append1_t[1] = amd_bytealign (append1[1], append1[0], offset_minus_4); append1_t[2] = amd_bytealign (append1[2], append1[1], offset_minus_4); append1_t[3] = amd_bytealign (append1[3], append1[2], offset_minus_4); - u32 append2_t[4]; + u32x append2_t[4]; append2_t[0] = amd_bytealign (append2[0], append1[3], offset_minus_4); append2_t[1] = amd_bytealign (append2[1], append2[0], offset_minus_4); append2_t[2] = amd_bytealign (append2[2], append2[1], offset_minus_4); append2_t[3] = amd_bytealign (append2[3], append2[2], offset_minus_4); - u32 append3_t[4]; + u32x append3_t[4]; append3_t[0] = amd_bytealign (append3[0], append2[3], offset_minus_4); append3_t[1] = amd_bytealign (append3[1], append3[0], offset_minus_4); append3_t[2] = amd_bytealign (append3[2], append3[1], offset_minus_4); append3_t[3] = amd_bytealign (append3[3], append3[2], offset_minus_4); - u32 append4_t[4]; + u32x append4_t[4]; append4_t[0] = amd_bytealign ( 0, append3[3], offset_minus_4); append4_t[1] = 0; @@ -102,35 +109,35 @@ static u32 memcat32 (u32 block0[16], u32 block1[16], const u32 block_len, const const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - u32 append0_t[4]; + u32x append0_t[4]; append0_t[0] = __byte_perm ( 0, append0[0], selector); append0_t[1] = __byte_perm (append0[0], append0[1], selector); append0_t[2] = __byte_perm (append0[1], append0[2], selector); append0_t[3] = __byte_perm (append0[2], append0[3], selector); - u32 append1_t[4]; + u32x append1_t[4]; append1_t[0] = __byte_perm (append0[3], append1[0], selector); append1_t[1] = __byte_perm (append1[0], append1[1], selector); append1_t[2] = __byte_perm (append1[1], append1[2], selector); append1_t[3] = __byte_perm (append1[2], append1[3], selector); - u32 append2_t[4]; + u32x append2_t[4]; append2_t[0] = __byte_perm (append1[3], append2[0], selector); append2_t[1] = __byte_perm (append2[0], append2[1], selector); append2_t[2] = __byte_perm (append2[1], append2[2], selector); append2_t[3] = __byte_perm (append2[2], append2[3], selector); - u32 append3_t[4]; + u32x append3_t[4]; append3_t[0] = __byte_perm (append2[3], append3[0], selector); append3_t[1] = __byte_perm (append3[0], append3[1], selector); append3_t[2] = __byte_perm (append3[1], append3[2], selector); append3_t[3] = __byte_perm (append3[2], append3[3], selector); - u32 append4_t[4]; + u32x append4_t[4]; append4_t[0] = __byte_perm (append3[3], 0, selector); append4_t[1] = 0; @@ -745,7 +752,7 @@ static u32 memcat32 (u32 block0[16], u32 block1[16], const u32 block_len, const return new_len; } -__kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * modifier @@ -779,14 +786,12 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, */ u32 pw_buf0[4]; + u32 pw_buf1[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - - u32 pw_buf1[4]; - pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; pw_buf1[2] = pws[gid].i[ 6]; @@ -801,6 +806,7 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, const u32 salt_len = esalt_bufs[salt_pos].salt_len; // not a bug, we need to get it from the esalt u32 salt_buf0[16]; + u32 salt_buf1[16]; salt_buf0[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; salt_buf0[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; @@ -818,9 +824,6 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, salt_buf0[13] = esalt_bufs[salt_pos].salt_buf[13]; salt_buf0[14] = esalt_bufs[salt_pos].salt_buf[14]; salt_buf0[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32 salt_buf1[16]; - salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[16]; salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[17]; salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[18]; @@ -845,6 +848,8 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; u32 esalt_buf0[16]; + u32 esalt_buf1[16]; + u32 esalt_buf2[16]; esalt_buf0[ 0] = esalt_bufs[salt_pos].esalt_buf[ 0]; esalt_buf0[ 1] = esalt_bufs[salt_pos].esalt_buf[ 1]; @@ -862,9 +867,6 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, esalt_buf0[13] = esalt_bufs[salt_pos].esalt_buf[13]; esalt_buf0[14] = esalt_bufs[salt_pos].esalt_buf[14]; esalt_buf0[15] = esalt_bufs[salt_pos].esalt_buf[15]; - - u32 esalt_buf1[16]; - esalt_buf1[ 0] = esalt_bufs[salt_pos].esalt_buf[16]; esalt_buf1[ 1] = esalt_bufs[salt_pos].esalt_buf[17]; esalt_buf1[ 2] = esalt_bufs[salt_pos].esalt_buf[18]; @@ -881,9 +883,6 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, esalt_buf1[13] = esalt_bufs[salt_pos].esalt_buf[29]; esalt_buf1[14] = esalt_bufs[salt_pos].esalt_buf[30]; esalt_buf1[15] = esalt_bufs[salt_pos].esalt_buf[31]; - - u32 esalt_buf2[16]; - esalt_buf2[ 0] = esalt_bufs[salt_pos].esalt_buf[32]; esalt_buf2[ 1] = esalt_bufs[salt_pos].esalt_buf[33]; esalt_buf2[ 2] = esalt_bufs[salt_pos].esalt_buf[34]; @@ -908,41 +907,18 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * loop */ - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) { - u32 w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32 w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; - u32 w2[4]; + const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; + append_0x80_2x4_VV (w0, w1, out_len); - u32 w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2x4 (w0, w1, out_len); - - const u32 pw_salt_len = salt_len + out_len; + const u32x pw_salt_len = salt_len + out_len; /* * HA1 = md5 ($salt . $pass) @@ -950,7 +926,8 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, // append the pass to the salt - u32 block0[16]; + u32x block0[16]; + u32x block1[16]; block0[ 0] = salt_buf0[ 0]; block0[ 1] = salt_buf0[ 1]; @@ -968,9 +945,6 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, block0[13] = salt_buf0[13]; block0[14] = salt_buf0[14]; block0[15] = salt_buf0[15]; - - u32 block1[16]; - block1[ 0] = salt_buf1[ 0]; block1[ 1] = salt_buf1[ 1]; block1[ 2] = salt_buf1[ 2]; @@ -992,29 +966,23 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, block_len = memcat32 (block0, block1, salt_len, w0, w1, w2, w3, out_len); - u32 w0_t[4]; + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; w0_t[0] = block0[ 0]; w0_t[1] = block0[ 1]; w0_t[2] = block0[ 2]; w0_t[3] = block0[ 3]; - - u32 w1_t[4]; - w1_t[0] = block0[ 4]; w1_t[1] = block0[ 5]; w1_t[2] = block0[ 6]; w1_t[3] = block0[ 7]; - - u32 w2_t[4]; - w2_t[0] = block0[ 8]; w2_t[1] = block0[ 9]; w2_t[2] = block0[10]; w2_t[3] = block0[11]; - - u32 w3_t[4]; - w3_t[0] = block0[12]; w3_t[1] = block0[13]; w3_t[2] = block0[14]; @@ -1027,12 +995,10 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, // md5 - u32 tmp2; - - u32 a = MD5M_A; - u32 b = MD5M_B; - u32 c = MD5M_C; - u32 d = MD5M_D; + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -1068,22 +1034,22 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); @@ -1109,26 +1075,23 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, if (block_len > 55) { - u32 r_a = a; - u32 r_b = b; - u32 r_c = c; - u32 r_d = d; + u32x r_a = a; + u32x r_b = b; + u32x r_c = c; + u32x r_d = d; w0_t[0] = block1[ 0]; w0_t[1] = block1[ 1]; w0_t[2] = block1[ 2]; w0_t[3] = block1[ 3]; - w1_t[0] = block1[ 4]; w1_t[1] = block1[ 5]; w1_t[2] = block1[ 6]; w1_t[3] = block1[ 7]; - w2_t[0] = block1[ 8]; w2_t[1] = block1[ 9]; w2_t[2] = block1[10]; w2_t[3] = block1[11]; - w3_t[0] = block1[12]; w3_t[1] = block1[13]; w3_t[2] = pw_salt_len * 8; @@ -1168,22 +1131,22 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); @@ -1229,12 +1192,10 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, | uint_to_hex_lower8 ((d >> 8) & 255) << 16; w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - w2_t[0] = esalt_buf0[0]; w2_t[1] = esalt_buf0[1]; w2_t[2] = esalt_buf0[2]; w2_t[3] = esalt_buf0[3]; - w3_t[0] = esalt_buf0[4]; w3_t[1] = esalt_buf0[5]; w3_t[2] = esalt_buf0[6]; @@ -1282,22 +1243,22 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); @@ -1321,10 +1282,10 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, c += MD5M_C; d += MD5M_D; - u32 r_a = a; - u32 r_b = b; - u32 r_c = c; - u32 r_d = d; + u32x r_a = a; + u32x r_b = b; + u32x r_c = c; + u32x r_d = d; // 2nd transform @@ -1332,17 +1293,14 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0_t[1] = esalt_buf0[ 9]; w0_t[2] = esalt_buf0[10]; w0_t[3] = esalt_buf0[11]; - w1_t[0] = esalt_buf0[12]; w1_t[1] = esalt_buf0[13]; w1_t[2] = esalt_buf0[14]; w1_t[3] = esalt_buf0[15]; - w2_t[0] = esalt_buf1[ 0]; w2_t[1] = esalt_buf1[ 1]; w2_t[2] = esalt_buf1[ 2]; w2_t[3] = esalt_buf1[ 3]; - w3_t[0] = esalt_buf1[ 4]; w3_t[1] = esalt_buf1[ 5]; w3_t[2] = esalt_buf1[ 6]; @@ -1391,22 +1349,22 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); @@ -1445,17 +1403,14 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0_t[1] = esalt_buf1[ 9]; w0_t[2] = esalt_buf1[10]; w0_t[3] = esalt_buf1[11]; - w1_t[0] = esalt_buf1[12]; w1_t[1] = esalt_buf1[13]; w1_t[2] = esalt_buf1[14]; w1_t[3] = esalt_buf1[15]; - w2_t[0] = esalt_buf2[ 0]; w2_t[1] = esalt_buf2[ 1]; w2_t[2] = esalt_buf2[ 2]; w2_t[3] = esalt_buf2[ 3]; - w3_t[0] = esalt_buf2[ 4]; w3_t[1] = esalt_buf2[ 5]; w3_t[2] = digest_esalt_len * 8; @@ -1495,22 +1450,22 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); @@ -1535,24 +1490,19 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, c += r_c; d += r_d; - const u32 r0 = a; - const u32 r1 = d; - const u32 r2 = c; - const u32 r3 = b; - - #include COMPARE_M + COMPARE_M_SIMD (a, d, c, b); } } -__kernel void m11400_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m11400_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void m11400_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m11400_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * modifier @@ -1586,14 +1536,12 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, */ u32 pw_buf0[4]; + u32 pw_buf1[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - - u32 pw_buf1[4]; - pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; pw_buf1[2] = pws[gid].i[ 6]; @@ -1608,6 +1556,7 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, const u32 salt_len = esalt_bufs[salt_pos].salt_len; // not a bug, we need to get it from the esalt u32 salt_buf0[16]; + u32 salt_buf1[16]; salt_buf0[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; salt_buf0[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; @@ -1625,9 +1574,6 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, salt_buf0[13] = esalt_bufs[salt_pos].salt_buf[13]; salt_buf0[14] = esalt_bufs[salt_pos].salt_buf[14]; salt_buf0[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32 salt_buf1[16]; - salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[16]; salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[17]; salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[18]; @@ -1652,6 +1598,8 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; u32 esalt_buf0[16]; + u32 esalt_buf1[16]; + u32 esalt_buf2[16]; esalt_buf0[ 0] = esalt_bufs[salt_pos].esalt_buf[ 0]; esalt_buf0[ 1] = esalt_bufs[salt_pos].esalt_buf[ 1]; @@ -1669,9 +1617,6 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, esalt_buf0[13] = esalt_bufs[salt_pos].esalt_buf[13]; esalt_buf0[14] = esalt_bufs[salt_pos].esalt_buf[14]; esalt_buf0[15] = esalt_bufs[salt_pos].esalt_buf[15]; - - u32 esalt_buf1[16]; - esalt_buf1[ 0] = esalt_bufs[salt_pos].esalt_buf[16]; esalt_buf1[ 1] = esalt_bufs[salt_pos].esalt_buf[17]; esalt_buf1[ 2] = esalt_bufs[salt_pos].esalt_buf[18]; @@ -1688,9 +1633,6 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, esalt_buf1[13] = esalt_bufs[salt_pos].esalt_buf[29]; esalt_buf1[14] = esalt_bufs[salt_pos].esalt_buf[30]; esalt_buf1[15] = esalt_bufs[salt_pos].esalt_buf[31]; - - u32 esalt_buf2[16]; - esalt_buf2[ 0] = esalt_bufs[salt_pos].esalt_buf[32]; esalt_buf2[ 1] = esalt_bufs[salt_pos].esalt_buf[33]; esalt_buf2[ 2] = esalt_bufs[salt_pos].esalt_buf[34]; @@ -1727,41 +1669,18 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * loop */ - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) { - u32 w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32 w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32 w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; - u32 w3[4]; + const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; + append_0x80_2x4_VV (w0, w1, out_len); - const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2x4 (w0, w1, out_len); - - const u32 pw_salt_len = salt_len + out_len; + const u32x pw_salt_len = salt_len + out_len; /* * HA1 = md5 ($salt . $pass) @@ -1769,7 +1688,8 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, // append the pass to the salt - u32 block0[16]; + u32x block0[16]; + u32x block1[16]; block0[ 0] = salt_buf0[ 0]; block0[ 1] = salt_buf0[ 1]; @@ -1787,9 +1707,6 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, block0[13] = salt_buf0[13]; block0[14] = salt_buf0[14]; block0[15] = salt_buf0[15]; - - u32 block1[16]; - block1[ 0] = salt_buf1[ 0]; block1[ 1] = salt_buf1[ 1]; block1[ 2] = salt_buf1[ 2]; @@ -1811,29 +1728,23 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, block_len = memcat32 (block0, block1, salt_len, w0, w1, w2, w3, out_len); - u32 w0_t[4]; + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; w0_t[0] = block0[ 0]; w0_t[1] = block0[ 1]; w0_t[2] = block0[ 2]; w0_t[3] = block0[ 3]; - - u32 w1_t[4]; - w1_t[0] = block0[ 4]; w1_t[1] = block0[ 5]; w1_t[2] = block0[ 6]; w1_t[3] = block0[ 7]; - - u32 w2_t[4]; - w2_t[0] = block0[ 8]; w2_t[1] = block0[ 9]; w2_t[2] = block0[10]; w2_t[3] = block0[11]; - - u32 w3_t[4]; - w3_t[0] = block0[12]; w3_t[1] = block0[13]; w3_t[2] = block0[14]; @@ -1846,12 +1757,10 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, // md5 - u32 tmp2; - - u32 a = MD5M_A; - u32 b = MD5M_B; - u32 c = MD5M_C; - u32 d = MD5M_D; + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -1887,22 +1796,22 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); @@ -1928,26 +1837,23 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, if (block_len > 55) { - u32 r_a = a; - u32 r_b = b; - u32 r_c = c; - u32 r_d = d; + u32x r_a = a; + u32x r_b = b; + u32x r_c = c; + u32x r_d = d; w0_t[0] = block1[ 0]; w0_t[1] = block1[ 1]; w0_t[2] = block1[ 2]; w0_t[3] = block1[ 3]; - w1_t[0] = block1[ 4]; w1_t[1] = block1[ 5]; w1_t[2] = block1[ 6]; w1_t[3] = block1[ 7]; - w2_t[0] = block1[ 8]; w2_t[1] = block1[ 9]; w2_t[2] = block1[10]; w2_t[3] = block1[11]; - w3_t[0] = block1[12]; w3_t[1] = block1[13]; w3_t[2] = pw_salt_len * 8; @@ -1987,22 +1893,22 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); @@ -2048,12 +1954,10 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, | uint_to_hex_lower8 ((d >> 8) & 255) << 16; w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - w2_t[0] = esalt_buf0[0]; w2_t[1] = esalt_buf0[1]; w2_t[2] = esalt_buf0[2]; w2_t[3] = esalt_buf0[3]; - w3_t[0] = esalt_buf0[4]; w3_t[1] = esalt_buf0[5]; w3_t[2] = esalt_buf0[6]; @@ -2101,22 +2005,22 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); @@ -2140,10 +2044,10 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, c += MD5M_C; d += MD5M_D; - u32 r_a = a; - u32 r_b = b; - u32 r_c = c; - u32 r_d = d; + u32x r_a = a; + u32x r_b = b; + u32x r_c = c; + u32x r_d = d; // 2nd transform @@ -2151,17 +2055,14 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0_t[1] = esalt_buf0[ 9]; w0_t[2] = esalt_buf0[10]; w0_t[3] = esalt_buf0[11]; - w1_t[0] = esalt_buf0[12]; w1_t[1] = esalt_buf0[13]; w1_t[2] = esalt_buf0[14]; w1_t[3] = esalt_buf0[15]; - w2_t[0] = esalt_buf1[ 0]; w2_t[1] = esalt_buf1[ 1]; w2_t[2] = esalt_buf1[ 2]; w2_t[3] = esalt_buf1[ 3]; - w3_t[0] = esalt_buf1[ 4]; w3_t[1] = esalt_buf1[ 5]; w3_t[2] = esalt_buf1[ 6]; @@ -2210,22 +2111,22 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); @@ -2264,17 +2165,14 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0_t[1] = esalt_buf1[ 9]; w0_t[2] = esalt_buf1[10]; w0_t[3] = esalt_buf1[11]; - w1_t[0] = esalt_buf1[12]; w1_t[1] = esalt_buf1[13]; w1_t[2] = esalt_buf1[14]; w1_t[3] = esalt_buf1[15]; - w2_t[0] = esalt_buf2[ 0]; w2_t[1] = esalt_buf2[ 1]; w2_t[2] = esalt_buf2[ 2]; w2_t[3] = esalt_buf2[ 3]; - w3_t[0] = esalt_buf2[ 4]; w3_t[1] = esalt_buf2[ 5]; w3_t[2] = digest_esalt_len * 8; @@ -2314,22 +2212,22 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); @@ -2354,19 +2252,14 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, c += r_c; d += r_d; - const u32 r0 = a; - const u32 r1 = d; - const u32 r2 = c; - const u32 r3 = b; - - #include COMPARE_S + COMPARE_S_SIMD (a, d, c, b); } } -__kernel void m11400_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m11400_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void m11400_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m11400_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { }