X-Git-Url: https://www.flypig.org.uk/git/?a=blobdiff_plain;f=OpenCL%2Fm11400_a0.cl;h=b774ef2fc6bafb6c5051983357a65a8736e85b44;hb=51e1d11b2d6dff4ae331cff6098169455733c412;hp=1fbc8987d22990cdf11a89b32a388b4231e64323;hpb=6534211f36e8aa785da93991cccc91789bcd3be3;p=hashcat.git diff --git a/OpenCL/m11400_a0.cl b/OpenCL/m11400_a0.cl index 1fbc898..b774ef2 100644 --- a/OpenCL/m11400_a0.cl +++ b/OpenCL/m11400_a0.cl @@ -1,26 +1,23 @@ /** - * Author......: Jens Steube + * Authors.....: Jens Steube + * magnum + * * License.....: MIT */ #define _MD5_ -#define NEW_SIMD_CODE +//incompatible because of brances +//#define NEW_SIMD_CODE -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "OpenCL/types_ocl.c" -#include "OpenCL/common.c" -#include "include/rp_kernel.h" -#include "OpenCL/rp.c" -#include "OpenCL/simd.c" +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" +#include "inc_types.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_simd.cl" #if VECT_SIZE == 1 #define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)]) @@ -30,9 +27,11 @@ #define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #elif VECT_SIZE == 8 #define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) #endif -static u32 memcat32 (u32x block0[16], u32x block1[16], const u32 block_len, const u32x append0[4], const u32x append1[4], const u32x append2[4], const u32x append3[4], const u32 append_len) +u32 memcat32 (u32x block0[16], u32x block1[16], const u32 block_len, const u32x append0[4], const u32x append1[4], const u32x append2[4], const u32x append3[4], const u32 append_len) { const u32 mod = block_len & 3; const u32 div = block_len / 4; @@ -753,7 +752,7 @@ static u32 memcat32 (u32x block0[16], u32x block1[16], const u32 block_len, cons return new_len; } -__kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * modifier @@ -787,14 +786,12 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, */ u32 pw_buf0[4]; + u32 pw_buf1[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - - u32 pw_buf1[4]; - pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; pw_buf1[2] = pws[gid].i[ 6]; @@ -809,6 +806,7 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, const u32 salt_len = esalt_bufs[salt_pos].salt_len; // not a bug, we need to get it from the esalt u32 salt_buf0[16]; + u32 salt_buf1[16]; salt_buf0[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; salt_buf0[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; @@ -826,9 +824,6 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, salt_buf0[13] = esalt_bufs[salt_pos].salt_buf[13]; salt_buf0[14] = esalt_bufs[salt_pos].salt_buf[14]; salt_buf0[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32 salt_buf1[16]; - salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[16]; salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[17]; salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[18]; @@ -853,6 +848,8 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; u32 esalt_buf0[16]; + u32 esalt_buf1[16]; + u32 esalt_buf2[16]; esalt_buf0[ 0] = esalt_bufs[salt_pos].esalt_buf[ 0]; esalt_buf0[ 1] = esalt_bufs[salt_pos].esalt_buf[ 1]; @@ -870,9 +867,6 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, esalt_buf0[13] = esalt_bufs[salt_pos].esalt_buf[13]; esalt_buf0[14] = esalt_bufs[salt_pos].esalt_buf[14]; esalt_buf0[15] = esalt_bufs[salt_pos].esalt_buf[15]; - - u32 esalt_buf1[16]; - esalt_buf1[ 0] = esalt_bufs[salt_pos].esalt_buf[16]; esalt_buf1[ 1] = esalt_bufs[salt_pos].esalt_buf[17]; esalt_buf1[ 2] = esalt_bufs[salt_pos].esalt_buf[18]; @@ -889,9 +883,6 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, esalt_buf1[13] = esalt_bufs[salt_pos].esalt_buf[29]; esalt_buf1[14] = esalt_bufs[salt_pos].esalt_buf[30]; esalt_buf1[15] = esalt_bufs[salt_pos].esalt_buf[31]; - - u32 esalt_buf2[16]; - esalt_buf2[ 0] = esalt_bufs[salt_pos].esalt_buf[32]; esalt_buf2[ 1] = esalt_bufs[salt_pos].esalt_buf[33]; esalt_buf2[ 2] = esalt_bufs[salt_pos].esalt_buf[34]; @@ -916,18 +907,18 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * loop */ - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos += VECT_SIZE) + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) { u32x w0[4] = { 0 }; u32x w1[4] = { 0 }; u32x w2[4] = { 0 }; u32x w3[4] = { 0 }; - const u32 out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); - append_0x80_2x4 (w0, w1, out_len); + append_0x80_2x4_VV (w0, w1, out_len); - const u32 pw_salt_len = salt_len + out_len; + const u32x pw_salt_len = salt_len + out_len; /* * HA1 = md5 ($salt . $pass) @@ -936,6 +927,7 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, // append the pass to the salt u32x block0[16]; + u32x block1[16]; block0[ 0] = salt_buf0[ 0]; block0[ 1] = salt_buf0[ 1]; @@ -953,9 +945,6 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, block0[13] = salt_buf0[13]; block0[14] = salt_buf0[14]; block0[15] = salt_buf0[15]; - - u32x block1[16]; - block1[ 0] = salt_buf1[ 0]; block1[ 1] = salt_buf1[ 1]; block1[ 2] = salt_buf1[ 2]; @@ -978,28 +967,22 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, block_len = memcat32 (block0, block1, salt_len, w0, w1, w2, w3, out_len); u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; w0_t[0] = block0[ 0]; w0_t[1] = block0[ 1]; w0_t[2] = block0[ 2]; w0_t[3] = block0[ 3]; - - u32x w1_t[4]; - w1_t[0] = block0[ 4]; w1_t[1] = block0[ 5]; w1_t[2] = block0[ 6]; w1_t[3] = block0[ 7]; - - u32x w2_t[4]; - w2_t[0] = block0[ 8]; w2_t[1] = block0[ 9]; w2_t[2] = block0[10]; w2_t[3] = block0[11]; - - u32x w3_t[4]; - w3_t[0] = block0[12]; w3_t[1] = block0[13]; w3_t[2] = block0[14]; @@ -1101,17 +1084,14 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0_t[1] = block1[ 1]; w0_t[2] = block1[ 2]; w0_t[3] = block1[ 3]; - w1_t[0] = block1[ 4]; w1_t[1] = block1[ 5]; w1_t[2] = block1[ 6]; w1_t[3] = block1[ 7]; - w2_t[0] = block1[ 8]; w2_t[1] = block1[ 9]; w2_t[2] = block1[10]; w2_t[3] = block1[11]; - w3_t[0] = block1[12]; w3_t[1] = block1[13]; w3_t[2] = pw_salt_len * 8; @@ -1212,12 +1192,10 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, | uint_to_hex_lower8 ((d >> 8) & 255) << 16; w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - w2_t[0] = esalt_buf0[0]; w2_t[1] = esalt_buf0[1]; w2_t[2] = esalt_buf0[2]; w2_t[3] = esalt_buf0[3]; - w3_t[0] = esalt_buf0[4]; w3_t[1] = esalt_buf0[5]; w3_t[2] = esalt_buf0[6]; @@ -1315,17 +1293,14 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0_t[1] = esalt_buf0[ 9]; w0_t[2] = esalt_buf0[10]; w0_t[3] = esalt_buf0[11]; - w1_t[0] = esalt_buf0[12]; w1_t[1] = esalt_buf0[13]; w1_t[2] = esalt_buf0[14]; w1_t[3] = esalt_buf0[15]; - w2_t[0] = esalt_buf1[ 0]; w2_t[1] = esalt_buf1[ 1]; w2_t[2] = esalt_buf1[ 2]; w2_t[3] = esalt_buf1[ 3]; - w3_t[0] = esalt_buf1[ 4]; w3_t[1] = esalt_buf1[ 5]; w3_t[2] = esalt_buf1[ 6]; @@ -1428,17 +1403,14 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0_t[1] = esalt_buf1[ 9]; w0_t[2] = esalt_buf1[10]; w0_t[3] = esalt_buf1[11]; - w1_t[0] = esalt_buf1[12]; w1_t[1] = esalt_buf1[13]; w1_t[2] = esalt_buf1[14]; w1_t[3] = esalt_buf1[15]; - w2_t[0] = esalt_buf2[ 0]; w2_t[1] = esalt_buf2[ 1]; w2_t[2] = esalt_buf2[ 2]; w2_t[3] = esalt_buf2[ 3]; - w3_t[0] = esalt_buf2[ 4]; w3_t[1] = esalt_buf2[ 5]; w3_t[2] = digest_esalt_len * 8; @@ -1522,15 +1494,15 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, } } -__kernel void m11400_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m11400_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void m11400_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m11400_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * modifier @@ -1564,14 +1536,12 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, */ u32 pw_buf0[4]; + u32 pw_buf1[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - - u32 pw_buf1[4]; - pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; pw_buf1[2] = pws[gid].i[ 6]; @@ -1586,6 +1556,7 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, const u32 salt_len = esalt_bufs[salt_pos].salt_len; // not a bug, we need to get it from the esalt u32 salt_buf0[16]; + u32 salt_buf1[16]; salt_buf0[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; salt_buf0[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; @@ -1603,9 +1574,6 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, salt_buf0[13] = esalt_bufs[salt_pos].salt_buf[13]; salt_buf0[14] = esalt_bufs[salt_pos].salt_buf[14]; salt_buf0[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32 salt_buf1[16]; - salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[16]; salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[17]; salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[18]; @@ -1630,6 +1598,8 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; u32 esalt_buf0[16]; + u32 esalt_buf1[16]; + u32 esalt_buf2[16]; esalt_buf0[ 0] = esalt_bufs[salt_pos].esalt_buf[ 0]; esalt_buf0[ 1] = esalt_bufs[salt_pos].esalt_buf[ 1]; @@ -1647,9 +1617,6 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, esalt_buf0[13] = esalt_bufs[salt_pos].esalt_buf[13]; esalt_buf0[14] = esalt_bufs[salt_pos].esalt_buf[14]; esalt_buf0[15] = esalt_bufs[salt_pos].esalt_buf[15]; - - u32 esalt_buf1[16]; - esalt_buf1[ 0] = esalt_bufs[salt_pos].esalt_buf[16]; esalt_buf1[ 1] = esalt_bufs[salt_pos].esalt_buf[17]; esalt_buf1[ 2] = esalt_bufs[salt_pos].esalt_buf[18]; @@ -1666,9 +1633,6 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, esalt_buf1[13] = esalt_bufs[salt_pos].esalt_buf[29]; esalt_buf1[14] = esalt_bufs[salt_pos].esalt_buf[30]; esalt_buf1[15] = esalt_bufs[salt_pos].esalt_buf[31]; - - u32 esalt_buf2[16]; - esalt_buf2[ 0] = esalt_bufs[salt_pos].esalt_buf[32]; esalt_buf2[ 1] = esalt_bufs[salt_pos].esalt_buf[33]; esalt_buf2[ 2] = esalt_bufs[salt_pos].esalt_buf[34]; @@ -1705,18 +1669,18 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * loop */ - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos += VECT_SIZE) + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) { u32x w0[4] = { 0 }; u32x w1[4] = { 0 }; u32x w2[4] = { 0 }; u32x w3[4] = { 0 }; - const u32 out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); - append_0x80_2x4 (w0, w1, out_len); + append_0x80_2x4_VV (w0, w1, out_len); - const u32 pw_salt_len = salt_len + out_len; + const u32x pw_salt_len = salt_len + out_len; /* * HA1 = md5 ($salt . $pass) @@ -1725,6 +1689,7 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, // append the pass to the salt u32x block0[16]; + u32x block1[16]; block0[ 0] = salt_buf0[ 0]; block0[ 1] = salt_buf0[ 1]; @@ -1742,9 +1707,6 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, block0[13] = salt_buf0[13]; block0[14] = salt_buf0[14]; block0[15] = salt_buf0[15]; - - u32x block1[16]; - block1[ 0] = salt_buf1[ 0]; block1[ 1] = salt_buf1[ 1]; block1[ 2] = salt_buf1[ 2]; @@ -1767,28 +1729,22 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, block_len = memcat32 (block0, block1, salt_len, w0, w1, w2, w3, out_len); u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; w0_t[0] = block0[ 0]; w0_t[1] = block0[ 1]; w0_t[2] = block0[ 2]; w0_t[3] = block0[ 3]; - - u32x w1_t[4]; - w1_t[0] = block0[ 4]; w1_t[1] = block0[ 5]; w1_t[2] = block0[ 6]; w1_t[3] = block0[ 7]; - - u32x w2_t[4]; - w2_t[0] = block0[ 8]; w2_t[1] = block0[ 9]; w2_t[2] = block0[10]; w2_t[3] = block0[11]; - - u32x w3_t[4]; - w3_t[0] = block0[12]; w3_t[1] = block0[13]; w3_t[2] = block0[14]; @@ -1890,17 +1846,14 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0_t[1] = block1[ 1]; w0_t[2] = block1[ 2]; w0_t[3] = block1[ 3]; - w1_t[0] = block1[ 4]; w1_t[1] = block1[ 5]; w1_t[2] = block1[ 6]; w1_t[3] = block1[ 7]; - w2_t[0] = block1[ 8]; w2_t[1] = block1[ 9]; w2_t[2] = block1[10]; w2_t[3] = block1[11]; - w3_t[0] = block1[12]; w3_t[1] = block1[13]; w3_t[2] = pw_salt_len * 8; @@ -2001,12 +1954,10 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, | uint_to_hex_lower8 ((d >> 8) & 255) << 16; w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - w2_t[0] = esalt_buf0[0]; w2_t[1] = esalt_buf0[1]; w2_t[2] = esalt_buf0[2]; w2_t[3] = esalt_buf0[3]; - w3_t[0] = esalt_buf0[4]; w3_t[1] = esalt_buf0[5]; w3_t[2] = esalt_buf0[6]; @@ -2104,17 +2055,14 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0_t[1] = esalt_buf0[ 9]; w0_t[2] = esalt_buf0[10]; w0_t[3] = esalt_buf0[11]; - w1_t[0] = esalt_buf0[12]; w1_t[1] = esalt_buf0[13]; w1_t[2] = esalt_buf0[14]; w1_t[3] = esalt_buf0[15]; - w2_t[0] = esalt_buf1[ 0]; w2_t[1] = esalt_buf1[ 1]; w2_t[2] = esalt_buf1[ 2]; w2_t[3] = esalt_buf1[ 3]; - w3_t[0] = esalt_buf1[ 4]; w3_t[1] = esalt_buf1[ 5]; w3_t[2] = esalt_buf1[ 6]; @@ -2217,17 +2165,14 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0_t[1] = esalt_buf1[ 9]; w0_t[2] = esalt_buf1[10]; w0_t[3] = esalt_buf1[11]; - w1_t[0] = esalt_buf1[12]; w1_t[1] = esalt_buf1[13]; w1_t[2] = esalt_buf1[14]; w1_t[3] = esalt_buf1[15]; - w2_t[0] = esalt_buf2[ 0]; w2_t[1] = esalt_buf2[ 1]; w2_t[2] = esalt_buf2[ 2]; w2_t[3] = esalt_buf2[ 3]; - w3_t[0] = esalt_buf2[ 4]; w3_t[1] = esalt_buf2[ 5]; w3_t[2] = digest_esalt_len * 8; @@ -2311,10 +2256,10 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, } } -__kernel void m11400_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m11400_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void m11400_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m11400_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { }