X-Git-Url: https://www.flypig.org.uk/git/?a=blobdiff_plain;f=OpenCL%2Fm08700_a0.cl;h=6fd3abe11856818715561b21e6e4f4431a4af8bc;hb=161a6eb4bc643d8e636e96eda613f5137d30da59;hp=91785fcc9fee20e2db6c7e607d2f895f981bba74;hpb=28edfbd654959e02133501b88187595e5b40fd24;p=hashcat.git diff --git a/OpenCL/m08700_a0.cl b/OpenCL/m08700_a0.cl index 91785fc..6fd3abe 100644 --- a/OpenCL/m08700_a0.cl +++ b/OpenCL/m08700_a0.cl @@ -1,28 +1,24 @@ /** * Authors.....: Jens Steube * Gabriele Gristina + * magnum * * License.....: MIT */ #define _LOTUS6_ -#include "include/constants.h" -#include "include/kernel_vendor.h" +//incompatible +//#define NEW_SIMD_CODE -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "OpenCL/types_ocl.c" -#include "OpenCL/common.c" -#include "include/rp_kernel.h" -#include "OpenCL/rp.c" - -#define COMPARE_S "OpenCL/check_single_comp4.c" -#define COMPARE_M "OpenCL/check_multi_comp4.c" +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" +#include "inc_types.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_simd.cl" __constant u32 lotus_magic_table[256] = { @@ -60,8 +56,6 @@ __constant u32 lotus_magic_table[256] = 0x29, 0x39, 0xb9, 0xe9, 0x4c, 0xff, 0x43, 0xab, }; -#define BOX(S,i) (S)[(i)] - #if VECT_SIZE == 1 #define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i)]) #elif VECT_SIZE == 2 @@ -70,6 +64,8 @@ __constant u32 lotus_magic_table[256] = #define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #elif VECT_SIZE == 8 #define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) #endif #if VECT_SIZE == 1 @@ -80,49 +76,52 @@ __constant u32 lotus_magic_table[256] = #define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) #elif VECT_SIZE == 8 #define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7]) +#elif VECT_SIZE == 16 +#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf]) #endif -static void lotus_mix (u32 *in, __local u32 *s_lotus_magic_table) +void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table) { - u32 p = 0; + u32x p = 0; for (int i = 0; i < 18; i++) { u32 s = 48; - #pragma unroll for (int j = 0; j < 12; j++) { - u32 tmp_in = in[j]; - u32 tmp_out = 0; + u32x tmp_in = in[j]; + u32x tmp_out = 0; - p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 0; - p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 8; - p = (p + s--) & 0xff; p = ((tmp_in >> 16) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 16; - p = (p + s--) & 0xff; p = ((tmp_in >> 24) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 24; + p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX1 (s_lotus_magic_table, p); tmp_out |= p << 0; + p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX1 (s_lotus_magic_table, p); tmp_out |= p << 8; + p = (p + s--) & 0xff; p = ((tmp_in >> 16) & 0xff) ^ BOX1 (s_lotus_magic_table, p); tmp_out |= p << 16; + p = (p + s--) & 0xff; p = ((tmp_in >> 24) & 0xff) ^ BOX1 (s_lotus_magic_table, p); tmp_out |= p << 24; in[j] = tmp_out; } } } -static void lotus_transform_password (u32 in[4], u32 out[4], __local u32 *s_lotus_magic_table) +void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_magic_table) { - u32 t = out[3] >> 24; + u32x t = out[3] >> 24; - u32 c; + u32x c; - //#pragma unroll // kernel fails if used + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 4; i++) { - t ^= (in[i] >> 0) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); - t ^= (in[i] >> 8) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 8; t = ((out[i] >> 8) & 0xff); - t ^= (in[i] >> 16) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 16; t = ((out[i] >> 16) & 0xff); - t ^= (in[i] >> 24) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 24; t = ((out[i] >> 24) & 0xff); + t ^= (in[i] >> 0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); + t ^= (in[i] >> 8) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 8; t = ((out[i] >> 8) & 0xff); + t ^= (in[i] >> 16) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 16; t = ((out[i] >> 16) & 0xff); + t ^= (in[i] >> 24) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 24; t = ((out[i] >> 24) & 0xff); } } -static void pad (u32 w[4], const u32 len) +void pad (u32 w[4], const u32 len) { const u32 val = 16 - len; @@ -201,9 +200,9 @@ static void pad (u32 w[4], const u32 len) } } -static void mdtransform_norecalc (u32 state[4], u32 block[4], __local u32 *s_lotus_magic_table) +void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 *s_lotus_magic_table) { - u32 x[12]; + u32x x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -226,23 +225,23 @@ static void mdtransform_norecalc (u32 state[4], u32 block[4], __local u32 *s_lot state[3] = x[3]; } -static void mdtransform (u32 state[4], u32 checksum[4], u32 block[4], __local u32 *s_lotus_magic_table) +void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], __local u32 *s_lotus_magic_table) { mdtransform_norecalc (state, block, s_lotus_magic_table); lotus_transform_password (block, checksum, s_lotus_magic_table); } -static void domino_big_md (const u32 saved_key[16], const u32 size, u32 state[4], __local u32 *s_lotus_magic_table) +void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], __local u32 *s_lotus_magic_table) { - u32 checksum[4]; + u32x checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32 block[4]; + u32x block[4]; block[0] = 0; block[1] = 0; @@ -272,7 +271,7 @@ static void domino_big_md (const u32 saved_key[16], const u32 size, u32 state[4] mdtransform_norecalc (state, checksum, s_lotus_magic_table); } -__kernel void m08700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m08700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -313,14 +312,12 @@ __kernel void m08700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, */ u32 pw_buf0[4]; + u32 pw_buf1[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - - u32 pw_buf1[4]; - pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; pw_buf1[2] = pws[gid].i[ 6]; @@ -339,39 +336,20 @@ __kernel void m08700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * loop */ - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) { - u32 w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; - u32 w1[4]; + const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32 w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32 w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); + /** + * domino + */ - u32 w[16]; + u32x w[16]; w[ 0] = w0[0]; w[ 1] = w0[1]; @@ -390,7 +368,7 @@ __kernel void m08700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w[14] = w3[2]; w[15] = w3[3]; - u32 state[4]; + u32x state[4]; state[0] = 0; state[1] = 0; @@ -420,22 +398,22 @@ __kernel void m08700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, domino_big_md (w, out_len, state, s_lotus_magic_table); - const u32 w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; - const u32 w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; - const u32 w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; - const u32 w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; - const u32 w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; - const u32 w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; - const u32 w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; - //const u32 w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 - // | uint_to_hex_upper8 ((state[3] >> 24) & 255) << 16; + const u32x w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 + | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; + const u32x w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 + | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; + const u32x w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 + | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; + const u32x w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 + | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; + const u32x w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 + | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; + const u32x w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 + | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; + const u32x w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 + | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; + //const u32x w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 + // | uint_to_hex_upper8 ((state[3] >> 24) & 255) << 16; const u32 pade = 0x0e0e0e0e; @@ -463,29 +441,24 @@ __kernel void m08700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, domino_big_md (w, 34, state, s_lotus_magic_table); - u32 a = state[0] & 0xffffffff; - u32 b = state[1] & 0xffffffff; - u32 c = state[2] & 0x000000ff; - u32 d = state[3] & 0x00000000; + u32x a = state[0] & 0xffffffff; + u32x b = state[1] & 0xffffffff; + u32x c = state[2] & 0x000000ff; + u32x d = state[3] & 0x00000000; - const u32 r0 = a; - const u32 r1 = b; - const u32 r2 = c; - const u32 r3 = d; - - #include COMPARE_M + COMPARE_M_SIMD (a, b, c, d); } } -__kernel void m08700_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m08700_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void m08700_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m08700_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void m08700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m08700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -526,14 +499,12 @@ __kernel void m08700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, */ u32 pw_buf0[4]; + u32 pw_buf1[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - - u32 pw_buf1[4]; - pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; pw_buf1[2] = pws[gid].i[ 6]; @@ -564,39 +535,20 @@ __kernel void m08700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * loop */ - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) { - u32 w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32 w1[4]; + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; + const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); - u32 w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32 w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); + /** + * domino + */ - u32 w[16]; + u32x w[16]; w[ 0] = w0[0]; w[ 1] = w0[1]; @@ -615,7 +567,7 @@ __kernel void m08700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w[14] = w3[2]; w[15] = w3[3]; - u32 state[4]; + u32x state[4]; state[0] = 0; state[1] = 0; @@ -645,22 +597,22 @@ __kernel void m08700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, domino_big_md (w, out_len, state, s_lotus_magic_table); - const u32 w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; - const u32 w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; - const u32 w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; - const u32 w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; - const u32 w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; - const u32 w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; - const u32 w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; - //const u32 w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 - // | uint_to_hex_upper8 ((state[3] >> 24) & 255) << 16; + const u32x w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 + | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; + const u32x w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 + | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; + const u32x w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 + | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; + const u32x w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 + | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; + const u32x w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 + | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; + const u32x w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 + | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; + const u32x w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 + | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; + //const u32x w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 + // | uint_to_hex_upper8 ((state[3] >> 24) & 255) << 16; const u32 pade = 0x0e0e0e0e; @@ -688,24 +640,19 @@ __kernel void m08700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, domino_big_md (w, 34, state, s_lotus_magic_table); - u32 a = state[0] & 0xffffffff; - u32 b = state[1] & 0xffffffff; - u32 c = state[2] & 0x000000ff; - u32 d = state[3] & 0x00000000; - - const u32 r0 = a; - const u32 r1 = b; - const u32 r2 = c; - const u32 r3 = d; + u32x a = state[0] & 0xffffffff; + u32x b = state[1] & 0xffffffff; + u32x c = state[2] & 0x000000ff; + u32x d = state[3] & 0x00000000; - #include COMPARE_S + COMPARE_S_SIMD (a, b, c, d); } } -__kernel void m08700_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m08700_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void m08700_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m08700_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { }