From 22137866186fe88f0d32756b06f21eeff4da7f64 Mon Sep 17 00:00:00 2001 From: jsteube Date: Sat, 28 May 2016 15:36:04 +0200 Subject: [PATCH] Increase Domain Cached Credentials (DCC), MS Cache cracking performance --- OpenCL/m01100_a0.cl | 142 ++++++++++++++++++---------------- OpenCL/m01100_a1.cl | 130 ++++++++++++++++--------------- OpenCL/m01100_a3.cl | 182 +++++++++++++++++++++++++++++--------------- 3 files changed, 268 insertions(+), 186 deletions(-) diff --git a/OpenCL/m01100_a0.cl b/OpenCL/m01100_a0.cl index e13d20f..5408b3a 100644 --- a/OpenCL/m01100_a0.cl +++ b/OpenCL/m01100_a0.cl @@ -24,19 +24,12 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { - /** - * modifier - */ - - const u32 lid = get_local_id (0); - /** * base */ const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; + const u32 lid = get_local_id (0); u32 pw_buf0[4]; u32 pw_buf1[4]; @@ -56,18 +49,30 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * salt */ - const u32 salt_buf0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt_buf1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 salt_buf2 = salt_bufs[salt_pos].salt_buf[2]; - const u32 salt_buf3 = salt_bufs[salt_pos].salt_buf[3]; - const u32 salt_buf4 = salt_bufs[salt_pos].salt_buf[4]; - const u32 salt_buf5 = salt_bufs[salt_pos].salt_buf[5]; - const u32 salt_buf6 = salt_bufs[salt_pos].salt_buf[6]; - const u32 salt_buf7 = salt_bufs[salt_pos].salt_buf[7]; - const u32 salt_buf8 = salt_bufs[salt_pos].salt_buf[8]; - const u32 salt_buf9 = salt_bufs[salt_pos].salt_buf[9]; + __local salt_t s_salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + if (lid == 0) + { + s_salt_buf[0] = salt_bufs[salt_pos]; + + s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; + } + + barrier (CLK_LOCAL_MEM_FENCE); + + if (gid >= gid_max) return; + + #define salt_buf00 s_salt_buf[0].salt_buf[ 0] + #define salt_buf01 s_salt_buf[0].salt_buf[ 1] + #define salt_buf02 s_salt_buf[0].salt_buf[ 2] + #define salt_buf03 s_salt_buf[0].salt_buf[ 3] + #define salt_buf04 s_salt_buf[0].salt_buf[ 4] + #define salt_buf05 s_salt_buf[0].salt_buf[ 5] + #define salt_buf06 s_salt_buf[0].salt_buf[ 6] + #define salt_buf07 s_salt_buf[0].salt_buf[ 7] + #define salt_buf08 s_salt_buf[0].salt_buf[ 8] + #define salt_buf09 s_salt_buf[0].salt_buf[ 9] + #define salt_buf10 s_salt_buf[0].salt_buf[10] /** * loop @@ -155,17 +160,17 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0[1] = b; w0[2] = c; w0[3] = d; - w1[0] = salt_buf0; - w1[1] = salt_buf1; - w1[2] = salt_buf2; - w1[3] = salt_buf3; - w2[0] = salt_buf4; - w2[1] = salt_buf5; - w2[2] = salt_buf6; - w2[3] = salt_buf7; - w3[0] = salt_buf8; - w3[1] = salt_buf9; - w3[2] = (16 + salt_len) * 8; + w1[0] = salt_buf00; + w1[1] = salt_buf01; + w1[2] = salt_buf02; + w1[3] = salt_buf03; + w2[0] = salt_buf04; + w2[1] = salt_buf05; + w2[2] = salt_buf06; + w2[3] = salt_buf07; + w3[0] = salt_buf08; + w3[1] = salt_buf09; + w3[2] = salt_buf10; w3[3] = 0; a = MD4M_A; @@ -238,19 +243,12 @@ __kernel void m01100_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { - /** - * modifier - */ - - const u32 lid = get_local_id (0); - /** * base */ const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; + const u32 lid = get_local_id (0); u32 pw_buf0[4]; u32 pw_buf1[4]; @@ -266,6 +264,35 @@ __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, const u32 pw_len = pws[gid].pw_len; + /** + * salt + */ + + __local salt_t s_salt_buf[1]; + + if (lid == 0) + { + s_salt_buf[0] = salt_bufs[salt_pos]; + + s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; + } + + barrier (CLK_LOCAL_MEM_FENCE); + + if (gid >= gid_max) return; + + #define salt_buf00 s_salt_buf[0].salt_buf[ 0] + #define salt_buf01 s_salt_buf[0].salt_buf[ 1] + #define salt_buf02 s_salt_buf[0].salt_buf[ 2] + #define salt_buf03 s_salt_buf[0].salt_buf[ 3] + #define salt_buf04 s_salt_buf[0].salt_buf[ 4] + #define salt_buf05 s_salt_buf[0].salt_buf[ 5] + #define salt_buf06 s_salt_buf[0].salt_buf[ 6] + #define salt_buf07 s_salt_buf[0].salt_buf[ 7] + #define salt_buf08 s_salt_buf[0].salt_buf[ 8] + #define salt_buf09 s_salt_buf[0].salt_buf[ 9] + #define salt_buf10 s_salt_buf[0].salt_buf[10] + /** * digest */ @@ -278,23 +305,6 @@ __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, digests_buf[digests_offset].digest_buf[DGST_R3] }; - /** - * salt - */ - - const u32 salt_buf0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt_buf1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 salt_buf2 = salt_bufs[salt_pos].salt_buf[2]; - const u32 salt_buf3 = salt_bufs[salt_pos].salt_buf[3]; - const u32 salt_buf4 = salt_bufs[salt_pos].salt_buf[4]; - const u32 salt_buf5 = salt_bufs[salt_pos].salt_buf[5]; - const u32 salt_buf6 = salt_bufs[salt_pos].salt_buf[6]; - const u32 salt_buf7 = salt_bufs[salt_pos].salt_buf[7]; - const u32 salt_buf8 = salt_bufs[salt_pos].salt_buf[8]; - const u32 salt_buf9 = salt_bufs[salt_pos].salt_buf[9]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - /** * loop */ @@ -381,17 +391,17 @@ __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0[1] = b; w0[2] = c; w0[3] = d; - w1[0] = salt_buf0; - w1[1] = salt_buf1; - w1[2] = salt_buf2; - w1[3] = salt_buf3; - w2[0] = salt_buf4; - w2[1] = salt_buf5; - w2[2] = salt_buf6; - w2[3] = salt_buf7; - w3[0] = salt_buf8; - w3[1] = salt_buf9; - w3[2] = (16 + salt_len) * 8; + w1[0] = salt_buf00; + w1[1] = salt_buf01; + w1[2] = salt_buf02; + w1[3] = salt_buf03; + w2[0] = salt_buf04; + w2[1] = salt_buf05; + w2[2] = salt_buf06; + w2[3] = salt_buf07; + w3[0] = salt_buf08; + w3[1] = salt_buf09; + w3[2] = salt_buf10; w3[3] = 0; a = MD4M_A; diff --git a/OpenCL/m01100_a1.cl b/OpenCL/m01100_a1.cl index fbaba9b..3e3fd65 100644 --- a/OpenCL/m01100_a1.cl +++ b/OpenCL/m01100_a1.cl @@ -22,19 +22,12 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { - /** - * modifier - */ - - const u32 lid = get_local_id (0); - /** * base */ const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; + const u32 lid = get_local_id (0); u32 pw_buf0[4]; u32 pw_buf1[4]; @@ -54,18 +47,30 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * salt */ - const u32 salt_buf0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt_buf1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 salt_buf2 = salt_bufs[salt_pos].salt_buf[2]; - const u32 salt_buf3 = salt_bufs[salt_pos].salt_buf[3]; - const u32 salt_buf4 = salt_bufs[salt_pos].salt_buf[4]; - const u32 salt_buf5 = salt_bufs[salt_pos].salt_buf[5]; - const u32 salt_buf6 = salt_bufs[salt_pos].salt_buf[6]; - const u32 salt_buf7 = salt_bufs[salt_pos].salt_buf[7]; - const u32 salt_buf8 = salt_bufs[salt_pos].salt_buf[8]; - const u32 salt_buf9 = salt_bufs[salt_pos].salt_buf[9]; + __local salt_t s_salt_buf[1]; + + if (lid == 0) + { + s_salt_buf[0] = salt_bufs[salt_pos]; + + s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; + } + + barrier (CLK_LOCAL_MEM_FENCE); + + if (gid >= gid_max) return; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + #define salt_buf00 s_salt_buf[0].salt_buf[ 0] + #define salt_buf01 s_salt_buf[0].salt_buf[ 1] + #define salt_buf02 s_salt_buf[0].salt_buf[ 2] + #define salt_buf03 s_salt_buf[0].salt_buf[ 3] + #define salt_buf04 s_salt_buf[0].salt_buf[ 4] + #define salt_buf05 s_salt_buf[0].salt_buf[ 5] + #define salt_buf06 s_salt_buf[0].salt_buf[ 6] + #define salt_buf07 s_salt_buf[0].salt_buf[ 7] + #define salt_buf08 s_salt_buf[0].salt_buf[ 8] + #define salt_buf09 s_salt_buf[0].salt_buf[ 9] + #define salt_buf10 s_salt_buf[0].salt_buf[10] /** * loop @@ -215,17 +220,17 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0[1] = b; w0[2] = c; w0[3] = d; - w1[0] = salt_buf0; - w1[1] = salt_buf1; - w1[2] = salt_buf2; - w1[3] = salt_buf3; - w2[0] = salt_buf4; - w2[1] = salt_buf5; - w2[2] = salt_buf6; - w2[3] = salt_buf7; - w3[0] = salt_buf8; - w3[1] = salt_buf9; - w3[2] = (16 + salt_len) * 8; + w1[0] = salt_buf00; + w1[1] = salt_buf01; + w1[2] = salt_buf02; + w1[3] = salt_buf03; + w2[0] = salt_buf04; + w2[1] = salt_buf05; + w2[2] = salt_buf06; + w2[3] = salt_buf07; + w3[0] = salt_buf08; + w3[1] = salt_buf09; + w3[2] = salt_buf10; w3[3] = 0; a = MD4M_A; @@ -298,19 +303,12 @@ __kernel void m01100_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { - /** - * modifier - */ - - const u32 lid = get_local_id (0); - /** * base */ const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; + const u32 lid = get_local_id (0); u32 pw_buf0[4]; u32 pw_buf1[4]; @@ -330,18 +328,30 @@ __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * salt */ - const u32 salt_buf0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt_buf1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 salt_buf2 = salt_bufs[salt_pos].salt_buf[2]; - const u32 salt_buf3 = salt_bufs[salt_pos].salt_buf[3]; - const u32 salt_buf4 = salt_bufs[salt_pos].salt_buf[4]; - const u32 salt_buf5 = salt_bufs[salt_pos].salt_buf[5]; - const u32 salt_buf6 = salt_bufs[salt_pos].salt_buf[6]; - const u32 salt_buf7 = salt_bufs[salt_pos].salt_buf[7]; - const u32 salt_buf8 = salt_bufs[salt_pos].salt_buf[8]; - const u32 salt_buf9 = salt_bufs[salt_pos].salt_buf[9]; + __local salt_t s_salt_buf[1]; + + if (lid == 0) + { + s_salt_buf[0] = salt_bufs[salt_pos]; + + s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; + } + + barrier (CLK_LOCAL_MEM_FENCE); + + if (gid >= gid_max) return; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + #define salt_buf00 s_salt_buf[0].salt_buf[ 0] + #define salt_buf01 s_salt_buf[0].salt_buf[ 1] + #define salt_buf02 s_salt_buf[0].salt_buf[ 2] + #define salt_buf03 s_salt_buf[0].salt_buf[ 3] + #define salt_buf04 s_salt_buf[0].salt_buf[ 4] + #define salt_buf05 s_salt_buf[0].salt_buf[ 5] + #define salt_buf06 s_salt_buf[0].salt_buf[ 6] + #define salt_buf07 s_salt_buf[0].salt_buf[ 7] + #define salt_buf08 s_salt_buf[0].salt_buf[ 8] + #define salt_buf09 s_salt_buf[0].salt_buf[ 9] + #define salt_buf10 s_salt_buf[0].salt_buf[10] /** * digest @@ -503,17 +513,17 @@ __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0[1] = b; w0[2] = c; w0[3] = d; - w1[0] = salt_buf0; - w1[1] = salt_buf1; - w1[2] = salt_buf2; - w1[3] = salt_buf3; - w2[0] = salt_buf4; - w2[1] = salt_buf5; - w2[2] = salt_buf6; - w2[3] = salt_buf7; - w3[0] = salt_buf8; - w3[1] = salt_buf9; - w3[2] = (16 + salt_len) * 8; + w1[0] = salt_buf00; + w1[1] = salt_buf01; + w1[2] = salt_buf02; + w1[3] = salt_buf03; + w2[0] = salt_buf04; + w2[1] = salt_buf05; + w2[2] = salt_buf06; + w2[3] = salt_buf07; + w3[0] = salt_buf08; + w3[1] = salt_buf09; + w3[2] = salt_buf10; w3[3] = 0; a = MD4M_A; diff --git a/OpenCL/m01100_a3.cl b/OpenCL/m01100_a3.cl index 53d62d1..0a11010 100644 --- a/OpenCL/m01100_a3.cl +++ b/OpenCL/m01100_a3.cl @@ -20,7 +20,7 @@ #include "inc_common.cl" #include "inc_simd.cl" -void m01100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) +void m01100m (__local salt_t s_salt_buf[1], u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -33,18 +33,17 @@ void m01100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_r * salt */ - const u32 salt_buf0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt_buf1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 salt_buf2 = salt_bufs[salt_pos].salt_buf[2]; - const u32 salt_buf3 = salt_bufs[salt_pos].salt_buf[3]; - const u32 salt_buf4 = salt_bufs[salt_pos].salt_buf[4]; - const u32 salt_buf5 = salt_bufs[salt_pos].salt_buf[5]; - const u32 salt_buf6 = salt_bufs[salt_pos].salt_buf[6]; - const u32 salt_buf7 = salt_bufs[salt_pos].salt_buf[7]; - const u32 salt_buf8 = salt_bufs[salt_pos].salt_buf[8]; - const u32 salt_buf9 = salt_bufs[salt_pos].salt_buf[9]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; + #define salt_buf00 s_salt_buf[0].salt_buf[ 0] + #define salt_buf01 s_salt_buf[0].salt_buf[ 1] + #define salt_buf02 s_salt_buf[0].salt_buf[ 2] + #define salt_buf03 s_salt_buf[0].salt_buf[ 3] + #define salt_buf04 s_salt_buf[0].salt_buf[ 4] + #define salt_buf05 s_salt_buf[0].salt_buf[ 5] + #define salt_buf06 s_salt_buf[0].salt_buf[ 6] + #define salt_buf07 s_salt_buf[0].salt_buf[ 7] + #define salt_buf08 s_salt_buf[0].salt_buf[ 8] + #define salt_buf09 s_salt_buf[0].salt_buf[ 9] + #define salt_buf10 s_salt_buf[0].salt_buf[10] /** * base @@ -183,17 +182,17 @@ void m01100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_r w0_t[1] = b; w0_t[2] = c; w0_t[3] = d; - w1_t[0] = salt_buf0; - w1_t[1] = salt_buf1; - w1_t[2] = salt_buf2; - w1_t[3] = salt_buf3; - w2_t[0] = salt_buf4; - w2_t[1] = salt_buf5; - w2_t[2] = salt_buf6; - w2_t[3] = salt_buf7; - w3_t[0] = salt_buf8; - w3_t[1] = salt_buf9; - w3_t[2] = (16 + salt_len) * 8; + w1_t[0] = salt_buf00; + w1_t[1] = salt_buf01; + w1_t[2] = salt_buf02; + w1_t[3] = salt_buf03; + w2_t[0] = salt_buf04; + w2_t[1] = salt_buf05; + w2_t[2] = salt_buf06; + w2_t[3] = salt_buf07; + w3_t[0] = salt_buf08; + w3_t[1] = salt_buf09; + w3_t[2] = salt_buf10; w3_t[3] = 0; a = MD4M_A; @@ -256,7 +255,7 @@ void m01100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_r } } -void m01100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) +void m01100s (__local salt_t s_salt_buf[1], u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -269,18 +268,17 @@ void m01100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_r * salt */ - const u32 salt_buf0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt_buf1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 salt_buf2 = salt_bufs[salt_pos].salt_buf[2]; - const u32 salt_buf3 = salt_bufs[salt_pos].salt_buf[3]; - const u32 salt_buf4 = salt_bufs[salt_pos].salt_buf[4]; - const u32 salt_buf5 = salt_bufs[salt_pos].salt_buf[5]; - const u32 salt_buf6 = salt_bufs[salt_pos].salt_buf[6]; - const u32 salt_buf7 = salt_bufs[salt_pos].salt_buf[7]; - const u32 salt_buf8 = salt_bufs[salt_pos].salt_buf[8]; - const u32 salt_buf9 = salt_bufs[salt_pos].salt_buf[9]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; + #define salt_buf00 s_salt_buf[0].salt_buf[ 0] + #define salt_buf01 s_salt_buf[0].salt_buf[ 1] + #define salt_buf02 s_salt_buf[0].salt_buf[ 2] + #define salt_buf03 s_salt_buf[0].salt_buf[ 3] + #define salt_buf04 s_salt_buf[0].salt_buf[ 4] + #define salt_buf05 s_salt_buf[0].salt_buf[ 5] + #define salt_buf06 s_salt_buf[0].salt_buf[ 6] + #define salt_buf07 s_salt_buf[0].salt_buf[ 7] + #define salt_buf08 s_salt_buf[0].salt_buf[ 8] + #define salt_buf09 s_salt_buf[0].salt_buf[ 9] + #define salt_buf10 s_salt_buf[0].salt_buf[10] /** * base @@ -431,17 +429,17 @@ void m01100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_r w0_t[1] = b; w0_t[2] = c; w0_t[3] = d; - w1_t[0] = salt_buf0; - w1_t[1] = salt_buf1; - w1_t[2] = salt_buf2; - w1_t[3] = salt_buf3; - w2_t[0] = salt_buf4; - w2_t[1] = salt_buf5; - w2_t[2] = salt_buf6; - w2_t[3] = salt_buf7; - w3_t[0] = salt_buf8; - w3_t[1] = salt_buf9; - w3_t[2] = (16 + salt_len) * 8; + w1_t[0] = salt_buf00; + w1_t[1] = salt_buf01; + w1_t[2] = salt_buf02; + w1_t[3] = salt_buf03; + w2_t[0] = salt_buf04; + w2_t[1] = salt_buf05; + w2_t[2] = salt_buf06; + w2_t[3] = salt_buf07; + w3_t[0] = salt_buf08; + w3_t[1] = salt_buf09; + w3_t[2] = salt_buf10; w3_t[3] = 0; a = MD4M_A; @@ -514,8 +512,7 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, */ const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; + const u32 lid = get_local_id (0); u32 w[16]; @@ -538,11 +535,28 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, const u32 pw_len = pws[gid].pw_len; + /** + * salt + */ + + __local salt_t s_salt_buf[1]; + + if (lid == 0) + { + s_salt_buf[0] = salt_bufs[salt_pos]; + + s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; + } + + barrier (CLK_LOCAL_MEM_FENCE); + + if (gid >= gid_max) return; + /** * main */ - m01100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); + m01100m (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } __kernel void m01100_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) @@ -552,8 +566,7 @@ __kernel void m01100_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, */ const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; + const u32 lid = get_local_id (0); u32 w[16]; @@ -576,11 +589,28 @@ __kernel void m01100_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, const u32 pw_len = pws[gid].pw_len; + /** + * salt + */ + + __local salt_t s_salt_buf[1]; + + if (lid == 0) + { + s_salt_buf[0] = salt_bufs[salt_pos]; + + s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; + } + + barrier (CLK_LOCAL_MEM_FENCE); + + if (gid >= gid_max) return; + /** * main */ - m01100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); + m01100m (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } __kernel void m01100_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) @@ -594,8 +624,7 @@ __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, */ const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; + const u32 lid = get_local_id (0); u32 w[16]; @@ -618,11 +647,28 @@ __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, const u32 pw_len = pws[gid].pw_len; + /** + * salt + */ + + __local salt_t s_salt_buf[1]; + + if (lid == 0) + { + s_salt_buf[0] = salt_bufs[salt_pos]; + + s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; + } + + barrier (CLK_LOCAL_MEM_FENCE); + + if (gid >= gid_max) return; + /** * main */ - m01100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); + m01100s (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } __kernel void m01100_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) @@ -632,8 +678,7 @@ __kernel void m01100_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, */ const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; + const u32 lid = get_local_id (0); u32 w[16]; @@ -656,11 +701,28 @@ __kernel void m01100_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, const u32 pw_len = pws[gid].pw_len; + /** + * salt + */ + + __local salt_t s_salt_buf[1]; + + if (lid == 0) + { + s_salt_buf[0] = salt_bufs[salt_pos]; + + s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; + } + + barrier (CLK_LOCAL_MEM_FENCE); + + if (gid >= gid_max) return; + /** * main */ - m01100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); + m01100s (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } __kernel void m01100_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -- 2.25.1