X-Git-Url: https://www.flypig.org.uk/git/?a=blobdiff_plain;f=OpenCL%2Fm01420_a1.cl;h=bf55faed9580111b1685df2162edf700393f868b;hb=e6e5005a6bac731c887d30e337bd417f3cf2a43a;hp=aca8ad99c302825157ed84d1dfc0a6434d748410;hpb=b409e5e9e162927b25af88e672326b246f17ec5f;p=hashcat.git diff --git a/OpenCL/m01420_a1.cl b/OpenCL/m01420_a1.cl index aca8ad9..bf55fae 100644 --- a/OpenCL/m01420_a1.cl +++ b/OpenCL/m01420_a1.cl @@ -5,8 +5,6 @@ #define _SHA256_ -#define NEW_SIMD_CODE - #include "include/constants.h" #include "include/kernel_vendor.h" @@ -18,7 +16,9 @@ #include "include/kernel_functions.c" #include "OpenCL/types_ocl.c" #include "OpenCL/common.c" -#include "OpenCL/simd.c" + +#define COMPARE_S "OpenCL/check_single_comp4.c" +#define COMPARE_M "OpenCL/check_multi_comp4.c" __kernel void m01420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -36,31 +36,54 @@ __kernel void m01420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, if (gid >= gid_max) return; - u32 pws0[4] = { 0 }; - u32 pws1[4] = { 0 }; + u32 wordl0[4]; + + wordl0[0] = pws[gid].i[ 0]; + wordl0[1] = pws[gid].i[ 1]; + wordl0[2] = pws[gid].i[ 2]; + wordl0[3] = pws[gid].i[ 3]; + + u32 wordl1[4]; + + wordl1[0] = pws[gid].i[ 4]; + wordl1[1] = pws[gid].i[ 5]; + wordl1[2] = pws[gid].i[ 6]; + wordl1[3] = pws[gid].i[ 7]; - pws0[0] = pws[gid].i[0]; - pws0[1] = pws[gid].i[1]; - pws0[2] = pws[gid].i[2]; - pws0[3] = pws[gid].i[3]; - pws1[0] = pws[gid].i[4]; - pws1[1] = pws[gid].i[5]; - pws1[2] = pws[gid].i[6]; - pws1[3] = pws[gid].i[7]; + u32 wordl2[4]; + + wordl2[0] = 0; + wordl2[1] = 0; + wordl2[2] = 0; + wordl2[3] = 0; + + u32 wordl3[4]; + + wordl3[0] = 0; + wordl3[1] = 0; + wordl3[2] = 0; + wordl3[3] = 0; const u32 pw_l_len = pws[gid].pw_len; + if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) + { + switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len); + } + /** * salt */ u32 salt_buf0[4]; - u32 salt_buf1[4]; salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; + + u32 salt_buf1[4]; + salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; @@ -72,35 +95,43 @@ __kernel void m01420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * loop */ - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE) + for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) { - const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos); - - const u32x pw_len = pw_l_len + pw_r_len; - - u32x wordr0[4] = { 0 }; - u32x wordr1[4] = { 0 }; - u32x wordr2[4] = { 0 }; - u32x wordr3[4] = { 0 }; - - wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); - wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); - wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); - wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); - wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); - wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); - wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); - wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + const u32 pw_r_len = combs_buf[il_pos].pw_len; + + const u32 pw_len = pw_l_len + pw_r_len; + + u32 wordr0[4]; + u32 wordr1[4]; + u32 wordr2[4]; + u32 wordr3[4]; + + wordr0[0] = combs_buf[il_pos].i[0]; + wordr0[1] = combs_buf[il_pos].i[1]; + wordr0[2] = combs_buf[il_pos].i[2]; + wordr0[3] = combs_buf[il_pos].i[3]; + wordr1[0] = combs_buf[il_pos].i[4]; + wordr1[1] = combs_buf[il_pos].i[5]; + wordr1[2] = combs_buf[il_pos].i[6]; + wordr1[3] = combs_buf[il_pos].i[7]; + wordr2[0] = 0; + wordr2[1] = 0; + wordr2[2] = 0; + wordr2[3] = 0; + wordr3[0] = 0; + wordr3[1] = 0; + wordr3[2] = 0; + wordr3[3] = 0; if (combs_mode == COMBINATOR_MODE_BASE_LEFT) { switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -123,7 +154,7 @@ __kernel void m01420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * prepend salt */ - const u32x pw_salt_len = pw_len + salt_len; + const u32 pw_salt_len = pw_len + salt_len; switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len); @@ -142,31 +173,31 @@ __kernel void m01420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * sha256 */ - u32x w0_t = swap32 (w0[0]); - u32x w1_t = swap32 (w0[1]); - u32x w2_t = swap32 (w0[2]); - u32x w3_t = swap32 (w0[3]); - u32x w4_t = swap32 (w1[0]); - u32x w5_t = swap32 (w1[1]); - u32x w6_t = swap32 (w1[2]); - u32x w7_t = swap32 (w1[3]); - u32x w8_t = swap32 (w2[0]); - u32x w9_t = swap32 (w2[1]); - u32x wa_t = swap32 (w2[2]); - u32x wb_t = swap32 (w2[3]); - u32x wc_t = swap32 (w3[0]); - u32x wd_t = swap32 (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap32 (w0[0]); + u32 w1_t = swap32 (w0[1]); + u32 w2_t = swap32 (w0[2]); + u32 w3_t = swap32 (w0[3]); + u32 w4_t = swap32 (w1[0]); + u32 w5_t = swap32 (w1[1]); + u32 w6_t = swap32 (w1[2]); + u32 w7_t = swap32 (w1[3]); + u32 w8_t = swap32 (w2[0]); + u32 w9_t = swap32 (w2[1]); + u32 wa_t = swap32 (w2[2]); + u32 wb_t = swap32 (w2[3]); + u32 wc_t = swap32 (w3[0]); + u32 wd_t = swap32 (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -236,7 +267,13 @@ __kernel void m01420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - COMPARE_M_SIMD (d, h, c, g); + + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; + + #include COMPARE_M } } @@ -264,31 +301,54 @@ __kernel void m01420_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, if (gid >= gid_max) return; - u32 pws0[4] = { 0 }; - u32 pws1[4] = { 0 }; + u32 wordl0[4]; + + wordl0[0] = pws[gid].i[ 0]; + wordl0[1] = pws[gid].i[ 1]; + wordl0[2] = pws[gid].i[ 2]; + wordl0[3] = pws[gid].i[ 3]; + + u32 wordl1[4]; + + wordl1[0] = pws[gid].i[ 4]; + wordl1[1] = pws[gid].i[ 5]; + wordl1[2] = pws[gid].i[ 6]; + wordl1[3] = pws[gid].i[ 7]; + + u32 wordl2[4]; - pws0[0] = pws[gid].i[0]; - pws0[1] = pws[gid].i[1]; - pws0[2] = pws[gid].i[2]; - pws0[3] = pws[gid].i[3]; - pws1[0] = pws[gid].i[4]; - pws1[1] = pws[gid].i[5]; - pws1[2] = pws[gid].i[6]; - pws1[3] = pws[gid].i[7]; + wordl2[0] = 0; + wordl2[1] = 0; + wordl2[2] = 0; + wordl2[3] = 0; + + u32 wordl3[4]; + + wordl3[0] = 0; + wordl3[1] = 0; + wordl3[2] = 0; + wordl3[3] = 0; const u32 pw_l_len = pws[gid].pw_len; + if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) + { + switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len); + } + /** * salt */ u32 salt_buf0[4]; - u32 salt_buf1[4]; salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; + + u32 salt_buf1[4]; + salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; @@ -312,35 +372,43 @@ __kernel void m01420_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * loop */ - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE) + for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) { - const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos); - - const u32x pw_len = pw_l_len + pw_r_len; - - u32x wordr0[4] = { 0 }; - u32x wordr1[4] = { 0 }; - u32x wordr2[4] = { 0 }; - u32x wordr3[4] = { 0 }; - - wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); - wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); - wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); - wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); - wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); - wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); - wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); - wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + const u32 pw_r_len = combs_buf[il_pos].pw_len; + + const u32 pw_len = pw_l_len + pw_r_len; + + u32 wordr0[4]; + u32 wordr1[4]; + u32 wordr2[4]; + u32 wordr3[4]; + + wordr0[0] = combs_buf[il_pos].i[0]; + wordr0[1] = combs_buf[il_pos].i[1]; + wordr0[2] = combs_buf[il_pos].i[2]; + wordr0[3] = combs_buf[il_pos].i[3]; + wordr1[0] = combs_buf[il_pos].i[4]; + wordr1[1] = combs_buf[il_pos].i[5]; + wordr1[2] = combs_buf[il_pos].i[6]; + wordr1[3] = combs_buf[il_pos].i[7]; + wordr2[0] = 0; + wordr2[1] = 0; + wordr2[2] = 0; + wordr2[3] = 0; + wordr3[0] = 0; + wordr3[1] = 0; + wordr3[2] = 0; + wordr3[3] = 0; if (combs_mode == COMBINATOR_MODE_BASE_LEFT) { switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -363,7 +431,7 @@ __kernel void m01420_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * prepend salt */ - const u32x pw_salt_len = pw_len + salt_len; + const u32 pw_salt_len = pw_len + salt_len; switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len); @@ -382,31 +450,31 @@ __kernel void m01420_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * sha256 */ - u32x w0_t = swap32 (w0[0]); - u32x w1_t = swap32 (w0[1]); - u32x w2_t = swap32 (w0[2]); - u32x w3_t = swap32 (w0[3]); - u32x w4_t = swap32 (w1[0]); - u32x w5_t = swap32 (w1[1]); - u32x w6_t = swap32 (w1[2]); - u32x w7_t = swap32 (w1[3]); - u32x w8_t = swap32 (w2[0]); - u32x w9_t = swap32 (w2[1]); - u32x wa_t = swap32 (w2[2]); - u32x wb_t = swap32 (w2[3]); - u32x wc_t = swap32 (w3[0]); - u32x wd_t = swap32 (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap32 (w0[0]); + u32 w1_t = swap32 (w0[1]); + u32 w2_t = swap32 (w0[2]); + u32 w3_t = swap32 (w0[3]); + u32 w4_t = swap32 (w1[0]); + u32 w5_t = swap32 (w1[1]); + u32 w6_t = swap32 (w1[2]); + u32 w7_t = swap32 (w1[3]); + u32 w8_t = swap32 (w2[0]); + u32 w9_t = swap32 (w2[1]); + u32 wa_t = swap32 (w2[2]); + u32 wb_t = swap32 (w2[3]); + u32 wc_t = swap32 (w3[0]); + u32 wd_t = swap32 (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -476,7 +544,13 @@ __kernel void m01420_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - COMPARE_S_SIMD (d, h, c, g); + + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; + + #include COMPARE_S } }