From: jsteube Date: Mon, 9 May 2016 21:07:23 +0000 (+0200) Subject: Use real registers for salt, high-end maxwells didn't like it with global memory... X-Git-Tag: v3.00-beta~5 X-Git-Url: https://www.flypig.org.uk/git/?p=hashcat.git;a=commitdiff_plain;h=9390be8594a603f5298c8aefc5bf2ee831d5e539 Use real registers for salt, high-end maxwells didn't like it with global memory access --- diff --git a/OpenCL/m01100_a0.cl b/OpenCL/m01100_a0.cl index 081fb74..1cdce45 100644 --- a/OpenCL/m01100_a0.cl +++ b/OpenCL/m01100_a0.cl @@ -56,24 +56,18 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * salt */ - u32 salt_buf0[4]; - u32 salt_buf1[4]; - u32 salt_buf2[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_buf0 = salt_bufs[salt_pos].salt_buf[0]; + const u32 salt_buf1 = salt_bufs[salt_pos].salt_buf[1]; + const u32 salt_buf2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 salt_buf3 = salt_bufs[salt_pos].salt_buf[3]; + const u32 salt_buf4 = salt_bufs[salt_pos].salt_buf[4]; + const u32 salt_buf5 = salt_bufs[salt_pos].salt_buf[5]; + const u32 salt_buf6 = salt_bufs[salt_pos].salt_buf[6]; + const u32 salt_buf7 = salt_bufs[salt_pos].salt_buf[7]; + const u32 salt_buf8 = salt_bufs[salt_pos].salt_buf[8]; + const u32 salt_buf9 = salt_bufs[salt_pos].salt_buf[9]; + + const u32 salt_len = salt_bufs[salt_pos].salt_len; /** * loop @@ -161,16 +155,16 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0[1] = b; w0[2] = c; w0[3] = d; - w1[0] = salt_buf0[0]; - w1[1] = salt_buf0[1]; - w1[2] = salt_buf0[2]; - w1[3] = salt_buf0[3]; - w2[0] = salt_buf1[0]; - w2[1] = salt_buf1[1]; - w2[2] = salt_buf1[2]; - w2[3] = salt_buf1[3]; - w3[0] = salt_buf2[0]; - w3[1] = salt_buf2[1]; + w1[0] = salt_buf0; + w1[1] = salt_buf1; + w1[2] = salt_buf2; + w1[3] = salt_buf3; + w2[0] = salt_buf4; + w2[1] = salt_buf5; + w2[2] = salt_buf6; + w2[3] = salt_buf7; + w3[0] = salt_buf8; + w3[1] = salt_buf9; w3[2] = (16 + salt_len) * 8; w3[3] = 0; @@ -288,24 +282,18 @@ __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * salt */ - u32 salt_buf0[4]; - u32 salt_buf1[4]; - u32 salt_buf2[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_buf0 = salt_bufs[salt_pos].salt_buf[0]; + const u32 salt_buf1 = salt_bufs[salt_pos].salt_buf[1]; + const u32 salt_buf2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 salt_buf3 = salt_bufs[salt_pos].salt_buf[3]; + const u32 salt_buf4 = salt_bufs[salt_pos].salt_buf[4]; + const u32 salt_buf5 = salt_bufs[salt_pos].salt_buf[5]; + const u32 salt_buf6 = salt_bufs[salt_pos].salt_buf[6]; + const u32 salt_buf7 = salt_bufs[salt_pos].salt_buf[7]; + const u32 salt_buf8 = salt_bufs[salt_pos].salt_buf[8]; + const u32 salt_buf9 = salt_bufs[salt_pos].salt_buf[9]; + + const u32 salt_len = salt_bufs[salt_pos].salt_len; /** * loop @@ -393,16 +381,16 @@ __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0[1] = b; w0[2] = c; w0[3] = d; - w1[0] = salt_buf0[0]; - w1[1] = salt_buf0[1]; - w1[2] = salt_buf0[2]; - w1[3] = salt_buf0[3]; - w2[0] = salt_buf1[0]; - w2[1] = salt_buf1[1]; - w2[2] = salt_buf1[2]; - w2[3] = salt_buf1[3]; - w3[0] = salt_buf2[0]; - w3[1] = salt_buf2[1]; + w1[0] = salt_buf0; + w1[1] = salt_buf1; + w1[2] = salt_buf2; + w1[3] = salt_buf3; + w2[0] = salt_buf4; + w2[1] = salt_buf5; + w2[2] = salt_buf6; + w2[3] = salt_buf7; + w3[0] = salt_buf8; + w3[1] = salt_buf9; w3[2] = (16 + salt_len) * 8; w3[3] = 0; diff --git a/OpenCL/m01100_a1.cl b/OpenCL/m01100_a1.cl index 3017c31..ebde581 100644 --- a/OpenCL/m01100_a1.cl +++ b/OpenCL/m01100_a1.cl @@ -54,24 +54,18 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * salt */ - u32 salt_buf0[4]; - u32 salt_buf1[4]; - u32 salt_buf2[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_buf0 = salt_bufs[salt_pos].salt_buf[0]; + const u32 salt_buf1 = salt_bufs[salt_pos].salt_buf[1]; + const u32 salt_buf2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 salt_buf3 = salt_bufs[salt_pos].salt_buf[3]; + const u32 salt_buf4 = salt_bufs[salt_pos].salt_buf[4]; + const u32 salt_buf5 = salt_bufs[salt_pos].salt_buf[5]; + const u32 salt_buf6 = salt_bufs[salt_pos].salt_buf[6]; + const u32 salt_buf7 = salt_bufs[salt_pos].salt_buf[7]; + const u32 salt_buf8 = salt_bufs[salt_pos].salt_buf[8]; + const u32 salt_buf9 = salt_bufs[salt_pos].salt_buf[9]; + + const u32 salt_len = salt_bufs[salt_pos].salt_len; /** * loop @@ -221,16 +215,16 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0[1] = b; w0[2] = c; w0[3] = d; - w1[0] = salt_buf0[0]; - w1[1] = salt_buf0[1]; - w1[2] = salt_buf0[2]; - w1[3] = salt_buf0[3]; - w2[0] = salt_buf1[0]; - w2[1] = salt_buf1[1]; - w2[2] = salt_buf1[2]; - w2[3] = salt_buf1[3]; - w3[0] = salt_buf2[0]; - w3[1] = salt_buf2[1]; + w1[0] = salt_buf0; + w1[1] = salt_buf1; + w1[2] = salt_buf2; + w1[3] = salt_buf3; + w2[0] = salt_buf4; + w2[1] = salt_buf5; + w2[2] = salt_buf6; + w2[3] = salt_buf7; + w3[0] = salt_buf8; + w3[1] = salt_buf9; w3[2] = (16 + salt_len) * 8; w3[3] = 0; @@ -336,24 +330,18 @@ __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * salt */ - u32 salt_buf0[4]; - u32 salt_buf1[4]; - u32 salt_buf2[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_buf0 = salt_bufs[salt_pos].salt_buf[0]; + const u32 salt_buf1 = salt_bufs[salt_pos].salt_buf[1]; + const u32 salt_buf2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 salt_buf3 = salt_bufs[salt_pos].salt_buf[3]; + const u32 salt_buf4 = salt_bufs[salt_pos].salt_buf[4]; + const u32 salt_buf5 = salt_bufs[salt_pos].salt_buf[5]; + const u32 salt_buf6 = salt_bufs[salt_pos].salt_buf[6]; + const u32 salt_buf7 = salt_bufs[salt_pos].salt_buf[7]; + const u32 salt_buf8 = salt_bufs[salt_pos].salt_buf[8]; + const u32 salt_buf9 = salt_bufs[salt_pos].salt_buf[9]; + + const u32 salt_len = salt_bufs[salt_pos].salt_len; /** * digest @@ -515,16 +503,16 @@ __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w0[1] = b; w0[2] = c; w0[3] = d; - w1[0] = salt_buf0[0]; - w1[1] = salt_buf0[1]; - w1[2] = salt_buf0[2]; - w1[3] = salt_buf0[3]; - w2[0] = salt_buf1[0]; - w2[1] = salt_buf1[1]; - w2[2] = salt_buf1[2]; - w2[3] = salt_buf1[3]; - w3[0] = salt_buf2[0]; - w3[1] = salt_buf2[1]; + w1[0] = salt_buf0; + w1[1] = salt_buf1; + w1[2] = salt_buf2; + w1[3] = salt_buf3; + w2[0] = salt_buf4; + w2[1] = salt_buf5; + w2[2] = salt_buf6; + w2[3] = salt_buf7; + w3[0] = salt_buf8; + w3[1] = salt_buf9; w3[2] = (16 + salt_len) * 8; w3[3] = 0; diff --git a/OpenCL/m01100_a3.cl b/OpenCL/m01100_a3.cl index 793930b..b1f010a 100644 --- a/OpenCL/m01100_a3.cl +++ b/OpenCL/m01100_a3.cl @@ -33,24 +33,18 @@ void m01100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_r * salt */ - #define salt_buf00 salt_bufs[salt_pos].salt_buf[ 0] - #define salt_buf01 salt_bufs[salt_pos].salt_buf[ 1] - #define salt_buf02 salt_bufs[salt_pos].salt_buf[ 2] - #define salt_buf03 salt_bufs[salt_pos].salt_buf[ 3] - #define salt_buf04 salt_bufs[salt_pos].salt_buf[ 4] - #define salt_buf05 salt_bufs[salt_pos].salt_buf[ 5] - #define salt_buf06 salt_bufs[salt_pos].salt_buf[ 6] - #define salt_buf07 salt_bufs[salt_pos].salt_buf[ 7] - #define salt_buf08 salt_bufs[salt_pos].salt_buf[ 8] - #define salt_buf09 salt_bufs[salt_pos].salt_buf[ 9] - #define salt_buf10 salt_bufs[salt_pos].salt_buf[10] - #define salt_buf11 salt_bufs[salt_pos].salt_buf[11] - #define salt_buf12 salt_bufs[salt_pos].salt_buf[12] - #define salt_buf13 salt_bufs[salt_pos].salt_buf[13] - #define salt_buf14 salt_bufs[salt_pos].salt_buf[14] - #define salt_buf15 salt_bufs[salt_pos].salt_buf[15] - - #define salt_len salt_bufs[salt_pos].salt_len + const u32 salt_buf0 = salt_bufs[salt_pos].salt_buf[0]; + const u32 salt_buf1 = salt_bufs[salt_pos].salt_buf[1]; + const u32 salt_buf2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 salt_buf3 = salt_bufs[salt_pos].salt_buf[3]; + const u32 salt_buf4 = salt_bufs[salt_pos].salt_buf[4]; + const u32 salt_buf5 = salt_bufs[salt_pos].salt_buf[5]; + const u32 salt_buf6 = salt_bufs[salt_pos].salt_buf[6]; + const u32 salt_buf7 = salt_bufs[salt_pos].salt_buf[7]; + const u32 salt_buf8 = salt_bufs[salt_pos].salt_buf[8]; + const u32 salt_buf9 = salt_bufs[salt_pos].salt_buf[9]; + + const u32 salt_len = salt_bufs[salt_pos].salt_len; /** * base @@ -189,16 +183,16 @@ void m01100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_r w0_t[1] = b; w0_t[2] = c; w0_t[3] = d; - w1_t[0] = salt_buf00; - w1_t[1] = salt_buf01; - w1_t[2] = salt_buf02; - w1_t[3] = salt_buf03; - w2_t[0] = salt_buf04; - w2_t[1] = salt_buf05; - w2_t[2] = salt_buf06; - w2_t[3] = salt_buf07; - w3_t[0] = salt_buf08; - w3_t[1] = salt_buf09; + w1_t[0] = salt_buf0; + w1_t[1] = salt_buf1; + w1_t[2] = salt_buf2; + w1_t[3] = salt_buf3; + w2_t[0] = salt_buf4; + w2_t[1] = salt_buf5; + w2_t[2] = salt_buf6; + w2_t[3] = salt_buf7; + w3_t[0] = salt_buf8; + w3_t[1] = salt_buf9; w3_t[2] = (16 + salt_len) * 8; w3_t[3] = 0; @@ -275,24 +269,18 @@ void m01100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_r * salt */ - #define salt_buf00 salt_bufs[salt_pos].salt_buf[ 0] - #define salt_buf01 salt_bufs[salt_pos].salt_buf[ 1] - #define salt_buf02 salt_bufs[salt_pos].salt_buf[ 2] - #define salt_buf03 salt_bufs[salt_pos].salt_buf[ 3] - #define salt_buf04 salt_bufs[salt_pos].salt_buf[ 4] - #define salt_buf05 salt_bufs[salt_pos].salt_buf[ 5] - #define salt_buf06 salt_bufs[salt_pos].salt_buf[ 6] - #define salt_buf07 salt_bufs[salt_pos].salt_buf[ 7] - #define salt_buf08 salt_bufs[salt_pos].salt_buf[ 8] - #define salt_buf09 salt_bufs[salt_pos].salt_buf[ 9] - #define salt_buf10 salt_bufs[salt_pos].salt_buf[10] - #define salt_buf11 salt_bufs[salt_pos].salt_buf[11] - #define salt_buf12 salt_bufs[salt_pos].salt_buf[12] - #define salt_buf13 salt_bufs[salt_pos].salt_buf[13] - #define salt_buf14 salt_bufs[salt_pos].salt_buf[14] - #define salt_buf15 salt_bufs[salt_pos].salt_buf[15] - - #define salt_len salt_bufs[salt_pos].salt_len + const u32 salt_buf0 = salt_bufs[salt_pos].salt_buf[0]; + const u32 salt_buf1 = salt_bufs[salt_pos].salt_buf[1]; + const u32 salt_buf2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 salt_buf3 = salt_bufs[salt_pos].salt_buf[3]; + const u32 salt_buf4 = salt_bufs[salt_pos].salt_buf[4]; + const u32 salt_buf5 = salt_bufs[salt_pos].salt_buf[5]; + const u32 salt_buf6 = salt_bufs[salt_pos].salt_buf[6]; + const u32 salt_buf7 = salt_bufs[salt_pos].salt_buf[7]; + const u32 salt_buf8 = salt_bufs[salt_pos].salt_buf[8]; + const u32 salt_buf9 = salt_bufs[salt_pos].salt_buf[9]; + + const u32 salt_len = salt_bufs[salt_pos].salt_len; /** * base @@ -443,16 +431,16 @@ void m01100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_r w0_t[1] = b; w0_t[2] = c; w0_t[3] = d; - w1_t[0] = salt_buf00; - w1_t[1] = salt_buf01; - w1_t[2] = salt_buf02; - w1_t[3] = salt_buf03; - w2_t[0] = salt_buf04; - w2_t[1] = salt_buf05; - w2_t[2] = salt_buf06; - w2_t[3] = salt_buf07; - w3_t[0] = salt_buf08; - w3_t[1] = salt_buf09; + w1_t[0] = salt_buf0; + w1_t[1] = salt_buf1; + w1_t[2] = salt_buf2; + w1_t[3] = salt_buf3; + w2_t[0] = salt_buf4; + w2_t[1] = salt_buf5; + w2_t[2] = salt_buf6; + w2_t[3] = salt_buf7; + w3_t[0] = salt_buf8; + w3_t[1] = salt_buf9; w3_t[2] = (16 + salt_len) * 8; w3_t[3] = 0;