From: jsteube Date: Fri, 27 May 2016 22:25:41 +0000 (+0200) Subject: Increase PBKDF2-HMAC-SHA512 cracking performance X-Git-Tag: v3.00~126 X-Git-Url: https://www.flypig.org.uk/git/?a=commitdiff_plain;ds=sidebyside;h=072988f26c4aa80f82fc7fde1f4dfee84c8828ec;p=hashcat.git Increase PBKDF2-HMAC-SHA512 cracking performance --- diff --git a/OpenCL/inc_simd.cl b/OpenCL/inc_simd.cl index 882e320..2fc6481 100644 --- a/OpenCL/inc_simd.cl +++ b/OpenCL/inc_simd.cl @@ -1162,3 +1162,15 @@ inline u32x ix_create_combt (__global comb_t *combs_buf, const u32 il_pos, const #elif VECT_SIZE == 16 #define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 16) + 0].var[(idx)] = val.s0; (arr)[((gid) * 16) + 1].var[(idx)] = val.s1; (arr)[((gid) * 16) + 2].var[(idx)] = val.s2; (arr)[((gid) * 16) + 3].var[(idx)] = val.s3; (arr)[((gid) * 16) + 4].var[(idx)] = val.s4; (arr)[((gid) * 16) + 5].var[(idx)] = val.s5; (arr)[((gid) * 16) + 6].var[(idx)] = val.s6; (arr)[((gid) * 16) + 7].var[(idx)] = val.s7; (arr)[((gid) * 16) + 8].var[(idx)] = val.s8; (arr)[((gid) * 16) + 9].var[(idx)] = val.s9; (arr)[((gid) * 16) + 10].var[(idx)] = val.sa; (arr)[((gid) * 16) + 11].var[(idx)] = val.sb; (arr)[((gid) * 16) + 12].var[(idx)] = val.sc; (arr)[((gid) * 16) + 13].var[(idx)] = val.sd; (arr)[((gid) * 16) + 14].var[(idx)] = val.se; (arr)[((gid) * 16) + 15].var[(idx)] = val.sf; #endif + +#if VECT_SIZE == 1 +#define unpackv_xor(arr,var,gid,idx,val) (arr)[((gid) * 1) + 0].var[(idx)] ^= val; +#elif VECT_SIZE == 2 +#define unpackv_xor(arr,var,gid,idx,val) (arr)[((gid) * 2) + 0].var[(idx)] ^= val.s0; (arr)[((gid) * 2) + 1].var[(idx)] ^= val.s1; +#elif VECT_SIZE == 4 +#define unpackv_xor(arr,var,gid,idx,val) (arr)[((gid) * 4) + 0].var[(idx)] ^= val.s0; (arr)[((gid) * 4) + 1].var[(idx)] ^= val.s1; (arr)[((gid) * 4) + 2].var[(idx)] ^= val.s2; (arr)[((gid) * 4) + 3].var[(idx)] ^= val.s3; +#elif VECT_SIZE == 8 +#define unpackv_xor(arr,var,gid,idx,val) (arr)[((gid) * 8) + 0].var[(idx)] ^= val.s0; (arr)[((gid) * 8) + 1].var[(idx)] ^= val.s1; (arr)[((gid) * 8) + 2].var[(idx)] ^= val.s2; (arr)[((gid) * 8) + 3].var[(idx)] ^= val.s3; (arr)[((gid) * 8) + 4].var[(idx)] ^= val.s4; (arr)[((gid) * 8) + 5].var[(idx)] ^= val.s5; (arr)[((gid) * 8) + 6].var[(idx)] ^= val.s6; (arr)[((gid) * 8) + 7].var[(idx)] ^= val.s7; +#elif VECT_SIZE == 16 +#define unpackv_xor(arr,var,gid,idx,val) (arr)[((gid) * 16) + 0].var[(idx)] ^= val.s0; (arr)[((gid) * 16) + 1].var[(idx)] ^= val.s1; (arr)[((gid) * 16) + 2].var[(idx)] ^= val.s2; (arr)[((gid) * 16) + 3].var[(idx)] ^= val.s3; (arr)[((gid) * 16) + 4].var[(idx)] ^= val.s4; (arr)[((gid) * 16) + 5].var[(idx)] ^= val.s5; (arr)[((gid) * 16) + 6].var[(idx)] ^= val.s6; (arr)[((gid) * 16) + 7].var[(idx)] ^= val.s7; (arr)[((gid) * 16) + 8].var[(idx)] ^= val.s8; (arr)[((gid) * 16) + 9].var[(idx)] ^= val.s9; (arr)[((gid) * 16) + 10].var[(idx)] ^= val.sa; (arr)[((gid) * 16) + 11].var[(idx)] ^= val.sb; (arr)[((gid) * 16) + 12].var[(idx)] ^= val.sc; (arr)[((gid) * 16) + 13].var[(idx)] ^= val.sd; (arr)[((gid) * 16) + 14].var[(idx)] ^= val.se; (arr)[((gid) * 16) + 15].var[(idx)] ^= val.sf; +#endif diff --git a/OpenCL/inc_vendor.cl b/OpenCL/inc_vendor.cl index 87a1d08..da048b1 100644 --- a/OpenCL/inc_vendor.cl +++ b/OpenCL/inc_vendor.cl @@ -92,9 +92,6 @@ #if KERN_TYPE == 6500 #undef _unroll #endif -#if KERN_TYPE == 7100 -#undef _unroll -#endif #if KERN_TYPE == 7400 #undef _unroll #endif diff --git a/OpenCL/m07100.cl b/OpenCL/m07100.cl index 6f64405..a455e59 100644 --- a/OpenCL/m07100.cl +++ b/OpenCL/m07100.cl @@ -370,6 +370,59 @@ void hmac_sha512_run_V (const u64x w1[16], const u64x ipad[8], const u64x opad[8 sha512_transform_V (w, dgst); } +void hmac_sha512_run_V_x (const u64x ipad[8], const u64x opad[8], u64x dgst[8]) +{ + u64x w[16]; + + w[ 0] = dgst[0]; + w[ 1] = dgst[1]; + w[ 2] = dgst[2]; + w[ 3] = dgst[3]; + w[ 4] = dgst[4]; + w[ 5] = dgst[5]; + w[ 6] = dgst[6]; + w[ 7] = dgst[7]; + w[ 8] = 0x8000000000000000; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = (128 + 64) * 8; + + dgst[0] = ipad[0]; + dgst[1] = ipad[1]; + dgst[2] = ipad[2]; + dgst[3] = ipad[3]; + dgst[4] = ipad[4]; + dgst[5] = ipad[5]; + dgst[6] = ipad[6]; + dgst[7] = ipad[7]; + + sha512_transform_V (w, dgst); + + w[ 0] = dgst[0]; + w[ 1] = dgst[1]; + w[ 2] = dgst[2]; + w[ 3] = dgst[3]; + w[ 4] = dgst[4]; + w[ 5] = dgst[5]; + w[ 6] = dgst[6]; + w[ 7] = dgst[7]; + + dgst[0] = opad[0]; + dgst[1] = opad[1]; + dgst[2] = opad[2]; + dgst[3] = opad[3]; + dgst[4] = opad[4]; + dgst[5] = opad[5]; + dgst[6] = opad[6]; + dgst[7] = opad[7]; + + sha512_transform_V (w, dgst); +} + void hmac_sha512_init_V (u64x w[16], u64x ipad[8], u64x opad[8]) { w[ 0] ^= 0x3636363636363636; @@ -590,7 +643,6 @@ __kernel void m07100_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf for (u32 i = 0; i < 8; i += 8) { u64x dgst[8]; - u64x out[8]; dgst[0] = pack64v (tmps, dgst, gid, 0); dgst[1] = pack64v (tmps, dgst, gid, 1); @@ -601,46 +653,18 @@ __kernel void m07100_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf dgst[6] = pack64v (tmps, dgst, gid, 6); dgst[7] = pack64v (tmps, dgst, gid, 7); - out[0] = pack64v (tmps, out, gid, 0); - out[1] = pack64v (tmps, out, gid, 1); - out[2] = pack64v (tmps, out, gid, 2); - out[3] = pack64v (tmps, out, gid, 3); - out[4] = pack64v (tmps, out, gid, 4); - out[5] = pack64v (tmps, out, gid, 5); - out[6] = pack64v (tmps, out, gid, 6); - out[7] = pack64v (tmps, out, gid, 7); - for (u32 j = 0; j < loop_cnt; j++) { - u64x w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = dgst[5]; - w[ 6] = dgst[6]; - w[ 7] = dgst[7]; - w[ 8] = 0x8000000000000000; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (128 + 64) * 8; - - hmac_sha512_run_V (w, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - out[5] ^= dgst[5]; - out[6] ^= dgst[6]; - out[7] ^= dgst[7]; + hmac_sha512_run_V_x (ipad, opad, dgst); + + unpackv_xor (tmps, out, gid, 0, dgst[0]); + unpackv_xor (tmps, out, gid, 1, dgst[1]); + unpackv_xor (tmps, out, gid, 2, dgst[2]); + unpackv_xor (tmps, out, gid, 3, dgst[3]); + unpackv_xor (tmps, out, gid, 4, dgst[4]); + unpackv_xor (tmps, out, gid, 5, dgst[5]); + unpackv_xor (tmps, out, gid, 6, dgst[6]); + unpackv_xor (tmps, out, gid, 7, dgst[7]); } unpackv (tmps, dgst, gid, 0, dgst[0]); @@ -651,15 +675,6 @@ __kernel void m07100_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf unpackv (tmps, dgst, gid, 5, dgst[5]); unpackv (tmps, dgst, gid, 6, dgst[6]); unpackv (tmps, dgst, gid, 7, dgst[7]); - - unpackv (tmps, out, gid, 0, out[0]); - unpackv (tmps, out, gid, 1, out[1]); - unpackv (tmps, out, gid, 2, out[2]); - unpackv (tmps, out, gid, 3, out[3]); - unpackv (tmps, out, gid, 4, out[4]); - unpackv (tmps, out, gid, 5, out[5]); - unpackv (tmps, out, gid, 6, out[6]); - unpackv (tmps, out, gid, 7, out[7]); } }