X-Git-Url: https://www.flypig.org.uk/git/?a=blobdiff_plain;f=OpenCL%2Fm09100.cl;h=f12c6411f581b12afb22d938e26cc4b98cddf37c;hb=161a6eb4bc643d8e636e96eda613f5137d30da59;hp=038b43865377924af00758cf091ba3dcdcb6e3f1;hpb=0bf4e3c34a6a799ccc34f403bed70119574ca9c8;p=hashcat.git diff --git a/OpenCL/m09100.cl b/OpenCL/m09100.cl index 038b438..f12c641 100644 --- a/OpenCL/m09100.cl +++ b/OpenCL/m09100.cl @@ -1,39 +1,28 @@ /** - * Author......: Jens Steube + * Authors.....: Jens Steube + * Gabriele Gristina + * * License.....: MIT */ #define _LOTUS8_ -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" #undef _SHA1_ -#include "types_ocl.c" -#include "common.c" - -#ifdef VECT_SIZE1 -#define COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "inc_types.cl" +#include "inc_common.cl" -#ifdef VECT_SIZE2 -#define COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" __constant char lotus64_table[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; +#define uint_to_hex_upper8(i) l_bin2asc[(i)] + __constant u32 lotus_magic_table[256] = { 0xbd, 0x56, 0xea, 0xf2, 0xa2, 0xf1, 0xac, 0x2a, @@ -70,71 +59,60 @@ __constant u32 lotus_magic_table[256] = 0x29, 0x39, 0xb9, 0xe9, 0x4c, 0xff, 0x43, 0xab, }; -#ifdef VECT_SIZE1 -#define BOX(S,i) u32 ((S)[(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_upper8(i) u32 (l_bin2asc[(i)]) +#if VECT_SIZE == 1 +#define BOX1(S,i) (S)[(i)] +#elif VECT_SIZE == 2 +#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1]) +#elif VECT_SIZE == 4 +#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) +#elif VECT_SIZE == 8 +#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7]) +#elif VECT_SIZE == 16 +#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf]) #endif -#ifdef VECT_SIZE2 -#define uint_to_hex_upper8(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_upper8(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif - -static void lotus_mix (u32 *in, __local u32 s_lotus_magic_table[256]) +void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table) { - u32 p = 0; + u32x p = 0; for (int i = 0; i < 18; i++) { u32 s = 48; - #pragma unroll 12 for (int j = 0; j < 12; j++) { - u32 tmp_in = in[j]; - u32 tmp_out = 0; + u32x tmp_in = in[j]; + u32x tmp_out = 0; - p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 0; - p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 8; - p = (p + s--) & 0xff; p = ((tmp_in >> 16) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 16; - p = (p + s--) & 0xff; p = ((tmp_in >> 24) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 24; + p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX1 (s_lotus_magic_table, p); tmp_out |= p << 0; + p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX1 (s_lotus_magic_table, p); tmp_out |= p << 8; + p = (p + s--) & 0xff; p = ((tmp_in >> 16) & 0xff) ^ BOX1 (s_lotus_magic_table, p); tmp_out |= p << 16; + p = (p + s--) & 0xff; p = ((tmp_in >> 24) & 0xff) ^ BOX1 (s_lotus_magic_table, p); tmp_out |= p << 24; in[j] = tmp_out; } } } -static void lotus_transform_password (u32 *in, u32 *out, __local u32 s_lotus_magic_table[256]) +void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_magic_table) { - u32 t = out[3] >> 24; + u32x t = out[3] >> 24; - u32 c; + u32x c; - #pragma unroll 4 + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 4; i++) { - t ^= (in[i] >> 0) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); - t ^= (in[i] >> 8) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 8; t = ((out[i] >> 8) & 0xff); - t ^= (in[i] >> 16) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 16; t = ((out[i] >> 16) & 0xff); - t ^= (in[i] >> 24) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 24; t = ((out[i] >> 24) & 0xff); + t ^= (in[i] >> 0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); + t ^= (in[i] >> 8) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 8; t = ((out[i] >> 8) & 0xff); + t ^= (in[i] >> 16) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 16; t = ((out[i] >> 16) & 0xff); + t ^= (in[i] >> 24) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 24; t = ((out[i] >> 24) & 0xff); } } -static void pad (u32 w[4], const u32 len) +void pad (u32 w[4], const u32 len) { const u32 val = 16 - len; @@ -213,9 +191,9 @@ static void pad (u32 w[4], const u32 len) } } -static void mdtransform_norecalc (u32 state[4], u32 block[4], __local u32 s_lotus_magic_table[256]) +void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 *s_lotus_magic_table) { - u32 x[12]; + u32x x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -238,23 +216,23 @@ static void mdtransform_norecalc (u32 state[4], u32 block[4], __local u32 s_lotu state[3] = x[3]; } -static void mdtransform (u32 state[4], u32 checksum[4], u32 block[4], __local u32 s_lotus_magic_table[256]) +void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], __local u32 *s_lotus_magic_table) { mdtransform_norecalc (state, block, s_lotus_magic_table); lotus_transform_password (block, checksum, s_lotus_magic_table); } -static void domino_big_md (const u32 saved_key[16], const u32 size, u32 state[4], __local u32 s_lotus_magic_table[256]) +void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], __local u32 *s_lotus_magic_table) { - u32 checksum[4]; + u32x checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32 block[4]; + u32x block[4]; block[0] = 0; block[1] = 0; @@ -274,8 +252,6 @@ static void domino_big_md (const u32 saved_key[16], const u32 size, u32 state[4] mdtransform (state, checksum, block, s_lotus_magic_table); } - u32 left = size - curpos; - block[0] = saved_key[idx + 0]; block[1] = saved_key[idx + 1]; block[2] = saved_key[idx + 2]; @@ -286,7 +262,7 @@ static void domino_big_md (const u32 saved_key[16], const u32 size, u32 state[4] mdtransform_norecalc (state, checksum, s_lotus_magic_table); } -static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) +void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { u32 A = digest[0]; u32 B = digest[1]; @@ -414,7 +390,7 @@ static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], c digest[4] += E; } -static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) +void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -467,7 +443,7 @@ static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[ sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) +void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -503,7 +479,7 @@ static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[ sha1_transform (w0, w1, w2, w3, digest); } -static void base64_encode (u8 *base64_hash, const u32 len, const u8 *base64_plain) +void base64_encode (u8 *base64_hash, const u32 len, const u8 *base64_plain) { u8 *out_ptr = (u8 *) base64_hash; u8 *in_ptr = (u8 *) base64_plain; @@ -527,12 +503,11 @@ static void base64_encode (u8 *base64_hash, const u32 len, const u8 *base64_plai } } -static void lotus6_base64_encode (u8 base64_hash[24], const u32 salt0, const u32 salt1, u32 a, u32 b, u32 c) +void lotus6_base64_encode (u8 base64_hash[24], const u32 salt0, const u32 salt1, u32 a, u32 b, u32 c) { uchar4 salt0c = as_uchar4 (salt0); uchar4 salt1c = as_uchar4 (salt1); - #ifdef VECT_SIZE1 uchar4 ac; uchar4 bc; uchar4 cc; @@ -540,21 +515,6 @@ static void lotus6_base64_encode (u8 base64_hash[24], const u32 salt0, const u32 ac = as_uchar4 (a); bc = as_uchar4 (b); cc = as_uchar4 (c); - #endif - - #ifdef VECT_SIZE2 - uchar4 ac[2]; - uchar4 bc[2]; - uchar4 cc[2]; - - ac[0] = as_uchar4 (a.s0); - bc[0] = as_uchar4 (b.s0); - cc[0] = as_uchar4 (c.s0); - - ac[1] = as_uchar4 (a.s1); - bc[1] = as_uchar4 (b.s1); - cc[1] = as_uchar4 (c.s1); - #endif u8 tmp[24]; // size 22 (=pw_len) is needed but base64 needs size divisible by 4 @@ -570,8 +530,6 @@ static void lotus6_base64_encode (u8 base64_hash[24], const u32 salt0, const u32 base64_plain[ 3] = salt0c.s3; base64_plain[ 3] -= -4; // dont ask! base64_plain[ 4] = salt1c.s0; - - #ifdef VECT_SIZE1 base64_plain[ 5] = ac.s0; base64_plain[ 6] = ac.s1; base64_plain[ 7] = ac.s2; @@ -612,96 +570,9 @@ static void lotus6_base64_encode (u8 base64_hash[24], const u32 salt0, const u32 base64_hash[19] = tmp[19]; base64_hash[20] = tmp[20]; base64_hash[21] = ')'; - #endif - - #ifdef VECT_SIZE2 - base64_plain[ 5] = ac[0].s0; - base64_plain[ 6] = ac[0].s1; - base64_plain[ 7] = ac[0].s2; - base64_plain[ 8] = ac[0].s3; - base64_plain[ 9] = bc[0].s0; - base64_plain[10] = bc[0].s1; - base64_plain[11] = bc[0].s2; - base64_plain[12] = bc[0].s3; - base64_plain[13] = cc[0].s0; - base64_plain[14] = cc[0].s1; - base64_plain[15] = cc[0].s2; - - /* - * base64 encode the $salt.$digest string - */ - - base64_encode (tmp + 2, 14, base64_plain); - - base64_hash[ 0].s0 = '('; - base64_hash[ 1].s0 = 'G'; - base64_hash[ 2].s0 = tmp[ 2]; - base64_hash[ 3].s0 = tmp[ 3]; - base64_hash[ 4].s0 = tmp[ 4]; - base64_hash[ 5].s0 = tmp[ 5]; - base64_hash[ 6].s0 = tmp[ 6]; - base64_hash[ 7].s0 = tmp[ 7]; - base64_hash[ 8].s0 = tmp[ 8]; - base64_hash[ 9].s0 = tmp[ 9]; - base64_hash[10].s0 = tmp[10]; - base64_hash[11].s0 = tmp[11]; - base64_hash[12].s0 = tmp[12]; - base64_hash[13].s0 = tmp[13]; - base64_hash[14].s0 = tmp[14]; - base64_hash[15].s0 = tmp[15]; - base64_hash[16].s0 = tmp[16]; - base64_hash[17].s0 = tmp[17]; - base64_hash[18].s0 = tmp[18]; - base64_hash[19].s0 = tmp[19]; - base64_hash[20].s0 = tmp[20]; - base64_hash[21].s0 = ')'; - - base64_plain[ 5] = ac[1].s0; - base64_plain[ 6] = ac[1].s1; - base64_plain[ 7] = ac[1].s2; - base64_plain[ 8] = ac[1].s3; - base64_plain[ 9] = bc[1].s0; - base64_plain[10] = bc[1].s1; - base64_plain[11] = bc[1].s2; - base64_plain[12] = bc[1].s3; - base64_plain[13] = cc[1].s0; - base64_plain[14] = cc[1].s1; - base64_plain[15] = cc[1].s2; - - /* - * base64 encode the $salt.$digest string - */ - - base64_encode (tmp + 2, 14, base64_plain); - - base64_hash[ 0].s1 = '('; - base64_hash[ 1].s1 = 'G'; - base64_hash[ 2].s1 = tmp[ 2]; - base64_hash[ 3].s1 = tmp[ 3]; - base64_hash[ 4].s1 = tmp[ 4]; - base64_hash[ 5].s1 = tmp[ 5]; - base64_hash[ 6].s1 = tmp[ 6]; - base64_hash[ 7].s1 = tmp[ 7]; - base64_hash[ 8].s1 = tmp[ 8]; - base64_hash[ 9].s1 = tmp[ 9]; - base64_hash[10].s1 = tmp[10]; - base64_hash[11].s1 = tmp[11]; - base64_hash[12].s1 = tmp[12]; - base64_hash[13].s1 = tmp[13]; - base64_hash[14].s1 = tmp[14]; - base64_hash[15].s1 = tmp[15]; - base64_hash[16].s1 = tmp[16]; - base64_hash[17].s1 = tmp[17]; - base64_hash[18].s1 = tmp[18]; - base64_hash[19].s1 = tmp[19]; - base64_hash[20].s1 = tmp[20]; - base64_hash[21].s1 = ')'; - #endif - } - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global lotus8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global wpa_t *wpa_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m09100_init (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global lotus8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global wpa_t *wpa_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -709,55 +580,37 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__gl const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); + const u32 lsz = get_local_size (0); /** - * bin2asc table + * sbox */ - __local u32 l_bin2asc[256]; - - const u32 lid4 = lid * 4; - - const u32 lid40 = lid4 + 0; - const u32 lid41 = lid4 + 1; - const u32 lid42 = lid4 + 2; - const u32 lid43 = lid4 + 3; - - const u32 v400 = (lid40 >> 0) & 15; - const u32 v401 = (lid40 >> 4) & 15; - const u32 v410 = (lid41 >> 0) & 15; - const u32 v411 = (lid41 >> 4) & 15; - const u32 v420 = (lid42 >> 0) & 15; - const u32 v421 = (lid42 >> 4) & 15; - const u32 v430 = (lid43 >> 0) & 15; - const u32 v431 = (lid43 >> 4) & 15; - - l_bin2asc[lid40] = ((v400 < 10) ? '0' + v400 : 'A' - 10 + v400) << 8 - | ((v401 < 10) ? '0' + v401 : 'A' - 10 + v401) << 0; - l_bin2asc[lid41] = ((v410 < 10) ? '0' + v410 : 'A' - 10 + v410) << 8 - | ((v411 < 10) ? '0' + v411 : 'A' - 10 + v411) << 0; - l_bin2asc[lid42] = ((v420 < 10) ? '0' + v420 : 'A' - 10 + v420) << 8 - | ((v421 < 10) ? '0' + v421 : 'A' - 10 + v421) << 0; - l_bin2asc[lid43] = ((v430 < 10) ? '0' + v430 : 'A' - 10 + v430) << 8 - | ((v431 < 10) ? '0' + v431 : 'A' - 10 + v431) << 0; + __local u32 s_lotus_magic_table[256]; - barrier (CLK_LOCAL_MEM_FENCE); + for (u32 i = lid; i < 256; i += lsz) + { + s_lotus_magic_table[i] = lotus_magic_table[i]; + } - /** - * sbox - */ + __local u32 l_bin2asc[256]; - __local u32 s_lotus_magic_table[256]; + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; - s_lotus_magic_table[lid4 + 0] = lotus_magic_table[lid4 + 0]; - s_lotus_magic_table[lid4 + 1] = lotus_magic_table[lid4 + 1]; - s_lotus_magic_table[lid4 + 2] = lotus_magic_table[lid4 + 2]; - s_lotus_magic_table[lid4 + 3] = lotus_magic_table[lid4 + 3]; + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'A' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0; + } barrier (CLK_LOCAL_MEM_FENCE); if (gid >= gid_max) return; + /** + * base + */ u32 w[16]; @@ -839,7 +692,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__gl salt_buf3[3] = 0; const u32 salt0 = salt_buf0[0]; - const u32 salt1 = salt_buf0[1] & 0xff | '(' << 8; + const u32 salt1 = (salt_buf0[1] & 0xff) | ('(' << 8); /** * Lotus 6 hash - SEC_pwddigest_V2 @@ -928,62 +781,27 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__gl lotus6_base64_encode (base64_hash, salt_buf0[0], salt_buf0[1], a, b, c); - /** * PBKDF2 - HMACSHA1 - 1st iteration */ - #ifdef VECT_SIZE1 u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = (base64_hash[ 0] << 24) | (base64_hash[ 1] << 16) | (base64_hash[ 2] << 8) | base64_hash[ 3]; w0[1] = (base64_hash[ 4] << 24) | (base64_hash[ 5] << 16) | (base64_hash[ 6] << 8) | base64_hash[ 7]; w0[2] = (base64_hash[ 8] << 24) | (base64_hash[ 9] << 16) | (base64_hash[10] << 8) | base64_hash[11]; w0[3] = (base64_hash[12] << 24) | (base64_hash[13] << 16) | (base64_hash[14] << 8) | base64_hash[15]; - - u32 w1[4]; - w1[0] = (base64_hash[16] << 24) | (base64_hash[17] << 16) | (base64_hash[18] << 8) | base64_hash[19]; w1[1] = (base64_hash[20] << 24) | (base64_hash[21] << 16); w1[2] = 0; w1[3] = 0; - #endif - - #ifdef VECT_SIZE2 - u32 w0[4]; - - w0[0].s0 = (base64_hash[ 0].s0 << 24) | (base64_hash[ 1].s0 << 16) | (base64_hash[ 2].s0 << 8) | base64_hash[ 3].s0; - w0[1].s0 = (base64_hash[ 4].s0 << 24) | (base64_hash[ 5].s0 << 16) | (base64_hash[ 6].s0 << 8) | base64_hash[ 7].s0; - w0[2].s0 = (base64_hash[ 8].s0 << 24) | (base64_hash[ 9].s0 << 16) | (base64_hash[10].s0 << 8) | base64_hash[11].s0; - w0[3].s0 = (base64_hash[12].s0 << 24) | (base64_hash[13].s0 << 16) | (base64_hash[14].s0 << 8) | base64_hash[15].s0; - - w0[0].s1 = (base64_hash[ 0].s1 << 24) | (base64_hash[ 1].s1 << 16) | (base64_hash[ 2].s1 << 8) | base64_hash[ 3].s1; - w0[1].s1 = (base64_hash[ 4].s1 << 24) | (base64_hash[ 5].s1 << 16) | (base64_hash[ 6].s1 << 8) | base64_hash[ 7].s1; - w0[2].s1 = (base64_hash[ 8].s1 << 24) | (base64_hash[ 9].s1 << 16) | (base64_hash[10].s1 << 8) | base64_hash[11].s1; - w0[3].s1 = (base64_hash[12].s1 << 24) | (base64_hash[13].s1 << 16) | (base64_hash[14].s1 << 8) | base64_hash[15].s1; - - u32 w1[4]; - - w1[0].s0 = (base64_hash[16].s0 << 24) | (base64_hash[17].s0 << 16) | (base64_hash[18].s0 << 8) | base64_hash[19].s0; - w1[1].s0 = (base64_hash[20].s0 << 24) | (base64_hash[21].s0 << 16); - w1[2].s0 = 0; - w1[3].s0 = 0; - - w1[0].s1 = (base64_hash[16].s1 << 24) | (base64_hash[17].s1 << 16) | (base64_hash[18].s1 << 8) | base64_hash[19].s1; - w1[1].s1 = (base64_hash[20].s1 << 24) | (base64_hash[21].s1 << 16); - w1[2].s1 = 0; - w1[3].s1 = 0; - #endif - - u32 w2[4]; - w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - - u32 w3[4]; - w3[0] = 0; w3[1] = 0; w3[2] = 0; @@ -1027,21 +845,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__gl w3[2] = salt_buf3[2]; //w3[3] = salt_buf3[3]; - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); + w0[0] = swap32 (w0[0]); + w0[1] = swap32 (w0[1]); + w0[2] = swap32 (w0[2]); + w0[3] = swap32 (w0[3]); + w1[0] = swap32 (w1[0]); + w1[1] = swap32 (w1[1]); + w1[2] = swap32 (w1[2]); + w1[3] = swap32 (w1[3]); + w2[0] = swap32 (w2[0]); + w2[1] = swap32 (w2[1]); + w2[2] = swap32 (w2[2]); + w2[3] = swap32 (w2[3]); + w3[0] = swap32 (w3[0]); + w3[1] = swap32 (w3[1]); + w3[2] = swap32 (w3[2]); w3[3] = (64 + salt_len + 4) * 8; u32 dgst[5]; @@ -1061,7 +879,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__gl tmps[gid].out[4] = dgst[4]; } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global lotus8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global wpa_t *wpa_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m09100_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global lotus8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global wpa_t *wpa_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { const u32 gid = get_global_id (0); @@ -1143,7 +961,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_loop (__gl tmps[gid].out[4] = out[4]; } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global lotus8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global wpa_t *wpa_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m09100_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global lotus8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global wpa_t *wpa_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base