/**
 * Author......: Jens Steube
 * License.....: MIT
 */

#define _MYSQL323_

#define NEW_SIMD_CODE

#include "include/constants.h"
#include "include/kernel_vendor.h"

#define DGST_R0 0
#define DGST_R1 1
#define DGST_R2 2
#define DGST_R3 3

#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"

/**
 * One mixing step of the MYSQL323 state.
 *
 * a, b, add : lvalues holding the running state; all three are updated
 * v         : value folded into the state (callers pass one byte of the
 *             candidate, already masked with 0xff)
 *
 * Every operand is parenthesized and passed explicitly so the macro does not
 * depend on the names or the operator precedence used at the call site.
 * Note that a, b, add and v are each evaluated more than once; pass only
 * side-effect free expressions.
 */

#define MYSQL323_ROUND(a,b,add,v)                       \
do                                                      \
{                                                       \
  (a) ^= ((((a) & 0x3f) + (add)) * (v)) + ((a) << 8);   \
  (b) += ((b) << 8) ^ (a);                              \
  (add) += (v);                                         \
} while (0)

/**
 * Runs the MYSQL323 rounds over the first pw_len bytes of w_t and stores the
 * two resulting state words, each masked to 31 bits, in out[0] and out[1].
 *
 * w_t    : candidate packed four bytes per word, lowest byte first
 * pw_len : number of bytes to process; w_t holds 16 words, so values above 64
 *          would index past the array and must not be passed
 * out    : receives (a & 0x7fffffff) in out[0] and (b & 0x7fffffff) in out[1]
 *
 * NOTE(review): the reference MySQL routine is understood to skip space and
 * tab characters; every byte is hashed here - confirm this is intended.
 */

static void mysql323_rounds (const u32x w_t[16], const u32 pw_len, u32x out[2])
{
  u32x a = MYSQL323_A;
  u32x b = MYSQL323_B;

  u32x add = 7;

  int i;
  int j;

  // whole words: four bytes per iteration, lowest byte first

  for (i = 0, j = 0; i <= (int) pw_len - 4; i += 4, j += 1)
  {
    const u32x wj = w_t[j];

    MYSQL323_ROUND (a, b, add, (wj >>  0) & 0xff);
    MYSQL323_ROUND (a, b, add, (wj >>  8) & 0xff);
    MYSQL323_ROUND (a, b, add, (wj >> 16) & 0xff);
    MYSQL323_ROUND (a, b, add, (wj >> 24) & 0xff);
  }

  // bytes remaining after the last whole word

  const u32 left = pw_len - i;

  // Load the tail word only if it is needed: with pw_len == 64 the loop above
  // ends with j == 16, and reading w_t[16] would be out of bounds.

  if (left != 0)
  {
    const u32x wj = w_t[j];

    if (left == 3)
    {
      MYSQL323_ROUND (a, b, add, (wj >>  0) & 0xff);
      MYSQL323_ROUND (a, b, add, (wj >>  8) & 0xff);
      MYSQL323_ROUND (a, b, add, (wj >> 16) & 0xff);
    }
    else if (left == 2)
    {
      MYSQL323_ROUND (a, b, add, (wj >>  0) & 0xff);
      MYSQL323_ROUND (a, b, add, (wj >>  8) & 0xff);
    }
    else if (left == 1)
    {
      MYSQL323_ROUND (a, b, add, (wj >>  0) & 0xff);
    }
  }

  out[0] = a & 0x7fffffff;
  out[1] = b & 0x7fffffff;
}

/**
 * Hashes every candidate derived from w and hands the result to
 * COMPARE_M_SIMD.
 *
 * For each il_pos in [0, il_cnt) stepping by VECT_SIZE, word 0 of the
 * candidate is w[0] OR-ed with words_buf_r[il_pos / VECT_SIZE]; words 1..15
 * are taken from w unchanged. The digest passed on is (a, b, 0, 0).
 *
 * The remaining buffer and scalar parameters are not referenced directly in
 * this body; presumably the COMPARE_M_SIMD expansion uses them - confirm
 * against the macro definition.
 */

static void m00200m (u32 w[16], const u32 pw_len,
  __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf,
  __constant u32x * words_buf_r, __global void *tmps, __global void *hooks,
  __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b,
  __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d,
  __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b,
  __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d,
  __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown,
  __global salt_t *salt_bufs, __global void *esalt_bufs,
  __global u32 *d_return_buf, __global u32 *d_scryptV_buf,
  const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2,
  const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt,
  const u32 digests_cnt, const u32 digests_offset)
{
  /**
   * modifier
   */

  // Not referenced directly below; presumably read by the COMPARE_M_SIMD
  // expansion, so the names must stay as they are.

  const u32 gid = get_global_id (0);
  const u32 lid = get_local_id (0);

  /**
   * loop
   */

  const u32 w0l = w[0];

  // Words 1..15 do not depend on il_pos, so they are filled once.

  u32x w_t[16];

  w_t[ 1] = w[ 1];
  w_t[ 2] = w[ 2];
  w_t[ 3] = w[ 3];
  w_t[ 4] = w[ 4];
  w_t[ 5] = w[ 5];
  w_t[ 6] = w[ 6];
  w_t[ 7] = w[ 7];
  w_t[ 8] = w[ 8];
  w_t[ 9] = w[ 9];
  w_t[10] = w[10];
  w_t[11] = w[11];
  w_t[12] = w[12];
  w_t[13] = w[13];
  w_t[14] = w[14];
  w_t[15] = w[15];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];

    w_t[ 0] = w0l | w0r;

    u32x out[2];

    mysql323_rounds (w_t, pw_len, out);

    // Only two state words are produced; the upper two digest words are zero.

    u32x a = out[0];
    u32x b = out[1];
    u32x c = 0;
    u32x d = 0;

    COMPARE_M_SIMD (a, b, c, d);
  }
}

/**
 * Hashes every candidate derived from w and hands the result to
 * COMPARE_S_SIMD.
 *
 * Identical to m00200m except that the four digest words stored at
 * digests_buf[digests_offset] are first copied into the local array `search`.
 *
 * The remaining buffer and scalar parameters are not referenced directly in
 * this body; presumably the COMPARE_S_SIMD expansion uses them - confirm
 * against the macro definition.
 */

static void m00200s (u32 w[16], const u32 pw_len,
  __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf,
  __constant u32x * words_buf_r, __global void *tmps, __global void *hooks,
  __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b,
  __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d,
  __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b,
  __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d,
  __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown,
  __global salt_t *salt_bufs, __global void *esalt_bufs,
  __global u32 *d_return_buf, __global u32 *d_scryptV_buf,
  const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2,
  const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt,
  const u32 digests_cnt, const u32 digests_offset)
{
  /**
   * modifier
   */

  // Not referenced directly below; presumably read by the COMPARE_S_SIMD
  // expansion, so the names must stay as they are.

  const u32 gid = get_global_id (0);
  const u32 lid = get_local_id (0);

  /**
   * digest
   */

  // Not referenced directly below either; presumably the comparison target
  // picked up by name inside COMPARE_S_SIMD.

  const u32 search[4] =
  {
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    digests_buf[digests_offset].digest_buf[DGST_R2],
    digests_buf[digests_offset].digest_buf[DGST_R3]
  };

  /**
   * loop
   */

  const u32 w0l = w[0];

  // Words 1..15 do not depend on il_pos, so they are filled once.

  u32x w_t[16];

  w_t[ 1] = w[ 1];
  w_t[ 2] = w[ 2];
  w_t[ 3] = w[ 3];
  w_t[ 4] = w[ 4];
  w_t[ 5] = w[ 5];
  w_t[ 6] = w[ 6];
  w_t[ 7] = w[ 7];
  w_t[ 8] = w[ 8];
  w_t[ 9] = w[ 9];
  w_t[10] = w[10];
  w_t[11] = w[11];
  w_t[12] = w[12];
  w_t[13] = w[13];
  w_t[14] = w[14];
  w_t[15] = w[15];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];

    w_t[ 0] = w0l | w0r;

    u32x out[2];

    mysql323_rounds (w_t, pw_len, out);

    // Only two state words are produced; the upper two digest words are zero.

    u32x a = out[0];
    u32x b = out[1];
    u32x c = 0;
    u32x d = 0;

    COMPARE_S_SIMD (a, b, c, d);
  }
}

/**
 * Kernel entry: copies words 0..3 of pws[gid].i into a 16-word buffer,
 * zero-fills words 4..15 and runs m00200m on it.
 * Work-items with gid >= gid_max return without doing anything.
 */

__kernel void m00200_m04 (
  __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf,
  __constant u32x * words_buf_r, __global void *tmps, __global void *hooks,
  __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b,
  __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d,
  __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b,
  __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d,
  __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown,
  __global salt_t *salt_bufs, __global void *esalt_bufs,
  __global u32 *d_return_buf, __global u32 *d_scryptV_buf,
  const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2,
  const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt,
  const u32 digests_cnt, const u32 digests_offset,
  const u32 combs_mode, const u32 gid_max)
{
  /**
   * base
   */

  const u32 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w[16];

  w[ 0] = pws[gid].i[ 0];
  w[ 1] = pws[gid].i[ 1];
  w[ 2] = pws[gid].i[ 2];
  w[ 3] = pws[gid].i[ 3];
  w[ 4] = 0;
  w[ 5] = 0;
  w[ 6] = 0;
  w[ 7] = 0;
  w[ 8] = 0;
  w[ 9] = 0;
  w[10] = 0;
  w[11] = 0;
  w[12] = 0;
  w[13] = 0;
  w[14] = 0;
  w[15] = 0;

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m00200m (w, pw_len,
    pws, rules_buf, combs_buf,
    words_buf_r, tmps, hooks,
    bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
    bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
    plains_buf, digests_buf, hashes_shown,
    salt_bufs, esalt_bufs,
    d_return_buf, d_scryptV_buf,
    bitmap_mask, bitmap_shift1, bitmap_shift2,
    salt_pos, loop_pos, loop_cnt, il_cnt,
    digests_cnt, digests_offset);
}

/**
 * Kernel entry: copies words 0..7 of pws[gid].i into a 16-word buffer,
 * zero-fills words 8..15 and runs m00200m on it.
 * Work-items with gid >= gid_max return without doing anything.
 */

__kernel void m00200_m08 (
  __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf,
  __constant u32x * words_buf_r, __global void *tmps, __global void *hooks,
  __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b,
  __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d,
  __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b,
  __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d,
  __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown,
  __global salt_t *salt_bufs, __global void *esalt_bufs,
  __global u32 *d_return_buf, __global u32 *d_scryptV_buf,
  const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2,
  const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt,
  const u32 digests_cnt, const u32 digests_offset,
  const u32 combs_mode, const u32 gid_max)
{
  /**
   * base
   */

  const u32 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w[16];

  w[ 0] = pws[gid].i[ 0];
  w[ 1] = pws[gid].i[ 1];
  w[ 2] = pws[gid].i[ 2];
  w[ 3] = pws[gid].i[ 3];
  w[ 4] = pws[gid].i[ 4];
  w[ 5] = pws[gid].i[ 5];
  w[ 6] = pws[gid].i[ 6];
  w[ 7] = pws[gid].i[ 7];
  w[ 8] = 0;
  w[ 9] = 0;
  w[10] = 0;
  w[11] = 0;
  w[12] = 0;
  w[13] = 0;
  w[14] = 0;
  w[15] = 0;

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m00200m (w, pw_len,
    pws, rules_buf, combs_buf,
    words_buf_r, tmps, hooks,
    bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
    bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
    plains_buf, digests_buf, hashes_shown,
    salt_bufs, esalt_bufs,
    d_return_buf, d_scryptV_buf,
    bitmap_mask, bitmap_shift1, bitmap_shift2,
    salt_pos, loop_pos, loop_cnt, il_cnt,
    digests_cnt, digests_offset);
}

/**
 * Kernel entry: copies all 16 words of pws[gid].i into a local buffer and
 * runs m00200m on it.
 * Work-items with gid >= gid_max return without doing anything.
 */

__kernel void m00200_m16 (
  __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf,
  __constant u32x * words_buf_r, __global void *tmps, __global void *hooks,
  __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b,
  __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d,
  __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b,
  __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d,
  __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown,
  __global salt_t *salt_bufs, __global void *esalt_bufs,
  __global u32 *d_return_buf, __global u32 *d_scryptV_buf,
  const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2,
  const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt,
  const u32 digests_cnt, const u32 digests_offset,
  const u32 combs_mode, const u32 gid_max)
{
  /**
   * base
   */

  const u32 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w[16];

  w[ 0] = pws[gid].i[ 0];
  w[ 1] = pws[gid].i[ 1];
  w[ 2] = pws[gid].i[ 2];
  w[ 3] = pws[gid].i[ 3];
  w[ 4] = pws[gid].i[ 4];
  w[ 5] = pws[gid].i[ 5];
  w[ 6] = pws[gid].i[ 6];
  w[ 7] = pws[gid].i[ 7];
  w[ 8] = pws[gid].i[ 8];
  w[ 9] = pws[gid].i[ 9];
  w[10] = pws[gid].i[10];
  w[11] = pws[gid].i[11];
  w[12] = pws[gid].i[12];
  w[13] = pws[gid].i[13];
  w[14] = pws[gid].i[14];
  w[15] = pws[gid].i[15];

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m00200m (w, pw_len,
    pws, rules_buf, combs_buf,
    words_buf_r, tmps, hooks,
    bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
    bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
    plains_buf, digests_buf, hashes_shown,
    salt_bufs, esalt_bufs,
    d_return_buf, d_scryptV_buf,
    bitmap_mask, bitmap_shift1, bitmap_shift2,
    salt_pos, loop_pos, loop_cnt, il_cnt,
    digests_cnt, digests_offset);
}

/**
 * Kernel entry: copies words 0..3 of pws[gid].i into a 16-word buffer,
 * zero-fills words 4..15 and runs m00200s on it.
 * Work-items with gid >= gid_max return without doing anything.
 */

__kernel void m00200_s04 (
  __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf,
  __constant u32x * words_buf_r, __global void *tmps, __global void *hooks,
  __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b,
  __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d,
  __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b,
  __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d,
  __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown,
  __global salt_t *salt_bufs, __global void *esalt_bufs,
  __global u32 *d_return_buf, __global u32 *d_scryptV_buf,
  const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2,
  const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt,
  const u32 digests_cnt, const u32 digests_offset,
  const u32 combs_mode, const u32 gid_max)
{
  /**
   * base
   */

  const u32 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w[16];

  w[ 0] = pws[gid].i[ 0];
  w[ 1] = pws[gid].i[ 1];
  w[ 2] = pws[gid].i[ 2];
  w[ 3] = pws[gid].i[ 3];
  w[ 4] = 0;
  w[ 5] = 0;
  w[ 6] = 0;
  w[ 7] = 0;
  w[ 8] = 0;
  w[ 9] = 0;
  w[10] = 0;
  w[11] = 0;
  w[12] = 0;
  w[13] = 0;
  w[14] = 0;
  w[15] = 0;

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m00200s (w, pw_len,
    pws, rules_buf, combs_buf,
    words_buf_r, tmps, hooks,
    bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
    bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
    plains_buf, digests_buf, hashes_shown,
    salt_bufs, esalt_bufs,
    d_return_buf, d_scryptV_buf,
    bitmap_mask, bitmap_shift1, bitmap_shift2,
    salt_pos, loop_pos, loop_cnt, il_cnt,
    digests_cnt, digests_offset);
}

/**
 * Kernel entry: copies words 0..7 of pws[gid].i into a 16-word buffer,
 * zero-fills words 8..15 and runs m00200s on it.
 * Work-items with gid >= gid_max return without doing anything.
 */

__kernel void m00200_s08 (
  __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf,
  __constant u32x * words_buf_r, __global void *tmps, __global void *hooks,
  __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b,
  __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d,
  __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b,
  __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d,
  __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown,
  __global salt_t *salt_bufs, __global void *esalt_bufs,
  __global u32 *d_return_buf, __global u32 *d_scryptV_buf,
  const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2,
  const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt,
  const u32 digests_cnt, const u32 digests_offset,
  const u32 combs_mode, const u32 gid_max)
{
  /**
   * base
   */

  const u32 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w[16];

  w[ 0] = pws[gid].i[ 0];
  w[ 1] = pws[gid].i[ 1];
  w[ 2] = pws[gid].i[ 2];
  w[ 3] = pws[gid].i[ 3];
  w[ 4] = pws[gid].i[ 4];
  w[ 5] = pws[gid].i[ 5];
  w[ 6] = pws[gid].i[ 6];
  w[ 7] = pws[gid].i[ 7];
  w[ 8] = 0;
  w[ 9] = 0;
  w[10] = 0;
  w[11] = 0;
  w[12] = 0;
  w[13] = 0;
  w[14] = 0;
  w[15] = 0;

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m00200s (w, pw_len,
    pws, rules_buf, combs_buf,
    words_buf_r, tmps, hooks,
    bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
    bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
    plains_buf, digests_buf, hashes_shown,
    salt_bufs, esalt_bufs,
    d_return_buf, d_scryptV_buf,
    bitmap_mask, bitmap_shift1, bitmap_shift2,
    salt_pos, loop_pos, loop_cnt, il_cnt,
    digests_cnt, digests_offset);
}

/**
 * Kernel entry: copies all 16 words of pws[gid].i into a local buffer and
 * runs m00200s on it.
 * Work-items with gid >= gid_max return without doing anything.
 */

__kernel void m00200_s16 (
  __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf,
  __constant u32x * words_buf_r, __global void *tmps, __global void *hooks,
  __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b,
  __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d,
  __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b,
  __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d,
  __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown,
  __global salt_t *salt_bufs, __global void *esalt_bufs,
  __global u32 *d_return_buf, __global u32 *d_scryptV_buf,
  const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2,
  const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt,
  const u32 digests_cnt, const u32 digests_offset,
  const u32 combs_mode, const u32 gid_max)
{
  /**
   * base
   */

  const u32 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w[16];

  w[ 0] = pws[gid].i[ 0];
  w[ 1] = pws[gid].i[ 1];
  w[ 2] = pws[gid].i[ 2];
  w[ 3] = pws[gid].i[ 3];
  w[ 4] = pws[gid].i[ 4];
  w[ 5] = pws[gid].i[ 5];
  w[ 6] = pws[gid].i[ 6];
  w[ 7] = pws[gid].i[ 7];
  w[ 8] = pws[gid].i[ 8];
  w[ 9] = pws[gid].i[ 9];
  w[10] = pws[gid].i[10];
  w[11] = pws[gid].i[11];
  w[12] = pws[gid].i[12];
  w[13] = pws[gid].i[13];
  w[14] = pws[gid].i[14];
  w[15] = pws[gid].i[15];

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m00200s (w, pw_len,
    pws, rules_buf, combs_buf,
    words_buf_r, tmps, hooks,
    bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
    bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
    plains_buf, digests_buf, hashes_shown,
    salt_bufs, esalt_bufs,
    d_return_buf, d_scryptV_buf,
    bitmap_mask, bitmap_shift1, bitmap_shift2,
    salt_pos, loop_pos, loop_cnt, il_cnt,
    digests_cnt, digests_offset);
}