2 * Author......: Jens Steube <jens.steube@gmail.com>
9 //#define NEW_SIMD_CODE
11 #include "inc_vendor.cl"
12 #include "inc_hash_constants.h"
13 #include "inc_hash_functions.cl"
14 #include "inc_types.cl"
15 #include "inc_common.cl"
18 #include "inc_simd.cl"
20 __kernel void m00200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
26 const u32 lid = get_local_id (0);
32 const u32 gid = get_global_id (0);
34 if (gid >= gid_max) return;
39 pw_buf0[0] = pws[gid].i[0];
40 pw_buf0[1] = pws[gid].i[1];
41 pw_buf0[2] = pws[gid].i[2];
42 pw_buf0[3] = pws[gid].i[3];
43 pw_buf1[0] = pws[gid].i[4];
44 pw_buf1[1] = pws[gid].i[5];
45 pw_buf1[2] = pws[gid].i[6];
46 pw_buf1[3] = pws[gid].i[7];
48 const u32 pw_len = pws[gid].pw_len;
54 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
61 const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
91 a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \
99 for (i = 0, j = 0; i <= (int) out_len - 4; i += 4, j += 1)
101 const u32x wj = w_t[j];
103 ROUND ((wj >> 0) & 0xff);
104 ROUND ((wj >> 8) & 0xff);
105 ROUND ((wj >> 16) & 0xff);
106 ROUND ((wj >> 24) & 0xff);
109 const u32x wj = w_t[j];
111 const u32 left = out_len - i;
115 ROUND ((wj >> 0) & 0xff);
116 ROUND ((wj >> 8) & 0xff);
117 ROUND ((wj >> 16) & 0xff);
121 ROUND ((wj >> 0) & 0xff);
122 ROUND ((wj >> 8) & 0xff);
126 ROUND ((wj >> 0) & 0xff);
134 COMPARE_M_SIMD (a, b, z, z);
138 __kernel void m00200_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
142 __kernel void m00200_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
146 __kernel void m00200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
152 const u32 lid = get_local_id (0);
158 const u32 gid = get_global_id (0);
160 if (gid >= gid_max) return;
165 pw_buf0[0] = pws[gid].i[0];
166 pw_buf0[1] = pws[gid].i[1];
167 pw_buf0[2] = pws[gid].i[2];
168 pw_buf0[3] = pws[gid].i[3];
169 pw_buf1[0] = pws[gid].i[4];
170 pw_buf1[1] = pws[gid].i[5];
171 pw_buf1[2] = pws[gid].i[6];
172 pw_buf1[3] = pws[gid].i[7];
174 const u32 pw_len = pws[gid].pw_len;
180 const u32 search[4] =
182 digests_buf[digests_offset].digest_buf[DGST_R0],
183 digests_buf[digests_offset].digest_buf[DGST_R1],
192 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
199 const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
229 a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \
237 for (i = 0, j = 0; i <= (int) out_len - 4; i += 4, j += 1)
239 const u32x wj = w_t[j];
241 ROUND ((wj >> 0) & 0xff);
242 ROUND ((wj >> 8) & 0xff);
243 ROUND ((wj >> 16) & 0xff);
244 ROUND ((wj >> 24) & 0xff);
247 const u32x wj = w_t[j];
249 const u32 left = out_len - i;
253 ROUND ((wj >> 0) & 0xff);
254 ROUND ((wj >> 8) & 0xff);
255 ROUND ((wj >> 16) & 0xff);
259 ROUND ((wj >> 0) & 0xff);
260 ROUND ((wj >> 8) & 0xff);
264 ROUND ((wj >> 0) & 0xff);
272 COMPARE_S_SIMD (a, b, z, z);
276 __kernel void m00200_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
280 __kernel void m00200_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)