2 * Author......: Jens Steube <jens.steube@gmail.com>
9 //#define NEW_SIMD_CODE
11 #include "inc_vendor.cl"
12 #include "inc_hash_constants.h"
13 #include "inc_hash_functions.cl"
14 #include "inc_types.cl"
15 #include "inc_common.cl"
16 #include "inc_simd.cl"
// Kernel m00200_m04: for the work-item selected by get_global_id (0), joins
// the word stored in pws[gid] with each word taken from combs_buf, feeds the
// joined bytes one at a time through the ROUND macro, and hands the result to
// COMPARE_M_SIMD.
//
// Parameters read by the visible statements: pws, combs_buf, il_cnt,
// combs_mode, gid_max. The remaining parameters are not referenced by the
// visible statements; NOTE(review): presumably some are consumed inside
// COMPARE_M_SIMD - confirm against its definition.
//
// The per-byte state update inside ROUND is
//   a ^= (((a & 0x3f) + add) * (v)) + (a << 8);
// NOTE(review): the kernel name suggests hashcat mode 200 - confirm.
18 __kernel void m00200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Local and global work-item indices along dimension 0.
24 const u32 lid = get_local_id (0);
30 const u32 gid = get_global_id (0);
// Work-items at or beyond gid_max have no password entry to process.
32 if (gid >= gid_max) return;
// Copy words i[0..7] of this work-item's pws entry into pw_buf0 / pw_buf1.
37 pw_buf0[0] = pws[gid].i[0];
38 pw_buf0[1] = pws[gid].i[1];
39 pw_buf0[2] = pws[gid].i[2];
40 pw_buf0[3] = pws[gid].i[3];
41 pw_buf1[0] = pws[gid].i[4];
42 pw_buf1[1] = pws[gid].i[5];
43 pw_buf1[2] = pws[gid].i[6];
44 pw_buf1[3] = pws[gid].i[7];
// Length of the pws word; it is paired with the wordl* buffers below.
46 const u32 pw_l_len = pws[gid].pw_len;
// Step through combs_buf positions [0, il_cnt) in increments of VECT_SIZE.
52 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
// Length obtained from combs_buf at il_pos; it is paired with wordr* below.
54 const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
// Combined length of both parts; bounds the byte loop further down.
56 const u32x pw_len = pw_l_len + pw_r_len;
59 * concat password candidate
// wordl0..wordl3 start zeroed; only wordl0 / wordl1 are then filled, from
// pw_buf0 / pw_buf1.
62 u32x wordl0[4] = { 0 };
63 u32x wordl1[4] = { 0 };
64 u32x wordl2[4] = { 0 };
65 u32x wordl3[4] = { 0 };
67 wordl0[0] = pw_buf0[0];
68 wordl0[1] = pw_buf0[1];
69 wordl0[2] = pw_buf0[2];
70 wordl0[3] = pw_buf0[3];
71 wordl1[0] = pw_buf1[0];
72 wordl1[1] = pw_buf1[1];
73 wordl1[2] = pw_buf1[2];
74 wordl1[3] = pw_buf1[3];
// wordr0..wordr3 start zeroed; only wordr0 / wordr1 are then filled, with
// words 0..7 fetched through ix_create_combt for position il_pos.
76 u32x wordr0[4] = { 0 };
77 u32x wordr1[4] = { 0 };
78 u32x wordr2[4] = { 0 };
79 u32x wordr3[4] = { 0 };
81 wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
82 wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
83 wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
84 wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
85 wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
86 wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
87 wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
88 wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
// combs_mode decides which side is passed to switch_buffer_by_offset_le_VV:
// in COMBINATOR_MODE_BASE_LEFT the wordr* buffers are offset by pw_l_len.
// NOTE(review): presumably the helper moves the buffer contents up by that
// many bytes so the two parts do not overlap when OR-ed - confirm against
// the helper's definition.
90 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
92 switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
// NOTE(review): presumably the alternative branch - here the wordl* buffers
// are offset by pw_r_len instead.
96 switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
// Merge the two sides word by word with a bitwise OR into w0..w3.
104 w0[0] = wordl0[0] | wordr0[0];
105 w0[1] = wordl0[1] | wordr0[1];
106 w0[2] = wordl0[2] | wordr0[2];
107 w0[3] = wordl0[3] | wordr0[3];
108 w1[0] = wordl1[0] | wordr1[0];
109 w1[1] = wordl1[1] | wordr1[1];
110 w1[2] = wordl1[2] | wordr1[2];
111 w1[3] = wordl1[3] | wordr1[3];
112 w2[0] = wordl2[0] | wordr2[0];
113 w2[1] = wordl2[1] | wordr2[1];
114 w2[2] = wordl2[2] | wordr2[2];
115 w2[3] = wordl2[3] | wordr2[3];
116 w3[0] = wordl3[0] | wordr3[0];
117 w3[1] = wordl3[1] | wordr3[1];
151 a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \
159 for (i = 0, j = 0; i <= (int) pw_len - 4; i += 4, j += 1)
// Complete words: i advances four bytes and j one word per iteration; the
// four bytes of w_t[j] are passed to ROUND from the lowest byte upwards.
// NOTE(review): the (int) cast in the loop bound presumably keeps the
// comparison signed so lengths below 4 skip this loop - confirm.
161 const u32x wj = w_t[j];
163 ROUND ((wj >> 0) & 0xff);
164 ROUND ((wj >> 8) & 0xff);
165 ROUND ((wj >> 16) & 0xff);
166 ROUND ((wj >> 24) & 0xff);
// Trailing partial word: `left` is the byte count remaining after the loop.
// NOTE(review): the three ROUND groups below cover 3, 2 and 1 bytes and are
// presumably selected by the value of `left` - confirm.
169 const u32x wj = w_t[j];
171 const u32 left = pw_len - i;
175 ROUND ((wj >> 0) & 0xff);
176 ROUND ((wj >> 8) & 0xff);
177 ROUND ((wj >> 16) & 0xff);
181 ROUND ((wj >> 0) & 0xff);
182 ROUND ((wj >> 8) & 0xff);
186 ROUND ((wj >> 0) & 0xff);
// Comparison step: a and b are passed first, z fills the last two arguments.
194 COMPARE_M_SIMD (a, b, z, z);
// Kernel m00200_m08: shares its parameter list with m00200_m04.
// NOTE(review): appears to be an empty stub (no statements follow the
// signature) - confirm.
198 __kernel void m00200_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Kernel m00200_m16: shares its parameter list with m00200_m04.
// NOTE(review): appears to be an empty stub (no statements follow the
// signature) - confirm.
202 __kernel void m00200_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Kernel m00200_s04: for the work-item selected by get_global_id (0), joins
// the word stored in pws[gid] with each word taken from combs_buf, feeds the
// joined bytes one at a time through the ROUND macro, and hands the result to
// COMPARE_S_SIMD. It also loads digest words from
// digests_buf[digests_offset] into `search` before the loop.
//
// Parameters read by the visible statements: pws, combs_buf, digests_buf,
// digests_offset, il_cnt, combs_mode, gid_max. The remaining parameters are
// not referenced by the visible statements; NOTE(review): presumably some
// are consumed inside COMPARE_S_SIMD - confirm against its definition.
//
// The per-byte state update inside ROUND is
//   a ^= (((a & 0x3f) + add) * (v)) + (a << 8);
// NOTE(review): the kernel name suggests hashcat mode 200 - confirm.
206 __kernel void m00200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Local and global work-item indices along dimension 0.
212 const u32 lid = get_local_id (0);
218 const u32 gid = get_global_id (0);
// Work-items at or beyond gid_max have no password entry to process.
220 if (gid >= gid_max) return;
// Copy words i[0..7] of this work-item's pws entry into pw_buf0 / pw_buf1.
225 pw_buf0[0] = pws[gid].i[0];
226 pw_buf0[1] = pws[gid].i[1];
227 pw_buf0[2] = pws[gid].i[2];
228 pw_buf0[3] = pws[gid].i[3];
229 pw_buf1[0] = pws[gid].i[4];
230 pw_buf1[1] = pws[gid].i[5];
231 pw_buf1[2] = pws[gid].i[6];
232 pw_buf1[3] = pws[gid].i[7];
// Length of the pws word; it is paired with the wordl* buffers below.
234 const u32 pw_l_len = pws[gid].pw_len;
// Digest words DGST_R0 / DGST_R1 of the entry at digests_offset.
// NOTE(review): `search` is presumably what COMPARE_S_SIMD matches
// against - confirm against the macro definition.
240 const u32 search[4] =
242 digests_buf[digests_offset].digest_buf[DGST_R0],
243 digests_buf[digests_offset].digest_buf[DGST_R1],
// Step through combs_buf positions [0, il_cnt) in increments of VECT_SIZE.
252 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
// Length obtained from combs_buf at il_pos; it is paired with wordr* below.
254 const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
// Combined length of both parts; bounds the byte loop further down.
256 const u32x pw_len = pw_l_len + pw_r_len;
259 * concat password candidate
// wordl0..wordl3 start zeroed; only wordl0 / wordl1 are then filled, from
// pw_buf0 / pw_buf1.
262 u32x wordl0[4] = { 0 };
263 u32x wordl1[4] = { 0 };
264 u32x wordl2[4] = { 0 };
265 u32x wordl3[4] = { 0 };
267 wordl0[0] = pw_buf0[0];
268 wordl0[1] = pw_buf0[1];
269 wordl0[2] = pw_buf0[2];
270 wordl0[3] = pw_buf0[3];
271 wordl1[0] = pw_buf1[0];
272 wordl1[1] = pw_buf1[1];
273 wordl1[2] = pw_buf1[2];
274 wordl1[3] = pw_buf1[3];
// wordr0..wordr3 start zeroed; only wordr0 / wordr1 are then filled, with
// words 0..7 fetched through ix_create_combt for position il_pos.
276 u32x wordr0[4] = { 0 };
277 u32x wordr1[4] = { 0 };
278 u32x wordr2[4] = { 0 };
279 u32x wordr3[4] = { 0 };
281 wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
282 wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
283 wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
284 wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
285 wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
286 wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
287 wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
288 wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
// combs_mode decides which side is passed to switch_buffer_by_offset_le_VV:
// in COMBINATOR_MODE_BASE_LEFT the wordr* buffers are offset by pw_l_len.
// NOTE(review): presumably the helper moves the buffer contents up by that
// many bytes so the two parts do not overlap when OR-ed - confirm against
// the helper's definition.
290 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
292 switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
// NOTE(review): presumably the alternative branch - here the wordl* buffers
// are offset by pw_r_len instead.
296 switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
// Merge the two sides word by word with a bitwise OR into w0..w3.
304 w0[0] = wordl0[0] | wordr0[0];
305 w0[1] = wordl0[1] | wordr0[1];
306 w0[2] = wordl0[2] | wordr0[2];
307 w0[3] = wordl0[3] | wordr0[3];
308 w1[0] = wordl1[0] | wordr1[0];
309 w1[1] = wordl1[1] | wordr1[1];
310 w1[2] = wordl1[2] | wordr1[2];
311 w1[3] = wordl1[3] | wordr1[3];
312 w2[0] = wordl2[0] | wordr2[0];
313 w2[1] = wordl2[1] | wordr2[1];
314 w2[2] = wordl2[2] | wordr2[2];
315 w2[3] = wordl2[3] | wordr2[3];
316 w3[0] = wordl3[0] | wordr3[0];
317 w3[1] = wordl3[1] | wordr3[1];
353 a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \
361 for (i = 0, j = 0; i <= (int) pw_len - 4; i += 4, j += 1)
// Complete words: i advances four bytes and j one word per iteration; the
// four bytes of w_t[j] are passed to ROUND from the lowest byte upwards.
// NOTE(review): the (int) cast in the loop bound presumably keeps the
// comparison signed so lengths below 4 skip this loop - confirm.
363 const u32x wj = w_t[j];
365 ROUND ((wj >> 0) & 0xff);
366 ROUND ((wj >> 8) & 0xff);
367 ROUND ((wj >> 16) & 0xff);
368 ROUND ((wj >> 24) & 0xff);
// Trailing partial word: `left` is the byte count remaining after the loop.
// NOTE(review): the three ROUND groups below cover 3, 2 and 1 bytes and are
// presumably selected by the value of `left` - confirm.
371 const u32x wj = w_t[j];
373 const u32 left = pw_len - i;
377 ROUND ((wj >> 0) & 0xff);
378 ROUND ((wj >> 8) & 0xff);
379 ROUND ((wj >> 16) & 0xff);
383 ROUND ((wj >> 0) & 0xff);
384 ROUND ((wj >> 8) & 0xff);
388 ROUND ((wj >> 0) & 0xff);
// Comparison step: a and b are passed first, z fills the last two arguments.
396 COMPARE_S_SIMD (a, b, z, z);
// Kernel m00200_s08: shares its parameter list with m00200_s04.
// NOTE(review): appears to be an empty stub (no statements follow the
// signature) - confirm.
400 __kernel void m00200_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
404 __kernel void m00200_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)