2 * Author......: Jens Steube <jens.steube@gmail.com>
9 //#define NEW_SIMD_CODE
11 #include "include/constants.h"
12 #include "include/kernel_vendor.h"
19 #include "include/kernel_functions.c"
20 #include "OpenCL/types_ocl.c"
21 #include "OpenCL/common.c"
22 #include "OpenCL/simd.c"
24 __kernel void m00200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
30 const u32 lid = get_local_id (0);
36 const u32 gid = get_global_id (0);
38 if (gid >= gid_max) return;
43 pw_buf0[0] = pws[gid].i[0];
44 pw_buf0[1] = pws[gid].i[1];
45 pw_buf0[2] = pws[gid].i[2];
46 pw_buf0[3] = pws[gid].i[3];
47 pw_buf1[0] = pws[gid].i[4];
48 pw_buf1[1] = pws[gid].i[5];
49 pw_buf1[2] = pws[gid].i[6];
50 pw_buf1[3] = pws[gid].i[7];
52 const u32 pw_l_len = pws[gid].pw_len;
58 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
60 const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
62 const u32x pw_len = pw_l_len + pw_r_len;
65 * concat password candidate
68 u32x wordl0[4] = { 0 };
69 u32x wordl1[4] = { 0 };
70 u32x wordl2[4] = { 0 };
71 u32x wordl3[4] = { 0 };
73 wordl0[0] = pw_buf0[0];
74 wordl0[1] = pw_buf0[1];
75 wordl0[2] = pw_buf0[2];
76 wordl0[3] = pw_buf0[3];
77 wordl1[0] = pw_buf1[0];
78 wordl1[1] = pw_buf1[1];
79 wordl1[2] = pw_buf1[2];
80 wordl1[3] = pw_buf1[3];
82 u32x wordr0[4] = { 0 };
83 u32x wordr1[4] = { 0 };
84 u32x wordr2[4] = { 0 };
85 u32x wordr3[4] = { 0 };
87 wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
88 wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
89 wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
90 wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
91 wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
92 wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
93 wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
94 wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
96 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
98 switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
102 switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
110 w0[0] = wordl0[0] | wordr0[0];
111 w0[1] = wordl0[1] | wordr0[1];
112 w0[2] = wordl0[2] | wordr0[2];
113 w0[3] = wordl0[3] | wordr0[3];
114 w1[0] = wordl1[0] | wordr1[0];
115 w1[1] = wordl1[1] | wordr1[1];
116 w1[2] = wordl1[2] | wordr1[2];
117 w1[3] = wordl1[3] | wordr1[3];
118 w2[0] = wordl2[0] | wordr2[0];
119 w2[1] = wordl2[1] | wordr2[1];
120 w2[2] = wordl2[2] | wordr2[2];
121 w2[3] = wordl2[3] | wordr2[3];
122 w3[0] = wordl3[0] | wordr3[0];
123 w3[1] = wordl3[1] | wordr3[1];
159 a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \
167 for (i = 0, j = 0; i <= (int) pw_len - 4; i += 4, j += 1)
169 const u32x wj = w_t[j];
171 ROUND ((wj >> 0) & 0xff);
172 ROUND ((wj >> 8) & 0xff);
173 ROUND ((wj >> 16) & 0xff);
174 ROUND ((wj >> 24) & 0xff);
177 const u32x wj = w_t[j];
179 const u32 left = pw_len - i;
183 ROUND ((wj >> 0) & 0xff);
184 ROUND ((wj >> 8) & 0xff);
185 ROUND ((wj >> 16) & 0xff);
189 ROUND ((wj >> 0) & 0xff);
190 ROUND ((wj >> 8) & 0xff);
194 ROUND ((wj >> 0) & 0xff);
200 COMPARE_M_SIMD (a, b, c, d);
204 __kernel void m00200_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
208 __kernel void m00200_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
212 __kernel void m00200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
218 const u32 lid = get_local_id (0);
224 const u32 gid = get_global_id (0);
226 if (gid >= gid_max) return;
231 pw_buf0[0] = pws[gid].i[0];
232 pw_buf0[1] = pws[gid].i[1];
233 pw_buf0[2] = pws[gid].i[2];
234 pw_buf0[3] = pws[gid].i[3];
235 pw_buf1[0] = pws[gid].i[4];
236 pw_buf1[1] = pws[gid].i[5];
237 pw_buf1[2] = pws[gid].i[6];
238 pw_buf1[3] = pws[gid].i[7];
240 const u32 pw_l_len = pws[gid].pw_len;
246 const u32 search[4] =
248 digests_buf[digests_offset].digest_buf[DGST_R0],
249 digests_buf[digests_offset].digest_buf[DGST_R1],
250 digests_buf[digests_offset].digest_buf[DGST_R2],
251 digests_buf[digests_offset].digest_buf[DGST_R3]
258 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
260 const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
262 const u32x pw_len = pw_l_len + pw_r_len;
265 * concat password candidate
268 u32x wordl0[4] = { 0 };
269 u32x wordl1[4] = { 0 };
270 u32x wordl2[4] = { 0 };
271 u32x wordl3[4] = { 0 };
273 wordl0[0] = pw_buf0[0];
274 wordl0[1] = pw_buf0[1];
275 wordl0[2] = pw_buf0[2];
276 wordl0[3] = pw_buf0[3];
277 wordl1[0] = pw_buf1[0];
278 wordl1[1] = pw_buf1[1];
279 wordl1[2] = pw_buf1[2];
280 wordl1[3] = pw_buf1[3];
282 u32x wordr0[4] = { 0 };
283 u32x wordr1[4] = { 0 };
284 u32x wordr2[4] = { 0 };
285 u32x wordr3[4] = { 0 };
287 wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
288 wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
289 wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
290 wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
291 wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
292 wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
293 wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
294 wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
296 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
298 switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
302 switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
310 w0[0] = wordl0[0] | wordr0[0];
311 w0[1] = wordl0[1] | wordr0[1];
312 w0[2] = wordl0[2] | wordr0[2];
313 w0[3] = wordl0[3] | wordr0[3];
314 w1[0] = wordl1[0] | wordr1[0];
315 w1[1] = wordl1[1] | wordr1[1];
316 w1[2] = wordl1[2] | wordr1[2];
317 w1[3] = wordl1[3] | wordr1[3];
318 w2[0] = wordl2[0] | wordr2[0];
319 w2[1] = wordl2[1] | wordr2[1];
320 w2[2] = wordl2[2] | wordr2[2];
321 w2[3] = wordl2[3] | wordr2[3];
322 w3[0] = wordl3[0] | wordr3[0];
323 w3[1] = wordl3[1] | wordr3[1];
359 a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \
367 for (i = 0, j = 0; i <= (int) pw_len - 4; i += 4, j += 1)
369 const u32x wj = w_t[j];
371 ROUND ((wj >> 0) & 0xff);
372 ROUND ((wj >> 8) & 0xff);
373 ROUND ((wj >> 16) & 0xff);
374 ROUND ((wj >> 24) & 0xff);
377 const u32x wj = w_t[j];
379 const u32 left = pw_len - i;
383 ROUND ((wj >> 0) & 0xff);
384 ROUND ((wj >> 8) & 0xff);
385 ROUND ((wj >> 16) & 0xff);
389 ROUND ((wj >> 0) & 0xff);
390 ROUND ((wj >> 8) & 0xff);
394 ROUND ((wj >> 0) & 0xff);
400 COMPARE_S_SIMD (a, b, c, d);
404 __kernel void m00200_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
408 __kernel void m00200_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)