2 * Author......: Jens Steube <jens.steube@gmail.com>
10 #include "include/constants.h"
11 #include "include/kernel_vendor.h"
18 #include "include/kernel_functions.c"
19 #include "OpenCL/types_ocl.c"
20 #include "OpenCL/common.c"
21 #include "OpenCL/simd.c"
23 __kernel void m00200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
29 const u32 lid = get_local_id (0);
35 const u32 gid = get_global_id (0);
37 if (gid >= gid_max) return;
42 pws0[0] = pws[gid].i[0];
43 pws0[1] = pws[gid].i[1];
44 pws0[2] = pws[gid].i[2];
45 pws0[3] = pws[gid].i[3];
46 pws1[0] = pws[gid].i[4];
47 pws1[1] = pws[gid].i[5];
48 pws1[2] = pws[gid].i[6];
49 pws1[3] = pws[gid].i[7];
51 const u32 pw_l_len = pws[gid].pw_len;
57 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
59 const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
61 const u32x pw_len = pw_l_len + pw_r_len;
63 u32x wordr0[4] = { 0 };
64 u32x wordr1[4] = { 0 };
65 u32x wordr2[4] = { 0 };
66 u32x wordr3[4] = { 0 };
68 wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
69 wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
70 wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
71 wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
72 wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
73 wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
74 wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
75 wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
77 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
79 switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
84 w_t[ 0] = wordl0[0] | wordr0[0];
85 w_t[ 1] = wordl0[1] | wordr0[1];
86 w_t[ 2] = wordl0[2] | wordr0[2];
87 w_t[ 3] = wordl0[3] | wordr0[3];
88 w_t[ 4] = wordl1[0] | wordr1[0];
89 w_t[ 5] = wordl1[1] | wordr1[1];
90 w_t[ 6] = wordl1[2] | wordr1[2];
91 w_t[ 7] = wordl1[3] | wordr1[3];
92 w_t[ 8] = wordl2[0] | wordr2[0];
93 w_t[ 9] = wordl2[1] | wordr2[1];
94 w_t[10] = wordl2[2] | wordr2[2];
95 w_t[11] = wordl2[3] | wordr2[3];
96 w_t[12] = wordl3[0] | wordr3[0];
97 w_t[13] = wordl3[1] | wordr3[1];
98 w_t[14] = wordl3[2] | wordr3[2];
110 a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \
118 for (i = 0, j = 0; i <= (int) pw_len - 4; i += 4, j += 1)
120 const u32 wj = w_t[j];
122 ROUND ((wj >> 0) & 0xff);
123 ROUND ((wj >> 8) & 0xff);
124 ROUND ((wj >> 16) & 0xff);
125 ROUND ((wj >> 24) & 0xff);
128 const u32 wj = w_t[j];
130 const u32 left = pw_len - i;
134 ROUND ((wj >> 0) & 0xff);
135 ROUND ((wj >> 8) & 0xff);
136 ROUND ((wj >> 16) & 0xff);
140 ROUND ((wj >> 0) & 0xff);
141 ROUND ((wj >> 8) & 0xff);
145 ROUND ((wj >> 0) & 0xff);
151 COMPARE_M_SIMD (a, b, c, d);
155 __kernel void m00200_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
159 __kernel void m00200_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
163 __kernel void m00200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
169 const u32 lid = get_local_id (0);
175 const u32 gid = get_global_id (0);
177 if (gid >= gid_max) return;
182 pws0[0] = pws[gid].i[0];
183 pws0[1] = pws[gid].i[1];
184 pws0[2] = pws[gid].i[2];
185 pws0[3] = pws[gid].i[3];
186 pws1[0] = pws[gid].i[4];
187 pws1[1] = pws[gid].i[5];
188 pws1[2] = pws[gid].i[6];
189 pws1[3] = pws[gid].i[7];
191 const u32 pw_l_len = pws[gid].pw_len;
197 const u32 search[4] =
199 digests_buf[digests_offset].digest_buf[DGST_R0],
200 digests_buf[digests_offset].digest_buf[DGST_R1],
201 digests_buf[digests_offset].digest_buf[DGST_R2],
202 digests_buf[digests_offset].digest_buf[DGST_R3]
209 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
211 const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
213 const u32x pw_len = pw_l_len + pw_r_len;
215 u32x wordr0[4] = { 0 };
216 u32x wordr1[4] = { 0 };
217 u32x wordr2[4] = { 0 };
218 u32x wordr3[4] = { 0 };
220 wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
221 wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
222 wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
223 wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
224 wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
225 wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
226 wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
227 wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
229 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
231 switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
236 w_t[ 0] = wordl0[0] | wordr0[0];
237 w_t[ 1] = wordl0[1] | wordr0[1];
238 w_t[ 2] = wordl0[2] | wordr0[2];
239 w_t[ 3] = wordl0[3] | wordr0[3];
240 w_t[ 4] = wordl1[0] | wordr1[0];
241 w_t[ 5] = wordl1[1] | wordr1[1];
242 w_t[ 6] = wordl1[2] | wordr1[2];
243 w_t[ 7] = wordl1[3] | wordr1[3];
244 w_t[ 8] = wordl2[0] | wordr2[0];
245 w_t[ 9] = wordl2[1] | wordr2[1];
246 w_t[10] = wordl2[2] | wordr2[2];
247 w_t[11] = wordl2[3] | wordr2[3];
248 w_t[12] = wordl3[0] | wordr3[0];
249 w_t[13] = wordl3[1] | wordr3[1];
250 w_t[14] = wordl3[2] | wordr3[2];
262 a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \
270 for (i = 0, j = 0; i <= (int) pw_len - 4; i += 4, j += 1)
272 const u32 wj = w_t[j];
274 ROUND ((wj >> 0) & 0xff);
275 ROUND ((wj >> 8) & 0xff);
276 ROUND ((wj >> 16) & 0xff);
277 ROUND ((wj >> 24) & 0xff);
280 const u32 wj = w_t[j];
282 const u32 left = pw_len - i;
286 ROUND ((wj >> 0) & 0xff);
287 ROUND ((wj >> 8) & 0xff);
288 ROUND ((wj >> 16) & 0xff);
292 ROUND ((wj >> 0) & 0xff);
293 ROUND ((wj >> 8) & 0xff);
297 ROUND ((wj >> 0) & 0xff);
303 COMPARE_S_SIMD (a, b, c, d);
307 __kernel void m00200_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
311 __kernel void m00200_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)