2 * Author......: Jens Steube <jens.steube@gmail.com>
10 #include "include/constants.h"
11 #include "include/kernel_vendor.h"
18 #include "include/kernel_functions.c"
19 #include "OpenCL/types_ocl.c"
20 #include "OpenCL/common.c"
21 #include "OpenCL/simd.c"
/*
 * m04900_m04: OpenCL kernel entry point.
 *
 * Each work-item with gid < gid_max copies one left word from pws[gid] and the
 * salt at salt_bufs[salt_pos], then loops over the right-hand candidates in
 * combs_buf (il_cnt entries, VECT_SIZE per pass). On every pass it ORs the
 * left and right words into w0..w3, runs that buffer through
 * switch_buffer_by_offset_le with salt_len and ORs the salt words in, builds a
 * second salt copy offset by pw_len + salt_len, calls append_0x80_4x4_VV with
 * the total length, performs 80 SHA1_STEP invocations and passes (d, e, c, b)
 * to COMPARE_M_SIMD.
 *
 * Parameters referenced by the visible statements: pws, combs_buf, salt_bufs,
 * salt_pos, il_cnt, combs_mode, gid_max. The other parameters are not
 * referenced by any visible statement; presumably COMPARE_M_SIMD consumes
 * some of them - confirm in the included headers.
 *
 * NOTE(review): the declarations of pw_buf0/1, salt_buf0..3, w0..w3, s0..s3,
 * a..e and we_t are not visible in this extract, and neither is any statement
 * that merges s0..s3 into w0..w3.
 */
23 __kernel void m04900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
/* Local work-item id; not referenced by any visible statement (presumably used inside COMPARE_M_SIMD - confirm). */
29 const u32 lid = get_local_id (0);
/* Global work-item id; selects this work-item's entry in pws. */
35 const u32 gid = get_global_id (0);
/* Work-items at or beyond gid_max do nothing. */
37 if (gid >= gid_max) return;
/* Copy the eight 32-bit words pws[gid].i[0..7] into pw_buf0 / pw_buf1. */
42 pw_buf0[0] = pws[gid].i[0];
43 pw_buf0[1] = pws[gid].i[1];
44 pw_buf0[2] = pws[gid].i[2];
45 pw_buf0[3] = pws[gid].i[3];
46 pw_buf1[0] = pws[gid].i[4];
47 pw_buf1[1] = pws[gid].i[5];
48 pw_buf1[2] = pws[gid].i[6];
49 pw_buf1[3] = pws[gid].i[7];
/* Length recorded for the left word. */
51 const u32 pw_l_len = pws[gid].pw_len;
/* Copy salt_buf[0..15] of the selected salt into four groups of four words. */
62 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
63 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
64 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
65 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
66 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
67 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
68 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
69 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
70 salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
71 salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
72 salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
73 salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
74 salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
75 salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
76 salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
77 salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
79 const u32 salt_len = salt_bufs[salt_pos].salt_len;
/* Walk the right-hand candidates, advancing VECT_SIZE entries per iteration. */
85 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
/* Right-word length for this pass, and the combined left + right length. */
87 const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
89 const u32x pw_len = pw_l_len + pw_r_len;
92 * concat password candidate
/* Left word: zero-initialised buffers, first eight words taken from pw_buf0 / pw_buf1. */
95 u32x wordl0[4] = { 0 };
96 u32x wordl1[4] = { 0 };
97 u32x wordl2[4] = { 0 };
98 u32x wordl3[4] = { 0 };
100 wordl0[0] = pw_buf0[0];
101 wordl0[1] = pw_buf0[1];
102 wordl0[2] = pw_buf0[2];
103 wordl0[3] = pw_buf0[3];
104 wordl1[0] = pw_buf1[0];
105 wordl1[1] = pw_buf1[1];
106 wordl1[2] = pw_buf1[2];
107 wordl1[3] = pw_buf1[3];
/* Right word: zero-initialised buffers, first eight words fetched from combs_buf at il_pos. */
109 u32x wordr0[4] = { 0 };
110 u32x wordr1[4] = { 0 };
111 u32x wordr2[4] = { 0 };
112 u32x wordr3[4] = { 0 };
114 wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
115 wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
116 wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
117 wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
118 wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
119 wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
120 wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
121 wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
/*
 * One of the two words is passed through switch_buffer_by_offset_le_VV with
 * the other word's length as the offset: the right word by pw_l_len, or the
 * left word by pw_r_len.
 * NOTE(review): presumably the second call is the else branch of the
 * combs_mode test; the branch structure is not visible in this extract.
 */
123 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
125 switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
129 switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
/* OR the left and right words together into the message buffer w0..w3. */
137 w0[0] = wordl0[0] | wordr0[0];
138 w0[1] = wordl0[1] | wordr0[1];
139 w0[2] = wordl0[2] | wordr0[2];
140 w0[3] = wordl0[3] | wordr0[3];
141 w1[0] = wordl1[0] | wordr1[0];
142 w1[1] = wordl1[1] | wordr1[1];
143 w1[2] = wordl1[2] | wordr1[2];
144 w1[3] = wordl1[3] | wordr1[3];
145 w2[0] = wordl2[0] | wordr2[0];
146 w2[1] = wordl2[1] | wordr2[1];
147 w2[2] = wordl2[2] | wordr2[2];
148 w2[3] = wordl2[3] | wordr2[3];
149 w3[0] = wordl3[0] | wordr3[0];
150 w3[1] = wordl3[1] | wordr3[1];
151 w3[2] = wordl3[2] | wordr3[2];
152 w3[3] = wordl3[3] | wordr3[3];
/* Offset the combined password by salt_len, then OR the salt words into the buffer. */
158 switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
160 const u32x pw_salt_len = pw_len + salt_len;
162 w0[0] |= salt_buf0[0];
163 w0[1] |= salt_buf0[1];
164 w0[2] |= salt_buf0[2];
165 w0[3] |= salt_buf0[3];
166 w1[0] |= salt_buf1[0];
167 w1[1] |= salt_buf1[1];
168 w1[2] |= salt_buf1[2];
169 w1[3] |= salt_buf1[3];
170 w2[0] |= salt_buf2[0];
171 w2[1] |= salt_buf2[1];
172 w2[2] |= salt_buf2[2];
173 w2[3] |= salt_buf2[3];
174 w3[0] |= salt_buf3[0];
175 w3[1] |= salt_buf3[1];
176 w3[2] |= salt_buf3[2];
177 w3[3] |= salt_buf3[3];
/* Second copy of the salt, offset by pw_salt_len (salt + password length). */
188 s0[0] = salt_buf0[0];
189 s0[1] = salt_buf0[1];
190 s0[2] = salt_buf0[2];
191 s0[3] = salt_buf0[3];
192 s1[0] = salt_buf1[0];
193 s1[1] = salt_buf1[1];
194 s1[2] = salt_buf1[2];
195 s1[3] = salt_buf1[3];
196 s2[0] = salt_buf2[0];
197 s2[1] = salt_buf2[1];
198 s2[2] = salt_buf2[2];
199 s2[3] = salt_buf2[3];
200 s3[0] = salt_buf3[0];
201 s3[1] = salt_buf3[1];
202 s3[2] = salt_buf3[2];
203 s3[3] = salt_buf3[3];
205 switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_salt_len);
/* Total message length: salt + password + salt. */
224 const u32x salt_pw_salt_len = salt_len + pw_len + salt_len;
226 append_0x80_4x4_VV (w0, w1, w2, w3, salt_pw_salt_len);
/* Apply swap32 to the first 14 message words to form the schedule variables. */
232 u32x w0_t = swap32 (w0[0]);
233 u32x w1_t = swap32 (w0[1]);
234 u32x w2_t = swap32 (w0[2]);
235 u32x w3_t = swap32 (w0[3]);
236 u32x w4_t = swap32 (w1[0]);
237 u32x w5_t = swap32 (w1[1]);
238 u32x w6_t = swap32 (w1[2]);
239 u32x w7_t = swap32 (w1[3]);
240 u32x w8_t = swap32 (w2[0]);
241 u32x w9_t = swap32 (w2[1]);
242 u32x wa_t = swap32 (w2[2]);
243 u32x wb_t = swap32 (w2[3]);
244 u32x wc_t = swap32 (w3[0]);
245 u32x wd_t = swap32 (w3[1]);
/* Last schedule word: the total length multiplied by 8. */
247 u32x wf_t = salt_pw_salt_len * 8;
/* First 16 steps (SHA1_F0o) consume the schedule words as loaded. */
258 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
259 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
260 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
261 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
262 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
263 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
264 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
265 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
266 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
267 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
268 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
269 SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
270 SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
271 SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
272 SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
273 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
/*
 * From here on every schedule word is recomputed in place, as rotl32 by 1 of
 * the XOR of four schedule words, immediately before the step that uses it.
 * Four more SHA1_F0o steps complete the first group of 20.
 */
274 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
275 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
276 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
277 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
/* Second group: 20 steps with SHA1_F1. */
282 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
283 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
284 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
285 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
286 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
287 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
288 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
289 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
290 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
291 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
292 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
293 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
294 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
295 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
296 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
297 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
298 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
299 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
300 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
301 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
/* Third group: 20 steps with SHA1_F2o. */
306 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
307 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
308 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
309 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
310 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
311 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
312 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
313 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
314 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
315 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
316 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
317 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
318 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
319 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
320 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
321 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
322 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
323 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
324 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
325 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
/* Fourth group: 20 steps, again with SHA1_F1. */
330 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
331 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
332 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
333 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
334 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
335 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
336 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
337 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
338 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
339 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
340 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
341 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
342 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
343 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
344 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
345 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
346 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
347 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
348 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
349 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
/* Pass the state words d, e, c, b (in that order) to the compare macro. */
351 COMPARE_M_SIMD (d, e, c, b);
/*
 * m04900_m08: OpenCL kernel entry point whose parameter list is identical to
 * that of m04900_m04. No body statements are visible for it in this extract.
 */
355 __kernel void m04900_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
/*
 * m04900_m16: OpenCL kernel entry point whose parameter list is identical to
 * that of m04900_m04. No body statements are visible for it in this extract.
 */
359 __kernel void m04900_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
/*
 * m04900_s04: OpenCL kernel entry point.
 *
 * Performs the same candidate construction and SHA1_STEP sequence as
 * m04900_m04, but first loads four target words from
 * digests_buf[digests_offset] into search[], rejects a pass early (before the
 * last four steps) when MATCHES_NONE_VS (e, e_rev) holds, and finishes with
 * COMPARE_S_SIMD instead of COMPARE_M_SIMD.
 *
 * Parameters referenced by the visible statements: pws, combs_buf, salt_bufs,
 * salt_pos, digests_buf, digests_offset, il_cnt, combs_mode, gid_max. The
 * other parameters are not referenced by any visible statement; presumably
 * COMPARE_S_SIMD consumes some of them - confirm in the included headers.
 *
 * NOTE(review): the declarations of pw_buf0/1, salt_buf0..3, w0..w3, s0..s3,
 * a..e and we_t are not visible in this extract, and neither is any statement
 * that merges s0..s3 into w0..w3.
 */
363 __kernel void m04900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
/* Local work-item id; not referenced by any visible statement (presumably used inside COMPARE_S_SIMD - confirm). */
369 const u32 lid = get_local_id (0);
/* Global work-item id; selects this work-item's entry in pws. */
375 const u32 gid = get_global_id (0);
/* Work-items at or beyond gid_max do nothing. */
377 if (gid >= gid_max) return;
/* Copy the eight 32-bit words pws[gid].i[0..7] into pw_buf0 / pw_buf1. */
382 pw_buf0[0] = pws[gid].i[0];
383 pw_buf0[1] = pws[gid].i[1];
384 pw_buf0[2] = pws[gid].i[2];
385 pw_buf0[3] = pws[gid].i[3];
386 pw_buf1[0] = pws[gid].i[4];
387 pw_buf1[1] = pws[gid].i[5];
388 pw_buf1[2] = pws[gid].i[6];
389 pw_buf1[3] = pws[gid].i[7];
/* Length recorded for the left word. */
391 const u32 pw_l_len = pws[gid].pw_len;
/* Copy salt_buf[0..15] of the selected salt into four groups of four words. */
402 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
403 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
404 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
405 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
406 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
407 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
408 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
409 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
410 salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
411 salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
412 salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
413 salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
414 salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
415 salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
416 salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
417 salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
419 const u32 salt_len = salt_bufs[salt_pos].salt_len;
/* Target words: digest_buf entries DGST_R0..DGST_R3 of digests_buf[digests_offset]. */
425 const u32 search[4] =
427 digests_buf[digests_offset].digest_buf[DGST_R0],
428 digests_buf[digests_offset].digest_buf[DGST_R1],
429 digests_buf[digests_offset].digest_buf[DGST_R2],
430 digests_buf[digests_offset].digest_buf[DGST_R3]
/*
 * search[1] rotated left by 2 bits, used for the early-reject test inside the
 * loop. Presumably this compensates for a rotate-left-by-30 that the
 * remaining SHA1_STEP calls apply to e after that test, as in standard
 * SHA-1 - confirm against the SHA1_STEP definition.
 */
437 const u32 e_rev = rotl32_S (search[1], 2u);
/* Walk the right-hand candidates, advancing VECT_SIZE entries per iteration. */
443 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
/* Right-word length for this pass, and the combined left + right length. */
445 const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
447 const u32x pw_len = pw_l_len + pw_r_len;
450 * concat password candidate
/* Left word: zero-initialised buffers, first eight words taken from pw_buf0 / pw_buf1. */
453 u32x wordl0[4] = { 0 };
454 u32x wordl1[4] = { 0 };
455 u32x wordl2[4] = { 0 };
456 u32x wordl3[4] = { 0 };
458 wordl0[0] = pw_buf0[0];
459 wordl0[1] = pw_buf0[1];
460 wordl0[2] = pw_buf0[2];
461 wordl0[3] = pw_buf0[3];
462 wordl1[0] = pw_buf1[0];
463 wordl1[1] = pw_buf1[1];
464 wordl1[2] = pw_buf1[2];
465 wordl1[3] = pw_buf1[3];
/* Right word: zero-initialised buffers, first eight words fetched from combs_buf at il_pos. */
467 u32x wordr0[4] = { 0 };
468 u32x wordr1[4] = { 0 };
469 u32x wordr2[4] = { 0 };
470 u32x wordr3[4] = { 0 };
472 wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
473 wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
474 wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
475 wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
476 wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
477 wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
478 wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
479 wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
/*
 * One of the two words is passed through switch_buffer_by_offset_le_VV with
 * the other word's length as the offset: the right word by pw_l_len, or the
 * left word by pw_r_len.
 * NOTE(review): presumably the second call is the else branch of the
 * combs_mode test; the branch structure is not visible in this extract.
 */
481 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
483 switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
487 switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
/* OR the left and right words together into the message buffer w0..w3. */
495 w0[0] = wordl0[0] | wordr0[0];
496 w0[1] = wordl0[1] | wordr0[1];
497 w0[2] = wordl0[2] | wordr0[2];
498 w0[3] = wordl0[3] | wordr0[3];
499 w1[0] = wordl1[0] | wordr1[0];
500 w1[1] = wordl1[1] | wordr1[1];
501 w1[2] = wordl1[2] | wordr1[2];
502 w1[3] = wordl1[3] | wordr1[3];
503 w2[0] = wordl2[0] | wordr2[0];
504 w2[1] = wordl2[1] | wordr2[1];
505 w2[2] = wordl2[2] | wordr2[2];
506 w2[3] = wordl2[3] | wordr2[3];
507 w3[0] = wordl3[0] | wordr3[0];
508 w3[1] = wordl3[1] | wordr3[1];
509 w3[2] = wordl3[2] | wordr3[2];
510 w3[3] = wordl3[3] | wordr3[3];
/* Offset the combined password by salt_len, then OR the salt words into the buffer. */
516 switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
518 const u32x pw_salt_len = pw_len + salt_len;
520 w0[0] |= salt_buf0[0];
521 w0[1] |= salt_buf0[1];
522 w0[2] |= salt_buf0[2];
523 w0[3] |= salt_buf0[3];
524 w1[0] |= salt_buf1[0];
525 w1[1] |= salt_buf1[1];
526 w1[2] |= salt_buf1[2];
527 w1[3] |= salt_buf1[3];
528 w2[0] |= salt_buf2[0];
529 w2[1] |= salt_buf2[1];
530 w2[2] |= salt_buf2[2];
531 w2[3] |= salt_buf2[3];
532 w3[0] |= salt_buf3[0];
533 w3[1] |= salt_buf3[1];
534 w3[2] |= salt_buf3[2];
535 w3[3] |= salt_buf3[3];
/* Second copy of the salt, offset by pw_salt_len (salt + password length). */
546 s0[0] = salt_buf0[0];
547 s0[1] = salt_buf0[1];
548 s0[2] = salt_buf0[2];
549 s0[3] = salt_buf0[3];
550 s1[0] = salt_buf1[0];
551 s1[1] = salt_buf1[1];
552 s1[2] = salt_buf1[2];
553 s1[3] = salt_buf1[3];
554 s2[0] = salt_buf2[0];
555 s2[1] = salt_buf2[1];
556 s2[2] = salt_buf2[2];
557 s2[3] = salt_buf2[3];
558 s3[0] = salt_buf3[0];
559 s3[1] = salt_buf3[1];
560 s3[2] = salt_buf3[2];
561 s3[3] = salt_buf3[3];
563 switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_salt_len);
/* Total message length: salt + password + salt. */
582 const u32x salt_pw_salt_len = salt_len + pw_len + salt_len;
584 append_0x80_4x4_VV (w0, w1, w2, w3, salt_pw_salt_len);
/* Apply swap32 to the first 14 message words to form the schedule variables. */
590 u32x w0_t = swap32 (w0[0]);
591 u32x w1_t = swap32 (w0[1]);
592 u32x w2_t = swap32 (w0[2]);
593 u32x w3_t = swap32 (w0[3]);
594 u32x w4_t = swap32 (w1[0]);
595 u32x w5_t = swap32 (w1[1]);
596 u32x w6_t = swap32 (w1[2]);
597 u32x w7_t = swap32 (w1[3]);
598 u32x w8_t = swap32 (w2[0]);
599 u32x w9_t = swap32 (w2[1]);
600 u32x wa_t = swap32 (w2[2]);
601 u32x wb_t = swap32 (w2[3]);
602 u32x wc_t = swap32 (w3[0]);
603 u32x wd_t = swap32 (w3[1]);
/* Last schedule word: the total length multiplied by 8. */
605 u32x wf_t = salt_pw_salt_len * 8;
/* First 16 steps (SHA1_F0o) consume the schedule words as loaded. */
616 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
617 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
618 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
619 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
620 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
621 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
622 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
623 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
624 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
625 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
626 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
627 SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
628 SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
629 SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
630 SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
631 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
/*
 * From here on every schedule word is recomputed in place, as rotl32 by 1 of
 * the XOR of four schedule words, immediately before the step that uses it.
 * Four more SHA1_F0o steps complete the first group of 20.
 */
632 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
633 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
634 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
635 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
/* Second group: 20 steps with SHA1_F1. */
640 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
641 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
642 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
643 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
644 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
645 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
646 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
647 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
648 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
649 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
650 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
651 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
652 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
653 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
654 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
655 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
656 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
657 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
658 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
659 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
/* Third group: 20 steps with SHA1_F2o. */
664 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
665 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
666 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
667 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
668 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
669 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
670 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
671 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
672 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
673 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
674 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
675 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
676 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
677 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
678 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
679 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
680 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
681 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
682 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
683 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
/* Fourth group: SHA1_F1 again; 16 steps run before the early-reject test. */
688 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
689 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
690 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
691 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
692 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
693 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
694 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
695 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
696 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
697 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
698 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
699 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
700 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
701 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
702 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
703 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
/* Early reject: when MATCHES_NONE_VS (e, e_rev) holds, skip the last four steps and the compare. */
705 if (MATCHES_NONE_VS (e, e_rev)) continue;
/* Remaining four steps, executed only when the early test did not reject. */
707 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
708 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
709 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
710 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
/* Pass the state words d, e, c, b (in that order) to the compare macro. */
712 COMPARE_S_SIMD (d, e, c, b);
/*
 * m04900_s08: OpenCL kernel entry point whose parameter list is identical to
 * that of m04900_s04. No body statements are visible for it in this extract.
 */
716 __kernel void m04900_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
720 __kernel void m04900_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)