 * Author......: Jens Steube <jens.steube@gmail.com>
10 #include "include/constants.h"
11 #include "include/kernel_vendor.h"
18 #include "include/kernel_functions.c"
19 #include "OpenCL/types_ocl.c"
20 #include "OpenCL/common.c"
21 #include "include/rp_kernel.h"
22 #include "OpenCL/rp.c"
23 #include "OpenCL/simd.c"
25 __kernel void m04900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
31 const u32 lid = get_local_id (0);
37 const u32 gid = get_global_id (0);
39 if (gid >= gid_max) return;
43 pw_buf0[0] = pws[gid].i[ 0];
44 pw_buf0[1] = pws[gid].i[ 1];
45 pw_buf0[2] = pws[gid].i[ 2];
46 pw_buf0[3] = pws[gid].i[ 3];
50 pw_buf1[0] = pws[gid].i[ 4];
51 pw_buf1[1] = pws[gid].i[ 5];
52 pw_buf1[2] = pws[gid].i[ 6];
53 pw_buf1[3] = pws[gid].i[ 7];
55 const u32 pw_len = pws[gid].pw_len;
63 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
64 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
65 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
66 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
70 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
71 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
72 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
73 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
89 const u32 salt_len = salt_bufs[salt_pos].salt_len;
95 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
100 w0_t[1] = pw_buf0[1];
101 w0_t[2] = pw_buf0[2];
102 w0_t[3] = pw_buf0[3];
106 w1_t[0] = pw_buf1[0];
107 w1_t[1] = pw_buf1[1];
108 w1_t[2] = pw_buf1[2];
109 w1_t[3] = pw_buf1[3];
125 const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
131 switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
133 w0_t[0] |= salt_buf0[0];
134 w0_t[1] |= salt_buf0[1];
135 w0_t[2] |= salt_buf0[2];
136 w0_t[3] |= salt_buf0[3];
137 w1_t[0] |= salt_buf1[0];
138 w1_t[1] |= salt_buf1[1];
139 w1_t[2] |= salt_buf1[2];
140 w1_t[3] |= salt_buf1[3];
141 w2_t[0] |= salt_buf2[0];
142 w2_t[1] |= salt_buf2[1];
143 w2_t[2] |= salt_buf2[2];
144 w2_t[3] |= salt_buf2[3];
145 w3_t[0] |= salt_buf3[0];
146 w3_t[1] |= salt_buf3[1];
147 w3_t[2] |= salt_buf3[2];
148 w3_t[3] |= salt_buf3[3];
156 s0[0] = salt_buf0[0];
157 s0[1] = salt_buf0[1];
158 s0[2] = salt_buf0[2];
159 s0[3] = salt_buf0[3];
163 s1[0] = salt_buf1[0];
164 s1[1] = salt_buf1[1];
165 s1[2] = salt_buf1[2];
166 s1[3] = salt_buf1[3];
182 switch_buffer_by_offset_le_VV (s0, s1, s2, s3, salt_len + out_len);
201 const u32x pw_salt_len = salt_len + out_len + salt_len;
203 append_0x80_4x4_VV (w0_t, w1_t, w2_t, w3_t, pw_salt_len);
205 u32x w0 = swap32 (w0_t[0]);
206 u32x w1 = swap32 (w0_t[1]);
207 u32x w2 = swap32 (w0_t[2]);
208 u32x w3 = swap32 (w0_t[3]);
209 u32x w4 = swap32 (w1_t[0]);
210 u32x w5 = swap32 (w1_t[1]);
211 u32x w6 = swap32 (w1_t[2]);
212 u32x w7 = swap32 (w1_t[3]);
213 u32x w8 = swap32 (w2_t[0]);
214 u32x w9 = swap32 (w2_t[1]);
215 u32x wa = swap32 (w2_t[2]);
216 u32x wb = swap32 (w2_t[3]);
217 u32x wc = swap32 (w3_t[0]);
218 u32x wd = swap32 (w3_t[1]);
220 u32x wf = pw_salt_len * 8;
235 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0);
236 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1);
237 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2);
238 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3);
239 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4);
240 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5);
241 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6);
242 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7);
243 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8);
244 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9);
245 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa);
246 SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb);
247 SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc);
248 SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd);
249 SHA1_STEP (SHA1_F0o, b, c, d, e, a, we);
250 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf);
251 w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0);
252 w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1);
253 w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2);
254 w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3);
259 w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4);
260 w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5);
261 w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6);
262 w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7);
263 w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8);
264 w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9);
265 wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa);
266 wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb);
267 wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc);
268 wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd);
269 we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we);
270 wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf);
271 w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0);
272 w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1);
273 w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2);
274 w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3);
275 w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4);
276 w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5);
277 w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6);
278 w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7);
283 w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8);
284 w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9);
285 wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa);
286 wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb);
287 wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc);
288 wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd);
289 we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we);
290 wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf);
291 w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0);
292 w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1);
293 w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2);
294 w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3);
295 w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4);
296 w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5);
297 w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6);
298 w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7);
299 w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8);
300 w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9);
301 wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa);
302 wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb);
307 wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc);
308 wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd);
309 we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we);
310 wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf);
311 w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0);
312 w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1);
313 w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2);
314 w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3);
315 w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4);
316 w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5);
317 w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6);
318 w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7);
319 w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8);
320 w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9);
321 wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa);
322 wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb);
323 wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc);
324 wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd);
325 we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we);
326 wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf);
328 COMPARE_M_SIMD (d, e, c, b);
/**
 * m04900_m08: vector-width-8 multi-hash variant. Same signature as
 * m04900_m04; its body is not visible in this extract.
 * NOTE(review): presumably an intentionally empty stub for this mode --
 * confirm against the full file.
 */
__kernel void m04900_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
/**
 * m04900_m16: vector-width-16 multi-hash variant. Same signature as
 * m04900_m04; its body is not visible in this extract.
 * NOTE(review): presumably an intentionally empty stub for this mode --
 * confirm against the full file.
 */
__kernel void m04900_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
/**
 * m04900_s04: hashcat single-hash ("s") kernel for hash-mode 4900,
 * sha1 ($salt . $pass . $salt), rule-based attack, base vector width.
 *
 * Identical message construction to m04900_m04, but targets one known
 * digest: it loads the target words into search[], and uses a pre-rotated
 * copy of one target word (e_rev) to reject candidates four rounds early.
 *
 * NOTE(review): this extract is gappy -- braces, local array declarations
 * (pw_buf0/pw_buf1, salt_buf0..salt_buf3, w0_t..w3_t, s0..s3), the SHA-1
 * state init (a..e), the search[] initializer braces and the declaration
 * of `we` are elided. Comments only were added; no code was changed.
 */
__kernel void m04900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)

  /**
   * base
   */

  const u32 lid = get_local_id (0);

  const u32 gid = get_global_id (0);

  // work-items beyond the candidate count do nothing
  if (gid >= gid_max) return;

  // load the first 8 u32 words of this work-item's candidate password
  pw_buf0[0] = pws[gid].i[ 0];
  pw_buf0[1] = pws[gid].i[ 1];
  pw_buf0[2] = pws[gid].i[ 2];
  pw_buf0[3] = pws[gid].i[ 3];

  pw_buf1[0] = pws[gid].i[ 4];
  pw_buf1[1] = pws[gid].i[ 5];
  pw_buf1[2] = pws[gid].i[ 6];
  pw_buf1[3] = pws[gid].i[ 7];

  const u32 pw_len = pws[gid].pw_len;

  /**
   * salt: first 8 u32 words of the active salt
   */

  salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
  salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
  salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
  salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];

  salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
  salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
  salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
  salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];

  const u32 salt_len = salt_bufs[salt_pos].salt_len;

  /**
   * digest: the single target hash this kernel searches for
   */

  const u32 search[4] =
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    digests_buf[digests_offset].digest_buf[DGST_R2],
    digests_buf[digests_offset].digest_buf[DGST_R3]

  // target word pre-rotated by 2 so it can be compared against the SHA-1
  // state before the final rounds complete (early-reject below)
  const u32 e_rev = rotl32_S (search[1], 2u);

  /**
   * loop: one iteration per VECT_SIZE rules
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
    // copy the base password into the working buffer for this rule batch
    w0_t[0] = pw_buf0[0];
    w0_t[1] = pw_buf0[1];
    w0_t[2] = pw_buf0[2];
    w0_t[3] = pw_buf0[3];

    w1_t[0] = pw_buf1[0];
    w1_t[1] = pw_buf1[1];
    w1_t[2] = pw_buf1[2];
    w1_t[3] = pw_buf1[3];

    // apply the current rule(s); out_len is the mutated password length
    const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

    /**
     * prepend salt: shift the password right by salt_len bytes ...
     */

    switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);

    // ... then OR the salt into the now-empty front bytes
    // NOTE(review): salt_buf2/salt_buf3 declarations are not visible in
    // this extract (presumably zero-filled tail words -- verify upstream).
    w0_t[0] |= salt_buf0[0];
    w0_t[1] |= salt_buf0[1];
    w0_t[2] |= salt_buf0[2];
    w0_t[3] |= salt_buf0[3];
    w1_t[0] |= salt_buf1[0];
    w1_t[1] |= salt_buf1[1];
    w1_t[2] |= salt_buf1[2];
    w1_t[3] |= salt_buf1[3];
    w2_t[0] |= salt_buf2[0];
    w2_t[1] |= salt_buf2[1];
    w2_t[2] |= salt_buf2[2];
    w2_t[3] |= salt_buf2[3];
    w3_t[0] |= salt_buf3[0];
    w3_t[1] |= salt_buf3[1];
    w3_t[2] |= salt_buf3[2];
    w3_t[3] |= salt_buf3[3];

    /**
     * append salt: build a second salt copy in s0..s3 ...
     */

    s0[0] = salt_buf0[0];
    s0[1] = salt_buf0[1];
    s0[2] = salt_buf0[2];
    s0[3] = salt_buf0[3];

    s1[0] = salt_buf1[0];
    s1[1] = salt_buf1[1];
    s1[2] = salt_buf1[2];
    s1[3] = salt_buf1[3];

    // ... shifted to sit directly behind salt|password
    switch_buffer_by_offset_le_VV (s0, s1, s2, s3, salt_len + out_len);

    // total message length: salt + password + salt (in bytes)
    const u32x pw_salt_len = salt_len + out_len + salt_len;

    // append the 0x80 SHA-1 padding byte after the message
    append_0x80_4x4_VV (w0_t, w1_t, w2_t, w3_t, pw_salt_len);

    /**
     * sha1: byte-swap the little-endian buffer into big-endian words
     * NOTE(review): `u32x we = ...` is elided in this extract although
     * `we` is used below -- confirm against the full file.
     */

    u32x w0 = swap32 (w0_t[0]);
    u32x w1 = swap32 (w0_t[1]);
    u32x w2 = swap32 (w0_t[2]);
    u32x w3 = swap32 (w0_t[3]);
    u32x w4 = swap32 (w1_t[0]);
    u32x w5 = swap32 (w1_t[1]);
    u32x w6 = swap32 (w1_t[2]);
    u32x w7 = swap32 (w1_t[3]);
    u32x w8 = swap32 (w2_t[0]);
    u32x w9 = swap32 (w2_t[1]);
    u32x wa = swap32 (w2_t[2]);
    u32x wb = swap32 (w2_t[3]);
    u32x wc = swap32 (w3_t[0]);
    u32x wd = swap32 (w3_t[1]);
    // last schedule word carries the message length in bits
    u32x wf = pw_salt_len * 8;

    // rounds 0..19 (f = Ch): first 16 use the message words directly
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, we);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf);
    // rounds 16..19: message schedule expansion W[t] = rotl1(W[t-3]^W[t-8]^W[t-14]^W[t-16])
    w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0);
    w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1);
    w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2);
    w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3);

    // rounds 20..39 (f = Parity)
    w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4);
    w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5);
    w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6);
    w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7);
    w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8);
    w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9);
    wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa);
    wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb);
    wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc);
    wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd);
    we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we);
    wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf);
    w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0);
    w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1);
    w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2);
    w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3);
    w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4);
    w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5);
    w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6);
    w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7);

    // rounds 40..59 (f = Maj)
    w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8);
    w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9);
    wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa);
    wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb);
    wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc);
    wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd);
    we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we);
    wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf);
    w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0);
    w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1);
    w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2);
    w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3);
    w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4);
    w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5);
    w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6);
    w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7);
    w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8);
    w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9);
    wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa);
    wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb);

    // rounds 60..75 (f = Parity)
    wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc);
    wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd);
    we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we);
    wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf);
    w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0);
    w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1);
    w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2);
    w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3);
    w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4);
    w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5);
    w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6);
    w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7);
    w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8);
    w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9);
    wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa);
    wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb);

    // early reject: if no lane of `e` can still match the pre-rotated
    // target word, skip the last four rounds and the full compare
    if (MATCHES_NONE_VS (e, e_rev)) continue;

    // rounds 76..79 (f = Parity)
    wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc);
    wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd);
    we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we);
    wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf);

    // hand the final state words to the single-hash compare macro (simd.c)
    COMPARE_S_SIMD (d, e, c, b);
/**
 * m04900_s08: vector-width-8 single-hash variant. Same signature as
 * m04900_s04; its body is not visible in this extract.
 * NOTE(review): presumably an intentionally empty stub for this mode --
 * confirm against the full file.
 */
__kernel void m04900_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
/**
 * m04900_s16: vector-width-16 single-hash variant. Same signature as
 * m04900_s04; its body is not visible in this extract.
 * NOTE(review): presumably an intentionally empty stub for this mode --
 * confirm against the full file.
 */
__kernel void m04900_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)