2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
27 #include "include/rp_gpu.h"
31 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
32 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
36 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
40 __device__ __constant__ gpu_rule_t c_rules[1024];
42 extern "C" __global__ void __launch_bounds__ (256, 1) m04900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
48 const u32 lid = threadIdx.x;
54 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
56 if (gid >= gid_max) return;
60 pw_buf0[0] = pws[gid].i[ 0];
61 pw_buf0[1] = pws[gid].i[ 1];
62 pw_buf0[2] = pws[gid].i[ 2];
63 pw_buf0[3] = pws[gid].i[ 3];
67 pw_buf1[0] = pws[gid].i[ 4];
68 pw_buf1[1] = pws[gid].i[ 5];
69 pw_buf1[2] = pws[gid].i[ 6];
70 pw_buf1[3] = pws[gid].i[ 7];
72 const u32 pw_len = pws[gid].pw_len;
80 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
81 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
82 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
83 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
87 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
88 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
89 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
90 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
106 const u32 salt_len = salt_bufs[salt_pos].salt_len;
112 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
116 w0_t[0] = pw_buf0[0];
117 w0_t[1] = pw_buf0[1];
118 w0_t[2] = pw_buf0[2];
119 w0_t[3] = pw_buf0[3];
123 w1_t[0] = pw_buf1[0];
124 w1_t[1] = pw_buf1[1];
125 w1_t[2] = pw_buf1[2];
126 w1_t[3] = pw_buf1[3];
142 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0_t, w1_t, pw_len);
148 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
150 w0_t[0] |= salt_buf0[0];
151 w0_t[1] |= salt_buf0[1];
152 w0_t[2] |= salt_buf0[2];
153 w0_t[3] |= salt_buf0[3];
154 w1_t[0] |= salt_buf1[0];
155 w1_t[1] |= salt_buf1[1];
156 w1_t[2] |= salt_buf1[2];
157 w1_t[3] |= salt_buf1[3];
158 w2_t[0] |= salt_buf2[0];
159 w2_t[1] |= salt_buf2[1];
160 w2_t[2] |= salt_buf2[2];
161 w2_t[3] |= salt_buf2[3];
162 w3_t[0] |= salt_buf3[0];
163 w3_t[1] |= salt_buf3[1];
164 w3_t[2] |= salt_buf3[2];
165 w3_t[3] |= salt_buf3[3];
173 s0[0] = salt_buf0[0];
174 s0[1] = salt_buf0[1];
175 s0[2] = salt_buf0[2];
176 s0[3] = salt_buf0[3];
180 s1[0] = salt_buf1[0];
181 s1[1] = salt_buf1[1];
182 s1[2] = salt_buf1[2];
183 s1[3] = salt_buf1[3];
199 switch_buffer_by_offset (s0, s1, s2, s3, salt_len + out_len);
218 const u32 pw_salt_len = salt_len + out_len + salt_len;
220 append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len);
222 u32x w0 = swap_workaround (w0_t[0]);
223 u32x w1 = swap_workaround (w0_t[1]);
224 u32x w2 = swap_workaround (w0_t[2]);
225 u32x w3 = swap_workaround (w0_t[3]);
226 u32x w4 = swap_workaround (w1_t[0]);
227 u32x w5 = swap_workaround (w1_t[1]);
228 u32x w6 = swap_workaround (w1_t[2]);
229 u32x w7 = swap_workaround (w1_t[3]);
230 u32x w8 = swap_workaround (w2_t[0]);
231 u32x w9 = swap_workaround (w2_t[1]);
232 u32x wa = swap_workaround (w2_t[2]);
233 u32x wb = swap_workaround (w2_t[3]);
234 u32x wc = swap_workaround (w3_t[0]);
235 u32x wd = swap_workaround (w3_t[1]);
237 u32x wf = pw_salt_len * 8;
252 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0);
253 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1);
254 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2);
255 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3);
256 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4);
257 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5);
258 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6);
259 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7);
260 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8);
261 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9);
262 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa);
263 SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb);
264 SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc);
265 SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd);
266 SHA1_STEP (SHA1_F0o, b, c, d, e, a, we);
267 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf);
268 w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0);
269 w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1);
270 w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2);
271 w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3);
276 w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4);
277 w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5);
278 w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6);
279 w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7);
280 w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8);
281 w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9);
282 wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa);
283 wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb);
284 wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc);
285 wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd);
286 we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we);
287 wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf);
288 w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0);
289 w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1);
290 w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2);
291 w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3);
292 w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4);
293 w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5);
294 w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6);
295 w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7);
300 w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8);
301 w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9);
302 wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa);
303 wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb);
304 wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc);
305 wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd);
306 we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we);
307 wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf);
308 w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0);
309 w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1);
310 w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2);
311 w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3);
312 w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4);
313 w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5);
314 w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6);
315 w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7);
316 w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8);
317 w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9);
318 wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa);
319 wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb);
324 wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc);
325 wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd);
326 we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we);
327 wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf);
328 w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0);
329 w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1);
330 w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2);
331 w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3);
332 w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4);
333 w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5);
334 w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6);
335 w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7);
336 w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8);
337 w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9);
338 wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa);
339 wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb);
340 wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc);
341 wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd);
342 we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we);
343 wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf);
350 #include VECT_COMPARE_M
354 extern "C" __global__ void __launch_bounds__ (256, 1) m04900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
358 extern "C" __global__ void __launch_bounds__ (256, 1) m04900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
362 extern "C" __global__ void __launch_bounds__ (256, 1) m04900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
368 const u32 lid = threadIdx.x;
374 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
376 if (gid >= gid_max) return;
380 pw_buf0[0] = pws[gid].i[ 0];
381 pw_buf0[1] = pws[gid].i[ 1];
382 pw_buf0[2] = pws[gid].i[ 2];
383 pw_buf0[3] = pws[gid].i[ 3];
387 pw_buf1[0] = pws[gid].i[ 4];
388 pw_buf1[1] = pws[gid].i[ 5];
389 pw_buf1[2] = pws[gid].i[ 6];
390 pw_buf1[3] = pws[gid].i[ 7];
392 const u32 pw_len = pws[gid].pw_len;
400 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
401 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
402 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
403 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
407 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
408 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
409 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
410 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
426 const u32 salt_len = salt_bufs[salt_pos].salt_len;
432 const u32 search[4] =
434 digests_buf[digests_offset].digest_buf[DGST_R0],
435 digests_buf[digests_offset].digest_buf[DGST_R1],
436 digests_buf[digests_offset].digest_buf[DGST_R2],
437 digests_buf[digests_offset].digest_buf[DGST_R3]
444 const u32 e_rev = rotl32 (search[1], 2u);
450 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
454 w0_t[0] = pw_buf0[0];
455 w0_t[1] = pw_buf0[1];
456 w0_t[2] = pw_buf0[2];
457 w0_t[3] = pw_buf0[3];
461 w1_t[0] = pw_buf1[0];
462 w1_t[1] = pw_buf1[1];
463 w1_t[2] = pw_buf1[2];
464 w1_t[3] = pw_buf1[3];
480 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0_t, w1_t, pw_len);
486 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
488 w0_t[0] |= salt_buf0[0];
489 w0_t[1] |= salt_buf0[1];
490 w0_t[2] |= salt_buf0[2];
491 w0_t[3] |= salt_buf0[3];
492 w1_t[0] |= salt_buf1[0];
493 w1_t[1] |= salt_buf1[1];
494 w1_t[2] |= salt_buf1[2];
495 w1_t[3] |= salt_buf1[3];
496 w2_t[0] |= salt_buf2[0];
497 w2_t[1] |= salt_buf2[1];
498 w2_t[2] |= salt_buf2[2];
499 w2_t[3] |= salt_buf2[3];
500 w3_t[0] |= salt_buf3[0];
501 w3_t[1] |= salt_buf3[1];
502 w3_t[2] |= salt_buf3[2];
503 w3_t[3] |= salt_buf3[3];
511 s0[0] = salt_buf0[0];
512 s0[1] = salt_buf0[1];
513 s0[2] = salt_buf0[2];
514 s0[3] = salt_buf0[3];
518 s1[0] = salt_buf1[0];
519 s1[1] = salt_buf1[1];
520 s1[2] = salt_buf1[2];
521 s1[3] = salt_buf1[3];
537 switch_buffer_by_offset (s0, s1, s2, s3, salt_len + out_len);
556 const u32 pw_salt_len = salt_len + out_len + salt_len;
558 append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len);
560 u32x w0 = swap_workaround (w0_t[0]);
561 u32x w1 = swap_workaround (w0_t[1]);
562 u32x w2 = swap_workaround (w0_t[2]);
563 u32x w3 = swap_workaround (w0_t[3]);
564 u32x w4 = swap_workaround (w1_t[0]);
565 u32x w5 = swap_workaround (w1_t[1]);
566 u32x w6 = swap_workaround (w1_t[2]);
567 u32x w7 = swap_workaround (w1_t[3]);
568 u32x w8 = swap_workaround (w2_t[0]);
569 u32x w9 = swap_workaround (w2_t[1]);
570 u32x wa = swap_workaround (w2_t[2]);
571 u32x wb = swap_workaround (w2_t[3]);
572 u32x wc = swap_workaround (w3_t[0]);
573 u32x wd = swap_workaround (w3_t[1]);
575 u32x wf = pw_salt_len * 8;
590 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0);
591 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1);
592 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2);
593 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3);
594 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4);
595 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5);
596 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6);
597 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7);
598 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8);
599 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9);
600 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa);
601 SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb);
602 SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc);
603 SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd);
604 SHA1_STEP (SHA1_F0o, b, c, d, e, a, we);
605 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf);
606 w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0);
607 w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1);
608 w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2);
609 w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3);
614 w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4);
615 w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5);
616 w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6);
617 w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7);
618 w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8);
619 w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9);
620 wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa);
621 wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb);
622 wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc);
623 wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd);
624 we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we);
625 wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf);
626 w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0);
627 w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1);
628 w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2);
629 w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3);
630 w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4);
631 w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5);
632 w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6);
633 w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7);
638 w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8);
639 w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9);
640 wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa);
641 wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb);
642 wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc);
643 wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd);
644 we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we);
645 wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf);
646 w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0);
647 w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1);
648 w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2);
649 w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3);
650 w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4);
651 w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5);
652 w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6);
653 w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7);
654 w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8);
655 w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9);
656 wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa);
657 wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb);
662 wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc);
663 wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd);
664 we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we);
665 wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf);
666 w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0);
667 w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1);
668 w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2);
669 w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3);
670 w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4);
671 w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5);
672 w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6);
673 w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7);
674 w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8);
675 w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9);
676 wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa);
677 wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb);
679 if (e != e_rev) continue;
681 wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc);
682 wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd);
683 we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we);
684 wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf);
691 #include VECT_COMPARE_S
695 extern "C" __global__ void __launch_bounds__ (256, 1) m04900_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
699 extern "C" __global__ void __launch_bounds__ (256, 1) m04900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)