2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
// Select the digest-comparison include for the active vector width
// (vect1/vect2/vect4): VECT_COMPARE_S = single-hash compare,
// VECT_COMPARE_M = multi-hash (bitmap) compare.
// NOTE(review): the three define pairs are presumably wrapped in elided
// #ifdef VECT_SIZE1/2/4 guards not visible in this chunk; as shown,
// unconditional redefinition would not compile cleanly -- confirm
// against the full file.
29 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
30 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
34 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
35 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
39 #define VECT_COMPARE_S "check_single_vect4_comp4.c"
40 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
// Combinator-side candidate words staged in constant memory (read-only,
// same element read by all threads of an iteration); the kernels below
// index it with il_pos < combs_cnt, so at most 1024 entries per batch.
43 __device__ __constant__ comb_t c_combs[1024];
/**
 * m00110_m04 - multi-hash ("m") kernel for a combinator attack: each thread
 * loads one base password from pws[gid], merges it with every candidate in
 * c_combs[] (left/right order selected by combs_mode), appends the session
 * salt, and computes one 64-byte SHA-1 block over the result.  The digest
 * is then checked against the loaded hash list by the code pulled in via
 * VECT_COMPARE_M at the bottom.
 *
 * Launch: 1-D grid, 256 threads per block (__launch_bounds__ (256, 1)),
 * one thread per base password; threads with gid >= gid_max return early.
 *
 * NOTE(review): this chunk is elided -- declarations of wordl2/wordl3,
 * wordr2/wordr3, s2/s3, w0..w3, we_t, the SHA-1 state init (a..e) and the
 * surrounding braces are not visible here.  Comments describe only what
 * the visible lines establish.
 */
45 extern "C" __global__ void __launch_bounds__ (256, 1) m00110_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Thread id within the block; not used by the visible lines -- presumably
// consumed by the included compare code (TODO confirm).
51 const u32 lid = threadIdx.x;
// Flat 1-D global index: one thread per base password.
57 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
// Tail guard: the grid may be larger than the number of candidates.
59 if (gid >= gid_max) return;
// Left (base) password: 8 u32 words = up to 32 bytes of plaintext.
63 wordl0[0] = pws[gid].i[ 0];
64 wordl0[1] = pws[gid].i[ 1];
65 wordl0[2] = pws[gid].i[ 2];
66 wordl0[3] = pws[gid].i[ 3];
70 wordl1[0] = pws[gid].i[ 4];
71 wordl1[1] = pws[gid].i[ 5];
72 wordl1[2] = pws[gid].i[ 6];
73 wordl1[3] = pws[gid].i[ 7];
89 const u32 pw_l_len = pws[gid].pw_len;
// If the base word forms the RIGHT half, shift it past the combinator word.
// The shift uses c_combs[0].pw_len, i.e. it assumes every entry in this
// batch has the same length -- presumably guaranteed by the host; confirm.
91 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
93 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len);
// Session salt: up to 8 u32 words = 32 bytes.
102 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
103 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
104 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
105 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
109 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
110 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
111 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
112 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
114 const u32 salt_len = salt_bufs[salt_pos].salt_len;
// Inner loop: one SHA-1 computation per combinator candidate.
120 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
122 const u32 pw_r_len = c_combs[il_pos].pw_len;
// Total plaintext length of the combined (left + right) password.
124 const u32 pw_len = pw_l_len + pw_r_len;
// Right (combinator) word from constant memory.
128 wordr0[0] = c_combs[il_pos].i[0];
129 wordr0[1] = c_combs[il_pos].i[1];
130 wordr0[2] = c_combs[il_pos].i[2];
131 wordr0[3] = c_combs[il_pos].i[3];
135 wordr1[0] = c_combs[il_pos].i[4];
136 wordr1[1] = c_combs[il_pos].i[5];
137 wordr1[2] = c_combs[il_pos].i[6];
138 wordr1[3] = c_combs[il_pos].i[7];
// If the base word forms the LEFT half, shift the combinator word past it.
154 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
156 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
// Working copy of the salt (the shift below destroys alignment).
165 s0[0] = salt_buf0[0];
166 s0[1] = salt_buf0[1];
167 s0[2] = salt_buf0[2];
168 s0[3] = salt_buf0[3];
172 s1[0] = salt_buf1[0];
173 s1[1] = salt_buf1[1];
174 s1[2] = salt_buf1[2];
175 s1[3] = salt_buf1[3];
// Shift the salt to byte offset pw_len: salt is appended AFTER the
// combined password (pass.salt construction).
191 switch_buffer_by_offset (s0, s1, s2, s3, pw_len);
193 const u32 pw_salt_len = pw_len + salt_len;
// OR-merge the three non-overlapping byte ranges (left word, right word,
// salt) into the 64-byte message block w0..w3.
197 w0[0] = wordl0[0] | wordr0[0] | s0[0];
198 w0[1] = wordl0[1] | wordr0[1] | s0[1];
199 w0[2] = wordl0[2] | wordr0[2] | s0[2];
200 w0[3] = wordl0[3] | wordr0[3] | s0[3];
204 w1[0] = wordl1[0] | wordr1[0] | s1[0];
205 w1[1] = wordl1[1] | wordr1[1] | s1[1];
206 w1[2] = wordl1[2] | wordr1[2] | s1[2];
207 w1[3] = wordl1[3] | wordr1[3] | s1[3];
211 w2[0] = wordl2[0] | wordr2[0] | s2[0];
212 w2[1] = wordl2[1] | wordr2[1] | s2[1];
213 w2[2] = wordl2[2] | wordr2[2] | s2[2];
214 w2[3] = wordl2[3] | wordr2[3] | s2[3];
218 w3[0] = wordl3[0] | wordr3[0] | s3[0];
219 w3[1] = wordl3[1] | wordr3[1] | s3[1];
// SHA-1 padding: place the mandatory 0x80 terminator byte at pw_salt_len.
223 append_0x80_4 (w0, w1, w2, w3, pw_salt_len);
// Byte-swap the little-endian message words into SHA-1's big-endian word
// order (swap_workaround is a helper defined elsewhere -- TODO confirm).
229 u32x w0_t = swap_workaround (w0[0]);
230 u32x w1_t = swap_workaround (w0[1]);
231 u32x w2_t = swap_workaround (w0[2]);
232 u32x w3_t = swap_workaround (w0[3]);
233 u32x w4_t = swap_workaround (w1[0]);
234 u32x w5_t = swap_workaround (w1[1]);
235 u32x w6_t = swap_workaround (w1[2]);
236 u32x w7_t = swap_workaround (w1[3]);
237 u32x w8_t = swap_workaround (w2[0]);
238 u32x w9_t = swap_workaround (w2[1]);
239 u32x wa_t = swap_workaround (w2[2]);
240 u32x wb_t = swap_workaround (w2[3]);
241 u32x wc_t = swap_workaround (w3[0]);
242 u32x wd_t = swap_workaround (w3[1]);
// Last padding word: total message length in BITS (we_t, presumably 0,
// is declared on an elided line -- it is used below but not visible here).
244 u32x wf_t = pw_salt_len * 8;
// SHA-1 rounds 0-19 (F0o = Ch).  Word expansion starts at round 16.
255 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
256 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
257 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
258 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
259 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
260 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
261 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
262 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
263 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
264 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
265 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
266 SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
267 SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
268 SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
269 SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
270 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
271 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
272 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
273 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
274 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
// Rounds 20-39 (F1 = parity/XOR).
279 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
280 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
281 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
282 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
283 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
284 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
285 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
286 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
287 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
288 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
289 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
290 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
291 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
292 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
293 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
294 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
295 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
296 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
297 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
298 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
// Rounds 40-59 (F2o = Maj).
303 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
304 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
305 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
306 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
307 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
308 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
309 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
310 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
311 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
312 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
313 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
314 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
315 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
316 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
317 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
318 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
319 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
320 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
321 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
322 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
// Rounds 60-79 (F1 again); the multi-hash variant always runs all 80.
327 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
328 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
329 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
330 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
331 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
332 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
333 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
334 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
335 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
336 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
337 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
338 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
339 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
340 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
341 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
342 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
343 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
344 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
345 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
346 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
// Multi-hash comparison: checks a..e against the bitmap filters and digest
// list, marking hits via hashes_shown/plains_buf/d_return_buf (see the
// included file for details).
354 #include VECT_COMPARE_M
// Multi-hash entry points for the longer-password variants (_08, _16).
// Bodies are not visible in this chunk -- presumably empty stubs as is
// common for unsupported lengths in this kernel family; confirm against
// the full file.
// NOTE(review): these signatures take "rules_cnt" where the _04 variant
// takes "combs_cnt" -- likely a copy/paste artifact from the rule-attack
// template; harmless only if the bodies are empty.
358 extern "C" __global__ void __launch_bounds__ (256, 1) m00110_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
362 extern "C" __global__ void __launch_bounds__ (256, 1) m00110_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
/**
 * m00110_s04 - single-hash ("s") kernel: identical candidate construction
 * to m00110_m04 (base password from pws[gid] combined with each c_combs[]
 * entry, salt appended, one SHA-1 block), but it targets ONE digest
 * (digests_buf[digests_offset]) and uses an early-reject on the 'e'
 * register (e_rev) to skip the final four SHA-1 rounds for non-matches.
 * Final verification is done by the included VECT_COMPARE_S code.
 *
 * Launch: 1-D grid, 256 threads per block, one thread per base password;
 * threads with gid >= gid_max return early.
 *
 * NOTE(review): this chunk is elided -- declarations of wordl2/wordl3,
 * wordr2/wordr3, s2/s3, w0..w3, we_t, the SHA-1 state init (a..e) and
 * surrounding braces are not visible here.
 */
366 extern "C" __global__ void __launch_bounds__ (256, 1) m00110_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Thread id within the block; not used by the visible lines -- presumably
// consumed by the included compare code (TODO confirm).
372 const u32 lid = threadIdx.x;
// Flat 1-D global index: one thread per base password.
378 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
// Tail guard: the grid may be larger than the candidate count.
380 if (gid >= gid_max) return;
// Left (base) password: 8 u32 words = up to 32 bytes of plaintext.
384 wordl0[0] = pws[gid].i[ 0];
385 wordl0[1] = pws[gid].i[ 1];
386 wordl0[2] = pws[gid].i[ 2];
387 wordl0[3] = pws[gid].i[ 3];
391 wordl1[0] = pws[gid].i[ 4];
392 wordl1[1] = pws[gid].i[ 5];
393 wordl1[2] = pws[gid].i[ 6];
394 wordl1[3] = pws[gid].i[ 7];
410 const u32 pw_l_len = pws[gid].pw_len;
// If the base word forms the RIGHT half, shift it past the combinator
// word; uses c_combs[0].pw_len for all entries (assumes a uniform batch
// length -- presumably host-guaranteed; confirm).
412 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
414 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len);
// Session salt: up to 8 u32 words = 32 bytes.
423 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
424 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
425 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
426 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
430 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
431 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
432 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
433 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
435 const u32 salt_len = salt_bufs[salt_pos].salt_len;
// The single target digest, in the solver's DGST_R0..R3 word order.
441 const u32 search[4] =
443 digests_buf[digests_offset].digest_buf[DGST_R0],
444 digests_buf[digests_offset].digest_buf[DGST_R1],
445 digests_buf[digests_offset].digest_buf[DGST_R2],
446 digests_buf[digests_offset].digest_buf[DGST_R3]
// Pre-rotated target value for the 'e' register, checked after round 76
// below so non-matching candidates can skip the last four rounds.
453 const u32 e_rev = rotl32 (search[1], 2u);
// Inner loop: one SHA-1 computation per combinator candidate.
459 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
461 const u32 pw_r_len = c_combs[il_pos].pw_len;
// Total plaintext length of the combined (left + right) password.
463 const u32 pw_len = pw_l_len + pw_r_len;
// Right (combinator) word from constant memory.
467 wordr0[0] = c_combs[il_pos].i[0];
468 wordr0[1] = c_combs[il_pos].i[1];
469 wordr0[2] = c_combs[il_pos].i[2];
470 wordr0[3] = c_combs[il_pos].i[3];
474 wordr1[0] = c_combs[il_pos].i[4];
475 wordr1[1] = c_combs[il_pos].i[5];
476 wordr1[2] = c_combs[il_pos].i[6];
477 wordr1[3] = c_combs[il_pos].i[7];
// If the base word forms the LEFT half, shift the combinator word past it.
493 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
495 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
// Working copy of the salt (the shift below destroys alignment).
504 s0[0] = salt_buf0[0];
505 s0[1] = salt_buf0[1];
506 s0[2] = salt_buf0[2];
507 s0[3] = salt_buf0[3];
511 s1[0] = salt_buf1[0];
512 s1[1] = salt_buf1[1];
513 s1[2] = salt_buf1[2];
514 s1[3] = salt_buf1[3];
// Shift the salt to byte offset pw_len: salt is appended AFTER the
// combined password (pass.salt construction).
530 switch_buffer_by_offset (s0, s1, s2, s3, pw_len);
532 const u32 pw_salt_len = pw_len + salt_len;
// OR-merge the three non-overlapping byte ranges (left word, right word,
// salt) into the 64-byte message block w0..w3.
536 w0[0] = wordl0[0] | wordr0[0] | s0[0];
537 w0[1] = wordl0[1] | wordr0[1] | s0[1];
538 w0[2] = wordl0[2] | wordr0[2] | s0[2];
539 w0[3] = wordl0[3] | wordr0[3] | s0[3];
543 w1[0] = wordl1[0] | wordr1[0] | s1[0];
544 w1[1] = wordl1[1] | wordr1[1] | s1[1];
545 w1[2] = wordl1[2] | wordr1[2] | s1[2];
546 w1[3] = wordl1[3] | wordr1[3] | s1[3];
550 w2[0] = wordl2[0] | wordr2[0] | s2[0];
551 w2[1] = wordl2[1] | wordr2[1] | s2[1];
552 w2[2] = wordl2[2] | wordr2[2] | s2[2];
553 w2[3] = wordl2[3] | wordr2[3] | s2[3];
557 w3[0] = wordl3[0] | wordr3[0] | s3[0];
558 w3[1] = wordl3[1] | wordr3[1] | s3[1];
// SHA-1 padding: place the mandatory 0x80 terminator byte at pw_salt_len.
562 append_0x80_4 (w0, w1, w2, w3, pw_salt_len);
// Byte-swap the little-endian message words into SHA-1's big-endian word
// order (swap_workaround is a helper defined elsewhere -- TODO confirm).
568 u32x w0_t = swap_workaround (w0[0]);
569 u32x w1_t = swap_workaround (w0[1]);
570 u32x w2_t = swap_workaround (w0[2]);
571 u32x w3_t = swap_workaround (w0[3]);
572 u32x w4_t = swap_workaround (w1[0]);
573 u32x w5_t = swap_workaround (w1[1]);
574 u32x w6_t = swap_workaround (w1[2]);
575 u32x w7_t = swap_workaround (w1[3]);
576 u32x w8_t = swap_workaround (w2[0]);
577 u32x w9_t = swap_workaround (w2[1]);
578 u32x wa_t = swap_workaround (w2[2]);
579 u32x wb_t = swap_workaround (w2[3]);
580 u32x wc_t = swap_workaround (w3[0]);
581 u32x wd_t = swap_workaround (w3[1]);
// Last padding word: total message length in BITS (we_t, presumably 0,
// is declared on an elided line -- it is used below but not visible here).
583 u32x wf_t = pw_salt_len * 8;
// SHA-1 rounds 0-19 (F0o = Ch).  Word expansion starts at round 16.
594 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
595 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
596 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
597 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
598 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
599 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
600 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
601 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
602 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
603 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
604 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
605 SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
606 SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
607 SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
608 SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
609 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
610 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
611 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
612 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
613 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
// Rounds 20-39 (F1 = parity/XOR).
618 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
619 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
620 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
621 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
622 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
623 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
624 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
625 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
626 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
627 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
628 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
629 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
630 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
631 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
632 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
633 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
634 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
635 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
636 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
637 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
// Rounds 40-59 (F2o = Maj).
642 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
643 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
644 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
645 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
646 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
647 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
648 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
649 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
650 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
651 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
652 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
653 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
654 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
655 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
656 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
657 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
658 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
659 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
660 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
661 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
// Rounds 60-75 (F1 again); 'e' is final after round 76's inputs are fixed.
666 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
667 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
668 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
669 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
670 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
671 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
672 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
673 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
674 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
675 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
676 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
677 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
678 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
679 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
680 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
681 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
// Early reject: if 'e' already disagrees with the pre-rotated target, no
// final digest can match -- skip the remaining four rounds and compare.
// NOTE(review): causes warp divergence only on near-misses, which are
// rare, so the branch is cheap in practice.
683 if (e != e_rev) continue;
// Rounds 76-79: finish the state only for surviving candidates.
685 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
686 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
687 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
688 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
// Single-hash comparison: full check of a..e against search[], marking a
// hit via hashes_shown/plains_buf/d_return_buf (see the included file).
696 #include VECT_COMPARE_S
// Single-hash entry points for the longer-password variants (_08, _16).
// Bodies are not visible in this chunk -- presumably empty stubs as is
// common for unsupported lengths in this kernel family; confirm against
// the full file.
// NOTE(review): like the m08/m16 stubs, these take "rules_cnt" where the
// _04 variant takes "combs_cnt" -- likely a copy/paste artifact from the
// rule-attack template; harmless only if the bodies are empty.
700 extern "C" __global__ void __launch_bounds__ (256, 1) m00110_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
704 extern "C" __global__ void __launch_bounds__ (256, 1) m00110_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)