2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
27 #include "include/rp_gpu.h"
// VECT_COMPARE_S / VECT_COMPARE_M name the source files that the kernels below
// pull in via `#include VECT_COMPARE_S` and `#include VECT_COMPARE_M`.
// Three pairs are listed, one each for the "vect1", "vect2" and "vect4" file
// variants.
// NOTE(review): the same two macros are defined three times here; the
// conditional directives that presumably select exactly one pair are not
// visible in this extract — confirm against the full file.
31 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
32 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
36 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
41 #define VECT_COMPARE_S "check_single_vect4_comp4.c"
42 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
// Table of 1024 gpu_rule_t entries declared in __constant__ device memory.
// The kernels in this file index it with the rule loop counter (il_pos) and
// hand the entry's `cmds` member to apply_rules.
// NOTE(review): how the host fills this table is not visible in this file.
45 __device__ __constant__ gpu_rule_t c_rules[1024];
// m01100_m04: kernel entry point. For each rule it transforms the thread's
// candidate, runs two consecutive sequences of MD4_STEP rounds (the second one
// over the salt words) and finishes with the comparison code pulled in through
// `#include VECT_COMPARE_M`.
// __launch_bounds__ (256, 1) declares at most 256 threads per block and a
// minimum of 1 resident block per multiprocessor.
// NOTE(review): the leading numbers are line numbers of the original file and
// they jump, so braces, local declarations and some statements are elided in
// this extract.
47 extern "C" __global__ void __launch_bounds__ (256, 1) m01100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Thread index within the block.
// NOTE(review): lid is not used in the visible lines; presumably consumed by
// the included compare code — confirm.
53 const u32 lid = threadIdx.x;
// Flat global thread index; threads at or beyond gid_max exit immediately.
59 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
61 if (gid >= gid_max) return;
// Copy words 0..3 of this thread's candidate (pws[gid].i) into pw_buf0.
65 pw_buf0[0] = pws[gid].i[ 0];
66 pw_buf0[1] = pws[gid].i[ 1];
67 pw_buf0[2] = pws[gid].i[ 2];
68 pw_buf0[3] = pws[gid].i[ 3];
// Copy words 4..7 of the candidate into pw_buf1.
72 pw_buf1[0] = pws[gid].i[ 4];
73 pw_buf1[1] = pws[gid].i[ 5];
74 pw_buf1[2] = pws[gid].i[ 6];
75 pw_buf1[3] = pws[gid].i[ 7];
// Candidate length as stored alongside the words.
77 const u32 pw_len = pws[gid].pw_len;
// Copy salt words 0..9 of the salt selected by salt_pos into
// salt_buf0 (0..3), salt_buf1 (4..7) and salt_buf2 (8..9).
85 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
86 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
87 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
88 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
92 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
93 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
94 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
95 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
99 salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
100 salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
// Salt length as stored with the salt.
104 const u32 salt_len = salt_bufs[salt_pos].salt_len;
// One iteration per rule; il_pos indexes c_rules.
110 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
// Apply rule il_pos to w0/w1; out_len is the value apply_rules returns.
// NOTE(review): w0/w1 are presumably initialised from pw_buf0/pw_buf1 in
// elided lines — confirm.
140 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
// NOTE(review): presumably places the 0x80 padding byte at offset out_len;
// the helper is not visible here.
142 append_0x80_2 (w0, w1, out_len);
// Expand w0 into w0_t/w1_t and w1 into w2_t/w3_t.
// NOTE(review): presumably widens each byte to a 16-bit character — confirm
// against make_unicode.
149 make_unicode (w0, w0_t, w1_t);
150 make_unicode (w1, w2_t, w3_t);
// Length word: out_len * 8 bits, doubled.
// NOTE(review): the factor 2 presumably accounts for the make_unicode
// expansion — confirm.
152 w3_t[2] = out_len * 8 * 2;
// First hash, group 1: 16 steps with MD4_Fo / MD4C00, words taken in the
// order w0_t[0..3], w1_t[0..3], w2_t[0..3], w3_t[0..3].
// NOTE(review): a, b, c, d are initialised in elided lines.
159 MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
160 MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
161 MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02);
162 MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03);
163 MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00);
164 MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01);
165 MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02);
166 MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03);
167 MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00);
168 MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01);
169 MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02);
170 MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03);
171 MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00);
172 MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01);
173 MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02);
174 MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03);
// First hash, group 2: 16 steps with MD4_Go / MD4C01; element 0 of
// w0_t..w3_t first, then element 1, 2 and 3.
176 MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10);
177 MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11);
178 MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12);
179 MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13);
180 MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10);
181 MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11);
182 MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12);
183 MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13);
184 MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10);
185 MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11);
186 MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12);
187 MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13);
188 MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10);
189 MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11);
190 MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12);
191 MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13);
// First hash, group 3: 16 steps with MD4_H / MD4C02; arrays visited in the
// order w0_t, w2_t, w1_t, w3_t for element 0, then 2, then 1, then 3.
193 MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20);
194 MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21);
195 MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22);
196 MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23);
197 MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20);
198 MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21);
199 MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22);
200 MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23);
201 MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20);
202 MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21);
203 MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22);
204 MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23);
205 MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20);
206 MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21);
207 MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22);
208 MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23);
// Second message block: w1_t[0..3], w2_t[0..3] and w3_t[0..1] are overwritten
// with the ten salt words.
// NOTE(review): w0_t is not assigned in the visible lines; presumably it
// receives the 16-byte result of the first hash in elided lines (the length
// below counts 16 + salt_len bytes) — confirm.
219 w1_t[0] = salt_buf0[0];
220 w1_t[1] = salt_buf0[1];
221 w1_t[2] = salt_buf0[2];
222 w1_t[3] = salt_buf0[3];
223 w2_t[0] = salt_buf1[0];
224 w2_t[1] = salt_buf1[1];
225 w2_t[2] = salt_buf1[2];
226 w2_t[3] = salt_buf1[3];
227 w3_t[0] = salt_buf2[0];
228 w3_t[1] = salt_buf2[1];
// Length word: (16 + salt_len) bytes expressed in bits.
229 w3_t[2] = (16 + salt_len) * 8;
// Second hash, group 1: same 16-step MD4_Fo sequence as above, now over the
// second block.
237 MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
238 MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
239 MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02);
240 MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03);
241 MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00);
242 MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01);
243 MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02);
244 MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03);
245 MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00);
246 MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01);
247 MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02);
248 MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03);
249 MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00);
250 MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01);
251 MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02);
252 MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03);
// Second hash, group 2: 16 steps with MD4_Go / MD4C01.
254 MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10);
255 MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11);
256 MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12);
257 MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13);
258 MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10);
259 MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11);
260 MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12);
261 MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13);
262 MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10);
263 MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11);
264 MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12);
265 MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13);
266 MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10);
267 MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11);
268 MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12);
269 MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13);
// Second hash, group 3: 16 steps with MD4_H / MD4C02.
271 MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20);
272 MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21);
273 MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22);
274 MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23);
275 MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20);
276 MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21);
277 MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22);
278 MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23);
279 MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20);
280 MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21);
281 MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22);
282 MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23);
283 MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20);
284 MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21);
285 MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22);
286 MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23);
// Comparison code is textually included from the file named by VECT_COMPARE_M.
// NOTE(review): presumably checks the result against multiple target digests;
// the included file is not visible here.
293 #include VECT_COMPARE_M
// m01100_m08: kernel entry point declared with the same parameter list and
// __launch_bounds__ (256, 1) as the other kernels in this file.
// No body statements for it appear in this extract (the next visible line is
// the following kernel's signature).
// NOTE(review): presumably an empty stub — confirm against the full file.
297 extern "C" __global__ void __launch_bounds__ (256, 1) m01100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// m01100_m16: kernel entry point declared with the same parameter list and
// __launch_bounds__ (256, 1) as the other kernels in this file.
// No body statements for it appear in this extract (the next visible line is
// the following kernel's signature).
// NOTE(review): presumably an empty stub — confirm against the full file.
301 extern "C" __global__ void __launch_bounds__ (256, 1) m01100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// m01100_s04: kernel entry point. Performs the same per-rule sequence as
// m01100_m04 (rule application, two consecutive sequences of MD4_STEP rounds,
// the second over the salt words) but additionally loads one target digest
// into `search`, skips the rest of an iteration early when the `a` word does
// not match search[0], and finishes with `#include VECT_COMPARE_S`.
// __launch_bounds__ (256, 1) declares at most 256 threads per block and a
// minimum of 1 resident block per multiprocessor.
// NOTE(review): the leading numbers are line numbers of the original file and
// they jump, so braces, local declarations and some statements are elided in
// this extract.
305 extern "C" __global__ void __launch_bounds__ (256, 1) m01100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Thread index within the block.
// NOTE(review): lid is not used in the visible lines; presumably consumed by
// the included compare code — confirm.
311 const u32 lid = threadIdx.x;
// Flat global thread index; threads at or beyond gid_max exit immediately.
317 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
319 if (gid >= gid_max) return;
// Copy words 0..3 of this thread's candidate (pws[gid].i) into pw_buf0.
323 pw_buf0[0] = pws[gid].i[ 0];
324 pw_buf0[1] = pws[gid].i[ 1];
325 pw_buf0[2] = pws[gid].i[ 2];
326 pw_buf0[3] = pws[gid].i[ 3];
// Copy words 4..7 of the candidate into pw_buf1.
330 pw_buf1[0] = pws[gid].i[ 4];
331 pw_buf1[1] = pws[gid].i[ 5];
332 pw_buf1[2] = pws[gid].i[ 6];
333 pw_buf1[3] = pws[gid].i[ 7];
// Candidate length as stored alongside the words.
335 const u32 pw_len = pws[gid].pw_len;
// The four words of the target digest at digests_offset, picked in the order
// given by DGST_R0..DGST_R3.
// NOTE(review): DGST_R0..DGST_R3 are defined outside the visible lines.
341 const u32 search[4] =
343 digests_buf[digests_offset].digest_buf[DGST_R0],
344 digests_buf[digests_offset].digest_buf[DGST_R1],
345 digests_buf[digests_offset].digest_buf[DGST_R2],
346 digests_buf[digests_offset].digest_buf[DGST_R3]
// Copy salt words 0..9 of the salt selected by salt_pos into
// salt_buf0 (0..3), salt_buf1 (4..7) and salt_buf2 (8..9).
355 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
356 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
357 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
358 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
362 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
363 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
364 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
365 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
369 salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
370 salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
// Salt length as stored with the salt.
374 const u32 salt_len = salt_bufs[salt_pos].salt_len;
// One iteration per rule; il_pos indexes c_rules.
380 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
// Apply rule il_pos to w0/w1; out_len is the value apply_rules returns.
// NOTE(review): w0/w1 are presumably initialised from pw_buf0/pw_buf1 in
// elided lines — confirm.
410 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
// NOTE(review): presumably places the 0x80 padding byte at offset out_len;
// the helper is not visible here.
412 append_0x80_2 (w0, w1, out_len);
// Expand w0 into w0_t/w1_t and w1 into w2_t/w3_t.
// NOTE(review): presumably widens each byte to a 16-bit character — confirm
// against make_unicode.
419 make_unicode (w0, w0_t, w1_t);
420 make_unicode (w1, w2_t, w3_t);
// Length word: out_len * 8 bits, doubled.
// NOTE(review): the factor 2 presumably accounts for the make_unicode
// expansion — confirm.
422 w3_t[2] = out_len * 8 * 2;
// First hash, group 1: 16 steps with MD4_Fo / MD4C00, words taken in the
// order w0_t[0..3], w1_t[0..3], w2_t[0..3], w3_t[0..3].
// NOTE(review): a, b, c, d are initialised in elided lines.
429 MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
430 MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
431 MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02);
432 MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03);
433 MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00);
434 MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01);
435 MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02);
436 MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03);
437 MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00);
438 MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01);
439 MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02);
440 MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03);
441 MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00);
442 MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01);
443 MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02);
444 MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03);
// First hash, group 2: 16 steps with MD4_Go / MD4C01; element 0 of
// w0_t..w3_t first, then element 1, 2 and 3.
446 MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10);
447 MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11);
448 MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12);
449 MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13);
450 MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10);
451 MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11);
452 MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12);
453 MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13);
454 MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10);
455 MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11);
456 MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12);
457 MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13);
458 MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10);
459 MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11);
460 MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12);
461 MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13);
// First hash, group 3: 16 steps with MD4_H / MD4C02; arrays visited in the
// order w0_t, w2_t, w1_t, w3_t for element 0, then 2, then 1, then 3.
463 MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20);
464 MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21);
465 MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22);
466 MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23);
467 MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20);
468 MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21);
469 MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22);
470 MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23);
471 MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20);
472 MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21);
473 MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22);
474 MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23);
475 MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20);
476 MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21);
477 MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22);
478 MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23);
// Second message block: w1_t[0..3], w2_t[0..3] and w3_t[0..1] are overwritten
// with the ten salt words.
// NOTE(review): w0_t is not assigned in the visible lines; presumably it
// receives the 16-byte result of the first hash in elided lines (the length
// below counts 16 + salt_len bytes) — confirm.
489 w1_t[0] = salt_buf0[0];
490 w1_t[1] = salt_buf0[1];
491 w1_t[2] = salt_buf0[2];
492 w1_t[3] = salt_buf0[3];
493 w2_t[0] = salt_buf1[0];
494 w2_t[1] = salt_buf1[1];
495 w2_t[2] = salt_buf1[2];
496 w2_t[3] = salt_buf1[3];
497 w3_t[0] = salt_buf2[0];
498 w3_t[1] = salt_buf2[1];
// Length word: (16 + salt_len) bytes expressed in bits.
499 w3_t[2] = (16 + salt_len) * 8;
// Second hash, group 1: same 16-step MD4_Fo sequence as above, now over the
// second block.
507 MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
508 MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
509 MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02);
510 MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03);
511 MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00);
512 MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01);
513 MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02);
514 MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03);
515 MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00);
516 MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01);
517 MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02);
518 MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03);
519 MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00);
520 MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01);
521 MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02);
522 MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03);
// Second hash, group 2: 16 steps with MD4_Go / MD4C01.
524 MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10);
525 MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11);
526 MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12);
527 MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13);
528 MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10);
529 MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11);
530 MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12);
531 MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13);
532 MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10);
533 MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11);
534 MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12);
535 MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13);
536 MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10);
537 MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11);
538 MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12);
539 MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13);
// Second hash, group 3: MD4_H / MD4C02 steps; the sequence is interrupted
// after its 13th step by the early-exit test below.
541 MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20);
542 MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21);
543 MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22);
544 MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23);
545 MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20);
546 MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21);
547 MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22);
548 MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23);
549 MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20);
550 MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21);
551 MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22);
552 MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23);
553 MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20);
// Early exit: `a` is not the first argument of any of the three remaining
// steps, so it can already be compared with search[0]; on a mismatch the rest
// of this rule iteration is skipped.
// NOTE(review): this relies on search[0] being expressed in the same form as
// `a` at this point (before any final addition) — confirm with the host side.
555 bool q_cond = (search[0] != a);
557 if (q_cond) continue;
// Remaining three steps of group 3.
559 MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21);
560 MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22);
561 MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23);
// Comparison code is textually included from the file named by VECT_COMPARE_S.
// NOTE(review): presumably compares the result with `search`; the included
// file is not visible here.
568 #include VECT_COMPARE_S
// m01100_s08: kernel entry point declared with the same parameter list and
// __launch_bounds__ (256, 1) as the other kernels in this file.
// No body statements for it appear in this extract (the next visible line is
// the following kernel's signature).
// NOTE(review): presumably an empty stub — confirm against the full file.
572 extern "C" __global__ void __launch_bounds__ (256, 1) m01100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
576 extern "C" __global__ void __launch_bounds__ (256, 1) m01100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)