2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
// File names of the comparison snippets that the kernels in this file pull in
// through `#include VECT_COMPARE_S` and `#include VECT_COMPARE_M`.
// Three pairs are listed (vect1 / vect2 / vect4 variants).
// NOTE(review): presumably each pair sits under its own preprocessor guard that
// is not visible in this excerpt; as shown, the macros would simply be
// redefined - confirm against the full file.
29 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
30 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
34 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
35 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
39 #define VECT_COMPARE_S "check_single_vect4_comp4.c"
40 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
// Table of 1024 comb_t entries declared `__device__ __constant__`.
// The kernels in this file read it as c_combs[il_pos] for il_pos < combs_cnt
// (fields .pw_len and .i[0..7]).
// NOTE(review): presumably filled from the host before each launch; the code
// that does so is not part of this file excerpt - confirm.
43 __device__ __constant__ comb_t c_combs[1024];
// Kernel m00900_m04: for every left-hand word pws[gid] it walks the c_combs
// table, OR-merges the left and right word buffers into one message block
// (w0..w3), runs three groups of 16 MD4_STEP invocations over it and then
// includes the VECT_COMPARE_M comparison snippet.
//
// Launch layout: one thread per pws entry, indexed along x only;
// __launch_bounds__ (256, 1) declares at most 256 threads per block.
//
// NOTE(review): this excerpt is missing lines (braces, the declarations of
// wordl*/wordr*/w*/a..d, the assignments of w3[2]/w3[3]); the comments below
// describe only what the visible lines do.
// NOTE(review): the parameter combs_buf is not referenced in the visible
// lines - the right-hand words come from the constant array c_combs instead.
45 extern "C" __global__ void __launch_bounds__ (256, 1) m00900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Thread index within the block.
51 const u32 lid = threadIdx.x;
// Flat global thread index (x dimension only).
57 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
// Tail guard: threads past the last valid entry do nothing.
59 if (gid >= gid_max) return;
// Left-hand word: first eight u32 words of pws[gid].i.
63 wordl0[0] = pws[gid].i[ 0];
64 wordl0[1] = pws[gid].i[ 1];
65 wordl0[2] = pws[gid].i[ 2];
66 wordl0[3] = pws[gid].i[ 3];
70 wordl1[0] = pws[gid].i[ 4];
71 wordl1[1] = pws[gid].i[ 5];
72 wordl1[2] = pws[gid].i[ 6];
73 wordl1[3] = pws[gid].i[ 7];
// Length of the left-hand word as stored in pws[gid].
89 const u32 pw_l_len = pws[gid].pw_len;
// BASE_RIGHT mode: the left-hand buffers are prepared once, before the loop.
// NOTE(review): presumably both calls below belong to this conditional; the
// braces are not visible in this excerpt - confirm.
91 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
// Presumably writes the 0x80 terminator at offset pw_l_len (inferred from the
// helper's name; its definition is not in this file excerpt).
93 append_0x80_2 (wordl0, wordl1, pw_l_len);
// Offset is taken from the FIRST c_combs entry only.
// NOTE(review): assumes every c_combs entry processed by this launch has the
// same pw_len as c_combs[0] - confirm against the host side.
95 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len);
// One iteration per right-hand word.
// NOTE(review): c_combs holds 1024 entries; no check of combs_cnt against
// that bound is visible here.
102 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
104 const u32 pw_r_len = c_combs[il_pos].pw_len;
// Combined candidate length.
106 const u32 pw_len = pw_l_len + pw_r_len;
// Right-hand word: first eight u32 words of the current c_combs entry.
110 wordr0[0] = c_combs[il_pos].i[0];
111 wordr0[1] = c_combs[il_pos].i[1];
112 wordr0[2] = c_combs[il_pos].i[2];
113 wordr0[3] = c_combs[il_pos].i[3];
117 wordr1[0] = c_combs[il_pos].i[4];
118 wordr1[1] = c_combs[il_pos].i[5];
119 wordr1[2] = c_combs[il_pos].i[6];
120 wordr1[3] = c_combs[il_pos].i[7];
// BASE_LEFT mode: the right-hand buffers are offset by the left word length
// on every iteration.
136 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
138 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
// Merge both halves with bitwise OR into the message words.
143 w0[0] = wordl0[0] | wordr0[0];
144 w0[1] = wordl0[1] | wordr0[1];
145 w0[2] = wordl0[2] | wordr0[2];
146 w0[3] = wordl0[3] | wordr0[3];
150 w1[0] = wordl1[0] | wordr1[0];
151 w1[1] = wordl1[1] | wordr1[1];
152 w1[2] = wordl1[2] | wordr1[2];
153 w1[3] = wordl1[3] | wordr1[3];
157 w2[0] = wordl2[0] | wordr2[0];
158 w2[1] = wordl2[1] | wordr2[1];
159 w2[2] = wordl2[2] | wordr2[2];
160 w2[3] = wordl2[3] | wordr2[3];
// Only w3[0] and w3[1] are merged here; w3[2] and w3[3] are read by the steps
// below but their assignment is not visible in this excerpt.
164 w3[0] = wordl3[0] | wordr3[0];
165 w3[1] = wordl3[1] | wordr3[1];
// First group of 16 steps (MD4_Fo, constant MD4C00): message words are
// consumed in storage order w0[0..3], w1[0..3], w2[0..3], w3[0..3].
174 MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00);
175 MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01);
176 MD4_STEP (MD4_Fo, c, d, a, b, w0[2], MD4C00, MD4S02);
177 MD4_STEP (MD4_Fo, b, c, d, a, w0[3], MD4C00, MD4S03);
178 MD4_STEP (MD4_Fo, a, b, c, d, w1[0], MD4C00, MD4S00);
179 MD4_STEP (MD4_Fo, d, a, b, c, w1[1], MD4C00, MD4S01);
180 MD4_STEP (MD4_Fo, c, d, a, b, w1[2], MD4C00, MD4S02);
181 MD4_STEP (MD4_Fo, b, c, d, a, w1[3], MD4C00, MD4S03);
182 MD4_STEP (MD4_Fo, a, b, c, d, w2[0], MD4C00, MD4S00);
183 MD4_STEP (MD4_Fo, d, a, b, c, w2[1], MD4C00, MD4S01);
184 MD4_STEP (MD4_Fo, c, d, a, b, w2[2], MD4C00, MD4S02);
185 MD4_STEP (MD4_Fo, b, c, d, a, w2[3], MD4C00, MD4S03);
186 MD4_STEP (MD4_Fo, a, b, c, d, w3[0], MD4C00, MD4S00);
187 MD4_STEP (MD4_Fo, d, a, b, c, w3[1], MD4C00, MD4S01);
188 MD4_STEP (MD4_Fo, c, d, a, b, w3[2], MD4C00, MD4S02);
189 MD4_STEP (MD4_Fo, b, c, d, a, w3[3], MD4C00, MD4S03);
// Second group (MD4_Go, constant MD4C01): element k of w0, w1, w2, w3 in turn,
// for k = 0..3.
191 MD4_STEP (MD4_Go, a, b, c, d, w0[0], MD4C01, MD4S10);
192 MD4_STEP (MD4_Go, d, a, b, c, w1[0], MD4C01, MD4S11);
193 MD4_STEP (MD4_Go, c, d, a, b, w2[0], MD4C01, MD4S12);
194 MD4_STEP (MD4_Go, b, c, d, a, w3[0], MD4C01, MD4S13);
195 MD4_STEP (MD4_Go, a, b, c, d, w0[1], MD4C01, MD4S10);
196 MD4_STEP (MD4_Go, d, a, b, c, w1[1], MD4C01, MD4S11);
197 MD4_STEP (MD4_Go, c, d, a, b, w2[1], MD4C01, MD4S12);
198 MD4_STEP (MD4_Go, b, c, d, a, w3[1], MD4C01, MD4S13);
199 MD4_STEP (MD4_Go, a, b, c, d, w0[2], MD4C01, MD4S10);
200 MD4_STEP (MD4_Go, d, a, b, c, w1[2], MD4C01, MD4S11);
201 MD4_STEP (MD4_Go, c, d, a, b, w2[2], MD4C01, MD4S12);
202 MD4_STEP (MD4_Go, b, c, d, a, w3[2], MD4C01, MD4S13);
203 MD4_STEP (MD4_Go, a, b, c, d, w0[3], MD4C01, MD4S10);
204 MD4_STEP (MD4_Go, d, a, b, c, w1[3], MD4C01, MD4S11);
205 MD4_STEP (MD4_Go, c, d, a, b, w2[3], MD4C01, MD4S12);
206 MD4_STEP (MD4_Go, b, c, d, a, w3[3], MD4C01, MD4S13);
// Third group (MD4_H, constant MD4C02): arrays are visited in the order
// w0, w2, w1, w3 while the element index runs 0, 2, 1, 3.
208 MD4_STEP (MD4_H , a, b, c, d, w0[0], MD4C02, MD4S20);
209 MD4_STEP (MD4_H , d, a, b, c, w2[0], MD4C02, MD4S21);
210 MD4_STEP (MD4_H , c, d, a, b, w1[0], MD4C02, MD4S22);
211 MD4_STEP (MD4_H , b, c, d, a, w3[0], MD4C02, MD4S23);
212 MD4_STEP (MD4_H , a, b, c, d, w0[2], MD4C02, MD4S20);
213 MD4_STEP (MD4_H , d, a, b, c, w2[2], MD4C02, MD4S21);
214 MD4_STEP (MD4_H , c, d, a, b, w1[2], MD4C02, MD4S22);
215 MD4_STEP (MD4_H , b, c, d, a, w3[2], MD4C02, MD4S23);
216 MD4_STEP (MD4_H , a, b, c, d, w0[1], MD4C02, MD4S20);
217 MD4_STEP (MD4_H , d, a, b, c, w2[1], MD4C02, MD4S21);
218 MD4_STEP (MD4_H , c, d, a, b, w1[1], MD4C02, MD4S22);
219 MD4_STEP (MD4_H , b, c, d, a, w3[1], MD4C02, MD4S23);
220 MD4_STEP (MD4_H , a, b, c, d, w0[3], MD4C02, MD4S20);
221 MD4_STEP (MD4_H , d, a, b, c, w2[3], MD4C02, MD4S21);
222 MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22);
223 MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23);
// Textually includes the file named by VECT_COMPARE_M at this point.
230 #include VECT_COMPARE_M
// Kernel entry point m00900_m08; takes the same parameter list as m00900_m04.
// Its body is not visible in this excerpt.
234 extern "C" __global__ void __launch_bounds__ (256, 1) m00900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Kernel entry point m00900_m16; takes the same parameter list as m00900_m04.
// Its body is not visible in this excerpt.
238 extern "C" __global__ void __launch_bounds__ (256, 1) m00900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Kernel m00900_s04: same candidate construction and MD4_STEP sequence as
// m00900_m04, but it additionally loads one target digest into `search` and
// includes the VECT_COMPARE_S comparison snippet instead of VECT_COMPARE_M.
//
// Launch layout: one thread per pws entry, indexed along x only;
// __launch_bounds__ (256, 1) declares at most 256 threads per block.
//
// NOTE(review): this excerpt is missing lines (braces, the declarations of
// wordl*/wordr*/w*/a..d, the assignments of w3[2]/w3[3]); the comments below
// describe only what the visible lines do.
// NOTE(review): the parameter combs_buf is not referenced in the visible
// lines - the right-hand words come from the constant array c_combs instead.
242 extern "C" __global__ void __launch_bounds__ (256, 1) m00900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Thread index within the block.
248 const u32 lid = threadIdx.x;
// Flat global thread index (x dimension only).
254 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
// Tail guard: threads past the last valid entry do nothing.
256 if (gid >= gid_max) return;
// Left-hand word: first eight u32 words of pws[gid].i.
260 wordl0[0] = pws[gid].i[ 0];
261 wordl0[1] = pws[gid].i[ 1];
262 wordl0[2] = pws[gid].i[ 2];
263 wordl0[3] = pws[gid].i[ 3];
267 wordl1[0] = pws[gid].i[ 4];
268 wordl1[1] = pws[gid].i[ 5];
269 wordl1[2] = pws[gid].i[ 6];
270 wordl1[3] = pws[gid].i[ 7];
// Length of the left-hand word as stored in pws[gid].
286 const u32 pw_l_len = pws[gid].pw_len;
// BASE_RIGHT mode: the left-hand buffers are prepared once, before the loop.
// NOTE(review): presumably both calls below belong to this conditional; the
// braces are not visible in this excerpt - confirm.
288 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
// Presumably writes the 0x80 terminator at offset pw_l_len (inferred from the
// helper's name; its definition is not in this file excerpt).
290 append_0x80_2 (wordl0, wordl1, pw_l_len);
// Offset is taken from the FIRST c_combs entry only.
// NOTE(review): assumes every c_combs entry processed by this launch has the
// same pw_len as c_combs[0] - confirm against the host side.
292 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len);
// The single digest to look for: four words of digests_buf[digests_offset],
// picked through the DGST_R0..DGST_R3 indices.
299 const u32 search[4] =
301 digests_buf[digests_offset].digest_buf[DGST_R0],
302 digests_buf[digests_offset].digest_buf[DGST_R1],
303 digests_buf[digests_offset].digest_buf[DGST_R2],
304 digests_buf[digests_offset].digest_buf[DGST_R3]
// One iteration per right-hand word.
// NOTE(review): c_combs holds 1024 entries; no check of combs_cnt against
// that bound is visible here.
311 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
313 const u32 pw_r_len = c_combs[il_pos].pw_len;
// Combined candidate length.
315 const u32 pw_len = pw_l_len + pw_r_len;
// Right-hand word: first eight u32 words of the current c_combs entry.
319 wordr0[0] = c_combs[il_pos].i[0];
320 wordr0[1] = c_combs[il_pos].i[1];
321 wordr0[2] = c_combs[il_pos].i[2];
322 wordr0[3] = c_combs[il_pos].i[3];
326 wordr1[0] = c_combs[il_pos].i[4];
327 wordr1[1] = c_combs[il_pos].i[5];
328 wordr1[2] = c_combs[il_pos].i[6];
329 wordr1[3] = c_combs[il_pos].i[7];
// BASE_LEFT mode: the right-hand buffers are offset by the left word length
// on every iteration.
345 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
347 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
// Merge both halves with bitwise OR into the message words.
352 w0[0] = wordl0[0] | wordr0[0];
353 w0[1] = wordl0[1] | wordr0[1];
354 w0[2] = wordl0[2] | wordr0[2];
355 w0[3] = wordl0[3] | wordr0[3];
359 w1[0] = wordl1[0] | wordr1[0];
360 w1[1] = wordl1[1] | wordr1[1];
361 w1[2] = wordl1[2] | wordr1[2];
362 w1[3] = wordl1[3] | wordr1[3];
366 w2[0] = wordl2[0] | wordr2[0];
367 w2[1] = wordl2[1] | wordr2[1];
368 w2[2] = wordl2[2] | wordr2[2];
369 w2[3] = wordl2[3] | wordr2[3];
// Only w3[0] and w3[1] are merged here; w3[2] and w3[3] are read by the steps
// below but their assignment is not visible in this excerpt.
373 w3[0] = wordl3[0] | wordr3[0];
374 w3[1] = wordl3[1] | wordr3[1];
// First group of 16 steps (MD4_Fo, constant MD4C00): message words are
// consumed in storage order w0[0..3], w1[0..3], w2[0..3], w3[0..3].
383 MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00);
384 MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01);
385 MD4_STEP (MD4_Fo, c, d, a, b, w0[2], MD4C00, MD4S02);
386 MD4_STEP (MD4_Fo, b, c, d, a, w0[3], MD4C00, MD4S03);
387 MD4_STEP (MD4_Fo, a, b, c, d, w1[0], MD4C00, MD4S00);
388 MD4_STEP (MD4_Fo, d, a, b, c, w1[1], MD4C00, MD4S01);
389 MD4_STEP (MD4_Fo, c, d, a, b, w1[2], MD4C00, MD4S02);
390 MD4_STEP (MD4_Fo, b, c, d, a, w1[3], MD4C00, MD4S03);
391 MD4_STEP (MD4_Fo, a, b, c, d, w2[0], MD4C00, MD4S00);
392 MD4_STEP (MD4_Fo, d, a, b, c, w2[1], MD4C00, MD4S01);
393 MD4_STEP (MD4_Fo, c, d, a, b, w2[2], MD4C00, MD4S02);
394 MD4_STEP (MD4_Fo, b, c, d, a, w2[3], MD4C00, MD4S03);
395 MD4_STEP (MD4_Fo, a, b, c, d, w3[0], MD4C00, MD4S00);
396 MD4_STEP (MD4_Fo, d, a, b, c, w3[1], MD4C00, MD4S01);
397 MD4_STEP (MD4_Fo, c, d, a, b, w3[2], MD4C00, MD4S02);
398 MD4_STEP (MD4_Fo, b, c, d, a, w3[3], MD4C00, MD4S03);
// Second group (MD4_Go, constant MD4C01): element k of w0, w1, w2, w3 in turn,
// for k = 0..3.
400 MD4_STEP (MD4_Go, a, b, c, d, w0[0], MD4C01, MD4S10);
401 MD4_STEP (MD4_Go, d, a, b, c, w1[0], MD4C01, MD4S11);
402 MD4_STEP (MD4_Go, c, d, a, b, w2[0], MD4C01, MD4S12);
403 MD4_STEP (MD4_Go, b, c, d, a, w3[0], MD4C01, MD4S13);
404 MD4_STEP (MD4_Go, a, b, c, d, w0[1], MD4C01, MD4S10);
405 MD4_STEP (MD4_Go, d, a, b, c, w1[1], MD4C01, MD4S11);
406 MD4_STEP (MD4_Go, c, d, a, b, w2[1], MD4C01, MD4S12);
407 MD4_STEP (MD4_Go, b, c, d, a, w3[1], MD4C01, MD4S13);
408 MD4_STEP (MD4_Go, a, b, c, d, w0[2], MD4C01, MD4S10);
409 MD4_STEP (MD4_Go, d, a, b, c, w1[2], MD4C01, MD4S11);
410 MD4_STEP (MD4_Go, c, d, a, b, w2[2], MD4C01, MD4S12);
411 MD4_STEP (MD4_Go, b, c, d, a, w3[2], MD4C01, MD4S13);
412 MD4_STEP (MD4_Go, a, b, c, d, w0[3], MD4C01, MD4S10);
413 MD4_STEP (MD4_Go, d, a, b, c, w1[3], MD4C01, MD4S11);
414 MD4_STEP (MD4_Go, c, d, a, b, w2[3], MD4C01, MD4S12);
415 MD4_STEP (MD4_Go, b, c, d, a, w3[3], MD4C01, MD4S13);
// Third group (MD4_H, constant MD4C02): arrays are visited in the order
// w0, w2, w1, w3 while the element index runs 0, 2, 1, 3.
417 MD4_STEP (MD4_H , a, b, c, d, w0[0], MD4C02, MD4S20);
418 MD4_STEP (MD4_H , d, a, b, c, w2[0], MD4C02, MD4S21);
419 MD4_STEP (MD4_H , c, d, a, b, w1[0], MD4C02, MD4S22);
420 MD4_STEP (MD4_H , b, c, d, a, w3[0], MD4C02, MD4S23);
421 MD4_STEP (MD4_H , a, b, c, d, w0[2], MD4C02, MD4S20);
422 MD4_STEP (MD4_H , d, a, b, c, w2[2], MD4C02, MD4S21);
423 MD4_STEP (MD4_H , c, d, a, b, w1[2], MD4C02, MD4S22);
424 MD4_STEP (MD4_H , b, c, d, a, w3[2], MD4C02, MD4S23);
425 MD4_STEP (MD4_H , a, b, c, d, w0[1], MD4C02, MD4S20);
426 MD4_STEP (MD4_H , d, a, b, c, w2[1], MD4C02, MD4S21);
427 MD4_STEP (MD4_H , c, d, a, b, w1[1], MD4C02, MD4S22);
428 MD4_STEP (MD4_H , b, c, d, a, w3[1], MD4C02, MD4S23);
429 MD4_STEP (MD4_H , a, b, c, d, w0[3], MD4C02, MD4S20);
430 MD4_STEP (MD4_H , d, a, b, c, w2[3], MD4C02, MD4S21);
431 MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22);
432 MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23);
// Textually includes the file named by VECT_COMPARE_S at this point.
439 #include VECT_COMPARE_S
// Kernel entry point m00900_s08; takes the same parameter list as m00900_s04.
// Its body is not visible in this excerpt.
443 extern "C" __global__ void __launch_bounds__ (256, 1) m00900_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Kernel entry point m00900_s16; takes the same parameter list as m00900_s04.
// Its body is not visible in this excerpt.
447 extern "C" __global__ void __launch_bounds__ (256, 1) m00900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)