2 * Author......: Jens Steube <jens.steube@gmail.com>
9 #include "include/constants.h"
10 #include "include/kernel_vendor.h"
25 #include "include/kernel_functions.c"
27 #include "common_nv.c"
30 #define VECT_COMPARE_S "check_single_vect1_comp4_warp.c"
31 #define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c"
35 #define VECT_COMPARE_S "check_single_vect2_comp4_warp.c"
36 #define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c"
40 #define VECT_COMPARE_S "check_single_vect4_comp4_warp.c"
41 #define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c"
44 __device__ __constant__ u32x c_bfs[1024];
46 __device__ static void m00200m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
52 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
53 const u32 lid = threadIdx.x;
59 const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV);
63 for (u32 il_pos = 0; il_pos < bf_loops; il_pos++)
65 const u32x w0r = c_bfs[il_pos];
67 const u32x w0 = w0l | w0r;
76 a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \
83 ROUND ((w0 >> 0) & 0xff);
84 ROUND ((w0 >> 8) & 0xff);
85 ROUND ((w0 >> 16) & 0xff);
86 ROUND ((w0 >> 24) & 0xff);
90 ROUND ((w0 >> 0) & 0xff);
91 ROUND ((w0 >> 8) & 0xff);
92 ROUND ((w0 >> 16) & 0xff);
96 ROUND ((w0 >> 0) & 0xff);
97 ROUND ((w0 >> 8) & 0xff);
101 ROUND ((w0 >> 0) & 0xff);
107 for (i = 4, j = 1; i <= (int) pw_len - 4; i += 4, j += 1)
111 ROUND ((wj >> 0) & 0xff);
112 ROUND ((wj >> 8) & 0xff);
113 ROUND ((wj >> 16) & 0xff);
114 ROUND ((wj >> 24) & 0xff);
119 const u32 left = pw_len - i;
123 ROUND ((wj >> 0) & 0xff);
124 ROUND ((wj >> 8) & 0xff);
125 ROUND ((wj >> 16) & 0xff);
129 ROUND ((wj >> 0) & 0xff);
130 ROUND ((wj >> 8) & 0xff);
134 ROUND ((wj >> 0) & 0xff);
145 #include VECT_COMPARE_M
149 __device__ static void m00200s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
155 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
156 const u32 lid = threadIdx.x;
162 const u32 search[4] =
164 digests_buf[digests_offset].digest_buf[DGST_R0],
165 digests_buf[digests_offset].digest_buf[DGST_R1],
166 digests_buf[digests_offset].digest_buf[DGST_R2],
167 digests_buf[digests_offset].digest_buf[DGST_R3]
174 const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV);
178 for (u32 il_pos = 0; il_pos < bf_loops; il_pos++)
180 const u32x w0r = c_bfs[il_pos];
182 const u32x w0 = w0l | w0r;
191 a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \
198 ROUND ((w0 >> 0) & 0xff);
199 ROUND ((w0 >> 8) & 0xff);
200 ROUND ((w0 >> 16) & 0xff);
201 ROUND ((w0 >> 24) & 0xff);
203 else if (pw_len == 3)
205 ROUND ((w0 >> 0) & 0xff);
206 ROUND ((w0 >> 8) & 0xff);
207 ROUND ((w0 >> 16) & 0xff);
209 else if (pw_len == 2)
211 ROUND ((w0 >> 0) & 0xff);
212 ROUND ((w0 >> 8) & 0xff);
214 else if (pw_len == 1)
216 ROUND ((w0 >> 0) & 0xff);
222 for (i = 4, j = 1; i <= (int) pw_len - 4; i += 4, j += 1)
226 ROUND ((wj >> 0) & 0xff);
227 ROUND ((wj >> 8) & 0xff);
228 ROUND ((wj >> 16) & 0xff);
229 ROUND ((wj >> 24) & 0xff);
234 const u32 left = pw_len - i;
238 ROUND ((wj >> 0) & 0xff);
239 ROUND ((wj >> 8) & 0xff);
240 ROUND ((wj >> 16) & 0xff);
244 ROUND ((wj >> 0) & 0xff);
245 ROUND ((wj >> 8) & 0xff);
249 ROUND ((wj >> 0) & 0xff);
260 #include VECT_COMPARE_S
264 extern "C" __global__ void __launch_bounds__ (256, 1) m00200_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
270 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
272 if (gid >= gid_max) return;
276 w[ 0] = pws[gid].i[ 0];
277 w[ 1] = pws[gid].i[ 1];
278 w[ 2] = pws[gid].i[ 2];
279 w[ 3] = pws[gid].i[ 3];
293 const u32 pw_len = pws[gid].pw_len;
299 m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
302 extern "C" __global__ void __launch_bounds__ (256, 1) m00200_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
308 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
310 if (gid >= gid_max) return;
314 w[ 0] = pws[gid].i[ 0];
315 w[ 1] = pws[gid].i[ 1];
316 w[ 2] = pws[gid].i[ 2];
317 w[ 3] = pws[gid].i[ 3];
318 w[ 4] = pws[gid].i[ 4];
319 w[ 5] = pws[gid].i[ 5];
320 w[ 6] = pws[gid].i[ 6];
321 w[ 7] = pws[gid].i[ 7];
331 const u32 pw_len = pws[gid].pw_len;
337 m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
340 extern "C" __global__ void __launch_bounds__ (256, 1) m00200_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
346 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
348 if (gid >= gid_max) return;
352 w[ 0] = pws[gid].i[ 0];
353 w[ 1] = pws[gid].i[ 1];
354 w[ 2] = pws[gid].i[ 2];
355 w[ 3] = pws[gid].i[ 3];
356 w[ 4] = pws[gid].i[ 4];
357 w[ 5] = pws[gid].i[ 5];
358 w[ 6] = pws[gid].i[ 6];
359 w[ 7] = pws[gid].i[ 7];
360 w[ 8] = pws[gid].i[ 8];
361 w[ 9] = pws[gid].i[ 9];
362 w[10] = pws[gid].i[10];
363 w[11] = pws[gid].i[11];
364 w[12] = pws[gid].i[12];
365 w[13] = pws[gid].i[13];
366 w[14] = pws[gid].i[14];
367 w[15] = pws[gid].i[15];
369 const u32 pw_len = pws[gid].pw_len;
375 m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
378 extern "C" __global__ void __launch_bounds__ (256, 1) m00200_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
384 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
386 if (gid >= gid_max) return;
390 w[ 0] = pws[gid].i[ 0];
391 w[ 1] = pws[gid].i[ 1];
392 w[ 2] = pws[gid].i[ 2];
393 w[ 3] = pws[gid].i[ 3];
407 const u32 pw_len = pws[gid].pw_len;
413 m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
416 extern "C" __global__ void __launch_bounds__ (256, 1) m00200_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
422 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
424 if (gid >= gid_max) return;
428 w[ 0] = pws[gid].i[ 0];
429 w[ 1] = pws[gid].i[ 1];
430 w[ 2] = pws[gid].i[ 2];
431 w[ 3] = pws[gid].i[ 3];
432 w[ 4] = pws[gid].i[ 4];
433 w[ 5] = pws[gid].i[ 5];
434 w[ 6] = pws[gid].i[ 6];
435 w[ 7] = pws[gid].i[ 7];
445 const u32 pw_len = pws[gid].pw_len;
451 m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
454 extern "C" __global__ void __launch_bounds__ (256, 1) m00200_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
460 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
462 if (gid >= gid_max) return;
466 w[ 0] = pws[gid].i[ 0];
467 w[ 1] = pws[gid].i[ 1];
468 w[ 2] = pws[gid].i[ 2];
469 w[ 3] = pws[gid].i[ 3];
470 w[ 4] = pws[gid].i[ 4];
471 w[ 5] = pws[gid].i[ 5];
472 w[ 6] = pws[gid].i[ 6];
473 w[ 7] = pws[gid].i[ 7];
474 w[ 8] = pws[gid].i[ 8];
475 w[ 9] = pws[gid].i[ 9];
476 w[10] = pws[gid].i[10];
477 w[11] = pws[gid].i[11];
478 w[12] = pws[gid].i[12];
479 w[13] = pws[gid].i[13];
480 w[14] = pws[gid].i[14];
481 w[15] = pws[gid].i[15];
483 const u32 pw_len = pws[gid].pw_len;
489 m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);