2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
29 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
30 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
34 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
35 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
39 #define VECT_COMPARE_S "check_single_vect4_comp4.c"
40 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
// Table of combinator candidates in __constant__ memory, with room for 1024
// comb_t entries. The kernels in this file read entries as c_combs[il_pos]
// for il_pos in [0, combs_cnt).
// NOTE(review): nothing visible in this file clamps combs_cnt to 1024;
// presumably the host guarantees it - confirm.
43 __device__ __constant__ comb_t c_combs[1024];
// m00200_m04: combinator kernel, multi-digest variant.
//
// Each thread picks one left-hand candidate from pws[gid], pairs it with every
// entry of c_combs[0 .. combs_cnt), feeds the bytes of the joined word through
// ROUND, and finally pulls in the code named by VECT_COMPARE_M.
//
// Parameters read by the visible code:
//   pws        - per-thread candidates (fields .i[] and .pw_len are used)
//   combs_cnt  - number of c_combs entries to iterate over
//   combs_mode - compared against COMBINATOR_MODE_BASE_LEFT / _BASE_RIGHT to
//                decide which half is passed to switch_buffer_by_offset
//   gid_max    - number of valid candidates; threads at or past it exit early
// The remaining parameters are not referenced in the statements shown here
// (presumably they are consumed by the included compare code - confirm).
//
// __launch_bounds__ (256, 1): compiled for at most 256 threads per block.
// NOTE(review): the ROUND formula looks like the MySQL323 (old password)
// mixing step - confirm against the host-side mode table.
45 extern "C" __global__ void __launch_bounds__ (256, 1) m00200_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Thread index within the block.
51 const u32 lid = threadIdx.x;
// Flat global thread index; threads at or beyond gid_max have no candidate.
57 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
59 if (gid >= gid_max) return;
// Copy the first eight words of this thread's candidate into the left-hand
// buffers (wordl0 holds words 0-3, wordl1 holds words 4-7).
63 wordl0[0] = pws[gid].i[ 0];
64 wordl0[1] = pws[gid].i[ 1];
65 wordl0[2] = pws[gid].i[ 2];
66 wordl0[3] = pws[gid].i[ 3];
70 wordl1[0] = pws[gid].i[ 4];
71 wordl1[1] = pws[gid].i[ 5];
72 wordl1[2] = pws[gid].i[ 6];
73 wordl1[3] = pws[gid].i[ 7];
// Length of the left-hand candidate.
89 const u32 pw_l_len = pws[gid].pw_len;
// BASE_RIGHT mode: the left-hand buffers go through switch_buffer_by_offset
// once, before the loop, using the length stored in c_combs[0].
// NOTE(review): presumably this moves the word forward to leave room for the
// c_combs entry in front of it, and relies on entry 0's length being valid for
// every entry - confirm.
91 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
93 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len);
// Walk over every combinator entry in constant memory.
100 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
102 const u32 pw_r_len = c_combs[il_pos].pw_len;
// Total length of the joined candidate.
104 const u32 pw_len = pw_l_len + pw_r_len;
// Copy the first eight words of the current entry into the right-hand buffers.
108 wordr0[0] = c_combs[il_pos].i[0];
109 wordr0[1] = c_combs[il_pos].i[1];
110 wordr0[2] = c_combs[il_pos].i[2];
111 wordr0[3] = c_combs[il_pos].i[3];
115 wordr1[0] = c_combs[il_pos].i[4];
116 wordr1[1] = c_combs[il_pos].i[5];
117 wordr1[2] = c_combs[il_pos].i[6];
118 wordr1[3] = c_combs[il_pos].i[7];
// BASE_LEFT mode: the right-hand buffers go through switch_buffer_by_offset
// with the left-hand length (presumably so they land after the left word -
// confirm).
134 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
136 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
// Join both halves word by word with bitwise OR.
// NOTE(review): this only yields a clean concatenation if the two halves never
// have non-zero bytes at the same position - confirm.
141 w_t[ 0] = wordl0[0] | wordr0[0];
142 w_t[ 1] = wordl0[1] | wordr0[1];
143 w_t[ 2] = wordl0[2] | wordr0[2];
144 w_t[ 3] = wordl0[3] | wordr0[3];
145 w_t[ 4] = wordl1[0] | wordr1[0];
146 w_t[ 5] = wordl1[1] | wordr1[1];
147 w_t[ 6] = wordl1[2] | wordr1[2];
148 w_t[ 7] = wordl1[3] | wordr1[3];
149 w_t[ 8] = wordl2[0] | wordr2[0];
150 w_t[ 9] = wordl2[1] | wordr2[1];
151 w_t[10] = wordl2[2] | wordr2[2];
152 w_t[11] = wordl2[3] | wordr2[3];
153 w_t[12] = wordl3[0] | wordr3[0];
154 w_t[13] = wordl3[1] | wordr3[1];
155 w_t[14] = wordl3[2] | wordr3[2];
165 a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \
173 for (i = 0, j = 0; i <= (int) pw_len - 4; i += 4, j += 1)
// One full word per iteration (i counts bytes, j counts words); its four bytes
// are fed to ROUND starting with the least significant one.
175 const u32x wj = w_t[j];
177 ROUND ((wj >> 0) & 0xff);
178 ROUND ((wj >> 8) & 0xff);
179 ROUND ((wj >> 16) & 0xff);
180 ROUND ((wj >> 24) & 0xff);
// Tail: the loop above stops once fewer than four bytes remain, so the word at
// index j holds the leftover bytes and `left` is their count.
183 const u32x wj = w_t[j];
185 const u32 left = pw_len - i;
// The three groups of ROUND calls below cover three, two and one leftover
// byte(s) respectively (presumably selected by the value of `left`).
189 ROUND ((wj >> 0) & 0xff);
190 ROUND ((wj >> 8) & 0xff);
191 ROUND ((wj >> 16) & 0xff);
195 ROUND ((wj >> 0) & 0xff);
196 ROUND ((wj >> 8) & 0xff);
200 ROUND ((wj >> 0) & 0xff);
// Textually include the code selected by VECT_COMPARE_M (presumably the
// multi-digest comparison for the active vector width - confirm).
211 #include VECT_COMPARE_M
// m00200_m08: multi-digest entry point with the same parameter list as
// m00200_m04, except that the count parameter is named rules_cnt here.
// NOTE(review): m00200_m04 calls that parameter combs_cnt; presumably the
// same name was intended here - confirm.
215 extern "C" __global__ void __launch_bounds__ (256, 1) m00200_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// m00200_m16: multi-digest entry point with the same parameter list as
// m00200_m04, except that the count parameter is named rules_cnt here.
// NOTE(review): m00200_m04 calls that parameter combs_cnt; presumably the
// same name was intended here - confirm.
219 extern "C" __global__ void __launch_bounds__ (256, 1) m00200_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// m00200_s04: combinator kernel, single-digest variant.
//
// Each thread picks one left-hand candidate from pws[gid], pairs it with every
// entry of c_combs[0 .. combs_cnt), feeds the bytes of the joined word through
// ROUND, and finally pulls in the code named by VECT_COMPARE_S. The target
// digest is loaded once into search[] from digests_buf[digests_offset].
//
// Parameters read by the visible code:
//   pws            - per-thread candidates (fields .i[] and .pw_len are used)
//   digests_buf    - digest table; entry digests_offset supplies search[]
//   digests_offset - index of the digest to look for
//   combs_cnt      - number of c_combs entries to iterate over
//   combs_mode     - compared against COMBINATOR_MODE_BASE_LEFT / _BASE_RIGHT
//                    to decide which half goes to switch_buffer_by_offset
//   gid_max        - number of valid candidates; threads at or past it exit
// The remaining parameters are not referenced in the statements shown here
// (presumably they are consumed by the included compare code - confirm).
//
// __launch_bounds__ (256, 1): compiled for at most 256 threads per block.
// NOTE(review): the ROUND formula looks like the MySQL323 (old password)
// mixing step - confirm against the host-side mode table.
223 extern "C" __global__ void __launch_bounds__ (256, 1) m00200_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Thread index within the block.
229 const u32 lid = threadIdx.x;
// Flat global thread index; threads at or beyond gid_max have no candidate.
235 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
237 if (gid >= gid_max) return;
// Copy the first eight words of this thread's candidate into the left-hand
// buffers (wordl0 holds words 0-3, wordl1 holds words 4-7).
241 wordl0[0] = pws[gid].i[ 0];
242 wordl0[1] = pws[gid].i[ 1];
243 wordl0[2] = pws[gid].i[ 2];
244 wordl0[3] = pws[gid].i[ 3];
248 wordl1[0] = pws[gid].i[ 4];
249 wordl1[1] = pws[gid].i[ 5];
250 wordl1[2] = pws[gid].i[ 6];
251 wordl1[3] = pws[gid].i[ 7];
// Length of the left-hand candidate.
267 const u32 pw_l_len = pws[gid].pw_len;
// BASE_RIGHT mode: the left-hand buffers go through switch_buffer_by_offset
// once, before the loop, using the length stored in c_combs[0].
// NOTE(review): presumably this moves the word forward to leave room for the
// c_combs entry in front of it, and relies on entry 0's length being valid for
// every entry - confirm.
269 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
271 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len);
// The four digest words (selected by DGST_R0..DGST_R3) of the single target
// digest, read once up front.
278 const u32 search[4] =
280 digests_buf[digests_offset].digest_buf[DGST_R0],
281 digests_buf[digests_offset].digest_buf[DGST_R1],
282 digests_buf[digests_offset].digest_buf[DGST_R2],
283 digests_buf[digests_offset].digest_buf[DGST_R3]
// Walk over every combinator entry in constant memory.
290 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
292 const u32 pw_r_len = c_combs[il_pos].pw_len;
// Total length of the joined candidate.
294 const u32 pw_len = pw_l_len + pw_r_len;
// Copy the first eight words of the current entry into the right-hand buffers.
298 wordr0[0] = c_combs[il_pos].i[0];
299 wordr0[1] = c_combs[il_pos].i[1];
300 wordr0[2] = c_combs[il_pos].i[2];
301 wordr0[3] = c_combs[il_pos].i[3];
305 wordr1[0] = c_combs[il_pos].i[4];
306 wordr1[1] = c_combs[il_pos].i[5];
307 wordr1[2] = c_combs[il_pos].i[6];
308 wordr1[3] = c_combs[il_pos].i[7];
// BASE_LEFT mode: the right-hand buffers go through switch_buffer_by_offset
// with the left-hand length (presumably so they land after the left word -
// confirm).
324 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
326 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
// Join both halves word by word with bitwise OR.
// NOTE(review): this only yields a clean concatenation if the two halves never
// have non-zero bytes at the same position - confirm.
331 w_t[ 0] = wordl0[0] | wordr0[0];
332 w_t[ 1] = wordl0[1] | wordr0[1];
333 w_t[ 2] = wordl0[2] | wordr0[2];
334 w_t[ 3] = wordl0[3] | wordr0[3];
335 w_t[ 4] = wordl1[0] | wordr1[0];
336 w_t[ 5] = wordl1[1] | wordr1[1];
337 w_t[ 6] = wordl1[2] | wordr1[2];
338 w_t[ 7] = wordl1[3] | wordr1[3];
339 w_t[ 8] = wordl2[0] | wordr2[0];
340 w_t[ 9] = wordl2[1] | wordr2[1];
341 w_t[10] = wordl2[2] | wordr2[2];
342 w_t[11] = wordl2[3] | wordr2[3];
343 w_t[12] = wordl3[0] | wordr3[0];
344 w_t[13] = wordl3[1] | wordr3[1];
345 w_t[14] = wordl3[2] | wordr3[2];
355 a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \
363 for (i = 0, j = 0; i <= (int) pw_len - 4; i += 4, j += 1)
// One full word per iteration (i counts bytes, j counts words); its four bytes
// are fed to ROUND starting with the least significant one.
365 const u32x wj = w_t[j];
367 ROUND ((wj >> 0) & 0xff);
368 ROUND ((wj >> 8) & 0xff);
369 ROUND ((wj >> 16) & 0xff);
370 ROUND ((wj >> 24) & 0xff);
// Tail: the loop above stops once fewer than four bytes remain, so the word at
// index j holds the leftover bytes and `left` is their count.
373 const u32x wj = w_t[j];
375 const u32 left = pw_len - i;
// The three groups of ROUND calls below cover three, two and one leftover
// byte(s) respectively (presumably selected by the value of `left`).
379 ROUND ((wj >> 0) & 0xff);
380 ROUND ((wj >> 8) & 0xff);
381 ROUND ((wj >> 16) & 0xff);
385 ROUND ((wj >> 0) & 0xff);
386 ROUND ((wj >> 8) & 0xff);
390 ROUND ((wj >> 0) & 0xff);
// Textually include the code selected by VECT_COMPARE_S (presumably the
// single-digest comparison against search[] for the active vector width -
// confirm).
401 #include VECT_COMPARE_S
// m00200_s08: single-digest entry point with the same parameter list as
// m00200_s04, except that the count parameter is named rules_cnt here.
// NOTE(review): m00200_s04 calls that parameter combs_cnt; presumably the
// same name was intended here - confirm.
405 extern "C" __global__ void __launch_bounds__ (256, 1) m00200_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// m00200_s16: single-digest entry point with the same parameter list as
// m00200_s04, except that the count parameter is named rules_cnt here.
// NOTE(review): m00200_s04 calls that parameter combs_cnt; presumably the
// same name was intended here - confirm.
409 extern "C" __global__ void __launch_bounds__ (256, 1) m00200_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)