2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
29 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
30 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
34 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
35 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
39 #define VECT_COMPARE_S "check_single_vect4_comp4.c"
40 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
43 __device__ const u32 crc32tab[0x100] =
45 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
46 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
47 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
48 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
49 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
50 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
51 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
52 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
53 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
54 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
55 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
56 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
57 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
58 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
59 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
60 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
61 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
62 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
63 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
64 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
65 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
66 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
67 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
68 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
69 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
70 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
71 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
72 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
73 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
74 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
75 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
76 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
77 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
78 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
79 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
80 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
81 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
82 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
83 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
84 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
85 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
86 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
87 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
88 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
89 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
90 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
91 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
92 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
93 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
94 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
95 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
96 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
97 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
98 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
99 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
100 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
101 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
102 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
103 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
104 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
105 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
106 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
107 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
108 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
111 __device__ static u32x round_crc32 (u32x a, const u32x v)
113 const u32x k = (a ^ v) & 0xff;
115 const u32x s = a >> 8;
138 __device__ static u32x crc32 (const u32x w[16], const u32 pw_len, const u32 iv)
142 if (pw_len >= 1) a = round_crc32 (a, w[0] >> 0);
143 if (pw_len >= 2) a = round_crc32 (a, w[0] >> 8);
144 if (pw_len >= 3) a = round_crc32 (a, w[0] >> 16);
145 if (pw_len >= 4) a = round_crc32 (a, w[0] >> 24);
147 for (u32 i = 4, j = 1; i < pw_len; i += 4, j += 1)
149 if (pw_len >= (i + 1)) a = round_crc32 (a, w[j] >> 0);
150 if (pw_len >= (i + 2)) a = round_crc32 (a, w[j] >> 8);
151 if (pw_len >= (i + 3)) a = round_crc32 (a, w[j] >> 16);
152 if (pw_len >= (i + 4)) a = round_crc32 (a, w[j] >> 24);
158 __device__ __constant__ comb_t c_combs[1024];
160 extern "C" __global__ void __launch_bounds__ (256, 1) m11500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
166 const u32 lid = threadIdx.x;
172 const u32 iv = salt_bufs[salt_pos].salt_buf[0];
178 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
180 if (gid >= gid_max) return;
184 wordl0[0] = pws[gid].i[ 0];
185 wordl0[1] = pws[gid].i[ 1];
186 wordl0[2] = pws[gid].i[ 2];
187 wordl0[3] = pws[gid].i[ 3];
191 wordl1[0] = pws[gid].i[ 4];
192 wordl1[1] = pws[gid].i[ 5];
193 wordl1[2] = pws[gid].i[ 6];
194 wordl1[3] = pws[gid].i[ 7];
210 const u32 pw_l_len = pws[gid].pw_len;
212 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
214 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len);
221 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
223 const u32 pw_r_len = c_combs[il_pos].pw_len;
225 const u32 pw_len = pw_l_len + pw_r_len;
229 wordr0[0] = c_combs[il_pos].i[0];
230 wordr0[1] = c_combs[il_pos].i[1];
231 wordr0[2] = c_combs[il_pos].i[2];
232 wordr0[3] = c_combs[il_pos].i[3];
236 wordr1[0] = c_combs[il_pos].i[4];
237 wordr1[1] = c_combs[il_pos].i[5];
238 wordr1[2] = c_combs[il_pos].i[6];
239 wordr1[3] = c_combs[il_pos].i[7];
255 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
257 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
262 w_t[ 0] = wordl0[0] | wordr0[0];
263 w_t[ 1] = wordl0[1] | wordr0[1];
264 w_t[ 2] = wordl0[2] | wordr0[2];
265 w_t[ 3] = wordl0[3] | wordr0[3];
266 w_t[ 4] = wordl1[0] | wordr1[0];
267 w_t[ 5] = wordl1[1] | wordr1[1];
268 w_t[ 6] = wordl1[2] | wordr1[2];
269 w_t[ 7] = wordl1[3] | wordr1[3];
270 w_t[ 8] = wordl2[0] | wordr2[0];
271 w_t[ 9] = wordl2[1] | wordr2[1];
272 w_t[10] = wordl2[2] | wordr2[2];
273 w_t[11] = wordl2[3] | wordr2[3];
274 w_t[12] = wordl3[0] | wordr3[0];
275 w_t[13] = wordl3[1] | wordr3[1];
276 w_t[14] = wordl3[2] | wordr3[2];
279 u32x a = crc32 (w_t, pw_len, iv);
287 #include VECT_COMPARE_M
291 extern "C" __global__ void __launch_bounds__ (256, 1) m11500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
295 extern "C" __global__ void __launch_bounds__ (256, 1) m11500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
299 extern "C" __global__ void __launch_bounds__ (256, 1) m11500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
305 const u32 lid = threadIdx.x;
311 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
313 if (gid >= gid_max) return;
317 wordl0[0] = pws[gid].i[ 0];
318 wordl0[1] = pws[gid].i[ 1];
319 wordl0[2] = pws[gid].i[ 2];
320 wordl0[3] = pws[gid].i[ 3];
324 wordl1[0] = pws[gid].i[ 4];
325 wordl1[1] = pws[gid].i[ 5];
326 wordl1[2] = pws[gid].i[ 6];
327 wordl1[3] = pws[gid].i[ 7];
343 const u32 pw_l_len = pws[gid].pw_len;
345 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
347 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len);
354 const u32 iv = salt_bufs[salt_pos].salt_buf[0];
356 const u32 search[4] =
358 digests_buf[digests_offset].digest_buf[DGST_R0],
359 digests_buf[digests_offset].digest_buf[DGST_R1],
360 digests_buf[digests_offset].digest_buf[DGST_R2],
361 digests_buf[digests_offset].digest_buf[DGST_R3]
368 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
370 const u32 pw_r_len = c_combs[il_pos].pw_len;
372 const u32 pw_len = pw_l_len + pw_r_len;
376 wordr0[0] = c_combs[il_pos].i[0];
377 wordr0[1] = c_combs[il_pos].i[1];
378 wordr0[2] = c_combs[il_pos].i[2];
379 wordr0[3] = c_combs[il_pos].i[3];
383 wordr1[0] = c_combs[il_pos].i[4];
384 wordr1[1] = c_combs[il_pos].i[5];
385 wordr1[2] = c_combs[il_pos].i[6];
386 wordr1[3] = c_combs[il_pos].i[7];
402 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
404 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
409 w_t[ 0] = wordl0[0] | wordr0[0];
410 w_t[ 1] = wordl0[1] | wordr0[1];
411 w_t[ 2] = wordl0[2] | wordr0[2];
412 w_t[ 3] = wordl0[3] | wordr0[3];
413 w_t[ 4] = wordl1[0] | wordr1[0];
414 w_t[ 5] = wordl1[1] | wordr1[1];
415 w_t[ 6] = wordl1[2] | wordr1[2];
416 w_t[ 7] = wordl1[3] | wordr1[3];
417 w_t[ 8] = wordl2[0] | wordr2[0];
418 w_t[ 9] = wordl2[1] | wordr2[1];
419 w_t[10] = wordl2[2] | wordr2[2];
420 w_t[11] = wordl2[3] | wordr2[3];
421 w_t[12] = wordl3[0] | wordr3[0];
422 w_t[13] = wordl3[1] | wordr3[1];
423 w_t[14] = wordl3[2] | wordr3[2];
426 u32x a = crc32 (w_t, pw_len, iv);
434 #include VECT_COMPARE_S
438 extern "C" __global__ void __launch_bounds__ (256, 1) m11500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
442 extern "C" __global__ void __launch_bounds__ (256, 1) m11500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)