2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
27 #include "include/rp_gpu.h"
31 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
32 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
36 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
41 #define VECT_COMPARE_S "check_single_vect4_comp4.c"
42 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
45 __device__ const u32 crc32tab[0x100] =
47 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
48 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
49 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
50 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
51 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
52 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
53 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
54 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
55 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
56 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
57 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
58 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
59 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
60 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
61 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
62 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
63 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
64 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
65 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
66 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
67 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
68 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
69 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
70 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
71 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
72 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
73 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
74 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
75 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
76 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
77 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
78 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
79 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
80 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
81 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
82 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
83 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
84 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
85 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
86 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
87 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
88 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
89 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
90 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
91 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
92 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
93 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
94 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
95 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
96 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
97 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
98 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
99 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
100 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
101 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
102 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
103 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
104 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
105 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
106 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
107 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
108 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
109 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
110 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
113 __device__ static u32x round_crc32 (u32x a, const u32x v)
115 const u32x k = (a ^ v) & 0xff;
117 const u32x s = a >> 8;
140 __device__ static u32x crc32 (const u32x w[16], const u32 pw_len, const u32 iv)
144 if (pw_len >= 1) a = round_crc32 (a, w[0] >> 0);
145 if (pw_len >= 2) a = round_crc32 (a, w[0] >> 8);
146 if (pw_len >= 3) a = round_crc32 (a, w[0] >> 16);
147 if (pw_len >= 4) a = round_crc32 (a, w[0] >> 24);
149 for (u32 i = 4, j = 1; i < pw_len; i += 4, j += 1)
151 if (pw_len >= (i + 1)) a = round_crc32 (a, w[j] >> 0);
152 if (pw_len >= (i + 2)) a = round_crc32 (a, w[j] >> 8);
153 if (pw_len >= (i + 3)) a = round_crc32 (a, w[j] >> 16);
154 if (pw_len >= (i + 4)) a = round_crc32 (a, w[j] >> 24);
160 __device__ __constant__ gpu_rule_t c_rules[1024];
162 extern "C" __global__ void __launch_bounds__ (256, 1) m11500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
168 const u32 lid = threadIdx.x;
174 const u32 iv = salt_bufs[salt_pos].salt_buf[0];
180 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
182 if (gid >= gid_max) return;
186 pw_buf0[0] = pws[gid].i[ 0];
187 pw_buf0[1] = pws[gid].i[ 1];
188 pw_buf0[2] = pws[gid].i[ 2];
189 pw_buf0[3] = pws[gid].i[ 3];
193 pw_buf1[0] = pws[gid].i[ 4];
194 pw_buf1[1] = pws[gid].i[ 5];
195 pw_buf1[2] = pws[gid].i[ 6];
196 pw_buf1[3] = pws[gid].i[ 7];
198 const u32 pw_len = pws[gid].pw_len;
204 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
220 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
241 u32x a = crc32 (w_t, out_len, iv);
249 #include VECT_COMPARE_M
253 extern "C" __global__ void __launch_bounds__ (256, 1) m11500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
257 extern "C" __global__ void __launch_bounds__ (256, 1) m11500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
261 extern "C" __global__ void __launch_bounds__ (256, 1) m11500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
267 const u32 lid = threadIdx.x;
273 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
275 if (gid >= gid_max) return;
279 pw_buf0[0] = pws[gid].i[ 0];
280 pw_buf0[1] = pws[gid].i[ 1];
281 pw_buf0[2] = pws[gid].i[ 2];
282 pw_buf0[3] = pws[gid].i[ 3];
286 pw_buf1[0] = pws[gid].i[ 4];
287 pw_buf1[1] = pws[gid].i[ 5];
288 pw_buf1[2] = pws[gid].i[ 6];
289 pw_buf1[3] = pws[gid].i[ 7];
291 const u32 pw_len = pws[gid].pw_len;
297 const u32 iv = salt_bufs[salt_pos].salt_buf[0];
299 const u32 search[4] =
301 digests_buf[digests_offset].digest_buf[DGST_R0],
302 digests_buf[digests_offset].digest_buf[DGST_R1],
303 digests_buf[digests_offset].digest_buf[DGST_R2],
304 digests_buf[digests_offset].digest_buf[DGST_R3]
311 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
327 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
348 u32x a = crc32 (w_t, out_len, iv);
356 #include VECT_COMPARE_S
360 extern "C" __global__ void __launch_bounds__ (256, 1) m11500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
364 extern "C" __global__ void __launch_bounds__ (256, 1) m11500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)