2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
33 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
34 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
38 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
39 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
43 #define VECT_COMPARE_S "check_single_vect4_comp4.c"
44 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
47 __constant u32 crc32tab[0x100] =
49 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
50 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
51 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
52 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
53 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
54 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
55 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
56 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
57 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
58 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
59 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
60 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
61 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
62 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
63 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
64 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
65 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
66 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
67 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
68 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
69 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
70 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
71 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
72 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
73 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
74 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
75 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
76 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
77 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
78 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
79 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
80 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
81 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
82 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
83 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
84 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
85 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
86 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
87 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
88 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
89 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
90 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
91 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
92 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
93 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
94 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
95 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
96 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
97 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
98 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
99 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
100 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
101 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
102 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
103 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
104 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
105 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
106 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
107 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
108 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
109 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
110 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
111 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
112 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
115 static u32x round_crc32 (u32x a, const u32x v)
117 const u32x k = (a ^ v) & 0xff;
119 const u32x s = a >> 8;
126 a.s0 = crc32tab[k.s0];
127 a.s1 = crc32tab[k.s1];
131 a.s0 = crc32tab[k.s0];
132 a.s1 = crc32tab[k.s1];
133 a.s2 = crc32tab[k.s2];
134 a.s3 = crc32tab[k.s3];
142 static u32x crc32 (const u32x w[16], const u32 pw_len, const u32 iv)
146 if (pw_len >= 1) a = round_crc32 (a, w[0] >> 0);
147 if (pw_len >= 2) a = round_crc32 (a, w[0] >> 8);
148 if (pw_len >= 3) a = round_crc32 (a, w[0] >> 16);
149 if (pw_len >= 4) a = round_crc32 (a, w[0] >> 24);
151 for (u32 i = 4, j = 1; i < pw_len; i += 4, j += 1)
153 if (pw_len >= (i + 1)) a = round_crc32 (a, w[j] >> 0);
154 if (pw_len >= (i + 2)) a = round_crc32 (a, w[j] >> 8);
155 if (pw_len >= (i + 3)) a = round_crc32 (a, w[j] >> 16);
156 if (pw_len >= (i + 4)) a = round_crc32 (a, w[j] >> 24);
162 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
168 const u32 lid = get_local_id (0);
174 const u32 iv = salt_bufs[salt_pos].salt_buf[0];
180 const u32 gid = get_global_id (0);
182 if (gid >= gid_max) return;
186 wordl0[0] = pws[gid].i[ 0];
187 wordl0[1] = pws[gid].i[ 1];
188 wordl0[2] = pws[gid].i[ 2];
189 wordl0[3] = pws[gid].i[ 3];
193 wordl1[0] = pws[gid].i[ 4];
194 wordl1[1] = pws[gid].i[ 5];
195 wordl1[2] = pws[gid].i[ 6];
196 wordl1[3] = pws[gid].i[ 7];
212 const u32 pw_l_len = pws[gid].pw_len;
214 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
216 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
223 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
225 const u32 pw_r_len = combs_buf[il_pos].pw_len;
227 const u32 pw_len = pw_l_len + pw_r_len;
231 wordr0[0] = combs_buf[il_pos].i[0];
232 wordr0[1] = combs_buf[il_pos].i[1];
233 wordr0[2] = combs_buf[il_pos].i[2];
234 wordr0[3] = combs_buf[il_pos].i[3];
238 wordr1[0] = combs_buf[il_pos].i[4];
239 wordr1[1] = combs_buf[il_pos].i[5];
240 wordr1[2] = combs_buf[il_pos].i[6];
241 wordr1[3] = combs_buf[il_pos].i[7];
257 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
259 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
264 w_t[ 0] = wordl0[0] | wordr0[0];
265 w_t[ 1] = wordl0[1] | wordr0[1];
266 w_t[ 2] = wordl0[2] | wordr0[2];
267 w_t[ 3] = wordl0[3] | wordr0[3];
268 w_t[ 4] = wordl1[0] | wordr1[0];
269 w_t[ 5] = wordl1[1] | wordr1[1];
270 w_t[ 6] = wordl1[2] | wordr1[2];
271 w_t[ 7] = wordl1[3] | wordr1[3];
272 w_t[ 8] = wordl2[0] | wordr2[0];
273 w_t[ 9] = wordl2[1] | wordr2[1];
274 w_t[10] = wordl2[2] | wordr2[2];
275 w_t[11] = wordl2[3] | wordr2[3];
276 w_t[12] = wordl3[0] | wordr3[0];
277 w_t[13] = wordl3[1] | wordr3[1];
278 w_t[14] = wordl3[2] | wordr3[2];
281 u32x a = crc32 (w_t, pw_len, iv);
289 #include VECT_COMPARE_M
293 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
297 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
301 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
307 const u32 lid = get_local_id (0);
313 const u32 gid = get_global_id (0);
315 if (gid >= gid_max) return;
319 wordl0[0] = pws[gid].i[ 0];
320 wordl0[1] = pws[gid].i[ 1];
321 wordl0[2] = pws[gid].i[ 2];
322 wordl0[3] = pws[gid].i[ 3];
326 wordl1[0] = pws[gid].i[ 4];
327 wordl1[1] = pws[gid].i[ 5];
328 wordl1[2] = pws[gid].i[ 6];
329 wordl1[3] = pws[gid].i[ 7];
345 const u32 pw_l_len = pws[gid].pw_len;
347 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
349 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
356 const u32 iv = salt_bufs[salt_pos].salt_buf[0];
358 const u32 search[4] =
360 digests_buf[digests_offset].digest_buf[DGST_R0],
361 digests_buf[digests_offset].digest_buf[DGST_R1],
362 digests_buf[digests_offset].digest_buf[DGST_R2],
363 digests_buf[digests_offset].digest_buf[DGST_R3]
370 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
372 const u32 pw_r_len = combs_buf[il_pos].pw_len;
374 const u32 pw_len = pw_l_len + pw_r_len;
378 wordr0[0] = combs_buf[il_pos].i[0];
379 wordr0[1] = combs_buf[il_pos].i[1];
380 wordr0[2] = combs_buf[il_pos].i[2];
381 wordr0[3] = combs_buf[il_pos].i[3];
385 wordr1[0] = combs_buf[il_pos].i[4];
386 wordr1[1] = combs_buf[il_pos].i[5];
387 wordr1[2] = combs_buf[il_pos].i[6];
388 wordr1[3] = combs_buf[il_pos].i[7];
404 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
406 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
411 w_t[ 0] = wordl0[0] | wordr0[0];
412 w_t[ 1] = wordl0[1] | wordr0[1];
413 w_t[ 2] = wordl0[2] | wordr0[2];
414 w_t[ 3] = wordl0[3] | wordr0[3];
415 w_t[ 4] = wordl1[0] | wordr1[0];
416 w_t[ 5] = wordl1[1] | wordr1[1];
417 w_t[ 6] = wordl1[2] | wordr1[2];
418 w_t[ 7] = wordl1[3] | wordr1[3];
419 w_t[ 8] = wordl2[0] | wordr2[0];
420 w_t[ 9] = wordl2[1] | wordr2[1];
421 w_t[10] = wordl2[2] | wordr2[2];
422 w_t[11] = wordl2[3] | wordr2[3];
423 w_t[12] = wordl3[0] | wordr3[0];
424 w_t[13] = wordl3[1] | wordr3[1];
425 w_t[14] = wordl3[2] | wordr3[2];
428 u32x a = crc32 (w_t, pw_len, iv);
436 #include VECT_COMPARE_S
440 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
444 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)