2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
31 #include "include/rp_gpu.h"
35 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
36 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
40 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
41 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
45 #define VECT_COMPARE_S "check_single_vect4_comp4.c"
46 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
49 __constant u32 crc32tab[0x100] =
51 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
52 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
53 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
54 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
55 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
56 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
57 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
58 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
59 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
60 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
61 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
62 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
63 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
64 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
65 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
66 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
67 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
68 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
69 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
70 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
71 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
72 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
73 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
74 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
75 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
76 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
77 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
78 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
79 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
80 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
81 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
82 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
83 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
84 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
85 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
86 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
87 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
88 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
89 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
90 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
91 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
92 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
93 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
94 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
95 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
96 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
97 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
98 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
99 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
100 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
101 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
102 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
103 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
104 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
105 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
106 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
107 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
108 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
109 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
110 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
111 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
112 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
113 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
114 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
117 static u32x round_crc32 (u32x a, const u32x v)
119 const u32x k = (a ^ v) & 0xff;
121 const u32x s = a >> 8;
128 a.s0 = crc32tab[k.s0];
129 a.s1 = crc32tab[k.s1];
133 a.s0 = crc32tab[k.s0];
134 a.s1 = crc32tab[k.s1];
135 a.s2 = crc32tab[k.s2];
136 a.s3 = crc32tab[k.s3];
144 static u32x crc32 (const u32x w[16], const u32 pw_len, const u32 iv)
148 if (pw_len >= 1) a = round_crc32 (a, w[0] >> 0);
149 if (pw_len >= 2) a = round_crc32 (a, w[0] >> 8);
150 if (pw_len >= 3) a = round_crc32 (a, w[0] >> 16);
151 if (pw_len >= 4) a = round_crc32 (a, w[0] >> 24);
153 for (u32 i = 4, j = 1; i < pw_len; i += 4, j += 1)
155 if (pw_len >= (i + 1)) a = round_crc32 (a, w[j] >> 0);
156 if (pw_len >= (i + 2)) a = round_crc32 (a, w[j] >> 8);
157 if (pw_len >= (i + 3)) a = round_crc32 (a, w[j] >> 16);
158 if (pw_len >= (i + 4)) a = round_crc32 (a, w[j] >> 24);
164 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
170 const u32 lid = get_local_id (0);
176 const u32 iv = salt_bufs[salt_pos].salt_buf[0];
182 const u32 gid = get_global_id (0);
184 if (gid >= gid_max) return;
188 pw_buf0[0] = pws[gid].i[ 0];
189 pw_buf0[1] = pws[gid].i[ 1];
190 pw_buf0[2] = pws[gid].i[ 2];
191 pw_buf0[3] = pws[gid].i[ 3];
195 pw_buf1[0] = pws[gid].i[ 4];
196 pw_buf1[1] = pws[gid].i[ 5];
197 pw_buf1[2] = pws[gid].i[ 6];
198 pw_buf1[3] = pws[gid].i[ 7];
200 const u32 pw_len = pws[gid].pw_len;
206 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
236 const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len);
257 u32x a = crc32 (w_t, out_len, iv);
265 #include VECT_COMPARE_M
269 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
273 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
277 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
283 const u32 lid = get_local_id (0);
289 const u32 gid = get_global_id (0);
291 if (gid >= gid_max) return;
295 pw_buf0[0] = pws[gid].i[ 0];
296 pw_buf0[1] = pws[gid].i[ 1];
297 pw_buf0[2] = pws[gid].i[ 2];
298 pw_buf0[3] = pws[gid].i[ 3];
302 pw_buf1[0] = pws[gid].i[ 4];
303 pw_buf1[1] = pws[gid].i[ 5];
304 pw_buf1[2] = pws[gid].i[ 6];
305 pw_buf1[3] = pws[gid].i[ 7];
307 const u32 pw_len = pws[gid].pw_len;
313 const u32 iv = salt_bufs[salt_pos].salt_buf[0];
315 const u32 search[4] =
317 digests_buf[digests_offset].digest_buf[DGST_R0],
318 digests_buf[digests_offset].digest_buf[DGST_R1],
319 digests_buf[digests_offset].digest_buf[DGST_R2],
320 digests_buf[digests_offset].digest_buf[DGST_R3]
327 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
357 const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len);
378 u32x a = crc32 (w_t, out_len, iv);
386 #include VECT_COMPARE_S
390 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
394 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)