2 * Author......: Jens Steube <jens.steube@gmail.com>
9 #include "include/constants.h"
10 #include "include/kernel_vendor.h"
29 #include "include/kernel_functions.c"
30 #include "types_amd.c"
31 #include "common_amd.c"
34 #define VECT_COMPARE_S "check_single_vect1_comp4_warp.c"
35 #define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c"
39 #define VECT_COMPARE_S "check_single_vect2_comp4_warp.c"
40 #define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c"
44 #define VECT_COMPARE_S "check_single_vect4_comp4_warp.c"
45 #define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c"
48 static void m00200m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
54 const u32 gid = get_global_id (0);
55 const u32 lid = get_local_id (0);
61 const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV);
65 for (u32 il_pos = 0; il_pos < bf_loops; il_pos++)
67 const u32x w0r = words_buf_r[il_pos];
69 const u32x w0 = w0l | w0r;
78 a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \
85 ROUND ((w0 >> 0) & 0xff);
86 ROUND ((w0 >> 8) & 0xff);
87 ROUND ((w0 >> 16) & 0xff);
88 ROUND ((w0 >> 24) & 0xff);
92 ROUND ((w0 >> 0) & 0xff);
93 ROUND ((w0 >> 8) & 0xff);
94 ROUND ((w0 >> 16) & 0xff);
98 ROUND ((w0 >> 0) & 0xff);
99 ROUND ((w0 >> 8) & 0xff);
101 else if (pw_len == 1)
103 ROUND ((w0 >> 0) & 0xff);
109 for (i = 4, j = 1; i <= (int) pw_len - 4; i += 4, j += 1)
113 ROUND ((wj >> 0) & 0xff);
114 ROUND ((wj >> 8) & 0xff);
115 ROUND ((wj >> 16) & 0xff);
116 ROUND ((wj >> 24) & 0xff);
121 const u32 left = pw_len - i;
125 ROUND ((wj >> 0) & 0xff);
126 ROUND ((wj >> 8) & 0xff);
127 ROUND ((wj >> 16) & 0xff);
131 ROUND ((wj >> 0) & 0xff);
132 ROUND ((wj >> 8) & 0xff);
136 ROUND ((wj >> 0) & 0xff);
147 #include VECT_COMPARE_M
151 static void m00200s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
157 const u32 gid = get_global_id (0);
158 const u32 lid = get_local_id (0);
164 const u32 search[4] =
166 digests_buf[digests_offset].digest_buf[DGST_R0],
167 digests_buf[digests_offset].digest_buf[DGST_R1],
168 digests_buf[digests_offset].digest_buf[DGST_R2],
169 digests_buf[digests_offset].digest_buf[DGST_R3]
176 const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV);
180 for (u32 il_pos = 0; il_pos < bf_loops; il_pos++)
182 const u32x w0r = words_buf_r[il_pos];
184 const u32x w0 = w0l | w0r;
193 a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \
200 ROUND ((w0 >> 0) & 0xff);
201 ROUND ((w0 >> 8) & 0xff);
202 ROUND ((w0 >> 16) & 0xff);
203 ROUND ((w0 >> 24) & 0xff);
205 else if (pw_len == 3)
207 ROUND ((w0 >> 0) & 0xff);
208 ROUND ((w0 >> 8) & 0xff);
209 ROUND ((w0 >> 16) & 0xff);
211 else if (pw_len == 2)
213 ROUND ((w0 >> 0) & 0xff);
214 ROUND ((w0 >> 8) & 0xff);
216 else if (pw_len == 1)
218 ROUND ((w0 >> 0) & 0xff);
224 for (i = 4, j = 1; i <= (int) pw_len - 4; i += 4, j += 1)
228 ROUND ((wj >> 0) & 0xff);
229 ROUND ((wj >> 8) & 0xff);
230 ROUND ((wj >> 16) & 0xff);
231 ROUND ((wj >> 24) & 0xff);
236 const u32 left = pw_len - i;
240 ROUND ((wj >> 0) & 0xff);
241 ROUND ((wj >> 8) & 0xff);
242 ROUND ((wj >> 16) & 0xff);
246 ROUND ((wj >> 0) & 0xff);
247 ROUND ((wj >> 8) & 0xff);
251 ROUND ((wj >> 0) & 0xff);
262 #include VECT_COMPARE_S
266 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
272 const u32 gid = get_global_id (0);
274 if (gid >= gid_max) return;
278 w[ 0] = pws[gid].i[ 0];
279 w[ 1] = pws[gid].i[ 1];
280 w[ 2] = pws[gid].i[ 2];
281 w[ 3] = pws[gid].i[ 3];
295 const u32 pw_len = pws[gid].pw_len;
301 m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
304 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
310 const u32 gid = get_global_id (0);
312 if (gid >= gid_max) return;
316 w[ 0] = pws[gid].i[ 0];
317 w[ 1] = pws[gid].i[ 1];
318 w[ 2] = pws[gid].i[ 2];
319 w[ 3] = pws[gid].i[ 3];
320 w[ 4] = pws[gid].i[ 4];
321 w[ 5] = pws[gid].i[ 5];
322 w[ 6] = pws[gid].i[ 6];
323 w[ 7] = pws[gid].i[ 7];
333 const u32 pw_len = pws[gid].pw_len;
339 m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
342 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
348 const u32 gid = get_global_id (0);
350 if (gid >= gid_max) return;
354 w[ 0] = pws[gid].i[ 0];
355 w[ 1] = pws[gid].i[ 1];
356 w[ 2] = pws[gid].i[ 2];
357 w[ 3] = pws[gid].i[ 3];
358 w[ 4] = pws[gid].i[ 4];
359 w[ 5] = pws[gid].i[ 5];
360 w[ 6] = pws[gid].i[ 6];
361 w[ 7] = pws[gid].i[ 7];
362 w[ 8] = pws[gid].i[ 8];
363 w[ 9] = pws[gid].i[ 9];
364 w[10] = pws[gid].i[10];
365 w[11] = pws[gid].i[11];
366 w[12] = pws[gid].i[12];
367 w[13] = pws[gid].i[13];
368 w[14] = pws[gid].i[14];
369 w[15] = pws[gid].i[15];
371 const u32 pw_len = pws[gid].pw_len;
377 m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
380 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
386 const u32 gid = get_global_id (0);
388 if (gid >= gid_max) return;
392 w[ 0] = pws[gid].i[ 0];
393 w[ 1] = pws[gid].i[ 1];
394 w[ 2] = pws[gid].i[ 2];
395 w[ 3] = pws[gid].i[ 3];
409 const u32 pw_len = pws[gid].pw_len;
415 m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
418 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
424 const u32 gid = get_global_id (0);
426 if (gid >= gid_max) return;
430 w[ 0] = pws[gid].i[ 0];
431 w[ 1] = pws[gid].i[ 1];
432 w[ 2] = pws[gid].i[ 2];
433 w[ 3] = pws[gid].i[ 3];
434 w[ 4] = pws[gid].i[ 4];
435 w[ 5] = pws[gid].i[ 5];
436 w[ 6] = pws[gid].i[ 6];
437 w[ 7] = pws[gid].i[ 7];
447 const u32 pw_len = pws[gid].pw_len;
453 m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
456 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
462 const u32 gid = get_global_id (0);
464 if (gid >= gid_max) return;
468 w[ 0] = pws[gid].i[ 0];
469 w[ 1] = pws[gid].i[ 1];
470 w[ 2] = pws[gid].i[ 2];
471 w[ 3] = pws[gid].i[ 3];
472 w[ 4] = pws[gid].i[ 4];
473 w[ 5] = pws[gid].i[ 5];
474 w[ 6] = pws[gid].i[ 6];
475 w[ 7] = pws[gid].i[ 7];
476 w[ 8] = pws[gid].i[ 8];
477 w[ 9] = pws[gid].i[ 9];
478 w[10] = pws[gid].i[10];
479 w[11] = pws[gid].i[11];
480 w[12] = pws[gid].i[12];
481 w[13] = pws[gid].i[13];
482 w[14] = pws[gid].i[14];
483 w[15] = pws[gid].i[15];
485 const u32 pw_len = pws[gid].pw_len;
491 m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);