2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
33 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
34 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
38 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
39 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
43 #define VECT_COMPARE_S "check_single_vect4_comp4.c"
44 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
49 #define SBOG_LPSti64 \
50 s_sbob_sl64[0][(t[0] >> (i * 8)) & 0xff] ^ \
51 s_sbob_sl64[1][(t[1] >> (i * 8)) & 0xff] ^ \
52 s_sbob_sl64[2][(t[2] >> (i * 8)) & 0xff] ^ \
53 s_sbob_sl64[3][(t[3] >> (i * 8)) & 0xff] ^ \
54 s_sbob_sl64[4][(t[4] >> (i * 8)) & 0xff] ^ \
55 s_sbob_sl64[5][(t[5] >> (i * 8)) & 0xff] ^ \
56 s_sbob_sl64[6][(t[6] >> (i * 8)) & 0xff] ^ \
57 s_sbob_sl64[7][(t[7] >> (i * 8)) & 0xff]
61 __constant u64 sbob_sl64[8][256] =
2129 __constant u64 sbob_rc64[12][8] =
2253 static void streebog_g (u64 h[8], const u64 m[8], __local u64 s_sbob_sl64[8][256])
2260 for (int i = 0; i < 8; i++)
2265 for (int i = 0; i < 8; i++)
2267 k[i] = SBOG_LPSti64;
2271 for (int i = 0; i < 8; i++)
2276 for (int r = 0; r < 12; r++)
2279 for (int i = 0; i < 8; i++)
2285 for (int i = 0; i < 8; i++)
2287 s[i] = SBOG_LPSti64;
2290 for (int i = 0; i < 8; i++)
2292 t[i] = k[i] ^ sbob_rc64[r][i];
2296 for (int i = 0; i < 8; i++)
2298 k[i] = SBOG_LPSti64;
2303 for (int i = 0; i < 8; i++)
2305 h[i] ^= s[i] ^ k[i] ^ m[i];
2309 static void m11800m (__local u64 s_sbob_sl64[8][256], u32x w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
2315 const u32 gid = get_global_id (0);
2316 const u32 lid = get_local_id (0);
2324 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
2326 const u32 w0r = bfs_buf[il_pos].i;
2331 * reverse message block
2336 m[0] = hl32_to_64 (w[15], w[14]);
2337 m[1] = hl32_to_64 (w[13], w[12]);
2338 m[2] = hl32_to_64 (w[11], w[10]);
2339 m[3] = hl32_to_64 (w[ 9], w[ 8]);
2340 m[4] = hl32_to_64 (w[ 7], w[ 6]);
2341 m[5] = hl32_to_64 (w[ 5], w[ 4]);
2342 m[6] = hl32_to_64 (w[ 3], w[ 2]);
2343 m[7] = hl32_to_64 (w[ 1], w[ 0]);
2345 m[0] = swap_workaround (m[0]);
2346 m[1] = swap_workaround (m[1]);
2347 m[2] = swap_workaround (m[2]);
2348 m[3] = swap_workaround (m[3]);
2349 m[4] = swap_workaround (m[4]);
2350 m[5] = swap_workaround (m[5]);
2351 m[6] = swap_workaround (m[6]);
2352 m[7] = swap_workaround (m[7]);
2354 // state buffer (hash)
2367 streebog_g (h, m, s_sbob_sl64);
2378 z[7] = swap_workaround ((u64) (pw_len * 8));
2380 streebog_g (h, z, s_sbob_sl64);
2381 streebog_g (h, m, s_sbob_sl64);
2383 const u32 r0 = l32_from_64 (h[0]);
2384 const u32 r1 = h32_from_64 (h[0]);
2385 const u32 r2 = l32_from_64 (h[1]);
2386 const u32 r3 = h32_from_64 (h[1]);
2388 #include VECT_COMPARE_M
2392 static void m11800s (__local u64 s_sbob_sl64[8][256], u32x w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
2398 const u32 gid = get_global_id (0);
2399 const u32 lid = get_local_id (0);
2405 const u32 search[4] =
2407 digests_buf[digests_offset].digest_buf[DGST_R0],
2408 digests_buf[digests_offset].digest_buf[DGST_R1],
2409 digests_buf[digests_offset].digest_buf[DGST_R2],
2410 digests_buf[digests_offset].digest_buf[DGST_R3]
2419 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
2421 const u32 w0r = bfs_buf[il_pos].i;
2426 * reverse message block
2431 m[0] = hl32_to_64 (w[15], w[14]);
2432 m[1] = hl32_to_64 (w[13], w[12]);
2433 m[2] = hl32_to_64 (w[11], w[10]);
2434 m[3] = hl32_to_64 (w[ 9], w[ 8]);
2435 m[4] = hl32_to_64 (w[ 7], w[ 6]);
2436 m[5] = hl32_to_64 (w[ 5], w[ 4]);
2437 m[6] = hl32_to_64 (w[ 3], w[ 2]);
2438 m[7] = hl32_to_64 (w[ 1], w[ 0]);
2440 m[0] = swap_workaround (m[0]);
2441 m[1] = swap_workaround (m[1]);
2442 m[2] = swap_workaround (m[2]);
2443 m[3] = swap_workaround (m[3]);
2444 m[4] = swap_workaround (m[4]);
2445 m[5] = swap_workaround (m[5]);
2446 m[6] = swap_workaround (m[6]);
2447 m[7] = swap_workaround (m[7]);
2449 // state buffer (hash)
2462 streebog_g (h, m, s_sbob_sl64);
2473 z[7] = swap_workaround ((u64) (pw_len * 8));
2475 streebog_g (h, z, s_sbob_sl64);
2476 streebog_g (h, m, s_sbob_sl64);
2478 const u32 r0 = l32_from_64 (h[0]);
2479 const u32 r1 = h32_from_64 (h[0]);
2480 const u32 r2 = l32_from_64 (h[1]);
2481 const u32 r3 = h32_from_64 (h[1]);
2483 #include VECT_COMPARE_S
2487 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2493 const u32 gid = get_global_id (0);
2494 const u32 lid = get_local_id (0);
2498 w[ 0] = pws[gid].i[ 0];
2499 w[ 1] = pws[gid].i[ 1];
2500 w[ 2] = pws[gid].i[ 2];
2501 w[ 3] = pws[gid].i[ 3];
2515 const u32 pw_len = pws[gid].pw_len;
2518 * shared lookup table
2521 const u32 lid4 = lid * 4;
2523 __local u64 s_sbob_sl64[8][256];
2525 s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0];
2526 s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1];
2527 s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2];
2528 s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3];
2529 s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0];
2530 s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1];
2531 s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2];
2532 s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3];
2533 s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0];
2534 s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1];
2535 s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2];
2536 s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3];
2537 s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0];
2538 s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1];
2539 s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2];
2540 s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3];
2541 s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0];
2542 s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1];
2543 s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2];
2544 s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3];
2545 s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0];
2546 s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1];
2547 s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2];
2548 s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3];
2549 s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0];
2550 s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1];
2551 s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2];
2552 s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3];
2553 s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0];
2554 s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1];
2555 s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2];
2556 s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3];
2558 barrier (CLK_LOCAL_MEM_FENCE);
2560 if (gid >= gid_max) return;
2566 m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2569 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2575 const u32 gid = get_global_id (0);
2576 const u32 lid = get_local_id (0);
2580 w[ 0] = pws[gid].i[ 0];
2581 w[ 1] = pws[gid].i[ 1];
2582 w[ 2] = pws[gid].i[ 2];
2583 w[ 3] = pws[gid].i[ 3];
2584 w[ 4] = pws[gid].i[ 4];
2585 w[ 5] = pws[gid].i[ 5];
2586 w[ 6] = pws[gid].i[ 6];
2587 w[ 7] = pws[gid].i[ 7];
2597 const u32 pw_len = pws[gid].pw_len;
2600 * shared lookup table
2603 const u32 lid4 = lid * 4;
2605 __local u64 s_sbob_sl64[8][256];
2607 s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0];
2608 s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1];
2609 s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2];
2610 s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3];
2611 s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0];
2612 s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1];
2613 s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2];
2614 s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3];
2615 s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0];
2616 s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1];
2617 s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2];
2618 s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3];
2619 s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0];
2620 s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1];
2621 s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2];
2622 s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3];
2623 s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0];
2624 s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1];
2625 s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2];
2626 s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3];
2627 s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0];
2628 s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1];
2629 s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2];
2630 s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3];
2631 s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0];
2632 s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1];
2633 s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2];
2634 s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3];
2635 s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0];
2636 s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1];
2637 s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2];
2638 s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3];
2640 barrier (CLK_LOCAL_MEM_FENCE);
2642 if (gid >= gid_max) return;
2648 m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2651 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2657 const u32 gid = get_global_id (0);
2658 const u32 lid = get_local_id (0);
2662 w[ 0] = pws[gid].i[ 0];
2663 w[ 1] = pws[gid].i[ 1];
2664 w[ 2] = pws[gid].i[ 2];
2665 w[ 3] = pws[gid].i[ 3];
2666 w[ 4] = pws[gid].i[ 4];
2667 w[ 5] = pws[gid].i[ 5];
2668 w[ 6] = pws[gid].i[ 6];
2669 w[ 7] = pws[gid].i[ 7];
2670 w[ 8] = pws[gid].i[ 8];
2671 w[ 9] = pws[gid].i[ 9];
2672 w[10] = pws[gid].i[10];
2673 w[11] = pws[gid].i[11];
2674 w[12] = pws[gid].i[12];
2675 w[13] = pws[gid].i[13];
2676 w[14] = pws[gid].i[14];
2677 w[15] = pws[gid].i[15];
2679 const u32 pw_len = pws[gid].pw_len;
2682 * shared lookup table
2685 const u32 lid4 = lid * 4;
2687 __local u64 s_sbob_sl64[8][256];
2689 s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0];
2690 s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1];
2691 s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2];
2692 s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3];
2693 s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0];
2694 s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1];
2695 s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2];
2696 s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3];
2697 s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0];
2698 s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1];
2699 s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2];
2700 s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3];
2701 s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0];
2702 s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1];
2703 s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2];
2704 s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3];
2705 s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0];
2706 s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1];
2707 s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2];
2708 s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3];
2709 s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0];
2710 s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1];
2711 s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2];
2712 s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3];
2713 s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0];
2714 s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1];
2715 s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2];
2716 s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3];
2717 s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0];
2718 s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1];
2719 s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2];
2720 s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3];
2722 barrier (CLK_LOCAL_MEM_FENCE);
2724 if (gid >= gid_max) return;
2730 m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2733 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2739 const u32 gid = get_global_id (0);
2740 const u32 lid = get_local_id (0);
2744 w[ 0] = pws[gid].i[ 0];
2745 w[ 1] = pws[gid].i[ 1];
2746 w[ 2] = pws[gid].i[ 2];
2747 w[ 3] = pws[gid].i[ 3];
2761 const u32 pw_len = pws[gid].pw_len;
2764 * shared lookup table
2767 const u32 lid4 = lid * 4;
2769 __local u64 s_sbob_sl64[8][256];
2771 s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0];
2772 s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1];
2773 s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2];
2774 s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3];
2775 s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0];
2776 s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1];
2777 s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2];
2778 s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3];
2779 s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0];
2780 s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1];
2781 s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2];
2782 s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3];
2783 s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0];
2784 s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1];
2785 s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2];
2786 s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3];
2787 s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0];
2788 s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1];
2789 s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2];
2790 s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3];
2791 s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0];
2792 s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1];
2793 s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2];
2794 s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3];
2795 s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0];
2796 s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1];
2797 s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2];
2798 s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3];
2799 s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0];
2800 s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1];
2801 s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2];
2802 s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3];
2804 barrier (CLK_LOCAL_MEM_FENCE);
2806 if (gid >= gid_max) return;
2812 m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2815 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2821 const u32 gid = get_global_id (0);
2822 const u32 lid = get_local_id (0);
2826 w[ 0] = pws[gid].i[ 0];
2827 w[ 1] = pws[gid].i[ 1];
2828 w[ 2] = pws[gid].i[ 2];
2829 w[ 3] = pws[gid].i[ 3];
2830 w[ 4] = pws[gid].i[ 4];
2831 w[ 5] = pws[gid].i[ 5];
2832 w[ 6] = pws[gid].i[ 6];
2833 w[ 7] = pws[gid].i[ 7];
2843 const u32 pw_len = pws[gid].pw_len;
2846 * shared lookup table
2849 const u32 lid4 = lid * 4;
2851 __local u64 s_sbob_sl64[8][256];
2853 s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0];
2854 s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1];
2855 s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2];
2856 s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3];
2857 s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0];
2858 s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1];
2859 s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2];
2860 s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3];
2861 s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0];
2862 s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1];
2863 s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2];
2864 s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3];
2865 s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0];
2866 s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1];
2867 s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2];
2868 s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3];
2869 s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0];
2870 s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1];
2871 s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2];
2872 s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3];
2873 s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0];
2874 s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1];
2875 s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2];
2876 s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3];
2877 s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0];
2878 s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1];
2879 s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2];
2880 s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3];
2881 s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0];
2882 s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1];
2883 s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2];
2884 s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3];
2886 barrier (CLK_LOCAL_MEM_FENCE);
2888 if (gid >= gid_max) return;
2894 m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2897 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2903 const u32 gid = get_global_id (0);
2904 const u32 lid = get_local_id (0);
2908 w[ 0] = pws[gid].i[ 0];
2909 w[ 1] = pws[gid].i[ 1];
2910 w[ 2] = pws[gid].i[ 2];
2911 w[ 3] = pws[gid].i[ 3];
2912 w[ 4] = pws[gid].i[ 4];
2913 w[ 5] = pws[gid].i[ 5];
2914 w[ 6] = pws[gid].i[ 6];
2915 w[ 7] = pws[gid].i[ 7];
2916 w[ 8] = pws[gid].i[ 8];
2917 w[ 9] = pws[gid].i[ 9];
2918 w[10] = pws[gid].i[10];
2919 w[11] = pws[gid].i[11];
2920 w[12] = pws[gid].i[12];
2921 w[13] = pws[gid].i[13];
2922 w[14] = pws[gid].i[14];
2923 w[15] = pws[gid].i[15];
2925 const u32 pw_len = pws[gid].pw_len;
2928 * shared lookup table
2931 const u32 lid4 = lid * 4;
2933 __local u64 s_sbob_sl64[8][256];
2935 s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0];
2936 s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1];
2937 s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2];
2938 s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3];
2939 s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0];
2940 s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1];
2941 s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2];
2942 s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3];
2943 s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0];
2944 s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1];
2945 s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2];
2946 s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3];
2947 s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0];
2948 s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1];
2949 s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2];
2950 s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3];
2951 s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0];
2952 s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1];
2953 s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2];
2954 s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3];
2955 s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0];
2956 s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1];
2957 s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2];
2958 s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3];
2959 s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0];
2960 s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1];
2961 s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2];
2962 s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3];
2963 s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0];
2964 s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1];
2965 s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2];
2966 s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3];
2968 barrier (CLK_LOCAL_MEM_FENCE);
2970 if (gid >= gid_max) return;
2976 m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);