2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * Gabriele Gristina <matrix@hashcat.net>
4 * magnum <john.magnum@hushmail.com>
11 //too much register pressure
12 //#define NEW_SIMD_CODE
14 #include "include/constants.h"
15 #include "include/kernel_vendor.h"
22 #include "include/kernel_functions.c"
23 #include "OpenCL/types_ocl.c"
24 #include "OpenCL/common.c"
25 #include "OpenCL/simd.c"
30 #define BOX(S,n,i) (S)[(n)][(i)]
32 #define BOX(S,n,i) (u64x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
34 #define BOX(S,n,i) (u64x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
36 #define BOX(S,n,i) (u64x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
38 #define BOX(S,n,i) (u64x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
41 #define SBOG_LPSti64 \
42 BOX (s_sbob_sl64, 0, ((t[0] >> (i * 8)) & 0xff)) ^ \
43 BOX (s_sbob_sl64, 1, ((t[1] >> (i * 8)) & 0xff)) ^ \
44 BOX (s_sbob_sl64, 2, ((t[2] >> (i * 8)) & 0xff)) ^ \
45 BOX (s_sbob_sl64, 3, ((t[3] >> (i * 8)) & 0xff)) ^ \
46 BOX (s_sbob_sl64, 4, ((t[4] >> (i * 8)) & 0xff)) ^ \
47 BOX (s_sbob_sl64, 5, ((t[5] >> (i * 8)) & 0xff)) ^ \
48 BOX (s_sbob_sl64, 6, ((t[6] >> (i * 8)) & 0xff)) ^ \
49 BOX (s_sbob_sl64, 7, ((t[7] >> (i * 8)) & 0xff))
53 __constant u64 sbob_sl64[8][256] =
2121 __constant u64 sbob_rc64[12][8] =
2245 void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
2254 for (int i = 0; i < 8; i++)
2259 for (int i = 0; i < 8; i++)
2261 k[i] = SBOG_LPSti64;
2267 for (int i = 0; i < 8; i++)
2272 for (int r = 0; r < 12; r++)
2277 for (int i = 0; i < 8; i++)
2285 for (int i = 0; i < 8; i++)
2287 s[i] = SBOG_LPSti64;
2290 for (int i = 0; i < 8; i++)
2292 t[i] = k[i] ^ sbob_rc64[r][i];
2298 for (int i = 0; i < 8; i++)
2300 k[i] = SBOG_LPSti64;
2307 for (int i = 0; i < 8; i++)
2309 h[i] ^= s[i] ^ k[i] ^ m[i];
2313 void m11800m (__local u64 (*s_sbob_sl64)[256], u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
2319 const u32 gid = get_global_id (0);
2320 const u32 lid = get_local_id (0);
2328 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
2330 const u32x w0r = ix_create_bft (bfs_buf, il_pos);
2332 const u32x w0lr = w0l | w0r;
2340 m[0] = hl32_to_64 (w[15], w[14]);
2341 m[1] = hl32_to_64 (w[13], w[12]);
2342 m[2] = hl32_to_64 (w[11], w[10]);
2343 m[3] = hl32_to_64 (w[ 9], w[ 8]);
2344 m[4] = hl32_to_64 (w[ 7], w[ 6]);
2345 m[5] = hl32_to_64 (w[ 5], w[ 4]);
2346 m[6] = hl32_to_64 (w[ 3], w[ 2]);
2347 m[7] = hl32_to_64 (w[ 1], w0lr );
2349 m[0] = swap64 (m[0]);
2350 m[1] = swap64 (m[1]);
2351 m[2] = swap64 (m[2]);
2352 m[3] = swap64 (m[3]);
2353 m[4] = swap64 (m[4]);
2354 m[5] = swap64 (m[5]);
2355 m[6] = swap64 (m[6]);
2356 m[7] = swap64 (m[7]);
2358 // state buffer (hash)
2371 streebog_g (h, m, s_sbob_sl64);
2382 z[7] = swap64 ((u64) (pw_len * 8));
2384 streebog_g (h, z, s_sbob_sl64);
2385 streebog_g (h, m, s_sbob_sl64);
2387 const u32x r0 = l32_from_64 (h[0]);
2388 const u32x r1 = h32_from_64 (h[0]);
2389 const u32x r2 = l32_from_64 (h[1]);
2390 const u32x r3 = h32_from_64 (h[1]);
2392 COMPARE_M_SIMD (r0, r1, r2, r3);
2396 void m11800s (__local u64 (*s_sbob_sl64)[256], u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
2402 const u32 gid = get_global_id (0);
2403 const u32 lid = get_local_id (0);
2409 const u32 search[4] =
2411 digests_buf[digests_offset].digest_buf[DGST_R0],
2412 digests_buf[digests_offset].digest_buf[DGST_R1],
2413 digests_buf[digests_offset].digest_buf[DGST_R2],
2414 digests_buf[digests_offset].digest_buf[DGST_R3]
2423 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
2425 const u32x w0r = ix_create_bft (bfs_buf, il_pos);
2427 const u32x w0lr = w0l | w0r;
2435 m[0] = hl32_to_64 (w[15], w[14]);
2436 m[1] = hl32_to_64 (w[13], w[12]);
2437 m[2] = hl32_to_64 (w[11], w[10]);
2438 m[3] = hl32_to_64 (w[ 9], w[ 8]);
2439 m[4] = hl32_to_64 (w[ 7], w[ 6]);
2440 m[5] = hl32_to_64 (w[ 5], w[ 4]);
2441 m[6] = hl32_to_64 (w[ 3], w[ 2]);
2442 m[7] = hl32_to_64 (w[ 1], w0lr );
2444 m[0] = swap64 (m[0]);
2445 m[1] = swap64 (m[1]);
2446 m[2] = swap64 (m[2]);
2447 m[3] = swap64 (m[3]);
2448 m[4] = swap64 (m[4]);
2449 m[5] = swap64 (m[5]);
2450 m[6] = swap64 (m[6]);
2451 m[7] = swap64 (m[7]);
2453 // state buffer (hash)
2466 streebog_g (h, m, s_sbob_sl64);
2477 z[7] = swap64 ((u64) (pw_len * 8));
2479 streebog_g (h, z, s_sbob_sl64);
2480 streebog_g (h, m, s_sbob_sl64);
2482 const u32x r0 = l32_from_64 (h[0]);
2483 const u32x r1 = h32_from_64 (h[0]);
2484 const u32x r2 = l32_from_64 (h[1]);
2485 const u32x r3 = h32_from_64 (h[1]);
2487 COMPARE_S_SIMD (r0, r1, r2, r3);
2491 __kernel void m11800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2497 const u32 gid = get_global_id (0);
2498 const u32 lid = get_local_id (0);
2499 const u32 lsz = get_local_size (0);
2502 * shared lookup table
2505 __local u64 s_sbob_sl64[8][256];
2507 for (u32 i = lid; i < 256; i += lsz)
2509 s_sbob_sl64[0][i] = sbob_sl64[0][i];
2510 s_sbob_sl64[1][i] = sbob_sl64[1][i];
2511 s_sbob_sl64[2][i] = sbob_sl64[2][i];
2512 s_sbob_sl64[3][i] = sbob_sl64[3][i];
2513 s_sbob_sl64[4][i] = sbob_sl64[4][i];
2514 s_sbob_sl64[5][i] = sbob_sl64[5][i];
2515 s_sbob_sl64[6][i] = sbob_sl64[6][i];
2516 s_sbob_sl64[7][i] = sbob_sl64[7][i];
2519 barrier (CLK_LOCAL_MEM_FENCE);
2521 if (gid >= gid_max) return;
2529 w[ 0] = pws[gid].i[ 0];
2530 w[ 1] = pws[gid].i[ 1];
2531 w[ 2] = pws[gid].i[ 2];
2532 w[ 3] = pws[gid].i[ 3];
2546 const u32 pw_len = pws[gid].pw_len;
2552 m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
2555 __kernel void m11800_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2561 const u32 gid = get_global_id (0);
2562 const u32 lid = get_local_id (0);
2563 const u32 lsz = get_local_size (0);
2566 * shared lookup table
2569 __local u64 s_sbob_sl64[8][256];
2571 for (u32 i = lid; i < 256; i += lsz)
2573 s_sbob_sl64[0][i] = sbob_sl64[0][i];
2574 s_sbob_sl64[1][i] = sbob_sl64[1][i];
2575 s_sbob_sl64[2][i] = sbob_sl64[2][i];
2576 s_sbob_sl64[3][i] = sbob_sl64[3][i];
2577 s_sbob_sl64[4][i] = sbob_sl64[4][i];
2578 s_sbob_sl64[5][i] = sbob_sl64[5][i];
2579 s_sbob_sl64[6][i] = sbob_sl64[6][i];
2580 s_sbob_sl64[7][i] = sbob_sl64[7][i];
2583 barrier (CLK_LOCAL_MEM_FENCE);
2585 if (gid >= gid_max) return;
2593 w[ 0] = pws[gid].i[ 0];
2594 w[ 1] = pws[gid].i[ 1];
2595 w[ 2] = pws[gid].i[ 2];
2596 w[ 3] = pws[gid].i[ 3];
2597 w[ 4] = pws[gid].i[ 4];
2598 w[ 5] = pws[gid].i[ 5];
2599 w[ 6] = pws[gid].i[ 6];
2600 w[ 7] = pws[gid].i[ 7];
2610 const u32 pw_len = pws[gid].pw_len;
2616 m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
2619 __kernel void m11800_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2625 const u32 gid = get_global_id (0);
2626 const u32 lid = get_local_id (0);
2627 const u32 lsz = get_local_size (0);
2630 * shared lookup table
2633 __local u64 s_sbob_sl64[8][256];
2635 for (u32 i = lid; i < 256; i += lsz)
2637 s_sbob_sl64[0][i] = sbob_sl64[0][i];
2638 s_sbob_sl64[1][i] = sbob_sl64[1][i];
2639 s_sbob_sl64[2][i] = sbob_sl64[2][i];
2640 s_sbob_sl64[3][i] = sbob_sl64[3][i];
2641 s_sbob_sl64[4][i] = sbob_sl64[4][i];
2642 s_sbob_sl64[5][i] = sbob_sl64[5][i];
2643 s_sbob_sl64[6][i] = sbob_sl64[6][i];
2644 s_sbob_sl64[7][i] = sbob_sl64[7][i];
2647 barrier (CLK_LOCAL_MEM_FENCE);
2649 if (gid >= gid_max) return;
2657 w[ 0] = pws[gid].i[ 0];
2658 w[ 1] = pws[gid].i[ 1];
2659 w[ 2] = pws[gid].i[ 2];
2660 w[ 3] = pws[gid].i[ 3];
2661 w[ 4] = pws[gid].i[ 4];
2662 w[ 5] = pws[gid].i[ 5];
2663 w[ 6] = pws[gid].i[ 6];
2664 w[ 7] = pws[gid].i[ 7];
2665 w[ 8] = pws[gid].i[ 8];
2666 w[ 9] = pws[gid].i[ 9];
2667 w[10] = pws[gid].i[10];
2668 w[11] = pws[gid].i[11];
2669 w[12] = pws[gid].i[12];
2670 w[13] = pws[gid].i[13];
2671 w[14] = pws[gid].i[14];
2672 w[15] = pws[gid].i[15];
2674 const u32 pw_len = pws[gid].pw_len;
2680 m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
2683 __kernel void m11800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2689 const u32 gid = get_global_id (0);
2690 const u32 lid = get_local_id (0);
2691 const u32 lsz = get_local_size (0);
2694 * shared lookup table
2697 __local u64 s_sbob_sl64[8][256];
2699 for (u32 i = lid; i < 256; i += lsz)
2701 s_sbob_sl64[0][i] = sbob_sl64[0][i];
2702 s_sbob_sl64[1][i] = sbob_sl64[1][i];
2703 s_sbob_sl64[2][i] = sbob_sl64[2][i];
2704 s_sbob_sl64[3][i] = sbob_sl64[3][i];
2705 s_sbob_sl64[4][i] = sbob_sl64[4][i];
2706 s_sbob_sl64[5][i] = sbob_sl64[5][i];
2707 s_sbob_sl64[6][i] = sbob_sl64[6][i];
2708 s_sbob_sl64[7][i] = sbob_sl64[7][i];
2711 barrier (CLK_LOCAL_MEM_FENCE);
2713 if (gid >= gid_max) return;
2721 w[ 0] = pws[gid].i[ 0];
2722 w[ 1] = pws[gid].i[ 1];
2723 w[ 2] = pws[gid].i[ 2];
2724 w[ 3] = pws[gid].i[ 3];
2738 const u32 pw_len = pws[gid].pw_len;
2744 m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
2747 __kernel void m11800_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2753 const u32 gid = get_global_id (0);
2754 const u32 lid = get_local_id (0);
2755 const u32 lsz = get_local_size (0);
2758 * shared lookup table
2761 __local u64 s_sbob_sl64[8][256];
2763 for (u32 i = lid; i < 256; i += lsz)
2765 s_sbob_sl64[0][i] = sbob_sl64[0][i];
2766 s_sbob_sl64[1][i] = sbob_sl64[1][i];
2767 s_sbob_sl64[2][i] = sbob_sl64[2][i];
2768 s_sbob_sl64[3][i] = sbob_sl64[3][i];
2769 s_sbob_sl64[4][i] = sbob_sl64[4][i];
2770 s_sbob_sl64[5][i] = sbob_sl64[5][i];
2771 s_sbob_sl64[6][i] = sbob_sl64[6][i];
2772 s_sbob_sl64[7][i] = sbob_sl64[7][i];
2775 barrier (CLK_LOCAL_MEM_FENCE);
2777 if (gid >= gid_max) return;
2785 w[ 0] = pws[gid].i[ 0];
2786 w[ 1] = pws[gid].i[ 1];
2787 w[ 2] = pws[gid].i[ 2];
2788 w[ 3] = pws[gid].i[ 3];
2789 w[ 4] = pws[gid].i[ 4];
2790 w[ 5] = pws[gid].i[ 5];
2791 w[ 6] = pws[gid].i[ 6];
2792 w[ 7] = pws[gid].i[ 7];
2802 const u32 pw_len = pws[gid].pw_len;
2808 m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
2811 __kernel void m11800_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2817 const u32 gid = get_global_id (0);
2818 const u32 lid = get_local_id (0);
2819 const u32 lsz = get_local_size (0);
2822 * shared lookup table
2825 __local u64 s_sbob_sl64[8][256];
2827 for (u32 i = lid; i < 256; i += lsz)
2829 s_sbob_sl64[0][i] = sbob_sl64[0][i];
2830 s_sbob_sl64[1][i] = sbob_sl64[1][i];
2831 s_sbob_sl64[2][i] = sbob_sl64[2][i];
2832 s_sbob_sl64[3][i] = sbob_sl64[3][i];
2833 s_sbob_sl64[4][i] = sbob_sl64[4][i];
2834 s_sbob_sl64[5][i] = sbob_sl64[5][i];
2835 s_sbob_sl64[6][i] = sbob_sl64[6][i];
2836 s_sbob_sl64[7][i] = sbob_sl64[7][i];
2839 barrier (CLK_LOCAL_MEM_FENCE);
2841 if (gid >= gid_max) return;
2849 w[ 0] = pws[gid].i[ 0];
2850 w[ 1] = pws[gid].i[ 1];
2851 w[ 2] = pws[gid].i[ 2];
2852 w[ 3] = pws[gid].i[ 3];
2853 w[ 4] = pws[gid].i[ 4];
2854 w[ 5] = pws[gid].i[ 5];
2855 w[ 6] = pws[gid].i[ 6];
2856 w[ 7] = pws[gid].i[ 7];
2857 w[ 8] = pws[gid].i[ 8];
2858 w[ 9] = pws[gid].i[ 9];
2859 w[10] = pws[gid].i[10];
2860 w[11] = pws[gid].i[11];
2861 w[12] = pws[gid].i[12];
2862 w[13] = pws[gid].i[13];
2863 w[14] = pws[gid].i[14];
2864 w[15] = pws[gid].i[15];
2866 const u32 pw_len = pws[gid].pw_len;
2872 m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);