2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * Gabriele Gristina <matrix@hashcat.net>
4 * magnum <john.magnum@hushmail.com>
13 #include "include/constants.h"
14 #include "include/kernel_vendor.h"
21 #include "include/kernel_functions.c"
22 #include "OpenCL/types_ocl.c"
23 #include "OpenCL/common.c"
24 #include "include/rp_kernel.h"
25 #include "OpenCL/rp.c"
26 #include "OpenCL/simd.c"
28 #define INITVAL 0x0101010101010101
31 #define BOX(S,n,i) (S)[(n)][(i)]
33 #define BOX(S,n,i) (u64x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
35 #define BOX(S,n,i) (u64x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
37 #define BOX(S,n,i) (u64x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
39 #define BOX(S,n,i) (u64x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
42 #define SBOG_LPSti64 \
43 BOX (s_sbob_sl64, 0, ((t[0] >> (i * 8)) & 0xff)) ^ \
44 BOX (s_sbob_sl64, 1, ((t[1] >> (i * 8)) & 0xff)) ^ \
45 BOX (s_sbob_sl64, 2, ((t[2] >> (i * 8)) & 0xff)) ^ \
46 BOX (s_sbob_sl64, 3, ((t[3] >> (i * 8)) & 0xff)) ^ \
47 BOX (s_sbob_sl64, 4, ((t[4] >> (i * 8)) & 0xff)) ^ \
48 BOX (s_sbob_sl64, 5, ((t[5] >> (i * 8)) & 0xff)) ^ \
49 BOX (s_sbob_sl64, 6, ((t[6] >> (i * 8)) & 0xff)) ^ \
50 BOX (s_sbob_sl64, 7, ((t[7] >> (i * 8)) & 0xff))
54 __constant u64 sbob_sl64[8][256] =
2122 __constant u64 sbob_rc64[12][8] =
2246 static void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
2253 for (int i = 0; i < 8; i++)
2258 for (int i = 0; i < 8; i++)
2260 k[i] = SBOG_LPSti64;
2264 for (int i = 0; i < 8; i++)
2269 for (int r = 0; r < 12; r++)
2272 for (int i = 0; i < 8; i++)
2278 for (int i = 0; i < 8; i++)
2280 s[i] = SBOG_LPSti64;
2283 for (int i = 0; i < 8; i++)
2285 t[i] = k[i] ^ sbob_rc64[r][i];
2289 for (int i = 0; i < 8; i++)
2291 k[i] = SBOG_LPSti64;
2296 for (int i = 0; i < 8; i++)
2298 h[i] ^= s[i] ^ k[i] ^ m[i];
2302 __kernel void m11700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2308 const u32 gid = get_global_id (0);
2309 const u32 lid = get_local_id (0);
2310 const u32 lsz = get_local_size (0);
2313 * shared lookup table
2316 __local u64 s_sbob_sl64[8][256];
2318 for (u32 i = lid; i < 256; i += lsz)
2320 s_sbob_sl64[0][i] = sbob_sl64[0][i];
2321 s_sbob_sl64[1][i] = sbob_sl64[1][i];
2322 s_sbob_sl64[2][i] = sbob_sl64[2][i];
2323 s_sbob_sl64[3][i] = sbob_sl64[3][i];
2324 s_sbob_sl64[4][i] = sbob_sl64[4][i];
2325 s_sbob_sl64[5][i] = sbob_sl64[5][i];
2326 s_sbob_sl64[6][i] = sbob_sl64[6][i];
2327 s_sbob_sl64[7][i] = sbob_sl64[7][i];
2330 barrier (CLK_LOCAL_MEM_FENCE);
2332 if (gid >= gid_max) return;
2340 pw_buf0[0] = pws[gid].i[ 0];
2341 pw_buf0[1] = pws[gid].i[ 1];
2342 pw_buf0[2] = pws[gid].i[ 2];
2343 pw_buf0[3] = pws[gid].i[ 3];
2347 pw_buf1[0] = pws[gid].i[ 4];
2348 pw_buf1[1] = pws[gid].i[ 5];
2349 pw_buf1[2] = pws[gid].i[ 6];
2350 pw_buf1[3] = pws[gid].i[ 7];
2352 const u32 pw_len = pws[gid].pw_len;
2358 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
2365 const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
2367 append_0x01_2x4_VV (w0, w1, out_len);
2389 * reverse message block
2394 m[0] = hl32_to_64 (w[15], w[14]);
2395 m[1] = hl32_to_64 (w[13], w[12]);
2396 m[2] = hl32_to_64 (w[11], w[10]);
2397 m[3] = hl32_to_64 (w[ 9], w[ 8]);
2398 m[4] = hl32_to_64 (w[ 7], w[ 6]);
2399 m[5] = hl32_to_64 (w[ 5], w[ 4]);
2400 m[6] = hl32_to_64 (w[ 3], w[ 2]);
2401 m[7] = hl32_to_64 (w[ 1], w[ 0]);
2403 m[0] = swap64 (m[0]);
2404 m[1] = swap64 (m[1]);
2405 m[2] = swap64 (m[2]);
2406 m[3] = swap64 (m[3]);
2407 m[4] = swap64 (m[4]);
2408 m[5] = swap64 (m[5]);
2409 m[6] = swap64 (m[6]);
2410 m[7] = swap64 (m[7]);
2412 // state buffer (hash)
2425 streebog_g (h, m, s_sbob_sl64);
2436 z[7] = swap64 ((u64) (out_len * 8)); // maybe a bug
2438 streebog_g (h, z, s_sbob_sl64);
2439 streebog_g (h, m, s_sbob_sl64);
2441 const u32x r0 = l32_from_64 (h[0]);
2442 const u32x r1 = h32_from_64 (h[0]);
2443 const u32x r2 = l32_from_64 (h[1]);
2444 const u32x r3 = h32_from_64 (h[1]);
2446 COMPARE_M_SIMD (r0, r1, r2, r3);
2450 __kernel void m11700_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2454 __kernel void m11700_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2458 __kernel void m11700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2464 const u32 gid = get_global_id (0);
2465 const u32 lid = get_local_id (0);
2466 const u32 lsz = get_local_size (0);
2469 * shared lookup table
2472 __local u64 s_sbob_sl64[8][256];
2474 for (u32 i = lid; i < 256; i += lsz)
2476 s_sbob_sl64[0][i] = sbob_sl64[0][i];
2477 s_sbob_sl64[1][i] = sbob_sl64[1][i];
2478 s_sbob_sl64[2][i] = sbob_sl64[2][i];
2479 s_sbob_sl64[3][i] = sbob_sl64[3][i];
2480 s_sbob_sl64[4][i] = sbob_sl64[4][i];
2481 s_sbob_sl64[5][i] = sbob_sl64[5][i];
2482 s_sbob_sl64[6][i] = sbob_sl64[6][i];
2483 s_sbob_sl64[7][i] = sbob_sl64[7][i];
2486 barrier (CLK_LOCAL_MEM_FENCE);
2488 if (gid >= gid_max) return;
2496 pw_buf0[0] = pws[gid].i[ 0];
2497 pw_buf0[1] = pws[gid].i[ 1];
2498 pw_buf0[2] = pws[gid].i[ 2];
2499 pw_buf0[3] = pws[gid].i[ 3];
2503 pw_buf1[0] = pws[gid].i[ 4];
2504 pw_buf1[1] = pws[gid].i[ 5];
2505 pw_buf1[2] = pws[gid].i[ 6];
2506 pw_buf1[3] = pws[gid].i[ 7];
2508 const u32 pw_len = pws[gid].pw_len;
2514 const u32 search[4] =
2516 digests_buf[digests_offset].digest_buf[DGST_R0],
2517 digests_buf[digests_offset].digest_buf[DGST_R1],
2518 digests_buf[digests_offset].digest_buf[DGST_R2],
2519 digests_buf[digests_offset].digest_buf[DGST_R3]
2526 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
2533 const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
2535 append_0x01_2x4_VV (w0, w1, out_len);
2557 * reverse message block
2562 m[0] = hl32_to_64 (w[15], w[14]);
2563 m[1] = hl32_to_64 (w[13], w[12]);
2564 m[2] = hl32_to_64 (w[11], w[10]);
2565 m[3] = hl32_to_64 (w[ 9], w[ 8]);
2566 m[4] = hl32_to_64 (w[ 7], w[ 6]);
2567 m[5] = hl32_to_64 (w[ 5], w[ 4]);
2568 m[6] = hl32_to_64 (w[ 3], w[ 2]);
2569 m[7] = hl32_to_64 (w[ 1], w[ 0]);
2571 m[0] = swap64 (m[0]);
2572 m[1] = swap64 (m[1]);
2573 m[2] = swap64 (m[2]);
2574 m[3] = swap64 (m[3]);
2575 m[4] = swap64 (m[4]);
2576 m[5] = swap64 (m[5]);
2577 m[6] = swap64 (m[6]);
2578 m[7] = swap64 (m[7]);
2580 // state buffer (hash)
2593 streebog_g (h, m, s_sbob_sl64);
2604 z[7] = swap64 ((u64) (out_len * 8)); // maybe a bug
2606 streebog_g (h, z, s_sbob_sl64);
2607 streebog_g (h, m, s_sbob_sl64);
2609 const u32x r0 = l32_from_64 (h[0]);
2610 const u32x r1 = h32_from_64 (h[0]);
2611 const u32x r2 = l32_from_64 (h[1]);
2612 const u32x r3 = h32_from_64 (h[1]);
2614 COMPARE_S_SIMD (r0, r1, r2, r3);
2618 __kernel void m11700_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2622 __kernel void m11700_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)