2 * Author......: Jens Steube <jens.steube@gmail.com>
9 //#define NEW_SIMD_CODE
11 #include "include/constants.h"
12 #include "include/kernel_vendor.h"
19 #include "include/kernel_functions.c"
20 #include "OpenCL/types_ocl.c"
21 #include "OpenCL/common.c"
22 #include "include/rp_kernel.h"
23 #include "OpenCL/rp.c"
24 #include "OpenCL/simd.c"
26 __kernel void m00200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
32 const u32 lid = get_local_id (0);
38 const u32 gid = get_global_id (0);
40 if (gid >= gid_max) return;
44 pw_buf0[0] = pws[gid].i[ 0];
45 pw_buf0[1] = pws[gid].i[ 1];
46 pw_buf0[2] = pws[gid].i[ 2];
47 pw_buf0[3] = pws[gid].i[ 3];
51 pw_buf1[0] = pws[gid].i[ 4];
52 pw_buf1[1] = pws[gid].i[ 5];
53 pw_buf1[2] = pws[gid].i[ 6];
54 pw_buf1[3] = pws[gid].i[ 7];
56 const u32 pw_len = pws[gid].pw_len;
62 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
69 const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
99 a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \
107 for (i = 0, j = 0; i <= (int) out_len - 4; i += 4, j += 1)
109 const u32x wj = w_t[j];
111 ROUND ((wj >> 0) & 0xff);
112 ROUND ((wj >> 8) & 0xff);
113 ROUND ((wj >> 16) & 0xff);
114 ROUND ((wj >> 24) & 0xff);
117 const u32x wj = w_t[j];
119 const u32 left = out_len - i;
123 ROUND ((wj >> 0) & 0xff);
124 ROUND ((wj >> 8) & 0xff);
125 ROUND ((wj >> 16) & 0xff);
129 ROUND ((wj >> 0) & 0xff);
130 ROUND ((wj >> 8) & 0xff);
134 ROUND ((wj >> 0) & 0xff);
140 COMPARE_M_SIMD (a, b, c, d);
144 __kernel void m00200_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
148 __kernel void m00200_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
152 __kernel void m00200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
158 const u32 lid = get_local_id (0);
164 const u32 gid = get_global_id (0);
166 if (gid >= gid_max) return;
170 pw_buf0[0] = pws[gid].i[ 0];
171 pw_buf0[1] = pws[gid].i[ 1];
172 pw_buf0[2] = pws[gid].i[ 2];
173 pw_buf0[3] = pws[gid].i[ 3];
177 pw_buf1[0] = pws[gid].i[ 4];
178 pw_buf1[1] = pws[gid].i[ 5];
179 pw_buf1[2] = pws[gid].i[ 6];
180 pw_buf1[3] = pws[gid].i[ 7];
182 const u32 pw_len = pws[gid].pw_len;
188 const u32 search[4] =
190 digests_buf[digests_offset].digest_buf[DGST_R0],
191 digests_buf[digests_offset].digest_buf[DGST_R1],
192 digests_buf[digests_offset].digest_buf[DGST_R2],
193 digests_buf[digests_offset].digest_buf[DGST_R3]
200 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
207 const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
237 a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \
245 for (i = 0, j = 0; i <= (int) out_len - 4; i += 4, j += 1)
247 const u32x wj = w_t[j];
249 ROUND ((wj >> 0) & 0xff);
250 ROUND ((wj >> 8) & 0xff);
251 ROUND ((wj >> 16) & 0xff);
252 ROUND ((wj >> 24) & 0xff);
255 const u32x wj = w_t[j];
257 const u32 left = out_len - i;
261 ROUND ((wj >> 0) & 0xff);
262 ROUND ((wj >> 8) & 0xff);
263 ROUND ((wj >> 16) & 0xff);
267 ROUND ((wj >> 0) & 0xff);
268 ROUND ((wj >> 8) & 0xff);
272 ROUND ((wj >> 0) & 0xff);
278 COMPARE_S_SIMD (a, b, c, d);
282 __kernel void m00200_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
286 __kernel void m00200_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)