2 * Author......: Jens Steube <jens.steube@gmail.com>
10 #include "include/constants.h"
11 #include "include/kernel_vendor.h"
18 #include "include/kernel_functions.c"
19 #include "OpenCL/types_ocl.c"
20 #include "OpenCL/common.c"
21 #include "OpenCL/simd.c"
23 #define SIPROUND(v0,v1,v2,v3) \
25 (v1) = rotl64 ((v1), 13); \
27 (v0) = as_ulong (as_uint2 ((v0)).s10); \
29 (v3) = rotl64 ((v3), 16); \
32 (v3) = rotl64 ((v3), 21); \
35 (v1) = rotl64 ((v1), 17); \
37 (v2) = as_ulong (as_uint2 ((v2)).s10);
39 __kernel void m10100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
45 const u32 lid = get_local_id (0);
51 const u32 gid = get_global_id (0);
53 if (gid >= gid_max) return;
58 pws0[0] = pws[gid].i[0];
59 pws0[1] = pws[gid].i[1];
60 pws0[2] = pws[gid].i[2];
61 pws0[3] = pws[gid].i[3];
62 pws1[0] = pws[gid].i[4];
63 pws1[1] = pws[gid].i[5];
64 pws1[2] = pws[gid].i[6];
65 pws1[3] = pws[gid].i[7];
67 const u32 pw_l_len = pws[gid].pw_len;
78 v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]);
79 v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]);
80 v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]);
81 v3p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]);
87 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
89 const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
91 const u32x pw_len = pw_l_len + pw_r_len;
93 u32x wordr0[4] = { 0 };
94 u32x wordr1[4] = { 0 };
95 u32x wordr2[4] = { 0 };
96 u32x wordr3[4] = { 0 };
98 wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
99 wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
100 wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
101 wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
102 wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
103 wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
104 wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
105 wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
107 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
109 switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
114 w[ 0] = wordl0[0] | wordr0[0];
115 w[ 1] = wordl0[1] | wordr0[1];
116 w[ 2] = wordl0[2] | wordr0[2];
117 w[ 3] = wordl0[3] | wordr0[3];
118 w[ 4] = wordl1[0] | wordr1[0];
119 w[ 5] = wordl1[1] | wordr1[1];
120 w[ 6] = wordl1[2] | wordr1[2];
121 w[ 7] = wordl1[3] | wordr1[3];
122 w[ 8] = wordl2[0] | wordr2[0];
123 w[ 9] = wordl2[1] | wordr2[1];
124 w[10] = wordl2[2] | wordr2[2];
125 w[11] = wordl2[3] | wordr2[3];
126 w[12] = wordl3[0] | wordr3[0];
127 w[13] = wordl3[1] | wordr3[1];
128 w[14] = wordl3[2] | wordr3[2];
129 w[15] = wordl3[3] | wordr3[3];
131 u64 *w_ptr = (u64 *) w;
133 w_ptr[pw_len / 8] |= (u64) pw_len << 56;
143 for (i = 0, j = 0; i <= pw_len; i += 8, j += 2)
145 u64 m = hl32_to_64 (w[j + 1], w[j + 0]);
149 SIPROUND (v0, v1, v2, v3);
150 SIPROUND (v0, v1, v2, v3);
157 SIPROUND (v0, v1, v2, v3);
158 SIPROUND (v0, v1, v2, v3);
159 SIPROUND (v0, v1, v2, v3);
160 SIPROUND (v0, v1, v2, v3);
162 const u64 v = v0 ^ v1 ^ v2 ^ v3;
164 const u32 a = l32_from_64 (v);
165 const u32 b = h32_from_64 (v);
176 __kernel void m10100_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
180 __kernel void m10100_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
184 __kernel void m10100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
190 const u32 lid = get_local_id (0);
196 const u32 gid = get_global_id (0);
198 if (gid >= gid_max) return;
203 pws0[0] = pws[gid].i[0];
204 pws0[1] = pws[gid].i[1];
205 pws0[2] = pws[gid].i[2];
206 pws0[3] = pws[gid].i[3];
207 pws1[0] = pws[gid].i[4];
208 pws1[1] = pws[gid].i[5];
209 pws1[2] = pws[gid].i[6];
210 pws1[3] = pws[gid].i[7];
212 const u32 pw_l_len = pws[gid].pw_len;
218 const u32 search[4] =
220 digests_buf[digests_offset].digest_buf[DGST_R0],
221 digests_buf[digests_offset].digest_buf[DGST_R1],
222 digests_buf[digests_offset].digest_buf[DGST_R2],
223 digests_buf[digests_offset].digest_buf[DGST_R3]
230 u64 v0p = SIPHASHM_0;
231 u64 v1p = SIPHASHM_1;
232 u64 v2p = SIPHASHM_2;
233 u64 v3p = SIPHASHM_3;
235 v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]);
236 v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]);
237 v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]);
238 v3p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]);
244 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
246 const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
248 const u32x pw_len = pw_l_len + pw_r_len;
250 u32x wordr0[4] = { 0 };
251 u32x wordr1[4] = { 0 };
252 u32x wordr2[4] = { 0 };
253 u32x wordr3[4] = { 0 };
255 wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
256 wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
257 wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
258 wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
259 wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
260 wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
261 wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
262 wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
264 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
266 switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
271 w[ 0] = wordl0[0] | wordr0[0];
272 w[ 1] = wordl0[1] | wordr0[1];
273 w[ 2] = wordl0[2] | wordr0[2];
274 w[ 3] = wordl0[3] | wordr0[3];
275 w[ 4] = wordl1[0] | wordr1[0];
276 w[ 5] = wordl1[1] | wordr1[1];
277 w[ 6] = wordl1[2] | wordr1[2];
278 w[ 7] = wordl1[3] | wordr1[3];
279 w[ 8] = wordl2[0] | wordr2[0];
280 w[ 9] = wordl2[1] | wordr2[1];
281 w[10] = wordl2[2] | wordr2[2];
282 w[11] = wordl2[3] | wordr2[3];
283 w[12] = wordl3[0] | wordr3[0];
284 w[13] = wordl3[1] | wordr3[1];
285 w[14] = wordl3[2] | wordr3[2];
286 w[15] = wordl3[3] | wordr3[3];
288 u64 *w_ptr = (u64 *) w;
290 w_ptr[pw_len / 8] |= (u64) pw_len << 56;
300 for (i = 0, j = 0; i <= pw_len; i += 8, j += 2)
302 u64 m = hl32_to_64 (w[j + 1], w[j + 0]);
306 SIPROUND (v0, v1, v2, v3);
307 SIPROUND (v0, v1, v2, v3);
314 SIPROUND (v0, v1, v2, v3);
315 SIPROUND (v0, v1, v2, v3);
316 SIPROUND (v0, v1, v2, v3);
317 SIPROUND (v0, v1, v2, v3);
319 const u64 v = v0 ^ v1 ^ v2 ^ v3;
321 const u32 a = l32_from_64 (v);
322 const u32 b = h32_from_64 (v);
333 __kernel void m10100_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
337 __kernel void m10100_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)