2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "OpenCL/types_ocl.c"
18 #include "OpenCL/common.c"
20 #define COMPARE_S "OpenCL/check_single_comp4.c"
21 #define COMPARE_M "OpenCL/check_multi_comp4.c"
23 #define SIPROUND(v0,v1,v2,v3) \
25 (v1) = rotl64 ((v1), 13); \
27 (v0) = as_ulong (as_uint2 ((v0)).s10); \
29 (v3) = rotl64 ((v3), 16); \
32 (v3) = rotl64 ((v3), 21); \
35 (v1) = rotl64 ((v1), 17); \
37 (v2) = as_ulong (as_uint2 ((v2)).s10);
39 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
45 const u32 lid = get_local_id (0);
51 const u32 gid = get_global_id (0);
53 if (gid >= gid_max) return;
57 wordl0[0] = pws[gid].i[ 0];
58 wordl0[1] = pws[gid].i[ 1];
59 wordl0[2] = pws[gid].i[ 2];
60 wordl0[3] = pws[gid].i[ 3];
64 wordl1[0] = pws[gid].i[ 4];
65 wordl1[1] = pws[gid].i[ 5];
66 wordl1[2] = pws[gid].i[ 6];
67 wordl1[3] = pws[gid].i[ 7];
83 const u32 pw_l_len = pws[gid].pw_len;
85 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
87 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
99 v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]);
100 v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]);
101 v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]);
102 v3p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]);
108 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
110 const u32 pw_r_len = combs_buf[il_pos].pw_len;
112 const u32 pw_len = pw_l_len + pw_r_len;
116 wordr0[0] = combs_buf[il_pos].i[0];
117 wordr0[1] = combs_buf[il_pos].i[1];
118 wordr0[2] = combs_buf[il_pos].i[2];
119 wordr0[3] = combs_buf[il_pos].i[3];
123 wordr1[0] = combs_buf[il_pos].i[4];
124 wordr1[1] = combs_buf[il_pos].i[5];
125 wordr1[2] = combs_buf[il_pos].i[6];
126 wordr1[3] = combs_buf[il_pos].i[7];
142 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
144 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
149 w[ 0] = wordl0[0] | wordr0[0];
150 w[ 1] = wordl0[1] | wordr0[1];
151 w[ 2] = wordl0[2] | wordr0[2];
152 w[ 3] = wordl0[3] | wordr0[3];
153 w[ 4] = wordl1[0] | wordr1[0];
154 w[ 5] = wordl1[1] | wordr1[1];
155 w[ 6] = wordl1[2] | wordr1[2];
156 w[ 7] = wordl1[3] | wordr1[3];
157 w[ 8] = wordl2[0] | wordr2[0];
158 w[ 9] = wordl2[1] | wordr2[1];
159 w[10] = wordl2[2] | wordr2[2];
160 w[11] = wordl2[3] | wordr2[3];
161 w[12] = wordl3[0] | wordr3[0];
162 w[13] = wordl3[1] | wordr3[1];
163 w[14] = wordl3[2] | wordr3[2];
164 w[15] = wordl3[3] | wordr3[3];
166 u64 *w_ptr = (u64 *) w;
168 w_ptr[pw_len / 8] |= (u64) pw_len << 56;
178 for (i = 0, j = 0; i <= pw_len; i += 8, j += 2)
180 u64 m = hl32_to_64 (w[j + 1], w[j + 0]);
184 SIPROUND (v0, v1, v2, v3);
185 SIPROUND (v0, v1, v2, v3);
192 SIPROUND (v0, v1, v2, v3);
193 SIPROUND (v0, v1, v2, v3);
194 SIPROUND (v0, v1, v2, v3);
195 SIPROUND (v0, v1, v2, v3);
197 const u64 v = v0 ^ v1 ^ v2 ^ v3;
199 const u32 a = l32_from_64 (v);
200 const u32 b = h32_from_64 (v);
211 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
215 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
219 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
225 const u32 lid = get_local_id (0);
231 const u32 gid = get_global_id (0);
233 if (gid >= gid_max) return;
237 wordl0[0] = pws[gid].i[ 0];
238 wordl0[1] = pws[gid].i[ 1];
239 wordl0[2] = pws[gid].i[ 2];
240 wordl0[3] = pws[gid].i[ 3];
244 wordl1[0] = pws[gid].i[ 4];
245 wordl1[1] = pws[gid].i[ 5];
246 wordl1[2] = pws[gid].i[ 6];
247 wordl1[3] = pws[gid].i[ 7];
263 const u32 pw_l_len = pws[gid].pw_len;
265 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
267 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
274 const u32 search[4] =
276 digests_buf[digests_offset].digest_buf[DGST_R0],
277 digests_buf[digests_offset].digest_buf[DGST_R1],
278 digests_buf[digests_offset].digest_buf[DGST_R2],
279 digests_buf[digests_offset].digest_buf[DGST_R3]
286 u64 v0p = SIPHASHM_0;
287 u64 v1p = SIPHASHM_1;
288 u64 v2p = SIPHASHM_2;
289 u64 v3p = SIPHASHM_3;
291 v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]);
292 v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]);
293 v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]);
294 v3p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]);
300 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
302 const u32 pw_r_len = combs_buf[il_pos].pw_len;
304 const u32 pw_len = pw_l_len + pw_r_len;
308 wordr0[0] = combs_buf[il_pos].i[0];
309 wordr0[1] = combs_buf[il_pos].i[1];
310 wordr0[2] = combs_buf[il_pos].i[2];
311 wordr0[3] = combs_buf[il_pos].i[3];
315 wordr1[0] = combs_buf[il_pos].i[4];
316 wordr1[1] = combs_buf[il_pos].i[5];
317 wordr1[2] = combs_buf[il_pos].i[6];
318 wordr1[3] = combs_buf[il_pos].i[7];
334 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
336 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
341 w[ 0] = wordl0[0] | wordr0[0];
342 w[ 1] = wordl0[1] | wordr0[1];
343 w[ 2] = wordl0[2] | wordr0[2];
344 w[ 3] = wordl0[3] | wordr0[3];
345 w[ 4] = wordl1[0] | wordr1[0];
346 w[ 5] = wordl1[1] | wordr1[1];
347 w[ 6] = wordl1[2] | wordr1[2];
348 w[ 7] = wordl1[3] | wordr1[3];
349 w[ 8] = wordl2[0] | wordr2[0];
350 w[ 9] = wordl2[1] | wordr2[1];
351 w[10] = wordl2[2] | wordr2[2];
352 w[11] = wordl2[3] | wordr2[3];
353 w[12] = wordl3[0] | wordr3[0];
354 w[13] = wordl3[1] | wordr3[1];
355 w[14] = wordl3[2] | wordr3[2];
356 w[15] = wordl3[3] | wordr3[3];
358 u64 *w_ptr = (u64 *) w;
360 w_ptr[pw_len / 8] |= (u64) pw_len << 56;
370 for (i = 0, j = 0; i <= pw_len; i += 8, j += 2)
372 u64 m = hl32_to_64 (w[j + 1], w[j + 0]);
376 SIPROUND (v0, v1, v2, v3);
377 SIPROUND (v0, v1, v2, v3);
384 SIPROUND (v0, v1, v2, v3);
385 SIPROUND (v0, v1, v2, v3);
386 SIPROUND (v0, v1, v2, v3);
387 SIPROUND (v0, v1, v2, v3);
389 const u64 v = v0 ^ v1 ^ v2 ^ v3;
391 const u32 a = l32_from_64 (v);
392 const u32 b = h32_from_64 (v);
403 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
407 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)