2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
20 #define COMPARE_S "check_single_comp4.c"
21 #define COMPARE_M "check_multi_comp4.c"
24 #define SIPROUND(v0,v1,v2,v3) \
26 (v1) = rotl64 ((v1), 13); \
28 (v0) = as_ulong (as_uint2 ((v0)).s10); \
30 (v3) = rotl64 ((v3), 16); \
33 (v3) = rotl64 ((v3), 21); \
36 (v1) = rotl64 ((v1), 17); \
38 (v2) = as_ulong (as_uint2 ((v2)).s10);
40 #define SIPROUND(v0,v1,v2,v3) \
42 (v1) = rotl64 ((v1), 13); \
44 (v0) = rotl64 ((v0), 32); \
46 (v3) = rotl64 ((v3), 16); \
49 (v3) = rotl64 ((v3), 21); \
52 (v1) = rotl64 ((v1), 17); \
54 (v2) = rotl64 ((v2), 32);
57 static void m10100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
63 const u32 gid = get_global_id (0);
64 const u32 lid = get_local_id (0);
75 v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]);
76 v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]);
77 v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]);
78 v3p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]);
80 u64 *w_ptr = (u64 *) w;
82 w_ptr[pw_len / 8] |= (u64) pw_len << 56;
90 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
92 const u32 w0r = words_buf_r[il_pos];
94 const u32 w0 = w0l | w0r;
101 u64 m = hl32_to_64 (w[1], w0);
105 SIPROUND (v0, v1, v2, v3);
106 SIPROUND (v0, v1, v2, v3);
113 for (i = 8, j = 2; i <= pw_len; i += 8, j += 2)
115 m = hl32_to_64 (w[j + 1], w[j + 0]);
119 SIPROUND (v0, v1, v2, v3);
120 SIPROUND (v0, v1, v2, v3);
127 SIPROUND (v0, v1, v2, v3);
128 SIPROUND (v0, v1, v2, v3);
129 SIPROUND (v0, v1, v2, v3);
130 SIPROUND (v0, v1, v2, v3);
132 const u64 v = v0 ^ v1 ^ v2 ^ v3;
134 const u32 a = l32_from_64 (v);
135 const u32 b = h32_from_64 (v);
146 static void m10100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
152 const u32 gid = get_global_id (0);
153 const u32 lid = get_local_id (0);
159 const u32 search[4] =
161 digests_buf[digests_offset].digest_buf[DGST_R0],
162 digests_buf[digests_offset].digest_buf[DGST_R1],
163 digests_buf[digests_offset].digest_buf[DGST_R2],
164 digests_buf[digests_offset].digest_buf[DGST_R3]
171 u64 v0p = SIPHASHM_0;
172 u64 v1p = SIPHASHM_1;
173 u64 v2p = SIPHASHM_2;
174 u64 v3p = SIPHASHM_3;
176 v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]);
177 v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]);
178 v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]);
179 v3p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]);
181 u64 *w_ptr = (u64 *) w;
183 w_ptr[pw_len / 8] |= (u64) pw_len << 56;
191 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
193 const u32 w0r = words_buf_r[il_pos];
195 const u32 w0 = w0l | w0r;
202 u64 m = hl32_to_64 (w[1], w0);
206 SIPROUND (v0, v1, v2, v3);
207 SIPROUND (v0, v1, v2, v3);
214 for (i = 8, j = 2; i <= pw_len; i += 8, j += 2)
216 m = hl32_to_64 (w[j + 1], w[j + 0]);
220 SIPROUND (v0, v1, v2, v3);
221 SIPROUND (v0, v1, v2, v3);
228 SIPROUND (v0, v1, v2, v3);
229 SIPROUND (v0, v1, v2, v3);
230 SIPROUND (v0, v1, v2, v3);
231 SIPROUND (v0, v1, v2, v3);
233 const u64 v = v0 ^ v1 ^ v2 ^ v3;
235 const u32 a = l32_from_64 (v);
236 const u32 b = h32_from_64 (v);
247 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
253 const u32 gid = get_global_id (0);
255 if (gid >= gid_max) return;
259 w[ 0] = pws[gid].i[ 0];
260 w[ 1] = pws[gid].i[ 1];
261 w[ 2] = pws[gid].i[ 2];
262 w[ 3] = pws[gid].i[ 3];
273 w[14] = pws[gid].i[14];
276 const u32 pw_len = pws[gid].pw_len;
282 m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
285 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
291 const u32 gid = get_global_id (0);
293 if (gid >= gid_max) return;
297 w[ 0] = pws[gid].i[ 0];
298 w[ 1] = pws[gid].i[ 1];
299 w[ 2] = pws[gid].i[ 2];
300 w[ 3] = pws[gid].i[ 3];
301 w[ 4] = pws[gid].i[ 4];
302 w[ 5] = pws[gid].i[ 5];
303 w[ 6] = pws[gid].i[ 6];
304 w[ 7] = pws[gid].i[ 7];
311 w[14] = pws[gid].i[14];
314 const u32 pw_len = pws[gid].pw_len;
320 m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
323 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
329 const u32 gid = get_global_id (0);
331 if (gid >= gid_max) return;
335 w[ 0] = pws[gid].i[ 0];
336 w[ 1] = pws[gid].i[ 1];
337 w[ 2] = pws[gid].i[ 2];
338 w[ 3] = pws[gid].i[ 3];
339 w[ 4] = pws[gid].i[ 4];
340 w[ 5] = pws[gid].i[ 5];
341 w[ 6] = pws[gid].i[ 6];
342 w[ 7] = pws[gid].i[ 7];
343 w[ 8] = pws[gid].i[ 8];
344 w[ 9] = pws[gid].i[ 9];
345 w[10] = pws[gid].i[10];
346 w[11] = pws[gid].i[11];
347 w[12] = pws[gid].i[12];
348 w[13] = pws[gid].i[13];
349 w[14] = pws[gid].i[14];
350 w[15] = pws[gid].i[15];
352 const u32 pw_len = pws[gid].pw_len;
358 m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
361 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
367 const u32 gid = get_global_id (0);
369 if (gid >= gid_max) return;
373 w[ 0] = pws[gid].i[ 0];
374 w[ 1] = pws[gid].i[ 1];
375 w[ 2] = pws[gid].i[ 2];
376 w[ 3] = pws[gid].i[ 3];
387 w[14] = pws[gid].i[14];
390 const u32 pw_len = pws[gid].pw_len;
396 m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
399 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
405 const u32 gid = get_global_id (0);
407 if (gid >= gid_max) return;
411 w[ 0] = pws[gid].i[ 0];
412 w[ 1] = pws[gid].i[ 1];
413 w[ 2] = pws[gid].i[ 2];
414 w[ 3] = pws[gid].i[ 3];
415 w[ 4] = pws[gid].i[ 4];
416 w[ 5] = pws[gid].i[ 5];
417 w[ 6] = pws[gid].i[ 6];
418 w[ 7] = pws[gid].i[ 7];
425 w[14] = pws[gid].i[14];
428 const u32 pw_len = pws[gid].pw_len;
434 m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
437 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
443 const u32 gid = get_global_id (0);
445 if (gid >= gid_max) return;
449 w[ 0] = pws[gid].i[ 0];
450 w[ 1] = pws[gid].i[ 1];
451 w[ 2] = pws[gid].i[ 2];
452 w[ 3] = pws[gid].i[ 3];
453 w[ 4] = pws[gid].i[ 4];
454 w[ 5] = pws[gid].i[ 5];
455 w[ 6] = pws[gid].i[ 6];
456 w[ 7] = pws[gid].i[ 7];
457 w[ 8] = pws[gid].i[ 8];
458 w[ 9] = pws[gid].i[ 9];
459 w[10] = pws[gid].i[10];
460 w[11] = pws[gid].i[11];
461 w[12] = pws[gid].i[12];
462 w[13] = pws[gid].i[13];
463 w[14] = pws[gid].i[14];
464 w[15] = pws[gid].i[15];
466 const u32 pw_len = pws[gid].pw_len;
472 m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);