2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
20 #define COMPARE_S "check_single_comp4.c"
21 #define COMPARE_M "check_multi_comp4.c"
/*
 * SIPROUND (v0, v1, v2, v3): mixing round applied in place to four 64-bit
 * state words.
 *
 * Among its steps, v1 is passed through rotl64 with counts 13 and 17 and v3
 * with counts 16 and 21 (rotl64 is presumably a 64-bit left rotate - confirm
 * against its definition). as_ulong (as_uint2 (x).s10) reinterprets x as two
 * 32-bit lanes and swaps them; this is applied to v0 and v2.
 *
 * The arguments are expanded several times and assigned to, so pass plain
 * lvalues without side effects. The expansion is a bare sequence of
 * statements (it is not wrapped in do { } while (0)), so do not use it as the
 * sole body of an unbraced if/else/loop.
 */
23 #define SIPROUND(v0,v1,v2,v3) \
25 (v1) = rotl64 ((v1), 13); \
27 (v0) = as_ulong (as_uint2 ((v0)).s10); \
29 (v3) = rotl64 ((v3), 16); \
32 (v3) = rotl64 ((v3), 21); \
35 (v1) = rotl64 ((v1), 17); \
37 (v2) = as_ulong (as_uint2 ((v2)).s10);
/*
 * m10100m: hashing routine called by the m10100_m04, m10100_m08 and
 * m10100_m16 kernels.
 *
 * w            16 x u32 candidate words; modified in place (a length tag is
 *              OR-ed into it before the inner loop).
 * pw_len       candidate length; pw_len / 8 selects the 64-bit word that
 *              receives the tag (presumably a byte count - confirm).
 * words_buf_r  per-iteration word, indexed by il_pos.
 * salt_bufs,
 * salt_pos     salt_buf[0..3] of the selected salt is XOR-ed into the
 *              initial state words.
 * bfs_cnt      number of inner-loop iterations.
 *
 * The remaining parameters mirror the kernel argument list.
 */
39 static void m10100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
/* Global and work-group-local index of this work-item (dimension 0). */
45 const u32 gid = get_global_id (0);
46 const u32 lid = get_local_id (0);
/*
 * Fold the salt into the starting state: the 64-bit value built from
 * salt_buf[1], salt_buf[0] goes into v0p and v2p, the one built from
 * salt_buf[3], salt_buf[2] into v1p and v3p.
 */
57 v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]);
58 v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]);
59 v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]);
60 v3p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]);
/*
 * Address the u32 word array as 64-bit words and OR pw_len, shifted into
 * bits 56..63, into the 64-bit word at index pw_len / 8.
 * NOTE(review): this reinterprets u32 storage through a u64 pointer and so
 * relies on the caller's array being suitably aligned - confirm.
 */
62 u64 *w_ptr = (u64 *) w;
64 w_ptr[pw_len / 8] |= (u64) pw_len << 56;
/* Inner loop: one pass per entry of words_buf_r, bfs_cnt passes in total. */
72 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
74 const u32 w0r = words_buf_r[il_pos];
/* Word 0 of this candidate is w0l combined with the per-iteration word. */
76 const u32 w0 = w0l | w0r;
/* First 64-bit message word, built from w[1] and w0, then two rounds. */
83 u64 m = hl32_to_64 (w[1], w0);
87 SIPROUND (v0, v1, v2, v3);
88 SIPROUND (v0, v1, v2, v3);
/*
 * Further message words: i advances by 8 and j by 2 per step, and the loop
 * runs while i <= pw_len; each step pairs w[j + 1] with w[j + 0] and is
 * followed by two rounds.
 */
95 for (i = 8, j = 2; i <= pw_len; i += 8, j += 2)
97 m = hl32_to_64 (w[j + 1], w[j + 0]);
101 SIPROUND (v0, v1, v2, v3);
102 SIPROUND (v0, v1, v2, v3);
/* Four more rounds after the message loop (presumably the finalization). */
109 SIPROUND (v0, v1, v2, v3);
110 SIPROUND (v0, v1, v2, v3);
111 SIPROUND (v0, v1, v2, v3);
112 SIPROUND (v0, v1, v2, v3);
/* Collapse the four state words into a single 64-bit result. */
114 const u64 v = v0 ^ v1 ^ v2 ^ v3;
/* Split the result into two u32 values (presumably low and high halves). */
116 const u32 a = l32_from_64 (v);
117 const u32 b = h32_from_64 (v);
/*
 * m10100s: hashing routine called by the m10100_s04, m10100_s08 and
 * m10100_s16 kernels.
 *
 * w               16 x u32 candidate words; modified in place (a length tag
 *                 is OR-ed into it before the inner loop).
 * pw_len          candidate length; pw_len / 8 selects the 64-bit word that
 *                 receives the tag (presumably a byte count - confirm).
 * words_buf_r     per-iteration word, indexed by il_pos.
 * digests_buf,
 * digests_offset  the digest at digests_offset supplies the four search
 *                 words.
 * salt_bufs,
 * salt_pos        salt_buf[0..3] of the selected salt is XOR-ed into the
 *                 initial state words.
 * bfs_cnt         number of inner-loop iterations.
 *
 * The remaining parameters mirror the kernel argument list.
 */
128 static void m10100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
/* Global and work-group-local index of this work-item (dimension 0). */
134 const u32 gid = get_global_id (0);
135 const u32 lid = get_local_id (0);
/*
 * The four digest words of the target at digests_offset, picked in
 * DGST_R0..DGST_R3 order.
 */
141 const u32 search[4] =
143 digests_buf[digests_offset].digest_buf[DGST_R0],
144 digests_buf[digests_offset].digest_buf[DGST_R1],
145 digests_buf[digests_offset].digest_buf[DGST_R2],
146 digests_buf[digests_offset].digest_buf[DGST_R3]
/* Starting state: the four SIPHASHM_* constants ... */
153 u64 v0p = SIPHASHM_0;
154 u64 v1p = SIPHASHM_1;
155 u64 v2p = SIPHASHM_2;
156 u64 v3p = SIPHASHM_3;
/*
 * ... with the salt folded in: the 64-bit value built from salt_buf[1],
 * salt_buf[0] goes into v0p and v2p, the one built from salt_buf[3],
 * salt_buf[2] into v1p and v3p.
 */
158 v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]);
159 v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]);
160 v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]);
161 v3p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]);
/*
 * Address the u32 word array as 64-bit words and OR pw_len, shifted into
 * bits 56..63, into the 64-bit word at index pw_len / 8.
 * NOTE(review): this reinterprets u32 storage through a u64 pointer and so
 * relies on the caller's array being suitably aligned - confirm.
 */
163 u64 *w_ptr = (u64 *) w;
165 w_ptr[pw_len / 8] |= (u64) pw_len << 56;
/* Inner loop: one pass per entry of words_buf_r, bfs_cnt passes in total. */
173 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
175 const u32 w0r = words_buf_r[il_pos];
/* Word 0 of this candidate is w0l combined with the per-iteration word. */
177 const u32 w0 = w0l | w0r;
/* First 64-bit message word, built from w[1] and w0, then two rounds. */
184 u64 m = hl32_to_64 (w[1], w0);
188 SIPROUND (v0, v1, v2, v3);
189 SIPROUND (v0, v1, v2, v3);
/*
 * Further message words: i advances by 8 and j by 2 per step, and the loop
 * runs while i <= pw_len; each step pairs w[j + 1] with w[j + 0] and is
 * followed by two rounds.
 */
196 for (i = 8, j = 2; i <= pw_len; i += 8, j += 2)
198 m = hl32_to_64 (w[j + 1], w[j + 0]);
202 SIPROUND (v0, v1, v2, v3);
203 SIPROUND (v0, v1, v2, v3);
/* Four more rounds after the message loop (presumably the finalization). */
210 SIPROUND (v0, v1, v2, v3);
211 SIPROUND (v0, v1, v2, v3);
212 SIPROUND (v0, v1, v2, v3);
213 SIPROUND (v0, v1, v2, v3);
/* Collapse the four state words into a single 64-bit result. */
215 const u64 v = v0 ^ v1 ^ v2 ^ v3;
/* Split the result into two u32 values (presumably low and high halves). */
217 const u32 a = l32_from_64 (v);
218 const u32 b = h32_from_64 (v);
/*
 * m10100_m04: kernel entry point that loads one candidate per work-item and
 * hands it to m10100m. Declared with a required work-group size of
 * 64 x 1 x 1.
 *
 * pws      candidate records; pws[gid].i holds the words and pws[gid].pw_len
 *          the length.
 * gid_max  work-items with a global id at or above this value return at once.
 *
 * All other arguments except combs_mode and gid_max are forwarded unchanged
 * to m10100m.
 */
229 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
235 const u32 gid = get_global_id (0);
/* Work-items beyond the populated range have nothing to do. */
237 if (gid >= gid_max) return;
/* Copy this work-item's candidate words from pws[gid].i into w. */
241 w[ 0] = pws[gid].i[ 0];
242 w[ 1] = pws[gid].i[ 1];
243 w[ 2] = pws[gid].i[ 2];
244 w[ 3] = pws[gid].i[ 3];
255 w[14] = pws[gid].i[14];
258 const u32 pw_len = pws[gid].pw_len;
/* Delegate the hashing to the shared m10100m routine. */
264 m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
/*
 * m10100_m08: kernel entry point that loads one candidate per work-item and
 * hands it to m10100m. Declared with a required work-group size of
 * 64 x 1 x 1.
 *
 * pws      candidate records; pws[gid].i holds the words and pws[gid].pw_len
 *          the length.
 * gid_max  work-items with a global id at or above this value return at once.
 *
 * All other arguments except combs_mode and gid_max are forwarded unchanged
 * to m10100m.
 */
267 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
273 const u32 gid = get_global_id (0);
/* Work-items beyond the populated range have nothing to do. */
275 if (gid >= gid_max) return;
/* Copy this work-item's candidate words from pws[gid].i into w. */
279 w[ 0] = pws[gid].i[ 0];
280 w[ 1] = pws[gid].i[ 1];
281 w[ 2] = pws[gid].i[ 2];
282 w[ 3] = pws[gid].i[ 3];
283 w[ 4] = pws[gid].i[ 4];
284 w[ 5] = pws[gid].i[ 5];
285 w[ 6] = pws[gid].i[ 6];
286 w[ 7] = pws[gid].i[ 7];
293 w[14] = pws[gid].i[14];
296 const u32 pw_len = pws[gid].pw_len;
/* Delegate the hashing to the shared m10100m routine. */
302 m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
/*
 * m10100_m16: kernel entry point that loads one candidate per work-item and
 * hands it to m10100m. Declared with a required work-group size of
 * 64 x 1 x 1.
 *
 * pws      candidate records; pws[gid].i[0..15] holds the words and
 *          pws[gid].pw_len the length.
 * gid_max  work-items with a global id at or above this value return at once.
 *
 * All other arguments except combs_mode and gid_max are forwarded unchanged
 * to m10100m.
 */
305 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
311 const u32 gid = get_global_id (0);
/* Work-items beyond the populated range have nothing to do. */
313 if (gid >= gid_max) return;
/* Copy all sixteen candidate words of this work-item from pws[gid].i. */
317 w[ 0] = pws[gid].i[ 0];
318 w[ 1] = pws[gid].i[ 1];
319 w[ 2] = pws[gid].i[ 2];
320 w[ 3] = pws[gid].i[ 3];
321 w[ 4] = pws[gid].i[ 4];
322 w[ 5] = pws[gid].i[ 5];
323 w[ 6] = pws[gid].i[ 6];
324 w[ 7] = pws[gid].i[ 7];
325 w[ 8] = pws[gid].i[ 8];
326 w[ 9] = pws[gid].i[ 9];
327 w[10] = pws[gid].i[10];
328 w[11] = pws[gid].i[11];
329 w[12] = pws[gid].i[12];
330 w[13] = pws[gid].i[13];
331 w[14] = pws[gid].i[14];
332 w[15] = pws[gid].i[15];
334 const u32 pw_len = pws[gid].pw_len;
/* Delegate the hashing to the shared m10100m routine. */
340 m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
/*
 * m10100_s04: kernel entry point that loads one candidate per work-item and
 * hands it to m10100s. Declared with a required work-group size of
 * 64 x 1 x 1.
 *
 * pws      candidate records; pws[gid].i holds the words and pws[gid].pw_len
 *          the length.
 * gid_max  work-items with a global id at or above this value return at once.
 *
 * All other arguments except combs_mode and gid_max are forwarded unchanged
 * to m10100s.
 */
343 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
349 const u32 gid = get_global_id (0);
/* Work-items beyond the populated range have nothing to do. */
351 if (gid >= gid_max) return;
/* Copy this work-item's candidate words from pws[gid].i into w. */
355 w[ 0] = pws[gid].i[ 0];
356 w[ 1] = pws[gid].i[ 1];
357 w[ 2] = pws[gid].i[ 2];
358 w[ 3] = pws[gid].i[ 3];
369 w[14] = pws[gid].i[14];
372 const u32 pw_len = pws[gid].pw_len;
/* Delegate the hashing to the shared m10100s routine. */
378 m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
/*
 * m10100_s08: kernel entry point that loads one candidate per work-item and
 * hands it to m10100s. Declared with a required work-group size of
 * 64 x 1 x 1.
 *
 * pws      candidate records; pws[gid].i holds the words and pws[gid].pw_len
 *          the length.
 * gid_max  work-items with a global id at or above this value return at once.
 *
 * All other arguments except combs_mode and gid_max are forwarded unchanged
 * to m10100s.
 */
381 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
387 const u32 gid = get_global_id (0);
/* Work-items beyond the populated range have nothing to do. */
389 if (gid >= gid_max) return;
/* Copy this work-item's candidate words from pws[gid].i into w. */
393 w[ 0] = pws[gid].i[ 0];
394 w[ 1] = pws[gid].i[ 1];
395 w[ 2] = pws[gid].i[ 2];
396 w[ 3] = pws[gid].i[ 3];
397 w[ 4] = pws[gid].i[ 4];
398 w[ 5] = pws[gid].i[ 5];
399 w[ 6] = pws[gid].i[ 6];
400 w[ 7] = pws[gid].i[ 7];
407 w[14] = pws[gid].i[14];
410 const u32 pw_len = pws[gid].pw_len;
/* Delegate the hashing to the shared m10100s routine. */
416 m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
/*
 * m10100_s16: kernel entry point that loads one candidate per work-item and
 * hands it to m10100s. Declared with a required work-group size of
 * 64 x 1 x 1.
 *
 * pws      candidate records; pws[gid].i[0..15] holds the words and
 *          pws[gid].pw_len the length.
 * gid_max  work-items with a global id at or above this value return at once.
 *
 * All other arguments except combs_mode and gid_max are forwarded unchanged
 * to m10100s.
 */
419 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
425 const u32 gid = get_global_id (0);
/* Work-items beyond the populated range have nothing to do. */
427 if (gid >= gid_max) return;
/* Copy all sixteen candidate words of this work-item from pws[gid].i. */
431 w[ 0] = pws[gid].i[ 0];
432 w[ 1] = pws[gid].i[ 1];
433 w[ 2] = pws[gid].i[ 2];
434 w[ 3] = pws[gid].i[ 3];
435 w[ 4] = pws[gid].i[ 4];
436 w[ 5] = pws[gid].i[ 5];
437 w[ 6] = pws[gid].i[ 6];
438 w[ 7] = pws[gid].i[ 7];
439 w[ 8] = pws[gid].i[ 8];
440 w[ 9] = pws[gid].i[ 9];
441 w[10] = pws[gid].i[10];
442 w[11] = pws[gid].i[11];
443 w[12] = pws[gid].i[12];
444 w[13] = pws[gid].i[13];
445 w[14] = pws[gid].i[14];
446 w[15] = pws[gid].i[15];
448 const u32 pw_len = pws[gid].pw_len;
/* Delegate the hashing to the shared m10100s routine. */
454 m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);