2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
33 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
34 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
38 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
39 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
43 #define VECT_COMPARE_S "check_single_vect4_comp4.c"
44 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
// m00110_m04: multi-hash ("_m") kernel for combinator attack, scalar/vect1 build.
// Visible flow: concatenate left word (pws) with right word (combs_buf), append the
// salt after the password, pad with 0x80, run one 80-step SHA1 compression, then
// include the multi-hash comparison code.
// NOTE(review): this extract is elided — declarations (wordl0..wordl3, wordr0..wordr3,
// w0..w3, s0..s3, a..e, we_t) and most braces are not visible; comments below describe
// only the lines that are shown.
47 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
53 const u32 lid = get_local_id (0);
59 const u32 gid = get_global_id (0);
// One work-item per base password; drop out-of-range items.
61 if (gid >= gid_max) return;
// Load the left-hand (base) password: up to 32 bytes from pws[gid].i[0..7].
65 wordl0[0] = pws[gid].i[ 0];
66 wordl0[1] = pws[gid].i[ 1];
67 wordl0[2] = pws[gid].i[ 2];
68 wordl0[3] = pws[gid].i[ 3];
72 wordl1[0] = pws[gid].i[ 4];
73 wordl1[1] = pws[gid].i[ 5];
74 wordl1[2] = pws[gid].i[ 6];
75 wordl1[3] = pws[gid].i[ 7];
91 const u32 pw_l_len = pws[gid].pw_len;
// In BASE_RIGHT mode the base word goes on the right, so pre-shift it past the
// (constant-length) combination word. Uses combs_buf[0].pw_len — presumably all
// entries share one length in this mode; confirm against the host code.
93 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
95 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
// Copy up to 32 bytes of salt for this salt_pos into registers.
104 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
105 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
106 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
107 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
111 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
112 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
113 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
114 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
116 const u32 salt_len = salt_bufs[salt_pos].salt_len;
// Main loop: try every right-hand combination word against this base word.
122 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
124 const u32 pw_r_len = combs_buf[il_pos].pw_len;
126 const u32 pw_len = pw_l_len + pw_r_len;
// Load the right-hand word (up to 32 bytes).
130 wordr0[0] = combs_buf[il_pos].i[0];
131 wordr0[1] = combs_buf[il_pos].i[1];
132 wordr0[2] = combs_buf[il_pos].i[2];
133 wordr0[3] = combs_buf[il_pos].i[3];
137 wordr1[0] = combs_buf[il_pos].i[4];
138 wordr1[1] = combs_buf[il_pos].i[5];
139 wordr1[2] = combs_buf[il_pos].i[6];
140 wordr1[3] = combs_buf[il_pos].i[7];
// In BASE_LEFT mode shift the right-hand word past the base word instead.
156 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
158 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
// Stage the salt and shift it past the concatenated password, so the final
// message is $pass . $salt (salt appended).
167 s0[0] = salt_buf0[0];
168 s0[1] = salt_buf0[1];
169 s0[2] = salt_buf0[2];
170 s0[3] = salt_buf0[3];
174 s1[0] = salt_buf1[0];
175 s1[1] = salt_buf1[1];
176 s1[2] = salt_buf1[2];
177 s1[3] = salt_buf1[3];
193 switch_buffer_by_offset (s0, s1, s2, s3, pw_len);
195 const u32 pw_salt_len = pw_len + salt_len;
// Merge left word, right word and shifted salt into the 64-byte message block.
// OR works because each contributor is zero outside its own byte range.
199 w0[0] = wordl0[0] | wordr0[0] | s0[0];
200 w0[1] = wordl0[1] | wordr0[1] | s0[1];
201 w0[2] = wordl0[2] | wordr0[2] | s0[2];
202 w0[3] = wordl0[3] | wordr0[3] | s0[3];
206 w1[0] = wordl1[0] | wordr1[0] | s1[0];
207 w1[1] = wordl1[1] | wordr1[1] | s1[1];
208 w1[2] = wordl1[2] | wordr1[2] | s1[2];
209 w1[3] = wordl1[3] | wordr1[3] | s1[3];
213 w2[0] = wordl2[0] | wordr2[0] | s2[0];
214 w2[1] = wordl2[1] | wordr2[1] | s2[1];
215 w2[2] = wordl2[2] | wordr2[2] | s2[2];
216 w2[3] = wordl2[3] | wordr2[3] | s2[3];
220 w3[0] = wordl3[0] | wordr3[0] | s3[0];
221 w3[1] = wordl3[1] | wordr3[1] | s3[1];
// Place the 0x80 SHA1 padding byte right after the message.
225 append_0x80_4 (w0, w1, w2, w3, pw_salt_len);
// Byte-swap message words to big-endian for SHA1.
// NOTE(review): the we_t declaration is elided in this extract but we_t is used below.
231 u32x w0_t = swap_workaround (w0[0]);
232 u32x w1_t = swap_workaround (w0[1]);
233 u32x w2_t = swap_workaround (w0[2]);
234 u32x w3_t = swap_workaround (w0[3]);
235 u32x w4_t = swap_workaround (w1[0]);
236 u32x w5_t = swap_workaround (w1[1]);
237 u32x w6_t = swap_workaround (w1[2]);
238 u32x w7_t = swap_workaround (w1[3]);
239 u32x w8_t = swap_workaround (w2[0]);
240 u32x w9_t = swap_workaround (w2[1]);
241 u32x wa_t = swap_workaround (w2[2]);
242 u32x wb_t = swap_workaround (w2[3]);
243 u32x wc_t = swap_workaround (w3[0]);
244 u32x wd_t = swap_workaround (w3[1]);
// w[15] carries the message length in bits (single-block message).
246 u32x wf_t = pw_salt_len * 8;
// SHA1 rounds 0..19 (F0 = Ch), fully unrolled with message schedule inline.
257 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
258 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
259 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
260 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
261 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
262 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
263 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
264 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
265 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
266 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
267 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
268 SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
269 SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
270 SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
271 SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
272 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
// From round 16 on each w[i] is recomputed per the SHA1 schedule:
// w[i] = rotl(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1).
273 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
274 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
275 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
276 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
// Rounds 20..39 (F1 = Parity).
281 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
282 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
283 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
284 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
285 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
286 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
287 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
288 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
289 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
290 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
291 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
292 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
293 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
294 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
295 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
296 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
297 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
298 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
299 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
300 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
// Rounds 40..59 (F2 = Maj).
305 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
306 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
307 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
308 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
309 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
310 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
311 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
312 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
313 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
314 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
315 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
316 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
317 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
318 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
319 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
320 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
321 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
322 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
323 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
324 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
// Rounds 60..79 (F1 = Parity again).
329 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
330 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
331 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
332 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
333 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
334 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
335 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
336 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
337 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
338 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
339 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
340 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
341 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
342 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
343 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
344 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
345 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
346 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
347 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
348 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
// Compare the resulting digest against all loaded hashes (multi-hash path).
356 #include VECT_COMPARE_M
// m00110_m08: multi-hash entry point for the vect8/alternate-width build.
// NOTE(review): only the signature is visible in this extract — the body is elided
// (presumably an empty stub when this vector width is unused; confirm in full source).
360 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// m00110_m16: multi-hash entry point for the vect16/alternate-width build.
// NOTE(review): only the signature is visible in this extract — the body is elided.
364 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// m00110_s04: single-hash ("_s") kernel — same message construction and SHA1 core
// as m00110_m04, but targets one known digest (search[]) and can bail out of the
// compression four steps early via the precomputed e_rev value.
// NOTE(review): this extract is elided — declarations (wordl*, wordr*, w0..w3,
// s0..s3, a..e, we_t) and most braces are not visible; comments cover visible lines.
368 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
374 const u32 lid = get_local_id (0);
380 const u32 gid = get_global_id (0);
// One work-item per base password; drop out-of-range items.
382 if (gid >= gid_max) return;
// Load the left-hand (base) password: up to 32 bytes from pws[gid].i[0..7].
386 wordl0[0] = pws[gid].i[ 0];
387 wordl0[1] = pws[gid].i[ 1];
388 wordl0[2] = pws[gid].i[ 2];
389 wordl0[3] = pws[gid].i[ 3];
393 wordl1[0] = pws[gid].i[ 4];
394 wordl1[1] = pws[gid].i[ 5];
395 wordl1[2] = pws[gid].i[ 6];
396 wordl1[3] = pws[gid].i[ 7];
412 const u32 pw_l_len = pws[gid].pw_len;
// In BASE_RIGHT mode pre-shift the base word past the combination word.
414 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
416 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
// Copy up to 32 bytes of salt for this salt_pos into registers.
425 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
426 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
427 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
428 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
432 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
433 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
434 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
435 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
437 const u32 salt_len = salt_bufs[salt_pos].salt_len;
// Target digest words for the single hash being searched.
443 const u32 search[4] =
445 digests_buf[digests_offset].digest_buf[DGST_R0],
446 digests_buf[digests_offset].digest_buf[DGST_R1],
447 digests_buf[digests_offset].digest_buf[DGST_R2],
448 digests_buf[digests_offset].digest_buf[DGST_R3]
// e_rev is the value register e must hold at round 76 for a match — presumably the
// stored digest word is pre-reversed on the host side; confirm against host code.
455 const u32 e_rev = rotl32 (search[1], 2u);
// Main loop: try every right-hand combination word against this base word.
461 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
463 const u32 pw_r_len = combs_buf[il_pos].pw_len;
465 const u32 pw_len = pw_l_len + pw_r_len;
// Load the right-hand word (up to 32 bytes).
469 wordr0[0] = combs_buf[il_pos].i[0];
470 wordr0[1] = combs_buf[il_pos].i[1];
471 wordr0[2] = combs_buf[il_pos].i[2];
472 wordr0[3] = combs_buf[il_pos].i[3];
476 wordr1[0] = combs_buf[il_pos].i[4];
477 wordr1[1] = combs_buf[il_pos].i[5];
478 wordr1[2] = combs_buf[il_pos].i[6];
479 wordr1[3] = combs_buf[il_pos].i[7];
// In BASE_LEFT mode shift the right-hand word past the base word instead.
495 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
497 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
// Stage the salt and shift it past the concatenated password (salt appended).
506 s0[0] = salt_buf0[0];
507 s0[1] = salt_buf0[1];
508 s0[2] = salt_buf0[2];
509 s0[3] = salt_buf0[3];
513 s1[0] = salt_buf1[0];
514 s1[1] = salt_buf1[1];
515 s1[2] = salt_buf1[2];
516 s1[3] = salt_buf1[3];
532 switch_buffer_by_offset (s0, s1, s2, s3, pw_len);
534 const u32 pw_salt_len = pw_len + salt_len;
// Merge left word, right word and shifted salt into the 64-byte message block.
538 w0[0] = wordl0[0] | wordr0[0] | s0[0];
539 w0[1] = wordl0[1] | wordr0[1] | s0[1];
540 w0[2] = wordl0[2] | wordr0[2] | s0[2];
541 w0[3] = wordl0[3] | wordr0[3] | s0[3];
545 w1[0] = wordl1[0] | wordr1[0] | s1[0];
546 w1[1] = wordl1[1] | wordr1[1] | s1[1];
547 w1[2] = wordl1[2] | wordr1[2] | s1[2];
548 w1[3] = wordl1[3] | wordr1[3] | s1[3];
552 w2[0] = wordl2[0] | wordr2[0] | s2[0];
553 w2[1] = wordl2[1] | wordr2[1] | s2[1];
554 w2[2] = wordl2[2] | wordr2[2] | s2[2];
555 w2[3] = wordl2[3] | wordr2[3] | s2[3];
559 w3[0] = wordl3[0] | wordr3[0] | s3[0];
560 w3[1] = wordl3[1] | wordr3[1] | s3[1];
// Place the 0x80 SHA1 padding byte right after the message.
564 append_0x80_4 (w0, w1, w2, w3, pw_salt_len);
// Byte-swap message words to big-endian for SHA1.
// NOTE(review): the we_t declaration is elided in this extract but we_t is used below.
570 u32x w0_t = swap_workaround (w0[0]);
571 u32x w1_t = swap_workaround (w0[1]);
572 u32x w2_t = swap_workaround (w0[2]);
573 u32x w3_t = swap_workaround (w0[3]);
574 u32x w4_t = swap_workaround (w1[0]);
575 u32x w5_t = swap_workaround (w1[1]);
576 u32x w6_t = swap_workaround (w1[2]);
577 u32x w7_t = swap_workaround (w1[3]);
578 u32x w8_t = swap_workaround (w2[0]);
579 u32x w9_t = swap_workaround (w2[1]);
580 u32x wa_t = swap_workaround (w2[2]);
581 u32x wb_t = swap_workaround (w2[3]);
582 u32x wc_t = swap_workaround (w3[0]);
583 u32x wd_t = swap_workaround (w3[1]);
// w[15] carries the message length in bits (single-block message).
585 u32x wf_t = pw_salt_len * 8;
// SHA1 rounds 0..19 (F0 = Ch), fully unrolled with message schedule inline.
596 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
597 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
598 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
599 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
600 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
601 SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
602 SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
603 SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
604 SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
605 SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
606 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
607 SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
608 SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
609 SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
610 SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
611 SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
// From round 16 on: w[i] = rotl(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1).
612 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
613 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
614 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
615 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
// Rounds 20..39 (F1 = Parity).
620 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
621 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
622 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
623 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
624 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
625 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
626 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
627 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
628 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
629 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
630 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
631 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
632 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
633 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
634 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
635 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
636 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
637 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
638 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
639 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
// Rounds 40..59 (F2 = Maj).
644 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
645 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
646 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
647 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
648 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
649 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
650 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
651 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
652 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
653 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
654 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
655 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
656 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
657 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
658 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
659 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
660 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
661 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
662 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
663 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
// Rounds 60..75 (F1 = Parity).
668 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
669 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
670 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
671 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
672 w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
673 w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
674 w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
675 w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
676 w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
677 w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
678 w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
679 w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
680 w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
681 w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
682 wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
683 wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
// Early exit: if no vector lane can still match the target e word, skip the
// remaining 4 rounds and the full digest comparison for this candidate.
685 if (allx (e != e_rev)) continue;
687 wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
688 wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
689 we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
690 wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
// Compare the resulting digest against the single target hash.
698 #include VECT_COMPARE_S
// m00110_s08: single-hash entry point for the vect8/alternate-width build.
// NOTE(review): only the signature is visible in this extract — the body is elided.
702 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// m00110_s16: single-hash entry point for the vect16/alternate-width build.
// NOTE(review): only the signature is visible in this extract — the body is elided.
706 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)