2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
31 #include "include/rp_gpu.h"
35 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
36 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
40 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
41 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
45 #define VECT_COMPARE_S "check_single_vect4_comp4.c"
46 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
49 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
55 const u32 lid = get_local_id (0);
61 const u32 gid = get_global_id (0);
63 if (gid >= gid_max) return;
67 pw_buf0[0] = pws[gid].i[ 0];
68 pw_buf0[1] = pws[gid].i[ 1];
69 pw_buf0[2] = pws[gid].i[ 2];
70 pw_buf0[3] = pws[gid].i[ 3];
74 pw_buf1[0] = pws[gid].i[ 4];
75 pw_buf1[1] = pws[gid].i[ 5];
76 pw_buf1[2] = pws[gid].i[ 6];
77 pw_buf1[3] = pws[gid].i[ 7];
79 const u32 pw_len = pws[gid].pw_len;
87 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
88 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
89 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
90 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
94 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
95 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
96 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
97 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
101 salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
102 salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
106 const u32 salt_len = salt_bufs[salt_pos].salt_len;
112 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
142 const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len);
144 append_0x80_2 (w0, w1, out_len);
151 make_unicode (w0, w0_t, w1_t);
152 make_unicode (w1, w2_t, w3_t);
154 w3_t[2] = out_len * 8 * 2;
161 MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
162 MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
163 MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02);
164 MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03);
165 MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00);
166 MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01);
167 MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02);
168 MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03);
169 MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00);
170 MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01);
171 MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02);
172 MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03);
173 MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00);
174 MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01);
175 MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02);
176 MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03);
178 MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10);
179 MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11);
180 MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12);
181 MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13);
182 MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10);
183 MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11);
184 MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12);
185 MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13);
186 MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10);
187 MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11);
188 MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12);
189 MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13);
190 MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10);
191 MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11);
192 MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12);
193 MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13);
195 MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20);
196 MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21);
197 MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22);
198 MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23);
199 MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20);
200 MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21);
201 MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22);
202 MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23);
203 MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20);
204 MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21);
205 MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22);
206 MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23);
207 MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20);
208 MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21);
209 MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22);
210 MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23);
221 w1_t[0] = salt_buf0[0];
222 w1_t[1] = salt_buf0[1];
223 w1_t[2] = salt_buf0[2];
224 w1_t[3] = salt_buf0[3];
225 w2_t[0] = salt_buf1[0];
226 w2_t[1] = salt_buf1[1];
227 w2_t[2] = salt_buf1[2];
228 w2_t[3] = salt_buf1[3];
229 w3_t[0] = salt_buf2[0];
230 w3_t[1] = salt_buf2[1];
231 w3_t[2] = (16 + salt_len) * 8;
239 MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
240 MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
241 MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02);
242 MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03);
243 MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00);
244 MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01);
245 MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02);
246 MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03);
247 MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00);
248 MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01);
249 MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02);
250 MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03);
251 MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00);
252 MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01);
253 MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02);
254 MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03);
256 MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10);
257 MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11);
258 MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12);
259 MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13);
260 MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10);
261 MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11);
262 MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12);
263 MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13);
264 MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10);
265 MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11);
266 MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12);
267 MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13);
268 MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10);
269 MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11);
270 MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12);
271 MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13);
273 MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20);
274 MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21);
275 MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22);
276 MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23);
277 MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20);
278 MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21);
279 MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22);
280 MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23);
281 MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20);
282 MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21);
283 MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22);
284 MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23);
285 MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20);
286 MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21);
287 MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22);
288 MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23);
295 #include VECT_COMPARE_M
299 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
303 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
307 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
313 const u32 lid = get_local_id (0);
319 const u32 gid = get_global_id (0);
321 if (gid >= gid_max) return;
325 pw_buf0[0] = pws[gid].i[ 0];
326 pw_buf0[1] = pws[gid].i[ 1];
327 pw_buf0[2] = pws[gid].i[ 2];
328 pw_buf0[3] = pws[gid].i[ 3];
332 pw_buf1[0] = pws[gid].i[ 4];
333 pw_buf1[1] = pws[gid].i[ 5];
334 pw_buf1[2] = pws[gid].i[ 6];
335 pw_buf1[3] = pws[gid].i[ 7];
337 const u32 pw_len = pws[gid].pw_len;
343 const u32 search[4] =
345 digests_buf[digests_offset].digest_buf[DGST_R0],
346 digests_buf[digests_offset].digest_buf[DGST_R1],
347 digests_buf[digests_offset].digest_buf[DGST_R2],
348 digests_buf[digests_offset].digest_buf[DGST_R3]
357 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
358 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
359 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
360 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
364 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
365 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
366 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
367 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
371 salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
372 salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
376 const u32 salt_len = salt_bufs[salt_pos].salt_len;
382 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
412 const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len);
414 append_0x80_2 (w0, w1, out_len);
421 make_unicode (w0, w0_t, w1_t);
422 make_unicode (w1, w2_t, w3_t);
424 w3_t[2] = out_len * 8 * 2;
431 MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
432 MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
433 MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02);
434 MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03);
435 MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00);
436 MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01);
437 MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02);
438 MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03);
439 MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00);
440 MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01);
441 MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02);
442 MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03);
443 MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00);
444 MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01);
445 MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02);
446 MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03);
448 MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10);
449 MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11);
450 MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12);
451 MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13);
452 MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10);
453 MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11);
454 MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12);
455 MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13);
456 MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10);
457 MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11);
458 MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12);
459 MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13);
460 MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10);
461 MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11);
462 MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12);
463 MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13);
465 MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20);
466 MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21);
467 MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22);
468 MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23);
469 MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20);
470 MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21);
471 MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22);
472 MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23);
473 MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20);
474 MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21);
475 MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22);
476 MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23);
477 MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20);
478 MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21);
479 MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22);
480 MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23);
491 w1_t[0] = salt_buf0[0];
492 w1_t[1] = salt_buf0[1];
493 w1_t[2] = salt_buf0[2];
494 w1_t[3] = salt_buf0[3];
495 w2_t[0] = salt_buf1[0];
496 w2_t[1] = salt_buf1[1];
497 w2_t[2] = salt_buf1[2];
498 w2_t[3] = salt_buf1[3];
499 w3_t[0] = salt_buf2[0];
500 w3_t[1] = salt_buf2[1];
501 w3_t[2] = (16 + salt_len) * 8;
509 MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
510 MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
511 MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02);
512 MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03);
513 MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00);
514 MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01);
515 MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02);
516 MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03);
517 MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00);
518 MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01);
519 MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02);
520 MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03);
521 MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00);
522 MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01);
523 MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02);
524 MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03);
526 MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10);
527 MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11);
528 MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12);
529 MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13);
530 MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10);
531 MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11);
532 MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12);
533 MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13);
534 MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10);
535 MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11);
536 MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12);
537 MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13);
538 MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10);
539 MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11);
540 MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12);
541 MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13);
543 MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20);
544 MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21);
545 MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22);
546 MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23);
547 MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20);
548 MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21);
549 MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22);
550 MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23);
551 MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20);
552 MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21);
553 MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22);
554 MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23);
555 MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20);
557 bool q_cond = allx (search[0] != a);
559 if (q_cond) continue;
561 MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21);
562 MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22);
563 MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23);
570 #include VECT_COMPARE_S
574 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
578 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)