2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
21 #define COMPARE_M "check_multi_vect1_comp4.c"
25 #define COMPARE_M "check_multi_vect4_comp4.c"
28 static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4])
54 MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
55 MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
56 MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
57 MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
58 MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
59 MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
60 MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
61 MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
62 MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
63 MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
64 MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
65 MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
66 MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
67 MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
68 MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
69 MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
71 MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
72 MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
73 MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
74 MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
75 MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
76 MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
77 MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
78 MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
79 MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
80 MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
81 MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
82 MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
83 MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
84 MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
85 MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
86 MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
88 MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
89 MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
90 MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
91 MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
92 MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
93 MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
94 MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
95 MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
96 MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
97 MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
98 MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
99 MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
100 MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
101 MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
102 MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
103 MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
105 MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
106 MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
107 MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
108 MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
109 MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
110 MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
111 MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
112 MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
113 MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
114 MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
115 MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
116 MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
117 MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
118 MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
119 MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
120 MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
128 static void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4])
133 block0[0] = append[0];
134 block0[1] = append[1];
135 block0[2] = append[2];
136 block0[3] = append[3];
140 block0[0] = block0[0] | append[0] << 8;
141 block0[1] = append[0] >> 24 | append[1] << 8;
142 block0[2] = append[1] >> 24 | append[2] << 8;
143 block0[3] = append[2] >> 24 | append[3] << 8;
144 block1[0] = append[3] >> 24;
148 block0[0] = block0[0] | append[0] << 16;
149 block0[1] = append[0] >> 16 | append[1] << 16;
150 block0[2] = append[1] >> 16 | append[2] << 16;
151 block0[3] = append[2] >> 16 | append[3] << 16;
152 block1[0] = append[3] >> 16;
156 block0[0] = block0[0] | append[0] << 24;
157 block0[1] = append[0] >> 8 | append[1] << 24;
158 block0[2] = append[1] >> 8 | append[2] << 24;
159 block0[3] = append[2] >> 8 | append[3] << 24;
160 block1[0] = append[3] >> 8;
164 block0[1] = append[0];
165 block0[2] = append[1];
166 block0[3] = append[2];
167 block1[0] = append[3];
171 block0[1] = block0[1] | append[0] << 8;
172 block0[2] = append[0] >> 24 | append[1] << 8;
173 block0[3] = append[1] >> 24 | append[2] << 8;
174 block1[0] = append[2] >> 24 | append[3] << 8;
175 block1[1] = append[3] >> 24;
179 block0[1] = block0[1] | append[0] << 16;
180 block0[2] = append[0] >> 16 | append[1] << 16;
181 block0[3] = append[1] >> 16 | append[2] << 16;
182 block1[0] = append[2] >> 16 | append[3] << 16;
183 block1[1] = append[3] >> 16;
187 block0[1] = block0[1] | append[0] << 24;
188 block0[2] = append[0] >> 8 | append[1] << 24;
189 block0[3] = append[1] >> 8 | append[2] << 24;
190 block1[0] = append[2] >> 8 | append[3] << 24;
191 block1[1] = append[3] >> 8;
195 block0[2] = append[0];
196 block0[3] = append[1];
197 block1[0] = append[2];
198 block1[1] = append[3];
202 block0[2] = block0[2] | append[0] << 8;
203 block0[3] = append[0] >> 24 | append[1] << 8;
204 block1[0] = append[1] >> 24 | append[2] << 8;
205 block1[1] = append[2] >> 24 | append[3] << 8;
206 block1[2] = append[3] >> 24;
210 block0[2] = block0[2] | append[0] << 16;
211 block0[3] = append[0] >> 16 | append[1] << 16;
212 block1[0] = append[1] >> 16 | append[2] << 16;
213 block1[1] = append[2] >> 16 | append[3] << 16;
214 block1[2] = append[3] >> 16;
218 block0[2] = block0[2] | append[0] << 24;
219 block0[3] = append[0] >> 8 | append[1] << 24;
220 block1[0] = append[1] >> 8 | append[2] << 24;
221 block1[1] = append[2] >> 8 | append[3] << 24;
222 block1[2] = append[3] >> 8;
226 block0[3] = append[0];
227 block1[0] = append[1];
228 block1[1] = append[2];
229 block1[2] = append[3];
233 block0[3] = block0[3] | append[0] << 8;
234 block1[0] = append[0] >> 24 | append[1] << 8;
235 block1[1] = append[1] >> 24 | append[2] << 8;
236 block1[2] = append[2] >> 24 | append[3] << 8;
237 block1[3] = append[3] >> 24;
241 block0[3] = block0[3] | append[0] << 16;
242 block1[0] = append[0] >> 16 | append[1] << 16;
243 block1[1] = append[1] >> 16 | append[2] << 16;
244 block1[2] = append[2] >> 16 | append[3] << 16;
245 block1[3] = append[3] >> 16;
249 block0[3] = block0[3] | append[0] << 24;
250 block1[0] = append[0] >> 8 | append[1] << 24;
251 block1[1] = append[1] >> 8 | append[2] << 24;
252 block1[2] = append[2] >> 8 | append[3] << 24;
253 block1[3] = append[3] >> 8;
257 block1[0] = append[0];
258 block1[1] = append[1];
259 block1[2] = append[2];
260 block1[3] = append[3];
264 block1[0] = block1[0] | append[0] << 8;
265 block1[1] = append[0] >> 24 | append[1] << 8;
266 block1[2] = append[1] >> 24 | append[2] << 8;
267 block1[3] = append[2] >> 24 | append[3] << 8;
268 block2[0] = append[3] >> 24;
272 block1[0] = block1[0] | append[0] << 16;
273 block1[1] = append[0] >> 16 | append[1] << 16;
274 block1[2] = append[1] >> 16 | append[2] << 16;
275 block1[3] = append[2] >> 16 | append[3] << 16;
276 block2[0] = append[3] >> 16;
280 block1[0] = block1[0] | append[0] << 24;
281 block1[1] = append[0] >> 8 | append[1] << 24;
282 block1[2] = append[1] >> 8 | append[2] << 24;
283 block1[3] = append[2] >> 8 | append[3] << 24;
284 block2[0] = append[3] >> 8;
288 block1[1] = append[0];
289 block1[2] = append[1];
290 block1[3] = append[2];
291 block2[0] = append[3];
295 block1[1] = block1[1] | append[0] << 8;
296 block1[2] = append[0] >> 24 | append[1] << 8;
297 block1[3] = append[1] >> 24 | append[2] << 8;
298 block2[0] = append[2] >> 24 | append[3] << 8;
299 block2[1] = append[3] >> 24;
303 block1[1] = block1[1] | append[0] << 16;
304 block1[2] = append[0] >> 16 | append[1] << 16;
305 block1[3] = append[1] >> 16 | append[2] << 16;
306 block2[0] = append[2] >> 16 | append[3] << 16;
307 block2[1] = append[3] >> 16;
311 block1[1] = block1[1] | append[0] << 24;
312 block1[2] = append[0] >> 8 | append[1] << 24;
313 block1[3] = append[1] >> 8 | append[2] << 24;
314 block2[0] = append[2] >> 8 | append[3] << 24;
315 block2[1] = append[3] >> 8;
319 block1[2] = append[0];
320 block1[3] = append[1];
321 block2[0] = append[2];
322 block2[1] = append[3];
326 block1[2] = block1[2] | append[0] << 8;
327 block1[3] = append[0] >> 24 | append[1] << 8;
328 block2[0] = append[1] >> 24 | append[2] << 8;
329 block2[1] = append[2] >> 24 | append[3] << 8;
330 block2[2] = append[3] >> 24;
334 block1[2] = block1[2] | append[0] << 16;
335 block1[3] = append[0] >> 16 | append[1] << 16;
336 block2[0] = append[1] >> 16 | append[2] << 16;
337 block2[1] = append[2] >> 16 | append[3] << 16;
338 block2[2] = append[3] >> 16;
342 block1[2] = block1[2] | append[0] << 24;
343 block1[3] = append[0] >> 8 | append[1] << 24;
344 block2[0] = append[1] >> 8 | append[2] << 24;
345 block2[1] = append[2] >> 8 | append[3] << 24;
346 block2[2] = append[3] >> 8;
350 block1[3] = append[0];
351 block2[0] = append[1];
352 block2[1] = append[2];
353 block2[2] = append[3];
357 block1[3] = block1[3] | append[0] << 8;
358 block2[0] = append[0] >> 24 | append[1] << 8;
359 block2[1] = append[1] >> 24 | append[2] << 8;
360 block2[2] = append[2] >> 24 | append[3] << 8;
361 block2[3] = append[3] >> 24;
365 block1[3] = block1[3] | append[0] << 16;
366 block2[0] = append[0] >> 16 | append[1] << 16;
367 block2[1] = append[1] >> 16 | append[2] << 16;
368 block2[2] = append[2] >> 16 | append[3] << 16;
369 block2[3] = append[3] >> 16;
373 block1[3] = block1[3] | append[0] << 24;
374 block2[0] = append[0] >> 8 | append[1] << 24;
375 block2[1] = append[1] >> 8 | append[2] << 24;
376 block2[2] = append[2] >> 8 | append[3] << 24;
377 block2[3] = append[3] >> 8;
381 block2[0] = append[0];
382 block2[1] = append[1];
383 block2[2] = append[2];
384 block2[3] = append[3];
388 block2[0] = block2[0] | append[0] << 8;
389 block2[1] = append[0] >> 24 | append[1] << 8;
390 block2[2] = append[1] >> 24 | append[2] << 8;
391 block2[3] = append[2] >> 24 | append[3] << 8;
392 block3[0] = append[3] >> 24;
396 block2[0] = block2[0] | append[0] << 16;
397 block2[1] = append[0] >> 16 | append[1] << 16;
398 block2[2] = append[1] >> 16 | append[2] << 16;
399 block2[3] = append[2] >> 16 | append[3] << 16;
400 block3[0] = append[3] >> 16;
404 block2[0] = block2[0] | append[0] << 24;
405 block2[1] = append[0] >> 8 | append[1] << 24;
406 block2[2] = append[1] >> 8 | append[2] << 24;
407 block2[3] = append[2] >> 8 | append[3] << 24;
408 block3[0] = append[3] >> 8;
412 block2[1] = append[0];
413 block2[2] = append[1];
414 block2[3] = append[2];
415 block3[0] = append[3];
419 block2[1] = block2[1] | append[0] << 8;
420 block2[2] = append[0] >> 24 | append[1] << 8;
421 block2[3] = append[1] >> 24 | append[2] << 8;
422 block3[0] = append[2] >> 24 | append[3] << 8;
423 block3[1] = append[3] >> 24;
427 block2[1] = block2[1] | append[0] << 16;
428 block2[2] = append[0] >> 16 | append[1] << 16;
429 block2[3] = append[1] >> 16 | append[2] << 16;
430 block3[0] = append[2] >> 16 | append[3] << 16;
431 block3[1] = append[3] >> 16;
435 block2[1] = block2[1] | append[0] << 24;
436 block2[2] = append[0] >> 8 | append[1] << 24;
437 block2[3] = append[1] >> 8 | append[2] << 24;
438 block3[0] = append[2] >> 8 | append[3] << 24;
439 block3[1] = append[3] >> 8;
443 block2[2] = append[0];
444 block2[3] = append[1];
445 block3[0] = append[2];
446 block3[1] = append[3];
450 block2[2] = block2[2] | append[0] << 8;
451 block2[3] = append[0] >> 24 | append[1] << 8;
452 block3[0] = append[1] >> 24 | append[2] << 8;
453 block3[1] = append[2] >> 24 | append[3] << 8;
454 block3[2] = append[3] >> 24;
458 block2[2] = block2[2] | append[0] << 16;
459 block2[3] = append[0] >> 16 | append[1] << 16;
460 block3[0] = append[1] >> 16 | append[2] << 16;
461 block3[1] = append[2] >> 16 | append[3] << 16;
462 block3[2] = append[3] >> 16;
466 block2[2] = block2[2] | append[0] << 24;
467 block2[3] = append[0] >> 8 | append[1] << 24;
468 block3[0] = append[1] >> 8 | append[2] << 24;
469 block3[1] = append[2] >> 8 | append[3] << 24;
470 block3[2] = append[3] >> 8;
474 block2[3] = append[0];
475 block3[0] = append[1];
476 block3[1] = append[2];
477 block3[2] = append[3];
481 block2[3] = block2[3] | append[0] << 8;
482 block3[0] = append[0] >> 24 | append[1] << 8;
483 block3[1] = append[1] >> 24 | append[2] << 8;
484 block3[2] = append[2] >> 24 | append[3] << 8;
485 block3[3] = append[3] >> 24;
489 block2[3] = block2[3] | append[0] << 16;
490 block3[0] = append[0] >> 16 | append[1] << 16;
491 block3[1] = append[1] >> 16 | append[2] << 16;
492 block3[2] = append[2] >> 16 | append[3] << 16;
493 block3[3] = append[3] >> 16;
497 block2[3] = block2[3] | append[0] << 24;
498 block3[0] = append[0] >> 8 | append[1] << 24;
499 block3[1] = append[1] >> 8 | append[2] << 24;
500 block3[2] = append[2] >> 8 | append[3] << 24;
501 block3[3] = append[3] >> 8;
505 block3[0] = append[0];
506 block3[1] = append[1];
507 block3[2] = append[2];
508 block3[3] = append[3];
512 block3[0] = block3[0] | append[0] << 8;
513 block3[1] = append[0] >> 24 | append[1] << 8;
514 block3[2] = append[1] >> 24 | append[2] << 8;
515 block3[3] = append[2] >> 24 | append[3] << 8;
519 block3[0] = block3[0] | append[0] << 16;
520 block3[1] = append[0] >> 16 | append[1] << 16;
521 block3[2] = append[1] >> 16 | append[2] << 16;
522 block3[3] = append[2] >> 16 | append[3] << 16;
526 block3[0] = block3[0] | append[0] << 24;
527 block3[1] = append[0] >> 8 | append[1] << 24;
528 block3[2] = append[1] >> 8 | append[2] << 24;
529 block3[3] = append[2] >> 8 | append[3] << 24;
533 block3[1] = append[0];
534 block3[2] = append[1];
535 block3[3] = append[2];
539 block3[1] = block3[1] | append[0] << 8;
540 block3[2] = append[0] >> 24 | append[1] << 8;
541 block3[3] = append[1] >> 24 | append[2] << 8;
545 block3[1] = block3[1] | append[0] << 16;
546 block3[2] = append[0] >> 16 | append[1] << 16;
547 block3[3] = append[1] >> 16 | append[2] << 16;
551 block3[1] = block3[1] | append[0] << 24;
552 block3[2] = append[0] >> 8 | append[1] << 24;
553 block3[3] = append[1] >> 8 | append[2] << 24;
557 block3[2] = append[0];
558 block3[3] = append[1];
563 static void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4])
568 block0[0] = append[0];
569 block0[1] = append[1];
570 block0[2] = append[2];
571 block0[3] = append[3];
576 block0[0] = block0[0] | append[0] << 8;
577 block0[1] = append[0] >> 24 | append[1] << 8;
578 block0[2] = append[1] >> 24 | append[2] << 8;
579 block0[3] = append[2] >> 24 | append[3] << 8;
580 block1[0] = append[3] >> 24 | 0x80u << 8;
584 block0[0] = block0[0] | append[0] << 16;
585 block0[1] = append[0] >> 16 | append[1] << 16;
586 block0[2] = append[1] >> 16 | append[2] << 16;
587 block0[3] = append[2] >> 16 | append[3] << 16;
588 block1[0] = append[3] >> 16 | 0x80u << 16;
592 block0[0] = block0[0] | append[0] << 24;
593 block0[1] = append[0] >> 8 | append[1] << 24;
594 block0[2] = append[1] >> 8 | append[2] << 24;
595 block0[3] = append[2] >> 8 | append[3] << 24;
596 block1[0] = append[3] >> 8 | 0x80u << 24;
600 block0[1] = append[0];
601 block0[2] = append[1];
602 block0[3] = append[2];
603 block1[0] = append[3];
608 block0[1] = block0[1] | append[0] << 8;
609 block0[2] = append[0] >> 24 | append[1] << 8;
610 block0[3] = append[1] >> 24 | append[2] << 8;
611 block1[0] = append[2] >> 24 | append[3] << 8;
612 block1[1] = append[3] >> 24 | 0x80u << 8;
616 block0[1] = block0[1] | append[0] << 16;
617 block0[2] = append[0] >> 16 | append[1] << 16;
618 block0[3] = append[1] >> 16 | append[2] << 16;
619 block1[0] = append[2] >> 16 | append[3] << 16;
620 block1[1] = append[3] >> 16 | 0x80u << 16;
624 block0[1] = block0[1] | append[0] << 24;
625 block0[2] = append[0] >> 8 | append[1] << 24;
626 block0[3] = append[1] >> 8 | append[2] << 24;
627 block1[0] = append[2] >> 8 | append[3] << 24;
628 block1[1] = append[3] >> 8 | 0x80u << 24;
632 block0[2] = append[0];
633 block0[3] = append[1];
634 block1[0] = append[2];
635 block1[1] = append[3];
640 block0[2] = block0[2] | append[0] << 8;
641 block0[3] = append[0] >> 24 | append[1] << 8;
642 block1[0] = append[1] >> 24 | append[2] << 8;
643 block1[1] = append[2] >> 24 | append[3] << 8;
644 block1[2] = append[3] >> 24 | 0x80u << 8;
648 block0[2] = block0[2] | append[0] << 16;
649 block0[3] = append[0] >> 16 | append[1] << 16;
650 block1[0] = append[1] >> 16 | append[2] << 16;
651 block1[1] = append[2] >> 16 | append[3] << 16;
652 block1[2] = append[3] >> 16 | 0x80u << 16;
656 block0[2] = block0[2] | append[0] << 24;
657 block0[3] = append[0] >> 8 | append[1] << 24;
658 block1[0] = append[1] >> 8 | append[2] << 24;
659 block1[1] = append[2] >> 8 | append[3] << 24;
660 block1[2] = append[3] >> 8 | 0x80u << 24;
664 block0[3] = append[0];
665 block1[0] = append[1];
666 block1[1] = append[2];
667 block1[2] = append[3];
672 block0[3] = block0[3] | append[0] << 8;
673 block1[0] = append[0] >> 24 | append[1] << 8;
674 block1[1] = append[1] >> 24 | append[2] << 8;
675 block1[2] = append[2] >> 24 | append[3] << 8;
676 block1[3] = append[3] >> 24 | 0x80u << 8;
680 block0[3] = block0[3] | append[0] << 16;
681 block1[0] = append[0] >> 16 | append[1] << 16;
682 block1[1] = append[1] >> 16 | append[2] << 16;
683 block1[2] = append[2] >> 16 | append[3] << 16;
684 block1[3] = append[3] >> 16 | 0x80u << 16;
688 block0[3] = block0[3] | append[0] << 24;
689 block1[0] = append[0] >> 8 | append[1] << 24;
690 block1[1] = append[1] >> 8 | append[2] << 24;
691 block1[2] = append[2] >> 8 | append[3] << 24;
692 block1[3] = append[3] >> 8 | 0x80u << 24;
696 block1[0] = append[0];
697 block1[1] = append[1];
698 block1[2] = append[2];
699 block1[3] = append[3];
704 block1[0] = block1[0] | append[0] << 8;
705 block1[1] = append[0] >> 24 | append[1] << 8;
706 block1[2] = append[1] >> 24 | append[2] << 8;
707 block1[3] = append[2] >> 24 | append[3] << 8;
708 block2[0] = append[3] >> 24 | 0x80u << 8;
712 block1[0] = block1[0] | append[0] << 16;
713 block1[1] = append[0] >> 16 | append[1] << 16;
714 block1[2] = append[1] >> 16 | append[2] << 16;
715 block1[3] = append[2] >> 16 | append[3] << 16;
716 block2[0] = append[3] >> 16 | 0x80u << 16;
720 block1[0] = block1[0] | append[0] << 24;
721 block1[1] = append[0] >> 8 | append[1] << 24;
722 block1[2] = append[1] >> 8 | append[2] << 24;
723 block1[3] = append[2] >> 8 | append[3] << 24;
724 block2[0] = append[3] >> 8 | 0x80u << 24;
728 block1[1] = append[0];
729 block1[2] = append[1];
730 block1[3] = append[2];
731 block2[0] = append[3];
736 block1[1] = block1[1] | append[0] << 8;
737 block1[2] = append[0] >> 24 | append[1] << 8;
738 block1[3] = append[1] >> 24 | append[2] << 8;
739 block2[0] = append[2] >> 24 | append[3] << 8;
740 block2[1] = append[3] >> 24 | 0x80u << 8;
744 block1[1] = block1[1] | append[0] << 16;
745 block1[2] = append[0] >> 16 | append[1] << 16;
746 block1[3] = append[1] >> 16 | append[2] << 16;
747 block2[0] = append[2] >> 16 | append[3] << 16;
748 block2[1] = append[3] >> 16 | 0x80u << 16;
752 block1[1] = block1[1] | append[0] << 24;
753 block1[2] = append[0] >> 8 | append[1] << 24;
754 block1[3] = append[1] >> 8 | append[2] << 24;
755 block2[0] = append[2] >> 8 | append[3] << 24;
756 block2[1] = append[3] >> 8 | 0x80u << 24;
760 block1[2] = append[0];
761 block1[3] = append[1];
762 block2[0] = append[2];
763 block2[1] = append[3];
768 block1[2] = block1[2] | append[0] << 8;
769 block1[3] = append[0] >> 24 | append[1] << 8;
770 block2[0] = append[1] >> 24 | append[2] << 8;
771 block2[1] = append[2] >> 24 | append[3] << 8;
772 block2[2] = append[3] >> 24 | 0x80u << 8;
776 block1[2] = block1[2] | append[0] << 16;
777 block1[3] = append[0] >> 16 | append[1] << 16;
778 block2[0] = append[1] >> 16 | append[2] << 16;
779 block2[1] = append[2] >> 16 | append[3] << 16;
780 block2[2] = append[3] >> 16 | 0x80u << 16;
784 block1[2] = block1[2] | append[0] << 24;
785 block1[3] = append[0] >> 8 | append[1] << 24;
786 block2[0] = append[1] >> 8 | append[2] << 24;
787 block2[1] = append[2] >> 8 | append[3] << 24;
788 block2[2] = append[3] >> 8 | 0x80u << 24;
792 block1[3] = append[0];
793 block2[0] = append[1];
794 block2[1] = append[2];
795 block2[2] = append[3];
800 block1[3] = block1[3] | append[0] << 8;
801 block2[0] = append[0] >> 24 | append[1] << 8;
802 block2[1] = append[1] >> 24 | append[2] << 8;
803 block2[2] = append[2] >> 24 | append[3] << 8;
804 block2[3] = append[3] >> 24 | 0x80u << 8;
808 block1[3] = block1[3] | append[0] << 16;
809 block2[0] = append[0] >> 16 | append[1] << 16;
810 block2[1] = append[1] >> 16 | append[2] << 16;
811 block2[2] = append[2] >> 16 | append[3] << 16;
812 block2[3] = append[3] >> 16 | 0x80u << 16;
816 block1[3] = block1[3] | append[0] << 24;
817 block2[0] = append[0] >> 8 | append[1] << 24;
818 block2[1] = append[1] >> 8 | append[2] << 24;
819 block2[2] = append[2] >> 8 | append[3] << 24;
820 block2[3] = append[3] >> 8 | 0x80u << 24;
824 block2[0] = append[0];
825 block2[1] = append[1];
826 block2[2] = append[2];
827 block2[3] = append[3];
832 block2[0] = block2[0] | append[0] << 8;
833 block2[1] = append[0] >> 24 | append[1] << 8;
834 block2[2] = append[1] >> 24 | append[2] << 8;
835 block2[3] = append[2] >> 24 | append[3] << 8;
836 block3[0] = append[3] >> 24 | 0x80u << 8;
840 block2[0] = block2[0] | append[0] << 16;
841 block2[1] = append[0] >> 16 | append[1] << 16;
842 block2[2] = append[1] >> 16 | append[2] << 16;
843 block2[3] = append[2] >> 16 | append[3] << 16;
844 block3[0] = append[3] >> 16 | 0x80u << 16;
848 block2[0] = block2[0] | append[0] << 24;
849 block2[1] = append[0] >> 8 | append[1] << 24;
850 block2[2] = append[1] >> 8 | append[2] << 24;
851 block2[3] = append[2] >> 8 | append[3] << 24;
852 block3[0] = append[3] >> 8 | 0x80u << 24;
856 block2[1] = append[0];
857 block2[2] = append[1];
858 block2[3] = append[2];
859 block3[0] = append[3];
864 block2[1] = block2[1] | append[0] << 8;
865 block2[2] = append[0] >> 24 | append[1] << 8;
866 block2[3] = append[1] >> 24 | append[2] << 8;
867 block3[0] = append[2] >> 24 | append[3] << 8;
868 block3[1] = append[3] >> 24 | 0x80u << 8;
872 block2[1] = block2[1] | append[0] << 16;
873 block2[2] = append[0] >> 16 | append[1] << 16;
874 block2[3] = append[1] >> 16 | append[2] << 16;
875 block3[0] = append[2] >> 16 | append[3] << 16;
876 block3[1] = append[3] >> 16 | 0x80u << 16;
880 block2[1] = block2[1] | append[0] << 24;
881 block2[2] = append[0] >> 8 | append[1] << 24;
882 block2[3] = append[1] >> 8 | append[2] << 24;
883 block3[0] = append[2] >> 8 | append[3] << 24;
884 block3[1] = append[3] >> 8 | 0x80u << 24;
888 block2[2] = append[0];
889 block2[3] = append[1];
890 block3[0] = append[2];
891 block3[1] = append[3];
896 block2[2] = block2[2] | append[0] << 8;
897 block2[3] = append[0] >> 24 | append[1] << 8;
898 block3[0] = append[1] >> 24 | append[2] << 8;
899 block3[1] = append[2] >> 24 | append[3] << 8;
900 block3[2] = append[3] >> 24 | 0x80u << 8;
904 block2[2] = block2[2] | append[0] << 16;
905 block2[3] = append[0] >> 16 | append[1] << 16;
906 block3[0] = append[1] >> 16 | append[2] << 16;
907 block3[1] = append[2] >> 16 | append[3] << 16;
908 block3[2] = append[3] >> 16 | 0x80u << 16;
912 block2[2] = block2[2] | append[0] << 24;
913 block2[3] = append[0] >> 8 | append[1] << 24;
914 block3[0] = append[1] >> 8 | append[2] << 24;
915 block3[1] = append[2] >> 8 | append[3] << 24;
916 block3[2] = append[3] >> 8 | 0x80u << 24;
920 block2[3] = append[0];
921 block3[0] = append[1];
922 block3[1] = append[2];
923 block3[2] = append[3];
928 block2[3] = block2[3] | append[0] << 8;
929 block3[0] = append[0] >> 24 | append[1] << 8;
930 block3[1] = append[1] >> 24 | append[2] << 8;
931 block3[2] = append[2] >> 24 | append[3] << 8;
932 block3[3] = append[3] >> 24 | 0x80u << 8;
936 block2[3] = block2[3] | append[0] << 16;
937 block3[0] = append[0] >> 16 | append[1] << 16;
938 block3[1] = append[1] >> 16 | append[2] << 16;
939 block3[2] = append[2] >> 16 | append[3] << 16;
940 block3[3] = append[3] >> 16 | 0x80u << 16;
944 block2[3] = block2[3] | append[0] << 24;
945 block3[0] = append[0] >> 8 | append[1] << 24;
946 block3[1] = append[1] >> 8 | append[2] << 24;
947 block3[2] = append[2] >> 8 | append[3] << 24;
948 block3[3] = append[3] >> 8 | 0x80u << 24;
952 block3[0] = append[0];
953 block3[1] = append[1];
954 block3[2] = append[2];
955 block3[3] = append[3];
959 block3[0] = block3[0] | append[0] << 8;
960 block3[1] = append[0] >> 24 | append[1] << 8;
961 block3[2] = append[1] >> 24 | append[2] << 8;
962 block3[3] = append[2] >> 24 | append[3] << 8;
966 block3[0] = block3[0] | append[0] << 16;
967 block3[1] = append[0] >> 16 | append[1] << 16;
968 block3[2] = append[1] >> 16 | append[2] << 16;
969 block3[3] = append[2] >> 16 | append[3] << 16;
973 block3[0] = block3[0] | append[0] << 24;
974 block3[1] = append[0] >> 8 | append[1] << 24;
975 block3[2] = append[1] >> 8 | append[2] << 24;
976 block3[3] = append[2] >> 8 | append[3] << 24;
980 block3[1] = append[0];
981 block3[2] = append[1];
982 block3[3] = append[2];
986 block3[1] = block3[1] | append[0] << 8;
987 block3[2] = append[0] >> 24 | append[1] << 8;
988 block3[3] = append[1] >> 24 | append[2] << 8;
992 block3[1] = block3[1] | append[0] << 16;
993 block3[2] = append[0] >> 16 | append[1] << 16;
994 block3[3] = append[1] >> 16 | append[2] << 16;
998 block3[1] = block3[1] | append[0] << 24;
999 block3[2] = append[0] >> 8 | append[1] << 24;
1000 block3[3] = append[1] >> 8 | append[2] << 24;
1004 block3[2] = append[0];
1005 block3[3] = append[1];
1010 static void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[2])
1015 block0[0] = append[0];
1016 block0[1] = append[1];
1020 block0[0] = block0[0] | append[0] << 8;
1021 block0[1] = append[0] >> 24 | append[1] << 8;
1022 block0[2] = append[1] >> 24;
1026 block0[0] = block0[0] | append[0] << 16;
1027 block0[1] = append[0] >> 16 | append[1] << 16;
1028 block0[2] = append[1] >> 16;
1032 block0[0] = block0[0] | append[0] << 24;
1033 block0[1] = append[0] >> 8 | append[1] << 24;
1034 block0[2] = append[1] >> 8;
1038 block0[1] = append[0];
1039 block0[2] = append[1];
1043 block0[1] = block0[1] | append[0] << 8;
1044 block0[2] = append[0] >> 24 | append[1] << 8;
1045 block0[3] = append[1] >> 24;
1049 block0[1] = block0[1] | append[0] << 16;
1050 block0[2] = append[0] >> 16 | append[1] << 16;
1051 block0[3] = append[1] >> 16;
1055 block0[1] = block0[1] | append[0] << 24;
1056 block0[2] = append[0] >> 8 | append[1] << 24;
1057 block0[3] = append[1] >> 8;
1061 block0[2] = append[0];
1062 block0[3] = append[1];
1066 block0[2] = block0[2] | append[0] << 8;
1067 block0[3] = append[0] >> 24 | append[1] << 8;
1068 block1[0] = append[1] >> 24;
1072 block0[2] = block0[2] | append[0] << 16;
1073 block0[3] = append[0] >> 16 | append[1] << 16;
1074 block1[0] = append[1] >> 16;
1078 block0[2] = block0[2] | append[0] << 24;
1079 block0[3] = append[0] >> 8 | append[1] << 24;
1080 block1[0] = append[1] >> 8;
1084 block0[3] = append[0];
1085 block1[0] = append[1];
1089 block0[3] = block0[3] | append[0] << 8;
1090 block1[0] = append[0] >> 24 | append[1] << 8;
1091 block1[1] = append[1] >> 24;
1095 block0[3] = block0[3] | append[0] << 16;
1096 block1[0] = append[0] >> 16 | append[1] << 16;
1097 block1[1] = append[1] >> 16;
1101 block0[3] = block0[3] | append[0] << 24;
1102 block1[0] = append[0] >> 8 | append[1] << 24;
1103 block1[1] = append[1] >> 8;
1107 block1[0] = append[0];
1108 block1[1] = append[1];
1112 block1[0] = block1[0] | append[0] << 8;
1113 block1[1] = append[0] >> 24 | append[1] << 8;
1114 block1[2] = append[1] >> 24;
1118 block1[0] = block1[0] | append[0] << 16;
1119 block1[1] = append[0] >> 16 | append[1] << 16;
1120 block1[2] = append[1] >> 16;
1124 block1[0] = block1[0] | append[0] << 24;
1125 block1[1] = append[0] >> 8 | append[1] << 24;
1126 block1[2] = append[1] >> 8;
1130 block1[1] = append[0];
1131 block1[2] = append[1];
1135 block1[1] = block1[1] | append[0] << 8;
1136 block1[2] = append[0] >> 24 | append[1] << 8;
1137 block1[3] = append[1] >> 24;
1141 block1[1] = block1[1] | append[0] << 16;
1142 block1[2] = append[0] >> 16 | append[1] << 16;
1143 block1[3] = append[1] >> 16;
1147 block1[1] = block1[1] | append[0] << 24;
1148 block1[2] = append[0] >> 8 | append[1] << 24;
1149 block1[3] = append[1] >> 8;
1153 block1[2] = append[0];
1154 block1[3] = append[1];
1158 block1[2] = block1[2] | append[0] << 8;
1159 block1[3] = append[0] >> 24 | append[1] << 8;
1160 block2[0] = append[1] >> 24;
1164 block1[2] = block1[2] | append[0] << 16;
1165 block1[3] = append[0] >> 16 | append[1] << 16;
1166 block2[0] = append[1] >> 16;
1170 block1[2] = block1[2] | append[0] << 24;
1171 block1[3] = append[0] >> 8 | append[1] << 24;
1172 block2[0] = append[1] >> 8;
1176 block1[3] = append[0];
1177 block2[0] = append[1];
1181 block1[3] = block1[3] | append[0] << 8;
1182 block2[0] = append[0] >> 24 | append[1] << 8;
1183 block2[1] = append[1] >> 24;
1187 block1[3] = block1[3] | append[0] << 16;
1188 block2[0] = append[0] >> 16 | append[1] << 16;
1189 block2[1] = append[1] >> 16;
1193 block1[3] = block1[3] | append[0] << 24;
1194 block2[0] = append[0] >> 8 | append[1] << 24;
1195 block2[1] = append[1] >> 8;
1199 block2[0] = append[0];
1200 block2[1] = append[1];
1204 block2[0] = block2[0] | append[0] << 8;
1205 block2[1] = append[0] >> 24 | append[1] << 8;
1206 block2[2] = append[1] >> 24;
1210 block2[0] = block2[0] | append[0] << 16;
1211 block2[1] = append[0] >> 16 | append[1] << 16;
1212 block2[2] = append[1] >> 16;
1216 block2[0] = block2[0] | append[0] << 24;
1217 block2[1] = append[0] >> 8 | append[1] << 24;
1218 block2[2] = append[1] >> 8;
1222 block2[1] = append[0];
1223 block2[2] = append[1];
1227 block2[1] = block2[1] | append[0] << 8;
1228 block2[2] = append[0] >> 24 | append[1] << 8;
1229 block2[3] = append[1] >> 24;
1233 block2[1] = block2[1] | append[0] << 16;
1234 block2[2] = append[0] >> 16 | append[1] << 16;
1235 block2[3] = append[1] >> 16;
1239 block2[1] = block2[1] | append[0] << 24;
1240 block2[2] = append[0] >> 8 | append[1] << 24;
1241 block2[3] = append[1] >> 8;
1245 block2[2] = append[0];
1246 block2[3] = append[1];
1250 block2[2] = block2[2] | append[0] << 8;
1251 block2[3] = append[0] >> 24 | append[1] << 8;
1252 block3[0] = append[1] >> 24;
1256 block2[2] = block2[2] | append[0] << 16;
1257 block2[3] = append[0] >> 16 | append[1] << 16;
1258 block3[0] = append[1] >> 16;
1262 block2[2] = block2[2] | append[0] << 24;
1263 block2[3] = append[0] >> 8 | append[1] << 24;
1264 block3[0] = append[1] >> 8;
1268 block2[3] = append[0];
1269 block3[0] = append[1];
1273 block2[3] = block2[3] | append[0] << 8;
1274 block3[0] = append[0] >> 24 | append[1] << 8;
1275 block3[1] = append[1] >> 24;
1279 block2[3] = block2[3] | append[0] << 16;
1280 block3[0] = append[0] >> 16 | append[1] << 16;
1281 block3[1] = append[1] >> 16;
1285 block2[3] = block2[3] | append[0] << 24;
1286 block3[0] = append[0] >> 8 | append[1] << 24;
1287 block3[1] = append[1] >> 8;
1291 block3[0] = append[0];
1292 block3[1] = append[1];
1296 block3[0] = block3[0] | append[0] << 8;
1297 block3[1] = append[0] >> 24 | append[1] << 8;
1298 block3[2] = append[1] >> 24;
1302 block3[0] = block3[0] | append[0] << 16;
1303 block3[1] = append[0] >> 16 | append[1] << 16;
1304 block3[2] = append[1] >> 16;
1308 block3[0] = block3[0] | append[0] << 24;
1309 block3[1] = append[0] >> 8 | append[1] << 24;
1310 block3[2] = append[1] >> 8;
1314 block3[1] = append[0];
1315 block3[2] = append[1];
1319 block3[1] = block3[1] | append[0] << 8;
1320 block3[2] = append[0] >> 24 | append[1] << 8;
1321 block3[3] = append[1] >> 24;
1325 block3[1] = block3[1] | append[0] << 16;
1326 block3[2] = append[0] >> 16 | append[1] << 16;
1327 block3[3] = append[1] >> 16;
1331 block3[1] = block3[1] | append[0] << 24;
1332 block3[2] = append[0] >> 8 | append[1] << 24;
1333 block3[3] = append[1] >> 8;
1337 block3[2] = append[0];
1338 block3[3] = append[1];
1343 static void append_1st (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append)
1352 block0[0] = block0[0] | append << 8;
1356 block0[0] = block0[0] | append << 16;
1360 block0[0] = block0[0] | append << 24;
1368 block0[1] = block0[1] | append << 8;
1372 block0[1] = block0[1] | append << 16;
1376 block0[1] = block0[1] | append << 24;
1384 block0[2] = block0[2] | append << 8;
1388 block0[2] = block0[2] | append << 16;
1392 block0[2] = block0[2] | append << 24;
1400 block0[3] = block0[3] | append << 8;
1404 block0[3] = block0[3] | append << 16;
1408 block0[3] = block0[3] | append << 24;
1416 block1[0] = block1[0] | append << 8;
1420 block1[0] = block1[0] | append << 16;
1424 block1[0] = block1[0] | append << 24;
1432 block1[1] = block1[1] | append << 8;
1436 block1[1] = block1[1] | append << 16;
1440 block1[1] = block1[1] | append << 24;
1448 block1[2] = block1[2] | append << 8;
1452 block1[2] = block1[2] | append << 16;
1456 block1[2] = block1[2] | append << 24;
1464 block1[3] = block1[3] | append << 8;
1468 block1[3] = block1[3] | append << 16;
1472 block1[3] = block1[3] | append << 24;
1480 block2[0] = block2[0] | append << 8;
1484 block2[0] = block2[0] | append << 16;
1488 block2[0] = block2[0] | append << 24;
1496 block2[1] = block2[1] | append << 8;
1500 block2[1] = block2[1] | append << 16;
1504 block2[1] = block2[1] | append << 24;
1512 block2[2] = block2[2] | append << 8;
1516 block2[2] = block2[2] | append << 16;
1520 block2[2] = block2[2] | append << 24;
1528 block2[3] = block2[3] | append << 8;
1532 block2[3] = block2[3] | append << 16;
1536 block2[3] = block2[3] | append << 24;
1544 block3[0] = block3[0] | append << 8;
1548 block3[0] = block3[0] | append << 16;
1552 block3[0] = block3[0] | append << 24;
1560 block3[1] = block3[1] | append << 8;
1564 block3[1] = block3[1] | append << 16;
1568 block3[1] = block3[1] | append << 24;
1577 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1583 const u32 gid = get_global_id (0);
1585 if (gid >= gid_max) return;
1589 w0[0] = pws[gid].i[0];
1590 w0[1] = pws[gid].i[1];
1591 w0[2] = pws[gid].i[2];
1592 w0[3] = pws[gid].i[3];
1594 const u32 pw_len = pws[gid].pw_len;
1602 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1603 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1605 const u32 salt_len = salt_bufs[salt_pos].salt_len;
1611 //memcat16 (block0, block1, block2, block3, block_len, w0);
1612 //block_len += pw_len;
1614 u32 block_len = pw_len;
1644 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1646 block_len += salt_len;
1648 memcat16 (block0, block1, block2, block3, block_len, w0);
1650 block_len += pw_len;
1652 append_0x80_4x4 (block0, block1, block2, block3, block_len);
1654 block3[2] = block_len * 8;
1663 md5_transform (block0, block1, block2, block3, digest);
1665 /* The password first, since that is what is most unknown */
1666 /* Then the raw salt */
1667 /* Then just as many characters of the MD5(pw,salt,pw) */
1669 //memcat16 (block0, block1, block2, block3, block_len, w);
1670 //block_len += pw_len;
1694 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1696 block_len += salt_len;
1698 truncate_block (digest, pw_len);
1700 memcat16 (block0, block1, block2, block3, block_len, digest);
1702 block_len += pw_len;
1704 /* Then something really weird... */
1706 u32 append = block0[0] & 0xFF;
1708 for (u32 j = pw_len; j; j >>= 1)
1712 append_1st (block0, block1, block2, block3, block_len, append);
1718 append_0x80_4x4 (block0, block1, block2, block3, block_len);
1720 block3[2] = block_len * 8;
1727 md5_transform (block0, block1, block2, block3, digest);
1729 tmps[gid].digest_buf[0] = digest[0];
1730 tmps[gid].digest_buf[1] = digest[1];
1731 tmps[gid].digest_buf[2] = digest[2];
1732 tmps[gid].digest_buf[3] = digest[3];
1735 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1741 const u32 gid = get_global_id (0);
1743 if (gid >= gid_max) return;
1747 w0[0] = pws[gid].i[0];
1748 w0[1] = pws[gid].i[1];
1749 w0[2] = pws[gid].i[2];
1750 w0[3] = pws[gid].i[3];
1752 const u32 pw_len = pws[gid].pw_len;
1761 append_0x80_1x4 (w0_x80, pw_len);
1769 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1770 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1772 const u32 salt_len = salt_bufs[salt_pos].salt_len;
1780 digest[0] = tmps[gid].digest_buf[0];
1781 digest[1] = tmps[gid].digest_buf[1];
1782 digest[2] = tmps[gid].digest_buf[2];
1783 digest[3] = tmps[gid].digest_buf[3];
1789 /* and now, just to make sure things don't run too fast */
1821 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
1834 const u32 j1 = (j & 1) ? 1 : 0;
1835 const u32 j3 = (j % 3) ? 1 : 0;
1836 const u32 j7 = (j % 7) ? 1 : 0;
1849 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1851 block_len += salt_len;
1856 memcat16 (block0, block1, block2, block3, block_len, w0);
1858 block_len += pw_len;
1861 memcat16_x80 (block0, block1, block2, block3, block_len, digest);
1867 block0[0] = digest[0];
1868 block0[1] = digest[1];
1869 block0[2] = digest[2];
1870 block0[3] = digest[3];
1876 block1[0] = salt_buf[0];
1877 block1[1] = salt_buf[1];
1879 block_len += salt_len;
1881 memcat16 (block0, block1, block2, block3, block_len, w0);
1883 block_len += pw_len;
1887 block1[0] = salt_buf[0];
1888 block1[1] = salt_buf[1];
1890 block_len += salt_len;
1899 block_len += pw_len;
1902 memcat16 (block0, block1, block2, block3, block_len, w0_x80);
1904 block_len += pw_len;
1907 block3[2] = block_len * 8;
1914 md5_transform (block0, block1, block2, block3, digest);
1917 tmps[gid].digest_buf[0] = digest[0];
1918 tmps[gid].digest_buf[1] = digest[1];
1919 tmps[gid].digest_buf[2] = digest[2];
1920 tmps[gid].digest_buf[3] = digest[3];
1923 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1929 const u32 gid = get_global_id (0);
1931 if (gid >= gid_max) return;
1933 const u32 lid = get_local_id (0);
1939 const u32 r0 = tmps[gid].digest_buf[DGST_R0];
1940 const u32 r1 = tmps[gid].digest_buf[DGST_R1];
1941 const u32 r2 = tmps[gid].digest_buf[DGST_R2];
1942 const u32 r3 = tmps[gid].digest_buf[DGST_R3];