2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
20 #define COMPARE_S "check_single_comp4.c"
21 #define COMPARE_M "check_multi_comp4.c"
23 static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4])
49 MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
50 MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
51 MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
52 MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
53 MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
54 MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
55 MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
56 MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
57 MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
58 MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
59 MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
60 MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
61 MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
62 MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
63 MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
64 MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
66 MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
67 MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
68 MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
69 MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
70 MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
71 MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
72 MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
73 MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
74 MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
75 MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
76 MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
77 MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
78 MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
79 MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
80 MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
81 MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
83 MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
84 MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
85 MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
86 MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
87 MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
88 MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
89 MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
90 MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
91 MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
92 MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
93 MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
94 MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
95 MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
96 MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
97 MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
98 MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
100 MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
101 MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
102 MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
103 MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
104 MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
105 MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
106 MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
107 MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
108 MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
109 MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
110 MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
111 MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
112 MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
113 MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
114 MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
115 MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
123 static void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4])
128 block0[0] = append[0];
129 block0[1] = append[1];
130 block0[2] = append[2];
131 block0[3] = append[3];
135 block0[0] = block0[0] | append[0] << 8;
136 block0[1] = append[0] >> 24 | append[1] << 8;
137 block0[2] = append[1] >> 24 | append[2] << 8;
138 block0[3] = append[2] >> 24 | append[3] << 8;
139 block1[0] = append[3] >> 24;
143 block0[0] = block0[0] | append[0] << 16;
144 block0[1] = append[0] >> 16 | append[1] << 16;
145 block0[2] = append[1] >> 16 | append[2] << 16;
146 block0[3] = append[2] >> 16 | append[3] << 16;
147 block1[0] = append[3] >> 16;
151 block0[0] = block0[0] | append[0] << 24;
152 block0[1] = append[0] >> 8 | append[1] << 24;
153 block0[2] = append[1] >> 8 | append[2] << 24;
154 block0[3] = append[2] >> 8 | append[3] << 24;
155 block1[0] = append[3] >> 8;
159 block0[1] = append[0];
160 block0[2] = append[1];
161 block0[3] = append[2];
162 block1[0] = append[3];
166 block0[1] = block0[1] | append[0] << 8;
167 block0[2] = append[0] >> 24 | append[1] << 8;
168 block0[3] = append[1] >> 24 | append[2] << 8;
169 block1[0] = append[2] >> 24 | append[3] << 8;
170 block1[1] = append[3] >> 24;
174 block0[1] = block0[1] | append[0] << 16;
175 block0[2] = append[0] >> 16 | append[1] << 16;
176 block0[3] = append[1] >> 16 | append[2] << 16;
177 block1[0] = append[2] >> 16 | append[3] << 16;
178 block1[1] = append[3] >> 16;
182 block0[1] = block0[1] | append[0] << 24;
183 block0[2] = append[0] >> 8 | append[1] << 24;
184 block0[3] = append[1] >> 8 | append[2] << 24;
185 block1[0] = append[2] >> 8 | append[3] << 24;
186 block1[1] = append[3] >> 8;
190 block0[2] = append[0];
191 block0[3] = append[1];
192 block1[0] = append[2];
193 block1[1] = append[3];
197 block0[2] = block0[2] | append[0] << 8;
198 block0[3] = append[0] >> 24 | append[1] << 8;
199 block1[0] = append[1] >> 24 | append[2] << 8;
200 block1[1] = append[2] >> 24 | append[3] << 8;
201 block1[2] = append[3] >> 24;
205 block0[2] = block0[2] | append[0] << 16;
206 block0[3] = append[0] >> 16 | append[1] << 16;
207 block1[0] = append[1] >> 16 | append[2] << 16;
208 block1[1] = append[2] >> 16 | append[3] << 16;
209 block1[2] = append[3] >> 16;
213 block0[2] = block0[2] | append[0] << 24;
214 block0[3] = append[0] >> 8 | append[1] << 24;
215 block1[0] = append[1] >> 8 | append[2] << 24;
216 block1[1] = append[2] >> 8 | append[3] << 24;
217 block1[2] = append[3] >> 8;
221 block0[3] = append[0];
222 block1[0] = append[1];
223 block1[1] = append[2];
224 block1[2] = append[3];
228 block0[3] = block0[3] | append[0] << 8;
229 block1[0] = append[0] >> 24 | append[1] << 8;
230 block1[1] = append[1] >> 24 | append[2] << 8;
231 block1[2] = append[2] >> 24 | append[3] << 8;
232 block1[3] = append[3] >> 24;
236 block0[3] = block0[3] | append[0] << 16;
237 block1[0] = append[0] >> 16 | append[1] << 16;
238 block1[1] = append[1] >> 16 | append[2] << 16;
239 block1[2] = append[2] >> 16 | append[3] << 16;
240 block1[3] = append[3] >> 16;
244 block0[3] = block0[3] | append[0] << 24;
245 block1[0] = append[0] >> 8 | append[1] << 24;
246 block1[1] = append[1] >> 8 | append[2] << 24;
247 block1[2] = append[2] >> 8 | append[3] << 24;
248 block1[3] = append[3] >> 8;
252 block1[0] = append[0];
253 block1[1] = append[1];
254 block1[2] = append[2];
255 block1[3] = append[3];
259 block1[0] = block1[0] | append[0] << 8;
260 block1[1] = append[0] >> 24 | append[1] << 8;
261 block1[2] = append[1] >> 24 | append[2] << 8;
262 block1[3] = append[2] >> 24 | append[3] << 8;
263 block2[0] = append[3] >> 24;
267 block1[0] = block1[0] | append[0] << 16;
268 block1[1] = append[0] >> 16 | append[1] << 16;
269 block1[2] = append[1] >> 16 | append[2] << 16;
270 block1[3] = append[2] >> 16 | append[3] << 16;
271 block2[0] = append[3] >> 16;
275 block1[0] = block1[0] | append[0] << 24;
276 block1[1] = append[0] >> 8 | append[1] << 24;
277 block1[2] = append[1] >> 8 | append[2] << 24;
278 block1[3] = append[2] >> 8 | append[3] << 24;
279 block2[0] = append[3] >> 8;
283 block1[1] = append[0];
284 block1[2] = append[1];
285 block1[3] = append[2];
286 block2[0] = append[3];
290 block1[1] = block1[1] | append[0] << 8;
291 block1[2] = append[0] >> 24 | append[1] << 8;
292 block1[3] = append[1] >> 24 | append[2] << 8;
293 block2[0] = append[2] >> 24 | append[3] << 8;
294 block2[1] = append[3] >> 24;
298 block1[1] = block1[1] | append[0] << 16;
299 block1[2] = append[0] >> 16 | append[1] << 16;
300 block1[3] = append[1] >> 16 | append[2] << 16;
301 block2[0] = append[2] >> 16 | append[3] << 16;
302 block2[1] = append[3] >> 16;
306 block1[1] = block1[1] | append[0] << 24;
307 block1[2] = append[0] >> 8 | append[1] << 24;
308 block1[3] = append[1] >> 8 | append[2] << 24;
309 block2[0] = append[2] >> 8 | append[3] << 24;
310 block2[1] = append[3] >> 8;
314 block1[2] = append[0];
315 block1[3] = append[1];
316 block2[0] = append[2];
317 block2[1] = append[3];
321 block1[2] = block1[2] | append[0] << 8;
322 block1[3] = append[0] >> 24 | append[1] << 8;
323 block2[0] = append[1] >> 24 | append[2] << 8;
324 block2[1] = append[2] >> 24 | append[3] << 8;
325 block2[2] = append[3] >> 24;
329 block1[2] = block1[2] | append[0] << 16;
330 block1[3] = append[0] >> 16 | append[1] << 16;
331 block2[0] = append[1] >> 16 | append[2] << 16;
332 block2[1] = append[2] >> 16 | append[3] << 16;
333 block2[2] = append[3] >> 16;
337 block1[2] = block1[2] | append[0] << 24;
338 block1[3] = append[0] >> 8 | append[1] << 24;
339 block2[0] = append[1] >> 8 | append[2] << 24;
340 block2[1] = append[2] >> 8 | append[3] << 24;
341 block2[2] = append[3] >> 8;
345 block1[3] = append[0];
346 block2[0] = append[1];
347 block2[1] = append[2];
348 block2[2] = append[3];
352 block1[3] = block1[3] | append[0] << 8;
353 block2[0] = append[0] >> 24 | append[1] << 8;
354 block2[1] = append[1] >> 24 | append[2] << 8;
355 block2[2] = append[2] >> 24 | append[3] << 8;
356 block2[3] = append[3] >> 24;
360 block1[3] = block1[3] | append[0] << 16;
361 block2[0] = append[0] >> 16 | append[1] << 16;
362 block2[1] = append[1] >> 16 | append[2] << 16;
363 block2[2] = append[2] >> 16 | append[3] << 16;
364 block2[3] = append[3] >> 16;
368 block1[3] = block1[3] | append[0] << 24;
369 block2[0] = append[0] >> 8 | append[1] << 24;
370 block2[1] = append[1] >> 8 | append[2] << 24;
371 block2[2] = append[2] >> 8 | append[3] << 24;
372 block2[3] = append[3] >> 8;
376 block2[0] = append[0];
377 block2[1] = append[1];
378 block2[2] = append[2];
379 block2[3] = append[3];
383 block2[0] = block2[0] | append[0] << 8;
384 block2[1] = append[0] >> 24 | append[1] << 8;
385 block2[2] = append[1] >> 24 | append[2] << 8;
386 block2[3] = append[2] >> 24 | append[3] << 8;
387 block3[0] = append[3] >> 24;
391 block2[0] = block2[0] | append[0] << 16;
392 block2[1] = append[0] >> 16 | append[1] << 16;
393 block2[2] = append[1] >> 16 | append[2] << 16;
394 block2[3] = append[2] >> 16 | append[3] << 16;
395 block3[0] = append[3] >> 16;
399 block2[0] = block2[0] | append[0] << 24;
400 block2[1] = append[0] >> 8 | append[1] << 24;
401 block2[2] = append[1] >> 8 | append[2] << 24;
402 block2[3] = append[2] >> 8 | append[3] << 24;
403 block3[0] = append[3] >> 8;
407 block2[1] = append[0];
408 block2[2] = append[1];
409 block2[3] = append[2];
410 block3[0] = append[3];
414 block2[1] = block2[1] | append[0] << 8;
415 block2[2] = append[0] >> 24 | append[1] << 8;
416 block2[3] = append[1] >> 24 | append[2] << 8;
417 block3[0] = append[2] >> 24 | append[3] << 8;
418 block3[1] = append[3] >> 24;
422 block2[1] = block2[1] | append[0] << 16;
423 block2[2] = append[0] >> 16 | append[1] << 16;
424 block2[3] = append[1] >> 16 | append[2] << 16;
425 block3[0] = append[2] >> 16 | append[3] << 16;
426 block3[1] = append[3] >> 16;
430 block2[1] = block2[1] | append[0] << 24;
431 block2[2] = append[0] >> 8 | append[1] << 24;
432 block2[3] = append[1] >> 8 | append[2] << 24;
433 block3[0] = append[2] >> 8 | append[3] << 24;
434 block3[1] = append[3] >> 8;
438 block2[2] = append[0];
439 block2[3] = append[1];
440 block3[0] = append[2];
441 block3[1] = append[3];
445 block2[2] = block2[2] | append[0] << 8;
446 block2[3] = append[0] >> 24 | append[1] << 8;
447 block3[0] = append[1] >> 24 | append[2] << 8;
448 block3[1] = append[2] >> 24 | append[3] << 8;
449 block3[2] = append[3] >> 24;
453 block2[2] = block2[2] | append[0] << 16;
454 block2[3] = append[0] >> 16 | append[1] << 16;
455 block3[0] = append[1] >> 16 | append[2] << 16;
456 block3[1] = append[2] >> 16 | append[3] << 16;
457 block3[2] = append[3] >> 16;
461 block2[2] = block2[2] | append[0] << 24;
462 block2[3] = append[0] >> 8 | append[1] << 24;
463 block3[0] = append[1] >> 8 | append[2] << 24;
464 block3[1] = append[2] >> 8 | append[3] << 24;
465 block3[2] = append[3] >> 8;
469 block2[3] = append[0];
470 block3[0] = append[1];
471 block3[1] = append[2];
472 block3[2] = append[3];
476 block2[3] = block2[3] | append[0] << 8;
477 block3[0] = append[0] >> 24 | append[1] << 8;
478 block3[1] = append[1] >> 24 | append[2] << 8;
479 block3[2] = append[2] >> 24 | append[3] << 8;
480 block3[3] = append[3] >> 24;
484 block2[3] = block2[3] | append[0] << 16;
485 block3[0] = append[0] >> 16 | append[1] << 16;
486 block3[1] = append[1] >> 16 | append[2] << 16;
487 block3[2] = append[2] >> 16 | append[3] << 16;
488 block3[3] = append[3] >> 16;
492 block2[3] = block2[3] | append[0] << 24;
493 block3[0] = append[0] >> 8 | append[1] << 24;
494 block3[1] = append[1] >> 8 | append[2] << 24;
495 block3[2] = append[2] >> 8 | append[3] << 24;
496 block3[3] = append[3] >> 8;
500 block3[0] = append[0];
501 block3[1] = append[1];
502 block3[2] = append[2];
503 block3[3] = append[3];
507 block3[0] = block3[0] | append[0] << 8;
508 block3[1] = append[0] >> 24 | append[1] << 8;
509 block3[2] = append[1] >> 24 | append[2] << 8;
510 block3[3] = append[2] >> 24 | append[3] << 8;
514 block3[0] = block3[0] | append[0] << 16;
515 block3[1] = append[0] >> 16 | append[1] << 16;
516 block3[2] = append[1] >> 16 | append[2] << 16;
517 block3[3] = append[2] >> 16 | append[3] << 16;
521 block3[0] = block3[0] | append[0] << 24;
522 block3[1] = append[0] >> 8 | append[1] << 24;
523 block3[2] = append[1] >> 8 | append[2] << 24;
524 block3[3] = append[2] >> 8 | append[3] << 24;
528 block3[1] = append[0];
529 block3[2] = append[1];
530 block3[3] = append[2];
534 block3[1] = block3[1] | append[0] << 8;
535 block3[2] = append[0] >> 24 | append[1] << 8;
536 block3[3] = append[1] >> 24 | append[2] << 8;
540 block3[1] = block3[1] | append[0] << 16;
541 block3[2] = append[0] >> 16 | append[1] << 16;
542 block3[3] = append[1] >> 16 | append[2] << 16;
546 block3[1] = block3[1] | append[0] << 24;
547 block3[2] = append[0] >> 8 | append[1] << 24;
548 block3[3] = append[1] >> 8 | append[2] << 24;
552 block3[2] = append[0];
553 block3[3] = append[1];
558 static void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4])
563 block0[0] = append[0];
564 block0[1] = append[1];
565 block0[2] = append[2];
566 block0[3] = append[3];
571 block0[0] = block0[0] | append[0] << 8;
572 block0[1] = append[0] >> 24 | append[1] << 8;
573 block0[2] = append[1] >> 24 | append[2] << 8;
574 block0[3] = append[2] >> 24 | append[3] << 8;
575 block1[0] = append[3] >> 24 | 0x80u << 8;
579 block0[0] = block0[0] | append[0] << 16;
580 block0[1] = append[0] >> 16 | append[1] << 16;
581 block0[2] = append[1] >> 16 | append[2] << 16;
582 block0[3] = append[2] >> 16 | append[3] << 16;
583 block1[0] = append[3] >> 16 | 0x80u << 16;
587 block0[0] = block0[0] | append[0] << 24;
588 block0[1] = append[0] >> 8 | append[1] << 24;
589 block0[2] = append[1] >> 8 | append[2] << 24;
590 block0[3] = append[2] >> 8 | append[3] << 24;
591 block1[0] = append[3] >> 8 | 0x80u << 24;
595 block0[1] = append[0];
596 block0[2] = append[1];
597 block0[3] = append[2];
598 block1[0] = append[3];
603 block0[1] = block0[1] | append[0] << 8;
604 block0[2] = append[0] >> 24 | append[1] << 8;
605 block0[3] = append[1] >> 24 | append[2] << 8;
606 block1[0] = append[2] >> 24 | append[3] << 8;
607 block1[1] = append[3] >> 24 | 0x80u << 8;
611 block0[1] = block0[1] | append[0] << 16;
612 block0[2] = append[0] >> 16 | append[1] << 16;
613 block0[3] = append[1] >> 16 | append[2] << 16;
614 block1[0] = append[2] >> 16 | append[3] << 16;
615 block1[1] = append[3] >> 16 | 0x80u << 16;
619 block0[1] = block0[1] | append[0] << 24;
620 block0[2] = append[0] >> 8 | append[1] << 24;
621 block0[3] = append[1] >> 8 | append[2] << 24;
622 block1[0] = append[2] >> 8 | append[3] << 24;
623 block1[1] = append[3] >> 8 | 0x80u << 24;
627 block0[2] = append[0];
628 block0[3] = append[1];
629 block1[0] = append[2];
630 block1[1] = append[3];
635 block0[2] = block0[2] | append[0] << 8;
636 block0[3] = append[0] >> 24 | append[1] << 8;
637 block1[0] = append[1] >> 24 | append[2] << 8;
638 block1[1] = append[2] >> 24 | append[3] << 8;
639 block1[2] = append[3] >> 24 | 0x80u << 8;
643 block0[2] = block0[2] | append[0] << 16;
644 block0[3] = append[0] >> 16 | append[1] << 16;
645 block1[0] = append[1] >> 16 | append[2] << 16;
646 block1[1] = append[2] >> 16 | append[3] << 16;
647 block1[2] = append[3] >> 16 | 0x80u << 16;
651 block0[2] = block0[2] | append[0] << 24;
652 block0[3] = append[0] >> 8 | append[1] << 24;
653 block1[0] = append[1] >> 8 | append[2] << 24;
654 block1[1] = append[2] >> 8 | append[3] << 24;
655 block1[2] = append[3] >> 8 | 0x80u << 24;
659 block0[3] = append[0];
660 block1[0] = append[1];
661 block1[1] = append[2];
662 block1[2] = append[3];
667 block0[3] = block0[3] | append[0] << 8;
668 block1[0] = append[0] >> 24 | append[1] << 8;
669 block1[1] = append[1] >> 24 | append[2] << 8;
670 block1[2] = append[2] >> 24 | append[3] << 8;
671 block1[3] = append[3] >> 24 | 0x80u << 8;
675 block0[3] = block0[3] | append[0] << 16;
676 block1[0] = append[0] >> 16 | append[1] << 16;
677 block1[1] = append[1] >> 16 | append[2] << 16;
678 block1[2] = append[2] >> 16 | append[3] << 16;
679 block1[3] = append[3] >> 16 | 0x80u << 16;
683 block0[3] = block0[3] | append[0] << 24;
684 block1[0] = append[0] >> 8 | append[1] << 24;
685 block1[1] = append[1] >> 8 | append[2] << 24;
686 block1[2] = append[2] >> 8 | append[3] << 24;
687 block1[3] = append[3] >> 8 | 0x80u << 24;
691 block1[0] = append[0];
692 block1[1] = append[1];
693 block1[2] = append[2];
694 block1[3] = append[3];
699 block1[0] = block1[0] | append[0] << 8;
700 block1[1] = append[0] >> 24 | append[1] << 8;
701 block1[2] = append[1] >> 24 | append[2] << 8;
702 block1[3] = append[2] >> 24 | append[3] << 8;
703 block2[0] = append[3] >> 24 | 0x80u << 8;
707 block1[0] = block1[0] | append[0] << 16;
708 block1[1] = append[0] >> 16 | append[1] << 16;
709 block1[2] = append[1] >> 16 | append[2] << 16;
710 block1[3] = append[2] >> 16 | append[3] << 16;
711 block2[0] = append[3] >> 16 | 0x80u << 16;
715 block1[0] = block1[0] | append[0] << 24;
716 block1[1] = append[0] >> 8 | append[1] << 24;
717 block1[2] = append[1] >> 8 | append[2] << 24;
718 block1[3] = append[2] >> 8 | append[3] << 24;
719 block2[0] = append[3] >> 8 | 0x80u << 24;
723 block1[1] = append[0];
724 block1[2] = append[1];
725 block1[3] = append[2];
726 block2[0] = append[3];
731 block1[1] = block1[1] | append[0] << 8;
732 block1[2] = append[0] >> 24 | append[1] << 8;
733 block1[3] = append[1] >> 24 | append[2] << 8;
734 block2[0] = append[2] >> 24 | append[3] << 8;
735 block2[1] = append[3] >> 24 | 0x80u << 8;
739 block1[1] = block1[1] | append[0] << 16;
740 block1[2] = append[0] >> 16 | append[1] << 16;
741 block1[3] = append[1] >> 16 | append[2] << 16;
742 block2[0] = append[2] >> 16 | append[3] << 16;
743 block2[1] = append[3] >> 16 | 0x80u << 16;
747 block1[1] = block1[1] | append[0] << 24;
748 block1[2] = append[0] >> 8 | append[1] << 24;
749 block1[3] = append[1] >> 8 | append[2] << 24;
750 block2[0] = append[2] >> 8 | append[3] << 24;
751 block2[1] = append[3] >> 8 | 0x80u << 24;
755 block1[2] = append[0];
756 block1[3] = append[1];
757 block2[0] = append[2];
758 block2[1] = append[3];
763 block1[2] = block1[2] | append[0] << 8;
764 block1[3] = append[0] >> 24 | append[1] << 8;
765 block2[0] = append[1] >> 24 | append[2] << 8;
766 block2[1] = append[2] >> 24 | append[3] << 8;
767 block2[2] = append[3] >> 24 | 0x80u << 8;
771 block1[2] = block1[2] | append[0] << 16;
772 block1[3] = append[0] >> 16 | append[1] << 16;
773 block2[0] = append[1] >> 16 | append[2] << 16;
774 block2[1] = append[2] >> 16 | append[3] << 16;
775 block2[2] = append[3] >> 16 | 0x80u << 16;
779 block1[2] = block1[2] | append[0] << 24;
780 block1[3] = append[0] >> 8 | append[1] << 24;
781 block2[0] = append[1] >> 8 | append[2] << 24;
782 block2[1] = append[2] >> 8 | append[3] << 24;
783 block2[2] = append[3] >> 8 | 0x80u << 24;
787 block1[3] = append[0];
788 block2[0] = append[1];
789 block2[1] = append[2];
790 block2[2] = append[3];
795 block1[3] = block1[3] | append[0] << 8;
796 block2[0] = append[0] >> 24 | append[1] << 8;
797 block2[1] = append[1] >> 24 | append[2] << 8;
798 block2[2] = append[2] >> 24 | append[3] << 8;
799 block2[3] = append[3] >> 24 | 0x80u << 8;
803 block1[3] = block1[3] | append[0] << 16;
804 block2[0] = append[0] >> 16 | append[1] << 16;
805 block2[1] = append[1] >> 16 | append[2] << 16;
806 block2[2] = append[2] >> 16 | append[3] << 16;
807 block2[3] = append[3] >> 16 | 0x80u << 16;
811 block1[3] = block1[3] | append[0] << 24;
812 block2[0] = append[0] >> 8 | append[1] << 24;
813 block2[1] = append[1] >> 8 | append[2] << 24;
814 block2[2] = append[2] >> 8 | append[3] << 24;
815 block2[3] = append[3] >> 8 | 0x80u << 24;
819 block2[0] = append[0];
820 block2[1] = append[1];
821 block2[2] = append[2];
822 block2[3] = append[3];
827 block2[0] = block2[0] | append[0] << 8;
828 block2[1] = append[0] >> 24 | append[1] << 8;
829 block2[2] = append[1] >> 24 | append[2] << 8;
830 block2[3] = append[2] >> 24 | append[3] << 8;
831 block3[0] = append[3] >> 24 | 0x80u << 8;
835 block2[0] = block2[0] | append[0] << 16;
836 block2[1] = append[0] >> 16 | append[1] << 16;
837 block2[2] = append[1] >> 16 | append[2] << 16;
838 block2[3] = append[2] >> 16 | append[3] << 16;
839 block3[0] = append[3] >> 16 | 0x80u << 16;
843 block2[0] = block2[0] | append[0] << 24;
844 block2[1] = append[0] >> 8 | append[1] << 24;
845 block2[2] = append[1] >> 8 | append[2] << 24;
846 block2[3] = append[2] >> 8 | append[3] << 24;
847 block3[0] = append[3] >> 8 | 0x80u << 24;
851 block2[1] = append[0];
852 block2[2] = append[1];
853 block2[3] = append[2];
854 block3[0] = append[3];
859 block2[1] = block2[1] | append[0] << 8;
860 block2[2] = append[0] >> 24 | append[1] << 8;
861 block2[3] = append[1] >> 24 | append[2] << 8;
862 block3[0] = append[2] >> 24 | append[3] << 8;
863 block3[1] = append[3] >> 24 | 0x80u << 8;
867 block2[1] = block2[1] | append[0] << 16;
868 block2[2] = append[0] >> 16 | append[1] << 16;
869 block2[3] = append[1] >> 16 | append[2] << 16;
870 block3[0] = append[2] >> 16 | append[3] << 16;
871 block3[1] = append[3] >> 16 | 0x80u << 16;
875 block2[1] = block2[1] | append[0] << 24;
876 block2[2] = append[0] >> 8 | append[1] << 24;
877 block2[3] = append[1] >> 8 | append[2] << 24;
878 block3[0] = append[2] >> 8 | append[3] << 24;
879 block3[1] = append[3] >> 8 | 0x80u << 24;
883 block2[2] = append[0];
884 block2[3] = append[1];
885 block3[0] = append[2];
886 block3[1] = append[3];
891 block2[2] = block2[2] | append[0] << 8;
892 block2[3] = append[0] >> 24 | append[1] << 8;
893 block3[0] = append[1] >> 24 | append[2] << 8;
894 block3[1] = append[2] >> 24 | append[3] << 8;
895 block3[2] = append[3] >> 24 | 0x80u << 8;
899 block2[2] = block2[2] | append[0] << 16;
900 block2[3] = append[0] >> 16 | append[1] << 16;
901 block3[0] = append[1] >> 16 | append[2] << 16;
902 block3[1] = append[2] >> 16 | append[3] << 16;
903 block3[2] = append[3] >> 16 | 0x80u << 16;
907 block2[2] = block2[2] | append[0] << 24;
908 block2[3] = append[0] >> 8 | append[1] << 24;
909 block3[0] = append[1] >> 8 | append[2] << 24;
910 block3[1] = append[2] >> 8 | append[3] << 24;
911 block3[2] = append[3] >> 8 | 0x80u << 24;
915 block2[3] = append[0];
916 block3[0] = append[1];
917 block3[1] = append[2];
918 block3[2] = append[3];
923 block2[3] = block2[3] | append[0] << 8;
924 block3[0] = append[0] >> 24 | append[1] << 8;
925 block3[1] = append[1] >> 24 | append[2] << 8;
926 block3[2] = append[2] >> 24 | append[3] << 8;
927 block3[3] = append[3] >> 24 | 0x80u << 8;
931 block2[3] = block2[3] | append[0] << 16;
932 block3[0] = append[0] >> 16 | append[1] << 16;
933 block3[1] = append[1] >> 16 | append[2] << 16;
934 block3[2] = append[2] >> 16 | append[3] << 16;
935 block3[3] = append[3] >> 16 | 0x80u << 16;
939 block2[3] = block2[3] | append[0] << 24;
940 block3[0] = append[0] >> 8 | append[1] << 24;
941 block3[1] = append[1] >> 8 | append[2] << 24;
942 block3[2] = append[2] >> 8 | append[3] << 24;
943 block3[3] = append[3] >> 8 | 0x80u << 24;
947 block3[0] = append[0];
948 block3[1] = append[1];
949 block3[2] = append[2];
950 block3[3] = append[3];
954 block3[0] = block3[0] | append[0] << 8;
955 block3[1] = append[0] >> 24 | append[1] << 8;
956 block3[2] = append[1] >> 24 | append[2] << 8;
957 block3[3] = append[2] >> 24 | append[3] << 8;
961 block3[0] = block3[0] | append[0] << 16;
962 block3[1] = append[0] >> 16 | append[1] << 16;
963 block3[2] = append[1] >> 16 | append[2] << 16;
964 block3[3] = append[2] >> 16 | append[3] << 16;
968 block3[0] = block3[0] | append[0] << 24;
969 block3[1] = append[0] >> 8 | append[1] << 24;
970 block3[2] = append[1] >> 8 | append[2] << 24;
971 block3[3] = append[2] >> 8 | append[3] << 24;
975 block3[1] = append[0];
976 block3[2] = append[1];
977 block3[3] = append[2];
981 block3[1] = block3[1] | append[0] << 8;
982 block3[2] = append[0] >> 24 | append[1] << 8;
983 block3[3] = append[1] >> 24 | append[2] << 8;
987 block3[1] = block3[1] | append[0] << 16;
988 block3[2] = append[0] >> 16 | append[1] << 16;
989 block3[3] = append[1] >> 16 | append[2] << 16;
993 block3[1] = block3[1] | append[0] << 24;
994 block3[2] = append[0] >> 8 | append[1] << 24;
995 block3[3] = append[1] >> 8 | append[2] << 24;
999 block3[2] = append[0];
1000 block3[3] = append[1];
1005 static void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[2])
1010 block0[0] = append[0];
1011 block0[1] = append[1];
1015 block0[0] = block0[0] | append[0] << 8;
1016 block0[1] = append[0] >> 24 | append[1] << 8;
1017 block0[2] = append[1] >> 24;
1021 block0[0] = block0[0] | append[0] << 16;
1022 block0[1] = append[0] >> 16 | append[1] << 16;
1023 block0[2] = append[1] >> 16;
1027 block0[0] = block0[0] | append[0] << 24;
1028 block0[1] = append[0] >> 8 | append[1] << 24;
1029 block0[2] = append[1] >> 8;
1033 block0[1] = append[0];
1034 block0[2] = append[1];
1038 block0[1] = block0[1] | append[0] << 8;
1039 block0[2] = append[0] >> 24 | append[1] << 8;
1040 block0[3] = append[1] >> 24;
1044 block0[1] = block0[1] | append[0] << 16;
1045 block0[2] = append[0] >> 16 | append[1] << 16;
1046 block0[3] = append[1] >> 16;
1050 block0[1] = block0[1] | append[0] << 24;
1051 block0[2] = append[0] >> 8 | append[1] << 24;
1052 block0[3] = append[1] >> 8;
1056 block0[2] = append[0];
1057 block0[3] = append[1];
1061 block0[2] = block0[2] | append[0] << 8;
1062 block0[3] = append[0] >> 24 | append[1] << 8;
1063 block1[0] = append[1] >> 24;
1067 block0[2] = block0[2] | append[0] << 16;
1068 block0[3] = append[0] >> 16 | append[1] << 16;
1069 block1[0] = append[1] >> 16;
1073 block0[2] = block0[2] | append[0] << 24;
1074 block0[3] = append[0] >> 8 | append[1] << 24;
1075 block1[0] = append[1] >> 8;
1079 block0[3] = append[0];
1080 block1[0] = append[1];
1084 block0[3] = block0[3] | append[0] << 8;
1085 block1[0] = append[0] >> 24 | append[1] << 8;
1086 block1[1] = append[1] >> 24;
1090 block0[3] = block0[3] | append[0] << 16;
1091 block1[0] = append[0] >> 16 | append[1] << 16;
1092 block1[1] = append[1] >> 16;
1096 block0[3] = block0[3] | append[0] << 24;
1097 block1[0] = append[0] >> 8 | append[1] << 24;
1098 block1[1] = append[1] >> 8;
1102 block1[0] = append[0];
1103 block1[1] = append[1];
1107 block1[0] = block1[0] | append[0] << 8;
1108 block1[1] = append[0] >> 24 | append[1] << 8;
1109 block1[2] = append[1] >> 24;
1113 block1[0] = block1[0] | append[0] << 16;
1114 block1[1] = append[0] >> 16 | append[1] << 16;
1115 block1[2] = append[1] >> 16;
1119 block1[0] = block1[0] | append[0] << 24;
1120 block1[1] = append[0] >> 8 | append[1] << 24;
1121 block1[2] = append[1] >> 8;
1125 block1[1] = append[0];
1126 block1[2] = append[1];
1130 block1[1] = block1[1] | append[0] << 8;
1131 block1[2] = append[0] >> 24 | append[1] << 8;
1132 block1[3] = append[1] >> 24;
1136 block1[1] = block1[1] | append[0] << 16;
1137 block1[2] = append[0] >> 16 | append[1] << 16;
1138 block1[3] = append[1] >> 16;
1142 block1[1] = block1[1] | append[0] << 24;
1143 block1[2] = append[0] >> 8 | append[1] << 24;
1144 block1[3] = append[1] >> 8;
1148 block1[2] = append[0];
1149 block1[3] = append[1];
1153 block1[2] = block1[2] | append[0] << 8;
1154 block1[3] = append[0] >> 24 | append[1] << 8;
1155 block2[0] = append[1] >> 24;
1159 block1[2] = block1[2] | append[0] << 16;
1160 block1[3] = append[0] >> 16 | append[1] << 16;
1161 block2[0] = append[1] >> 16;
1165 block1[2] = block1[2] | append[0] << 24;
1166 block1[3] = append[0] >> 8 | append[1] << 24;
1167 block2[0] = append[1] >> 8;
1171 block1[3] = append[0];
1172 block2[0] = append[1];
1176 block1[3] = block1[3] | append[0] << 8;
1177 block2[0] = append[0] >> 24 | append[1] << 8;
1178 block2[1] = append[1] >> 24;
1182 block1[3] = block1[3] | append[0] << 16;
1183 block2[0] = append[0] >> 16 | append[1] << 16;
1184 block2[1] = append[1] >> 16;
1188 block1[3] = block1[3] | append[0] << 24;
1189 block2[0] = append[0] >> 8 | append[1] << 24;
1190 block2[1] = append[1] >> 8;
1194 block2[0] = append[0];
1195 block2[1] = append[1];
1199 block2[0] = block2[0] | append[0] << 8;
1200 block2[1] = append[0] >> 24 | append[1] << 8;
1201 block2[2] = append[1] >> 24;
1205 block2[0] = block2[0] | append[0] << 16;
1206 block2[1] = append[0] >> 16 | append[1] << 16;
1207 block2[2] = append[1] >> 16;
1211 block2[0] = block2[0] | append[0] << 24;
1212 block2[1] = append[0] >> 8 | append[1] << 24;
1213 block2[2] = append[1] >> 8;
1217 block2[1] = append[0];
1218 block2[2] = append[1];
1222 block2[1] = block2[1] | append[0] << 8;
1223 block2[2] = append[0] >> 24 | append[1] << 8;
1224 block2[3] = append[1] >> 24;
1228 block2[1] = block2[1] | append[0] << 16;
1229 block2[2] = append[0] >> 16 | append[1] << 16;
1230 block2[3] = append[1] >> 16;
1234 block2[1] = block2[1] | append[0] << 24;
1235 block2[2] = append[0] >> 8 | append[1] << 24;
1236 block2[3] = append[1] >> 8;
1240 block2[2] = append[0];
1241 block2[3] = append[1];
1245 block2[2] = block2[2] | append[0] << 8;
1246 block2[3] = append[0] >> 24 | append[1] << 8;
1247 block3[0] = append[1] >> 24;
1251 block2[2] = block2[2] | append[0] << 16;
1252 block2[3] = append[0] >> 16 | append[1] << 16;
1253 block3[0] = append[1] >> 16;
1257 block2[2] = block2[2] | append[0] << 24;
1258 block2[3] = append[0] >> 8 | append[1] << 24;
1259 block3[0] = append[1] >> 8;
1263 block2[3] = append[0];
1264 block3[0] = append[1];
1268 block2[3] = block2[3] | append[0] << 8;
1269 block3[0] = append[0] >> 24 | append[1] << 8;
1270 block3[1] = append[1] >> 24;
1274 block2[3] = block2[3] | append[0] << 16;
1275 block3[0] = append[0] >> 16 | append[1] << 16;
1276 block3[1] = append[1] >> 16;
1280 block2[3] = block2[3] | append[0] << 24;
1281 block3[0] = append[0] >> 8 | append[1] << 24;
1282 block3[1] = append[1] >> 8;
1286 block3[0] = append[0];
1287 block3[1] = append[1];
1291 block3[0] = block3[0] | append[0] << 8;
1292 block3[1] = append[0] >> 24 | append[1] << 8;
1293 block3[2] = append[1] >> 24;
1297 block3[0] = block3[0] | append[0] << 16;
1298 block3[1] = append[0] >> 16 | append[1] << 16;
1299 block3[2] = append[1] >> 16;
1303 block3[0] = block3[0] | append[0] << 24;
1304 block3[1] = append[0] >> 8 | append[1] << 24;
1305 block3[2] = append[1] >> 8;
1309 block3[1] = append[0];
1310 block3[2] = append[1];
1314 block3[1] = block3[1] | append[0] << 8;
1315 block3[2] = append[0] >> 24 | append[1] << 8;
1316 block3[3] = append[1] >> 24;
1320 block3[1] = block3[1] | append[0] << 16;
1321 block3[2] = append[0] >> 16 | append[1] << 16;
1322 block3[3] = append[1] >> 16;
1326 block3[1] = block3[1] | append[0] << 24;
1327 block3[2] = append[0] >> 8 | append[1] << 24;
1328 block3[3] = append[1] >> 8;
1332 block3[2] = append[0];
1333 block3[3] = append[1];
1338 static void append_1st (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append)
1347 block0[0] = block0[0] | append << 8;
1351 block0[0] = block0[0] | append << 16;
1355 block0[0] = block0[0] | append << 24;
1363 block0[1] = block0[1] | append << 8;
1367 block0[1] = block0[1] | append << 16;
1371 block0[1] = block0[1] | append << 24;
1379 block0[2] = block0[2] | append << 8;
1383 block0[2] = block0[2] | append << 16;
1387 block0[2] = block0[2] | append << 24;
1395 block0[3] = block0[3] | append << 8;
1399 block0[3] = block0[3] | append << 16;
1403 block0[3] = block0[3] | append << 24;
1411 block1[0] = block1[0] | append << 8;
1415 block1[0] = block1[0] | append << 16;
1419 block1[0] = block1[0] | append << 24;
1427 block1[1] = block1[1] | append << 8;
1431 block1[1] = block1[1] | append << 16;
1435 block1[1] = block1[1] | append << 24;
1443 block1[2] = block1[2] | append << 8;
1447 block1[2] = block1[2] | append << 16;
1451 block1[2] = block1[2] | append << 24;
1459 block1[3] = block1[3] | append << 8;
1463 block1[3] = block1[3] | append << 16;
1467 block1[3] = block1[3] | append << 24;
1475 block2[0] = block2[0] | append << 8;
1479 block2[0] = block2[0] | append << 16;
1483 block2[0] = block2[0] | append << 24;
1491 block2[1] = block2[1] | append << 8;
1495 block2[1] = block2[1] | append << 16;
1499 block2[1] = block2[1] | append << 24;
1507 block2[2] = block2[2] | append << 8;
1511 block2[2] = block2[2] | append << 16;
1515 block2[2] = block2[2] | append << 24;
1523 block2[3] = block2[3] | append << 8;
1527 block2[3] = block2[3] | append << 16;
1531 block2[3] = block2[3] | append << 24;
1539 block3[0] = block3[0] | append << 8;
1543 block3[0] = block3[0] | append << 16;
1547 block3[0] = block3[0] | append << 24;
1555 block3[1] = block3[1] | append << 8;
1559 block3[1] = block3[1] | append << 16;
1563 block3[1] = block3[1] | append << 24;
1572 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1578 const u32 gid = get_global_id (0);
1580 if (gid >= gid_max) return;
1584 w0[0] = pws[gid].i[0];
1585 w0[1] = pws[gid].i[1];
1586 w0[2] = pws[gid].i[2];
1587 w0[3] = pws[gid].i[3];
1589 const u32 pw_len = pws[gid].pw_len;
1597 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1598 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1600 const u32 salt_len = salt_bufs[salt_pos].salt_len;
1606 //memcat16 (block0, block1, block2, block3, block_len, w0);
1607 //block_len += pw_len;
1609 u32 block_len = pw_len;
1639 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1641 block_len += salt_len;
1643 memcat16 (block0, block1, block2, block3, block_len, w0);
1645 block_len += pw_len;
1647 append_0x80_4x4 (block0, block1, block2, block3, block_len);
1649 block3[2] = block_len * 8;
1658 md5_transform (block0, block1, block2, block3, digest);
1660 /* The password first, since that is what is most unknown */
1661 /* Then the raw salt */
1662 /* Then just as many characters of the MD5(pw,salt,pw) */
1664 //memcat16 (block0, block1, block2, block3, block_len, w);
1665 //block_len += pw_len;
1689 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1691 block_len += salt_len;
1693 truncate_block (digest, pw_len);
1695 memcat16 (block0, block1, block2, block3, block_len, digest);
1697 block_len += pw_len;
1699 /* Then something really weird... */
1701 u32 append = block0[0] & 0xFF;
1703 for (u32 j = pw_len; j; j >>= 1)
1707 append_1st (block0, block1, block2, block3, block_len, append);
1713 append_0x80_4x4 (block0, block1, block2, block3, block_len);
1715 block3[2] = block_len * 8;
1722 md5_transform (block0, block1, block2, block3, digest);
1724 tmps[gid].digest_buf[0] = digest[0];
1725 tmps[gid].digest_buf[1] = digest[1];
1726 tmps[gid].digest_buf[2] = digest[2];
1727 tmps[gid].digest_buf[3] = digest[3];
1730 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1736 const u32 gid = get_global_id (0);
1738 if (gid >= gid_max) return;
1742 w0[0] = pws[gid].i[0];
1743 w0[1] = pws[gid].i[1];
1744 w0[2] = pws[gid].i[2];
1745 w0[3] = pws[gid].i[3];
1747 const u32 pw_len = pws[gid].pw_len;
1756 append_0x80_1x4 (w0_x80, pw_len);
1764 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1765 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1767 const u32 salt_len = salt_bufs[salt_pos].salt_len;
1775 digest[0] = tmps[gid].digest_buf[0];
1776 digest[1] = tmps[gid].digest_buf[1];
1777 digest[2] = tmps[gid].digest_buf[2];
1778 digest[3] = tmps[gid].digest_buf[3];
1784 /* and now, just to make sure things don't run too fast */
1816 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
1829 const u32 j1 = (j & 1) ? 1 : 0;
1830 const u32 j3 = (j % 3) ? 1 : 0;
1831 const u32 j7 = (j % 7) ? 1 : 0;
1844 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1846 block_len += salt_len;
1851 memcat16 (block0, block1, block2, block3, block_len, w0);
1853 block_len += pw_len;
1856 memcat16_x80 (block0, block1, block2, block3, block_len, digest);
1862 block0[0] = digest[0];
1863 block0[1] = digest[1];
1864 block0[2] = digest[2];
1865 block0[3] = digest[3];
1871 block1[0] = salt_buf[0];
1872 block1[1] = salt_buf[1];
1874 block_len += salt_len;
1876 memcat16 (block0, block1, block2, block3, block_len, w0);
1878 block_len += pw_len;
1882 block1[0] = salt_buf[0];
1883 block1[1] = salt_buf[1];
1885 block_len += salt_len;
1894 block_len += pw_len;
1897 memcat16 (block0, block1, block2, block3, block_len, w0_x80);
1899 block_len += pw_len;
1902 block3[2] = block_len * 8;
1909 md5_transform (block0, block1, block2, block3, digest);
1912 tmps[gid].digest_buf[0] = digest[0];
1913 tmps[gid].digest_buf[1] = digest[1];
1914 tmps[gid].digest_buf[2] = digest[2];
1915 tmps[gid].digest_buf[3] = digest[3];
1918 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1924 const u32 gid = get_global_id (0);
1926 if (gid >= gid_max) return;
1928 const u32 lid = get_local_id (0);
1934 const u32 r0 = tmps[gid].digest_buf[DGST_R0];
1935 const u32 r1 = tmps[gid].digest_buf[DGST_R1];
1936 const u32 r2 = tmps[gid].digest_buf[DGST_R2];
1937 const u32 r3 = tmps[gid].digest_buf[DGST_R3];