2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
21 #define COMPARE_M "check_multi_vect1_comp4.c"
25 #define COMPARE_M "check_multi_vect4_comp4.c"
28 #define md5apr1_magic0 0x72706124
29 #define md5apr1_magic1 0x00002431
31 static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4])
57 MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
58 MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
59 MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
60 MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
61 MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
62 MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
63 MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
64 MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
65 MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
66 MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
67 MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
68 MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
69 MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
70 MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
71 MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
72 MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
74 MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
75 MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
76 MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
77 MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
78 MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
79 MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
80 MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
81 MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
82 MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
83 MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
84 MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
85 MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
86 MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
87 MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
88 MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
89 MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
91 MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
92 MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
93 MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
94 MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
95 MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
96 MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
97 MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
98 MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
99 MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
100 MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
101 MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
102 MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
103 MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
104 MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
105 MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
106 MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
108 MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
109 MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
110 MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
111 MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
112 MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
113 MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
114 MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
115 MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
116 MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
117 MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
118 MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
119 MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
120 MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
121 MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
122 MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
123 MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
131 static void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4])
136 block0[0] = append[0];
137 block0[1] = append[1];
138 block0[2] = append[2];
139 block0[3] = append[3];
143 block0[0] = block0[0] | append[0] << 8;
144 block0[1] = append[0] >> 24 | append[1] << 8;
145 block0[2] = append[1] >> 24 | append[2] << 8;
146 block0[3] = append[2] >> 24 | append[3] << 8;
147 block1[0] = append[3] >> 24;
151 block0[0] = block0[0] | append[0] << 16;
152 block0[1] = append[0] >> 16 | append[1] << 16;
153 block0[2] = append[1] >> 16 | append[2] << 16;
154 block0[3] = append[2] >> 16 | append[3] << 16;
155 block1[0] = append[3] >> 16;
159 block0[0] = block0[0] | append[0] << 24;
160 block0[1] = append[0] >> 8 | append[1] << 24;
161 block0[2] = append[1] >> 8 | append[2] << 24;
162 block0[3] = append[2] >> 8 | append[3] << 24;
163 block1[0] = append[3] >> 8;
167 block0[1] = append[0];
168 block0[2] = append[1];
169 block0[3] = append[2];
170 block1[0] = append[3];
174 block0[1] = block0[1] | append[0] << 8;
175 block0[2] = append[0] >> 24 | append[1] << 8;
176 block0[3] = append[1] >> 24 | append[2] << 8;
177 block1[0] = append[2] >> 24 | append[3] << 8;
178 block1[1] = append[3] >> 24;
182 block0[1] = block0[1] | append[0] << 16;
183 block0[2] = append[0] >> 16 | append[1] << 16;
184 block0[3] = append[1] >> 16 | append[2] << 16;
185 block1[0] = append[2] >> 16 | append[3] << 16;
186 block1[1] = append[3] >> 16;
190 block0[1] = block0[1] | append[0] << 24;
191 block0[2] = append[0] >> 8 | append[1] << 24;
192 block0[3] = append[1] >> 8 | append[2] << 24;
193 block1[0] = append[2] >> 8 | append[3] << 24;
194 block1[1] = append[3] >> 8;
198 block0[2] = append[0];
199 block0[3] = append[1];
200 block1[0] = append[2];
201 block1[1] = append[3];
205 block0[2] = block0[2] | append[0] << 8;
206 block0[3] = append[0] >> 24 | append[1] << 8;
207 block1[0] = append[1] >> 24 | append[2] << 8;
208 block1[1] = append[2] >> 24 | append[3] << 8;
209 block1[2] = append[3] >> 24;
213 block0[2] = block0[2] | append[0] << 16;
214 block0[3] = append[0] >> 16 | append[1] << 16;
215 block1[0] = append[1] >> 16 | append[2] << 16;
216 block1[1] = append[2] >> 16 | append[3] << 16;
217 block1[2] = append[3] >> 16;
221 block0[2] = block0[2] | append[0] << 24;
222 block0[3] = append[0] >> 8 | append[1] << 24;
223 block1[0] = append[1] >> 8 | append[2] << 24;
224 block1[1] = append[2] >> 8 | append[3] << 24;
225 block1[2] = append[3] >> 8;
229 block0[3] = append[0];
230 block1[0] = append[1];
231 block1[1] = append[2];
232 block1[2] = append[3];
236 block0[3] = block0[3] | append[0] << 8;
237 block1[0] = append[0] >> 24 | append[1] << 8;
238 block1[1] = append[1] >> 24 | append[2] << 8;
239 block1[2] = append[2] >> 24 | append[3] << 8;
240 block1[3] = append[3] >> 24;
244 block0[3] = block0[3] | append[0] << 16;
245 block1[0] = append[0] >> 16 | append[1] << 16;
246 block1[1] = append[1] >> 16 | append[2] << 16;
247 block1[2] = append[2] >> 16 | append[3] << 16;
248 block1[3] = append[3] >> 16;
252 block0[3] = block0[3] | append[0] << 24;
253 block1[0] = append[0] >> 8 | append[1] << 24;
254 block1[1] = append[1] >> 8 | append[2] << 24;
255 block1[2] = append[2] >> 8 | append[3] << 24;
256 block1[3] = append[3] >> 8;
260 block1[0] = append[0];
261 block1[1] = append[1];
262 block1[2] = append[2];
263 block1[3] = append[3];
267 block1[0] = block1[0] | append[0] << 8;
268 block1[1] = append[0] >> 24 | append[1] << 8;
269 block1[2] = append[1] >> 24 | append[2] << 8;
270 block1[3] = append[2] >> 24 | append[3] << 8;
271 block2[0] = append[3] >> 24;
275 block1[0] = block1[0] | append[0] << 16;
276 block1[1] = append[0] >> 16 | append[1] << 16;
277 block1[2] = append[1] >> 16 | append[2] << 16;
278 block1[3] = append[2] >> 16 | append[3] << 16;
279 block2[0] = append[3] >> 16;
283 block1[0] = block1[0] | append[0] << 24;
284 block1[1] = append[0] >> 8 | append[1] << 24;
285 block1[2] = append[1] >> 8 | append[2] << 24;
286 block1[3] = append[2] >> 8 | append[3] << 24;
287 block2[0] = append[3] >> 8;
291 block1[1] = append[0];
292 block1[2] = append[1];
293 block1[3] = append[2];
294 block2[0] = append[3];
298 block1[1] = block1[1] | append[0] << 8;
299 block1[2] = append[0] >> 24 | append[1] << 8;
300 block1[3] = append[1] >> 24 | append[2] << 8;
301 block2[0] = append[2] >> 24 | append[3] << 8;
302 block2[1] = append[3] >> 24;
306 block1[1] = block1[1] | append[0] << 16;
307 block1[2] = append[0] >> 16 | append[1] << 16;
308 block1[3] = append[1] >> 16 | append[2] << 16;
309 block2[0] = append[2] >> 16 | append[3] << 16;
310 block2[1] = append[3] >> 16;
314 block1[1] = block1[1] | append[0] << 24;
315 block1[2] = append[0] >> 8 | append[1] << 24;
316 block1[3] = append[1] >> 8 | append[2] << 24;
317 block2[0] = append[2] >> 8 | append[3] << 24;
318 block2[1] = append[3] >> 8;
322 block1[2] = append[0];
323 block1[3] = append[1];
324 block2[0] = append[2];
325 block2[1] = append[3];
329 block1[2] = block1[2] | append[0] << 8;
330 block1[3] = append[0] >> 24 | append[1] << 8;
331 block2[0] = append[1] >> 24 | append[2] << 8;
332 block2[1] = append[2] >> 24 | append[3] << 8;
333 block2[2] = append[3] >> 24;
337 block1[2] = block1[2] | append[0] << 16;
338 block1[3] = append[0] >> 16 | append[1] << 16;
339 block2[0] = append[1] >> 16 | append[2] << 16;
340 block2[1] = append[2] >> 16 | append[3] << 16;
341 block2[2] = append[3] >> 16;
345 block1[2] = block1[2] | append[0] << 24;
346 block1[3] = append[0] >> 8 | append[1] << 24;
347 block2[0] = append[1] >> 8 | append[2] << 24;
348 block2[1] = append[2] >> 8 | append[3] << 24;
349 block2[2] = append[3] >> 8;
353 block1[3] = append[0];
354 block2[0] = append[1];
355 block2[1] = append[2];
356 block2[2] = append[3];
360 block1[3] = block1[3] | append[0] << 8;
361 block2[0] = append[0] >> 24 | append[1] << 8;
362 block2[1] = append[1] >> 24 | append[2] << 8;
363 block2[2] = append[2] >> 24 | append[3] << 8;
364 block2[3] = append[3] >> 24;
368 block1[3] = block1[3] | append[0] << 16;
369 block2[0] = append[0] >> 16 | append[1] << 16;
370 block2[1] = append[1] >> 16 | append[2] << 16;
371 block2[2] = append[2] >> 16 | append[3] << 16;
372 block2[3] = append[3] >> 16;
376 block1[3] = block1[3] | append[0] << 24;
377 block2[0] = append[0] >> 8 | append[1] << 24;
378 block2[1] = append[1] >> 8 | append[2] << 24;
379 block2[2] = append[2] >> 8 | append[3] << 24;
380 block2[3] = append[3] >> 8;
384 block2[0] = append[0];
385 block2[1] = append[1];
386 block2[2] = append[2];
387 block2[3] = append[3];
391 block2[0] = block2[0] | append[0] << 8;
392 block2[1] = append[0] >> 24 | append[1] << 8;
393 block2[2] = append[1] >> 24 | append[2] << 8;
394 block2[3] = append[2] >> 24 | append[3] << 8;
395 block3[0] = append[3] >> 24;
399 block2[0] = block2[0] | append[0] << 16;
400 block2[1] = append[0] >> 16 | append[1] << 16;
401 block2[2] = append[1] >> 16 | append[2] << 16;
402 block2[3] = append[2] >> 16 | append[3] << 16;
403 block3[0] = append[3] >> 16;
407 block2[0] = block2[0] | append[0] << 24;
408 block2[1] = append[0] >> 8 | append[1] << 24;
409 block2[2] = append[1] >> 8 | append[2] << 24;
410 block2[3] = append[2] >> 8 | append[3] << 24;
411 block3[0] = append[3] >> 8;
415 block2[1] = append[0];
416 block2[2] = append[1];
417 block2[3] = append[2];
418 block3[0] = append[3];
422 block2[1] = block2[1] | append[0] << 8;
423 block2[2] = append[0] >> 24 | append[1] << 8;
424 block2[3] = append[1] >> 24 | append[2] << 8;
425 block3[0] = append[2] >> 24 | append[3] << 8;
426 block3[1] = append[3] >> 24;
430 block2[1] = block2[1] | append[0] << 16;
431 block2[2] = append[0] >> 16 | append[1] << 16;
432 block2[3] = append[1] >> 16 | append[2] << 16;
433 block3[0] = append[2] >> 16 | append[3] << 16;
434 block3[1] = append[3] >> 16;
438 block2[1] = block2[1] | append[0] << 24;
439 block2[2] = append[0] >> 8 | append[1] << 24;
440 block2[3] = append[1] >> 8 | append[2] << 24;
441 block3[0] = append[2] >> 8 | append[3] << 24;
442 block3[1] = append[3] >> 8;
446 block2[2] = append[0];
447 block2[3] = append[1];
448 block3[0] = append[2];
449 block3[1] = append[3];
453 block2[2] = block2[2] | append[0] << 8;
454 block2[3] = append[0] >> 24 | append[1] << 8;
455 block3[0] = append[1] >> 24 | append[2] << 8;
456 block3[1] = append[2] >> 24 | append[3] << 8;
457 block3[2] = append[3] >> 24;
461 block2[2] = block2[2] | append[0] << 16;
462 block2[3] = append[0] >> 16 | append[1] << 16;
463 block3[0] = append[1] >> 16 | append[2] << 16;
464 block3[1] = append[2] >> 16 | append[3] << 16;
465 block3[2] = append[3] >> 16;
469 block2[2] = block2[2] | append[0] << 24;
470 block2[3] = append[0] >> 8 | append[1] << 24;
471 block3[0] = append[1] >> 8 | append[2] << 24;
472 block3[1] = append[2] >> 8 | append[3] << 24;
473 block3[2] = append[3] >> 8;
477 block2[3] = append[0];
478 block3[0] = append[1];
479 block3[1] = append[2];
480 block3[2] = append[3];
484 block2[3] = block2[3] | append[0] << 8;
485 block3[0] = append[0] >> 24 | append[1] << 8;
486 block3[1] = append[1] >> 24 | append[2] << 8;
487 block3[2] = append[2] >> 24 | append[3] << 8;
488 block3[3] = append[3] >> 24;
492 block2[3] = block2[3] | append[0] << 16;
493 block3[0] = append[0] >> 16 | append[1] << 16;
494 block3[1] = append[1] >> 16 | append[2] << 16;
495 block3[2] = append[2] >> 16 | append[3] << 16;
496 block3[3] = append[3] >> 16;
500 block2[3] = block2[3] | append[0] << 24;
501 block3[0] = append[0] >> 8 | append[1] << 24;
502 block3[1] = append[1] >> 8 | append[2] << 24;
503 block3[2] = append[2] >> 8 | append[3] << 24;
504 block3[3] = append[3] >> 8;
508 block3[0] = append[0];
509 block3[1] = append[1];
510 block3[2] = append[2];
511 block3[3] = append[3];
515 block3[0] = block3[0] | append[0] << 8;
516 block3[1] = append[0] >> 24 | append[1] << 8;
517 block3[2] = append[1] >> 24 | append[2] << 8;
518 block3[3] = append[2] >> 24 | append[3] << 8;
522 block3[0] = block3[0] | append[0] << 16;
523 block3[1] = append[0] >> 16 | append[1] << 16;
524 block3[2] = append[1] >> 16 | append[2] << 16;
525 block3[3] = append[2] >> 16 | append[3] << 16;
529 block3[0] = block3[0] | append[0] << 24;
530 block3[1] = append[0] >> 8 | append[1] << 24;
531 block3[2] = append[1] >> 8 | append[2] << 24;
532 block3[3] = append[2] >> 8 | append[3] << 24;
536 block3[1] = append[0];
537 block3[2] = append[1];
538 block3[3] = append[2];
542 block3[1] = block3[1] | append[0] << 8;
543 block3[2] = append[0] >> 24 | append[1] << 8;
544 block3[3] = append[1] >> 24 | append[2] << 8;
548 block3[1] = block3[1] | append[0] << 16;
549 block3[2] = append[0] >> 16 | append[1] << 16;
550 block3[3] = append[1] >> 16 | append[2] << 16;
554 block3[1] = block3[1] | append[0] << 24;
555 block3[2] = append[0] >> 8 | append[1] << 24;
556 block3[3] = append[1] >> 8 | append[2] << 24;
560 block3[2] = append[0];
561 block3[3] = append[1];
566 static void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4])
571 block0[0] = append[0];
572 block0[1] = append[1];
573 block0[2] = append[2];
574 block0[3] = append[3];
579 block0[0] = block0[0] | append[0] << 8;
580 block0[1] = append[0] >> 24 | append[1] << 8;
581 block0[2] = append[1] >> 24 | append[2] << 8;
582 block0[3] = append[2] >> 24 | append[3] << 8;
583 block1[0] = append[3] >> 24 | 0x80u << 8;
587 block0[0] = block0[0] | append[0] << 16;
588 block0[1] = append[0] >> 16 | append[1] << 16;
589 block0[2] = append[1] >> 16 | append[2] << 16;
590 block0[3] = append[2] >> 16 | append[3] << 16;
591 block1[0] = append[3] >> 16 | 0x80u << 16;
595 block0[0] = block0[0] | append[0] << 24;
596 block0[1] = append[0] >> 8 | append[1] << 24;
597 block0[2] = append[1] >> 8 | append[2] << 24;
598 block0[3] = append[2] >> 8 | append[3] << 24;
599 block1[0] = append[3] >> 8 | 0x80u << 24;
603 block0[1] = append[0];
604 block0[2] = append[1];
605 block0[3] = append[2];
606 block1[0] = append[3];
611 block0[1] = block0[1] | append[0] << 8;
612 block0[2] = append[0] >> 24 | append[1] << 8;
613 block0[3] = append[1] >> 24 | append[2] << 8;
614 block1[0] = append[2] >> 24 | append[3] << 8;
615 block1[1] = append[3] >> 24 | 0x80u << 8;
619 block0[1] = block0[1] | append[0] << 16;
620 block0[2] = append[0] >> 16 | append[1] << 16;
621 block0[3] = append[1] >> 16 | append[2] << 16;
622 block1[0] = append[2] >> 16 | append[3] << 16;
623 block1[1] = append[3] >> 16 | 0x80u << 16;
627 block0[1] = block0[1] | append[0] << 24;
628 block0[2] = append[0] >> 8 | append[1] << 24;
629 block0[3] = append[1] >> 8 | append[2] << 24;
630 block1[0] = append[2] >> 8 | append[3] << 24;
631 block1[1] = append[3] >> 8 | 0x80u << 24;
635 block0[2] = append[0];
636 block0[3] = append[1];
637 block1[0] = append[2];
638 block1[1] = append[3];
643 block0[2] = block0[2] | append[0] << 8;
644 block0[3] = append[0] >> 24 | append[1] << 8;
645 block1[0] = append[1] >> 24 | append[2] << 8;
646 block1[1] = append[2] >> 24 | append[3] << 8;
647 block1[2] = append[3] >> 24 | 0x80u << 8;
651 block0[2] = block0[2] | append[0] << 16;
652 block0[3] = append[0] >> 16 | append[1] << 16;
653 block1[0] = append[1] >> 16 | append[2] << 16;
654 block1[1] = append[2] >> 16 | append[3] << 16;
655 block1[2] = append[3] >> 16 | 0x80u << 16;
659 block0[2] = block0[2] | append[0] << 24;
660 block0[3] = append[0] >> 8 | append[1] << 24;
661 block1[0] = append[1] >> 8 | append[2] << 24;
662 block1[1] = append[2] >> 8 | append[3] << 24;
663 block1[2] = append[3] >> 8 | 0x80u << 24;
667 block0[3] = append[0];
668 block1[0] = append[1];
669 block1[1] = append[2];
670 block1[2] = append[3];
675 block0[3] = block0[3] | append[0] << 8;
676 block1[0] = append[0] >> 24 | append[1] << 8;
677 block1[1] = append[1] >> 24 | append[2] << 8;
678 block1[2] = append[2] >> 24 | append[3] << 8;
679 block1[3] = append[3] >> 24 | 0x80u << 8;
683 block0[3] = block0[3] | append[0] << 16;
684 block1[0] = append[0] >> 16 | append[1] << 16;
685 block1[1] = append[1] >> 16 | append[2] << 16;
686 block1[2] = append[2] >> 16 | append[3] << 16;
687 block1[3] = append[3] >> 16 | 0x80u << 16;
691 block0[3] = block0[3] | append[0] << 24;
692 block1[0] = append[0] >> 8 | append[1] << 24;
693 block1[1] = append[1] >> 8 | append[2] << 24;
694 block1[2] = append[2] >> 8 | append[3] << 24;
695 block1[3] = append[3] >> 8 | 0x80u << 24;
699 block1[0] = append[0];
700 block1[1] = append[1];
701 block1[2] = append[2];
702 block1[3] = append[3];
707 block1[0] = block1[0] | append[0] << 8;
708 block1[1] = append[0] >> 24 | append[1] << 8;
709 block1[2] = append[1] >> 24 | append[2] << 8;
710 block1[3] = append[2] >> 24 | append[3] << 8;
711 block2[0] = append[3] >> 24 | 0x80u << 8;
715 block1[0] = block1[0] | append[0] << 16;
716 block1[1] = append[0] >> 16 | append[1] << 16;
717 block1[2] = append[1] >> 16 | append[2] << 16;
718 block1[3] = append[2] >> 16 | append[3] << 16;
719 block2[0] = append[3] >> 16 | 0x80u << 16;
723 block1[0] = block1[0] | append[0] << 24;
724 block1[1] = append[0] >> 8 | append[1] << 24;
725 block1[2] = append[1] >> 8 | append[2] << 24;
726 block1[3] = append[2] >> 8 | append[3] << 24;
727 block2[0] = append[3] >> 8 | 0x80u << 24;
731 block1[1] = append[0];
732 block1[2] = append[1];
733 block1[3] = append[2];
734 block2[0] = append[3];
739 block1[1] = block1[1] | append[0] << 8;
740 block1[2] = append[0] >> 24 | append[1] << 8;
741 block1[3] = append[1] >> 24 | append[2] << 8;
742 block2[0] = append[2] >> 24 | append[3] << 8;
743 block2[1] = append[3] >> 24 | 0x80u << 8;
747 block1[1] = block1[1] | append[0] << 16;
748 block1[2] = append[0] >> 16 | append[1] << 16;
749 block1[3] = append[1] >> 16 | append[2] << 16;
750 block2[0] = append[2] >> 16 | append[3] << 16;
751 block2[1] = append[3] >> 16 | 0x80u << 16;
755 block1[1] = block1[1] | append[0] << 24;
756 block1[2] = append[0] >> 8 | append[1] << 24;
757 block1[3] = append[1] >> 8 | append[2] << 24;
758 block2[0] = append[2] >> 8 | append[3] << 24;
759 block2[1] = append[3] >> 8 | 0x80u << 24;
763 block1[2] = append[0];
764 block1[3] = append[1];
765 block2[0] = append[2];
766 block2[1] = append[3];
771 block1[2] = block1[2] | append[0] << 8;
772 block1[3] = append[0] >> 24 | append[1] << 8;
773 block2[0] = append[1] >> 24 | append[2] << 8;
774 block2[1] = append[2] >> 24 | append[3] << 8;
775 block2[2] = append[3] >> 24 | 0x80u << 8;
779 block1[2] = block1[2] | append[0] << 16;
780 block1[3] = append[0] >> 16 | append[1] << 16;
781 block2[0] = append[1] >> 16 | append[2] << 16;
782 block2[1] = append[2] >> 16 | append[3] << 16;
783 block2[2] = append[3] >> 16 | 0x80u << 16;
787 block1[2] = block1[2] | append[0] << 24;
788 block1[3] = append[0] >> 8 | append[1] << 24;
789 block2[0] = append[1] >> 8 | append[2] << 24;
790 block2[1] = append[2] >> 8 | append[3] << 24;
791 block2[2] = append[3] >> 8 | 0x80u << 24;
795 block1[3] = append[0];
796 block2[0] = append[1];
797 block2[1] = append[2];
798 block2[2] = append[3];
803 block1[3] = block1[3] | append[0] << 8;
804 block2[0] = append[0] >> 24 | append[1] << 8;
805 block2[1] = append[1] >> 24 | append[2] << 8;
806 block2[2] = append[2] >> 24 | append[3] << 8;
807 block2[3] = append[3] >> 24 | 0x80u << 8;
811 block1[3] = block1[3] | append[0] << 16;
812 block2[0] = append[0] >> 16 | append[1] << 16;
813 block2[1] = append[1] >> 16 | append[2] << 16;
814 block2[2] = append[2] >> 16 | append[3] << 16;
815 block2[3] = append[3] >> 16 | 0x80u << 16;
819 block1[3] = block1[3] | append[0] << 24;
820 block2[0] = append[0] >> 8 | append[1] << 24;
821 block2[1] = append[1] >> 8 | append[2] << 24;
822 block2[2] = append[2] >> 8 | append[3] << 24;
823 block2[3] = append[3] >> 8 | 0x80u << 24;
827 block2[0] = append[0];
828 block2[1] = append[1];
829 block2[2] = append[2];
830 block2[3] = append[3];
835 block2[0] = block2[0] | append[0] << 8;
836 block2[1] = append[0] >> 24 | append[1] << 8;
837 block2[2] = append[1] >> 24 | append[2] << 8;
838 block2[3] = append[2] >> 24 | append[3] << 8;
839 block3[0] = append[3] >> 24 | 0x80u << 8;
843 block2[0] = block2[0] | append[0] << 16;
844 block2[1] = append[0] >> 16 | append[1] << 16;
845 block2[2] = append[1] >> 16 | append[2] << 16;
846 block2[3] = append[2] >> 16 | append[3] << 16;
847 block3[0] = append[3] >> 16 | 0x80u << 16;
851 block2[0] = block2[0] | append[0] << 24;
852 block2[1] = append[0] >> 8 | append[1] << 24;
853 block2[2] = append[1] >> 8 | append[2] << 24;
854 block2[3] = append[2] >> 8 | append[3] << 24;
855 block3[0] = append[3] >> 8 | 0x80u << 24;
859 block2[1] = append[0];
860 block2[2] = append[1];
861 block2[3] = append[2];
862 block3[0] = append[3];
867 block2[1] = block2[1] | append[0] << 8;
868 block2[2] = append[0] >> 24 | append[1] << 8;
869 block2[3] = append[1] >> 24 | append[2] << 8;
870 block3[0] = append[2] >> 24 | append[3] << 8;
871 block3[1] = append[3] >> 24 | 0x80u << 8;
875 block2[1] = block2[1] | append[0] << 16;
876 block2[2] = append[0] >> 16 | append[1] << 16;
877 block2[3] = append[1] >> 16 | append[2] << 16;
878 block3[0] = append[2] >> 16 | append[3] << 16;
879 block3[1] = append[3] >> 16 | 0x80u << 16;
883 block2[1] = block2[1] | append[0] << 24;
884 block2[2] = append[0] >> 8 | append[1] << 24;
885 block2[3] = append[1] >> 8 | append[2] << 24;
886 block3[0] = append[2] >> 8 | append[3] << 24;
887 block3[1] = append[3] >> 8 | 0x80u << 24;
891 block2[2] = append[0];
892 block2[3] = append[1];
893 block3[0] = append[2];
894 block3[1] = append[3];
899 block2[2] = block2[2] | append[0] << 8;
900 block2[3] = append[0] >> 24 | append[1] << 8;
901 block3[0] = append[1] >> 24 | append[2] << 8;
902 block3[1] = append[2] >> 24 | append[3] << 8;
903 block3[2] = append[3] >> 24 | 0x80u << 8;
907 block2[2] = block2[2] | append[0] << 16;
908 block2[3] = append[0] >> 16 | append[1] << 16;
909 block3[0] = append[1] >> 16 | append[2] << 16;
910 block3[1] = append[2] >> 16 | append[3] << 16;
911 block3[2] = append[3] >> 16 | 0x80u << 16;
915 block2[2] = block2[2] | append[0] << 24;
916 block2[3] = append[0] >> 8 | append[1] << 24;
917 block3[0] = append[1] >> 8 | append[2] << 24;
918 block3[1] = append[2] >> 8 | append[3] << 24;
919 block3[2] = append[3] >> 8 | 0x80u << 24;
923 block2[3] = append[0];
924 block3[0] = append[1];
925 block3[1] = append[2];
926 block3[2] = append[3];
931 block2[3] = block2[3] | append[0] << 8;
932 block3[0] = append[0] >> 24 | append[1] << 8;
933 block3[1] = append[1] >> 24 | append[2] << 8;
934 block3[2] = append[2] >> 24 | append[3] << 8;
935 block3[3] = append[3] >> 24 | 0x80u << 8;
939 block2[3] = block2[3] | append[0] << 16;
940 block3[0] = append[0] >> 16 | append[1] << 16;
941 block3[1] = append[1] >> 16 | append[2] << 16;
942 block3[2] = append[2] >> 16 | append[3] << 16;
943 block3[3] = append[3] >> 16 | 0x80u << 16;
947 block2[3] = block2[3] | append[0] << 24;
948 block3[0] = append[0] >> 8 | append[1] << 24;
949 block3[1] = append[1] >> 8 | append[2] << 24;
950 block3[2] = append[2] >> 8 | append[3] << 24;
951 block3[3] = append[3] >> 8 | 0x80u << 24;
955 block3[0] = append[0];
956 block3[1] = append[1];
957 block3[2] = append[2];
958 block3[3] = append[3];
962 block3[0] = block3[0] | append[0] << 8;
963 block3[1] = append[0] >> 24 | append[1] << 8;
964 block3[2] = append[1] >> 24 | append[2] << 8;
965 block3[3] = append[2] >> 24 | append[3] << 8;
969 block3[0] = block3[0] | append[0] << 16;
970 block3[1] = append[0] >> 16 | append[1] << 16;
971 block3[2] = append[1] >> 16 | append[2] << 16;
972 block3[3] = append[2] >> 16 | append[3] << 16;
976 block3[0] = block3[0] | append[0] << 24;
977 block3[1] = append[0] >> 8 | append[1] << 24;
978 block3[2] = append[1] >> 8 | append[2] << 24;
979 block3[3] = append[2] >> 8 | append[3] << 24;
983 block3[1] = append[0];
984 block3[2] = append[1];
985 block3[3] = append[2];
989 block3[1] = block3[1] | append[0] << 8;
990 block3[2] = append[0] >> 24 | append[1] << 8;
991 block3[3] = append[1] >> 24 | append[2] << 8;
995 block3[1] = block3[1] | append[0] << 16;
996 block3[2] = append[0] >> 16 | append[1] << 16;
997 block3[3] = append[1] >> 16 | append[2] << 16;
1001 block3[1] = block3[1] | append[0] << 24;
1002 block3[2] = append[0] >> 8 | append[1] << 24;
1003 block3[3] = append[1] >> 8 | append[2] << 24;
1007 block3[2] = append[0];
1008 block3[3] = append[1];
1013 static void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[2])
1018 block0[0] = append[0];
1019 block0[1] = append[1];
1023 block0[0] = block0[0] | append[0] << 8;
1024 block0[1] = append[0] >> 24 | append[1] << 8;
1025 block0[2] = append[1] >> 24;
1029 block0[0] = block0[0] | append[0] << 16;
1030 block0[1] = append[0] >> 16 | append[1] << 16;
1031 block0[2] = append[1] >> 16;
1035 block0[0] = block0[0] | append[0] << 24;
1036 block0[1] = append[0] >> 8 | append[1] << 24;
1037 block0[2] = append[1] >> 8;
1041 block0[1] = append[0];
1042 block0[2] = append[1];
1046 block0[1] = block0[1] | append[0] << 8;
1047 block0[2] = append[0] >> 24 | append[1] << 8;
1048 block0[3] = append[1] >> 24;
1052 block0[1] = block0[1] | append[0] << 16;
1053 block0[2] = append[0] >> 16 | append[1] << 16;
1054 block0[3] = append[1] >> 16;
1058 block0[1] = block0[1] | append[0] << 24;
1059 block0[2] = append[0] >> 8 | append[1] << 24;
1060 block0[3] = append[1] >> 8;
1064 block0[2] = append[0];
1065 block0[3] = append[1];
1069 block0[2] = block0[2] | append[0] << 8;
1070 block0[3] = append[0] >> 24 | append[1] << 8;
1071 block1[0] = append[1] >> 24;
1075 block0[2] = block0[2] | append[0] << 16;
1076 block0[3] = append[0] >> 16 | append[1] << 16;
1077 block1[0] = append[1] >> 16;
1081 block0[2] = block0[2] | append[0] << 24;
1082 block0[3] = append[0] >> 8 | append[1] << 24;
1083 block1[0] = append[1] >> 8;
1087 block0[3] = append[0];
1088 block1[0] = append[1];
1092 block0[3] = block0[3] | append[0] << 8;
1093 block1[0] = append[0] >> 24 | append[1] << 8;
1094 block1[1] = append[1] >> 24;
1098 block0[3] = block0[3] | append[0] << 16;
1099 block1[0] = append[0] >> 16 | append[1] << 16;
1100 block1[1] = append[1] >> 16;
1104 block0[3] = block0[3] | append[0] << 24;
1105 block1[0] = append[0] >> 8 | append[1] << 24;
1106 block1[1] = append[1] >> 8;
1110 block1[0] = append[0];
1111 block1[1] = append[1];
1115 block1[0] = block1[0] | append[0] << 8;
1116 block1[1] = append[0] >> 24 | append[1] << 8;
1117 block1[2] = append[1] >> 24;
1121 block1[0] = block1[0] | append[0] << 16;
1122 block1[1] = append[0] >> 16 | append[1] << 16;
1123 block1[2] = append[1] >> 16;
1127 block1[0] = block1[0] | append[0] << 24;
1128 block1[1] = append[0] >> 8 | append[1] << 24;
1129 block1[2] = append[1] >> 8;
1133 block1[1] = append[0];
1134 block1[2] = append[1];
1138 block1[1] = block1[1] | append[0] << 8;
1139 block1[2] = append[0] >> 24 | append[1] << 8;
1140 block1[3] = append[1] >> 24;
1144 block1[1] = block1[1] | append[0] << 16;
1145 block1[2] = append[0] >> 16 | append[1] << 16;
1146 block1[3] = append[1] >> 16;
1150 block1[1] = block1[1] | append[0] << 24;
1151 block1[2] = append[0] >> 8 | append[1] << 24;
1152 block1[3] = append[1] >> 8;
1156 block1[2] = append[0];
1157 block1[3] = append[1];
1161 block1[2] = block1[2] | append[0] << 8;
1162 block1[3] = append[0] >> 24 | append[1] << 8;
1163 block2[0] = append[1] >> 24;
1167 block1[2] = block1[2] | append[0] << 16;
1168 block1[3] = append[0] >> 16 | append[1] << 16;
1169 block2[0] = append[1] >> 16;
1173 block1[2] = block1[2] | append[0] << 24;
1174 block1[3] = append[0] >> 8 | append[1] << 24;
1175 block2[0] = append[1] >> 8;
1179 block1[3] = append[0];
1180 block2[0] = append[1];
1184 block1[3] = block1[3] | append[0] << 8;
1185 block2[0] = append[0] >> 24 | append[1] << 8;
1186 block2[1] = append[1] >> 24;
1190 block1[3] = block1[3] | append[0] << 16;
1191 block2[0] = append[0] >> 16 | append[1] << 16;
1192 block2[1] = append[1] >> 16;
1196 block1[3] = block1[3] | append[0] << 24;
1197 block2[0] = append[0] >> 8 | append[1] << 24;
1198 block2[1] = append[1] >> 8;
1202 block2[0] = append[0];
1203 block2[1] = append[1];
1207 block2[0] = block2[0] | append[0] << 8;
1208 block2[1] = append[0] >> 24 | append[1] << 8;
1209 block2[2] = append[1] >> 24;
1213 block2[0] = block2[0] | append[0] << 16;
1214 block2[1] = append[0] >> 16 | append[1] << 16;
1215 block2[2] = append[1] >> 16;
1219 block2[0] = block2[0] | append[0] << 24;
1220 block2[1] = append[0] >> 8 | append[1] << 24;
1221 block2[2] = append[1] >> 8;
1225 block2[1] = append[0];
1226 block2[2] = append[1];
1230 block2[1] = block2[1] | append[0] << 8;
1231 block2[2] = append[0] >> 24 | append[1] << 8;
1232 block2[3] = append[1] >> 24;
1236 block2[1] = block2[1] | append[0] << 16;
1237 block2[2] = append[0] >> 16 | append[1] << 16;
1238 block2[3] = append[1] >> 16;
1242 block2[1] = block2[1] | append[0] << 24;
1243 block2[2] = append[0] >> 8 | append[1] << 24;
1244 block2[3] = append[1] >> 8;
1248 block2[2] = append[0];
1249 block2[3] = append[1];
1253 block2[2] = block2[2] | append[0] << 8;
1254 block2[3] = append[0] >> 24 | append[1] << 8;
1255 block3[0] = append[1] >> 24;
1259 block2[2] = block2[2] | append[0] << 16;
1260 block2[3] = append[0] >> 16 | append[1] << 16;
1261 block3[0] = append[1] >> 16;
1265 block2[2] = block2[2] | append[0] << 24;
1266 block2[3] = append[0] >> 8 | append[1] << 24;
1267 block3[0] = append[1] >> 8;
1271 block2[3] = append[0];
1272 block3[0] = append[1];
1276 block2[3] = block2[3] | append[0] << 8;
1277 block3[0] = append[0] >> 24 | append[1] << 8;
1278 block3[1] = append[1] >> 24;
1282 block2[3] = block2[3] | append[0] << 16;
1283 block3[0] = append[0] >> 16 | append[1] << 16;
1284 block3[1] = append[1] >> 16;
1288 block2[3] = block2[3] | append[0] << 24;
1289 block3[0] = append[0] >> 8 | append[1] << 24;
1290 block3[1] = append[1] >> 8;
1294 block3[0] = append[0];
1295 block3[1] = append[1];
1299 block3[0] = block3[0] | append[0] << 8;
1300 block3[1] = append[0] >> 24 | append[1] << 8;
1301 block3[2] = append[1] >> 24;
1305 block3[0] = block3[0] | append[0] << 16;
1306 block3[1] = append[0] >> 16 | append[1] << 16;
1307 block3[2] = append[1] >> 16;
1311 block3[0] = block3[0] | append[0] << 24;
1312 block3[1] = append[0] >> 8 | append[1] << 24;
1313 block3[2] = append[1] >> 8;
1317 block3[1] = append[0];
1318 block3[2] = append[1];
1322 block3[1] = block3[1] | append[0] << 8;
1323 block3[2] = append[0] >> 24 | append[1] << 8;
1324 block3[3] = append[1] >> 24;
1328 block3[1] = block3[1] | append[0] << 16;
1329 block3[2] = append[0] >> 16 | append[1] << 16;
1330 block3[3] = append[1] >> 16;
1334 block3[1] = block3[1] | append[0] << 24;
1335 block3[2] = append[0] >> 8 | append[1] << 24;
1336 block3[3] = append[1] >> 8;
1340 block3[2] = append[0];
1341 block3[3] = append[1];
1346 static void append_sign (u32 block0[4], u32 block1[4], const u32 block_len)
1351 block0[0] = md5apr1_magic0;
1352 block0[1] = md5apr1_magic1;
1356 block0[0] = block0[0] | md5apr1_magic0 << 8;
1357 block0[1] = md5apr1_magic0 >> 24 | md5apr1_magic1 << 8;
1358 block0[2] = md5apr1_magic1 >> 24;
1362 block0[0] = block0[0] | md5apr1_magic0 << 16;
1363 block0[1] = md5apr1_magic0 >> 16 | md5apr1_magic1 << 16;
1364 block0[2] = md5apr1_magic1 >> 16;
1368 block0[0] = block0[0] | md5apr1_magic0 << 24;
1369 block0[1] = md5apr1_magic0 >> 8 | md5apr1_magic1 << 24;
1370 block0[2] = md5apr1_magic1 >> 8;
1374 block0[1] = md5apr1_magic0;
1375 block0[2] = md5apr1_magic1;
1379 block0[1] = block0[1] | md5apr1_magic0 << 8;
1380 block0[2] = md5apr1_magic0 >> 24 | md5apr1_magic1 << 8;
1381 block0[3] = md5apr1_magic1 >> 24;
1385 block0[1] = block0[1] | md5apr1_magic0 << 16;
1386 block0[2] = md5apr1_magic0 >> 16 | md5apr1_magic1 << 16;
1387 block0[3] = md5apr1_magic1 >> 16;
1391 block0[1] = block0[1] | md5apr1_magic0 << 24;
1392 block0[2] = md5apr1_magic0 >> 8 | md5apr1_magic1 << 24;
1393 block0[3] = md5apr1_magic1 >> 8;
1397 block0[2] = md5apr1_magic0;
1398 block0[3] = md5apr1_magic1;
1402 block0[2] = block0[2] | md5apr1_magic0 << 8;
1403 block0[3] = md5apr1_magic0 >> 24 | md5apr1_magic1 << 8;
1404 block1[0] = md5apr1_magic1 >> 24;
1408 block0[2] = block0[2] | md5apr1_magic0 << 16;
1409 block0[3] = md5apr1_magic0 >> 16 | md5apr1_magic1 << 16;
1410 block1[0] = md5apr1_magic1 >> 16;
1414 block0[2] = block0[2] | md5apr1_magic0 << 24;
1415 block0[3] = md5apr1_magic0 >> 8 | md5apr1_magic1 << 24;
1416 block1[0] = md5apr1_magic1 >> 8;
1420 block0[3] = md5apr1_magic0;
1421 block1[0] = md5apr1_magic1;
1425 block0[3] = block0[3] | md5apr1_magic0 << 8;
1426 block1[0] = md5apr1_magic0 >> 24 | md5apr1_magic1 << 8;
1427 block1[1] = md5apr1_magic1 >> 24;
1431 block0[3] = block0[3] | md5apr1_magic0 << 16;
1432 block1[0] = md5apr1_magic0 >> 16 | md5apr1_magic1 << 16;
1433 block1[1] = md5apr1_magic1 >> 16;
1437 block0[3] = block0[3] | md5apr1_magic0 << 24;
1438 block1[0] = md5apr1_magic0 >> 8 | md5apr1_magic1 << 24;
1439 block1[1] = md5apr1_magic1 >> 8;
1444 static void append_1st (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append)
1453 block0[0] = block0[0] | append << 8;
1457 block0[0] = block0[0] | append << 16;
1461 block0[0] = block0[0] | append << 24;
1469 block0[1] = block0[1] | append << 8;
1473 block0[1] = block0[1] | append << 16;
1477 block0[1] = block0[1] | append << 24;
1485 block0[2] = block0[2] | append << 8;
1489 block0[2] = block0[2] | append << 16;
1493 block0[2] = block0[2] | append << 24;
1501 block0[3] = block0[3] | append << 8;
1505 block0[3] = block0[3] | append << 16;
1509 block0[3] = block0[3] | append << 24;
1517 block1[0] = block1[0] | append << 8;
1521 block1[0] = block1[0] | append << 16;
1525 block1[0] = block1[0] | append << 24;
1533 block1[1] = block1[1] | append << 8;
1537 block1[1] = block1[1] | append << 16;
1541 block1[1] = block1[1] | append << 24;
1549 block1[2] = block1[2] | append << 8;
1553 block1[2] = block1[2] | append << 16;
1557 block1[2] = block1[2] | append << 24;
1565 block1[3] = block1[3] | append << 8;
1569 block1[3] = block1[3] | append << 16;
1573 block1[3] = block1[3] | append << 24;
1581 block2[0] = block2[0] | append << 8;
1585 block2[0] = block2[0] | append << 16;
1589 block2[0] = block2[0] | append << 24;
1597 block2[1] = block2[1] | append << 8;
1601 block2[1] = block2[1] | append << 16;
1605 block2[1] = block2[1] | append << 24;
1613 block2[2] = block2[2] | append << 8;
1617 block2[2] = block2[2] | append << 16;
1621 block2[2] = block2[2] | append << 24;
1629 block2[3] = block2[3] | append << 8;
1633 block2[3] = block2[3] | append << 16;
1637 block2[3] = block2[3] | append << 24;
1645 block3[0] = block3[0] | append << 8;
1649 block3[0] = block3[0] | append << 16;
1653 block3[0] = block3[0] | append << 24;
1661 block3[1] = block3[1] | append << 8;
1665 block3[1] = block3[1] | append << 16;
1669 block3[1] = block3[1] | append << 24;
1678 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1684 const u32 gid = get_global_id (0);
1686 if (gid >= gid_max) return;
1690 w0[0] = pws[gid].i[0];
1691 w0[1] = pws[gid].i[1];
1692 w0[2] = pws[gid].i[2];
1693 w0[3] = pws[gid].i[3];
1695 const u32 pw_len = pws[gid].pw_len;
1703 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1704 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1706 const u32 salt_len = salt_bufs[salt_pos].salt_len;
1712 //memcat16 (block0, block1, block2, block3, block_len, w0);
1713 //block_len += pw_len;
1715 u32 block_len = pw_len;
1745 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1747 block_len += salt_len;
1749 memcat16 (block0, block1, block2, block3, block_len, w0);
1751 block_len += pw_len;
1753 append_0x80_4 (block0, block1, block2, block3, block_len);
1755 block3[2] = block_len * 8;
1764 md5_transform (block0, block1, block2, block3, digest);
1766 /* The password first, since that is what is most unknown */
1767 /* Then our magic string */
1768 /* Then the raw salt */
1769 /* Then just as many characters of the MD5(pw,salt,pw) */
1771 //memcat16 (block0, block1, block2, block3, block_len, w);
1772 //block_len += pw_len;
1796 append_sign (block0, block1, block_len);
1800 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1802 block_len += salt_len;
1804 truncate_block (digest, pw_len);
1806 memcat16 (block0, block1, block2, block3, block_len, digest);
1808 block_len += pw_len;
1810 /* Then something really weird... */
1812 u32 append = block0[0] & 0xFF;
1814 for (u32 j = pw_len; j; j >>= 1)
1818 append_1st (block0, block1, block2, block3, block_len, append);
1824 append_0x80_4 (block0, block1, block2, block3, block_len);
1826 block3[2] = block_len * 8;
1833 md5_transform (block0, block1, block2, block3, digest);
1835 tmps[gid].digest_buf[0] = digest[0];
1836 tmps[gid].digest_buf[1] = digest[1];
1837 tmps[gid].digest_buf[2] = digest[2];
1838 tmps[gid].digest_buf[3] = digest[3];
1841 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1847 const u32 gid = get_global_id (0);
1849 if (gid >= gid_max) return;
1853 w0[0] = pws[gid].i[0];
1854 w0[1] = pws[gid].i[1];
1855 w0[2] = pws[gid].i[2];
1856 w0[3] = pws[gid].i[3];
1858 const u32 pw_len = pws[gid].pw_len;
1867 append_0x80_1 (w0_x80, pw_len);
1875 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1876 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1878 const u32 salt_len = salt_bufs[salt_pos].salt_len;
1886 digest[0] = tmps[gid].digest_buf[0];
1887 digest[1] = tmps[gid].digest_buf[1];
1888 digest[2] = tmps[gid].digest_buf[2];
1889 digest[3] = tmps[gid].digest_buf[3];
1895 /* and now, just to make sure things don't run too fast */
1927 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
1940 const u32 j1 = (j & 1) ? 1 : 0;
1941 const u32 j3 = (j % 3) ? 1 : 0;
1942 const u32 j7 = (j % 7) ? 1 : 0;
1955 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1957 block_len += salt_len;
1962 memcat16 (block0, block1, block2, block3, block_len, w0);
1964 block_len += pw_len;
1967 memcat16_x80 (block0, block1, block2, block3, block_len, digest);
1973 block0[0] = digest[0];
1974 block0[1] = digest[1];
1975 block0[2] = digest[2];
1976 block0[3] = digest[3];
1982 block1[0] = salt_buf[0];
1983 block1[1] = salt_buf[1];
1985 block_len += salt_len;
1987 memcat16 (block0, block1, block2, block3, block_len, w0);
1989 block_len += pw_len;
1993 block1[0] = salt_buf[0];
1994 block1[1] = salt_buf[1];
1996 block_len += salt_len;
2005 block_len += pw_len;
2008 memcat16 (block0, block1, block2, block3, block_len, w0_x80);
2010 block_len += pw_len;
2013 block3[2] = block_len * 8;
2020 md5_transform (block0, block1, block2, block3, digest);
2023 tmps[gid].digest_buf[0] = digest[0];
2024 tmps[gid].digest_buf[1] = digest[1];
2025 tmps[gid].digest_buf[2] = digest[2];
2026 tmps[gid].digest_buf[3] = digest[3];
2029 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2035 const u32 gid = get_global_id (0);
2037 if (gid >= gid_max) return;
2039 const u32 lid = get_local_id (0);
2045 const u32 r0 = tmps[gid].digest_buf[DGST_R0];
2046 const u32 r1 = tmps[gid].digest_buf[DGST_R1];
2047 const u32 r2 = tmps[gid].digest_buf[DGST_R2];
2048 const u32 r3 = tmps[gid].digest_buf[DGST_R3];