2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
16 #include "include/kernel_functions.c"
17 #include "types_ocl.c"
20 #define COMPARE_S "check_single_comp4.c"
21 #define COMPARE_M "check_multi_comp4.c"
23 #define md5apr1_magic0 0x72706124
24 #define md5apr1_magic1 0x00002431
26 static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4])
52 MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
53 MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
54 MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
55 MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
56 MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
57 MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
58 MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
59 MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
60 MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
61 MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
62 MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
63 MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
64 MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
65 MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
66 MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
67 MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
69 MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
70 MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
71 MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
72 MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
73 MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
74 MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
75 MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
76 MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
77 MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
78 MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
79 MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
80 MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
81 MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
82 MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
83 MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
84 MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
86 MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
87 MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
88 MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
89 MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
90 MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
91 MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
92 MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
93 MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
94 MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
95 MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
96 MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
97 MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
98 MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
99 MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
100 MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
101 MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
103 MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
104 MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
105 MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
106 MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
107 MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
108 MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
109 MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
110 MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
111 MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
112 MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
113 MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
114 MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
115 MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
116 MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
117 MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
118 MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
126 static void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4])
131 block0[0] = append[0];
132 block0[1] = append[1];
133 block0[2] = append[2];
134 block0[3] = append[3];
138 block0[0] = block0[0] | append[0] << 8;
139 block0[1] = append[0] >> 24 | append[1] << 8;
140 block0[2] = append[1] >> 24 | append[2] << 8;
141 block0[3] = append[2] >> 24 | append[3] << 8;
142 block1[0] = append[3] >> 24;
146 block0[0] = block0[0] | append[0] << 16;
147 block0[1] = append[0] >> 16 | append[1] << 16;
148 block0[2] = append[1] >> 16 | append[2] << 16;
149 block0[3] = append[2] >> 16 | append[3] << 16;
150 block1[0] = append[3] >> 16;
154 block0[0] = block0[0] | append[0] << 24;
155 block0[1] = append[0] >> 8 | append[1] << 24;
156 block0[2] = append[1] >> 8 | append[2] << 24;
157 block0[3] = append[2] >> 8 | append[3] << 24;
158 block1[0] = append[3] >> 8;
162 block0[1] = append[0];
163 block0[2] = append[1];
164 block0[3] = append[2];
165 block1[0] = append[3];
169 block0[1] = block0[1] | append[0] << 8;
170 block0[2] = append[0] >> 24 | append[1] << 8;
171 block0[3] = append[1] >> 24 | append[2] << 8;
172 block1[0] = append[2] >> 24 | append[3] << 8;
173 block1[1] = append[3] >> 24;
177 block0[1] = block0[1] | append[0] << 16;
178 block0[2] = append[0] >> 16 | append[1] << 16;
179 block0[3] = append[1] >> 16 | append[2] << 16;
180 block1[0] = append[2] >> 16 | append[3] << 16;
181 block1[1] = append[3] >> 16;
185 block0[1] = block0[1] | append[0] << 24;
186 block0[2] = append[0] >> 8 | append[1] << 24;
187 block0[3] = append[1] >> 8 | append[2] << 24;
188 block1[0] = append[2] >> 8 | append[3] << 24;
189 block1[1] = append[3] >> 8;
193 block0[2] = append[0];
194 block0[3] = append[1];
195 block1[0] = append[2];
196 block1[1] = append[3];
200 block0[2] = block0[2] | append[0] << 8;
201 block0[3] = append[0] >> 24 | append[1] << 8;
202 block1[0] = append[1] >> 24 | append[2] << 8;
203 block1[1] = append[2] >> 24 | append[3] << 8;
204 block1[2] = append[3] >> 24;
208 block0[2] = block0[2] | append[0] << 16;
209 block0[3] = append[0] >> 16 | append[1] << 16;
210 block1[0] = append[1] >> 16 | append[2] << 16;
211 block1[1] = append[2] >> 16 | append[3] << 16;
212 block1[2] = append[3] >> 16;
216 block0[2] = block0[2] | append[0] << 24;
217 block0[3] = append[0] >> 8 | append[1] << 24;
218 block1[0] = append[1] >> 8 | append[2] << 24;
219 block1[1] = append[2] >> 8 | append[3] << 24;
220 block1[2] = append[3] >> 8;
224 block0[3] = append[0];
225 block1[0] = append[1];
226 block1[1] = append[2];
227 block1[2] = append[3];
231 block0[3] = block0[3] | append[0] << 8;
232 block1[0] = append[0] >> 24 | append[1] << 8;
233 block1[1] = append[1] >> 24 | append[2] << 8;
234 block1[2] = append[2] >> 24 | append[3] << 8;
235 block1[3] = append[3] >> 24;
239 block0[3] = block0[3] | append[0] << 16;
240 block1[0] = append[0] >> 16 | append[1] << 16;
241 block1[1] = append[1] >> 16 | append[2] << 16;
242 block1[2] = append[2] >> 16 | append[3] << 16;
243 block1[3] = append[3] >> 16;
247 block0[3] = block0[3] | append[0] << 24;
248 block1[0] = append[0] >> 8 | append[1] << 24;
249 block1[1] = append[1] >> 8 | append[2] << 24;
250 block1[2] = append[2] >> 8 | append[3] << 24;
251 block1[3] = append[3] >> 8;
255 block1[0] = append[0];
256 block1[1] = append[1];
257 block1[2] = append[2];
258 block1[3] = append[3];
262 block1[0] = block1[0] | append[0] << 8;
263 block1[1] = append[0] >> 24 | append[1] << 8;
264 block1[2] = append[1] >> 24 | append[2] << 8;
265 block1[3] = append[2] >> 24 | append[3] << 8;
266 block2[0] = append[3] >> 24;
270 block1[0] = block1[0] | append[0] << 16;
271 block1[1] = append[0] >> 16 | append[1] << 16;
272 block1[2] = append[1] >> 16 | append[2] << 16;
273 block1[3] = append[2] >> 16 | append[3] << 16;
274 block2[0] = append[3] >> 16;
278 block1[0] = block1[0] | append[0] << 24;
279 block1[1] = append[0] >> 8 | append[1] << 24;
280 block1[2] = append[1] >> 8 | append[2] << 24;
281 block1[3] = append[2] >> 8 | append[3] << 24;
282 block2[0] = append[3] >> 8;
286 block1[1] = append[0];
287 block1[2] = append[1];
288 block1[3] = append[2];
289 block2[0] = append[3];
293 block1[1] = block1[1] | append[0] << 8;
294 block1[2] = append[0] >> 24 | append[1] << 8;
295 block1[3] = append[1] >> 24 | append[2] << 8;
296 block2[0] = append[2] >> 24 | append[3] << 8;
297 block2[1] = append[3] >> 24;
301 block1[1] = block1[1] | append[0] << 16;
302 block1[2] = append[0] >> 16 | append[1] << 16;
303 block1[3] = append[1] >> 16 | append[2] << 16;
304 block2[0] = append[2] >> 16 | append[3] << 16;
305 block2[1] = append[3] >> 16;
309 block1[1] = block1[1] | append[0] << 24;
310 block1[2] = append[0] >> 8 | append[1] << 24;
311 block1[3] = append[1] >> 8 | append[2] << 24;
312 block2[0] = append[2] >> 8 | append[3] << 24;
313 block2[1] = append[3] >> 8;
317 block1[2] = append[0];
318 block1[3] = append[1];
319 block2[0] = append[2];
320 block2[1] = append[3];
324 block1[2] = block1[2] | append[0] << 8;
325 block1[3] = append[0] >> 24 | append[1] << 8;
326 block2[0] = append[1] >> 24 | append[2] << 8;
327 block2[1] = append[2] >> 24 | append[3] << 8;
328 block2[2] = append[3] >> 24;
332 block1[2] = block1[2] | append[0] << 16;
333 block1[3] = append[0] >> 16 | append[1] << 16;
334 block2[0] = append[1] >> 16 | append[2] << 16;
335 block2[1] = append[2] >> 16 | append[3] << 16;
336 block2[2] = append[3] >> 16;
340 block1[2] = block1[2] | append[0] << 24;
341 block1[3] = append[0] >> 8 | append[1] << 24;
342 block2[0] = append[1] >> 8 | append[2] << 24;
343 block2[1] = append[2] >> 8 | append[3] << 24;
344 block2[2] = append[3] >> 8;
348 block1[3] = append[0];
349 block2[0] = append[1];
350 block2[1] = append[2];
351 block2[2] = append[3];
355 block1[3] = block1[3] | append[0] << 8;
356 block2[0] = append[0] >> 24 | append[1] << 8;
357 block2[1] = append[1] >> 24 | append[2] << 8;
358 block2[2] = append[2] >> 24 | append[3] << 8;
359 block2[3] = append[3] >> 24;
363 block1[3] = block1[3] | append[0] << 16;
364 block2[0] = append[0] >> 16 | append[1] << 16;
365 block2[1] = append[1] >> 16 | append[2] << 16;
366 block2[2] = append[2] >> 16 | append[3] << 16;
367 block2[3] = append[3] >> 16;
371 block1[3] = block1[3] | append[0] << 24;
372 block2[0] = append[0] >> 8 | append[1] << 24;
373 block2[1] = append[1] >> 8 | append[2] << 24;
374 block2[2] = append[2] >> 8 | append[3] << 24;
375 block2[3] = append[3] >> 8;
379 block2[0] = append[0];
380 block2[1] = append[1];
381 block2[2] = append[2];
382 block2[3] = append[3];
386 block2[0] = block2[0] | append[0] << 8;
387 block2[1] = append[0] >> 24 | append[1] << 8;
388 block2[2] = append[1] >> 24 | append[2] << 8;
389 block2[3] = append[2] >> 24 | append[3] << 8;
390 block3[0] = append[3] >> 24;
394 block2[0] = block2[0] | append[0] << 16;
395 block2[1] = append[0] >> 16 | append[1] << 16;
396 block2[2] = append[1] >> 16 | append[2] << 16;
397 block2[3] = append[2] >> 16 | append[3] << 16;
398 block3[0] = append[3] >> 16;
402 block2[0] = block2[0] | append[0] << 24;
403 block2[1] = append[0] >> 8 | append[1] << 24;
404 block2[2] = append[1] >> 8 | append[2] << 24;
405 block2[3] = append[2] >> 8 | append[3] << 24;
406 block3[0] = append[3] >> 8;
410 block2[1] = append[0];
411 block2[2] = append[1];
412 block2[3] = append[2];
413 block3[0] = append[3];
417 block2[1] = block2[1] | append[0] << 8;
418 block2[2] = append[0] >> 24 | append[1] << 8;
419 block2[3] = append[1] >> 24 | append[2] << 8;
420 block3[0] = append[2] >> 24 | append[3] << 8;
421 block3[1] = append[3] >> 24;
425 block2[1] = block2[1] | append[0] << 16;
426 block2[2] = append[0] >> 16 | append[1] << 16;
427 block2[3] = append[1] >> 16 | append[2] << 16;
428 block3[0] = append[2] >> 16 | append[3] << 16;
429 block3[1] = append[3] >> 16;
433 block2[1] = block2[1] | append[0] << 24;
434 block2[2] = append[0] >> 8 | append[1] << 24;
435 block2[3] = append[1] >> 8 | append[2] << 24;
436 block3[0] = append[2] >> 8 | append[3] << 24;
437 block3[1] = append[3] >> 8;
441 block2[2] = append[0];
442 block2[3] = append[1];
443 block3[0] = append[2];
444 block3[1] = append[3];
448 block2[2] = block2[2] | append[0] << 8;
449 block2[3] = append[0] >> 24 | append[1] << 8;
450 block3[0] = append[1] >> 24 | append[2] << 8;
451 block3[1] = append[2] >> 24 | append[3] << 8;
452 block3[2] = append[3] >> 24;
456 block2[2] = block2[2] | append[0] << 16;
457 block2[3] = append[0] >> 16 | append[1] << 16;
458 block3[0] = append[1] >> 16 | append[2] << 16;
459 block3[1] = append[2] >> 16 | append[3] << 16;
460 block3[2] = append[3] >> 16;
464 block2[2] = block2[2] | append[0] << 24;
465 block2[3] = append[0] >> 8 | append[1] << 24;
466 block3[0] = append[1] >> 8 | append[2] << 24;
467 block3[1] = append[2] >> 8 | append[3] << 24;
468 block3[2] = append[3] >> 8;
472 block2[3] = append[0];
473 block3[0] = append[1];
474 block3[1] = append[2];
475 block3[2] = append[3];
479 block2[3] = block2[3] | append[0] << 8;
480 block3[0] = append[0] >> 24 | append[1] << 8;
481 block3[1] = append[1] >> 24 | append[2] << 8;
482 block3[2] = append[2] >> 24 | append[3] << 8;
483 block3[3] = append[3] >> 24;
487 block2[3] = block2[3] | append[0] << 16;
488 block3[0] = append[0] >> 16 | append[1] << 16;
489 block3[1] = append[1] >> 16 | append[2] << 16;
490 block3[2] = append[2] >> 16 | append[3] << 16;
491 block3[3] = append[3] >> 16;
495 block2[3] = block2[3] | append[0] << 24;
496 block3[0] = append[0] >> 8 | append[1] << 24;
497 block3[1] = append[1] >> 8 | append[2] << 24;
498 block3[2] = append[2] >> 8 | append[3] << 24;
499 block3[3] = append[3] >> 8;
503 block3[0] = append[0];
504 block3[1] = append[1];
505 block3[2] = append[2];
506 block3[3] = append[3];
510 block3[0] = block3[0] | append[0] << 8;
511 block3[1] = append[0] >> 24 | append[1] << 8;
512 block3[2] = append[1] >> 24 | append[2] << 8;
513 block3[3] = append[2] >> 24 | append[3] << 8;
517 block3[0] = block3[0] | append[0] << 16;
518 block3[1] = append[0] >> 16 | append[1] << 16;
519 block3[2] = append[1] >> 16 | append[2] << 16;
520 block3[3] = append[2] >> 16 | append[3] << 16;
524 block3[0] = block3[0] | append[0] << 24;
525 block3[1] = append[0] >> 8 | append[1] << 24;
526 block3[2] = append[1] >> 8 | append[2] << 24;
527 block3[3] = append[2] >> 8 | append[3] << 24;
531 block3[1] = append[0];
532 block3[2] = append[1];
533 block3[3] = append[2];
537 block3[1] = block3[1] | append[0] << 8;
538 block3[2] = append[0] >> 24 | append[1] << 8;
539 block3[3] = append[1] >> 24 | append[2] << 8;
543 block3[1] = block3[1] | append[0] << 16;
544 block3[2] = append[0] >> 16 | append[1] << 16;
545 block3[3] = append[1] >> 16 | append[2] << 16;
549 block3[1] = block3[1] | append[0] << 24;
550 block3[2] = append[0] >> 8 | append[1] << 24;
551 block3[3] = append[1] >> 8 | append[2] << 24;
555 block3[2] = append[0];
556 block3[3] = append[1];
561 static void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4])
566 block0[0] = append[0];
567 block0[1] = append[1];
568 block0[2] = append[2];
569 block0[3] = append[3];
574 block0[0] = block0[0] | append[0] << 8;
575 block0[1] = append[0] >> 24 | append[1] << 8;
576 block0[2] = append[1] >> 24 | append[2] << 8;
577 block0[3] = append[2] >> 24 | append[3] << 8;
578 block1[0] = append[3] >> 24 | 0x80u << 8;
582 block0[0] = block0[0] | append[0] << 16;
583 block0[1] = append[0] >> 16 | append[1] << 16;
584 block0[2] = append[1] >> 16 | append[2] << 16;
585 block0[3] = append[2] >> 16 | append[3] << 16;
586 block1[0] = append[3] >> 16 | 0x80u << 16;
590 block0[0] = block0[0] | append[0] << 24;
591 block0[1] = append[0] >> 8 | append[1] << 24;
592 block0[2] = append[1] >> 8 | append[2] << 24;
593 block0[3] = append[2] >> 8 | append[3] << 24;
594 block1[0] = append[3] >> 8 | 0x80u << 24;
598 block0[1] = append[0];
599 block0[2] = append[1];
600 block0[3] = append[2];
601 block1[0] = append[3];
606 block0[1] = block0[1] | append[0] << 8;
607 block0[2] = append[0] >> 24 | append[1] << 8;
608 block0[3] = append[1] >> 24 | append[2] << 8;
609 block1[0] = append[2] >> 24 | append[3] << 8;
610 block1[1] = append[3] >> 24 | 0x80u << 8;
614 block0[1] = block0[1] | append[0] << 16;
615 block0[2] = append[0] >> 16 | append[1] << 16;
616 block0[3] = append[1] >> 16 | append[2] << 16;
617 block1[0] = append[2] >> 16 | append[3] << 16;
618 block1[1] = append[3] >> 16 | 0x80u << 16;
622 block0[1] = block0[1] | append[0] << 24;
623 block0[2] = append[0] >> 8 | append[1] << 24;
624 block0[3] = append[1] >> 8 | append[2] << 24;
625 block1[0] = append[2] >> 8 | append[3] << 24;
626 block1[1] = append[3] >> 8 | 0x80u << 24;
630 block0[2] = append[0];
631 block0[3] = append[1];
632 block1[0] = append[2];
633 block1[1] = append[3];
638 block0[2] = block0[2] | append[0] << 8;
639 block0[3] = append[0] >> 24 | append[1] << 8;
640 block1[0] = append[1] >> 24 | append[2] << 8;
641 block1[1] = append[2] >> 24 | append[3] << 8;
642 block1[2] = append[3] >> 24 | 0x80u << 8;
646 block0[2] = block0[2] | append[0] << 16;
647 block0[3] = append[0] >> 16 | append[1] << 16;
648 block1[0] = append[1] >> 16 | append[2] << 16;
649 block1[1] = append[2] >> 16 | append[3] << 16;
650 block1[2] = append[3] >> 16 | 0x80u << 16;
654 block0[2] = block0[2] | append[0] << 24;
655 block0[3] = append[0] >> 8 | append[1] << 24;
656 block1[0] = append[1] >> 8 | append[2] << 24;
657 block1[1] = append[2] >> 8 | append[3] << 24;
658 block1[2] = append[3] >> 8 | 0x80u << 24;
662 block0[3] = append[0];
663 block1[0] = append[1];
664 block1[1] = append[2];
665 block1[2] = append[3];
670 block0[3] = block0[3] | append[0] << 8;
671 block1[0] = append[0] >> 24 | append[1] << 8;
672 block1[1] = append[1] >> 24 | append[2] << 8;
673 block1[2] = append[2] >> 24 | append[3] << 8;
674 block1[3] = append[3] >> 24 | 0x80u << 8;
678 block0[3] = block0[3] | append[0] << 16;
679 block1[0] = append[0] >> 16 | append[1] << 16;
680 block1[1] = append[1] >> 16 | append[2] << 16;
681 block1[2] = append[2] >> 16 | append[3] << 16;
682 block1[3] = append[3] >> 16 | 0x80u << 16;
686 block0[3] = block0[3] | append[0] << 24;
687 block1[0] = append[0] >> 8 | append[1] << 24;
688 block1[1] = append[1] >> 8 | append[2] << 24;
689 block1[2] = append[2] >> 8 | append[3] << 24;
690 block1[3] = append[3] >> 8 | 0x80u << 24;
694 block1[0] = append[0];
695 block1[1] = append[1];
696 block1[2] = append[2];
697 block1[3] = append[3];
702 block1[0] = block1[0] | append[0] << 8;
703 block1[1] = append[0] >> 24 | append[1] << 8;
704 block1[2] = append[1] >> 24 | append[2] << 8;
705 block1[3] = append[2] >> 24 | append[3] << 8;
706 block2[0] = append[3] >> 24 | 0x80u << 8;
710 block1[0] = block1[0] | append[0] << 16;
711 block1[1] = append[0] >> 16 | append[1] << 16;
712 block1[2] = append[1] >> 16 | append[2] << 16;
713 block1[3] = append[2] >> 16 | append[3] << 16;
714 block2[0] = append[3] >> 16 | 0x80u << 16;
718 block1[0] = block1[0] | append[0] << 24;
719 block1[1] = append[0] >> 8 | append[1] << 24;
720 block1[2] = append[1] >> 8 | append[2] << 24;
721 block1[3] = append[2] >> 8 | append[3] << 24;
722 block2[0] = append[3] >> 8 | 0x80u << 24;
726 block1[1] = append[0];
727 block1[2] = append[1];
728 block1[3] = append[2];
729 block2[0] = append[3];
734 block1[1] = block1[1] | append[0] << 8;
735 block1[2] = append[0] >> 24 | append[1] << 8;
736 block1[3] = append[1] >> 24 | append[2] << 8;
737 block2[0] = append[2] >> 24 | append[3] << 8;
738 block2[1] = append[3] >> 24 | 0x80u << 8;
742 block1[1] = block1[1] | append[0] << 16;
743 block1[2] = append[0] >> 16 | append[1] << 16;
744 block1[3] = append[1] >> 16 | append[2] << 16;
745 block2[0] = append[2] >> 16 | append[3] << 16;
746 block2[1] = append[3] >> 16 | 0x80u << 16;
750 block1[1] = block1[1] | append[0] << 24;
751 block1[2] = append[0] >> 8 | append[1] << 24;
752 block1[3] = append[1] >> 8 | append[2] << 24;
753 block2[0] = append[2] >> 8 | append[3] << 24;
754 block2[1] = append[3] >> 8 | 0x80u << 24;
758 block1[2] = append[0];
759 block1[3] = append[1];
760 block2[0] = append[2];
761 block2[1] = append[3];
766 block1[2] = block1[2] | append[0] << 8;
767 block1[3] = append[0] >> 24 | append[1] << 8;
768 block2[0] = append[1] >> 24 | append[2] << 8;
769 block2[1] = append[2] >> 24 | append[3] << 8;
770 block2[2] = append[3] >> 24 | 0x80u << 8;
774 block1[2] = block1[2] | append[0] << 16;
775 block1[3] = append[0] >> 16 | append[1] << 16;
776 block2[0] = append[1] >> 16 | append[2] << 16;
777 block2[1] = append[2] >> 16 | append[3] << 16;
778 block2[2] = append[3] >> 16 | 0x80u << 16;
782 block1[2] = block1[2] | append[0] << 24;
783 block1[3] = append[0] >> 8 | append[1] << 24;
784 block2[0] = append[1] >> 8 | append[2] << 24;
785 block2[1] = append[2] >> 8 | append[3] << 24;
786 block2[2] = append[3] >> 8 | 0x80u << 24;
790 block1[3] = append[0];
791 block2[0] = append[1];
792 block2[1] = append[2];
793 block2[2] = append[3];
798 block1[3] = block1[3] | append[0] << 8;
799 block2[0] = append[0] >> 24 | append[1] << 8;
800 block2[1] = append[1] >> 24 | append[2] << 8;
801 block2[2] = append[2] >> 24 | append[3] << 8;
802 block2[3] = append[3] >> 24 | 0x80u << 8;
806 block1[3] = block1[3] | append[0] << 16;
807 block2[0] = append[0] >> 16 | append[1] << 16;
808 block2[1] = append[1] >> 16 | append[2] << 16;
809 block2[2] = append[2] >> 16 | append[3] << 16;
810 block2[3] = append[3] >> 16 | 0x80u << 16;
814 block1[3] = block1[3] | append[0] << 24;
815 block2[0] = append[0] >> 8 | append[1] << 24;
816 block2[1] = append[1] >> 8 | append[2] << 24;
817 block2[2] = append[2] >> 8 | append[3] << 24;
818 block2[3] = append[3] >> 8 | 0x80u << 24;
822 block2[0] = append[0];
823 block2[1] = append[1];
824 block2[2] = append[2];
825 block2[3] = append[3];
830 block2[0] = block2[0] | append[0] << 8;
831 block2[1] = append[0] >> 24 | append[1] << 8;
832 block2[2] = append[1] >> 24 | append[2] << 8;
833 block2[3] = append[2] >> 24 | append[3] << 8;
834 block3[0] = append[3] >> 24 | 0x80u << 8;
838 block2[0] = block2[0] | append[0] << 16;
839 block2[1] = append[0] >> 16 | append[1] << 16;
840 block2[2] = append[1] >> 16 | append[2] << 16;
841 block2[3] = append[2] >> 16 | append[3] << 16;
842 block3[0] = append[3] >> 16 | 0x80u << 16;
846 block2[0] = block2[0] | append[0] << 24;
847 block2[1] = append[0] >> 8 | append[1] << 24;
848 block2[2] = append[1] >> 8 | append[2] << 24;
849 block2[3] = append[2] >> 8 | append[3] << 24;
850 block3[0] = append[3] >> 8 | 0x80u << 24;
854 block2[1] = append[0];
855 block2[2] = append[1];
856 block2[3] = append[2];
857 block3[0] = append[3];
862 block2[1] = block2[1] | append[0] << 8;
863 block2[2] = append[0] >> 24 | append[1] << 8;
864 block2[3] = append[1] >> 24 | append[2] << 8;
865 block3[0] = append[2] >> 24 | append[3] << 8;
866 block3[1] = append[3] >> 24 | 0x80u << 8;
870 block2[1] = block2[1] | append[0] << 16;
871 block2[2] = append[0] >> 16 | append[1] << 16;
872 block2[3] = append[1] >> 16 | append[2] << 16;
873 block3[0] = append[2] >> 16 | append[3] << 16;
874 block3[1] = append[3] >> 16 | 0x80u << 16;
878 block2[1] = block2[1] | append[0] << 24;
879 block2[2] = append[0] >> 8 | append[1] << 24;
880 block2[3] = append[1] >> 8 | append[2] << 24;
881 block3[0] = append[2] >> 8 | append[3] << 24;
882 block3[1] = append[3] >> 8 | 0x80u << 24;
886 block2[2] = append[0];
887 block2[3] = append[1];
888 block3[0] = append[2];
889 block3[1] = append[3];
894 block2[2] = block2[2] | append[0] << 8;
895 block2[3] = append[0] >> 24 | append[1] << 8;
896 block3[0] = append[1] >> 24 | append[2] << 8;
897 block3[1] = append[2] >> 24 | append[3] << 8;
898 block3[2] = append[3] >> 24 | 0x80u << 8;
902 block2[2] = block2[2] | append[0] << 16;
903 block2[3] = append[0] >> 16 | append[1] << 16;
904 block3[0] = append[1] >> 16 | append[2] << 16;
905 block3[1] = append[2] >> 16 | append[3] << 16;
906 block3[2] = append[3] >> 16 | 0x80u << 16;
910 block2[2] = block2[2] | append[0] << 24;
911 block2[3] = append[0] >> 8 | append[1] << 24;
912 block3[0] = append[1] >> 8 | append[2] << 24;
913 block3[1] = append[2] >> 8 | append[3] << 24;
914 block3[2] = append[3] >> 8 | 0x80u << 24;
918 block2[3] = append[0];
919 block3[0] = append[1];
920 block3[1] = append[2];
921 block3[2] = append[3];
926 block2[3] = block2[3] | append[0] << 8;
927 block3[0] = append[0] >> 24 | append[1] << 8;
928 block3[1] = append[1] >> 24 | append[2] << 8;
929 block3[2] = append[2] >> 24 | append[3] << 8;
930 block3[3] = append[3] >> 24 | 0x80u << 8;
934 block2[3] = block2[3] | append[0] << 16;
935 block3[0] = append[0] >> 16 | append[1] << 16;
936 block3[1] = append[1] >> 16 | append[2] << 16;
937 block3[2] = append[2] >> 16 | append[3] << 16;
938 block3[3] = append[3] >> 16 | 0x80u << 16;
942 block2[3] = block2[3] | append[0] << 24;
943 block3[0] = append[0] >> 8 | append[1] << 24;
944 block3[1] = append[1] >> 8 | append[2] << 24;
945 block3[2] = append[2] >> 8 | append[3] << 24;
946 block3[3] = append[3] >> 8 | 0x80u << 24;
950 block3[0] = append[0];
951 block3[1] = append[1];
952 block3[2] = append[2];
953 block3[3] = append[3];
957 block3[0] = block3[0] | append[0] << 8;
958 block3[1] = append[0] >> 24 | append[1] << 8;
959 block3[2] = append[1] >> 24 | append[2] << 8;
960 block3[3] = append[2] >> 24 | append[3] << 8;
964 block3[0] = block3[0] | append[0] << 16;
965 block3[1] = append[0] >> 16 | append[1] << 16;
966 block3[2] = append[1] >> 16 | append[2] << 16;
967 block3[3] = append[2] >> 16 | append[3] << 16;
971 block3[0] = block3[0] | append[0] << 24;
972 block3[1] = append[0] >> 8 | append[1] << 24;
973 block3[2] = append[1] >> 8 | append[2] << 24;
974 block3[3] = append[2] >> 8 | append[3] << 24;
978 block3[1] = append[0];
979 block3[2] = append[1];
980 block3[3] = append[2];
984 block3[1] = block3[1] | append[0] << 8;
985 block3[2] = append[0] >> 24 | append[1] << 8;
986 block3[3] = append[1] >> 24 | append[2] << 8;
990 block3[1] = block3[1] | append[0] << 16;
991 block3[2] = append[0] >> 16 | append[1] << 16;
992 block3[3] = append[1] >> 16 | append[2] << 16;
996 block3[1] = block3[1] | append[0] << 24;
997 block3[2] = append[0] >> 8 | append[1] << 24;
998 block3[3] = append[1] >> 8 | append[2] << 24;
1002 block3[2] = append[0];
1003 block3[3] = append[1];
1008 static void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[2])
1013 block0[0] = append[0];
1014 block0[1] = append[1];
1018 block0[0] = block0[0] | append[0] << 8;
1019 block0[1] = append[0] >> 24 | append[1] << 8;
1020 block0[2] = append[1] >> 24;
1024 block0[0] = block0[0] | append[0] << 16;
1025 block0[1] = append[0] >> 16 | append[1] << 16;
1026 block0[2] = append[1] >> 16;
1030 block0[0] = block0[0] | append[0] << 24;
1031 block0[1] = append[0] >> 8 | append[1] << 24;
1032 block0[2] = append[1] >> 8;
1036 block0[1] = append[0];
1037 block0[2] = append[1];
1041 block0[1] = block0[1] | append[0] << 8;
1042 block0[2] = append[0] >> 24 | append[1] << 8;
1043 block0[3] = append[1] >> 24;
1047 block0[1] = block0[1] | append[0] << 16;
1048 block0[2] = append[0] >> 16 | append[1] << 16;
1049 block0[3] = append[1] >> 16;
1053 block0[1] = block0[1] | append[0] << 24;
1054 block0[2] = append[0] >> 8 | append[1] << 24;
1055 block0[3] = append[1] >> 8;
1059 block0[2] = append[0];
1060 block0[3] = append[1];
1064 block0[2] = block0[2] | append[0] << 8;
1065 block0[3] = append[0] >> 24 | append[1] << 8;
1066 block1[0] = append[1] >> 24;
1070 block0[2] = block0[2] | append[0] << 16;
1071 block0[3] = append[0] >> 16 | append[1] << 16;
1072 block1[0] = append[1] >> 16;
1076 block0[2] = block0[2] | append[0] << 24;
1077 block0[3] = append[0] >> 8 | append[1] << 24;
1078 block1[0] = append[1] >> 8;
1082 block0[3] = append[0];
1083 block1[0] = append[1];
1087 block0[3] = block0[3] | append[0] << 8;
1088 block1[0] = append[0] >> 24 | append[1] << 8;
1089 block1[1] = append[1] >> 24;
1093 block0[3] = block0[3] | append[0] << 16;
1094 block1[0] = append[0] >> 16 | append[1] << 16;
1095 block1[1] = append[1] >> 16;
1099 block0[3] = block0[3] | append[0] << 24;
1100 block1[0] = append[0] >> 8 | append[1] << 24;
1101 block1[1] = append[1] >> 8;
1105 block1[0] = append[0];
1106 block1[1] = append[1];
1110 block1[0] = block1[0] | append[0] << 8;
1111 block1[1] = append[0] >> 24 | append[1] << 8;
1112 block1[2] = append[1] >> 24;
1116 block1[0] = block1[0] | append[0] << 16;
1117 block1[1] = append[0] >> 16 | append[1] << 16;
1118 block1[2] = append[1] >> 16;
1122 block1[0] = block1[0] | append[0] << 24;
1123 block1[1] = append[0] >> 8 | append[1] << 24;
1124 block1[2] = append[1] >> 8;
1128 block1[1] = append[0];
1129 block1[2] = append[1];
1133 block1[1] = block1[1] | append[0] << 8;
1134 block1[2] = append[0] >> 24 | append[1] << 8;
1135 block1[3] = append[1] >> 24;
1139 block1[1] = block1[1] | append[0] << 16;
1140 block1[2] = append[0] >> 16 | append[1] << 16;
1141 block1[3] = append[1] >> 16;
1145 block1[1] = block1[1] | append[0] << 24;
1146 block1[2] = append[0] >> 8 | append[1] << 24;
1147 block1[3] = append[1] >> 8;
1151 block1[2] = append[0];
1152 block1[3] = append[1];
1156 block1[2] = block1[2] | append[0] << 8;
1157 block1[3] = append[0] >> 24 | append[1] << 8;
1158 block2[0] = append[1] >> 24;
1162 block1[2] = block1[2] | append[0] << 16;
1163 block1[3] = append[0] >> 16 | append[1] << 16;
1164 block2[0] = append[1] >> 16;
1168 block1[2] = block1[2] | append[0] << 24;
1169 block1[3] = append[0] >> 8 | append[1] << 24;
1170 block2[0] = append[1] >> 8;
1174 block1[3] = append[0];
1175 block2[0] = append[1];
1179 block1[3] = block1[3] | append[0] << 8;
1180 block2[0] = append[0] >> 24 | append[1] << 8;
1181 block2[1] = append[1] >> 24;
1185 block1[3] = block1[3] | append[0] << 16;
1186 block2[0] = append[0] >> 16 | append[1] << 16;
1187 block2[1] = append[1] >> 16;
1191 block1[3] = block1[3] | append[0] << 24;
1192 block2[0] = append[0] >> 8 | append[1] << 24;
1193 block2[1] = append[1] >> 8;
1197 block2[0] = append[0];
1198 block2[1] = append[1];
1202 block2[0] = block2[0] | append[0] << 8;
1203 block2[1] = append[0] >> 24 | append[1] << 8;
1204 block2[2] = append[1] >> 24;
1208 block2[0] = block2[0] | append[0] << 16;
1209 block2[1] = append[0] >> 16 | append[1] << 16;
1210 block2[2] = append[1] >> 16;
1214 block2[0] = block2[0] | append[0] << 24;
1215 block2[1] = append[0] >> 8 | append[1] << 24;
1216 block2[2] = append[1] >> 8;
1220 block2[1] = append[0];
1221 block2[2] = append[1];
1225 block2[1] = block2[1] | append[0] << 8;
1226 block2[2] = append[0] >> 24 | append[1] << 8;
1227 block2[3] = append[1] >> 24;
1231 block2[1] = block2[1] | append[0] << 16;
1232 block2[2] = append[0] >> 16 | append[1] << 16;
1233 block2[3] = append[1] >> 16;
1237 block2[1] = block2[1] | append[0] << 24;
1238 block2[2] = append[0] >> 8 | append[1] << 24;
1239 block2[3] = append[1] >> 8;
1243 block2[2] = append[0];
1244 block2[3] = append[1];
1248 block2[2] = block2[2] | append[0] << 8;
1249 block2[3] = append[0] >> 24 | append[1] << 8;
1250 block3[0] = append[1] >> 24;
1254 block2[2] = block2[2] | append[0] << 16;
1255 block2[3] = append[0] >> 16 | append[1] << 16;
1256 block3[0] = append[1] >> 16;
1260 block2[2] = block2[2] | append[0] << 24;
1261 block2[3] = append[0] >> 8 | append[1] << 24;
1262 block3[0] = append[1] >> 8;
1266 block2[3] = append[0];
1267 block3[0] = append[1];
1271 block2[3] = block2[3] | append[0] << 8;
1272 block3[0] = append[0] >> 24 | append[1] << 8;
1273 block3[1] = append[1] >> 24;
1277 block2[3] = block2[3] | append[0] << 16;
1278 block3[0] = append[0] >> 16 | append[1] << 16;
1279 block3[1] = append[1] >> 16;
1283 block2[3] = block2[3] | append[0] << 24;
1284 block3[0] = append[0] >> 8 | append[1] << 24;
1285 block3[1] = append[1] >> 8;
1289 block3[0] = append[0];
1290 block3[1] = append[1];
1294 block3[0] = block3[0] | append[0] << 8;
1295 block3[1] = append[0] >> 24 | append[1] << 8;
1296 block3[2] = append[1] >> 24;
1300 block3[0] = block3[0] | append[0] << 16;
1301 block3[1] = append[0] >> 16 | append[1] << 16;
1302 block3[2] = append[1] >> 16;
1306 block3[0] = block3[0] | append[0] << 24;
1307 block3[1] = append[0] >> 8 | append[1] << 24;
1308 block3[2] = append[1] >> 8;
1312 block3[1] = append[0];
1313 block3[2] = append[1];
1317 block3[1] = block3[1] | append[0] << 8;
1318 block3[2] = append[0] >> 24 | append[1] << 8;
1319 block3[3] = append[1] >> 24;
1323 block3[1] = block3[1] | append[0] << 16;
1324 block3[2] = append[0] >> 16 | append[1] << 16;
1325 block3[3] = append[1] >> 16;
1329 block3[1] = block3[1] | append[0] << 24;
1330 block3[2] = append[0] >> 8 | append[1] << 24;
1331 block3[3] = append[1] >> 8;
1335 block3[2] = append[0];
1336 block3[3] = append[1];
1341 static void append_sign (u32 block0[4], u32 block1[4], const u32 block_len)
1346 block0[0] = md5apr1_magic0;
1347 block0[1] = md5apr1_magic1;
1351 block0[0] = block0[0] | md5apr1_magic0 << 8;
1352 block0[1] = md5apr1_magic0 >> 24 | md5apr1_magic1 << 8;
1353 block0[2] = md5apr1_magic1 >> 24;
1357 block0[0] = block0[0] | md5apr1_magic0 << 16;
1358 block0[1] = md5apr1_magic0 >> 16 | md5apr1_magic1 << 16;
1359 block0[2] = md5apr1_magic1 >> 16;
1363 block0[0] = block0[0] | md5apr1_magic0 << 24;
1364 block0[1] = md5apr1_magic0 >> 8 | md5apr1_magic1 << 24;
1365 block0[2] = md5apr1_magic1 >> 8;
1369 block0[1] = md5apr1_magic0;
1370 block0[2] = md5apr1_magic1;
1374 block0[1] = block0[1] | md5apr1_magic0 << 8;
1375 block0[2] = md5apr1_magic0 >> 24 | md5apr1_magic1 << 8;
1376 block0[3] = md5apr1_magic1 >> 24;
1380 block0[1] = block0[1] | md5apr1_magic0 << 16;
1381 block0[2] = md5apr1_magic0 >> 16 | md5apr1_magic1 << 16;
1382 block0[3] = md5apr1_magic1 >> 16;
1386 block0[1] = block0[1] | md5apr1_magic0 << 24;
1387 block0[2] = md5apr1_magic0 >> 8 | md5apr1_magic1 << 24;
1388 block0[3] = md5apr1_magic1 >> 8;
1392 block0[2] = md5apr1_magic0;
1393 block0[3] = md5apr1_magic1;
1397 block0[2] = block0[2] | md5apr1_magic0 << 8;
1398 block0[3] = md5apr1_magic0 >> 24 | md5apr1_magic1 << 8;
1399 block1[0] = md5apr1_magic1 >> 24;
1403 block0[2] = block0[2] | md5apr1_magic0 << 16;
1404 block0[3] = md5apr1_magic0 >> 16 | md5apr1_magic1 << 16;
1405 block1[0] = md5apr1_magic1 >> 16;
1409 block0[2] = block0[2] | md5apr1_magic0 << 24;
1410 block0[3] = md5apr1_magic0 >> 8 | md5apr1_magic1 << 24;
1411 block1[0] = md5apr1_magic1 >> 8;
1415 block0[3] = md5apr1_magic0;
1416 block1[0] = md5apr1_magic1;
1420 block0[3] = block0[3] | md5apr1_magic0 << 8;
1421 block1[0] = md5apr1_magic0 >> 24 | md5apr1_magic1 << 8;
1422 block1[1] = md5apr1_magic1 >> 24;
1426 block0[3] = block0[3] | md5apr1_magic0 << 16;
1427 block1[0] = md5apr1_magic0 >> 16 | md5apr1_magic1 << 16;
1428 block1[1] = md5apr1_magic1 >> 16;
1432 block0[3] = block0[3] | md5apr1_magic0 << 24;
1433 block1[0] = md5apr1_magic0 >> 8 | md5apr1_magic1 << 24;
1434 block1[1] = md5apr1_magic1 >> 8;
1439 static void append_1st (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append)
1448 block0[0] = block0[0] | append << 8;
1452 block0[0] = block0[0] | append << 16;
1456 block0[0] = block0[0] | append << 24;
1464 block0[1] = block0[1] | append << 8;
1468 block0[1] = block0[1] | append << 16;
1472 block0[1] = block0[1] | append << 24;
1480 block0[2] = block0[2] | append << 8;
1484 block0[2] = block0[2] | append << 16;
1488 block0[2] = block0[2] | append << 24;
1496 block0[3] = block0[3] | append << 8;
1500 block0[3] = block0[3] | append << 16;
1504 block0[3] = block0[3] | append << 24;
1512 block1[0] = block1[0] | append << 8;
1516 block1[0] = block1[0] | append << 16;
1520 block1[0] = block1[0] | append << 24;
1528 block1[1] = block1[1] | append << 8;
1532 block1[1] = block1[1] | append << 16;
1536 block1[1] = block1[1] | append << 24;
1544 block1[2] = block1[2] | append << 8;
1548 block1[2] = block1[2] | append << 16;
1552 block1[2] = block1[2] | append << 24;
1560 block1[3] = block1[3] | append << 8;
1564 block1[3] = block1[3] | append << 16;
1568 block1[3] = block1[3] | append << 24;
1576 block2[0] = block2[0] | append << 8;
1580 block2[0] = block2[0] | append << 16;
1584 block2[0] = block2[0] | append << 24;
1592 block2[1] = block2[1] | append << 8;
1596 block2[1] = block2[1] | append << 16;
1600 block2[1] = block2[1] | append << 24;
1608 block2[2] = block2[2] | append << 8;
1612 block2[2] = block2[2] | append << 16;
1616 block2[2] = block2[2] | append << 24;
1624 block2[3] = block2[3] | append << 8;
1628 block2[3] = block2[3] | append << 16;
1632 block2[3] = block2[3] | append << 24;
1640 block3[0] = block3[0] | append << 8;
1644 block3[0] = block3[0] | append << 16;
1648 block3[0] = block3[0] | append << 24;
1656 block3[1] = block3[1] | append << 8;
1660 block3[1] = block3[1] | append << 16;
1664 block3[1] = block3[1] | append << 24;
1673 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1679 const u32 gid = get_global_id (0);
1681 if (gid >= gid_max) return;
1685 w0[0] = pws[gid].i[0];
1686 w0[1] = pws[gid].i[1];
1687 w0[2] = pws[gid].i[2];
1688 w0[3] = pws[gid].i[3];
1690 const u32 pw_len = pws[gid].pw_len;
1698 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1699 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1701 const u32 salt_len = salt_bufs[salt_pos].salt_len;
1707 //memcat16 (block0, block1, block2, block3, block_len, w0);
1708 //block_len += pw_len;
1710 u32 block_len = pw_len;
1740 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1742 block_len += salt_len;
1744 memcat16 (block0, block1, block2, block3, block_len, w0);
1746 block_len += pw_len;
1748 append_0x80_4x4 (block0, block1, block2, block3, block_len);
1750 block3[2] = block_len * 8;
1759 md5_transform (block0, block1, block2, block3, digest);
1761 /* The password first, since that is what is most unknown */
1762 /* Then our magic string */
1763 /* Then the raw salt */
1764 /* Then just as many characters of the MD5(pw,salt,pw) */
1766 //memcat16 (block0, block1, block2, block3, block_len, w);
1767 //block_len += pw_len;
1791 append_sign (block0, block1, block_len);
1795 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1797 block_len += salt_len;
1799 truncate_block (digest, pw_len);
1801 memcat16 (block0, block1, block2, block3, block_len, digest);
1803 block_len += pw_len;
1805 /* Then something really weird... */
1807 u32 append = block0[0] & 0xFF;
1809 for (u32 j = pw_len; j; j >>= 1)
1813 append_1st (block0, block1, block2, block3, block_len, append);
1819 append_0x80_4x4 (block0, block1, block2, block3, block_len);
1821 block3[2] = block_len * 8;
1828 md5_transform (block0, block1, block2, block3, digest);
1830 tmps[gid].digest_buf[0] = digest[0];
1831 tmps[gid].digest_buf[1] = digest[1];
1832 tmps[gid].digest_buf[2] = digest[2];
1833 tmps[gid].digest_buf[3] = digest[3];
1836 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1842 const u32 gid = get_global_id (0);
1844 if (gid >= gid_max) return;
1848 w0[0] = pws[gid].i[0];
1849 w0[1] = pws[gid].i[1];
1850 w0[2] = pws[gid].i[2];
1851 w0[3] = pws[gid].i[3];
1853 const u32 pw_len = pws[gid].pw_len;
1862 append_0x80_1x4 (w0_x80, pw_len);
1870 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1871 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1873 const u32 salt_len = salt_bufs[salt_pos].salt_len;
1881 digest[0] = tmps[gid].digest_buf[0];
1882 digest[1] = tmps[gid].digest_buf[1];
1883 digest[2] = tmps[gid].digest_buf[2];
1884 digest[3] = tmps[gid].digest_buf[3];
1890 /* and now, just to make sure things don't run too fast */
1922 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
1935 const u32 j1 = (j & 1) ? 1 : 0;
1936 const u32 j3 = (j % 3) ? 1 : 0;
1937 const u32 j7 = (j % 7) ? 1 : 0;
1950 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1952 block_len += salt_len;
1957 memcat16 (block0, block1, block2, block3, block_len, w0);
1959 block_len += pw_len;
1962 memcat16_x80 (block0, block1, block2, block3, block_len, digest);
1968 block0[0] = digest[0];
1969 block0[1] = digest[1];
1970 block0[2] = digest[2];
1971 block0[3] = digest[3];
1977 block1[0] = salt_buf[0];
1978 block1[1] = salt_buf[1];
1980 block_len += salt_len;
1982 memcat16 (block0, block1, block2, block3, block_len, w0);
1984 block_len += pw_len;
1988 block1[0] = salt_buf[0];
1989 block1[1] = salt_buf[1];
1991 block_len += salt_len;
2000 block_len += pw_len;
2003 memcat16 (block0, block1, block2, block3, block_len, w0_x80);
2005 block_len += pw_len;
2008 block3[2] = block_len * 8;
2015 md5_transform (block0, block1, block2, block3, digest);
2018 tmps[gid].digest_buf[0] = digest[0];
2019 tmps[gid].digest_buf[1] = digest[1];
2020 tmps[gid].digest_buf[2] = digest[2];
2021 tmps[gid].digest_buf[3] = digest[3];
2024 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2030 const u32 gid = get_global_id (0);
2032 if (gid >= gid_max) return;
2034 const u32 lid = get_local_id (0);
2040 const u32 r0 = tmps[gid].digest_buf[DGST_R0];
2041 const u32 r1 = tmps[gid].digest_buf[DGST_R1];
2042 const u32 r2 = tmps[gid].digest_buf[DGST_R2];
2043 const u32 r3 = tmps[gid].digest_buf[DGST_R3];