2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
/*
 * VECT_COMPARE_M names a comparison source file (vect1 and vect4 variants).
 * NOTE(review): it is defined twice back to back here; presumably each
 * definition sits under a different vector-width preprocessor conditional
 * that is not visible at this point, and the macro is used as an #include
 * target further down - confirm against the full file.
 */
33 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
/*
 * The six ASCII bytes "$apr1$" packed little-endian into two 32-bit words:
 * 0x72706124 holds '$','a','p','r' (lowest byte first) and 0x00002431 holds
 * '1','$' with the two upper bytes zero.
 */
40 #define md5apr1_magic0 0x72706124
41 #define md5apr1_magic1 0x00002431
/*
 * Applies the 64 MD5 step operations to one message block.
 *
 * w0..w3  four 4-word arrays; presumably they supply the sixteen message
 *         words w0_t..wf_t used below (the declarations are not visible
 *         here - TODO confirm).
 * digest  4-word state; it is not const, so presumably it is read as the
 *         initial a/b/c/d and updated with the result (not visible here -
 *         TODO confirm).
 *
 * The steps are laid out as four rounds of sixteen MD5_STEP invocations.
 * Within every round the (a, b, c, d) operands rotate one position per
 * step, and each round cycles through its own set of four shift constants.
 * The message-word order in each round matches the schedule in RFC 1321.
 */
43 static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4])
/* Round 1: MD5_Fo, message words taken in order 0..f, shifts MD5S00..MD5S03. */
69 MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
70 MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
71 MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
72 MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
73 MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
74 MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
75 MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
76 MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
77 MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
78 MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
79 MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
80 MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
81 MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
82 MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
83 MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
84 MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
/* Round 2: MD5_Go, word index starts at 1 and advances by 5 (mod 16), shifts MD5S10..MD5S13. */
86 MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
87 MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
88 MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
89 MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
90 MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
91 MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
92 MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
93 MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
94 MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
95 MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
96 MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
97 MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
98 MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
99 MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
100 MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
101 MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
/* Round 3: alternates the MD5_H1 and MD5_H2 macros (both defined elsewhere);
 * word index starts at 5 and advances by 3 (mod 16), shifts MD5S20..MD5S23. */
103 MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
104 MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
105 MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
106 MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
107 MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
108 MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
109 MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
110 MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
111 MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
112 MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
113 MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
114 MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
115 MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
116 MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
117 MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
118 MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
/* Round 4: MD5_I, word index starts at 0 and advances by 7 (mod 16), shifts MD5S30..MD5S33. */
120 MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
121 MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
122 MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
123 MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
124 MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
125 MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
126 MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
127 MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
128 MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
129 MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
130 MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
131 MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
132 MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
133 MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
134 MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
135 MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
/*
 * Writes the four words of append[] into the sixteen-word buffer formed by
 * block0..block3 (four words each), starting at a selectable byte position.
 *
 * block0..block3  destination words, modified in place.
 * block_len       presumably the number of bytes already present in the
 *                 blocks, used to pick exactly one of the assignment groups
 *                 below; the selecting construct is not visible here -
 *                 TODO confirm.
 * append          the four source words.
 *
 * The groups come in sets of four per starting destination word:
 *   - a word-aligned copy (plain assignments), then
 *   - the same copy shifted up by 8, 16 and 24 bits.
 * In a shifted group the first destination word keeps its existing low
 * bits (it is OR-ed with append[0] << s); every following word is
 * overwritten with the carry of the previous source word (>> (32 - s))
 * combined with the next source word (<< s). The final word receives only
 * the carry of append[3].
 */
143 static void memcat16 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append[4])
/* Destination starts at block0[0]. */
148 block0[0] = append[0];
149 block0[1] = append[1];
150 block0[2] = append[2];
151 block0[3] = append[3];
/* Shifted by one byte: '<<' binds tighter than '|', so each right-hand side
 * is (carry >> 24) | (next << 8). */
155 block0[0] = block0[0] | append[0] << 8;
156 block0[1] = append[0] >> 24 | append[1] << 8;
157 block0[2] = append[1] >> 24 | append[2] << 8;
158 block0[3] = append[2] >> 24 | append[3] << 8;
159 block1[0] = append[3] >> 24;
163 block0[0] = block0[0] | append[0] << 16;
164 block0[1] = append[0] >> 16 | append[1] << 16;
165 block0[2] = append[1] >> 16 | append[2] << 16;
166 block0[3] = append[2] >> 16 | append[3] << 16;
167 block1[0] = append[3] >> 16;
171 block0[0] = block0[0] | append[0] << 24;
172 block0[1] = append[0] >> 8 | append[1] << 24;
173 block0[2] = append[1] >> 8 | append[2] << 24;
174 block0[3] = append[2] >> 8 | append[3] << 24;
175 block1[0] = append[3] >> 8;
/* Destination starts at block0[1]. */
179 block0[1] = append[0];
180 block0[2] = append[1];
181 block0[3] = append[2];
182 block1[0] = append[3];
186 block0[1] = block0[1] | append[0] << 8;
187 block0[2] = append[0] >> 24 | append[1] << 8;
188 block0[3] = append[1] >> 24 | append[2] << 8;
189 block1[0] = append[2] >> 24 | append[3] << 8;
190 block1[1] = append[3] >> 24;
194 block0[1] = block0[1] | append[0] << 16;
195 block0[2] = append[0] >> 16 | append[1] << 16;
196 block0[3] = append[1] >> 16 | append[2] << 16;
197 block1[0] = append[2] >> 16 | append[3] << 16;
198 block1[1] = append[3] >> 16;
202 block0[1] = block0[1] | append[0] << 24;
203 block0[2] = append[0] >> 8 | append[1] << 24;
204 block0[3] = append[1] >> 8 | append[2] << 24;
205 block1[0] = append[2] >> 8 | append[3] << 24;
206 block1[1] = append[3] >> 8;
/* Destination starts at block0[2]. */
210 block0[2] = append[0];
211 block0[3] = append[1];
212 block1[0] = append[2];
213 block1[1] = append[3];
217 block0[2] = block0[2] | append[0] << 8;
218 block0[3] = append[0] >> 24 | append[1] << 8;
219 block1[0] = append[1] >> 24 | append[2] << 8;
220 block1[1] = append[2] >> 24 | append[3] << 8;
221 block1[2] = append[3] >> 24;
225 block0[2] = block0[2] | append[0] << 16;
226 block0[3] = append[0] >> 16 | append[1] << 16;
227 block1[0] = append[1] >> 16 | append[2] << 16;
228 block1[1] = append[2] >> 16 | append[3] << 16;
229 block1[2] = append[3] >> 16;
233 block0[2] = block0[2] | append[0] << 24;
234 block0[3] = append[0] >> 8 | append[1] << 24;
235 block1[0] = append[1] >> 8 | append[2] << 24;
236 block1[1] = append[2] >> 8 | append[3] << 24;
237 block1[2] = append[3] >> 8;
/* Destination starts at block0[3]. */
241 block0[3] = append[0];
242 block1[0] = append[1];
243 block1[1] = append[2];
244 block1[2] = append[3];
248 block0[3] = block0[3] | append[0] << 8;
249 block1[0] = append[0] >> 24 | append[1] << 8;
250 block1[1] = append[1] >> 24 | append[2] << 8;
251 block1[2] = append[2] >> 24 | append[3] << 8;
252 block1[3] = append[3] >> 24;
256 block0[3] = block0[3] | append[0] << 16;
257 block1[0] = append[0] >> 16 | append[1] << 16;
258 block1[1] = append[1] >> 16 | append[2] << 16;
259 block1[2] = append[2] >> 16 | append[3] << 16;
260 block1[3] = append[3] >> 16;
264 block0[3] = block0[3] | append[0] << 24;
265 block1[0] = append[0] >> 8 | append[1] << 24;
266 block1[1] = append[1] >> 8 | append[2] << 24;
267 block1[2] = append[2] >> 8 | append[3] << 24;
268 block1[3] = append[3] >> 8;
/* Destination starts at block1[0]. */
272 block1[0] = append[0];
273 block1[1] = append[1];
274 block1[2] = append[2];
275 block1[3] = append[3];
279 block1[0] = block1[0] | append[0] << 8;
280 block1[1] = append[0] >> 24 | append[1] << 8;
281 block1[2] = append[1] >> 24 | append[2] << 8;
282 block1[3] = append[2] >> 24 | append[3] << 8;
283 block2[0] = append[3] >> 24;
287 block1[0] = block1[0] | append[0] << 16;
288 block1[1] = append[0] >> 16 | append[1] << 16;
289 block1[2] = append[1] >> 16 | append[2] << 16;
290 block1[3] = append[2] >> 16 | append[3] << 16;
291 block2[0] = append[3] >> 16;
295 block1[0] = block1[0] | append[0] << 24;
296 block1[1] = append[0] >> 8 | append[1] << 24;
297 block1[2] = append[1] >> 8 | append[2] << 24;
298 block1[3] = append[2] >> 8 | append[3] << 24;
299 block2[0] = append[3] >> 8;
/* Destination starts at block1[1]. */
303 block1[1] = append[0];
304 block1[2] = append[1];
305 block1[3] = append[2];
306 block2[0] = append[3];
310 block1[1] = block1[1] | append[0] << 8;
311 block1[2] = append[0] >> 24 | append[1] << 8;
312 block1[3] = append[1] >> 24 | append[2] << 8;
313 block2[0] = append[2] >> 24 | append[3] << 8;
314 block2[1] = append[3] >> 24;
318 block1[1] = block1[1] | append[0] << 16;
319 block1[2] = append[0] >> 16 | append[1] << 16;
320 block1[3] = append[1] >> 16 | append[2] << 16;
321 block2[0] = append[2] >> 16 | append[3] << 16;
322 block2[1] = append[3] >> 16;
326 block1[1] = block1[1] | append[0] << 24;
327 block1[2] = append[0] >> 8 | append[1] << 24;
328 block1[3] = append[1] >> 8 | append[2] << 24;
329 block2[0] = append[2] >> 8 | append[3] << 24;
330 block2[1] = append[3] >> 8;
/* Destination starts at block1[2]. */
334 block1[2] = append[0];
335 block1[3] = append[1];
336 block2[0] = append[2];
337 block2[1] = append[3];
341 block1[2] = block1[2] | append[0] << 8;
342 block1[3] = append[0] >> 24 | append[1] << 8;
343 block2[0] = append[1] >> 24 | append[2] << 8;
344 block2[1] = append[2] >> 24 | append[3] << 8;
345 block2[2] = append[3] >> 24;
349 block1[2] = block1[2] | append[0] << 16;
350 block1[3] = append[0] >> 16 | append[1] << 16;
351 block2[0] = append[1] >> 16 | append[2] << 16;
352 block2[1] = append[2] >> 16 | append[3] << 16;
353 block2[2] = append[3] >> 16;
357 block1[2] = block1[2] | append[0] << 24;
358 block1[3] = append[0] >> 8 | append[1] << 24;
359 block2[0] = append[1] >> 8 | append[2] << 24;
360 block2[1] = append[2] >> 8 | append[3] << 24;
361 block2[2] = append[3] >> 8;
/* Destination starts at block1[3]. */
365 block1[3] = append[0];
366 block2[0] = append[1];
367 block2[1] = append[2];
368 block2[2] = append[3];
372 block1[3] = block1[3] | append[0] << 8;
373 block2[0] = append[0] >> 24 | append[1] << 8;
374 block2[1] = append[1] >> 24 | append[2] << 8;
375 block2[2] = append[2] >> 24 | append[3] << 8;
376 block2[3] = append[3] >> 24;
380 block1[3] = block1[3] | append[0] << 16;
381 block2[0] = append[0] >> 16 | append[1] << 16;
382 block2[1] = append[1] >> 16 | append[2] << 16;
383 block2[2] = append[2] >> 16 | append[3] << 16;
384 block2[3] = append[3] >> 16;
388 block1[3] = block1[3] | append[0] << 24;
389 block2[0] = append[0] >> 8 | append[1] << 24;
390 block2[1] = append[1] >> 8 | append[2] << 24;
391 block2[2] = append[2] >> 8 | append[3] << 24;
392 block2[3] = append[3] >> 8;
/* Destination starts at block2[0]. */
396 block2[0] = append[0];
397 block2[1] = append[1];
398 block2[2] = append[2];
399 block2[3] = append[3];
403 block2[0] = block2[0] | append[0] << 8;
404 block2[1] = append[0] >> 24 | append[1] << 8;
405 block2[2] = append[1] >> 24 | append[2] << 8;
406 block2[3] = append[2] >> 24 | append[3] << 8;
407 block3[0] = append[3] >> 24;
411 block2[0] = block2[0] | append[0] << 16;
412 block2[1] = append[0] >> 16 | append[1] << 16;
413 block2[2] = append[1] >> 16 | append[2] << 16;
414 block2[3] = append[2] >> 16 | append[3] << 16;
415 block3[0] = append[3] >> 16;
419 block2[0] = block2[0] | append[0] << 24;
420 block2[1] = append[0] >> 8 | append[1] << 24;
421 block2[2] = append[1] >> 8 | append[2] << 24;
422 block2[3] = append[2] >> 8 | append[3] << 24;
423 block3[0] = append[3] >> 8;
/* Destination starts at block2[1]. */
427 block2[1] = append[0];
428 block2[2] = append[1];
429 block2[3] = append[2];
430 block3[0] = append[3];
434 block2[1] = block2[1] | append[0] << 8;
435 block2[2] = append[0] >> 24 | append[1] << 8;
436 block2[3] = append[1] >> 24 | append[2] << 8;
437 block3[0] = append[2] >> 24 | append[3] << 8;
438 block3[1] = append[3] >> 24;
442 block2[1] = block2[1] | append[0] << 16;
443 block2[2] = append[0] >> 16 | append[1] << 16;
444 block2[3] = append[1] >> 16 | append[2] << 16;
445 block3[0] = append[2] >> 16 | append[3] << 16;
446 block3[1] = append[3] >> 16;
450 block2[1] = block2[1] | append[0] << 24;
451 block2[2] = append[0] >> 8 | append[1] << 24;
452 block2[3] = append[1] >> 8 | append[2] << 24;
453 block3[0] = append[2] >> 8 | append[3] << 24;
454 block3[1] = append[3] >> 8;
/* Destination starts at block2[2]. */
458 block2[2] = append[0];
459 block2[3] = append[1];
460 block3[0] = append[2];
461 block3[1] = append[3];
465 block2[2] = block2[2] | append[0] << 8;
466 block2[3] = append[0] >> 24 | append[1] << 8;
467 block3[0] = append[1] >> 24 | append[2] << 8;
468 block3[1] = append[2] >> 24 | append[3] << 8;
469 block3[2] = append[3] >> 24;
473 block2[2] = block2[2] | append[0] << 16;
474 block2[3] = append[0] >> 16 | append[1] << 16;
475 block3[0] = append[1] >> 16 | append[2] << 16;
476 block3[1] = append[2] >> 16 | append[3] << 16;
477 block3[2] = append[3] >> 16;
481 block2[2] = block2[2] | append[0] << 24;
482 block2[3] = append[0] >> 8 | append[1] << 24;
483 block3[0] = append[1] >> 8 | append[2] << 24;
484 block3[1] = append[2] >> 8 | append[3] << 24;
485 block3[2] = append[3] >> 8;
/* Destination starts at block2[3]. */
489 block2[3] = append[0];
490 block3[0] = append[1];
491 block3[1] = append[2];
492 block3[2] = append[3];
496 block2[3] = block2[3] | append[0] << 8;
497 block3[0] = append[0] >> 24 | append[1] << 8;
498 block3[1] = append[1] >> 24 | append[2] << 8;
499 block3[2] = append[2] >> 24 | append[3] << 8;
500 block3[3] = append[3] >> 24;
504 block2[3] = block2[3] | append[0] << 16;
505 block3[0] = append[0] >> 16 | append[1] << 16;
506 block3[1] = append[1] >> 16 | append[2] << 16;
507 block3[2] = append[2] >> 16 | append[3] << 16;
508 block3[3] = append[3] >> 16;
512 block2[3] = block2[3] | append[0] << 24;
513 block3[0] = append[0] >> 8 | append[1] << 24;
514 block3[1] = append[1] >> 8 | append[2] << 24;
515 block3[2] = append[2] >> 8 | append[3] << 24;
516 block3[3] = append[3] >> 8;
/* Destination starts at block3[0]: the aligned copy is the last group that
 * stores all of append[]. */
520 block3[0] = append[0];
521 block3[1] = append[1];
522 block3[2] = append[2];
523 block3[3] = append[3];
/* From here on nothing is written past block3[3], so the part of append[]
 * that would spill beyond it (starting with the carry of append[3]) is
 * dropped. */
527 block3[0] = block3[0] | append[0] << 8;
528 block3[1] = append[0] >> 24 | append[1] << 8;
529 block3[2] = append[1] >> 24 | append[2] << 8;
530 block3[3] = append[2] >> 24 | append[3] << 8;
534 block3[0] = block3[0] | append[0] << 16;
535 block3[1] = append[0] >> 16 | append[1] << 16;
536 block3[2] = append[1] >> 16 | append[2] << 16;
537 block3[3] = append[2] >> 16 | append[3] << 16;
541 block3[0] = block3[0] | append[0] << 24;
542 block3[1] = append[0] >> 8 | append[1] << 24;
543 block3[2] = append[1] >> 8 | append[2] << 24;
544 block3[3] = append[2] >> 8 | append[3] << 24;
/* Destination starts at block3[1]: append[3] is never read. */
548 block3[1] = append[0];
549 block3[2] = append[1];
550 block3[3] = append[2];
554 block3[1] = block3[1] | append[0] << 8;
555 block3[2] = append[0] >> 24 | append[1] << 8;
556 block3[3] = append[1] >> 24 | append[2] << 8;
560 block3[1] = block3[1] | append[0] << 16;
561 block3[2] = append[0] >> 16 | append[1] << 16;
562 block3[3] = append[1] >> 16 | append[2] << 16;
566 block3[1] = block3[1] | append[0] << 24;
567 block3[2] = append[0] >> 8 | append[1] << 24;
568 block3[3] = append[1] >> 8 | append[2] << 24;
/* Destination starts at block3[2]: only append[0] and append[1] are stored. */
572 block3[2] = append[0];
573 block3[3] = append[1];
/*
 * Same layout as memcat16: writes the four words of append[] into the
 * sixteen-word buffer block0..block3 at a selectable byte position. In
 * addition, the shifted groups place a 0x80 byte directly above the last
 * appended byte.
 *
 * block0..block3  destination words, modified in place.
 * block_len       presumably the number of bytes already present in the
 *                 blocks, used to pick exactly one of the assignment groups
 *                 below; the selecting construct is not visible here -
 *                 TODO confirm.
 * append          the four source words.
 *
 * In the groups shifted by s = 8, 16 or 24 bits, the final destination word
 * is (append[3] >> (32 - s)) | (0x80u << s). Shifts bind tighter than '|',
 * so the 0x80 lands in the byte just above the carried bytes of append[3].
 *
 * NOTE(review): no 0x80 store is visible for the word-aligned groups in
 * this view. Presumably the full file writes 0x80 into the word following
 * the copy in those cases - confirm before relying on this listing.
 */
578 static void memcat16_x80 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append[4])
/* Destination starts at block0[0]. */
583 block0[0] = append[0];
584 block0[1] = append[1];
585 block0[2] = append[2];
586 block0[3] = append[3];
591 block0[0] = block0[0] | append[0] << 8;
592 block0[1] = append[0] >> 24 | append[1] << 8;
593 block0[2] = append[1] >> 24 | append[2] << 8;
594 block0[3] = append[2] >> 24 | append[3] << 8;
595 block1[0] = append[3] >> 24 | 0x80u << 8;
599 block0[0] = block0[0] | append[0] << 16;
600 block0[1] = append[0] >> 16 | append[1] << 16;
601 block0[2] = append[1] >> 16 | append[2] << 16;
602 block0[3] = append[2] >> 16 | append[3] << 16;
603 block1[0] = append[3] >> 16 | 0x80u << 16;
607 block0[0] = block0[0] | append[0] << 24;
608 block0[1] = append[0] >> 8 | append[1] << 24;
609 block0[2] = append[1] >> 8 | append[2] << 24;
610 block0[3] = append[2] >> 8 | append[3] << 24;
611 block1[0] = append[3] >> 8 | 0x80u << 24;
/* Destination starts at block0[1]. */
615 block0[1] = append[0];
616 block0[2] = append[1];
617 block0[3] = append[2];
618 block1[0] = append[3];
623 block0[1] = block0[1] | append[0] << 8;
624 block0[2] = append[0] >> 24 | append[1] << 8;
625 block0[3] = append[1] >> 24 | append[2] << 8;
626 block1[0] = append[2] >> 24 | append[3] << 8;
627 block1[1] = append[3] >> 24 | 0x80u << 8;
631 block0[1] = block0[1] | append[0] << 16;
632 block0[2] = append[0] >> 16 | append[1] << 16;
633 block0[3] = append[1] >> 16 | append[2] << 16;
634 block1[0] = append[2] >> 16 | append[3] << 16;
635 block1[1] = append[3] >> 16 | 0x80u << 16;
639 block0[1] = block0[1] | append[0] << 24;
640 block0[2] = append[0] >> 8 | append[1] << 24;
641 block0[3] = append[1] >> 8 | append[2] << 24;
642 block1[0] = append[2] >> 8 | append[3] << 24;
643 block1[1] = append[3] >> 8 | 0x80u << 24;
/* Destination starts at block0[2]. */
647 block0[2] = append[0];
648 block0[3] = append[1];
649 block1[0] = append[2];
650 block1[1] = append[3];
655 block0[2] = block0[2] | append[0] << 8;
656 block0[3] = append[0] >> 24 | append[1] << 8;
657 block1[0] = append[1] >> 24 | append[2] << 8;
658 block1[1] = append[2] >> 24 | append[3] << 8;
659 block1[2] = append[3] >> 24 | 0x80u << 8;
663 block0[2] = block0[2] | append[0] << 16;
664 block0[3] = append[0] >> 16 | append[1] << 16;
665 block1[0] = append[1] >> 16 | append[2] << 16;
666 block1[1] = append[2] >> 16 | append[3] << 16;
667 block1[2] = append[3] >> 16 | 0x80u << 16;
671 block0[2] = block0[2] | append[0] << 24;
672 block0[3] = append[0] >> 8 | append[1] << 24;
673 block1[0] = append[1] >> 8 | append[2] << 24;
674 block1[1] = append[2] >> 8 | append[3] << 24;
675 block1[2] = append[3] >> 8 | 0x80u << 24;
/* Destination starts at block0[3]. */
679 block0[3] = append[0];
680 block1[0] = append[1];
681 block1[1] = append[2];
682 block1[2] = append[3];
687 block0[3] = block0[3] | append[0] << 8;
688 block1[0] = append[0] >> 24 | append[1] << 8;
689 block1[1] = append[1] >> 24 | append[2] << 8;
690 block1[2] = append[2] >> 24 | append[3] << 8;
691 block1[3] = append[3] >> 24 | 0x80u << 8;
695 block0[3] = block0[3] | append[0] << 16;
696 block1[0] = append[0] >> 16 | append[1] << 16;
697 block1[1] = append[1] >> 16 | append[2] << 16;
698 block1[2] = append[2] >> 16 | append[3] << 16;
699 block1[3] = append[3] >> 16 | 0x80u << 16;
703 block0[3] = block0[3] | append[0] << 24;
704 block1[0] = append[0] >> 8 | append[1] << 24;
705 block1[1] = append[1] >> 8 | append[2] << 24;
706 block1[2] = append[2] >> 8 | append[3] << 24;
707 block1[3] = append[3] >> 8 | 0x80u << 24;
/* Destination starts at block1[0]. */
711 block1[0] = append[0];
712 block1[1] = append[1];
713 block1[2] = append[2];
714 block1[3] = append[3];
719 block1[0] = block1[0] | append[0] << 8;
720 block1[1] = append[0] >> 24 | append[1] << 8;
721 block1[2] = append[1] >> 24 | append[2] << 8;
722 block1[3] = append[2] >> 24 | append[3] << 8;
723 block2[0] = append[3] >> 24 | 0x80u << 8;
727 block1[0] = block1[0] | append[0] << 16;
728 block1[1] = append[0] >> 16 | append[1] << 16;
729 block1[2] = append[1] >> 16 | append[2] << 16;
730 block1[3] = append[2] >> 16 | append[3] << 16;
731 block2[0] = append[3] >> 16 | 0x80u << 16;
735 block1[0] = block1[0] | append[0] << 24;
736 block1[1] = append[0] >> 8 | append[1] << 24;
737 block1[2] = append[1] >> 8 | append[2] << 24;
738 block1[3] = append[2] >> 8 | append[3] << 24;
739 block2[0] = append[3] >> 8 | 0x80u << 24;
/* Destination starts at block1[1]. */
743 block1[1] = append[0];
744 block1[2] = append[1];
745 block1[3] = append[2];
746 block2[0] = append[3];
751 block1[1] = block1[1] | append[0] << 8;
752 block1[2] = append[0] >> 24 | append[1] << 8;
753 block1[3] = append[1] >> 24 | append[2] << 8;
754 block2[0] = append[2] >> 24 | append[3] << 8;
755 block2[1] = append[3] >> 24 | 0x80u << 8;
759 block1[1] = block1[1] | append[0] << 16;
760 block1[2] = append[0] >> 16 | append[1] << 16;
761 block1[3] = append[1] >> 16 | append[2] << 16;
762 block2[0] = append[2] >> 16 | append[3] << 16;
763 block2[1] = append[3] >> 16 | 0x80u << 16;
767 block1[1] = block1[1] | append[0] << 24;
768 block1[2] = append[0] >> 8 | append[1] << 24;
769 block1[3] = append[1] >> 8 | append[2] << 24;
770 block2[0] = append[2] >> 8 | append[3] << 24;
771 block2[1] = append[3] >> 8 | 0x80u << 24;
/* Destination starts at block1[2]. */
775 block1[2] = append[0];
776 block1[3] = append[1];
777 block2[0] = append[2];
778 block2[1] = append[3];
783 block1[2] = block1[2] | append[0] << 8;
784 block1[3] = append[0] >> 24 | append[1] << 8;
785 block2[0] = append[1] >> 24 | append[2] << 8;
786 block2[1] = append[2] >> 24 | append[3] << 8;
787 block2[2] = append[3] >> 24 | 0x80u << 8;
791 block1[2] = block1[2] | append[0] << 16;
792 block1[3] = append[0] >> 16 | append[1] << 16;
793 block2[0] = append[1] >> 16 | append[2] << 16;
794 block2[1] = append[2] >> 16 | append[3] << 16;
795 block2[2] = append[3] >> 16 | 0x80u << 16;
799 block1[2] = block1[2] | append[0] << 24;
800 block1[3] = append[0] >> 8 | append[1] << 24;
801 block2[0] = append[1] >> 8 | append[2] << 24;
802 block2[1] = append[2] >> 8 | append[3] << 24;
803 block2[2] = append[3] >> 8 | 0x80u << 24;
/* Destination starts at block1[3]. */
807 block1[3] = append[0];
808 block2[0] = append[1];
809 block2[1] = append[2];
810 block2[2] = append[3];
815 block1[3] = block1[3] | append[0] << 8;
816 block2[0] = append[0] >> 24 | append[1] << 8;
817 block2[1] = append[1] >> 24 | append[2] << 8;
818 block2[2] = append[2] >> 24 | append[3] << 8;
819 block2[3] = append[3] >> 24 | 0x80u << 8;
823 block1[3] = block1[3] | append[0] << 16;
824 block2[0] = append[0] >> 16 | append[1] << 16;
825 block2[1] = append[1] >> 16 | append[2] << 16;
826 block2[2] = append[2] >> 16 | append[3] << 16;
827 block2[3] = append[3] >> 16 | 0x80u << 16;
831 block1[3] = block1[3] | append[0] << 24;
832 block2[0] = append[0] >> 8 | append[1] << 24;
833 block2[1] = append[1] >> 8 | append[2] << 24;
834 block2[2] = append[2] >> 8 | append[3] << 24;
835 block2[3] = append[3] >> 8 | 0x80u << 24;
/* Destination starts at block2[0]. */
839 block2[0] = append[0];
840 block2[1] = append[1];
841 block2[2] = append[2];
842 block2[3] = append[3];
847 block2[0] = block2[0] | append[0] << 8;
848 block2[1] = append[0] >> 24 | append[1] << 8;
849 block2[2] = append[1] >> 24 | append[2] << 8;
850 block2[3] = append[2] >> 24 | append[3] << 8;
851 block3[0] = append[3] >> 24 | 0x80u << 8;
855 block2[0] = block2[0] | append[0] << 16;
856 block2[1] = append[0] >> 16 | append[1] << 16;
857 block2[2] = append[1] >> 16 | append[2] << 16;
858 block2[3] = append[2] >> 16 | append[3] << 16;
859 block3[0] = append[3] >> 16 | 0x80u << 16;
863 block2[0] = block2[0] | append[0] << 24;
864 block2[1] = append[0] >> 8 | append[1] << 24;
865 block2[2] = append[1] >> 8 | append[2] << 24;
866 block2[3] = append[2] >> 8 | append[3] << 24;
867 block3[0] = append[3] >> 8 | 0x80u << 24;
/* Destination starts at block2[1]. */
871 block2[1] = append[0];
872 block2[2] = append[1];
873 block2[3] = append[2];
874 block3[0] = append[3];
879 block2[1] = block2[1] | append[0] << 8;
880 block2[2] = append[0] >> 24 | append[1] << 8;
881 block2[3] = append[1] >> 24 | append[2] << 8;
882 block3[0] = append[2] >> 24 | append[3] << 8;
883 block3[1] = append[3] >> 24 | 0x80u << 8;
887 block2[1] = block2[1] | append[0] << 16;
888 block2[2] = append[0] >> 16 | append[1] << 16;
889 block2[3] = append[1] >> 16 | append[2] << 16;
890 block3[0] = append[2] >> 16 | append[3] << 16;
891 block3[1] = append[3] >> 16 | 0x80u << 16;
895 block2[1] = block2[1] | append[0] << 24;
896 block2[2] = append[0] >> 8 | append[1] << 24;
897 block2[3] = append[1] >> 8 | append[2] << 24;
898 block3[0] = append[2] >> 8 | append[3] << 24;
899 block3[1] = append[3] >> 8 | 0x80u << 24;
/* Destination starts at block2[2]. */
903 block2[2] = append[0];
904 block2[3] = append[1];
905 block3[0] = append[2];
906 block3[1] = append[3];
911 block2[2] = block2[2] | append[0] << 8;
912 block2[3] = append[0] >> 24 | append[1] << 8;
913 block3[0] = append[1] >> 24 | append[2] << 8;
914 block3[1] = append[2] >> 24 | append[3] << 8;
915 block3[2] = append[3] >> 24 | 0x80u << 8;
919 block2[2] = block2[2] | append[0] << 16;
920 block2[3] = append[0] >> 16 | append[1] << 16;
921 block3[0] = append[1] >> 16 | append[2] << 16;
922 block3[1] = append[2] >> 16 | append[3] << 16;
923 block3[2] = append[3] >> 16 | 0x80u << 16;
927 block2[2] = block2[2] | append[0] << 24;
928 block2[3] = append[0] >> 8 | append[1] << 24;
929 block3[0] = append[1] >> 8 | append[2] << 24;
930 block3[1] = append[2] >> 8 | append[3] << 24;
931 block3[2] = append[3] >> 8 | 0x80u << 24;
/* Destination starts at block2[3]. */
935 block2[3] = append[0];
936 block3[0] = append[1];
937 block3[1] = append[2];
938 block3[2] = append[3];
943 block2[3] = block2[3] | append[0] << 8;
944 block3[0] = append[0] >> 24 | append[1] << 8;
945 block3[1] = append[1] >> 24 | append[2] << 8;
946 block3[2] = append[2] >> 24 | append[3] << 8;
947 block3[3] = append[3] >> 24 | 0x80u << 8;
951 block2[3] = block2[3] | append[0] << 16;
952 block3[0] = append[0] >> 16 | append[1] << 16;
953 block3[1] = append[1] >> 16 | append[2] << 16;
954 block3[2] = append[2] >> 16 | append[3] << 16;
955 block3[3] = append[3] >> 16 | 0x80u << 16;
959 block2[3] = block2[3] | append[0] << 24;
960 block3[0] = append[0] >> 8 | append[1] << 24;
961 block3[1] = append[1] >> 8 | append[2] << 24;
962 block3[2] = append[2] >> 8 | append[3] << 24;
963 block3[3] = append[3] >> 8 | 0x80u << 24;
/* Destination starts at block3[0]: the copy fills block3 completely, so
 * there is no word left after it within block3. */
967 block3[0] = append[0];
968 block3[1] = append[1];
969 block3[2] = append[2];
970 block3[3] = append[3];
/* From here on nothing is written past block3[3]: the spilled part of
 * append[] is dropped and no 0x80 term appears in these groups. */
974 block3[0] = block3[0] | append[0] << 8;
975 block3[1] = append[0] >> 24 | append[1] << 8;
976 block3[2] = append[1] >> 24 | append[2] << 8;
977 block3[3] = append[2] >> 24 | append[3] << 8;
981 block3[0] = block3[0] | append[0] << 16;
982 block3[1] = append[0] >> 16 | append[1] << 16;
983 block3[2] = append[1] >> 16 | append[2] << 16;
984 block3[3] = append[2] >> 16 | append[3] << 16;
988 block3[0] = block3[0] | append[0] << 24;
989 block3[1] = append[0] >> 8 | append[1] << 24;
990 block3[2] = append[1] >> 8 | append[2] << 24;
991 block3[3] = append[2] >> 8 | append[3] << 24;
/* Destination starts at block3[1]: append[3] is never read. */
995 block3[1] = append[0];
996 block3[2] = append[1];
997 block3[3] = append[2];
1001 block3[1] = block3[1] | append[0] << 8;
1002 block3[2] = append[0] >> 24 | append[1] << 8;
1003 block3[3] = append[1] >> 24 | append[2] << 8;
1007 block3[1] = block3[1] | append[0] << 16;
1008 block3[2] = append[0] >> 16 | append[1] << 16;
1009 block3[3] = append[1] >> 16 | append[2] << 16;
1013 block3[1] = block3[1] | append[0] << 24;
1014 block3[2] = append[0] >> 8 | append[1] << 24;
1015 block3[3] = append[1] >> 8 | append[2] << 24;
/* Destination starts at block3[2]: only append[0] and append[1] are stored. */
1019 block3[2] = append[0];
1020 block3[3] = append[1];
/*
 * Writes the two words of append[] into the sixteen-word buffer formed by
 * block0..block3 (four words each), starting at a selectable byte position.
 *
 * block0..block3  destination words (u32x), modified in place.
 * block_len       presumably the number of bytes already present in the
 *                 blocks, used to pick exactly one of the assignment groups
 *                 below; the selecting construct is not visible here -
 *                 TODO confirm.
 * append          the two source words; note these are plain u32, not u32x.
 *
 * The groups come in sets of four per starting destination word: a
 * word-aligned copy (two assignments), then the copy shifted up by 8, 16
 * and 24 bits (three assignments). In a shifted group the first
 * destination word keeps its existing low bits via '|', the middle word
 * combines the carry of append[0] with append[1], and the last word
 * receives only the carry of append[1]. Shifts bind tighter than '|'.
 * Every visible group stays within block3[3], so no source bits are dropped.
 */
1025 static void memcat8 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32 append[2])
/* Destination starts at block0[0]. */
1030 block0[0] = append[0];
1031 block0[1] = append[1];
1035 block0[0] = block0[0] | append[0] << 8;
1036 block0[1] = append[0] >> 24 | append[1] << 8;
1037 block0[2] = append[1] >> 24;
1041 block0[0] = block0[0] | append[0] << 16;
1042 block0[1] = append[0] >> 16 | append[1] << 16;
1043 block0[2] = append[1] >> 16;
1047 block0[0] = block0[0] | append[0] << 24;
1048 block0[1] = append[0] >> 8 | append[1] << 24;
1049 block0[2] = append[1] >> 8;
/* Destination starts at block0[1]. */
1053 block0[1] = append[0];
1054 block0[2] = append[1];
1058 block0[1] = block0[1] | append[0] << 8;
1059 block0[2] = append[0] >> 24 | append[1] << 8;
1060 block0[3] = append[1] >> 24;
1064 block0[1] = block0[1] | append[0] << 16;
1065 block0[2] = append[0] >> 16 | append[1] << 16;
1066 block0[3] = append[1] >> 16;
1070 block0[1] = block0[1] | append[0] << 24;
1071 block0[2] = append[0] >> 8 | append[1] << 24;
1072 block0[3] = append[1] >> 8;
/* Destination starts at block0[2]. */
1076 block0[2] = append[0];
1077 block0[3] = append[1];
1081 block0[2] = block0[2] | append[0] << 8;
1082 block0[3] = append[0] >> 24 | append[1] << 8;
1083 block1[0] = append[1] >> 24;
1087 block0[2] = block0[2] | append[0] << 16;
1088 block0[3] = append[0] >> 16 | append[1] << 16;
1089 block1[0] = append[1] >> 16;
1093 block0[2] = block0[2] | append[0] << 24;
1094 block0[3] = append[0] >> 8 | append[1] << 24;
1095 block1[0] = append[1] >> 8;
/* Destination starts at block0[3]. */
1099 block0[3] = append[0];
1100 block1[0] = append[1];
1104 block0[3] = block0[3] | append[0] << 8;
1105 block1[0] = append[0] >> 24 | append[1] << 8;
1106 block1[1] = append[1] >> 24;
1110 block0[3] = block0[3] | append[0] << 16;
1111 block1[0] = append[0] >> 16 | append[1] << 16;
1112 block1[1] = append[1] >> 16;
1116 block0[3] = block0[3] | append[0] << 24;
1117 block1[0] = append[0] >> 8 | append[1] << 24;
1118 block1[1] = append[1] >> 8;
/* Destination starts at block1[0]. */
1122 block1[0] = append[0];
1123 block1[1] = append[1];
1127 block1[0] = block1[0] | append[0] << 8;
1128 block1[1] = append[0] >> 24 | append[1] << 8;
1129 block1[2] = append[1] >> 24;
1133 block1[0] = block1[0] | append[0] << 16;
1134 block1[1] = append[0] >> 16 | append[1] << 16;
1135 block1[2] = append[1] >> 16;
1139 block1[0] = block1[0] | append[0] << 24;
1140 block1[1] = append[0] >> 8 | append[1] << 24;
1141 block1[2] = append[1] >> 8;
/* Destination starts at block1[1]. */
1145 block1[1] = append[0];
1146 block1[2] = append[1];
1150 block1[1] = block1[1] | append[0] << 8;
1151 block1[2] = append[0] >> 24 | append[1] << 8;
1152 block1[3] = append[1] >> 24;
1156 block1[1] = block1[1] | append[0] << 16;
1157 block1[2] = append[0] >> 16 | append[1] << 16;
1158 block1[3] = append[1] >> 16;
1162 block1[1] = block1[1] | append[0] << 24;
1163 block1[2] = append[0] >> 8 | append[1] << 24;
1164 block1[3] = append[1] >> 8;
/* Destination starts at block1[2]. */
1168 block1[2] = append[0];
1169 block1[3] = append[1];
1173 block1[2] = block1[2] | append[0] << 8;
1174 block1[3] = append[0] >> 24 | append[1] << 8;
1175 block2[0] = append[1] >> 24;
1179 block1[2] = block1[2] | append[0] << 16;
1180 block1[3] = append[0] >> 16 | append[1] << 16;
1181 block2[0] = append[1] >> 16;
1185 block1[2] = block1[2] | append[0] << 24;
1186 block1[3] = append[0] >> 8 | append[1] << 24;
1187 block2[0] = append[1] >> 8;
/* Destination starts at block1[3]. */
1191 block1[3] = append[0];
1192 block2[0] = append[1];
1196 block1[3] = block1[3] | append[0] << 8;
1197 block2[0] = append[0] >> 24 | append[1] << 8;
1198 block2[1] = append[1] >> 24;
1202 block1[3] = block1[3] | append[0] << 16;
1203 block2[0] = append[0] >> 16 | append[1] << 16;
1204 block2[1] = append[1] >> 16;
1208 block1[3] = block1[3] | append[0] << 24;
1209 block2[0] = append[0] >> 8 | append[1] << 24;
1210 block2[1] = append[1] >> 8;
/* Destination starts at block2[0]. */
1214 block2[0] = append[0];
1215 block2[1] = append[1];
1219 block2[0] = block2[0] | append[0] << 8;
1220 block2[1] = append[0] >> 24 | append[1] << 8;
1221 block2[2] = append[1] >> 24;
1225 block2[0] = block2[0] | append[0] << 16;
1226 block2[1] = append[0] >> 16 | append[1] << 16;
1227 block2[2] = append[1] >> 16;
1231 block2[0] = block2[0] | append[0] << 24;
1232 block2[1] = append[0] >> 8 | append[1] << 24;
1233 block2[2] = append[1] >> 8;
/* Destination starts at block2[1]. */
1237 block2[1] = append[0];
1238 block2[2] = append[1];
1242 block2[1] = block2[1] | append[0] << 8;
1243 block2[2] = append[0] >> 24 | append[1] << 8;
1244 block2[3] = append[1] >> 24;
1248 block2[1] = block2[1] | append[0] << 16;
1249 block2[2] = append[0] >> 16 | append[1] << 16;
1250 block2[3] = append[1] >> 16;
1254 block2[1] = block2[1] | append[0] << 24;
1255 block2[2] = append[0] >> 8 | append[1] << 24;
1256 block2[3] = append[1] >> 8;
/* Destination starts at block2[2]. */
1260 block2[2] = append[0];
1261 block2[3] = append[1];
1265 block2[2] = block2[2] | append[0] << 8;
1266 block2[3] = append[0] >> 24 | append[1] << 8;
1267 block3[0] = append[1] >> 24;
1271 block2[2] = block2[2] | append[0] << 16;
1272 block2[3] = append[0] >> 16 | append[1] << 16;
1273 block3[0] = append[1] >> 16;
1277 block2[2] = block2[2] | append[0] << 24;
1278 block2[3] = append[0] >> 8 | append[1] << 24;
1279 block3[0] = append[1] >> 8;
/* Destination starts at block2[3]. */
1283 block2[3] = append[0];
1284 block3[0] = append[1];
1288 block2[3] = block2[3] | append[0] << 8;
1289 block3[0] = append[0] >> 24 | append[1] << 8;
1290 block3[1] = append[1] >> 24;
1294 block2[3] = block2[3] | append[0] << 16;
1295 block3[0] = append[0] >> 16 | append[1] << 16;
1296 block3[1] = append[1] >> 16;
1300 block2[3] = block2[3] | append[0] << 24;
1301 block3[0] = append[0] >> 8 | append[1] << 24;
1302 block3[1] = append[1] >> 8;
/* Destination starts at block3[0]. */
1306 block3[0] = append[0];
1307 block3[1] = append[1];
1311 block3[0] = block3[0] | append[0] << 8;
1312 block3[1] = append[0] >> 24 | append[1] << 8;
1313 block3[2] = append[1] >> 24;
1317 block3[0] = block3[0] | append[0] << 16;
1318 block3[1] = append[0] >> 16 | append[1] << 16;
1319 block3[2] = append[1] >> 16;
1323 block3[0] = block3[0] | append[0] << 24;
1324 block3[1] = append[0] >> 8 | append[1] << 24;
1325 block3[2] = append[1] >> 8;
/* Destination starts at block3[1]. */
1329 block3[1] = append[0];
1330 block3[2] = append[1];
1334 block3[1] = block3[1] | append[0] << 8;
1335 block3[2] = append[0] >> 24 | append[1] << 8;
1336 block3[3] = append[1] >> 24;
1340 block3[1] = block3[1] | append[0] << 16;
1341 block3[2] = append[0] >> 16 | append[1] << 16;
1342 block3[3] = append[1] >> 16;
1346 block3[1] = block3[1] | append[0] << 24;
1347 block3[2] = append[0] >> 8 | append[1] << 24;
1348 block3[3] = append[1] >> 8;
/* Destination starts at block3[2]: the last visible group, ending exactly
 * at block3[3]. */
1352 block3[2] = append[0];
1353 block3[3] = append[1];
/*
 * append_sign: stores the two magic words md5apr1_magic0 / md5apr1_magic1
 * into block0 (spilling into block1) at one of several byte alignments.
 *
 * With this file's definitions (0x72706124 and 0x00002431) the six non-zero
 * bytes, read low byte first, are '$' 'a' 'p' 'r' '1' '$'.
 *
 * Parameters:
 *   block0, block1 - 4-word buffers that are modified in place
 *   block_len      - NOTE(review): presumably the byte offset (0..15) at which
 *                    the magic is placed; the statements that select one group
 *                    of assignments below are not visible in this excerpt, so
 *                    confirm against the full file.
 *
 * Pattern shared by all groups:
 *   - unshifted group: both magic words are assigned directly;
 *   - shifted groups (<< 8, << 16, << 24): the first target word is OR-ed with
 *     its existing content, while the following words are overwritten. The
 *     bits shifted out of one word are carried into the next with the
 *     complementary right shift (>> 24, >> 16, >> 8).
 */
1358 static void append_sign (u32x block0[4], u32x block1[4], const u32 block_len)
/* magic starts in block0[0]: unshifted, then shifted by 8, 16 and 24 bits */
1363 block0[0] = md5apr1_magic0;
1364 block0[1] = md5apr1_magic1;
1368 block0[0] = block0[0] | md5apr1_magic0 << 8;
1369 block0[1] = md5apr1_magic0 >> 24 | md5apr1_magic1 << 8;
1370 block0[2] = md5apr1_magic1 >> 24;
1374 block0[0] = block0[0] | md5apr1_magic0 << 16;
1375 block0[1] = md5apr1_magic0 >> 16 | md5apr1_magic1 << 16;
1376 block0[2] = md5apr1_magic1 >> 16;
1380 block0[0] = block0[0] | md5apr1_magic0 << 24;
1381 block0[1] = md5apr1_magic0 >> 8 | md5apr1_magic1 << 24;
1382 block0[2] = md5apr1_magic1 >> 8;
/* magic starts in block0[1] */
1386 block0[1] = md5apr1_magic0;
1387 block0[2] = md5apr1_magic1;
1391 block0[1] = block0[1] | md5apr1_magic0 << 8;
1392 block0[2] = md5apr1_magic0 >> 24 | md5apr1_magic1 << 8;
1393 block0[3] = md5apr1_magic1 >> 24;
1397 block0[1] = block0[1] | md5apr1_magic0 << 16;
1398 block0[2] = md5apr1_magic0 >> 16 | md5apr1_magic1 << 16;
1399 block0[3] = md5apr1_magic1 >> 16;
1403 block0[1] = block0[1] | md5apr1_magic0 << 24;
1404 block0[2] = md5apr1_magic0 >> 8 | md5apr1_magic1 << 24;
1405 block0[3] = md5apr1_magic1 >> 8;
/* magic starts in block0[2]; the shifted variants spill into block1[0] */
1409 block0[2] = md5apr1_magic0;
1410 block0[3] = md5apr1_magic1;
1414 block0[2] = block0[2] | md5apr1_magic0 << 8;
1415 block0[3] = md5apr1_magic0 >> 24 | md5apr1_magic1 << 8;
1416 block1[0] = md5apr1_magic1 >> 24;
1420 block0[2] = block0[2] | md5apr1_magic0 << 16;
1421 block0[3] = md5apr1_magic0 >> 16 | md5apr1_magic1 << 16;
1422 block1[0] = md5apr1_magic1 >> 16;
1426 block0[2] = block0[2] | md5apr1_magic0 << 24;
1427 block0[3] = md5apr1_magic0 >> 8 | md5apr1_magic1 << 24;
1428 block1[0] = md5apr1_magic1 >> 8;
/* magic starts in block0[3]; the remainder lands in block1[0] / block1[1] */
1432 block0[3] = md5apr1_magic0;
1433 block1[0] = md5apr1_magic1;
1437 block0[3] = block0[3] | md5apr1_magic0 << 8;
1438 block1[0] = md5apr1_magic0 >> 24 | md5apr1_magic1 << 8;
1439 block1[1] = md5apr1_magic1 >> 24;
1443 block0[3] = block0[3] | md5apr1_magic0 << 16;
1444 block1[0] = md5apr1_magic0 >> 16 | md5apr1_magic1 << 16;
1445 block1[1] = md5apr1_magic1 >> 16;
1449 block0[3] = block0[3] | md5apr1_magic0 << 24;
1450 block1[0] = md5apr1_magic0 >> 8 | md5apr1_magic1 << 24;
1451 block1[1] = md5apr1_magic1 >> 8;
/*
 * append_1st: ORs the value `append`, shifted left by 8, 16 or 24 bits, into a
 * single word of block0..block3.
 *
 * The visible targets run from block0[0] through block3[1], with three shifted
 * variants per word. Each statement touches exactly one word; unlike
 * append_sign there is no carry into the following word. The caller visible in
 * this file passes a value masked with 0xFF, for which none of these shifts
 * loses bits.
 *
 * Parameters:
 *   block0..block3 - 4-word buffers that are modified in place
 *   block_len      - NOTE(review): presumably the byte offset selecting the
 *                    target word and shift; the selection logic and the
 *                    unshifted (word-aligned) variants are not visible in this
 *                    excerpt, so confirm against the full file.
 *   append         - value to merge into the block
 */
1456 static void append_1st (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append)
/* targets in block0 */
1465 block0[0] = block0[0] | append << 8;
1469 block0[0] = block0[0] | append << 16;
1473 block0[0] = block0[0] | append << 24;
1481 block0[1] = block0[1] | append << 8;
1485 block0[1] = block0[1] | append << 16;
1489 block0[1] = block0[1] | append << 24;
1497 block0[2] = block0[2] | append << 8;
1501 block0[2] = block0[2] | append << 16;
1505 block0[2] = block0[2] | append << 24;
1513 block0[3] = block0[3] | append << 8;
1517 block0[3] = block0[3] | append << 16;
1521 block0[3] = block0[3] | append << 24;
/* targets in block1 */
1529 block1[0] = block1[0] | append << 8;
1533 block1[0] = block1[0] | append << 16;
1537 block1[0] = block1[0] | append << 24;
1545 block1[1] = block1[1] | append << 8;
1549 block1[1] = block1[1] | append << 16;
1553 block1[1] = block1[1] | append << 24;
1561 block1[2] = block1[2] | append << 8;
1565 block1[2] = block1[2] | append << 16;
1569 block1[2] = block1[2] | append << 24;
1577 block1[3] = block1[3] | append << 8;
1581 block1[3] = block1[3] | append << 16;
1585 block1[3] = block1[3] | append << 24;
/* targets in block2 */
1593 block2[0] = block2[0] | append << 8;
1597 block2[0] = block2[0] | append << 16;
1601 block2[0] = block2[0] | append << 24;
1609 block2[1] = block2[1] | append << 8;
1613 block2[1] = block2[1] | append << 16;
1617 block2[1] = block2[1] | append << 24;
1625 block2[2] = block2[2] | append << 8;
1629 block2[2] = block2[2] | append << 16;
1633 block2[2] = block2[2] | append << 24;
1641 block2[3] = block2[3] | append << 8;
1645 block2[3] = block2[3] | append << 16;
1649 block2[3] = block2[3] | append << 24;
/* targets in block3 (the visible statements stop at block3[1]) */
1657 block3[0] = block3[0] | append << 8;
1661 block3[0] = block3[0] | append << 16;
1665 block3[0] = block3[0] | append << 24;
1673 block3[1] = block3[1] | append << 8;
1677 block3[1] = block3[1] | append << 16;
1681 block3[1] = block3[1] | append << 24;
/*
 * m01600_init: setup kernel, one work-item per password candidate.
 *
 * Runs md5_transform twice and stores the second digest in
 * tmps[gid].digest_buf, which m01600_loop later reads back.
 *
 * Of the kernel arguments, the visible statements use only pws, tmps,
 * salt_bufs, salt_pos and gid_max.
 *
 * NOTE(review): local declarations, block initialisation and several control
 * statements are not visible in this excerpt. The comments below describe only
 * the statements that are shown; confirm the gaps against the full file.
 */
1690 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1696 const u32 gid = get_global_id (0);
/* work-items with an index at or beyond gid_max have nothing to do */
1698 if (gid >= gid_max) return;
/* first four 32-bit words of this work-item's password, and its length */
1702 w0[0] = pws[gid].i[0];
1703 w0[1] = pws[gid].i[1];
1704 w0[2] = pws[gid].i[2];
1705 w0[3] = pws[gid].i[3];
1707 const u32 pw_len = pws[gid].pw_len;
/* two words of salt and the salt length for the salt selected by salt_pos */
1715 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1716 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1718 const u32 salt_len = salt_bufs[salt_pos].salt_len;
1724 //memcat16 (block0, block1, block2, block3, block_len, w0);
1725 //block_len += pw_len;
/*
 * First digest. block_len starts at pw_len, which presumably means the block
 * already holds the password at this point (the statements that fill it are
 * not visible here -- TODO confirm). The salt and then the password are
 * appended after it.
 */
1727 u32 block_len = pw_len;
1757 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1759 block_len += salt_len;
1761 memcat16 (block0, block1, block2, block3, block_len, w0);
1763 block_len += pw_len;
/* presumably writes the 0x80 padding marker at offset block_len -- helper not shown */
1765 append_0x80_4 (block0, block1, block2, block3, block_len);
/* block_len * 8 goes into block3[2], i.e. word 14 of the 16 words passed to md5_transform */
1767 block3[2] = block_len * 8;
1776 md5_transform (block0, block1, block2, block3, digest);
1778 /* The password first, since that is what is most unknown */
1779 /* Then our magic string */
1780 /* Then the raw salt */
1781 /* Then just as many characters of the MD5(pw,salt,pw) */
1783 //memcat16 (block0, block1, block2, block3, block_len, w);
1784 //block_len += pw_len;
/*
 * Second digest: magic string, salt, then the first digest passed through
 * truncate_block (digest, pw_len) -- presumably cut to pw_len bytes, helper
 * not shown. The reset of the block and of block_len before this point is not
 * visible in this excerpt.
 */
1808 append_sign (block0, block1, block_len);
1812 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1814 block_len += salt_len;
1816 truncate_block (digest, pw_len);
1818 memcat16 (block0, block1, block2, block3, block_len, digest);
1820 block_len += pw_len;
1822 /* Then something really weird... */
/* lowest byte of block0[0] is the value handed to append_1st below */
1824 u32x append = block0[0] & 0xFF;
/*
 * j starts at pw_len and is shifted right by one each pass until it is zero.
 * NOTE(review): the loop body is only partly visible; the condition guarding
 * append_1st and the update of block_len are not shown here.
 */
1826 for (u32 j = pw_len; j; j >>= 1)
1830 append_1st (block0, block1, block2, block3, block_len, append);
/* same finalisation as for the first digest: padding helper, length word, transform */
1836 append_0x80_4 (block0, block1, block2, block3, block_len);
1838 block3[2] = block_len * 8;
1845 md5_transform (block0, block1, block2, block3, digest);
/* store the digest so that m01600_loop can continue from it */
1847 tmps[gid].digest_buf[0] = digest[0];
1848 tmps[gid].digest_buf[1] = digest[1];
1849 tmps[gid].digest_buf[2] = digest[2];
1850 tmps[gid].digest_buf[3] = digest[3];
/*
 * m01600_loop: iteration kernel, one work-item per password candidate.
 *
 * Loads the digest stored in tmps[gid].digest_buf, runs loop_cnt rounds
 * (round index j starting at loop_pos) that each rebuild a block from the
 * digest, the salt and the password and call md5_transform, then writes the
 * resulting digest back to tmps[gid].digest_buf.
 *
 * Of the kernel arguments, the visible statements use only pws, tmps,
 * salt_bufs, salt_pos, loop_pos, loop_cnt and gid_max.
 *
 * NOTE(review): local declarations and the conditionals inside the loop are
 * not visible in this excerpt. The comments below describe only the statements
 * that are shown; confirm the gaps against the full file.
 */
1853 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1859 const u32 gid = get_global_id (0);
/* work-items with an index at or beyond gid_max have nothing to do */
1861 if (gid >= gid_max) return;
/* first four 32-bit words of this work-item's password, and its length */
1865 w0[0] = pws[gid].i[0];
1866 w0[1] = pws[gid].i[1];
1867 w0[2] = pws[gid].i[2];
1868 w0[3] = pws[gid].i[3];
1870 const u32 pw_len = pws[gid].pw_len;
/*
 * w0_x80 is presumably a copy of w0 that receives the 0x80 padding marker at
 * offset pw_len (its declaration and the helper are not shown -- TODO confirm).
 * It is used further down in place of w0 for one of the password appends.
 */
1879 append_0x80_1 (w0_x80, pw_len);
/* two words of salt and the salt length for the salt selected by salt_pos */
1887 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1888 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1890 const u32 salt_len = salt_bufs[salt_pos].salt_len;
/* resume from the digest stored in tmps[gid] */
1898 digest[0] = tmps[gid].digest_buf[0];
1899 digest[1] = tmps[gid].digest_buf[1];
1900 digest[2] = tmps[gid].digest_buf[2];
1901 digest[3] = tmps[gid].digest_buf[3];
1907 /* and now, just to make sure things don't run too fast */
/* i counts the rounds done in this invocation; j continues from loop_pos */
1939 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
/* j1 is 1 when j is odd; j3 / j7 are 1 when j is NOT a multiple of 3 / 7 */
1952 const u32 j1 = (j & 1) ? 1 : 0;
1953 const u32 j3 = (j % 3) ? 1 : 0;
1954 const u32 j7 = (j % 7) ? 1 : 0;
/*
 * The statements below assemble the block for this round from digest,
 * salt_buf and w0 / w0_x80 in several alternative layouts, advancing block_len
 * by salt_len or pw_len after each piece.
 * NOTE(review): the conditionals that choose between these layouts are not
 * visible in this excerpt; presumably they test j1, j3 and j7 -- confirm.
 */
1967 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1969 block_len += salt_len;
1974 memcat16 (block0, block1, block2, block3, block_len, w0);
1976 block_len += pw_len;
1979 memcat16_x80 (block0, block1, block2, block3, block_len, digest);
/* layouts that start with the digest in block0 */
1985 block0[0] = digest[0];
1986 block0[1] = digest[1];
1987 block0[2] = digest[2];
1988 block0[3] = digest[3];
/* salt stored directly in block1[0..1], i.e. right after the four digest words */
1994 block1[0] = salt_buf[0];
1995 block1[1] = salt_buf[1];
1997 block_len += salt_len;
1999 memcat16 (block0, block1, block2, block3, block_len, w0);
2001 block_len += pw_len;
2005 block1[0] = salt_buf[0];
2006 block1[1] = salt_buf[1];
2008 block_len += salt_len;
2017 block_len += pw_len;
/* password variant w0_x80 is appended here instead of w0 */
2020 memcat16 (block0, block1, block2, block3, block_len, w0_x80);
2022 block_len += pw_len;
/* block_len * 8 goes into block3[2], i.e. word 14 of the 16 words passed to md5_transform */
2025 block3[2] = block_len * 8;
2032 md5_transform (block0, block1, block2, block3, digest);
/* write the digest back; m01600_loop and m01600_comp both read tmps[gid].digest_buf */
2035 tmps[gid].digest_buf[0] = digest[0];
2036 tmps[gid].digest_buf[1] = digest[1];
2037 tmps[gid].digest_buf[2] = digest[2];
2038 tmps[gid].digest_buf[3] = digest[3];
/*
 * m01600_comp: comparison kernel, one work-item per password candidate.
 *
 * Loads the four digest words stored in tmps[gid].digest_buf, in the order
 * given by DGST_R0..DGST_R3, into r0..r3 and then textually includes the file
 * named by VECT_COMPARE_M.
 *
 * NOTE(review): the included file is not part of this excerpt; presumably it
 * compares r0..r3 against the target digests using the remaining kernel
 * arguments and lid -- confirm against that file.
 */
2041 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2047 const u32 gid = get_global_id (0);
/* work-items with an index at or beyond gid_max have nothing to do */
2049 if (gid >= gid_max) return;
/* index of this work-item within its work-group; not used by the visible statements */
2051 const u32 lid = get_local_id (0);
/* digest words left in tmps[gid] by m01600_init / m01600_loop */
2057 const u32x r0 = tmps[gid].digest_buf[DGST_R0];
2058 const u32x r1 = tmps[gid].digest_buf[DGST_R1];
2059 const u32x r2 = tmps[gid].digest_buf[DGST_R2];
2060 const u32x r3 = tmps[gid].digest_buf[DGST_R3];
/* the comparison code is pulled in textually from the file VECT_COMPARE_M names */
2064 #include VECT_COMPARE_M