2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
33 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
/*
 * md5_transform: applies 64 MD5_STEP operations (four groups of sixteen) to
 * the working variables a, b, c, d using the message words w0_t .. wf_t.
 *
 * Parameters (from the signature): w0..w3 are four read-only arrays of four
 * u32x words each; digest is a four-word u32x array that is not const.
 *
 * NOTE(review): only the signature and the MD5_STEP lines are visible in this
 * view. The declarations of a..d and w0_t..wf_t, and any write-back into
 * digest, are not shown; presumably the w*_t values are loaded from w0..w3
 * and the result is folded into digest -- confirm against the full file.
 */
40 static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4])
/*
 * Steps 1-16 use MD5_Fo. Message words are consumed in order w0_t .. wf_t,
 * the target variable rotates a, d, c, b, and the shift constants cycle
 * through MD5S00 .. MD5S03.
 */
66 MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
67 MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
68 MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
69 MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
70 MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
71 MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
72 MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
73 MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
74 MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
75 MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
76 MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
77 MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
78 MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
79 MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
80 MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
81 MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
/*
 * Steps 17-32 use MD5_Go. The message word index starts at 1 and advances by
 * 5 modulo 16 (w1, w6, wb, w0, ...); shift constants cycle MD5S10 .. MD5S13.
 */
83 MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
84 MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
85 MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
86 MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
87 MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
88 MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
89 MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
90 MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
91 MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
92 MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
93 MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
94 MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
95 MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
96 MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
97 MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
98 MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
/*
 * Steps 33-48 alternate between MD5_H1 and MD5_H2. The message word index
 * starts at 5 and advances by 3 modulo 16; shift constants cycle
 * MD5S20 .. MD5S23.
 */
100 MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
101 MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
102 MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
103 MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
104 MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
105 MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
106 MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
107 MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
108 MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
109 MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
110 MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
111 MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
112 MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
113 MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
114 MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
115 MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
/*
 * Steps 49-64 use MD5_I. The message word index starts at 0 and advances by
 * 7 modulo 16; shift constants cycle MD5S30 .. MD5S33.
 */
117 MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
118 MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
119 MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
120 MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
121 MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
122 MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
123 MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
124 MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
125 MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
126 MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
127 MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
128 MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
129 MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
130 MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
131 MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
132 MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
/*
 * memcat16: stores the four words of append into the sixteen-word buffer
 * formed by block0[0..3], block1[0..3], block2[0..3], block3[0..3].
 *
 * The visible body is a sequence of assignment groups, one per start
 * position. Groups come in sets of four per start word: a word-aligned copy,
 * then the same copy shifted by 8, 16 and 24 bits.
 *
 * NOTE(review): the construct that picks a group is not visible in this view;
 * presumably it is a switch on block_len with one case per group -- confirm
 * against the full file.
 */
140 static void memcat16 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append[4])
/* Word-aligned start at block0[0]: the four append words are stored unchanged. */
145 block0[0] = append[0];
146 block0[1] = append[1];
147 block0[2] = append[2];
148 block0[3] = append[3];
/*
 * Start inside block0[0] with an 8-bit shift. Only the first word is ORed
 * into the existing content; every following word is plainly assigned the
 * high bits carried over from the previous append word combined with the
 * shifted next append word. The leftover top byte of append[3] goes into
 * block1[0].
 */
152 block0[0] = block0[0] | append[0] << 8;
153 block0[1] = append[0] >> 24 | append[1] << 8;
154 block0[2] = append[1] >> 24 | append[2] << 8;
155 block0[3] = append[2] >> 24 | append[3] << 8;
156 block1[0] = append[3] >> 24;
/* Same layout with a 16-bit shift. */
160 block0[0] = block0[0] | append[0] << 16;
161 block0[1] = append[0] >> 16 | append[1] << 16;
162 block0[2] = append[1] >> 16 | append[2] << 16;
163 block0[3] = append[2] >> 16 | append[3] << 16;
164 block1[0] = append[3] >> 16;
/* Same layout with a 24-bit shift. */
168 block0[0] = block0[0] | append[0] << 24;
169 block0[1] = append[0] >> 8 | append[1] << 24;
170 block0[2] = append[1] >> 8 | append[2] << 24;
171 block0[3] = append[2] >> 8 | append[3] << 24;
172 block1[0] = append[3] >> 8;
/*
 * The same four variants (aligned, 8, 16, 24 bits) now repeat with the start
 * word advanced by one each time: block0[1], block0[2], block0[3], ...
 */
176 block0[1] = append[0];
177 block0[2] = append[1];
178 block0[3] = append[2];
179 block1[0] = append[3];
183 block0[1] = block0[1] | append[0] << 8;
184 block0[2] = append[0] >> 24 | append[1] << 8;
185 block0[3] = append[1] >> 24 | append[2] << 8;
186 block1[0] = append[2] >> 24 | append[3] << 8;
187 block1[1] = append[3] >> 24;
191 block0[1] = block0[1] | append[0] << 16;
192 block0[2] = append[0] >> 16 | append[1] << 16;
193 block0[3] = append[1] >> 16 | append[2] << 16;
194 block1[0] = append[2] >> 16 | append[3] << 16;
195 block1[1] = append[3] >> 16;
199 block0[1] = block0[1] | append[0] << 24;
200 block0[2] = append[0] >> 8 | append[1] << 24;
201 block0[3] = append[1] >> 8 | append[2] << 24;
202 block1[0] = append[2] >> 8 | append[3] << 24;
203 block1[1] = append[3] >> 8;
207 block0[2] = append[0];
208 block0[3] = append[1];
209 block1[0] = append[2];
210 block1[1] = append[3];
214 block0[2] = block0[2] | append[0] << 8;
215 block0[3] = append[0] >> 24 | append[1] << 8;
216 block1[0] = append[1] >> 24 | append[2] << 8;
217 block1[1] = append[2] >> 24 | append[3] << 8;
218 block1[2] = append[3] >> 24;
222 block0[2] = block0[2] | append[0] << 16;
223 block0[3] = append[0] >> 16 | append[1] << 16;
224 block1[0] = append[1] >> 16 | append[2] << 16;
225 block1[1] = append[2] >> 16 | append[3] << 16;
226 block1[2] = append[3] >> 16;
230 block0[2] = block0[2] | append[0] << 24;
231 block0[3] = append[0] >> 8 | append[1] << 24;
232 block1[0] = append[1] >> 8 | append[2] << 24;
233 block1[1] = append[2] >> 8 | append[3] << 24;
234 block1[2] = append[3] >> 8;
238 block0[3] = append[0];
239 block1[0] = append[1];
240 block1[1] = append[2];
241 block1[2] = append[3];
245 block0[3] = block0[3] | append[0] << 8;
246 block1[0] = append[0] >> 24 | append[1] << 8;
247 block1[1] = append[1] >> 24 | append[2] << 8;
248 block1[2] = append[2] >> 24 | append[3] << 8;
249 block1[3] = append[3] >> 24;
253 block0[3] = block0[3] | append[0] << 16;
254 block1[0] = append[0] >> 16 | append[1] << 16;
255 block1[1] = append[1] >> 16 | append[2] << 16;
256 block1[2] = append[2] >> 16 | append[3] << 16;
257 block1[3] = append[3] >> 16;
261 block0[3] = block0[3] | append[0] << 24;
262 block1[0] = append[0] >> 8 | append[1] << 24;
263 block1[1] = append[1] >> 8 | append[2] << 24;
264 block1[2] = append[2] >> 8 | append[3] << 24;
265 block1[3] = append[3] >> 8;
/* Start word is now block1[0]. */
269 block1[0] = append[0];
270 block1[1] = append[1];
271 block1[2] = append[2];
272 block1[3] = append[3];
276 block1[0] = block1[0] | append[0] << 8;
277 block1[1] = append[0] >> 24 | append[1] << 8;
278 block1[2] = append[1] >> 24 | append[2] << 8;
279 block1[3] = append[2] >> 24 | append[3] << 8;
280 block2[0] = append[3] >> 24;
284 block1[0] = block1[0] | append[0] << 16;
285 block1[1] = append[0] >> 16 | append[1] << 16;
286 block1[2] = append[1] >> 16 | append[2] << 16;
287 block1[3] = append[2] >> 16 | append[3] << 16;
288 block2[0] = append[3] >> 16;
292 block1[0] = block1[0] | append[0] << 24;
293 block1[1] = append[0] >> 8 | append[1] << 24;
294 block1[2] = append[1] >> 8 | append[2] << 24;
295 block1[3] = append[2] >> 8 | append[3] << 24;
296 block2[0] = append[3] >> 8;
300 block1[1] = append[0];
301 block1[2] = append[1];
302 block1[3] = append[2];
303 block2[0] = append[3];
307 block1[1] = block1[1] | append[0] << 8;
308 block1[2] = append[0] >> 24 | append[1] << 8;
309 block1[3] = append[1] >> 24 | append[2] << 8;
310 block2[0] = append[2] >> 24 | append[3] << 8;
311 block2[1] = append[3] >> 24;
315 block1[1] = block1[1] | append[0] << 16;
316 block1[2] = append[0] >> 16 | append[1] << 16;
317 block1[3] = append[1] >> 16 | append[2] << 16;
318 block2[0] = append[2] >> 16 | append[3] << 16;
319 block2[1] = append[3] >> 16;
323 block1[1] = block1[1] | append[0] << 24;
324 block1[2] = append[0] >> 8 | append[1] << 24;
325 block1[3] = append[1] >> 8 | append[2] << 24;
326 block2[0] = append[2] >> 8 | append[3] << 24;
327 block2[1] = append[3] >> 8;
331 block1[2] = append[0];
332 block1[3] = append[1];
333 block2[0] = append[2];
334 block2[1] = append[3];
338 block1[2] = block1[2] | append[0] << 8;
339 block1[3] = append[0] >> 24 | append[1] << 8;
340 block2[0] = append[1] >> 24 | append[2] << 8;
341 block2[1] = append[2] >> 24 | append[3] << 8;
342 block2[2] = append[3] >> 24;
346 block1[2] = block1[2] | append[0] << 16;
347 block1[3] = append[0] >> 16 | append[1] << 16;
348 block2[0] = append[1] >> 16 | append[2] << 16;
349 block2[1] = append[2] >> 16 | append[3] << 16;
350 block2[2] = append[3] >> 16;
354 block1[2] = block1[2] | append[0] << 24;
355 block1[3] = append[0] >> 8 | append[1] << 24;
356 block2[0] = append[1] >> 8 | append[2] << 24;
357 block2[1] = append[2] >> 8 | append[3] << 24;
358 block2[2] = append[3] >> 8;
362 block1[3] = append[0];
363 block2[0] = append[1];
364 block2[1] = append[2];
365 block2[2] = append[3];
369 block1[3] = block1[3] | append[0] << 8;
370 block2[0] = append[0] >> 24 | append[1] << 8;
371 block2[1] = append[1] >> 24 | append[2] << 8;
372 block2[2] = append[2] >> 24 | append[3] << 8;
373 block2[3] = append[3] >> 24;
377 block1[3] = block1[3] | append[0] << 16;
378 block2[0] = append[0] >> 16 | append[1] << 16;
379 block2[1] = append[1] >> 16 | append[2] << 16;
380 block2[2] = append[2] >> 16 | append[3] << 16;
381 block2[3] = append[3] >> 16;
385 block1[3] = block1[3] | append[0] << 24;
386 block2[0] = append[0] >> 8 | append[1] << 24;
387 block2[1] = append[1] >> 8 | append[2] << 24;
388 block2[2] = append[2] >> 8 | append[3] << 24;
389 block2[3] = append[3] >> 8;
/* Start word is now block2[0]. */
393 block2[0] = append[0];
394 block2[1] = append[1];
395 block2[2] = append[2];
396 block2[3] = append[3];
400 block2[0] = block2[0] | append[0] << 8;
401 block2[1] = append[0] >> 24 | append[1] << 8;
402 block2[2] = append[1] >> 24 | append[2] << 8;
403 block2[3] = append[2] >> 24 | append[3] << 8;
404 block3[0] = append[3] >> 24;
408 block2[0] = block2[0] | append[0] << 16;
409 block2[1] = append[0] >> 16 | append[1] << 16;
410 block2[2] = append[1] >> 16 | append[2] << 16;
411 block2[3] = append[2] >> 16 | append[3] << 16;
412 block3[0] = append[3] >> 16;
416 block2[0] = block2[0] | append[0] << 24;
417 block2[1] = append[0] >> 8 | append[1] << 24;
418 block2[2] = append[1] >> 8 | append[2] << 24;
419 block2[3] = append[2] >> 8 | append[3] << 24;
420 block3[0] = append[3] >> 8;
424 block2[1] = append[0];
425 block2[2] = append[1];
426 block2[3] = append[2];
427 block3[0] = append[3];
431 block2[1] = block2[1] | append[0] << 8;
432 block2[2] = append[0] >> 24 | append[1] << 8;
433 block2[3] = append[1] >> 24 | append[2] << 8;
434 block3[0] = append[2] >> 24 | append[3] << 8;
435 block3[1] = append[3] >> 24;
439 block2[1] = block2[1] | append[0] << 16;
440 block2[2] = append[0] >> 16 | append[1] << 16;
441 block2[3] = append[1] >> 16 | append[2] << 16;
442 block3[0] = append[2] >> 16 | append[3] << 16;
443 block3[1] = append[3] >> 16;
447 block2[1] = block2[1] | append[0] << 24;
448 block2[2] = append[0] >> 8 | append[1] << 24;
449 block2[3] = append[1] >> 8 | append[2] << 24;
450 block3[0] = append[2] >> 8 | append[3] << 24;
451 block3[1] = append[3] >> 8;
455 block2[2] = append[0];
456 block2[3] = append[1];
457 block3[0] = append[2];
458 block3[1] = append[3];
462 block2[2] = block2[2] | append[0] << 8;
463 block2[3] = append[0] >> 24 | append[1] << 8;
464 block3[0] = append[1] >> 24 | append[2] << 8;
465 block3[1] = append[2] >> 24 | append[3] << 8;
466 block3[2] = append[3] >> 24;
470 block2[2] = block2[2] | append[0] << 16;
471 block2[3] = append[0] >> 16 | append[1] << 16;
472 block3[0] = append[1] >> 16 | append[2] << 16;
473 block3[1] = append[2] >> 16 | append[3] << 16;
474 block3[2] = append[3] >> 16;
478 block2[2] = block2[2] | append[0] << 24;
479 block2[3] = append[0] >> 8 | append[1] << 24;
480 block3[0] = append[1] >> 8 | append[2] << 24;
481 block3[1] = append[2] >> 8 | append[3] << 24;
482 block3[2] = append[3] >> 8;
486 block2[3] = append[0];
487 block3[0] = append[1];
488 block3[1] = append[2];
489 block3[2] = append[3];
493 block2[3] = block2[3] | append[0] << 8;
494 block3[0] = append[0] >> 24 | append[1] << 8;
495 block3[1] = append[1] >> 24 | append[2] << 8;
496 block3[2] = append[2] >> 24 | append[3] << 8;
497 block3[3] = append[3] >> 24;
501 block2[3] = block2[3] | append[0] << 16;
502 block3[0] = append[0] >> 16 | append[1] << 16;
503 block3[1] = append[1] >> 16 | append[2] << 16;
504 block3[2] = append[2] >> 16 | append[3] << 16;
505 block3[3] = append[3] >> 16;
509 block2[3] = block2[3] | append[0] << 24;
510 block3[0] = append[0] >> 8 | append[1] << 24;
511 block3[1] = append[1] >> 8 | append[2] << 24;
512 block3[2] = append[2] >> 8 | append[3] << 24;
513 block3[3] = append[3] >> 8;
/* Start word is now block3[0]; the aligned copy still fits exactly. */
517 block3[0] = append[0];
518 block3[1] = append[1];
519 block3[2] = append[2];
520 block3[3] = append[3];
/*
 * From here on the data no longer fits before the end of block3. The high
 * bits of the last stored word, and then whole trailing append words, are
 * not stored by any visible line.
 */
524 block3[0] = block3[0] | append[0] << 8;
525 block3[1] = append[0] >> 24 | append[1] << 8;
526 block3[2] = append[1] >> 24 | append[2] << 8;
527 block3[3] = append[2] >> 24 | append[3] << 8;
531 block3[0] = block3[0] | append[0] << 16;
532 block3[1] = append[0] >> 16 | append[1] << 16;
533 block3[2] = append[1] >> 16 | append[2] << 16;
534 block3[3] = append[2] >> 16 | append[3] << 16;
538 block3[0] = block3[0] | append[0] << 24;
539 block3[1] = append[0] >> 8 | append[1] << 24;
540 block3[2] = append[1] >> 8 | append[2] << 24;
541 block3[3] = append[2] >> 8 | append[3] << 24;
545 block3[1] = append[0];
546 block3[2] = append[1];
547 block3[3] = append[2];
551 block3[1] = block3[1] | append[0] << 8;
552 block3[2] = append[0] >> 24 | append[1] << 8;
553 block3[3] = append[1] >> 24 | append[2] << 8;
557 block3[1] = block3[1] | append[0] << 16;
558 block3[2] = append[0] >> 16 | append[1] << 16;
559 block3[3] = append[1] >> 16 | append[2] << 16;
563 block3[1] = block3[1] | append[0] << 24;
564 block3[2] = append[0] >> 8 | append[1] << 24;
565 block3[3] = append[1] >> 8 | append[2] << 24;
/* Last visible group: aligned start at block3[2], only append[0..1] stored. */
569 block3[2] = append[0];
570 block3[3] = append[1];
/*
 * memcat16_x80: same word placement as the assignment groups of memcat16
 * (four append words stored into block0..block3 at a chosen start position,
 * in aligned / 8-bit / 16-bit / 24-bit shifted variants), but the shifted
 * groups additionally OR the constant 0x80u into the word that receives the
 * leftover high bits of append[3], in the byte position directly above those
 * leftover bits.
 *
 * NOTE(review): the construct that picks a group is not visible in this view;
 * presumably it is a switch on block_len -- confirm against the full file.
 *
 * NOTE(review): the word-aligned groups show no 0x80u write here. The
 * embedded listing numbers skip one more line after those groups than they do
 * in memcat16, so a line (presumably the 0x80 store into the following word)
 * looks to be missing from this view -- confirm against the full file.
 */
575 static void memcat16_x80 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append[4])
/* Word-aligned start at block0[0]: the four append words are stored unchanged. */
580 block0[0] = append[0];
581 block0[1] = append[1];
582 block0[2] = append[2];
583 block0[3] = append[3];
/*
 * Start inside block0[0] with an 8-bit shift. The first word is ORed into the
 * existing content; following words are plainly assigned. block1[0] receives
 * the top byte of append[3] in its low 8 bits and 0x80u in the next byte up.
 */
588 block0[0] = block0[0] | append[0] << 8;
589 block0[1] = append[0] >> 24 | append[1] << 8;
590 block0[2] = append[1] >> 24 | append[2] << 8;
591 block0[3] = append[2] >> 24 | append[3] << 8;
592 block1[0] = append[3] >> 24 | 0x80u << 8;
/* 16-bit shift: 0x80u sits above the 16 leftover bits of append[3]. */
596 block0[0] = block0[0] | append[0] << 16;
597 block0[1] = append[0] >> 16 | append[1] << 16;
598 block0[2] = append[1] >> 16 | append[2] << 16;
599 block0[3] = append[2] >> 16 | append[3] << 16;
600 block1[0] = append[3] >> 16 | 0x80u << 16;
/* 24-bit shift: 0x80u occupies the top byte of the final word. */
604 block0[0] = block0[0] | append[0] << 24;
605 block0[1] = append[0] >> 8 | append[1] << 24;
606 block0[2] = append[1] >> 8 | append[2] << 24;
607 block0[3] = append[2] >> 8 | append[3] << 24;
608 block1[0] = append[3] >> 8 | 0x80u << 24;
/*
 * The same four variants now repeat with the start word advanced by one each
 * time: block0[1], block0[2], block0[3], ...
 */
612 block0[1] = append[0];
613 block0[2] = append[1];
614 block0[3] = append[2];
615 block1[0] = append[3];
620 block0[1] = block0[1] | append[0] << 8;
621 block0[2] = append[0] >> 24 | append[1] << 8;
622 block0[3] = append[1] >> 24 | append[2] << 8;
623 block1[0] = append[2] >> 24 | append[3] << 8;
624 block1[1] = append[3] >> 24 | 0x80u << 8;
628 block0[1] = block0[1] | append[0] << 16;
629 block0[2] = append[0] >> 16 | append[1] << 16;
630 block0[3] = append[1] >> 16 | append[2] << 16;
631 block1[0] = append[2] >> 16 | append[3] << 16;
632 block1[1] = append[3] >> 16 | 0x80u << 16;
636 block0[1] = block0[1] | append[0] << 24;
637 block0[2] = append[0] >> 8 | append[1] << 24;
638 block0[3] = append[1] >> 8 | append[2] << 24;
639 block1[0] = append[2] >> 8 | append[3] << 24;
640 block1[1] = append[3] >> 8 | 0x80u << 24;
644 block0[2] = append[0];
645 block0[3] = append[1];
646 block1[0] = append[2];
647 block1[1] = append[3];
652 block0[2] = block0[2] | append[0] << 8;
653 block0[3] = append[0] >> 24 | append[1] << 8;
654 block1[0] = append[1] >> 24 | append[2] << 8;
655 block1[1] = append[2] >> 24 | append[3] << 8;
656 block1[2] = append[3] >> 24 | 0x80u << 8;
660 block0[2] = block0[2] | append[0] << 16;
661 block0[3] = append[0] >> 16 | append[1] << 16;
662 block1[0] = append[1] >> 16 | append[2] << 16;
663 block1[1] = append[2] >> 16 | append[3] << 16;
664 block1[2] = append[3] >> 16 | 0x80u << 16;
668 block0[2] = block0[2] | append[0] << 24;
669 block0[3] = append[0] >> 8 | append[1] << 24;
670 block1[0] = append[1] >> 8 | append[2] << 24;
671 block1[1] = append[2] >> 8 | append[3] << 24;
672 block1[2] = append[3] >> 8 | 0x80u << 24;
676 block0[3] = append[0];
677 block1[0] = append[1];
678 block1[1] = append[2];
679 block1[2] = append[3];
684 block0[3] = block0[3] | append[0] << 8;
685 block1[0] = append[0] >> 24 | append[1] << 8;
686 block1[1] = append[1] >> 24 | append[2] << 8;
687 block1[2] = append[2] >> 24 | append[3] << 8;
688 block1[3] = append[3] >> 24 | 0x80u << 8;
692 block0[3] = block0[3] | append[0] << 16;
693 block1[0] = append[0] >> 16 | append[1] << 16;
694 block1[1] = append[1] >> 16 | append[2] << 16;
695 block1[2] = append[2] >> 16 | append[3] << 16;
696 block1[3] = append[3] >> 16 | 0x80u << 16;
700 block0[3] = block0[3] | append[0] << 24;
701 block1[0] = append[0] >> 8 | append[1] << 24;
702 block1[1] = append[1] >> 8 | append[2] << 24;
703 block1[2] = append[2] >> 8 | append[3] << 24;
704 block1[3] = append[3] >> 8 | 0x80u << 24;
/* Start word is now block1[0]. */
708 block1[0] = append[0];
709 block1[1] = append[1];
710 block1[2] = append[2];
711 block1[3] = append[3];
716 block1[0] = block1[0] | append[0] << 8;
717 block1[1] = append[0] >> 24 | append[1] << 8;
718 block1[2] = append[1] >> 24 | append[2] << 8;
719 block1[3] = append[2] >> 24 | append[3] << 8;
720 block2[0] = append[3] >> 24 | 0x80u << 8;
724 block1[0] = block1[0] | append[0] << 16;
725 block1[1] = append[0] >> 16 | append[1] << 16;
726 block1[2] = append[1] >> 16 | append[2] << 16;
727 block1[3] = append[2] >> 16 | append[3] << 16;
728 block2[0] = append[3] >> 16 | 0x80u << 16;
732 block1[0] = block1[0] | append[0] << 24;
733 block1[1] = append[0] >> 8 | append[1] << 24;
734 block1[2] = append[1] >> 8 | append[2] << 24;
735 block1[3] = append[2] >> 8 | append[3] << 24;
736 block2[0] = append[3] >> 8 | 0x80u << 24;
740 block1[1] = append[0];
741 block1[2] = append[1];
742 block1[3] = append[2];
743 block2[0] = append[3];
748 block1[1] = block1[1] | append[0] << 8;
749 block1[2] = append[0] >> 24 | append[1] << 8;
750 block1[3] = append[1] >> 24 | append[2] << 8;
751 block2[0] = append[2] >> 24 | append[3] << 8;
752 block2[1] = append[3] >> 24 | 0x80u << 8;
756 block1[1] = block1[1] | append[0] << 16;
757 block1[2] = append[0] >> 16 | append[1] << 16;
758 block1[3] = append[1] >> 16 | append[2] << 16;
759 block2[0] = append[2] >> 16 | append[3] << 16;
760 block2[1] = append[3] >> 16 | 0x80u << 16;
764 block1[1] = block1[1] | append[0] << 24;
765 block1[2] = append[0] >> 8 | append[1] << 24;
766 block1[3] = append[1] >> 8 | append[2] << 24;
767 block2[0] = append[2] >> 8 | append[3] << 24;
768 block2[1] = append[3] >> 8 | 0x80u << 24;
772 block1[2] = append[0];
773 block1[3] = append[1];
774 block2[0] = append[2];
775 block2[1] = append[3];
780 block1[2] = block1[2] | append[0] << 8;
781 block1[3] = append[0] >> 24 | append[1] << 8;
782 block2[0] = append[1] >> 24 | append[2] << 8;
783 block2[1] = append[2] >> 24 | append[3] << 8;
784 block2[2] = append[3] >> 24 | 0x80u << 8;
788 block1[2] = block1[2] | append[0] << 16;
789 block1[3] = append[0] >> 16 | append[1] << 16;
790 block2[0] = append[1] >> 16 | append[2] << 16;
791 block2[1] = append[2] >> 16 | append[3] << 16;
792 block2[2] = append[3] >> 16 | 0x80u << 16;
796 block1[2] = block1[2] | append[0] << 24;
797 block1[3] = append[0] >> 8 | append[1] << 24;
798 block2[0] = append[1] >> 8 | append[2] << 24;
799 block2[1] = append[2] >> 8 | append[3] << 24;
800 block2[2] = append[3] >> 8 | 0x80u << 24;
804 block1[3] = append[0];
805 block2[0] = append[1];
806 block2[1] = append[2];
807 block2[2] = append[3];
812 block1[3] = block1[3] | append[0] << 8;
813 block2[0] = append[0] >> 24 | append[1] << 8;
814 block2[1] = append[1] >> 24 | append[2] << 8;
815 block2[2] = append[2] >> 24 | append[3] << 8;
816 block2[3] = append[3] >> 24 | 0x80u << 8;
820 block1[3] = block1[3] | append[0] << 16;
821 block2[0] = append[0] >> 16 | append[1] << 16;
822 block2[1] = append[1] >> 16 | append[2] << 16;
823 block2[2] = append[2] >> 16 | append[3] << 16;
824 block2[3] = append[3] >> 16 | 0x80u << 16;
828 block1[3] = block1[3] | append[0] << 24;
829 block2[0] = append[0] >> 8 | append[1] << 24;
830 block2[1] = append[1] >> 8 | append[2] << 24;
831 block2[2] = append[2] >> 8 | append[3] << 24;
832 block2[3] = append[3] >> 8 | 0x80u << 24;
/* Start word is now block2[0]. */
836 block2[0] = append[0];
837 block2[1] = append[1];
838 block2[2] = append[2];
839 block2[3] = append[3];
844 block2[0] = block2[0] | append[0] << 8;
845 block2[1] = append[0] >> 24 | append[1] << 8;
846 block2[2] = append[1] >> 24 | append[2] << 8;
847 block2[3] = append[2] >> 24 | append[3] << 8;
848 block3[0] = append[3] >> 24 | 0x80u << 8;
852 block2[0] = block2[0] | append[0] << 16;
853 block2[1] = append[0] >> 16 | append[1] << 16;
854 block2[2] = append[1] >> 16 | append[2] << 16;
855 block2[3] = append[2] >> 16 | append[3] << 16;
856 block3[0] = append[3] >> 16 | 0x80u << 16;
860 block2[0] = block2[0] | append[0] << 24;
861 block2[1] = append[0] >> 8 | append[1] << 24;
862 block2[2] = append[1] >> 8 | append[2] << 24;
863 block2[3] = append[2] >> 8 | append[3] << 24;
864 block3[0] = append[3] >> 8 | 0x80u << 24;
868 block2[1] = append[0];
869 block2[2] = append[1];
870 block2[3] = append[2];
871 block3[0] = append[3];
876 block2[1] = block2[1] | append[0] << 8;
877 block2[2] = append[0] >> 24 | append[1] << 8;
878 block2[3] = append[1] >> 24 | append[2] << 8;
879 block3[0] = append[2] >> 24 | append[3] << 8;
880 block3[1] = append[3] >> 24 | 0x80u << 8;
884 block2[1] = block2[1] | append[0] << 16;
885 block2[2] = append[0] >> 16 | append[1] << 16;
886 block2[3] = append[1] >> 16 | append[2] << 16;
887 block3[0] = append[2] >> 16 | append[3] << 16;
888 block3[1] = append[3] >> 16 | 0x80u << 16;
892 block2[1] = block2[1] | append[0] << 24;
893 block2[2] = append[0] >> 8 | append[1] << 24;
894 block2[3] = append[1] >> 8 | append[2] << 24;
895 block3[0] = append[2] >> 8 | append[3] << 24;
896 block3[1] = append[3] >> 8 | 0x80u << 24;
900 block2[2] = append[0];
901 block2[3] = append[1];
902 block3[0] = append[2];
903 block3[1] = append[3];
908 block2[2] = block2[2] | append[0] << 8;
909 block2[3] = append[0] >> 24 | append[1] << 8;
910 block3[0] = append[1] >> 24 | append[2] << 8;
911 block3[1] = append[2] >> 24 | append[3] << 8;
912 block3[2] = append[3] >> 24 | 0x80u << 8;
916 block2[2] = block2[2] | append[0] << 16;
917 block2[3] = append[0] >> 16 | append[1] << 16;
918 block3[0] = append[1] >> 16 | append[2] << 16;
919 block3[1] = append[2] >> 16 | append[3] << 16;
920 block3[2] = append[3] >> 16 | 0x80u << 16;
924 block2[2] = block2[2] | append[0] << 24;
925 block2[3] = append[0] >> 8 | append[1] << 24;
926 block3[0] = append[1] >> 8 | append[2] << 24;
927 block3[1] = append[2] >> 8 | append[3] << 24;
928 block3[2] = append[3] >> 8 | 0x80u << 24;
932 block2[3] = append[0];
933 block3[0] = append[1];
934 block3[1] = append[2];
935 block3[2] = append[3];
940 block2[3] = block2[3] | append[0] << 8;
941 block3[0] = append[0] >> 24 | append[1] << 8;
942 block3[1] = append[1] >> 24 | append[2] << 8;
943 block3[2] = append[2] >> 24 | append[3] << 8;
944 block3[3] = append[3] >> 24 | 0x80u << 8;
948 block2[3] = block2[3] | append[0] << 16;
949 block3[0] = append[0] >> 16 | append[1] << 16;
950 block3[1] = append[1] >> 16 | append[2] << 16;
951 block3[2] = append[2] >> 16 | append[3] << 16;
952 block3[3] = append[3] >> 16 | 0x80u << 16;
956 block2[3] = block2[3] | append[0] << 24;
957 block3[0] = append[0] >> 8 | append[1] << 24;
958 block3[1] = append[1] >> 8 | append[2] << 24;
959 block3[2] = append[2] >> 8 | append[3] << 24;
960 block3[3] = append[3] >> 8 | 0x80u << 24;
/* Start word is now block3[0]; the aligned copy fills block3 exactly. */
964 block3[0] = append[0];
965 block3[1] = append[1];
966 block3[2] = append[2];
967 block3[3] = append[3];
/*
 * In the remaining groups the data runs past the end of block3: no visible
 * line stores 0x80u, the overflowing high bits, or the trailing append words.
 */
971 block3[0] = block3[0] | append[0] << 8;
972 block3[1] = append[0] >> 24 | append[1] << 8;
973 block3[2] = append[1] >> 24 | append[2] << 8;
974 block3[3] = append[2] >> 24 | append[3] << 8;
978 block3[0] = block3[0] | append[0] << 16;
979 block3[1] = append[0] >> 16 | append[1] << 16;
980 block3[2] = append[1] >> 16 | append[2] << 16;
981 block3[3] = append[2] >> 16 | append[3] << 16;
985 block3[0] = block3[0] | append[0] << 24;
986 block3[1] = append[0] >> 8 | append[1] << 24;
987 block3[2] = append[1] >> 8 | append[2] << 24;
988 block3[3] = append[2] >> 8 | append[3] << 24;
992 block3[1] = append[0];
993 block3[2] = append[1];
994 block3[3] = append[2];
998 block3[1] = block3[1] | append[0] << 8;
999 block3[2] = append[0] >> 24 | append[1] << 8;
1000 block3[3] = append[1] >> 24 | append[2] << 8;
1004 block3[1] = block3[1] | append[0] << 16;
1005 block3[2] = append[0] >> 16 | append[1] << 16;
1006 block3[3] = append[1] >> 16 | append[2] << 16;
1010 block3[1] = block3[1] | append[0] << 24;
1011 block3[2] = append[0] >> 8 | append[1] << 24;
1012 block3[3] = append[1] >> 8 | append[2] << 24;
/* Last visible group: aligned start at block3[2], only append[0..1] stored. */
1016 block3[2] = append[0];
1017 block3[3] = append[1];
/*
 * memcat8: stores the two words of append into the sixteen-word buffer formed
 * by block0[0..3], block1[0..3], block2[0..3], block3[0..3].
 *
 * Unlike the block arrays, append is declared as u32 rather than u32x.
 *
 * The visible body is a sequence of assignment groups, one per start
 * position, in sets of four per start word: a word-aligned copy (two
 * assignments), then the same copy shifted by 8, 16 and 24 bits (three
 * assignments, the third holding the leftover high bits of append[1]).
 *
 * NOTE(review): the construct that picks a group is not visible in this view;
 * presumably it is a switch on block_len -- confirm against the full file.
 */
1022 static void memcat8 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32 append[2])
/* Word-aligned start at block0[0]. */
1027 block0[0] = append[0];
1028 block0[1] = append[1];
/*
 * 8-bit shift: the first word is ORed into the existing content, the second
 * and third are plainly assigned.
 */
1032 block0[0] = block0[0] | append[0] << 8;
1033 block0[1] = append[0] >> 24 | append[1] << 8;
1034 block0[2] = append[1] >> 24;
/* Same layout with a 16-bit shift. */
1038 block0[0] = block0[0] | append[0] << 16;
1039 block0[1] = append[0] >> 16 | append[1] << 16;
1040 block0[2] = append[1] >> 16;
/* Same layout with a 24-bit shift. */
1044 block0[0] = block0[0] | append[0] << 24;
1045 block0[1] = append[0] >> 8 | append[1] << 24;
1046 block0[2] = append[1] >> 8;
/*
 * The same four variants now repeat with the start word advanced by one each
 * time: block0[1], block0[2], block0[3], ...
 */
1050 block0[1] = append[0];
1051 block0[2] = append[1];
1055 block0[1] = block0[1] | append[0] << 8;
1056 block0[2] = append[0] >> 24 | append[1] << 8;
1057 block0[3] = append[1] >> 24;
1061 block0[1] = block0[1] | append[0] << 16;
1062 block0[2] = append[0] >> 16 | append[1] << 16;
1063 block0[3] = append[1] >> 16;
1067 block0[1] = block0[1] | append[0] << 24;
1068 block0[2] = append[0] >> 8 | append[1] << 24;
1069 block0[3] = append[1] >> 8;
1073 block0[2] = append[0];
1074 block0[3] = append[1];
1078 block0[2] = block0[2] | append[0] << 8;
1079 block0[3] = append[0] >> 24 | append[1] << 8;
1080 block1[0] = append[1] >> 24;
1084 block0[2] = block0[2] | append[0] << 16;
1085 block0[3] = append[0] >> 16 | append[1] << 16;
1086 block1[0] = append[1] >> 16;
1090 block0[2] = block0[2] | append[0] << 24;
1091 block0[3] = append[0] >> 8 | append[1] << 24;
1092 block1[0] = append[1] >> 8;
1096 block0[3] = append[0];
1097 block1[0] = append[1];
1101 block0[3] = block0[3] | append[0] << 8;
1102 block1[0] = append[0] >> 24 | append[1] << 8;
1103 block1[1] = append[1] >> 24;
1107 block0[3] = block0[3] | append[0] << 16;
1108 block1[0] = append[0] >> 16 | append[1] << 16;
1109 block1[1] = append[1] >> 16;
1113 block0[3] = block0[3] | append[0] << 24;
1114 block1[0] = append[0] >> 8 | append[1] << 24;
1115 block1[1] = append[1] >> 8;
/* Start word is now block1[0]. */
1119 block1[0] = append[0];
1120 block1[1] = append[1];
1124 block1[0] = block1[0] | append[0] << 8;
1125 block1[1] = append[0] >> 24 | append[1] << 8;
1126 block1[2] = append[1] >> 24;
1130 block1[0] = block1[0] | append[0] << 16;
1131 block1[1] = append[0] >> 16 | append[1] << 16;
1132 block1[2] = append[1] >> 16;
1136 block1[0] = block1[0] | append[0] << 24;
1137 block1[1] = append[0] >> 8 | append[1] << 24;
1138 block1[2] = append[1] >> 8;
1142 block1[1] = append[0];
1143 block1[2] = append[1];
1147 block1[1] = block1[1] | append[0] << 8;
1148 block1[2] = append[0] >> 24 | append[1] << 8;
1149 block1[3] = append[1] >> 24;
1153 block1[1] = block1[1] | append[0] << 16;
1154 block1[2] = append[0] >> 16 | append[1] << 16;
1155 block1[3] = append[1] >> 16;
1159 block1[1] = block1[1] | append[0] << 24;
1160 block1[2] = append[0] >> 8 | append[1] << 24;
1161 block1[3] = append[1] >> 8;
1165 block1[2] = append[0];
1166 block1[3] = append[1];
1170 block1[2] = block1[2] | append[0] << 8;
1171 block1[3] = append[0] >> 24 | append[1] << 8;
1172 block2[0] = append[1] >> 24;
1176 block1[2] = block1[2] | append[0] << 16;
1177 block1[3] = append[0] >> 16 | append[1] << 16;
1178 block2[0] = append[1] >> 16;
1182 block1[2] = block1[2] | append[0] << 24;
1183 block1[3] = append[0] >> 8 | append[1] << 24;
1184 block2[0] = append[1] >> 8;
1188 block1[3] = append[0];
1189 block2[0] = append[1];
1193 block1[3] = block1[3] | append[0] << 8;
1194 block2[0] = append[0] >> 24 | append[1] << 8;
1195 block2[1] = append[1] >> 24;
1199 block1[3] = block1[3] | append[0] << 16;
1200 block2[0] = append[0] >> 16 | append[1] << 16;
1201 block2[1] = append[1] >> 16;
1205 block1[3] = block1[3] | append[0] << 24;
1206 block2[0] = append[0] >> 8 | append[1] << 24;
1207 block2[1] = append[1] >> 8;
/* Start word is now block2[0]. */
1211 block2[0] = append[0];
1212 block2[1] = append[1];
1216 block2[0] = block2[0] | append[0] << 8;
1217 block2[1] = append[0] >> 24 | append[1] << 8;
1218 block2[2] = append[1] >> 24;
1222 block2[0] = block2[0] | append[0] << 16;
1223 block2[1] = append[0] >> 16 | append[1] << 16;
1224 block2[2] = append[1] >> 16;
1228 block2[0] = block2[0] | append[0] << 24;
1229 block2[1] = append[0] >> 8 | append[1] << 24;
1230 block2[2] = append[1] >> 8;
1234 block2[1] = append[0];
1235 block2[2] = append[1];
1239 block2[1] = block2[1] | append[0] << 8;
1240 block2[2] = append[0] >> 24 | append[1] << 8;
1241 block2[3] = append[1] >> 24;
1245 block2[1] = block2[1] | append[0] << 16;
1246 block2[2] = append[0] >> 16 | append[1] << 16;
1247 block2[3] = append[1] >> 16;
1251 block2[1] = block2[1] | append[0] << 24;
1252 block2[2] = append[0] >> 8 | append[1] << 24;
1253 block2[3] = append[1] >> 8;
1257 block2[2] = append[0];
1258 block2[3] = append[1];
1262 block2[2] = block2[2] | append[0] << 8;
1263 block2[3] = append[0] >> 24 | append[1] << 8;
1264 block3[0] = append[1] >> 24;
1268 block2[2] = block2[2] | append[0] << 16;
1269 block2[3] = append[0] >> 16 | append[1] << 16;
1270 block3[0] = append[1] >> 16;
1274 block2[2] = block2[2] | append[0] << 24;
1275 block2[3] = append[0] >> 8 | append[1] << 24;
1276 block3[0] = append[1] >> 8;
1280 block2[3] = append[0];
1281 block3[0] = append[1];
1285 block2[3] = block2[3] | append[0] << 8;
1286 block3[0] = append[0] >> 24 | append[1] << 8;
1287 block3[1] = append[1] >> 24;
1291 block2[3] = block2[3] | append[0] << 16;
1292 block3[0] = append[0] >> 16 | append[1] << 16;
1293 block3[1] = append[1] >> 16;
1297 block2[3] = block2[3] | append[0] << 24;
1298 block3[0] = append[0] >> 8 | append[1] << 24;
1299 block3[1] = append[1] >> 8;
/* Start word is now block3[0]. */
1303 block3[0] = append[0];
1304 block3[1] = append[1];
1308 block3[0] = block3[0] | append[0] << 8;
1309 block3[1] = append[0] >> 24 | append[1] << 8;
1310 block3[2] = append[1] >> 24;
1314 block3[0] = block3[0] | append[0] << 16;
1315 block3[1] = append[0] >> 16 | append[1] << 16;
1316 block3[2] = append[1] >> 16;
1320 block3[0] = block3[0] | append[0] << 24;
1321 block3[1] = append[0] >> 8 | append[1] << 24;
1322 block3[2] = append[1] >> 8;
1326 block3[1] = append[0];
1327 block3[2] = append[1];
1331 block3[1] = block3[1] | append[0] << 8;
1332 block3[2] = append[0] >> 24 | append[1] << 8;
1333 block3[3] = append[1] >> 24;
1337 block3[1] = block3[1] | append[0] << 16;
1338 block3[2] = append[0] >> 16 | append[1] << 16;
1339 block3[3] = append[1] >> 16;
1343 block3[1] = block3[1] | append[0] << 24;
1344 block3[2] = append[0] >> 8 | append[1] << 24;
1345 block3[3] = append[1] >> 8;
/* Last visible group: aligned start at block3[2]; both words still fit. */
1349 block3[2] = append[0];
1350 block3[3] = append[1];
/*
 * append_1st: merge the value `append` into one 32-bit word of the buffer
 * formed by block0..block3 (four words each).
 *
 * Every statement visible here ORs `append`, shifted left by 8, 16 or 24
 * bits, into a single word, covering block0[0] through block3[1].
 *
 * NOTE(review): the control flow that picks one of these statements is not
 * visible in this view -- presumably a switch on block_len treated as a byte
 * offset; confirm against the full file. The unshifted (word-aligned) cases
 * are likewise not visible here.
 */
1355 static void append_1st (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append)
/* block0, words 0..3: `append` moved up by one, two or three bytes */
1364 block0[0] = block0[0] | append << 8;
1368 block0[0] = block0[0] | append << 16;
1372 block0[0] = block0[0] | append << 24;
1380 block0[1] = block0[1] | append << 8;
1384 block0[1] = block0[1] | append << 16;
1388 block0[1] = block0[1] | append << 24;
1396 block0[2] = block0[2] | append << 8;
1400 block0[2] = block0[2] | append << 16;
1404 block0[2] = block0[2] | append << 24;
1412 block0[3] = block0[3] | append << 8;
1416 block0[3] = block0[3] | append << 16;
1420 block0[3] = block0[3] | append << 24;
/* block1, words 0..3 */
1428 block1[0] = block1[0] | append << 8;
1432 block1[0] = block1[0] | append << 16;
1436 block1[0] = block1[0] | append << 24;
1444 block1[1] = block1[1] | append << 8;
1448 block1[1] = block1[1] | append << 16;
1452 block1[1] = block1[1] | append << 24;
1460 block1[2] = block1[2] | append << 8;
1464 block1[2] = block1[2] | append << 16;
1468 block1[2] = block1[2] | append << 24;
1476 block1[3] = block1[3] | append << 8;
1480 block1[3] = block1[3] | append << 16;
1484 block1[3] = block1[3] | append << 24;
/* block2, words 0..3 */
1492 block2[0] = block2[0] | append << 8;
1496 block2[0] = block2[0] | append << 16;
1500 block2[0] = block2[0] | append << 24;
1508 block2[1] = block2[1] | append << 8;
1512 block2[1] = block2[1] | append << 16;
1516 block2[1] = block2[1] | append << 24;
1524 block2[2] = block2[2] | append << 8;
1528 block2[2] = block2[2] | append << 16;
1532 block2[2] = block2[2] | append << 24;
1540 block2[3] = block2[3] | append << 8;
1544 block2[3] = block2[3] | append << 16;
1548 block2[3] = block2[3] | append << 24;
/* block3, words 0..1 only: no statement for block3[2] or block3[3] is visible here */
1556 block3[0] = block3[0] | append << 8;
1560 block3[0] = block3[0] | append << 16;
1564 block3[0] = block3[0] | append << 24;
1572 block3[1] = block3[1] | append << 8;
1576 block3[1] = block3[1] | append << 16;
1580 block3[1] = block3[1] | append << 24;
/*
 * m06300_init: first stage, run once per work item.
 *
 * Reads four password words and the password length from pws[gid], and two
 * salt words and the salt length from salt_bufs[salt_pos]. It then runs
 * md5_transform twice over blocks assembled from password, salt and the
 * intermediate digest, and stores the four resulting digest words in
 * tmps[gid].digest_buf.
 *
 * NOTE(review): the declarations and initial contents of w0, salt_buf,
 * block0..block3 and digest, and any resets of block_len between the two
 * passes, are not visible in this view -- confirm against the full file.
 */
1589 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1595 const u32 gid = get_global_id (0);
/* work items at or beyond gid_max do nothing */
1597 if (gid >= gid_max) return;
/* first four 32-bit words of this work item's password entry */
1601 w0[0] = pws[gid].i[0];
1602 w0[1] = pws[gid].i[1];
1603 w0[2] = pws[gid].i[2];
1604 w0[3] = pws[gid].i[3];
1606 const u32 pw_len = pws[gid].pw_len;
/* first two words of the salt selected by salt_pos */
1614 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1615 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1617 const u32 salt_len = salt_bufs[salt_pos].salt_len;
1623 //memcat16 (block0, block1, block2, block3, block_len, w0);
1624 //block_len += pw_len;
/* block_len starts at pw_len; presumably the blocks were pre-loaded with the password in lines not visible here -- TODO confirm */
1626 u32 block_len = pw_len;
/* first pass: salt, then password, concatenated at the running offset block_len */
1656 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1658 block_len += salt_len;
1660 memcat16 (block0, block1, block2, block3, block_len, w0);
1662 block_len += pw_len;
/* presumably writes the 0x80 padding byte at offset block_len (helper not visible here) */
1664 append_0x80_4 (block0, block1, block2, block3, block_len);
/* block3[2] is word 14 of the 16-word block; it receives the message length in bits */
1666 block3[2] = block_len * 8;
1675 md5_transform (block0, block1, block2, block3, digest);
1677 /* The password first, since that is what is most unknown */
1678 /* Then the raw salt */
1679 /* Then just as many characters of the MD5(pw,salt,pw) */
1681 //memcat16 (block0, block1, block2, block3, block_len, w);
1682 //block_len += pw_len;
/* second pass: salt, then the digest of the first pass limited to pw_len */
1706 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1708 block_len += salt_len;
/* presumably keeps only the first pw_len bytes of digest (helper not visible here) */
1710 truncate_block (digest, pw_len);
1712 memcat16 (block0, block1, block2, block3, block_len, digest);
1714 block_len += pw_len;
1716 /* Then something really weird... */
/* low byte of the first block word */
1718 u32x append = block0[0] & 0xFF;
/* j is halved every iteration, so the loop runs once per significant bit of pw_len; the condition guarding the call below and the update of block_len are not visible in this view */
1720 for (u32 j = pw_len; j; j >>= 1)
1724 append_1st (block0, block1, block2, block3, block_len, append);
1730 append_0x80_4 (block0, block1, block2, block3, block_len);
1732 block3[2] = block_len * 8;
1739 md5_transform (block0, block1, block2, block3, digest);
/* hand the intermediate digest over through the per-work-item tmps entry */
1741 tmps[gid].digest_buf[0] = digest[0];
1742 tmps[gid].digest_buf[1] = digest[1];
1743 tmps[gid].digest_buf[2] = digest[2];
1744 tmps[gid].digest_buf[3] = digest[3];
/*
 * m06300_loop: iteration stage, run once per work item.
 *
 * Loads the password, salt and the digest saved in tmps[gid], performs
 * loop_cnt rounds with the round index j starting at loop_pos, and writes
 * the updated digest back to tmps[gid].digest_buf. Each round assembles a
 * block from digest, salt and password, sets the bit length in block3[2]
 * and calls md5_transform.
 *
 * NOTE(review): the branch structure that selects among the block layouts
 * below (driven by j1, j3 and j7) and the per-round reset of the blocks and
 * block_len are not visible in this view -- confirm against the full file.
 */
1747 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1753 const u32 gid = get_global_id (0);
/* work items at or beyond gid_max do nothing */
1755 if (gid >= gid_max) return;
/* first four 32-bit words of this work item's password entry */
1759 w0[0] = pws[gid].i[0];
1760 w0[1] = pws[gid].i[1];
1761 w0[2] = pws[gid].i[2];
1762 w0[3] = pws[gid].i[3];
1764 const u32 pw_len = pws[gid].pw_len;
/* w0_x80 is presumably a copy of w0 with the 0x80 padding byte placed at pw_len; its declaration is not visible here -- TODO confirm */
1773 append_0x80_1 (w0_x80, pw_len);
/* first two words of the salt selected by salt_pos */
1781 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1782 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1784 const u32 salt_len = salt_bufs[salt_pos].salt_len;
/* resume from the digest stored by the previous stage or launch */
1792 digest[0] = tmps[gid].digest_buf[0];
1793 digest[1] = tmps[gid].digest_buf[1];
1794 digest[2] = tmps[gid].digest_buf[2];
1795 digest[3] = tmps[gid].digest_buf[3];
1801 /* and now, just to make sure things don't run too fast */
/* i counts the rounds of this launch; j starts at loop_pos and advances in step with i */
1833 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
/* j1 is 1 when j is odd; j3 and j7 are 1 when j is NOT a multiple of 3 or 7 respectively */
1846 const u32 j1 = (j & 1) ? 1 : 0;
1847 const u32 j3 = (j % 3) ? 1 : 0;
1848 const u32 j7 = (j % 7) ? 1 : 0;
/* layout ending with the digest: salt and/or password are concatenated first, then the digest via memcat16_x80 (presumably also adding the 0x80 padding byte) */
1861 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1863 block_len += salt_len;
1868 memcat16 (block0, block1, block2, block3, block_len, w0);
1870 block_len += pw_len;
1873 memcat16_x80 (block0, block1, block2, block3, block_len, digest);
/* layout starting with the digest: its four words fill block0 directly */
1879 block0[0] = digest[0];
1880 block0[1] = digest[1];
1881 block0[2] = digest[2];
1882 block0[3] = digest[3];
/* the salt words go straight into block1[0..1], immediately after the four digest words in block0 */
1888 block1[0] = salt_buf[0];
1889 block1[1] = salt_buf[1];
1891 block_len += salt_len;
1893 memcat16 (block0, block1, block2, block3, block_len, w0);
1895 block_len += pw_len;
1899 block1[0] = salt_buf[0];
1900 block1[1] = salt_buf[1];
1902 block_len += salt_len;
1911 block_len += pw_len;
/* final password copy uses the pre-padded w0_x80 */
1914 memcat16 (block0, block1, block2, block3, block_len, w0_x80);
1916 block_len += pw_len;
/* block3[2] is word 14 of the 16-word block; it receives the message length in bits */
1919 block3[2] = block_len * 8;
1926 md5_transform (block0, block1, block2, block3, digest);
/* persist the digest for the next launch or the compare stage */
1929 tmps[gid].digest_buf[0] = digest[0];
1930 tmps[gid].digest_buf[1] = digest[1];
1931 tmps[gid].digest_buf[2] = digest[2];
1932 tmps[gid].digest_buf[3] = digest[3];
/*
 * m06300_comp: compare stage, run once per work item.
 *
 * Loads the four digest words from tmps[gid].digest_buf in the order given
 * by DGST_R0..DGST_R3 into r0..r3, then textually includes the fragment
 * named by VECT_COMPARE_M (one of the check_multi_* files selected near the
 * top of this file).
 *
 * NOTE(review): the included fragment is not visible here; presumably it
 * compares r0..r3 against the target digests and uses lid and the remaining
 * kernel arguments -- confirm against that file.
 */
1935 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1941 const u32 gid = get_global_id (0);
/* work items at or beyond gid_max do nothing */
1943 if (gid >= gid_max) return;
/* not used by any line visible in this block */
1945 const u32 lid = get_local_id (0);
/* digest words reordered through the DGST_R* indices */
1951 const u32x r0 = tmps[gid].digest_buf[DGST_R0];
1952 const u32x r1 = tmps[gid].digest_buf[DGST_R1];
1953 const u32x r2 = tmps[gid].digest_buf[DGST_R2];
1954 const u32x r3 = tmps[gid].digest_buf[DGST_R3];
1958 #include VECT_COMPARE_M