2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
28 #include "include/kernel_functions.c"
29 #include "types_amd.c"
30 #include "common_amd.c"
33 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
41 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
44 #define md5crypt_magic 0x00243124
46 static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4])
72 MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
73 MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
74 MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
75 MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
76 MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
77 MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
78 MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
79 MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
80 MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
81 MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
82 MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
83 MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
84 MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
85 MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
86 MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
87 MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
89 MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
90 MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
91 MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
92 MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
93 MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
94 MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
95 MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
96 MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
97 MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
98 MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
99 MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
100 MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
101 MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
102 MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
103 MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
104 MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
106 MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
107 MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
108 MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
109 MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
110 MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
111 MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
112 MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
113 MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
114 MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
115 MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
116 MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
117 MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
118 MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
119 MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
120 MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
121 MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
123 MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
124 MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
125 MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
126 MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
127 MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
128 MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
129 MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
130 MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
131 MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
132 MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
133 MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
134 MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
135 MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
136 MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
137 MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
138 MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
146 static void memcat16 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append[4])
151 block0[0] = append[0];
152 block0[1] = append[1];
153 block0[2] = append[2];
154 block0[3] = append[3];
158 block0[0] = block0[0] | append[0] << 8;
159 block0[1] = append[0] >> 24 | append[1] << 8;
160 block0[2] = append[1] >> 24 | append[2] << 8;
161 block0[3] = append[2] >> 24 | append[3] << 8;
162 block1[0] = append[3] >> 24;
166 block0[0] = block0[0] | append[0] << 16;
167 block0[1] = append[0] >> 16 | append[1] << 16;
168 block0[2] = append[1] >> 16 | append[2] << 16;
169 block0[3] = append[2] >> 16 | append[3] << 16;
170 block1[0] = append[3] >> 16;
174 block0[0] = block0[0] | append[0] << 24;
175 block0[1] = append[0] >> 8 | append[1] << 24;
176 block0[2] = append[1] >> 8 | append[2] << 24;
177 block0[3] = append[2] >> 8 | append[3] << 24;
178 block1[0] = append[3] >> 8;
182 block0[1] = append[0];
183 block0[2] = append[1];
184 block0[3] = append[2];
185 block1[0] = append[3];
189 block0[1] = block0[1] | append[0] << 8;
190 block0[2] = append[0] >> 24 | append[1] << 8;
191 block0[3] = append[1] >> 24 | append[2] << 8;
192 block1[0] = append[2] >> 24 | append[3] << 8;
193 block1[1] = append[3] >> 24;
197 block0[1] = block0[1] | append[0] << 16;
198 block0[2] = append[0] >> 16 | append[1] << 16;
199 block0[3] = append[1] >> 16 | append[2] << 16;
200 block1[0] = append[2] >> 16 | append[3] << 16;
201 block1[1] = append[3] >> 16;
205 block0[1] = block0[1] | append[0] << 24;
206 block0[2] = append[0] >> 8 | append[1] << 24;
207 block0[3] = append[1] >> 8 | append[2] << 24;
208 block1[0] = append[2] >> 8 | append[3] << 24;
209 block1[1] = append[3] >> 8;
213 block0[2] = append[0];
214 block0[3] = append[1];
215 block1[0] = append[2];
216 block1[1] = append[3];
220 block0[2] = block0[2] | append[0] << 8;
221 block0[3] = append[0] >> 24 | append[1] << 8;
222 block1[0] = append[1] >> 24 | append[2] << 8;
223 block1[1] = append[2] >> 24 | append[3] << 8;
224 block1[2] = append[3] >> 24;
228 block0[2] = block0[2] | append[0] << 16;
229 block0[3] = append[0] >> 16 | append[1] << 16;
230 block1[0] = append[1] >> 16 | append[2] << 16;
231 block1[1] = append[2] >> 16 | append[3] << 16;
232 block1[2] = append[3] >> 16;
236 block0[2] = block0[2] | append[0] << 24;
237 block0[3] = append[0] >> 8 | append[1] << 24;
238 block1[0] = append[1] >> 8 | append[2] << 24;
239 block1[1] = append[2] >> 8 | append[3] << 24;
240 block1[2] = append[3] >> 8;
244 block0[3] = append[0];
245 block1[0] = append[1];
246 block1[1] = append[2];
247 block1[2] = append[3];
251 block0[3] = block0[3] | append[0] << 8;
252 block1[0] = append[0] >> 24 | append[1] << 8;
253 block1[1] = append[1] >> 24 | append[2] << 8;
254 block1[2] = append[2] >> 24 | append[3] << 8;
255 block1[3] = append[3] >> 24;
259 block0[3] = block0[3] | append[0] << 16;
260 block1[0] = append[0] >> 16 | append[1] << 16;
261 block1[1] = append[1] >> 16 | append[2] << 16;
262 block1[2] = append[2] >> 16 | append[3] << 16;
263 block1[3] = append[3] >> 16;
267 block0[3] = block0[3] | append[0] << 24;
268 block1[0] = append[0] >> 8 | append[1] << 24;
269 block1[1] = append[1] >> 8 | append[2] << 24;
270 block1[2] = append[2] >> 8 | append[3] << 24;
271 block1[3] = append[3] >> 8;
275 block1[0] = append[0];
276 block1[1] = append[1];
277 block1[2] = append[2];
278 block1[3] = append[3];
282 block1[0] = block1[0] | append[0] << 8;
283 block1[1] = append[0] >> 24 | append[1] << 8;
284 block1[2] = append[1] >> 24 | append[2] << 8;
285 block1[3] = append[2] >> 24 | append[3] << 8;
286 block2[0] = append[3] >> 24;
290 block1[0] = block1[0] | append[0] << 16;
291 block1[1] = append[0] >> 16 | append[1] << 16;
292 block1[2] = append[1] >> 16 | append[2] << 16;
293 block1[3] = append[2] >> 16 | append[3] << 16;
294 block2[0] = append[3] >> 16;
298 block1[0] = block1[0] | append[0] << 24;
299 block1[1] = append[0] >> 8 | append[1] << 24;
300 block1[2] = append[1] >> 8 | append[2] << 24;
301 block1[3] = append[2] >> 8 | append[3] << 24;
302 block2[0] = append[3] >> 8;
306 block1[1] = append[0];
307 block1[2] = append[1];
308 block1[3] = append[2];
309 block2[0] = append[3];
313 block1[1] = block1[1] | append[0] << 8;
314 block1[2] = append[0] >> 24 | append[1] << 8;
315 block1[3] = append[1] >> 24 | append[2] << 8;
316 block2[0] = append[2] >> 24 | append[3] << 8;
317 block2[1] = append[3] >> 24;
321 block1[1] = block1[1] | append[0] << 16;
322 block1[2] = append[0] >> 16 | append[1] << 16;
323 block1[3] = append[1] >> 16 | append[2] << 16;
324 block2[0] = append[2] >> 16 | append[3] << 16;
325 block2[1] = append[3] >> 16;
329 block1[1] = block1[1] | append[0] << 24;
330 block1[2] = append[0] >> 8 | append[1] << 24;
331 block1[3] = append[1] >> 8 | append[2] << 24;
332 block2[0] = append[2] >> 8 | append[3] << 24;
333 block2[1] = append[3] >> 8;
337 block1[2] = append[0];
338 block1[3] = append[1];
339 block2[0] = append[2];
340 block2[1] = append[3];
344 block1[2] = block1[2] | append[0] << 8;
345 block1[3] = append[0] >> 24 | append[1] << 8;
346 block2[0] = append[1] >> 24 | append[2] << 8;
347 block2[1] = append[2] >> 24 | append[3] << 8;
348 block2[2] = append[3] >> 24;
352 block1[2] = block1[2] | append[0] << 16;
353 block1[3] = append[0] >> 16 | append[1] << 16;
354 block2[0] = append[1] >> 16 | append[2] << 16;
355 block2[1] = append[2] >> 16 | append[3] << 16;
356 block2[2] = append[3] >> 16;
360 block1[2] = block1[2] | append[0] << 24;
361 block1[3] = append[0] >> 8 | append[1] << 24;
362 block2[0] = append[1] >> 8 | append[2] << 24;
363 block2[1] = append[2] >> 8 | append[3] << 24;
364 block2[2] = append[3] >> 8;
368 block1[3] = append[0];
369 block2[0] = append[1];
370 block2[1] = append[2];
371 block2[2] = append[3];
375 block1[3] = block1[3] | append[0] << 8;
376 block2[0] = append[0] >> 24 | append[1] << 8;
377 block2[1] = append[1] >> 24 | append[2] << 8;
378 block2[2] = append[2] >> 24 | append[3] << 8;
379 block2[3] = append[3] >> 24;
383 block1[3] = block1[3] | append[0] << 16;
384 block2[0] = append[0] >> 16 | append[1] << 16;
385 block2[1] = append[1] >> 16 | append[2] << 16;
386 block2[2] = append[2] >> 16 | append[3] << 16;
387 block2[3] = append[3] >> 16;
391 block1[3] = block1[3] | append[0] << 24;
392 block2[0] = append[0] >> 8 | append[1] << 24;
393 block2[1] = append[1] >> 8 | append[2] << 24;
394 block2[2] = append[2] >> 8 | append[3] << 24;
395 block2[3] = append[3] >> 8;
399 block2[0] = append[0];
400 block2[1] = append[1];
401 block2[2] = append[2];
402 block2[3] = append[3];
406 block2[0] = block2[0] | append[0] << 8;
407 block2[1] = append[0] >> 24 | append[1] << 8;
408 block2[2] = append[1] >> 24 | append[2] << 8;
409 block2[3] = append[2] >> 24 | append[3] << 8;
410 block3[0] = append[3] >> 24;
414 block2[0] = block2[0] | append[0] << 16;
415 block2[1] = append[0] >> 16 | append[1] << 16;
416 block2[2] = append[1] >> 16 | append[2] << 16;
417 block2[3] = append[2] >> 16 | append[3] << 16;
418 block3[0] = append[3] >> 16;
422 block2[0] = block2[0] | append[0] << 24;
423 block2[1] = append[0] >> 8 | append[1] << 24;
424 block2[2] = append[1] >> 8 | append[2] << 24;
425 block2[3] = append[2] >> 8 | append[3] << 24;
426 block3[0] = append[3] >> 8;
430 block2[1] = append[0];
431 block2[2] = append[1];
432 block2[3] = append[2];
433 block3[0] = append[3];
437 block2[1] = block2[1] | append[0] << 8;
438 block2[2] = append[0] >> 24 | append[1] << 8;
439 block2[3] = append[1] >> 24 | append[2] << 8;
440 block3[0] = append[2] >> 24 | append[3] << 8;
441 block3[1] = append[3] >> 24;
445 block2[1] = block2[1] | append[0] << 16;
446 block2[2] = append[0] >> 16 | append[1] << 16;
447 block2[3] = append[1] >> 16 | append[2] << 16;
448 block3[0] = append[2] >> 16 | append[3] << 16;
449 block3[1] = append[3] >> 16;
453 block2[1] = block2[1] | append[0] << 24;
454 block2[2] = append[0] >> 8 | append[1] << 24;
455 block2[3] = append[1] >> 8 | append[2] << 24;
456 block3[0] = append[2] >> 8 | append[3] << 24;
457 block3[1] = append[3] >> 8;
461 block2[2] = append[0];
462 block2[3] = append[1];
463 block3[0] = append[2];
464 block3[1] = append[3];
468 block2[2] = block2[2] | append[0] << 8;
469 block2[3] = append[0] >> 24 | append[1] << 8;
470 block3[0] = append[1] >> 24 | append[2] << 8;
471 block3[1] = append[2] >> 24 | append[3] << 8;
472 block3[2] = append[3] >> 24;
476 block2[2] = block2[2] | append[0] << 16;
477 block2[3] = append[0] >> 16 | append[1] << 16;
478 block3[0] = append[1] >> 16 | append[2] << 16;
479 block3[1] = append[2] >> 16 | append[3] << 16;
480 block3[2] = append[3] >> 16;
484 block2[2] = block2[2] | append[0] << 24;
485 block2[3] = append[0] >> 8 | append[1] << 24;
486 block3[0] = append[1] >> 8 | append[2] << 24;
487 block3[1] = append[2] >> 8 | append[3] << 24;
488 block3[2] = append[3] >> 8;
492 block2[3] = append[0];
493 block3[0] = append[1];
494 block3[1] = append[2];
495 block3[2] = append[3];
499 block2[3] = block2[3] | append[0] << 8;
500 block3[0] = append[0] >> 24 | append[1] << 8;
501 block3[1] = append[1] >> 24 | append[2] << 8;
502 block3[2] = append[2] >> 24 | append[3] << 8;
503 block3[3] = append[3] >> 24;
507 block2[3] = block2[3] | append[0] << 16;
508 block3[0] = append[0] >> 16 | append[1] << 16;
509 block3[1] = append[1] >> 16 | append[2] << 16;
510 block3[2] = append[2] >> 16 | append[3] << 16;
511 block3[3] = append[3] >> 16;
515 block2[3] = block2[3] | append[0] << 24;
516 block3[0] = append[0] >> 8 | append[1] << 24;
517 block3[1] = append[1] >> 8 | append[2] << 24;
518 block3[2] = append[2] >> 8 | append[3] << 24;
519 block3[3] = append[3] >> 8;
523 block3[0] = append[0];
524 block3[1] = append[1];
525 block3[2] = append[2];
526 block3[3] = append[3];
530 block3[0] = block3[0] | append[0] << 8;
531 block3[1] = append[0] >> 24 | append[1] << 8;
532 block3[2] = append[1] >> 24 | append[2] << 8;
533 block3[3] = append[2] >> 24 | append[3] << 8;
537 block3[0] = block3[0] | append[0] << 16;
538 block3[1] = append[0] >> 16 | append[1] << 16;
539 block3[2] = append[1] >> 16 | append[2] << 16;
540 block3[3] = append[2] >> 16 | append[3] << 16;
544 block3[0] = block3[0] | append[0] << 24;
545 block3[1] = append[0] >> 8 | append[1] << 24;
546 block3[2] = append[1] >> 8 | append[2] << 24;
547 block3[3] = append[2] >> 8 | append[3] << 24;
551 block3[1] = append[0];
552 block3[2] = append[1];
553 block3[3] = append[2];
557 block3[1] = block3[1] | append[0] << 8;
558 block3[2] = append[0] >> 24 | append[1] << 8;
559 block3[3] = append[1] >> 24 | append[2] << 8;
563 block3[1] = block3[1] | append[0] << 16;
564 block3[2] = append[0] >> 16 | append[1] << 16;
565 block3[3] = append[1] >> 16 | append[2] << 16;
569 block3[1] = block3[1] | append[0] << 24;
570 block3[2] = append[0] >> 8 | append[1] << 24;
571 block3[3] = append[1] >> 8 | append[2] << 24;
575 block3[2] = append[0];
576 block3[3] = append[1];
581 static void memcat16_x80 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append[4])
586 block0[0] = append[0];
587 block0[1] = append[1];
588 block0[2] = append[2];
589 block0[3] = append[3];
594 block0[0] = block0[0] | append[0] << 8;
595 block0[1] = append[0] >> 24 | append[1] << 8;
596 block0[2] = append[1] >> 24 | append[2] << 8;
597 block0[3] = append[2] >> 24 | append[3] << 8;
598 block1[0] = append[3] >> 24 | 0x80u << 8;
602 block0[0] = block0[0] | append[0] << 16;
603 block0[1] = append[0] >> 16 | append[1] << 16;
604 block0[2] = append[1] >> 16 | append[2] << 16;
605 block0[3] = append[2] >> 16 | append[3] << 16;
606 block1[0] = append[3] >> 16 | 0x80u << 16;
610 block0[0] = block0[0] | append[0] << 24;
611 block0[1] = append[0] >> 8 | append[1] << 24;
612 block0[2] = append[1] >> 8 | append[2] << 24;
613 block0[3] = append[2] >> 8 | append[3] << 24;
614 block1[0] = append[3] >> 8 | 0x80u << 24;
618 block0[1] = append[0];
619 block0[2] = append[1];
620 block0[3] = append[2];
621 block1[0] = append[3];
626 block0[1] = block0[1] | append[0] << 8;
627 block0[2] = append[0] >> 24 | append[1] << 8;
628 block0[3] = append[1] >> 24 | append[2] << 8;
629 block1[0] = append[2] >> 24 | append[3] << 8;
630 block1[1] = append[3] >> 24 | 0x80u << 8;
634 block0[1] = block0[1] | append[0] << 16;
635 block0[2] = append[0] >> 16 | append[1] << 16;
636 block0[3] = append[1] >> 16 | append[2] << 16;
637 block1[0] = append[2] >> 16 | append[3] << 16;
638 block1[1] = append[3] >> 16 | 0x80u << 16;
642 block0[1] = block0[1] | append[0] << 24;
643 block0[2] = append[0] >> 8 | append[1] << 24;
644 block0[3] = append[1] >> 8 | append[2] << 24;
645 block1[0] = append[2] >> 8 | append[3] << 24;
646 block1[1] = append[3] >> 8 | 0x80u << 24;
650 block0[2] = append[0];
651 block0[3] = append[1];
652 block1[0] = append[2];
653 block1[1] = append[3];
658 block0[2] = block0[2] | append[0] << 8;
659 block0[3] = append[0] >> 24 | append[1] << 8;
660 block1[0] = append[1] >> 24 | append[2] << 8;
661 block1[1] = append[2] >> 24 | append[3] << 8;
662 block1[2] = append[3] >> 24 | 0x80u << 8;
666 block0[2] = block0[2] | append[0] << 16;
667 block0[3] = append[0] >> 16 | append[1] << 16;
668 block1[0] = append[1] >> 16 | append[2] << 16;
669 block1[1] = append[2] >> 16 | append[3] << 16;
670 block1[2] = append[3] >> 16 | 0x80u << 16;
674 block0[2] = block0[2] | append[0] << 24;
675 block0[3] = append[0] >> 8 | append[1] << 24;
676 block1[0] = append[1] >> 8 | append[2] << 24;
677 block1[1] = append[2] >> 8 | append[3] << 24;
678 block1[2] = append[3] >> 8 | 0x80u << 24;
682 block0[3] = append[0];
683 block1[0] = append[1];
684 block1[1] = append[2];
685 block1[2] = append[3];
690 block0[3] = block0[3] | append[0] << 8;
691 block1[0] = append[0] >> 24 | append[1] << 8;
692 block1[1] = append[1] >> 24 | append[2] << 8;
693 block1[2] = append[2] >> 24 | append[3] << 8;
694 block1[3] = append[3] >> 24 | 0x80u << 8;
698 block0[3] = block0[3] | append[0] << 16;
699 block1[0] = append[0] >> 16 | append[1] << 16;
700 block1[1] = append[1] >> 16 | append[2] << 16;
701 block1[2] = append[2] >> 16 | append[3] << 16;
702 block1[3] = append[3] >> 16 | 0x80u << 16;
706 block0[3] = block0[3] | append[0] << 24;
707 block1[0] = append[0] >> 8 | append[1] << 24;
708 block1[1] = append[1] >> 8 | append[2] << 24;
709 block1[2] = append[2] >> 8 | append[3] << 24;
710 block1[3] = append[3] >> 8 | 0x80u << 24;
714 block1[0] = append[0];
715 block1[1] = append[1];
716 block1[2] = append[2];
717 block1[3] = append[3];
722 block1[0] = block1[0] | append[0] << 8;
723 block1[1] = append[0] >> 24 | append[1] << 8;
724 block1[2] = append[1] >> 24 | append[2] << 8;
725 block1[3] = append[2] >> 24 | append[3] << 8;
726 block2[0] = append[3] >> 24 | 0x80u << 8;
730 block1[0] = block1[0] | append[0] << 16;
731 block1[1] = append[0] >> 16 | append[1] << 16;
732 block1[2] = append[1] >> 16 | append[2] << 16;
733 block1[3] = append[2] >> 16 | append[3] << 16;
734 block2[0] = append[3] >> 16 | 0x80u << 16;
738 block1[0] = block1[0] | append[0] << 24;
739 block1[1] = append[0] >> 8 | append[1] << 24;
740 block1[2] = append[1] >> 8 | append[2] << 24;
741 block1[3] = append[2] >> 8 | append[3] << 24;
742 block2[0] = append[3] >> 8 | 0x80u << 24;
746 block1[1] = append[0];
747 block1[2] = append[1];
748 block1[3] = append[2];
749 block2[0] = append[3];
754 block1[1] = block1[1] | append[0] << 8;
755 block1[2] = append[0] >> 24 | append[1] << 8;
756 block1[3] = append[1] >> 24 | append[2] << 8;
757 block2[0] = append[2] >> 24 | append[3] << 8;
758 block2[1] = append[3] >> 24 | 0x80u << 8;
762 block1[1] = block1[1] | append[0] << 16;
763 block1[2] = append[0] >> 16 | append[1] << 16;
764 block1[3] = append[1] >> 16 | append[2] << 16;
765 block2[0] = append[2] >> 16 | append[3] << 16;
766 block2[1] = append[3] >> 16 | 0x80u << 16;
770 block1[1] = block1[1] | append[0] << 24;
771 block1[2] = append[0] >> 8 | append[1] << 24;
772 block1[3] = append[1] >> 8 | append[2] << 24;
773 block2[0] = append[2] >> 8 | append[3] << 24;
774 block2[1] = append[3] >> 8 | 0x80u << 24;
778 block1[2] = append[0];
779 block1[3] = append[1];
780 block2[0] = append[2];
781 block2[1] = append[3];
786 block1[2] = block1[2] | append[0] << 8;
787 block1[3] = append[0] >> 24 | append[1] << 8;
788 block2[0] = append[1] >> 24 | append[2] << 8;
789 block2[1] = append[2] >> 24 | append[3] << 8;
790 block2[2] = append[3] >> 24 | 0x80u << 8;
794 block1[2] = block1[2] | append[0] << 16;
795 block1[3] = append[0] >> 16 | append[1] << 16;
796 block2[0] = append[1] >> 16 | append[2] << 16;
797 block2[1] = append[2] >> 16 | append[3] << 16;
798 block2[2] = append[3] >> 16 | 0x80u << 16;
802 block1[2] = block1[2] | append[0] << 24;
803 block1[3] = append[0] >> 8 | append[1] << 24;
804 block2[0] = append[1] >> 8 | append[2] << 24;
805 block2[1] = append[2] >> 8 | append[3] << 24;
806 block2[2] = append[3] >> 8 | 0x80u << 24;
810 block1[3] = append[0];
811 block2[0] = append[1];
812 block2[1] = append[2];
813 block2[2] = append[3];
818 block1[3] = block1[3] | append[0] << 8;
819 block2[0] = append[0] >> 24 | append[1] << 8;
820 block2[1] = append[1] >> 24 | append[2] << 8;
821 block2[2] = append[2] >> 24 | append[3] << 8;
822 block2[3] = append[3] >> 24 | 0x80u << 8;
826 block1[3] = block1[3] | append[0] << 16;
827 block2[0] = append[0] >> 16 | append[1] << 16;
828 block2[1] = append[1] >> 16 | append[2] << 16;
829 block2[2] = append[2] >> 16 | append[3] << 16;
830 block2[3] = append[3] >> 16 | 0x80u << 16;
834 block1[3] = block1[3] | append[0] << 24;
835 block2[0] = append[0] >> 8 | append[1] << 24;
836 block2[1] = append[1] >> 8 | append[2] << 24;
837 block2[2] = append[2] >> 8 | append[3] << 24;
838 block2[3] = append[3] >> 8 | 0x80u << 24;
842 block2[0] = append[0];
843 block2[1] = append[1];
844 block2[2] = append[2];
845 block2[3] = append[3];
850 block2[0] = block2[0] | append[0] << 8;
851 block2[1] = append[0] >> 24 | append[1] << 8;
852 block2[2] = append[1] >> 24 | append[2] << 8;
853 block2[3] = append[2] >> 24 | append[3] << 8;
854 block3[0] = append[3] >> 24 | 0x80u << 8;
858 block2[0] = block2[0] | append[0] << 16;
859 block2[1] = append[0] >> 16 | append[1] << 16;
860 block2[2] = append[1] >> 16 | append[2] << 16;
861 block2[3] = append[2] >> 16 | append[3] << 16;
862 block3[0] = append[3] >> 16 | 0x80u << 16;
866 block2[0] = block2[0] | append[0] << 24;
867 block2[1] = append[0] >> 8 | append[1] << 24;
868 block2[2] = append[1] >> 8 | append[2] << 24;
869 block2[3] = append[2] >> 8 | append[3] << 24;
870 block3[0] = append[3] >> 8 | 0x80u << 24;
874 block2[1] = append[0];
875 block2[2] = append[1];
876 block2[3] = append[2];
877 block3[0] = append[3];
882 block2[1] = block2[1] | append[0] << 8;
883 block2[2] = append[0] >> 24 | append[1] << 8;
884 block2[3] = append[1] >> 24 | append[2] << 8;
885 block3[0] = append[2] >> 24 | append[3] << 8;
886 block3[1] = append[3] >> 24 | 0x80u << 8;
890 block2[1] = block2[1] | append[0] << 16;
891 block2[2] = append[0] >> 16 | append[1] << 16;
892 block2[3] = append[1] >> 16 | append[2] << 16;
893 block3[0] = append[2] >> 16 | append[3] << 16;
894 block3[1] = append[3] >> 16 | 0x80u << 16;
898 block2[1] = block2[1] | append[0] << 24;
899 block2[2] = append[0] >> 8 | append[1] << 24;
900 block2[3] = append[1] >> 8 | append[2] << 24;
901 block3[0] = append[2] >> 8 | append[3] << 24;
902 block3[1] = append[3] >> 8 | 0x80u << 24;
906 block2[2] = append[0];
907 block2[3] = append[1];
908 block3[0] = append[2];
909 block3[1] = append[3];
914 block2[2] = block2[2] | append[0] << 8;
915 block2[3] = append[0] >> 24 | append[1] << 8;
916 block3[0] = append[1] >> 24 | append[2] << 8;
917 block3[1] = append[2] >> 24 | append[3] << 8;
918 block3[2] = append[3] >> 24 | 0x80u << 8;
922 block2[2] = block2[2] | append[0] << 16;
923 block2[3] = append[0] >> 16 | append[1] << 16;
924 block3[0] = append[1] >> 16 | append[2] << 16;
925 block3[1] = append[2] >> 16 | append[3] << 16;
926 block3[2] = append[3] >> 16 | 0x80u << 16;
930 block2[2] = block2[2] | append[0] << 24;
931 block2[3] = append[0] >> 8 | append[1] << 24;
932 block3[0] = append[1] >> 8 | append[2] << 24;
933 block3[1] = append[2] >> 8 | append[3] << 24;
934 block3[2] = append[3] >> 8 | 0x80u << 24;
938 block2[3] = append[0];
939 block3[0] = append[1];
940 block3[1] = append[2];
941 block3[2] = append[3];
946 block2[3] = block2[3] | append[0] << 8;
947 block3[0] = append[0] >> 24 | append[1] << 8;
948 block3[1] = append[1] >> 24 | append[2] << 8;
949 block3[2] = append[2] >> 24 | append[3] << 8;
950 block3[3] = append[3] >> 24 | 0x80u << 8;
954 block2[3] = block2[3] | append[0] << 16;
955 block3[0] = append[0] >> 16 | append[1] << 16;
956 block3[1] = append[1] >> 16 | append[2] << 16;
957 block3[2] = append[2] >> 16 | append[3] << 16;
958 block3[3] = append[3] >> 16 | 0x80u << 16;
962 block2[3] = block2[3] | append[0] << 24;
963 block3[0] = append[0] >> 8 | append[1] << 24;
964 block3[1] = append[1] >> 8 | append[2] << 24;
965 block3[2] = append[2] >> 8 | append[3] << 24;
966 block3[3] = append[3] >> 8 | 0x80u << 24;
970 block3[0] = append[0];
971 block3[1] = append[1];
972 block3[2] = append[2];
973 block3[3] = append[3];
977 block3[0] = block3[0] | append[0] << 8;
978 block3[1] = append[0] >> 24 | append[1] << 8;
979 block3[2] = append[1] >> 24 | append[2] << 8;
980 block3[3] = append[2] >> 24 | append[3] << 8;
984 block3[0] = block3[0] | append[0] << 16;
985 block3[1] = append[0] >> 16 | append[1] << 16;
986 block3[2] = append[1] >> 16 | append[2] << 16;
987 block3[3] = append[2] >> 16 | append[3] << 16;
991 block3[0] = block3[0] | append[0] << 24;
992 block3[1] = append[0] >> 8 | append[1] << 24;
993 block3[2] = append[1] >> 8 | append[2] << 24;
994 block3[3] = append[2] >> 8 | append[3] << 24;
998 block3[1] = append[0];
999 block3[2] = append[1];
1000 block3[3] = append[2];
1004 block3[1] = block3[1] | append[0] << 8;
1005 block3[2] = append[0] >> 24 | append[1] << 8;
1006 block3[3] = append[1] >> 24 | append[2] << 8;
1010 block3[1] = block3[1] | append[0] << 16;
1011 block3[2] = append[0] >> 16 | append[1] << 16;
1012 block3[3] = append[1] >> 16 | append[2] << 16;
1016 block3[1] = block3[1] | append[0] << 24;
1017 block3[2] = append[0] >> 8 | append[1] << 24;
1018 block3[3] = append[1] >> 8 | append[2] << 24;
1022 block3[2] = append[0];
1023 block3[3] = append[1];
1028 static void memcat8 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32 append[2])
1033 block0[0] = append[0];
1034 block0[1] = append[1];
1038 block0[0] = block0[0] | append[0] << 8;
1039 block0[1] = append[0] >> 24 | append[1] << 8;
1040 block0[2] = append[1] >> 24;
1044 block0[0] = block0[0] | append[0] << 16;
1045 block0[1] = append[0] >> 16 | append[1] << 16;
1046 block0[2] = append[1] >> 16;
1050 block0[0] = block0[0] | append[0] << 24;
1051 block0[1] = append[0] >> 8 | append[1] << 24;
1052 block0[2] = append[1] >> 8;
1056 block0[1] = append[0];
1057 block0[2] = append[1];
1061 block0[1] = block0[1] | append[0] << 8;
1062 block0[2] = append[0] >> 24 | append[1] << 8;
1063 block0[3] = append[1] >> 24;
1067 block0[1] = block0[1] | append[0] << 16;
1068 block0[2] = append[0] >> 16 | append[1] << 16;
1069 block0[3] = append[1] >> 16;
1073 block0[1] = block0[1] | append[0] << 24;
1074 block0[2] = append[0] >> 8 | append[1] << 24;
1075 block0[3] = append[1] >> 8;
1079 block0[2] = append[0];
1080 block0[3] = append[1];
1084 block0[2] = block0[2] | append[0] << 8;
1085 block0[3] = append[0] >> 24 | append[1] << 8;
1086 block1[0] = append[1] >> 24;
1090 block0[2] = block0[2] | append[0] << 16;
1091 block0[3] = append[0] >> 16 | append[1] << 16;
1092 block1[0] = append[1] >> 16;
1096 block0[2] = block0[2] | append[0] << 24;
1097 block0[3] = append[0] >> 8 | append[1] << 24;
1098 block1[0] = append[1] >> 8;
1102 block0[3] = append[0];
1103 block1[0] = append[1];
1107 block0[3] = block0[3] | append[0] << 8;
1108 block1[0] = append[0] >> 24 | append[1] << 8;
1109 block1[1] = append[1] >> 24;
1113 block0[3] = block0[3] | append[0] << 16;
1114 block1[0] = append[0] >> 16 | append[1] << 16;
1115 block1[1] = append[1] >> 16;
1119 block0[3] = block0[3] | append[0] << 24;
1120 block1[0] = append[0] >> 8 | append[1] << 24;
1121 block1[1] = append[1] >> 8;
1125 block1[0] = append[0];
1126 block1[1] = append[1];
1130 block1[0] = block1[0] | append[0] << 8;
1131 block1[1] = append[0] >> 24 | append[1] << 8;
1132 block1[2] = append[1] >> 24;
1136 block1[0] = block1[0] | append[0] << 16;
1137 block1[1] = append[0] >> 16 | append[1] << 16;
1138 block1[2] = append[1] >> 16;
1142 block1[0] = block1[0] | append[0] << 24;
1143 block1[1] = append[0] >> 8 | append[1] << 24;
1144 block1[2] = append[1] >> 8;
1148 block1[1] = append[0];
1149 block1[2] = append[1];
1153 block1[1] = block1[1] | append[0] << 8;
1154 block1[2] = append[0] >> 24 | append[1] << 8;
1155 block1[3] = append[1] >> 24;
1159 block1[1] = block1[1] | append[0] << 16;
1160 block1[2] = append[0] >> 16 | append[1] << 16;
1161 block1[3] = append[1] >> 16;
1165 block1[1] = block1[1] | append[0] << 24;
1166 block1[2] = append[0] >> 8 | append[1] << 24;
1167 block1[3] = append[1] >> 8;
1171 block1[2] = append[0];
1172 block1[3] = append[1];
1176 block1[2] = block1[2] | append[0] << 8;
1177 block1[3] = append[0] >> 24 | append[1] << 8;
1178 block2[0] = append[1] >> 24;
1182 block1[2] = block1[2] | append[0] << 16;
1183 block1[3] = append[0] >> 16 | append[1] << 16;
1184 block2[0] = append[1] >> 16;
1188 block1[2] = block1[2] | append[0] << 24;
1189 block1[3] = append[0] >> 8 | append[1] << 24;
1190 block2[0] = append[1] >> 8;
1194 block1[3] = append[0];
1195 block2[0] = append[1];
1199 block1[3] = block1[3] | append[0] << 8;
1200 block2[0] = append[0] >> 24 | append[1] << 8;
1201 block2[1] = append[1] >> 24;
1205 block1[3] = block1[3] | append[0] << 16;
1206 block2[0] = append[0] >> 16 | append[1] << 16;
1207 block2[1] = append[1] >> 16;
1211 block1[3] = block1[3] | append[0] << 24;
1212 block2[0] = append[0] >> 8 | append[1] << 24;
1213 block2[1] = append[1] >> 8;
1217 block2[0] = append[0];
1218 block2[1] = append[1];
1222 block2[0] = block2[0] | append[0] << 8;
1223 block2[1] = append[0] >> 24 | append[1] << 8;
1224 block2[2] = append[1] >> 24;
1228 block2[0] = block2[0] | append[0] << 16;
1229 block2[1] = append[0] >> 16 | append[1] << 16;
1230 block2[2] = append[1] >> 16;
1234 block2[0] = block2[0] | append[0] << 24;
1235 block2[1] = append[0] >> 8 | append[1] << 24;
1236 block2[2] = append[1] >> 8;
1240 block2[1] = append[0];
1241 block2[2] = append[1];
1245 block2[1] = block2[1] | append[0] << 8;
1246 block2[2] = append[0] >> 24 | append[1] << 8;
1247 block2[3] = append[1] >> 24;
1251 block2[1] = block2[1] | append[0] << 16;
1252 block2[2] = append[0] >> 16 | append[1] << 16;
1253 block2[3] = append[1] >> 16;
1257 block2[1] = block2[1] | append[0] << 24;
1258 block2[2] = append[0] >> 8 | append[1] << 24;
1259 block2[3] = append[1] >> 8;
1263 block2[2] = append[0];
1264 block2[3] = append[1];
1268 block2[2] = block2[2] | append[0] << 8;
1269 block2[3] = append[0] >> 24 | append[1] << 8;
1270 block3[0] = append[1] >> 24;
1274 block2[2] = block2[2] | append[0] << 16;
1275 block2[3] = append[0] >> 16 | append[1] << 16;
1276 block3[0] = append[1] >> 16;
1280 block2[2] = block2[2] | append[0] << 24;
1281 block2[3] = append[0] >> 8 | append[1] << 24;
1282 block3[0] = append[1] >> 8;
1286 block2[3] = append[0];
1287 block3[0] = append[1];
1291 block2[3] = block2[3] | append[0] << 8;
1292 block3[0] = append[0] >> 24 | append[1] << 8;
1293 block3[1] = append[1] >> 24;
1297 block2[3] = block2[3] | append[0] << 16;
1298 block3[0] = append[0] >> 16 | append[1] << 16;
1299 block3[1] = append[1] >> 16;
1303 block2[3] = block2[3] | append[0] << 24;
1304 block3[0] = append[0] >> 8 | append[1] << 24;
1305 block3[1] = append[1] >> 8;
1309 block3[0] = append[0];
1310 block3[1] = append[1];
1314 block3[0] = block3[0] | append[0] << 8;
1315 block3[1] = append[0] >> 24 | append[1] << 8;
1316 block3[2] = append[1] >> 24;
1320 block3[0] = block3[0] | append[0] << 16;
1321 block3[1] = append[0] >> 16 | append[1] << 16;
1322 block3[2] = append[1] >> 16;
1326 block3[0] = block3[0] | append[0] << 24;
1327 block3[1] = append[0] >> 8 | append[1] << 24;
1328 block3[2] = append[1] >> 8;
1332 block3[1] = append[0];
1333 block3[2] = append[1];
1337 block3[1] = block3[1] | append[0] << 8;
1338 block3[2] = append[0] >> 24 | append[1] << 8;
1339 block3[3] = append[1] >> 24;
1343 block3[1] = block3[1] | append[0] << 16;
1344 block3[2] = append[0] >> 16 | append[1] << 16;
1345 block3[3] = append[1] >> 16;
1349 block3[1] = block3[1] | append[0] << 24;
1350 block3[2] = append[0] >> 8 | append[1] << 24;
1351 block3[3] = append[1] >> 8;
1355 block3[2] = append[0];
1356 block3[3] = append[1];
1361 static void append_sign (u32x block0[4], u32x block1[4], const u32 block_len)
1366 block0[0] = md5crypt_magic;
1370 block0[0] = block0[0] | md5crypt_magic << 8;
1371 block0[1] = md5crypt_magic >> 24;
1375 block0[0] = block0[0] | md5crypt_magic << 16;
1376 block0[1] = md5crypt_magic >> 16;
1380 block0[0] = block0[0] | md5crypt_magic << 24;
1381 block0[1] = md5crypt_magic >> 8;
1385 block0[1] = md5crypt_magic;
1389 block0[1] = block0[1] | md5crypt_magic << 8;
1390 block0[2] = md5crypt_magic >> 24;
1394 block0[1] = block0[1] | md5crypt_magic << 16;
1395 block0[2] = md5crypt_magic >> 16;
1399 block0[1] = block0[1] | md5crypt_magic << 24;
1400 block0[2] = md5crypt_magic >> 8;
1404 block0[2] = md5crypt_magic;
1408 block0[2] = block0[2] | md5crypt_magic << 8;
1409 block0[3] = md5crypt_magic >> 24;
1413 block0[2] = block0[2] | md5crypt_magic << 16;
1414 block0[3] = md5crypt_magic >> 16;
1418 block0[2] = block0[2] | md5crypt_magic << 24;
1419 block0[3] = md5crypt_magic >> 8;
1423 block0[3] = md5crypt_magic;
1427 block0[3] = block0[3] | md5crypt_magic << 8;
1428 block1[0] = md5crypt_magic >> 24;
1432 block0[3] = block0[3] | md5crypt_magic << 16;
1433 block1[0] = md5crypt_magic >> 16;
1437 block0[3] = block0[3] | md5crypt_magic << 24;
1438 block1[0] = md5crypt_magic >> 8;
1443 static void append_1st (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append)
1452 block0[0] = block0[0] | append << 8;
1456 block0[0] = block0[0] | append << 16;
1460 block0[0] = block0[0] | append << 24;
1468 block0[1] = block0[1] | append << 8;
1472 block0[1] = block0[1] | append << 16;
1476 block0[1] = block0[1] | append << 24;
1484 block0[2] = block0[2] | append << 8;
1488 block0[2] = block0[2] | append << 16;
1492 block0[2] = block0[2] | append << 24;
1500 block0[3] = block0[3] | append << 8;
1504 block0[3] = block0[3] | append << 16;
1508 block0[3] = block0[3] | append << 24;
1516 block1[0] = block1[0] | append << 8;
1520 block1[0] = block1[0] | append << 16;
1524 block1[0] = block1[0] | append << 24;
1532 block1[1] = block1[1] | append << 8;
1536 block1[1] = block1[1] | append << 16;
1540 block1[1] = block1[1] | append << 24;
1548 block1[2] = block1[2] | append << 8;
1552 block1[2] = block1[2] | append << 16;
1556 block1[2] = block1[2] | append << 24;
1564 block1[3] = block1[3] | append << 8;
1568 block1[3] = block1[3] | append << 16;
1572 block1[3] = block1[3] | append << 24;
1580 block2[0] = block2[0] | append << 8;
1584 block2[0] = block2[0] | append << 16;
1588 block2[0] = block2[0] | append << 24;
1596 block2[1] = block2[1] | append << 8;
1600 block2[1] = block2[1] | append << 16;
1604 block2[1] = block2[1] | append << 24;
1612 block2[2] = block2[2] | append << 8;
1616 block2[2] = block2[2] | append << 16;
1620 block2[2] = block2[2] | append << 24;
1628 block2[3] = block2[3] | append << 8;
1632 block2[3] = block2[3] | append << 16;
1636 block2[3] = block2[3] | append << 24;
1644 block3[0] = block3[0] | append << 8;
1648 block3[0] = block3[0] | append << 16;
1652 block3[0] = block3[0] | append << 24;
1660 block3[1] = block3[1] | append << 8;
1664 block3[1] = block3[1] | append << 16;
1668 block3[1] = block3[1] | append << 24;
1677 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00500_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1683 const u32 gid = get_global_id (0);
1685 if (gid >= gid_max) return;
1689 w0[0] = pws[gid].i[0];
1690 w0[1] = pws[gid].i[1];
1691 w0[2] = pws[gid].i[2];
1692 w0[3] = pws[gid].i[3];
1694 const u32 pw_len = pws[gid].pw_len;
1702 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1703 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1705 const u32 salt_len = salt_bufs[salt_pos].salt_len;
1711 //memcat16 (block0, block1, block2, block3, block_len, w0);
1712 //block_len += pw_len;
1714 u32 block_len = pw_len;
1744 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1746 block_len += salt_len;
1748 memcat16 (block0, block1, block2, block3, block_len, w0);
1750 block_len += pw_len;
1752 append_0x80_4 (block0, block1, block2, block3, block_len);
1754 block3[2] = block_len * 8;
1763 md5_transform (block0, block1, block2, block3, digest);
1765 /* The password first, since that is what is most unknown */
1766 /* Then our magic string */
1767 /* Then the raw salt */
1768 /* Then just as many characters of the MD5(pw,salt,pw) */
1770 //memcat16 (block0, block1, block2, block3, block_len, w);
1771 //block_len += pw_len;
1795 append_sign (block0, block1, block_len);
1799 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1801 block_len += salt_len;
1803 truncate_block (digest, pw_len);
1805 memcat16 (block0, block1, block2, block3, block_len, digest);
1807 block_len += pw_len;
1809 /* Then something really weird... */
1811 u32x append = block0[0] & 0xFF;
1813 for (u32 j = pw_len; j; j >>= 1)
1817 append_1st (block0, block1, block2, block3, block_len, append);
1823 append_0x80_4 (block0, block1, block2, block3, block_len);
1825 block3[2] = block_len * 8;
1832 md5_transform (block0, block1, block2, block3, digest);
1834 tmps[gid].digest_buf[0] = digest[0];
1835 tmps[gid].digest_buf[1] = digest[1];
1836 tmps[gid].digest_buf[2] = digest[2];
1837 tmps[gid].digest_buf[3] = digest[3];
1840 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00500_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1846 const u32 gid = get_global_id (0);
1848 if (gid >= gid_max) return;
1852 w0[0] = pws[gid].i[0];
1853 w0[1] = pws[gid].i[1];
1854 w0[2] = pws[gid].i[2];
1855 w0[3] = pws[gid].i[3];
1857 const u32 pw_len = pws[gid].pw_len;
1866 append_0x80_1 (w0_x80, pw_len);
1874 salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
1875 salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
1877 const u32 salt_len = salt_bufs[salt_pos].salt_len;
1885 digest[0] = tmps[gid].digest_buf[0];
1886 digest[1] = tmps[gid].digest_buf[1];
1887 digest[2] = tmps[gid].digest_buf[2];
1888 digest[3] = tmps[gid].digest_buf[3];
1894 /* and now, just to make sure things don't run too fast */
1926 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
1939 const u32 j1 = (j & 1) ? 1 : 0;
1940 const u32 j3 = (j % 3) ? 1 : 0;
1941 const u32 j7 = (j % 7) ? 1 : 0;
1954 memcat8 (block0, block1, block2, block3, block_len, salt_buf);
1956 block_len += salt_len;
1961 memcat16 (block0, block1, block2, block3, block_len, w0);
1963 block_len += pw_len;
1966 memcat16_x80 (block0, block1, block2, block3, block_len, digest);
1972 block0[0] = digest[0];
1973 block0[1] = digest[1];
1974 block0[2] = digest[2];
1975 block0[3] = digest[3];
1981 block1[0] = salt_buf[0];
1982 block1[1] = salt_buf[1];
1984 block_len += salt_len;
1986 memcat16 (block0, block1, block2, block3, block_len, w0);
1988 block_len += pw_len;
1992 block1[0] = salt_buf[0];
1993 block1[1] = salt_buf[1];
1995 block_len += salt_len;
2004 block_len += pw_len;
2007 memcat16 (block0, block1, block2, block3, block_len, w0_x80);
2009 block_len += pw_len;
2012 block3[2] = block_len * 8;
2019 md5_transform (block0, block1, block2, block3, digest);
2022 tmps[gid].digest_buf[0] = digest[0];
2023 tmps[gid].digest_buf[1] = digest[1];
2024 tmps[gid].digest_buf[2] = digest[2];
2025 tmps[gid].digest_buf[3] = digest[3];
2028 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00500_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2034 const u32 gid = get_global_id (0);
2036 if (gid >= gid_max) return;
2038 const u32 lid = get_local_id (0);
2044 const u32x r0 = tmps[gid].digest_buf[DGST_R0];
2045 const u32x r1 = tmps[gid].digest_buf[DGST_R1];
2046 const u32x r2 = tmps[gid].digest_buf[DGST_R2];
2047 const u32x r3 = tmps[gid].digest_buf[DGST_R3];
2051 #include VECT_COMPARE_M