2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
27 #include "include/rp_gpu.h"
31 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
32 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
36 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
41 #define BOX(i,n,S) u32x ((S)[(n)][(i)])
45 #define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y])
48 #define round(k1,k2,tbl) \
52 l ^= BOX ((t >> 0) & 0xff, 0, tbl) ^ \
53 BOX ((t >> 8) & 0xff, 1, tbl) ^ \
54 BOX ((t >> 16) & 0xff, 2, tbl) ^ \
55 BOX ((t >> 24) & 0xff, 3, tbl); \
57 r ^= BOX ((t >> 0) & 0xff, 0, tbl) ^ \
58 BOX ((t >> 8) & 0xff, 1, tbl) ^ \
59 BOX ((t >> 16) & 0xff, 2, tbl) ^ \
60 BOX ((t >> 24) & 0xff, 3, tbl); \
63 #define R(k,h,s,i,t) \
69 round (k[0], k[1], t); \
70 round (k[2], k[3], t); \
71 round (k[4], k[5], t); \
72 round (k[6], k[7], t); \
73 round (k[0], k[1], t); \
74 round (k[2], k[3], t); \
75 round (k[4], k[5], t); \
76 round (k[6], k[7], t); \
77 round (k[0], k[1], t); \
78 round (k[2], k[3], t); \
79 round (k[4], k[5], t); \
80 round (k[6], k[7], t); \
81 round (k[7], k[6], t); \
82 round (k[5], k[4], t); \
83 round (k[3], k[2], t); \
84 round (k[1], k[0], t); \
100 k[0] = ((w[0] & 0x000000ff) << 0) \
101 | ((w[2] & 0x000000ff) << 8) \
102 | ((w[4] & 0x000000ff) << 16) \
103 | ((w[6] & 0x000000ff) << 24); \
104 k[1] = ((w[0] & 0x0000ff00) >> 8) \
105 | ((w[2] & 0x0000ff00) >> 0) \
106 | ((w[4] & 0x0000ff00) << 8) \
107 | ((w[6] & 0x0000ff00) << 16); \
108 k[2] = ((w[0] & 0x00ff0000) >> 16) \
109 | ((w[2] & 0x00ff0000) >> 8) \
110 | ((w[4] & 0x00ff0000) << 0) \
111 | ((w[6] & 0x00ff0000) << 8); \
112 k[3] = ((w[0] & 0xff000000) >> 24) \
113 | ((w[2] & 0xff000000) >> 16) \
114 | ((w[4] & 0xff000000) >> 8) \
115 | ((w[6] & 0xff000000) >> 0); \
116 k[4] = ((w[1] & 0x000000ff) << 0) \
117 | ((w[3] & 0x000000ff) << 8) \
118 | ((w[5] & 0x000000ff) << 16) \
119 | ((w[7] & 0x000000ff) << 24); \
120 k[5] = ((w[1] & 0x0000ff00) >> 8) \
121 | ((w[3] & 0x0000ff00) >> 0) \
122 | ((w[5] & 0x0000ff00) << 8) \
123 | ((w[7] & 0x0000ff00) << 16); \
124 k[6] = ((w[1] & 0x00ff0000) >> 16) \
125 | ((w[3] & 0x00ff0000) >> 8) \
126 | ((w[5] & 0x00ff0000) << 0) \
127 | ((w[7] & 0x00ff0000) << 8); \
128 k[7] = ((w[1] & 0xff000000) >> 24) \
129 | ((w[3] & 0xff000000) >> 16) \
130 | ((w[5] & 0xff000000) >> 8) \
131 | ((w[7] & 0xff000000) >> 0);
168 x[0] ^= 0xff00ff00; \
169 x[1] ^= 0xff00ff00; \
170 x[2] ^= 0x00ff00ff; \
171 x[3] ^= 0x00ff00ff; \
172 x[4] ^= 0x00ffff00; \
173 x[5] ^= 0xff0000ff; \
174 x[6] ^= 0x000000ff; \
177 #define SHIFT12(u,m,s) \
178 u[0] = m[0] ^ s[6]; \
179 u[1] = m[1] ^ s[7]; \
180 u[2] = m[2] ^ (s[0] << 16) \
182 ^ (s[0] & 0x0000ffff) \
183 ^ (s[1] & 0x0000ffff) \
188 ^ (s[7] & 0xffff0000) \
190 u[3] = m[3] ^ (s[0] & 0x0000ffff) \
192 ^ (s[1] & 0x0000ffff) \
201 ^ (s[7] & 0x0000ffff) \
204 u[4] = m[4] ^ (s[0] & 0xffff0000) \
207 ^ (s[1] & 0xffff0000) \
216 ^ (s[7] & 0x0000ffff) \
219 u[5] = m[5] ^ (s[0] << 16) \
221 ^ (s[0] & 0xffff0000) \
222 ^ (s[1] & 0x0000ffff) \
232 ^ (s[7] & 0xffff0000) \
248 u[7] = m[7] ^ (s[0] & 0xffff0000) \
250 ^ (s[1] & 0x0000ffff) \
259 ^ (s[7] & 0x0000ffff) \
263 #define SHIFT16(h,v,u) \
264 v[0] = h[0] ^ (u[1] << 16) \
266 v[1] = h[1] ^ (u[2] << 16) \
268 v[2] = h[2] ^ (u[3] << 16) \
270 v[3] = h[3] ^ (u[4] << 16) \
272 v[4] = h[4] ^ (u[5] << 16) \
274 v[5] = h[5] ^ (u[6] << 16) \
276 v[6] = h[6] ^ (u[7] << 16) \
278 v[7] = h[7] ^ (u[0] & 0xffff0000) \
281 ^ (u[1] & 0xffff0000) \
284 ^ (u[7] & 0xffff0000);
286 #define SHIFT61(h,v) \
287 h[0] = (v[0] & 0xffff0000) \
291 ^ (v[1] & 0xffff0000) \
300 ^ (v[7] & 0x0000ffff); \
301 h[1] = (v[0] << 16) \
303 ^ (v[0] & 0xffff0000) \
304 ^ (v[1] & 0x0000ffff) \
312 ^ (v[7] & 0xffff0000) \
314 h[2] = (v[0] & 0x0000ffff) \
318 ^ (v[1] & 0xffff0000) \
326 ^ (v[7] & 0x0000ffff) \
329 h[3] = (v[0] << 16) \
331 ^ (v[0] & 0xffff0000) \
332 ^ (v[1] & 0xffff0000) \
342 ^ (v[7] & 0x0000ffff) \
344 h[4] = (v[0] >> 16) \
358 h[5] = (v[0] << 16) \
359 ^ (v[0] & 0xffff0000) \
362 ^ (v[1] & 0xffff0000) \
376 ^ (v[7] & 0xffff0000); \
408 #define PASS0(h,s,u,v,t) \
419 #define PASS2(h,s,u,v,t) \
431 #define PASS4(h,s,u,v,t) \
442 #define PASS6(h,s,u,v,t) \
451 __device__ __constant__ u32 c_tables[4][256] =
454 0x00072000, 0x00075000, 0x00074800, 0x00071000,
455 0x00076800, 0x00074000, 0x00070000, 0x00077000,
456 0x00073000, 0x00075800, 0x00070800, 0x00076000,
457 0x00073800, 0x00077800, 0x00072800, 0x00071800,
458 0x0005a000, 0x0005d000, 0x0005c800, 0x00059000,
459 0x0005e800, 0x0005c000, 0x00058000, 0x0005f000,
460 0x0005b000, 0x0005d800, 0x00058800, 0x0005e000,
461 0x0005b800, 0x0005f800, 0x0005a800, 0x00059800,
462 0x00022000, 0x00025000, 0x00024800, 0x00021000,
463 0x00026800, 0x00024000, 0x00020000, 0x00027000,
464 0x00023000, 0x00025800, 0x00020800, 0x00026000,
465 0x00023800, 0x00027800, 0x00022800, 0x00021800,
466 0x00062000, 0x00065000, 0x00064800, 0x00061000,
467 0x00066800, 0x00064000, 0x00060000, 0x00067000,
468 0x00063000, 0x00065800, 0x00060800, 0x00066000,
469 0x00063800, 0x00067800, 0x00062800, 0x00061800,
470 0x00032000, 0x00035000, 0x00034800, 0x00031000,
471 0x00036800, 0x00034000, 0x00030000, 0x00037000,
472 0x00033000, 0x00035800, 0x00030800, 0x00036000,
473 0x00033800, 0x00037800, 0x00032800, 0x00031800,
474 0x0006a000, 0x0006d000, 0x0006c800, 0x00069000,
475 0x0006e800, 0x0006c000, 0x00068000, 0x0006f000,
476 0x0006b000, 0x0006d800, 0x00068800, 0x0006e000,
477 0x0006b800, 0x0006f800, 0x0006a800, 0x00069800,
478 0x0007a000, 0x0007d000, 0x0007c800, 0x00079000,
479 0x0007e800, 0x0007c000, 0x00078000, 0x0007f000,
480 0x0007b000, 0x0007d800, 0x00078800, 0x0007e000,
481 0x0007b800, 0x0007f800, 0x0007a800, 0x00079800,
482 0x00052000, 0x00055000, 0x00054800, 0x00051000,
483 0x00056800, 0x00054000, 0x00050000, 0x00057000,
484 0x00053000, 0x00055800, 0x00050800, 0x00056000,
485 0x00053800, 0x00057800, 0x00052800, 0x00051800,
486 0x00012000, 0x00015000, 0x00014800, 0x00011000,
487 0x00016800, 0x00014000, 0x00010000, 0x00017000,
488 0x00013000, 0x00015800, 0x00010800, 0x00016000,
489 0x00013800, 0x00017800, 0x00012800, 0x00011800,
490 0x0001a000, 0x0001d000, 0x0001c800, 0x00019000,
491 0x0001e800, 0x0001c000, 0x00018000, 0x0001f000,
492 0x0001b000, 0x0001d800, 0x00018800, 0x0001e000,
493 0x0001b800, 0x0001f800, 0x0001a800, 0x00019800,
494 0x00042000, 0x00045000, 0x00044800, 0x00041000,
495 0x00046800, 0x00044000, 0x00040000, 0x00047000,
496 0x00043000, 0x00045800, 0x00040800, 0x00046000,
497 0x00043800, 0x00047800, 0x00042800, 0x00041800,
498 0x0000a000, 0x0000d000, 0x0000c800, 0x00009000,
499 0x0000e800, 0x0000c000, 0x00008000, 0x0000f000,
500 0x0000b000, 0x0000d800, 0x00008800, 0x0000e000,
501 0x0000b800, 0x0000f800, 0x0000a800, 0x00009800,
502 0x00002000, 0x00005000, 0x00004800, 0x00001000,
503 0x00006800, 0x00004000, 0x00000000, 0x00007000,
504 0x00003000, 0x00005800, 0x00000800, 0x00006000,
505 0x00003800, 0x00007800, 0x00002800, 0x00001800,
506 0x0003a000, 0x0003d000, 0x0003c800, 0x00039000,
507 0x0003e800, 0x0003c000, 0x00038000, 0x0003f000,
508 0x0003b000, 0x0003d800, 0x00038800, 0x0003e000,
509 0x0003b800, 0x0003f800, 0x0003a800, 0x00039800,
510 0x0002a000, 0x0002d000, 0x0002c800, 0x00029000,
511 0x0002e800, 0x0002c000, 0x00028000, 0x0002f000,
512 0x0002b000, 0x0002d800, 0x00028800, 0x0002e000,
513 0x0002b800, 0x0002f800, 0x0002a800, 0x00029800,
514 0x0004a000, 0x0004d000, 0x0004c800, 0x00049000,
515 0x0004e800, 0x0004c000, 0x00048000, 0x0004f000,
516 0x0004b000, 0x0004d800, 0x00048800, 0x0004e000,
517 0x0004b800, 0x0004f800, 0x0004a800, 0x00049800,
520 0x03a80000, 0x03c00000, 0x03880000, 0x03e80000,
521 0x03d00000, 0x03980000, 0x03a00000, 0x03900000,
522 0x03f00000, 0x03f80000, 0x03e00000, 0x03b80000,
523 0x03b00000, 0x03800000, 0x03c80000, 0x03d80000,
524 0x06a80000, 0x06c00000, 0x06880000, 0x06e80000,
525 0x06d00000, 0x06980000, 0x06a00000, 0x06900000,
526 0x06f00000, 0x06f80000, 0x06e00000, 0x06b80000,
527 0x06b00000, 0x06800000, 0x06c80000, 0x06d80000,
528 0x05280000, 0x05400000, 0x05080000, 0x05680000,
529 0x05500000, 0x05180000, 0x05200000, 0x05100000,
530 0x05700000, 0x05780000, 0x05600000, 0x05380000,
531 0x05300000, 0x05000000, 0x05480000, 0x05580000,
532 0x00a80000, 0x00c00000, 0x00880000, 0x00e80000,
533 0x00d00000, 0x00980000, 0x00a00000, 0x00900000,
534 0x00f00000, 0x00f80000, 0x00e00000, 0x00b80000,
535 0x00b00000, 0x00800000, 0x00c80000, 0x00d80000,
536 0x00280000, 0x00400000, 0x00080000, 0x00680000,
537 0x00500000, 0x00180000, 0x00200000, 0x00100000,
538 0x00700000, 0x00780000, 0x00600000, 0x00380000,
539 0x00300000, 0x00000000, 0x00480000, 0x00580000,
540 0x04280000, 0x04400000, 0x04080000, 0x04680000,
541 0x04500000, 0x04180000, 0x04200000, 0x04100000,
542 0x04700000, 0x04780000, 0x04600000, 0x04380000,
543 0x04300000, 0x04000000, 0x04480000, 0x04580000,
544 0x04a80000, 0x04c00000, 0x04880000, 0x04e80000,
545 0x04d00000, 0x04980000, 0x04a00000, 0x04900000,
546 0x04f00000, 0x04f80000, 0x04e00000, 0x04b80000,
547 0x04b00000, 0x04800000, 0x04c80000, 0x04d80000,
548 0x07a80000, 0x07c00000, 0x07880000, 0x07e80000,
549 0x07d00000, 0x07980000, 0x07a00000, 0x07900000,
550 0x07f00000, 0x07f80000, 0x07e00000, 0x07b80000,
551 0x07b00000, 0x07800000, 0x07c80000, 0x07d80000,
552 0x07280000, 0x07400000, 0x07080000, 0x07680000,
553 0x07500000, 0x07180000, 0x07200000, 0x07100000,
554 0x07700000, 0x07780000, 0x07600000, 0x07380000,
555 0x07300000, 0x07000000, 0x07480000, 0x07580000,
556 0x02280000, 0x02400000, 0x02080000, 0x02680000,
557 0x02500000, 0x02180000, 0x02200000, 0x02100000,
558 0x02700000, 0x02780000, 0x02600000, 0x02380000,
559 0x02300000, 0x02000000, 0x02480000, 0x02580000,
560 0x03280000, 0x03400000, 0x03080000, 0x03680000,
561 0x03500000, 0x03180000, 0x03200000, 0x03100000,
562 0x03700000, 0x03780000, 0x03600000, 0x03380000,
563 0x03300000, 0x03000000, 0x03480000, 0x03580000,
564 0x06280000, 0x06400000, 0x06080000, 0x06680000,
565 0x06500000, 0x06180000, 0x06200000, 0x06100000,
566 0x06700000, 0x06780000, 0x06600000, 0x06380000,
567 0x06300000, 0x06000000, 0x06480000, 0x06580000,
568 0x05a80000, 0x05c00000, 0x05880000, 0x05e80000,
569 0x05d00000, 0x05980000, 0x05a00000, 0x05900000,
570 0x05f00000, 0x05f80000, 0x05e00000, 0x05b80000,
571 0x05b00000, 0x05800000, 0x05c80000, 0x05d80000,
572 0x01280000, 0x01400000, 0x01080000, 0x01680000,
573 0x01500000, 0x01180000, 0x01200000, 0x01100000,
574 0x01700000, 0x01780000, 0x01600000, 0x01380000,
575 0x01300000, 0x01000000, 0x01480000, 0x01580000,
576 0x02a80000, 0x02c00000, 0x02880000, 0x02e80000,
577 0x02d00000, 0x02980000, 0x02a00000, 0x02900000,
578 0x02f00000, 0x02f80000, 0x02e00000, 0x02b80000,
579 0x02b00000, 0x02800000, 0x02c80000, 0x02d80000,
580 0x01a80000, 0x01c00000, 0x01880000, 0x01e80000,
581 0x01d00000, 0x01980000, 0x01a00000, 0x01900000,
582 0x01f00000, 0x01f80000, 0x01e00000, 0x01b80000,
583 0x01b00000, 0x01800000, 0x01c80000, 0x01d80000,
586 0x30000002, 0x60000002, 0x38000002, 0x08000002,
587 0x28000002, 0x78000002, 0x68000002, 0x40000002,
588 0x20000002, 0x50000002, 0x48000002, 0x70000002,
589 0x00000002, 0x18000002, 0x58000002, 0x10000002,
590 0xb0000005, 0xe0000005, 0xb8000005, 0x88000005,
591 0xa8000005, 0xf8000005, 0xe8000005, 0xc0000005,
592 0xa0000005, 0xd0000005, 0xc8000005, 0xf0000005,
593 0x80000005, 0x98000005, 0xd8000005, 0x90000005,
594 0x30000005, 0x60000005, 0x38000005, 0x08000005,
595 0x28000005, 0x78000005, 0x68000005, 0x40000005,
596 0x20000005, 0x50000005, 0x48000005, 0x70000005,
597 0x00000005, 0x18000005, 0x58000005, 0x10000005,
598 0x30000000, 0x60000000, 0x38000000, 0x08000000,
599 0x28000000, 0x78000000, 0x68000000, 0x40000000,
600 0x20000000, 0x50000000, 0x48000000, 0x70000000,
601 0x00000000, 0x18000000, 0x58000000, 0x10000000,
602 0xb0000003, 0xe0000003, 0xb8000003, 0x88000003,
603 0xa8000003, 0xf8000003, 0xe8000003, 0xc0000003,
604 0xa0000003, 0xd0000003, 0xc8000003, 0xf0000003,
605 0x80000003, 0x98000003, 0xd8000003, 0x90000003,
606 0x30000001, 0x60000001, 0x38000001, 0x08000001,
607 0x28000001, 0x78000001, 0x68000001, 0x40000001,
608 0x20000001, 0x50000001, 0x48000001, 0x70000001,
609 0x00000001, 0x18000001, 0x58000001, 0x10000001,
610 0xb0000000, 0xe0000000, 0xb8000000, 0x88000000,
611 0xa8000000, 0xf8000000, 0xe8000000, 0xc0000000,
612 0xa0000000, 0xd0000000, 0xc8000000, 0xf0000000,
613 0x80000000, 0x98000000, 0xd8000000, 0x90000000,
614 0xb0000006, 0xe0000006, 0xb8000006, 0x88000006,
615 0xa8000006, 0xf8000006, 0xe8000006, 0xc0000006,
616 0xa0000006, 0xd0000006, 0xc8000006, 0xf0000006,
617 0x80000006, 0x98000006, 0xd8000006, 0x90000006,
618 0xb0000001, 0xe0000001, 0xb8000001, 0x88000001,
619 0xa8000001, 0xf8000001, 0xe8000001, 0xc0000001,
620 0xa0000001, 0xd0000001, 0xc8000001, 0xf0000001,
621 0x80000001, 0x98000001, 0xd8000001, 0x90000001,
622 0x30000003, 0x60000003, 0x38000003, 0x08000003,
623 0x28000003, 0x78000003, 0x68000003, 0x40000003,
624 0x20000003, 0x50000003, 0x48000003, 0x70000003,
625 0x00000003, 0x18000003, 0x58000003, 0x10000003,
626 0x30000004, 0x60000004, 0x38000004, 0x08000004,
627 0x28000004, 0x78000004, 0x68000004, 0x40000004,
628 0x20000004, 0x50000004, 0x48000004, 0x70000004,
629 0x00000004, 0x18000004, 0x58000004, 0x10000004,
630 0xb0000002, 0xe0000002, 0xb8000002, 0x88000002,
631 0xa8000002, 0xf8000002, 0xe8000002, 0xc0000002,
632 0xa0000002, 0xd0000002, 0xc8000002, 0xf0000002,
633 0x80000002, 0x98000002, 0xd8000002, 0x90000002,
634 0xb0000004, 0xe0000004, 0xb8000004, 0x88000004,
635 0xa8000004, 0xf8000004, 0xe8000004, 0xc0000004,
636 0xa0000004, 0xd0000004, 0xc8000004, 0xf0000004,
637 0x80000004, 0x98000004, 0xd8000004, 0x90000004,
638 0x30000006, 0x60000006, 0x38000006, 0x08000006,
639 0x28000006, 0x78000006, 0x68000006, 0x40000006,
640 0x20000006, 0x50000006, 0x48000006, 0x70000006,
641 0x00000006, 0x18000006, 0x58000006, 0x10000006,
642 0xb0000007, 0xe0000007, 0xb8000007, 0x88000007,
643 0xa8000007, 0xf8000007, 0xe8000007, 0xc0000007,
644 0xa0000007, 0xd0000007, 0xc8000007, 0xf0000007,
645 0x80000007, 0x98000007, 0xd8000007, 0x90000007,
646 0x30000007, 0x60000007, 0x38000007, 0x08000007,
647 0x28000007, 0x78000007, 0x68000007, 0x40000007,
648 0x20000007, 0x50000007, 0x48000007, 0x70000007,
649 0x00000007, 0x18000007, 0x58000007, 0x10000007,
652 0x000000e8, 0x000000d8, 0x000000a0, 0x00000088,
653 0x00000098, 0x000000f8, 0x000000a8, 0x000000c8,
654 0x00000080, 0x000000d0, 0x000000f0, 0x000000b8,
655 0x000000b0, 0x000000c0, 0x00000090, 0x000000e0,
656 0x000007e8, 0x000007d8, 0x000007a0, 0x00000788,
657 0x00000798, 0x000007f8, 0x000007a8, 0x000007c8,
658 0x00000780, 0x000007d0, 0x000007f0, 0x000007b8,
659 0x000007b0, 0x000007c0, 0x00000790, 0x000007e0,
660 0x000006e8, 0x000006d8, 0x000006a0, 0x00000688,
661 0x00000698, 0x000006f8, 0x000006a8, 0x000006c8,
662 0x00000680, 0x000006d0, 0x000006f0, 0x000006b8,
663 0x000006b0, 0x000006c0, 0x00000690, 0x000006e0,
664 0x00000068, 0x00000058, 0x00000020, 0x00000008,
665 0x00000018, 0x00000078, 0x00000028, 0x00000048,
666 0x00000000, 0x00000050, 0x00000070, 0x00000038,
667 0x00000030, 0x00000040, 0x00000010, 0x00000060,
668 0x000002e8, 0x000002d8, 0x000002a0, 0x00000288,
669 0x00000298, 0x000002f8, 0x000002a8, 0x000002c8,
670 0x00000280, 0x000002d0, 0x000002f0, 0x000002b8,
671 0x000002b0, 0x000002c0, 0x00000290, 0x000002e0,
672 0x000003e8, 0x000003d8, 0x000003a0, 0x00000388,
673 0x00000398, 0x000003f8, 0x000003a8, 0x000003c8,
674 0x00000380, 0x000003d0, 0x000003f0, 0x000003b8,
675 0x000003b0, 0x000003c0, 0x00000390, 0x000003e0,
676 0x00000568, 0x00000558, 0x00000520, 0x00000508,
677 0x00000518, 0x00000578, 0x00000528, 0x00000548,
678 0x00000500, 0x00000550, 0x00000570, 0x00000538,
679 0x00000530, 0x00000540, 0x00000510, 0x00000560,
680 0x00000268, 0x00000258, 0x00000220, 0x00000208,
681 0x00000218, 0x00000278, 0x00000228, 0x00000248,
682 0x00000200, 0x00000250, 0x00000270, 0x00000238,
683 0x00000230, 0x00000240, 0x00000210, 0x00000260,
684 0x000004e8, 0x000004d8, 0x000004a0, 0x00000488,
685 0x00000498, 0x000004f8, 0x000004a8, 0x000004c8,
686 0x00000480, 0x000004d0, 0x000004f0, 0x000004b8,
687 0x000004b0, 0x000004c0, 0x00000490, 0x000004e0,
688 0x00000168, 0x00000158, 0x00000120, 0x00000108,
689 0x00000118, 0x00000178, 0x00000128, 0x00000148,
690 0x00000100, 0x00000150, 0x00000170, 0x00000138,
691 0x00000130, 0x00000140, 0x00000110, 0x00000160,
692 0x000001e8, 0x000001d8, 0x000001a0, 0x00000188,
693 0x00000198, 0x000001f8, 0x000001a8, 0x000001c8,
694 0x00000180, 0x000001d0, 0x000001f0, 0x000001b8,
695 0x000001b0, 0x000001c0, 0x00000190, 0x000001e0,
696 0x00000768, 0x00000758, 0x00000720, 0x00000708,
697 0x00000718, 0x00000778, 0x00000728, 0x00000748,
698 0x00000700, 0x00000750, 0x00000770, 0x00000738,
699 0x00000730, 0x00000740, 0x00000710, 0x00000760,
700 0x00000368, 0x00000358, 0x00000320, 0x00000308,
701 0x00000318, 0x00000378, 0x00000328, 0x00000348,
702 0x00000300, 0x00000350, 0x00000370, 0x00000338,
703 0x00000330, 0x00000340, 0x00000310, 0x00000360,
704 0x000005e8, 0x000005d8, 0x000005a0, 0x00000588,
705 0x00000598, 0x000005f8, 0x000005a8, 0x000005c8,
706 0x00000580, 0x000005d0, 0x000005f0, 0x000005b8,
707 0x000005b0, 0x000005c0, 0x00000590, 0x000005e0,
708 0x00000468, 0x00000458, 0x00000420, 0x00000408,
709 0x00000418, 0x00000478, 0x00000428, 0x00000448,
710 0x00000400, 0x00000450, 0x00000470, 0x00000438,
711 0x00000430, 0x00000440, 0x00000410, 0x00000460,
712 0x00000668, 0x00000658, 0x00000620, 0x00000608,
713 0x00000618, 0x00000678, 0x00000628, 0x00000648,
714 0x00000600, 0x00000650, 0x00000670, 0x00000638,
715 0x00000630, 0x00000640, 0x00000610, 0x00000660,
719 __device__ __constant__ gpu_rule_t c_rules[1024];
721 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
727 const u32 lid = threadIdx.x;
733 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
737 pw_buf0[0] = pws[gid].i[ 0];
738 pw_buf0[1] = pws[gid].i[ 1];
739 pw_buf0[2] = pws[gid].i[ 2];
740 pw_buf0[3] = pws[gid].i[ 3];
744 pw_buf1[0] = pws[gid].i[ 4];
745 pw_buf1[1] = pws[gid].i[ 5];
746 pw_buf1[2] = pws[gid].i[ 6];
747 pw_buf1[3] = pws[gid].i[ 7];
749 const u32 pw_len = pws[gid].pw_len;
755 __shared__ u32 s_tables[4][256];
757 s_tables[0][lid] = c_tables[0][lid];
758 s_tables[1][lid] = c_tables[1][lid];
759 s_tables[2][lid] = c_tables[2][lid];
760 s_tables[3][lid] = c_tables[3][lid];
764 if (gid >= gid_max) return;
770 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
800 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
802 u32 w14 = out_len * 8;
839 state_m[0] = state[0];
840 state_m[1] = state[1];
841 state_m[2] = state[2];
842 state_m[3] = state[3];
843 state_m[4] = state[4];
844 state_m[5] = state[5];
845 state_m[6] = state[6];
846 state_m[7] = state[7];
859 PASS0 (state, tmp, state_m, data_m, s_tables);
860 PASS2 (state, tmp, state_m, data_m, s_tables);
861 PASS4 (state, tmp, state_m, data_m, s_tables);
862 PASS6 (state, tmp, state_m, data_m, s_tables);
864 SHIFT12 (state_m, data, tmp);
865 SHIFT16 (state, data_m, state_m);
866 SHIFT61 (state, data_m);
879 state_m[0] = state[0];
880 state_m[1] = state[1];
881 state_m[2] = state[2];
882 state_m[3] = state[3];
883 state_m[4] = state[4];
884 state_m[5] = state[5];
885 state_m[6] = state[6];
886 state_m[7] = state[7];
897 PASS0 (state, tmp, state_m, data_m, s_tables);
898 PASS2 (state, tmp, state_m, data_m, s_tables);
899 PASS4 (state, tmp, state_m, data_m, s_tables);
900 PASS6 (state, tmp, state_m, data_m, s_tables);
902 SHIFT12 (state_m, data, tmp);
903 SHIFT16 (state, data_m, state_m);
904 SHIFT61 (state, data_m);
917 state_m[0] = state[0];
918 state_m[1] = state[1];
919 state_m[2] = state[2];
920 state_m[3] = state[3];
921 state_m[4] = state[4];
922 state_m[5] = state[5];
923 state_m[6] = state[6];
924 state_m[7] = state[7];
935 PASS0 (state, tmp, state_m, data_m, s_tables);
936 PASS2 (state, tmp, state_m, data_m, s_tables);
937 PASS4 (state, tmp, state_m, data_m, s_tables);
938 PASS6 (state, tmp, state_m, data_m, s_tables);
940 SHIFT12 (state_m, data, tmp);
941 SHIFT16 (state, data_m, state_m);
942 SHIFT61 (state, data_m);
946 const u32x r0 = state[0];
947 const u32x r1 = state[1];
948 const u32x r2 = state[2];
949 const u32x r3 = state[3];
951 #include VECT_COMPARE_M
955 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
959 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
963 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
969 const u32 lid = threadIdx.x;
975 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
979 pw_buf0[0] = pws[gid].i[ 0];
980 pw_buf0[1] = pws[gid].i[ 1];
981 pw_buf0[2] = pws[gid].i[ 2];
982 pw_buf0[3] = pws[gid].i[ 3];
986 pw_buf1[0] = pws[gid].i[ 4];
987 pw_buf1[1] = pws[gid].i[ 5];
988 pw_buf1[2] = pws[gid].i[ 6];
989 pw_buf1[3] = pws[gid].i[ 7];
991 const u32 pw_len = pws[gid].pw_len;
997 __shared__ u32 s_tables[4][256];
999 s_tables[0][lid] = c_tables[0][lid];
1000 s_tables[1][lid] = c_tables[1][lid];
1001 s_tables[2][lid] = c_tables[2][lid];
1002 s_tables[3][lid] = c_tables[3][lid];
1006 if (gid >= gid_max) return;
1012 const u32 search[4] =
1014 digests_buf[digests_offset].digest_buf[DGST_R0],
1015 digests_buf[digests_offset].digest_buf[DGST_R1],
1016 digests_buf[digests_offset].digest_buf[DGST_R2],
1017 digests_buf[digests_offset].digest_buf[DGST_R3]
1024 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
1054 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
1056 u32 w14 = out_len * 8;
1079 state[ 8] = data[0];
1080 state[ 9] = data[1];
1081 state[10] = data[2];
1082 state[11] = data[3];
1083 state[12] = data[4];
1084 state[13] = data[5];
1085 state[14] = data[6];
1086 state[15] = data[7];
1093 state_m[0] = state[0];
1094 state_m[1] = state[1];
1095 state_m[2] = state[2];
1096 state_m[3] = state[3];
1097 state_m[4] = state[4];
1098 state_m[5] = state[5];
1099 state_m[6] = state[6];
1100 state_m[7] = state[7];
1102 data_m[0] = data[0];
1103 data_m[1] = data[1];
1104 data_m[2] = data[2];
1105 data_m[3] = data[3];
1106 data_m[4] = data[4];
1107 data_m[5] = data[5];
1108 data_m[6] = data[6];
1109 data_m[7] = data[7];
1113 PASS0 (state, tmp, state_m, data_m, s_tables);
1114 PASS2 (state, tmp, state_m, data_m, s_tables);
1115 PASS4 (state, tmp, state_m, data_m, s_tables);
1116 PASS6 (state, tmp, state_m, data_m, s_tables);
1118 SHIFT12 (state_m, data, tmp);
1119 SHIFT16 (state, data_m, state_m);
1120 SHIFT61 (state, data_m);
1133 state_m[0] = state[0];
1134 state_m[1] = state[1];
1135 state_m[2] = state[2];
1136 state_m[3] = state[3];
1137 state_m[4] = state[4];
1138 state_m[5] = state[5];
1139 state_m[6] = state[6];
1140 state_m[7] = state[7];
1142 data_m[0] = data[0];
1143 data_m[1] = data[1];
1144 data_m[2] = data[2];
1145 data_m[3] = data[3];
1146 data_m[4] = data[4];
1147 data_m[5] = data[5];
1148 data_m[6] = data[6];
1149 data_m[7] = data[7];
1151 PASS0 (state, tmp, state_m, data_m, s_tables);
1152 PASS2 (state, tmp, state_m, data_m, s_tables);
1153 PASS4 (state, tmp, state_m, data_m, s_tables);
1154 PASS6 (state, tmp, state_m, data_m, s_tables);
1156 SHIFT12 (state_m, data, tmp);
1157 SHIFT16 (state, data_m, state_m);
1158 SHIFT61 (state, data_m);
1162 data[0] = state[ 8];
1163 data[1] = state[ 9];
1164 data[2] = state[10];
1165 data[3] = state[11];
1166 data[4] = state[12];
1167 data[5] = state[13];
1168 data[6] = state[14];
1169 data[7] = state[15];
1171 state_m[0] = state[0];
1172 state_m[1] = state[1];
1173 state_m[2] = state[2];
1174 state_m[3] = state[3];
1175 state_m[4] = state[4];
1176 state_m[5] = state[5];
1177 state_m[6] = state[6];
1178 state_m[7] = state[7];
1180 data_m[0] = data[0];
1181 data_m[1] = data[1];
1182 data_m[2] = data[2];
1183 data_m[3] = data[3];
1184 data_m[4] = data[4];
1185 data_m[5] = data[5];
1186 data_m[6] = data[6];
1187 data_m[7] = data[7];
1189 PASS0 (state, tmp, state_m, data_m, s_tables);
1190 PASS2 (state, tmp, state_m, data_m, s_tables);
1191 PASS4 (state, tmp, state_m, data_m, s_tables);
1192 PASS6 (state, tmp, state_m, data_m, s_tables);
1194 SHIFT12 (state_m, data, tmp);
1195 SHIFT16 (state, data_m, state_m);
1196 SHIFT61 (state, data_m);
1200 const u32x r0 = state[0];
1201 const u32x r1 = state[1];
1202 const u32x r2 = state[2];
1203 const u32x r3 = state[3];
1205 #include VECT_COMPARE_S
1209 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1213 extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)