2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes for maxwell were taken from DeepLearningJohnDoe, license below
5 * : sboxes for others were takes fron JtR, license below
10 #include "include/constants.h"
11 #include "include/kernel_vendor.h"
18 #include "include/kernel_functions.c"
19 #include "OpenCL/types_ocl.c"
20 #include "OpenCL/common.c"
22 #define COMPARE_S "OpenCL/check_single_comp4_bs.c"
23 #define COMPARE_M "OpenCL/check_multi_comp4_bs.c"
25 #define myselx(a,b,c) ((c) ? (b) : (a))
32 // Bitslice DES S-boxes with LOP3.LUT instructions
33 // For NVIDIA Maxwell architecture and CUDA 7.5 RC
34 // by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
36 // Gate counts: 25 24 25 18 25 24 24 23
38 // Depth: 8 7 7 6 8 10 10 8
41 // Note that same S-box function with a lower gate count isn't necessarily faster.
43 // These Boolean expressions corresponding to DES S-boxes were
44 // discovered by <deeplearningjohndoe at gmail.com>
46 // This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
47 // Redistribution and use in source and binary forms, with or without
48 // modification, are permitted.
50 // The underlying mathematical formulas are NOT copyrighted.
53 #define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));
55 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
57 LUT(xAA55AA5500550055, a1, a4, a6, 0xC1)
58 LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
59 LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
60 LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
61 LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
62 LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
63 LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
64 LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
65 LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
66 LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
67 LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
68 LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
69 LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
70 LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
71 LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
72 LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
73 LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
74 LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
75 LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
76 LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
77 LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
78 LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
79 LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
80 LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
81 LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)
89 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
91 LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97)
92 LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
93 LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76)
94 LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
95 LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
96 LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65)
97 LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
98 LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
99 LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
100 LUT(x3333CCCC00000000, a2, a5, a6, 0x14)
101 LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
102 LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
103 LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
104 LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
105 LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
106 LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
107 LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
108 LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
109 LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
110 LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
111 LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
112 LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
113 LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
114 LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)
122 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
124 LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
125 LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
126 LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
127 LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
128 LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
129 LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
130 LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
131 LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49)
132 LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
133 LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
134 LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
135 LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
136 LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
137 LUT(x500F500F500F500F, a1, a3, a4, 0x98)
138 LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
139 LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
140 LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
141 LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
142 LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
143 LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
144 LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
145 LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
146 LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
147 LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
148 LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)
156 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
158 LUT(x55F055F055F055F0, a1, a3, a4, 0x72)
159 LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
160 LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59)
161 LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
162 LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
163 LUT(x9999666699996666, a1, a2, a5, 0x69)
164 LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
165 LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
166 LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
167 LUT(x4848484848484848, a1, a2, a3, 0x12)
168 LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
169 LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
170 LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
171 LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
172 LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
173 LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
174 LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
175 LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)
183 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
185 LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
186 LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9)
187 LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
188 LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
189 LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
190 LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
191 LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
192 LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
193 LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
194 LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
195 LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
196 LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
197 LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
198 LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
199 LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
200 LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
201 LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
202 LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
203 LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
204 LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
205 LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
206 LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
207 LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
208 LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
209 LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)
217 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
219 LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2)
220 LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
221 LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
222 LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
223 LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
224 LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
225 LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
226 LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
227 LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
228 LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
229 LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
230 LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
231 LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
232 LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
233 LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
234 LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
235 LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
236 LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
237 LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
238 LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
239 LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
240 LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
241 LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
242 LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)
250 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
252 LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
253 LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
254 LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
255 LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
256 LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
257 LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
258 LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
259 LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
260 LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
261 LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
262 LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
263 LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
264 LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
265 LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
266 LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
267 LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23)
268 LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
269 LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
270 LUT(xA050A050A050A050, a1, a3, a4, 0x21)
271 LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
272 LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
273 LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
274 LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
275 LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)
283 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
285 LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
286 LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
287 LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
288 LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
289 LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
290 LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
291 LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
292 LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
293 LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
294 LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
295 LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
296 LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
297 LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
298 LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
299 LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
300 LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
301 LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
302 LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
303 LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
304 LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
305 LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
306 LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
307 LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)
318 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
319 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
321 * Gate counts: 49 44 46 33 48 46 46 41
324 * Several same-gate-count expressions for each S-box are included (for use on
325 * different CPUs/GPUs).
327 * These Boolean expressions corresponding to DES S-boxes have been generated
328 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
329 * John the Ripper password cracker: http://www.openwall.com/john/
330 * Being mathematical formulas, they are not copyrighted and are free for reuse
333 * This file (a specific representation of the S-box expressions, surrounding
334 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
335 * Redistribution and use in source and binary forms, with or without
336 * modification, are permitted. (This is a heavily cut-down "BSD license".)
338 * The effort has been sponsored by Rapid7: http://www.rapid7.com
341 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
343 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
345 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
346 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
347 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
348 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
349 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
350 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
351 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
352 u32 x00, x01, x10, x11, x20, x21, x30, x31;
354 x55005500 = a1 & ~a5;
355 x5A0F5A0F = a4 ^ x55005500;
358 x22226666 = x3333FFFF & x66666666;
359 x2D2D6969 = a4 ^ x22226666;
360 x25202160 = x2D2D6969 & ~x5A0F5A0F;
363 x33CCCC33 = a3 ^ x00FFFF00;
364 x4803120C = x5A0F5A0F & ~x33CCCC33;
365 x2222FFFF = a6 | x22226666;
366 x6A21EDF3 = x4803120C ^ x2222FFFF;
367 x4A01CC93 = x6A21EDF3 & ~x25202160;
370 x7F75FFFF = x6A21EDF3 | x5555FFFF;
371 x00D20096 = a5 & ~x2D2D6969;
372 x7FA7FF69 = x7F75FFFF ^ x00D20096;
374 x0A0A0000 = a4 & ~x5555FFFF;
375 x0AD80096 = x00D20096 ^ x0A0A0000;
376 x00999900 = x00FFFF00 & ~x66666666;
377 x0AD99996 = x0AD80096 | x00999900;
379 x22332233 = a3 & ~x55005500;
380 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
381 x054885C0 = x257AA5F0 & ~x22332233;
382 xFAB77A3F = ~x054885C0;
383 x2221EDF3 = x3333FFFF & x6A21EDF3;
384 xD89697CC = xFAB77A3F ^ x2221EDF3;
385 x20 = x7FA7FF69 & ~a2;
386 x21 = x20 ^ xD89697CC;
389 x05B77AC0 = x00FFFF00 ^ x054885C0;
390 x05F77AD6 = x00D20096 | x05B77AC0;
391 x36C48529 = x3333FFFF ^ x05F77AD6;
392 x6391D07C = a1 ^ x36C48529;
393 xBB0747B0 = xD89697CC ^ x6391D07C;
394 x00 = x25202160 | a2;
395 x01 = x00 ^ xBB0747B0;
398 x4C460000 = x3333FFFF ^ x7F75FFFF;
399 x4EDF9996 = x0AD99996 | x4C460000;
400 x2D4E49EA = x6391D07C ^ x4EDF9996;
401 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
402 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
403 x10 = x4A01CC93 | a2;
404 x11 = x10 ^ x96B1B65A;
407 x5AFF5AFF = a5 | x5A0F5A0F;
408 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
409 x4201C010 = x4A01CC93 & x6391D07C;
410 x10B0D205 = x52B11215 ^ x4201C010;
411 x30 = x10B0D205 | a2;
412 x31 = x30 ^ x0AD99996;
416 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
419 u32 x55550000, x00AA00FF, x33BB33FF;
420 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
421 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
422 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
423 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
424 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
425 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
426 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
427 u32 x00, x01, x10, x11, x20, x21, x30, x31;
431 x55550000 = a1 & ~a6;
432 x00AA00FF = a5 & ~x55550000;
433 x33BB33FF = a2 | x00AA00FF;
435 x33CC0000 = x33CC33CC & ~a6;
436 x11441144 = a1 & x33CC33CC;
437 x11BB11BB = a5 ^ x11441144;
438 x003311BB = x11BB11BB & ~x33CC0000;
441 x336600FF = x00AA00FF ^ x33CC0000;
442 x332200FF = x33BB33FF & x336600FF;
443 x332200F0 = x332200FF & ~x00000F0F;
445 x0302000F = a3 & x332200FF;
447 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
448 x33CCCC33 = a6 ^ x33CC33CC;
449 x33CCC030 = x33CCCC33 & ~x00000F0F;
450 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
451 x10 = a4 & ~x332200F0;
452 x11 = x10 ^ x9A646A95;
455 x00333303 = a2 & ~x33CCC030;
456 x118822B8 = x11BB11BB ^ x00333303;
457 xA8208805 = xA9A8AAA5 & ~x118822B8;
458 x3CC3C33C = a3 ^ x33CCCC33;
459 x94E34B39 = xA8208805 ^ x3CC3C33C;
460 x00 = x33BB33FF & ~a4;
461 x01 = x00 ^ x94E34B39;
464 x0331330C = x0302000F ^ x00333303;
465 x3FF3F33C = x3CC3C33C | x0331330C;
466 xA9DF596A = x33BB33FF ^ x9A646A95;
467 xA9DF5F6F = x00000F0F | xA9DF596A;
468 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
470 xA9466A6A = x332200FF ^ x9A646A95;
471 x3DA52153 = x94E34B39 ^ xA9466A6A;
472 x29850143 = xA9DF5F6F & x3DA52153;
473 x33C0330C = x33CC33CC & x3FF3F33C;
474 x1A45324F = x29850143 ^ x33C0330C;
475 x20 = x1A45324F | a4;
476 x21 = x20 ^ x962CAC53;
479 x0A451047 = x1A45324F & ~x118822B8;
480 xBBDFDD7B = x33CCCC33 | xA9DF596A;
481 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
482 x30 = x003311BB | a4;
483 x31 = x30 ^ xB19ACD3C;
487 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
489 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
490 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
491 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
492 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
493 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
494 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
495 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
496 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
497 u32 x00, x01, x10, x11, x20, x21, x30, x31;
499 x44444444 = a1 & ~a2;
501 x4F4FF4F4 = x44444444 | x0F0FF0F0;
503 x00AAAA00 = x00FFFF00 & ~a1;
504 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
506 x3C3CC3C3 = a2 ^ x0F0FF0F0;
507 x3C3C0000 = x3C3CC3C3 & ~a6;
508 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
509 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
511 x00005EF4 = a6 & x4FE55EF4;
512 x00FF5EFF = a4 | x00005EF4;
513 x00555455 = a1 & x00FF5EFF;
514 x3C699796 = x3C3CC3C3 ^ x00555455;
515 x30 = x4FE55EF4 & ~a5;
516 x31 = x30 ^ x3C699796;
519 x000FF000 = x0F0FF0F0 & x00FFFF00;
521 x26D9A15E = x7373F4F4 ^ x55AA55AA;
522 x2FDFAF5F = a3 | x26D9A15E;
523 x2FD00F5F = x2FDFAF5F & ~x000FF000;
525 x55AAFFAA = x00AAAA00 | x55AA55AA;
526 x28410014 = x3C699796 & ~x55AAFFAA;
528 x000000CC = x000000FF & ~a2;
529 x284100D8 = x28410014 ^ x000000CC;
531 x204100D0 = x7373F4F4 & x284100D8;
532 x3C3CC3FF = x3C3CC3C3 | x000000FF;
533 x1C3CC32F = x3C3CC3FF & ~x204100D0;
534 x4969967A = a1 ^ x1C3CC32F;
535 x10 = x2FD00F5F & a5;
536 x11 = x10 ^ x4969967A;
539 x4CC44CC4 = x4FE55EF4 & ~a2;
540 x40C040C0 = x4CC44CC4 & ~a3;
541 xC3C33C3C = ~x3C3CC3C3;
542 x9669C396 = x55AAFFAA ^ xC3C33C3C;
543 xD6A98356 = x40C040C0 ^ x9669C396;
544 x00 = a5 & ~x0C840A00;
545 x01 = x00 ^ xD6A98356;
548 xD6E9C3D6 = x40C040C0 | x9669C396;
549 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
550 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
551 x001A000B = a4 & ~x4FE55EF4;
552 x9A1F2D1B = x9A072D12 | x001A000B;
553 x20 = a5 & ~x284100D8;
554 x21 = x20 ^ x9A1F2D1B;
558 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
560 u32 x5A5A5A5A, x0F0FF0F0;
561 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
562 x52FBCA0F, x61C8F93C;
563 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
564 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
565 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
566 u32 x00, x01, x10, x11, x20, x21, x30, x31;
571 x33FFCC00 = a5 ^ x33FF33FF;
572 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
573 x0C0CC0C0 = x0F0FF0F0 & ~a2;
574 x0CF3C03F = a4 ^ x0C0CC0C0;
575 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
576 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
577 x61C8F93C = a2 ^ x52FBCA0F;
579 x00C0C03C = x0CF3C03F & x61C8F93C;
580 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
581 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
582 x30908326 = x3B92A366 & ~x0F0F30C0;
583 x3C90B3D6 = x0C0030F0 ^ x30908326;
586 x0C0CFFFF = a5 | x0C0CC0C0;
587 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
588 x04124C11 = x379E5C99 & ~x33CC33CC;
589 x56E9861E = x52FBCA0F ^ x04124C11;
590 x00 = a6 & ~x3C90B3D6;
591 x01 = x00 ^ x56E9861E;
594 xA91679E1 = ~x56E9861E;
595 x10 = x3C90B3D6 & ~a6;
596 x11 = x10 ^ xA91679E1;
599 x9586CA37 = x3C90B3D6 ^ xA91679E1;
600 x8402C833 = x9586CA37 & ~x33CC33CC;
601 x84C2C83F = x00C0C03C | x8402C833;
602 xB35C94A6 = x379E5C99 ^ x84C2C83F;
603 x20 = x61C8F93C | a6;
604 x21 = x20 ^ xB35C94A6;
607 x30 = a6 & x61C8F93C;
608 x31 = x30 ^ xB35C94A6;
612 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
614 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
615 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
616 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
617 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
618 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
619 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
620 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
621 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
622 u32 x00, x01, x10, x11, x20, x21, x30, x31;
625 x77770000 = x77777777 & ~a6;
626 x22225555 = a1 ^ x77770000;
627 x11116666 = a3 ^ x22225555;
628 x1F1F6F6F = a4 | x11116666;
630 x70700000 = x77770000 & ~a4;
631 x43433333 = a3 ^ x70700000;
632 x00430033 = a5 & x43433333;
633 x55557777 = a1 | x11116666;
634 x55167744 = x00430033 ^ x55557777;
635 x5A19784B = a4 ^ x55167744;
637 x5A1987B4 = a6 ^ x5A19784B;
638 x7A3BD7F5 = x22225555 | x5A1987B4;
639 x003B00F5 = a5 & x7A3BD7F5;
640 x221955A0 = x22225555 ^ x003B00F5;
641 x05050707 = a4 & x55557777;
642 x271C52A7 = x221955A0 ^ x05050707;
644 x2A2A82A0 = x7A3BD7F5 & ~a1;
645 x6969B193 = x43433333 ^ x2A2A82A0;
646 x1FE06F90 = a5 ^ x1F1F6F6F;
647 x16804E00 = x1FE06F90 & ~x6969B193;
648 xE97FB1FF = ~x16804E00;
649 x20 = xE97FB1FF & ~a2;
650 x21 = x20 ^ x5A19784B;
653 x43403302 = x43433333 & ~x003B00F5;
654 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
655 x37DEFFB7 = x271C52A7 | x35CAED30;
656 x349ECCB5 = x37DEFFB7 & ~x43403302;
657 x0B01234A = x1F1F6F6F & ~x349ECCB5;
659 x101884B4 = x5A1987B4 & x349ECCB5;
660 x0FF8EB24 = x1FE06F90 ^ x101884B4;
661 x41413333 = x43433333 & x55557777;
662 x4FF9FB37 = x0FF8EB24 | x41413333;
663 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
664 x30 = x4FC2FBC2 & a2;
665 x31 = x30 ^ x271C52A7;
668 x22222222 = a1 ^ x77777777;
669 x16BCEE97 = x349ECCB5 ^ x22222222;
670 x0F080B04 = a4 & x0FF8EB24;
671 x19B4E593 = x16BCEE97 ^ x0F080B04;
672 x00 = x0B01234A | a2;
673 x01 = x00 ^ x19B4E593;
676 x5C5C5C5C = x1F1F6F6F ^ x43433333;
677 x4448184C = x5C5C5C5C & ~x19B4E593;
678 x2DDABE71 = x22225555 ^ x0FF8EB24;
679 x6992A63D = x4448184C ^ x2DDABE71;
680 x10 = x1F1F6F6F & a2;
681 x11 = x10 ^ x6992A63D;
685 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
688 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
689 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
690 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
691 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
692 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
693 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
694 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
695 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
696 u32 x00, x01, x10, x11, x20, x21, x30, x31;
701 x11115555 = a1 & x3333FFFF;
702 x22DD6699 = x33CC33CC ^ x11115555;
703 x22DD9966 = a6 ^ x22DD6699;
704 x00220099 = a5 & ~x22DD9966;
706 x00551144 = a1 & x22DD9966;
707 x33662277 = a2 ^ x00551144;
709 x7B7E7A7F = x33662277 | x5A5A5A5A;
710 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
712 x09030C06 = a3 & x59A31CE6;
713 x09030000 = x09030C06 & ~a6;
714 x336622FF = x00220099 | x33662277;
715 x3A6522FF = x09030000 ^ x336622FF;
716 x30 = x3A6522FF & a4;
717 x31 = x30 ^ x59A31CE6;
720 x484D494C = a2 ^ x7B7E7A7F;
721 x0000B6B3 = a6 & ~x484D494C;
722 x0F0FB9BC = a3 ^ x0000B6B3;
723 x00FC00F9 = a5 & ~x09030C06;
724 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
726 x5DF75DF7 = a1 | x59A31CE6;
727 x116600F7 = x336622FF & x5DF75DF7;
728 x1E69B94B = x0F0FB9BC ^ x116600F7;
729 x1668B94B = x1E69B94B & ~x09030000;
730 x20 = x00220099 | a4;
731 x21 = x20 ^ x1668B94B;
734 x7B7B7B7B = a2 | x5A5A5A5A;
735 x411E5984 = x3A6522FF ^ x7B7B7B7B;
736 x1FFFFDFD = x11115555 | x0FFFB9FD;
737 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
739 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
740 x004B002D = a5 & ~x3CB4DFD2;
741 xB7B2B6B3 = ~x484D494C;
742 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
743 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
744 x10 = xCC82CDE5 & ~a4;
745 x11 = x10 ^ x5EE1A479;
748 x0055EEBB = a6 ^ x00551144;
749 x5A5AECE9 = a1 ^ x0F0FB9BC;
750 x0050ECA9 = x0055EEBB & x5A5AECE9;
751 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
752 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
753 x00 = x0FFFB9FD & ~a4;
754 x01 = x00 ^ xC59A2D67;
758 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
760 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
761 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
762 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
763 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
764 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
765 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
766 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
767 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
768 u32 x00, x01, x10, x11, x20, x21, x30, x31;
771 x3CC33CC3 = a3 ^ x0FF00FF0;
772 x00003CC3 = a6 & x3CC33CC3;
773 x0F000F00 = a4 & x0FF00FF0;
774 x5A555A55 = a2 ^ x0F000F00;
775 x00001841 = x00003CC3 & x5A555A55;
777 x00000F00 = a6 & x0F000F00;
778 x33333C33 = a3 ^ x00000F00;
779 x7B777E77 = x5A555A55 | x33333C33;
780 x0FF0F00F = a6 ^ x0FF00FF0;
781 x74878E78 = x7B777E77 ^ x0FF0F00F;
782 x30 = a1 & ~x00001841;
783 x31 = x30 ^ x74878E78;
786 x003C003C = a5 & ~x3CC33CC3;
787 x5A7D5A7D = x5A555A55 | x003C003C;
788 x333300F0 = x00003CC3 ^ x33333C33;
789 x694E5A8D = x5A7D5A7D ^ x333300F0;
791 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
792 x000F0303 = a4 & ~x0FF0CCCC;
793 x5A505854 = x5A555A55 & ~x000F0303;
794 x33CC000F = a5 ^ x333300F0;
795 x699C585B = x5A505854 ^ x33CC000F;
797 x7F878F78 = x0F000F00 | x74878E78;
798 x21101013 = a3 & x699C585B;
799 x7F979F7B = x7F878F78 | x21101013;
800 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
801 x4F9493BB = x7F979F7B ^ x30030CC0;
802 x00 = x4F9493BB & ~a1;
803 x01 = x00 ^ x694E5A8D;
806 x6F9CDBFB = x699C585B | x4F9493BB;
807 x0000DBFB = a6 & x6F9CDBFB;
808 x00005151 = a2 & x0000DBFB;
809 x26DAC936 = x694E5A8D ^ x4F9493BB;
810 x26DA9867 = x00005151 ^ x26DAC936;
812 x27DA9877 = x21101013 | x26DA9867;
813 x27DA438C = x0000DBFB ^ x27DA9877;
814 x2625C9C9 = a5 ^ x26DAC936;
815 x27FFCBCD = x27DA438C | x2625C9C9;
816 x20 = x27FFCBCD & a1;
817 x21 = x20 ^ x699C585B;
820 x27FF1036 = x0000DBFB ^ x27FFCBCD;
821 x27FF103E = x003C003C | x27FF1036;
822 xB06B6C44 = ~x4F9493BB;
823 x97947C7A = x27FF103E ^ xB06B6C44;
824 x10 = x97947C7A & ~a1;
825 x11 = x10 ^ x26DA9867;
829 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
831 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
832 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
833 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
834 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
835 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
836 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
837 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
838 u32 x00, x01, x10, x11, x20, x21, x30, x31;
840 x0C0C0C0C = a3 & ~a2;
841 x0000F0F0 = a5 & ~a3;
842 x00FFF00F = a4 ^ x0000F0F0;
843 x00555005 = a1 & x00FFF00F;
844 x00515001 = x00555005 & ~x0C0C0C0C;
846 x33000330 = a2 & ~x00FFF00F;
847 x77555775 = a1 | x33000330;
848 x30303030 = a2 & ~a3;
849 x3030CFCF = a5 ^ x30303030;
850 x30104745 = x77555775 & x3030CFCF;
851 x30555745 = x00555005 | x30104745;
853 xFF000FF0 = ~x00FFF00F;
854 xCF1048B5 = x30104745 ^ xFF000FF0;
855 x080A080A = a3 & ~x77555775;
856 xC71A40BF = xCF1048B5 ^ x080A080A;
857 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
858 x10 = x00515001 | a6;
859 x11 = x10 ^ xCB164CB3;
862 x9E4319E6 = a1 ^ xCB164CB3;
863 x000019E6 = a5 & x9E4319E6;
864 xF429738C = a2 ^ xC71A40BF;
865 xF4296A6A = x000019E6 ^ xF429738C;
866 xC729695A = x33000330 ^ xF4296A6A;
868 xC47C3D2F = x30555745 ^ xF4296A6A;
869 xF77F3F3F = a2 | xC47C3D2F;
870 x9E43E619 = a5 ^ x9E4319E6;
871 x693CD926 = xF77F3F3F ^ x9E43E619;
872 x20 = x30555745 & a6;
873 x21 = x20 ^ x693CD926;
876 xF719A695 = x3030CFCF ^ xC729695A;
877 xF4FF73FF = a4 | xF429738C;
878 x03E6D56A = xF719A695 ^ xF4FF73FF;
879 x56B3803F = a1 ^ x03E6D56A;
880 x30 = x56B3803F & a6;
881 x31 = x30 ^ xC729695A;
884 xF700A600 = xF719A695 & ~a4;
885 x61008000 = x693CD926 & xF700A600;
886 x03B7856B = x00515001 ^ x03E6D56A;
887 x62B7056B = x61008000 ^ x03B7856B;
888 x00 = x62B7056B | a6;
889 x01 = x00 ^ xC729695A;
896 #if defined IS_AMD || defined IS_GENERIC
899 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
900 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
902 * Gate counts: 49 44 46 33 48 46 46 41
905 * Several same-gate-count expressions for each S-box are included (for use on
906 * different CPUs/GPUs).
908 * These Boolean expressions corresponding to DES S-boxes have been generated
909 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
910 * John the Ripper password cracker: http://www.openwall.com/john/
911 * Being mathematical formulas, they are not copyrighted and are free for reuse
914 * This file (a specific representation of the S-box expressions, surrounding
915 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
916 * Redistribution and use in source and binary forms, with or without
917 * modification, are permitted. (This is a heavily cut-down "BSD license".)
919 * The effort has been sponsored by Rapid7: http://www.rapid7.com
922 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
924 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
926 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
927 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
928 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
929 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
930 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
931 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
932 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
933 u32 x00, x01, x10, x11, x20, x21, x30, x31;
935 x55005500 = a1 & ~a5;
936 x5A0F5A0F = a4 ^ x55005500;
939 x22226666 = x3333FFFF & x66666666;
940 x2D2D6969 = a4 ^ x22226666;
941 x25202160 = x2D2D6969 & ~x5A0F5A0F;
944 x33CCCC33 = a3 ^ x00FFFF00;
945 x4803120C = x5A0F5A0F & ~x33CCCC33;
946 x2222FFFF = a6 | x22226666;
947 x6A21EDF3 = x4803120C ^ x2222FFFF;
948 x4A01CC93 = x6A21EDF3 & ~x25202160;
951 x7F75FFFF = x6A21EDF3 | x5555FFFF;
952 x00D20096 = a5 & ~x2D2D6969;
953 x7FA7FF69 = x7F75FFFF ^ x00D20096;
955 x0A0A0000 = a4 & ~x5555FFFF;
956 x0AD80096 = x00D20096 ^ x0A0A0000;
957 x00999900 = x00FFFF00 & ~x66666666;
958 x0AD99996 = x0AD80096 | x00999900;
960 x22332233 = a3 & ~x55005500;
961 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
962 x054885C0 = x257AA5F0 & ~x22332233;
963 xFAB77A3F = ~x054885C0;
964 x2221EDF3 = x3333FFFF & x6A21EDF3;
965 xD89697CC = xFAB77A3F ^ x2221EDF3;
966 x20 = x7FA7FF69 & ~a2;
967 x21 = x20 ^ xD89697CC;
970 x05B77AC0 = x00FFFF00 ^ x054885C0;
971 x05F77AD6 = x00D20096 | x05B77AC0;
972 x36C48529 = x3333FFFF ^ x05F77AD6;
973 x6391D07C = a1 ^ x36C48529;
974 xBB0747B0 = xD89697CC ^ x6391D07C;
975 x00 = x25202160 | a2;
976 x01 = x00 ^ xBB0747B0;
979 x4C460000 = x3333FFFF ^ x7F75FFFF;
980 x4EDF9996 = x0AD99996 | x4C460000;
981 x2D4E49EA = x6391D07C ^ x4EDF9996;
982 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
983 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
984 x10 = x4A01CC93 | a2;
985 x11 = x10 ^ x96B1B65A;
988 x5AFF5AFF = a5 | x5A0F5A0F;
989 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
990 x4201C010 = x4A01CC93 & x6391D07C;
991 x10B0D205 = x52B11215 ^ x4201C010;
992 x30 = x10B0D205 | a2;
993 x31 = x30 ^ x0AD99996;
997 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1000 u32 x55550000, x00AA00FF, x33BB33FF;
1001 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
1002 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
1003 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
1004 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
1005 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
1006 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
1007 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
1008 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1010 x33CC33CC = a2 ^ a5;
1012 x55550000 = a1 & ~a6;
1013 x00AA00FF = a5 & ~x55550000;
1014 x33BB33FF = a2 | x00AA00FF;
1016 x33CC0000 = x33CC33CC & ~a6;
1017 x11441144 = a1 & x33CC33CC;
1018 x11BB11BB = a5 ^ x11441144;
1019 x003311BB = x11BB11BB & ~x33CC0000;
1021 x00000F0F = a3 & a6;
1022 x336600FF = x00AA00FF ^ x33CC0000;
1023 x332200FF = x33BB33FF & x336600FF;
1024 x332200F0 = x332200FF & ~x00000F0F;
1026 x0302000F = a3 & x332200FF;
1028 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
1029 x33CCCC33 = a6 ^ x33CC33CC;
1030 x33CCC030 = x33CCCC33 & ~x00000F0F;
1031 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
1032 x10 = a4 & ~x332200F0;
1033 x11 = x10 ^ x9A646A95;
1036 x00333303 = a2 & ~x33CCC030;
1037 x118822B8 = x11BB11BB ^ x00333303;
1038 xA8208805 = xA9A8AAA5 & ~x118822B8;
1039 x3CC3C33C = a3 ^ x33CCCC33;
1040 x94E34B39 = xA8208805 ^ x3CC3C33C;
1041 x00 = x33BB33FF & ~a4;
1042 x01 = x00 ^ x94E34B39;
1045 x0331330C = x0302000F ^ x00333303;
1046 x3FF3F33C = x3CC3C33C | x0331330C;
1047 xA9DF596A = x33BB33FF ^ x9A646A95;
1048 xA9DF5F6F = x00000F0F | xA9DF596A;
1049 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
1051 xA9466A6A = x332200FF ^ x9A646A95;
1052 x3DA52153 = x94E34B39 ^ xA9466A6A;
1053 x29850143 = xA9DF5F6F & x3DA52153;
1054 x33C0330C = x33CC33CC & x3FF3F33C;
1055 x1A45324F = x29850143 ^ x33C0330C;
1056 x20 = x1A45324F | a4;
1057 x21 = x20 ^ x962CAC53;
1060 x0A451047 = x1A45324F & ~x118822B8;
1061 xBBDFDD7B = x33CCCC33 | xA9DF596A;
1062 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
1063 x30 = x003311BB | a4;
1064 x31 = x30 ^ xB19ACD3C;
1068 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1070 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
1071 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
1072 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
1073 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
1074 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
1075 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
1076 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
1077 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
1078 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1080 x44444444 = a1 & ~a2;
1081 x0F0FF0F0 = a3 ^ a6;
1082 x4F4FF4F4 = x44444444 | x0F0FF0F0;
1083 x00FFFF00 = a4 ^ a6;
1084 x00AAAA00 = x00FFFF00 & ~a1;
1085 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
1087 x3C3CC3C3 = a2 ^ x0F0FF0F0;
1088 x3C3C0000 = x3C3CC3C3 & ~a6;
1089 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
1090 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
1092 x00005EF4 = a6 & x4FE55EF4;
1093 x00FF5EFF = a4 | x00005EF4;
1094 x00555455 = a1 & x00FF5EFF;
1095 x3C699796 = x3C3CC3C3 ^ x00555455;
1096 x30 = x4FE55EF4 & ~a5;
1097 x31 = x30 ^ x3C699796;
1100 x000FF000 = x0F0FF0F0 & x00FFFF00;
1101 x55AA55AA = a1 ^ a4;
1102 x26D9A15E = x7373F4F4 ^ x55AA55AA;
1103 x2FDFAF5F = a3 | x26D9A15E;
1104 x2FD00F5F = x2FDFAF5F & ~x000FF000;
1106 x55AAFFAA = x00AAAA00 | x55AA55AA;
1107 x28410014 = x3C699796 & ~x55AAFFAA;
1108 x000000FF = a4 & a6;
1109 x000000CC = x000000FF & ~a2;
1110 x284100D8 = x28410014 ^ x000000CC;
1112 x204100D0 = x7373F4F4 & x284100D8;
1113 x3C3CC3FF = x3C3CC3C3 | x000000FF;
1114 x1C3CC32F = x3C3CC3FF & ~x204100D0;
1115 x4969967A = a1 ^ x1C3CC32F;
1116 x10 = x2FD00F5F & a5;
1117 x11 = x10 ^ x4969967A;
1120 x4CC44CC4 = x4FE55EF4 & ~a2;
1121 x40C040C0 = x4CC44CC4 & ~a3;
1122 xC3C33C3C = ~x3C3CC3C3;
1123 x9669C396 = x55AAFFAA ^ xC3C33C3C;
1124 xD6A98356 = x40C040C0 ^ x9669C396;
1125 x00 = a5 & ~x0C840A00;
1126 x01 = x00 ^ xD6A98356;
1129 xD6E9C3D6 = x40C040C0 | x9669C396;
1130 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
1131 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
1132 x001A000B = a4 & ~x4FE55EF4;
1133 x9A1F2D1B = x9A072D12 | x001A000B;
1134 x20 = a5 & ~x284100D8;
1135 x21 = x20 ^ x9A1F2D1B;
1139 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1141 u32 x5A5A5A5A, x0F0FF0F0;
1142 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
1143 x52FBCA0F, x61C8F93C;
1144 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
1145 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
1146 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
1147 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1149 x5A5A5A5A = a1 ^ a3;
1150 x0F0FF0F0 = a3 ^ a5;
1151 x33FF33FF = a2 | a4;
1152 x33FFCC00 = a5 ^ x33FF33FF;
1153 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
1154 x0C0CC0C0 = x0F0FF0F0 & ~a2;
1155 x0CF3C03F = a4 ^ x0C0CC0C0;
1156 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
1157 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
1158 x61C8F93C = a2 ^ x52FBCA0F;
1160 x00C0C03C = x0CF3C03F & x61C8F93C;
1161 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
1162 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
1163 x30908326 = x3B92A366 & ~x0F0F30C0;
1164 x3C90B3D6 = x0C0030F0 ^ x30908326;
1166 x33CC33CC = a2 ^ a4;
1167 x0C0CFFFF = a5 | x0C0CC0C0;
1168 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
1169 x04124C11 = x379E5C99 & ~x33CC33CC;
1170 x56E9861E = x52FBCA0F ^ x04124C11;
1171 x00 = a6 & ~x3C90B3D6;
1172 x01 = x00 ^ x56E9861E;
1175 xA91679E1 = ~x56E9861E;
1176 x10 = x3C90B3D6 & ~a6;
1177 x11 = x10 ^ xA91679E1;
1180 x9586CA37 = x3C90B3D6 ^ xA91679E1;
1181 x8402C833 = x9586CA37 & ~x33CC33CC;
1182 x84C2C83F = x00C0C03C | x8402C833;
1183 xB35C94A6 = x379E5C99 ^ x84C2C83F;
1184 x20 = x61C8F93C | a6;
1185 x21 = x20 ^ xB35C94A6;
1188 x30 = a6 & x61C8F93C;
1189 x31 = x30 ^ xB35C94A6;
1193 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1195 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
1196 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
1197 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
1198 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
1199 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
1200 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
1201 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
1202 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
1203 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1205 x77777777 = a1 | a3;
1206 x77770000 = x77777777 & ~a6;
1207 x22225555 = a1 ^ x77770000;
1208 x11116666 = a3 ^ x22225555;
1209 x1F1F6F6F = a4 | x11116666;
1211 x70700000 = x77770000 & ~a4;
1212 x43433333 = a3 ^ x70700000;
1213 x00430033 = a5 & x43433333;
1214 x55557777 = a1 | x11116666;
1215 x55167744 = x00430033 ^ x55557777;
1216 x5A19784B = a4 ^ x55167744;
1218 x5A1987B4 = a6 ^ x5A19784B;
1219 x7A3BD7F5 = x22225555 | x5A1987B4;
1220 x003B00F5 = a5 & x7A3BD7F5;
1221 x221955A0 = x22225555 ^ x003B00F5;
1222 x05050707 = a4 & x55557777;
1223 x271C52A7 = x221955A0 ^ x05050707;
1225 x2A2A82A0 = x7A3BD7F5 & ~a1;
1226 x6969B193 = x43433333 ^ x2A2A82A0;
1227 x1FE06F90 = a5 ^ x1F1F6F6F;
1228 x16804E00 = x1FE06F90 & ~x6969B193;
1229 xE97FB1FF = ~x16804E00;
1230 x20 = xE97FB1FF & ~a2;
1231 x21 = x20 ^ x5A19784B;
1234 x43403302 = x43433333 & ~x003B00F5;
1235 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
1236 x37DEFFB7 = x271C52A7 | x35CAED30;
1237 x349ECCB5 = x37DEFFB7 & ~x43403302;
1238 x0B01234A = x1F1F6F6F & ~x349ECCB5;
1240 x101884B4 = x5A1987B4 & x349ECCB5;
1241 x0FF8EB24 = x1FE06F90 ^ x101884B4;
1242 x41413333 = x43433333 & x55557777;
1243 x4FF9FB37 = x0FF8EB24 | x41413333;
1244 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
1245 x30 = x4FC2FBC2 & a2;
1246 x31 = x30 ^ x271C52A7;
1249 x22222222 = a1 ^ x77777777;
1250 x16BCEE97 = x349ECCB5 ^ x22222222;
1251 x0F080B04 = a4 & x0FF8EB24;
1252 x19B4E593 = x16BCEE97 ^ x0F080B04;
1253 x00 = x0B01234A | a2;
1254 x01 = x00 ^ x19B4E593;
1257 x5C5C5C5C = x1F1F6F6F ^ x43433333;
1258 x4448184C = x5C5C5C5C & ~x19B4E593;
1259 x2DDABE71 = x22225555 ^ x0FF8EB24;
1260 x6992A63D = x4448184C ^ x2DDABE71;
1261 x10 = x1F1F6F6F & a2;
1262 x11 = x10 ^ x6992A63D;
1266 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1269 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
1270 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
1271 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
1272 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
1273 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
1274 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
1275 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
1276 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
1277 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1279 x33CC33CC = a2 ^ a5;
1281 x3333FFFF = a2 | a6;
1282 x11115555 = a1 & x3333FFFF;
1283 x22DD6699 = x33CC33CC ^ x11115555;
1284 x22DD9966 = a6 ^ x22DD6699;
1285 x00220099 = a5 & ~x22DD9966;
1287 x00551144 = a1 & x22DD9966;
1288 x33662277 = a2 ^ x00551144;
1289 x5A5A5A5A = a1 ^ a3;
1290 x7B7E7A7F = x33662277 | x5A5A5A5A;
1291 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
1293 x09030C06 = a3 & x59A31CE6;
1294 x09030000 = x09030C06 & ~a6;
1295 x336622FF = x00220099 | x33662277;
1296 x3A6522FF = x09030000 ^ x336622FF;
1297 x30 = x3A6522FF & a4;
1298 x31 = x30 ^ x59A31CE6;
1301 x484D494C = a2 ^ x7B7E7A7F;
1302 x0000B6B3 = a6 & ~x484D494C;
1303 x0F0FB9BC = a3 ^ x0000B6B3;
1304 x00FC00F9 = a5 & ~x09030C06;
1305 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
1307 x5DF75DF7 = a1 | x59A31CE6;
1308 x116600F7 = x336622FF & x5DF75DF7;
1309 x1E69B94B = x0F0FB9BC ^ x116600F7;
1310 x1668B94B = x1E69B94B & ~x09030000;
1311 x20 = x00220099 | a4;
1312 x21 = x20 ^ x1668B94B;
1315 x7B7B7B7B = a2 | x5A5A5A5A;
1316 x411E5984 = x3A6522FF ^ x7B7B7B7B;
1317 x1FFFFDFD = x11115555 | x0FFFB9FD;
1318 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
1320 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
1321 x004B002D = a5 & ~x3CB4DFD2;
1322 xB7B2B6B3 = ~x484D494C;
1323 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
1324 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
1325 x10 = xCC82CDE5 & ~a4;
1326 x11 = x10 ^ x5EE1A479;
1329 x0055EEBB = a6 ^ x00551144;
1330 x5A5AECE9 = a1 ^ x0F0FB9BC;
1331 x0050ECA9 = x0055EEBB & x5A5AECE9;
1332 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
1333 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
1334 x00 = x0FFFB9FD & ~a4;
1335 x01 = x00 ^ xC59A2D67;
1339 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1341 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
1342 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
1343 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
1344 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
1345 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
1346 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
1347 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
1348 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
1349 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1351 x0FF00FF0 = a4 ^ a5;
1352 x3CC33CC3 = a3 ^ x0FF00FF0;
1353 x00003CC3 = a6 & x3CC33CC3;
1354 x0F000F00 = a4 & x0FF00FF0;
1355 x5A555A55 = a2 ^ x0F000F00;
1356 x00001841 = x00003CC3 & x5A555A55;
1358 x00000F00 = a6 & x0F000F00;
1359 x33333C33 = a3 ^ x00000F00;
1360 x7B777E77 = x5A555A55 | x33333C33;
1361 x0FF0F00F = a6 ^ x0FF00FF0;
1362 x74878E78 = x7B777E77 ^ x0FF0F00F;
1363 x30 = a1 & ~x00001841;
1364 x31 = x30 ^ x74878E78;
1367 x003C003C = a5 & ~x3CC33CC3;
1368 x5A7D5A7D = x5A555A55 | x003C003C;
1369 x333300F0 = x00003CC3 ^ x33333C33;
1370 x694E5A8D = x5A7D5A7D ^ x333300F0;
1372 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
1373 x000F0303 = a4 & ~x0FF0CCCC;
1374 x5A505854 = x5A555A55 & ~x000F0303;
1375 x33CC000F = a5 ^ x333300F0;
1376 x699C585B = x5A505854 ^ x33CC000F;
1378 x7F878F78 = x0F000F00 | x74878E78;
1379 x21101013 = a3 & x699C585B;
1380 x7F979F7B = x7F878F78 | x21101013;
1381 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
1382 x4F9493BB = x7F979F7B ^ x30030CC0;
1383 x00 = x4F9493BB & ~a1;
1384 x01 = x00 ^ x694E5A8D;
1387 x6F9CDBFB = x699C585B | x4F9493BB;
1388 x0000DBFB = a6 & x6F9CDBFB;
1389 x00005151 = a2 & x0000DBFB;
1390 x26DAC936 = x694E5A8D ^ x4F9493BB;
1391 x26DA9867 = x00005151 ^ x26DAC936;
1393 x27DA9877 = x21101013 | x26DA9867;
1394 x27DA438C = x0000DBFB ^ x27DA9877;
1395 x2625C9C9 = a5 ^ x26DAC936;
1396 x27FFCBCD = x27DA438C | x2625C9C9;
1397 x20 = x27FFCBCD & a1;
1398 x21 = x20 ^ x699C585B;
1401 x27FF1036 = x0000DBFB ^ x27FFCBCD;
1402 x27FF103E = x003C003C | x27FF1036;
1403 xB06B6C44 = ~x4F9493BB;
1404 x97947C7A = x27FF103E ^ xB06B6C44;
1405 x10 = x97947C7A & ~a1;
1406 x11 = x10 ^ x26DA9867;
1410 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1412 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
1413 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
1414 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
1415 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
1416 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
1417 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
1418 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
1419 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1421 x0C0C0C0C = a3 & ~a2;
1422 x0000F0F0 = a5 & ~a3;
1423 x00FFF00F = a4 ^ x0000F0F0;
1424 x00555005 = a1 & x00FFF00F;
1425 x00515001 = x00555005 & ~x0C0C0C0C;
1427 x33000330 = a2 & ~x00FFF00F;
1428 x77555775 = a1 | x33000330;
1429 x30303030 = a2 & ~a3;
1430 x3030CFCF = a5 ^ x30303030;
1431 x30104745 = x77555775 & x3030CFCF;
1432 x30555745 = x00555005 | x30104745;
1434 xFF000FF0 = ~x00FFF00F;
1435 xCF1048B5 = x30104745 ^ xFF000FF0;
1436 x080A080A = a3 & ~x77555775;
1437 xC71A40BF = xCF1048B5 ^ x080A080A;
1438 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
1439 x10 = x00515001 | a6;
1440 x11 = x10 ^ xCB164CB3;
1443 x9E4319E6 = a1 ^ xCB164CB3;
1444 x000019E6 = a5 & x9E4319E6;
1445 xF429738C = a2 ^ xC71A40BF;
1446 xF4296A6A = x000019E6 ^ xF429738C;
1447 xC729695A = x33000330 ^ xF4296A6A;
1449 xC47C3D2F = x30555745 ^ xF4296A6A;
1450 xF77F3F3F = a2 | xC47C3D2F;
1451 x9E43E619 = a5 ^ x9E4319E6;
1452 x693CD926 = xF77F3F3F ^ x9E43E619;
1453 x20 = x30555745 & a6;
1454 x21 = x20 ^ x693CD926;
1457 xF719A695 = x3030CFCF ^ xC729695A;
1458 xF4FF73FF = a4 | xF429738C;
1459 x03E6D56A = xF719A695 ^ xF4FF73FF;
1460 x56B3803F = a1 ^ x03E6D56A;
1461 x30 = x56B3803F & a6;
1462 x31 = x30 ^ xC729695A;
1465 xF700A600 = xF719A695 & ~a4;
1466 x61008000 = x693CD926 & xF700A600;
1467 x03B7856B = x00515001 ^ x03E6D56A;
1468 x62B7056B = x61008000 ^ x03B7856B;
1469 x00 = x62B7056B | a6;
1470 x01 = x00 ^ xC729695A;
1476 #define SWAP(a, b) { u32 tmp=*a;*a=*b;*b=tmp; }
1512 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
1513 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
1514 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
1515 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
1516 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
1517 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
1518 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
1519 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
1520 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
1521 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
1522 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
1523 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
1524 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
1525 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
1526 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
1527 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
1544 #ifdef DESCRYPT_SALT
1546 static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
1548 sXXX_DECL u32 s001 = (0x001 & DESCRYPT_SALT) ? 0xffffffff : 0;
1549 sXXX_DECL u32 s002 = (0x002 & DESCRYPT_SALT) ? 0xffffffff : 0;
1550 sXXX_DECL u32 s004 = (0x004 & DESCRYPT_SALT) ? 0xffffffff : 0;
1551 sXXX_DECL u32 s008 = (0x008 & DESCRYPT_SALT) ? 0xffffffff : 0;
1552 sXXX_DECL u32 s010 = (0x010 & DESCRYPT_SALT) ? 0xffffffff : 0;
1553 sXXX_DECL u32 s020 = (0x020 & DESCRYPT_SALT) ? 0xffffffff : 0;
1554 sXXX_DECL u32 s040 = (0x040 & DESCRYPT_SALT) ? 0xffffffff : 0;
1555 sXXX_DECL u32 s080 = (0x080 & DESCRYPT_SALT) ? 0xffffffff : 0;
1556 sXXX_DECL u32 s100 = (0x100 & DESCRYPT_SALT) ? 0xffffffff : 0;
1557 sXXX_DECL u32 s200 = (0x200 & DESCRYPT_SALT) ? 0xffffffff : 0;
1558 sXXX_DECL u32 s400 = (0x400 & DESCRYPT_SALT) ? 0xffffffff : 0;
1559 sXXX_DECL u32 s800 = (0x800 & DESCRYPT_SALT) ? 0xffffffff : 0;
1561 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
1562 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
1563 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
1564 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
1565 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
1566 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
1567 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
1568 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
1570 for (u32 ii = 0; ii < 25; ii++)
1573 #if CUDA_ARCH >= 500
1583 for (u32 i = 0; i < 2; i++)
1585 if (i) KEYSET10 else KEYSET00
1587 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1588 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1589 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1590 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1591 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1592 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1593 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1594 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1596 if (i) KEYSET11 else KEYSET01
1598 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1599 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1600 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1601 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1602 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1603 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1604 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1605 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1607 if (i) KEYSET12 else KEYSET02
1609 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1610 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1611 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1612 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1613 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1614 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1615 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1616 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1618 if (i) KEYSET13 else KEYSET03
1620 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1621 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1622 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1623 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1624 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1625 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1626 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1627 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1629 if (i) KEYSET14 else KEYSET04
1631 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1632 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1633 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1634 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1635 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1636 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1637 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1638 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1640 if (i) KEYSET15 else KEYSET05
1642 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1643 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1644 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1645 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1646 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1647 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1648 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1649 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1651 if (i) KEYSET16 else KEYSET06
1653 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1654 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1655 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1656 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1657 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1658 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1659 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1660 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1662 if (i) KEYSET17 else KEYSET07
1664 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1665 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1666 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1667 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1668 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1669 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1670 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1671 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1682 static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
1684 sXXX_DECL u32 s001 = (0x001 & SALT) ? 0xffffffff : 0;
1685 sXXX_DECL u32 s002 = (0x002 & SALT) ? 0xffffffff : 0;
1686 sXXX_DECL u32 s004 = (0x004 & SALT) ? 0xffffffff : 0;
1687 sXXX_DECL u32 s008 = (0x008 & SALT) ? 0xffffffff : 0;
1688 sXXX_DECL u32 s010 = (0x010 & SALT) ? 0xffffffff : 0;
1689 sXXX_DECL u32 s020 = (0x020 & SALT) ? 0xffffffff : 0;
1690 sXXX_DECL u32 s040 = (0x040 & SALT) ? 0xffffffff : 0;
1691 sXXX_DECL u32 s080 = (0x080 & SALT) ? 0xffffffff : 0;
1692 sXXX_DECL u32 s100 = (0x100 & SALT) ? 0xffffffff : 0;
1693 sXXX_DECL u32 s200 = (0x200 & SALT) ? 0xffffffff : 0;
1694 sXXX_DECL u32 s400 = (0x400 & SALT) ? 0xffffffff : 0;
1695 sXXX_DECL u32 s800 = (0x800 & SALT) ? 0xffffffff : 0;
1697 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
1698 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
1699 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
1700 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
1701 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
1702 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
1703 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
1704 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
1706 for (u32 ii = 0; ii < 25; ii++)
1709 #if CUDA_ARCH >= 500
1719 for (u32 i = 0; i < 2; i++)
1721 if (i) KEYSET10 else KEYSET00
1723 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1724 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1725 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1726 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1727 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1728 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1729 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1730 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1732 if (i) KEYSET11 else KEYSET01
1734 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1735 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1736 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1737 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1738 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1739 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1740 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1741 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1743 if (i) KEYSET12 else KEYSET02
1745 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1746 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1747 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1748 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1749 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1750 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1751 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1752 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1754 if (i) KEYSET13 else KEYSET03
1756 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1757 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1758 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1759 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1760 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1761 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1762 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1763 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1765 if (i) KEYSET14 else KEYSET04
1767 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1768 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1769 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1770 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1771 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1772 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1773 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1774 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1776 if (i) KEYSET15 else KEYSET05
1778 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1779 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1780 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1781 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1782 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1783 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1784 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1785 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1787 if (i) KEYSET16 else KEYSET06
1789 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1790 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1791 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1792 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1793 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1794 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1795 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1796 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1798 if (i) KEYSET17 else KEYSET07
1800 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1801 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1802 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1803 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1804 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1805 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1806 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1807 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1818 static void transpose32c (u32 data[32])
1820 #define swap(x,y,j,m) \
1821 t = ((x) ^ ((y) >> (j))) & (m); \
1823 (y) = (y) ^ (t << (j));
1827 swap (data[ 0], data[16], 16, 0x0000ffff);
1828 swap (data[ 1], data[17], 16, 0x0000ffff);
1829 swap (data[ 2], data[18], 16, 0x0000ffff);
1830 swap (data[ 3], data[19], 16, 0x0000ffff);
1831 swap (data[ 4], data[20], 16, 0x0000ffff);
1832 swap (data[ 5], data[21], 16, 0x0000ffff);
1833 swap (data[ 6], data[22], 16, 0x0000ffff);
1834 swap (data[ 7], data[23], 16, 0x0000ffff);
1835 swap (data[ 8], data[24], 16, 0x0000ffff);
1836 swap (data[ 9], data[25], 16, 0x0000ffff);
1837 swap (data[10], data[26], 16, 0x0000ffff);
1838 swap (data[11], data[27], 16, 0x0000ffff);
1839 swap (data[12], data[28], 16, 0x0000ffff);
1840 swap (data[13], data[29], 16, 0x0000ffff);
1841 swap (data[14], data[30], 16, 0x0000ffff);
1842 swap (data[15], data[31], 16, 0x0000ffff);
1843 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
1844 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
1845 swap (data[ 2], data[10], 8, 0x00ff00ff);
1846 swap (data[ 3], data[11], 8, 0x00ff00ff);
1847 swap (data[ 4], data[12], 8, 0x00ff00ff);
1848 swap (data[ 5], data[13], 8, 0x00ff00ff);
1849 swap (data[ 6], data[14], 8, 0x00ff00ff);
1850 swap (data[ 7], data[15], 8, 0x00ff00ff);
1851 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
1852 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
1853 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
1854 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
1855 swap (data[ 0], data[ 2], 2, 0x33333333);
1856 swap (data[ 1], data[ 3], 2, 0x33333333);
1857 swap (data[ 0], data[ 1], 1, 0x55555555);
1858 swap (data[ 2], data[ 3], 1, 0x55555555);
1859 swap (data[ 4], data[ 6], 2, 0x33333333);
1860 swap (data[ 5], data[ 7], 2, 0x33333333);
1861 swap (data[ 4], data[ 5], 1, 0x55555555);
1862 swap (data[ 6], data[ 7], 1, 0x55555555);
1863 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
1864 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
1865 swap (data[10], data[14], 4, 0x0f0f0f0f);
1866 swap (data[11], data[15], 4, 0x0f0f0f0f);
1867 swap (data[ 8], data[10], 2, 0x33333333);
1868 swap (data[ 9], data[11], 2, 0x33333333);
1869 swap (data[ 8], data[ 9], 1, 0x55555555);
1870 swap (data[10], data[11], 1, 0x55555555);
1871 swap (data[12], data[14], 2, 0x33333333);
1872 swap (data[13], data[15], 2, 0x33333333);
1873 swap (data[12], data[13], 1, 0x55555555);
1874 swap (data[14], data[15], 1, 0x55555555);
1875 swap (data[16], data[24], 8, 0x00ff00ff);
1876 swap (data[17], data[25], 8, 0x00ff00ff);
1877 swap (data[18], data[26], 8, 0x00ff00ff);
1878 swap (data[19], data[27], 8, 0x00ff00ff);
1879 swap (data[20], data[28], 8, 0x00ff00ff);
1880 swap (data[21], data[29], 8, 0x00ff00ff);
1881 swap (data[22], data[30], 8, 0x00ff00ff);
1882 swap (data[23], data[31], 8, 0x00ff00ff);
1883 swap (data[16], data[20], 4, 0x0f0f0f0f);
1884 swap (data[17], data[21], 4, 0x0f0f0f0f);
1885 swap (data[18], data[22], 4, 0x0f0f0f0f);
1886 swap (data[19], data[23], 4, 0x0f0f0f0f);
1887 swap (data[16], data[18], 2, 0x33333333);
1888 swap (data[17], data[19], 2, 0x33333333);
1889 swap (data[16], data[17], 1, 0x55555555);
1890 swap (data[18], data[19], 1, 0x55555555);
1891 swap (data[20], data[22], 2, 0x33333333);
1892 swap (data[21], data[23], 2, 0x33333333);
1893 swap (data[20], data[21], 1, 0x55555555);
1894 swap (data[22], data[23], 1, 0x55555555);
1895 swap (data[24], data[28], 4, 0x0f0f0f0f);
1896 swap (data[25], data[29], 4, 0x0f0f0f0f);
1897 swap (data[26], data[30], 4, 0x0f0f0f0f);
1898 swap (data[27], data[31], 4, 0x0f0f0f0f);
1899 swap (data[24], data[26], 2, 0x33333333);
1900 swap (data[25], data[27], 2, 0x33333333);
1901 swap (data[24], data[25], 1, 0x55555555);
1902 swap (data[26], data[27], 1, 0x55555555);
1903 swap (data[28], data[30], 2, 0x33333333);
1904 swap (data[29], data[31], 2, 0x33333333);
1905 swap (data[28], data[29], 1, 0x55555555);
1906 swap (data[30], data[31], 1, 0x55555555);
1909 static void m01500m (__local u32 *s_S, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
1915 const u32 gid = get_global_id (0);
1916 const u32 lid = get_local_id (0);
1922 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
1928 const u32 w0 = pws[gid].i[0];
1929 const u32 w1 = pws[gid].i[1];
1931 const u32 w0s = (w0 << 1) & 0xfefefefe;
1932 const u32 w1s = (w1 << 1) & 0xfefefefe;
1934 const u32 K00 = -((w0s >> ( 0 + 7)) & 1);
1935 const u32 K01 = -((w0s >> ( 0 + 6)) & 1);
1936 const u32 K02 = -((w0s >> ( 0 + 5)) & 1);
1937 const u32 K03 = -((w0s >> ( 0 + 4)) & 1);
1938 const u32 K04 = -((w0s >> ( 0 + 3)) & 1);
1939 const u32 K05 = -((w0s >> ( 0 + 2)) & 1);
1940 const u32 K06 = -((w0s >> ( 0 + 1)) & 1);
1941 const u32 K07 = -((w0s >> ( 8 + 7)) & 1);
1942 const u32 K08 = -((w0s >> ( 8 + 6)) & 1);
1943 const u32 K09 = -((w0s >> ( 8 + 5)) & 1);
1944 const u32 K10 = -((w0s >> ( 8 + 4)) & 1);
1945 const u32 K11 = -((w0s >> ( 8 + 3)) & 1);
1946 const u32 K12 = -((w0s >> ( 8 + 2)) & 1);
1947 const u32 K13 = -((w0s >> ( 8 + 1)) & 1);
1948 const u32 K14 = -((w0s >> (16 + 7)) & 1);
1949 const u32 K15 = -((w0s >> (16 + 6)) & 1);
1950 const u32 K16 = -((w0s >> (16 + 5)) & 1);
1951 const u32 K17 = -((w0s >> (16 + 4)) & 1);
1952 const u32 K18 = -((w0s >> (16 + 3)) & 1);
1953 const u32 K19 = -((w0s >> (16 + 2)) & 1);
1954 const u32 K20 = -((w0s >> (16 + 1)) & 1);
1955 const u32 K21 = -((w0s >> (24 + 7)) & 1);
1956 const u32 K22 = -((w0s >> (24 + 6)) & 1);
1957 const u32 K23 = -((w0s >> (24 + 5)) & 1);
1958 const u32 K24 = -((w0s >> (24 + 4)) & 1);
1959 const u32 K25 = -((w0s >> (24 + 3)) & 1);
1960 const u32 K26 = -((w0s >> (24 + 2)) & 1);
1961 const u32 K27 = -((w0s >> (24 + 1)) & 1);
1962 const u32 K28 = -((w1s >> ( 0 + 7)) & 1);
1963 const u32 K29 = -((w1s >> ( 0 + 6)) & 1);
1964 const u32 K30 = -((w1s >> ( 0 + 5)) & 1);
1965 const u32 K31 = -((w1s >> ( 0 + 4)) & 1);
1966 const u32 K32 = -((w1s >> ( 0 + 3)) & 1);
1967 const u32 K33 = -((w1s >> ( 0 + 2)) & 1);
1968 const u32 K34 = -((w1s >> ( 0 + 1)) & 1);
1969 const u32 K35 = -((w1s >> ( 8 + 7)) & 1);
1970 const u32 K36 = -((w1s >> ( 8 + 6)) & 1);
1971 const u32 K37 = -((w1s >> ( 8 + 5)) & 1);
1972 const u32 K38 = -((w1s >> ( 8 + 4)) & 1);
1973 const u32 K39 = -((w1s >> ( 8 + 3)) & 1);
1974 const u32 K40 = -((w1s >> ( 8 + 2)) & 1);
1975 const u32 K41 = -((w1s >> ( 8 + 1)) & 1);
1976 const u32 K42 = -((w1s >> (16 + 7)) & 1);
1977 const u32 K43 = -((w1s >> (16 + 6)) & 1);
1978 const u32 K44 = -((w1s >> (16 + 5)) & 1);
1979 const u32 K45 = -((w1s >> (16 + 4)) & 1);
1980 const u32 K46 = -((w1s >> (16 + 3)) & 1);
1981 const u32 K47 = -((w1s >> (16 + 2)) & 1);
1982 const u32 K48 = -((w1s >> (16 + 1)) & 1);
1983 const u32 K49 = -((w1s >> (24 + 7)) & 1);
1984 const u32 K50 = -((w1s >> (24 + 6)) & 1);
1985 const u32 K51 = -((w1s >> (24 + 5)) & 1);
1986 const u32 K52 = -((w1s >> (24 + 4)) & 1);
1987 const u32 K53 = -((w1s >> (24 + 3)) & 1);
1988 const u32 K54 = -((w1s >> (24 + 2)) & 1);
1989 const u32 K55 = -((w1s >> (24 + 1)) & 1);
1995 const u32 pc_pos = get_local_id (1);
1997 const u32 il_pos = pc_pos * 32;
2028 k00 |= words_buf_r[pc_pos].b[ 0];
2029 k01 |= words_buf_r[pc_pos].b[ 1];
2030 k02 |= words_buf_r[pc_pos].b[ 2];
2031 k03 |= words_buf_r[pc_pos].b[ 3];
2032 k04 |= words_buf_r[pc_pos].b[ 4];
2033 k05 |= words_buf_r[pc_pos].b[ 5];
2034 k06 |= words_buf_r[pc_pos].b[ 6];
2035 k07 |= words_buf_r[pc_pos].b[ 7];
2036 k08 |= words_buf_r[pc_pos].b[ 8];
2037 k09 |= words_buf_r[pc_pos].b[ 9];
2038 k10 |= words_buf_r[pc_pos].b[10];
2039 k11 |= words_buf_r[pc_pos].b[11];
2040 k12 |= words_buf_r[pc_pos].b[12];
2041 k13 |= words_buf_r[pc_pos].b[13];
2042 k14 |= words_buf_r[pc_pos].b[14];
2043 k15 |= words_buf_r[pc_pos].b[15];
2044 k16 |= words_buf_r[pc_pos].b[16];
2045 k17 |= words_buf_r[pc_pos].b[17];
2046 k18 |= words_buf_r[pc_pos].b[18];
2047 k19 |= words_buf_r[pc_pos].b[19];
2048 k20 |= words_buf_r[pc_pos].b[20];
2049 k21 |= words_buf_r[pc_pos].b[21];
2050 k22 |= words_buf_r[pc_pos].b[22];
2051 k23 |= words_buf_r[pc_pos].b[23];
2052 k24 |= words_buf_r[pc_pos].b[24];
2053 k25 |= words_buf_r[pc_pos].b[25];
2054 k26 |= words_buf_r[pc_pos].b[26];
2055 k27 |= words_buf_r[pc_pos].b[27];
2125 k00, k01, k02, k03, k04, k05, k06,
2126 k07, k08, k09, k10, k11, k12, k13,
2127 k14, k15, k16, k17, k18, k19, k20,
2128 k21, k22, k23, k24, k25, k26, k27,
2129 K28, K29, K30, K31, K32, K33, K34,
2130 K35, K36, K37, K38, K39, K40, K41,
2131 K42, K43, K44, K45, K46, K47, K48,
2132 K49, K50, K51, K52, K53, K54, K55,
2133 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
2134 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
2135 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
2136 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
2137 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
2138 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
2139 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
2140 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
2210 if (digests_cnt < 16)
2212 for (u32 d = 0; d < digests_cnt; d++)
2214 const u32 final_hash_pos = digests_offset + d;
2216 if (hashes_shown[final_hash_pos]) continue;
2220 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
2221 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
2226 for (int i = 0; i < 32; i++)
2228 const u32 b0 = -((search[0] >> i) & 1);
2229 const u32 b1 = -((search[1] >> i) & 1);
2231 tmpResult |= out[ 0 + i] ^ b0;
2232 tmpResult |= out[32 + i] ^ b1;
2235 if (tmpResult == 0xffffffff) continue;
2237 const u32 slice = 31 - clz (~tmpResult);
2239 const u32 r0 = search[0];
2240 const u32 r1 = search[1];
2253 for (int i = 0; i < 32; i++)
2255 out0[i] = out[ 0 + 31 - i];
2256 out1[i] = out[32 + 31 - i];
2259 transpose32c (out0);
2260 transpose32c (out1);
2263 for (int slice = 0; slice < 32; slice++)
2265 const u32 r0 = out0[31 - slice];
2266 const u32 r1 = out1[31 - slice];
2275 static void m01500s (__local u32 *s_S, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
2281 const u32 gid = get_global_id (0);
2282 const u32 lid = get_local_id (0);
2288 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
2363 const u32 w0 = pws[gid].i[0];
2364 const u32 w1 = pws[gid].i[1];
2366 const u32 w0s = (w0 << 1) & 0xfefefefe;
2367 const u32 w1s = (w1 << 1) & 0xfefefefe;
2369 const u32 K00 = -((w0s >> ( 0 + 7)) & 1);
2370 const u32 K01 = -((w0s >> ( 0 + 6)) & 1);
2371 const u32 K02 = -((w0s >> ( 0 + 5)) & 1);
2372 const u32 K03 = -((w0s >> ( 0 + 4)) & 1);
2373 const u32 K04 = -((w0s >> ( 0 + 3)) & 1);
2374 const u32 K05 = -((w0s >> ( 0 + 2)) & 1);
2375 const u32 K06 = -((w0s >> ( 0 + 1)) & 1);
2376 const u32 K07 = -((w0s >> ( 8 + 7)) & 1);
2377 const u32 K08 = -((w0s >> ( 8 + 6)) & 1);
2378 const u32 K09 = -((w0s >> ( 8 + 5)) & 1);
2379 const u32 K10 = -((w0s >> ( 8 + 4)) & 1);
2380 const u32 K11 = -((w0s >> ( 8 + 3)) & 1);
2381 const u32 K12 = -((w0s >> ( 8 + 2)) & 1);
2382 const u32 K13 = -((w0s >> ( 8 + 1)) & 1);
2383 const u32 K14 = -((w0s >> (16 + 7)) & 1);
2384 const u32 K15 = -((w0s >> (16 + 6)) & 1);
2385 const u32 K16 = -((w0s >> (16 + 5)) & 1);
2386 const u32 K17 = -((w0s >> (16 + 4)) & 1);
2387 const u32 K18 = -((w0s >> (16 + 3)) & 1);
2388 const u32 K19 = -((w0s >> (16 + 2)) & 1);
2389 const u32 K20 = -((w0s >> (16 + 1)) & 1);
2390 const u32 K21 = -((w0s >> (24 + 7)) & 1);
2391 const u32 K22 = -((w0s >> (24 + 6)) & 1);
2392 const u32 K23 = -((w0s >> (24 + 5)) & 1);
2393 const u32 K24 = -((w0s >> (24 + 4)) & 1);
2394 const u32 K25 = -((w0s >> (24 + 3)) & 1);
2395 const u32 K26 = -((w0s >> (24 + 2)) & 1);
2396 const u32 K27 = -((w0s >> (24 + 1)) & 1);
2397 const u32 K28 = -((w1s >> ( 0 + 7)) & 1);
2398 const u32 K29 = -((w1s >> ( 0 + 6)) & 1);
2399 const u32 K30 = -((w1s >> ( 0 + 5)) & 1);
2400 const u32 K31 = -((w1s >> ( 0 + 4)) & 1);
2401 const u32 K32 = -((w1s >> ( 0 + 3)) & 1);
2402 const u32 K33 = -((w1s >> ( 0 + 2)) & 1);
2403 const u32 K34 = -((w1s >> ( 0 + 1)) & 1);
2404 const u32 K35 = -((w1s >> ( 8 + 7)) & 1);
2405 const u32 K36 = -((w1s >> ( 8 + 6)) & 1);
2406 const u32 K37 = -((w1s >> ( 8 + 5)) & 1);
2407 const u32 K38 = -((w1s >> ( 8 + 4)) & 1);
2408 const u32 K39 = -((w1s >> ( 8 + 3)) & 1);
2409 const u32 K40 = -((w1s >> ( 8 + 2)) & 1);
2410 const u32 K41 = -((w1s >> ( 8 + 1)) & 1);
2411 const u32 K42 = -((w1s >> (16 + 7)) & 1);
2412 const u32 K43 = -((w1s >> (16 + 6)) & 1);
2413 const u32 K44 = -((w1s >> (16 + 5)) & 1);
2414 const u32 K45 = -((w1s >> (16 + 4)) & 1);
2415 const u32 K46 = -((w1s >> (16 + 3)) & 1);
2416 const u32 K47 = -((w1s >> (16 + 2)) & 1);
2417 const u32 K48 = -((w1s >> (16 + 1)) & 1);
2418 const u32 K49 = -((w1s >> (24 + 7)) & 1);
2419 const u32 K50 = -((w1s >> (24 + 6)) & 1);
2420 const u32 K51 = -((w1s >> (24 + 5)) & 1);
2421 const u32 K52 = -((w1s >> (24 + 4)) & 1);
2422 const u32 K53 = -((w1s >> (24 + 3)) & 1);
2423 const u32 K54 = -((w1s >> (24 + 2)) & 1);
2424 const u32 K55 = -((w1s >> (24 + 1)) & 1);
2430 const u32 pc_pos = get_local_id (1);
2432 const u32 il_pos = pc_pos * 32;
2463 k00 |= words_buf_r[pc_pos].b[ 0];
2464 k01 |= words_buf_r[pc_pos].b[ 1];
2465 k02 |= words_buf_r[pc_pos].b[ 2];
2466 k03 |= words_buf_r[pc_pos].b[ 3];
2467 k04 |= words_buf_r[pc_pos].b[ 4];
2468 k05 |= words_buf_r[pc_pos].b[ 5];
2469 k06 |= words_buf_r[pc_pos].b[ 6];
2470 k07 |= words_buf_r[pc_pos].b[ 7];
2471 k08 |= words_buf_r[pc_pos].b[ 8];
2472 k09 |= words_buf_r[pc_pos].b[ 9];
2473 k10 |= words_buf_r[pc_pos].b[10];
2474 k11 |= words_buf_r[pc_pos].b[11];
2475 k12 |= words_buf_r[pc_pos].b[12];
2476 k13 |= words_buf_r[pc_pos].b[13];
2477 k14 |= words_buf_r[pc_pos].b[14];
2478 k15 |= words_buf_r[pc_pos].b[15];
2479 k16 |= words_buf_r[pc_pos].b[16];
2480 k17 |= words_buf_r[pc_pos].b[17];
2481 k18 |= words_buf_r[pc_pos].b[18];
2482 k19 |= words_buf_r[pc_pos].b[19];
2483 k20 |= words_buf_r[pc_pos].b[20];
2484 k21 |= words_buf_r[pc_pos].b[21];
2485 k22 |= words_buf_r[pc_pos].b[22];
2486 k23 |= words_buf_r[pc_pos].b[23];
2487 k24 |= words_buf_r[pc_pos].b[24];
2488 k25 |= words_buf_r[pc_pos].b[25];
2489 k26 |= words_buf_r[pc_pos].b[26];
2490 k27 |= words_buf_r[pc_pos].b[27];
2560 k00, k01, k02, k03, k04, k05, k06,
2561 k07, k08, k09, k10, k11, k12, k13,
2562 k14, k15, k16, k17, k18, k19, k20,
2563 k21, k22, k23, k24, k25, k26, k27,
2564 K28, K29, K30, K31, K32, K33, K34,
2565 K35, K36, K37, K38, K39, K40, K41,
2566 K42, K43, K44, K45, K46, K47, K48,
2567 K49, K50, K51, K52, K53, K54, K55,
2568 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
2569 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
2570 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
2571 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
2572 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
2573 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
2574 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
2575 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
2580 tmpResult |= D00 ^ S00;
2581 tmpResult |= D01 ^ S01;
2582 tmpResult |= D02 ^ S02;
2583 tmpResult |= D03 ^ S03;
2584 tmpResult |= D04 ^ S04;
2585 tmpResult |= D05 ^ S05;
2586 tmpResult |= D06 ^ S06;
2587 tmpResult |= D07 ^ S07;
2588 tmpResult |= D08 ^ S08;
2589 tmpResult |= D09 ^ S09;
2590 tmpResult |= D10 ^ S10;
2591 tmpResult |= D11 ^ S11;
2592 tmpResult |= D12 ^ S12;
2593 tmpResult |= D13 ^ S13;
2594 tmpResult |= D14 ^ S14;
2595 tmpResult |= D15 ^ S15;
2596 tmpResult |= D16 ^ S16;
2597 tmpResult |= D17 ^ S17;
2598 tmpResult |= D18 ^ S18;
2599 tmpResult |= D19 ^ S19;
2600 tmpResult |= D20 ^ S20;
2601 tmpResult |= D21 ^ S21;
2602 tmpResult |= D22 ^ S22;
2603 tmpResult |= D23 ^ S23;
2604 tmpResult |= D24 ^ S24;
2605 tmpResult |= D25 ^ S25;
2606 tmpResult |= D26 ^ S26;
2607 tmpResult |= D27 ^ S27;
2608 tmpResult |= D28 ^ S28;
2609 tmpResult |= D29 ^ S29;
2610 tmpResult |= D30 ^ S30;
2611 tmpResult |= D31 ^ S31;
2612 tmpResult |= D32 ^ S32;
2613 tmpResult |= D33 ^ S33;
2614 tmpResult |= D34 ^ S34;
2615 tmpResult |= D35 ^ S35;
2616 tmpResult |= D36 ^ S36;
2617 tmpResult |= D37 ^ S37;
2618 tmpResult |= D38 ^ S38;
2619 tmpResult |= D39 ^ S39;
2620 tmpResult |= D40 ^ S40;
2621 tmpResult |= D41 ^ S41;
2622 tmpResult |= D42 ^ S42;
2623 tmpResult |= D43 ^ S43;
2624 tmpResult |= D44 ^ S44;
2625 tmpResult |= D45 ^ S45;
2626 tmpResult |= D46 ^ S46;
2627 tmpResult |= D47 ^ S47;
2629 if (tmpResult == 0xffffffff) return;
2631 tmpResult |= D48 ^ S48;
2632 tmpResult |= D49 ^ S49;
2633 tmpResult |= D50 ^ S50;
2634 tmpResult |= D51 ^ S51;
2635 tmpResult |= D52 ^ S52;
2636 tmpResult |= D53 ^ S53;
2637 tmpResult |= D54 ^ S54;
2638 tmpResult |= D55 ^ S55;
2639 tmpResult |= D56 ^ S56;
2640 tmpResult |= D57 ^ S57;
2641 tmpResult |= D58 ^ S58;
2642 tmpResult |= D59 ^ S59;
2643 tmpResult |= D60 ^ S60;
2644 tmpResult |= D61 ^ S61;
2645 tmpResult |= D62 ^ S62;
2646 tmpResult |= D63 ^ S63;
2648 if (tmpResult == 0xffffffff) return;
2650 const u32 slice = 31 - clz (~tmpResult);
2656 // transpose bitslice mod : attention race conditions, need different buffers for *in and *out
2659 __kernel void m01500_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
2661 const u32 gid = get_global_id (0);
2663 const u32 block = gid / 32;
2664 const u32 slice = gid % 32;
2666 const u32 w0 = mod[gid];
2668 const u32 w0s = (w0 << 1) & 0xfefefefe;
2671 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
2673 atomic_or (&words_buf_r[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
2674 atomic_or (&words_buf_r[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice));
2675 atomic_or (&words_buf_r[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice));
2676 atomic_or (&words_buf_r[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice));
2677 atomic_or (&words_buf_r[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice));
2678 atomic_or (&words_buf_r[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice));
2679 atomic_or (&words_buf_r[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice));
2683 __kernel void m01500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2689 const u32 gid = get_global_id (0);
2690 const u32 lid = get_local_id (0);
2691 const u32 vid = get_local_id (1);
2693 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
2694 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
2696 __local u32 s_S[64];
2700 s_S[ 0 + vid] = -((s0 >> vid) & 1);
2704 s_S[32 + vid] = -((s1 >> vid) & 1);
2707 barrier (CLK_LOCAL_MEM_FENCE);
2709 if (gid >= gid_max) return;
2715 m01500m (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
2718 __kernel void m01500_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2722 __kernel void m01500_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2726 __kernel void m01500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2732 const u32 gid = get_global_id (0);
2733 const u32 lid = get_local_id (0);
2734 const u32 vid = get_local_id (1);
2736 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
2737 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
2739 __local u32 s_S[64];
2743 s_S[ 0 + vid] = -((s0 >> vid) & 1);
2747 s_S[32 + vid] = -((s1 >> vid) & 1);
2750 barrier (CLK_LOCAL_MEM_FENCE);
2752 if (gid >= gid_max) return;
2758 m01500s (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
2761 __kernel void m01500_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2765 __kernel void m01500_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)