2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes for maxwell were taken from DeepLearningJohnDoe, license below
5 * : sboxes for others were takes fron JtR, license below
10 #include "include/constants.h"
11 #include "include/kernel_vendor.h"
18 #include "include/kernel_functions.c"
19 #include "OpenCL/types_ocl.c"
20 #include "OpenCL/common.c"
22 #define COMPARE_S "OpenCL/check_single_comp4_bs.c"
23 #define COMPARE_M "OpenCL/check_multi_comp4_bs.c"
25 #define myselx(a,b,c) ((c) ? (b) : (a))
32 // Bitslice DES S-boxes with LOP3.LUT instructions
33 // For NVIDIA Maxwell architecture and CUDA 7.5 RC
34 // by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
36 // Gate counts: 25 24 25 18 25 24 24 23
38 // Depth: 8 7 7 6 8 10 10 8
41 // Note that same S-box function with a lower gate count isn't necessarily faster.
43 // These Boolean expressions corresponding to DES S-boxes were
44 // discovered by <deeplearningjohndoe at gmail.com>
46 // This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
47 // Redistribution and use in source and binary forms, with or without
48 // modification, are permitted.
50 // The underlying mathematical formulas are NOT copyrighted.
53 #define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));
55 void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
57 LUT(xAA55AA5500550055, a1, a4, a6, 0xC1)
58 LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
59 LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
60 LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
61 LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
62 LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
63 LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
64 LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
65 LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
66 LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
67 LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
68 LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
69 LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
70 LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
71 LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
72 LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
73 LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
74 LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
75 LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
76 LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
77 LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
78 LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
79 LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
80 LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
81 LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)
89 void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
91 LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97)
92 LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
93 LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76)
94 LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
95 LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
96 LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65)
97 LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
98 LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
99 LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
100 LUT(x3333CCCC00000000, a2, a5, a6, 0x14)
101 LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
102 LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
103 LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
104 LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
105 LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
106 LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
107 LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
108 LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
109 LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
110 LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
111 LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
112 LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
113 LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
114 LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)
122 void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
124 LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
125 LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
126 LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
127 LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
128 LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
129 LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
130 LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
131 LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49)
132 LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
133 LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
134 LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
135 LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
136 LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
137 LUT(x500F500F500F500F, a1, a3, a4, 0x98)
138 LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
139 LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
140 LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
141 LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
142 LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
143 LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
144 LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
145 LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
146 LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
147 LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
148 LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)
156 void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
158 LUT(x55F055F055F055F0, a1, a3, a4, 0x72)
159 LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
160 LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59)
161 LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
162 LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
163 LUT(x9999666699996666, a1, a2, a5, 0x69)
164 LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
165 LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
166 LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
167 LUT(x4848484848484848, a1, a2, a3, 0x12)
168 LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
169 LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
170 LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
171 LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
172 LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
173 LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
174 LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
175 LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)
183 void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
185 LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
186 LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9)
187 LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
188 LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
189 LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
190 LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
191 LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
192 LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
193 LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
194 LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
195 LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
196 LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
197 LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
198 LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
199 LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
200 LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
201 LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
202 LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
203 LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
204 LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
205 LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
206 LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
207 LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
208 LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
209 LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)
217 void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
219 LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2)
220 LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
221 LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
222 LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
223 LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
224 LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
225 LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
226 LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
227 LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
228 LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
229 LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
230 LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
231 LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
232 LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
233 LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
234 LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
235 LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
236 LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
237 LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
238 LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
239 LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
240 LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
241 LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
242 LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)
250 void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
252 LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
253 LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
254 LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
255 LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
256 LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
257 LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
258 LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
259 LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
260 LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
261 LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
262 LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
263 LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
264 LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
265 LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
266 LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
267 LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23)
268 LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
269 LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
270 LUT(xA050A050A050A050, a1, a3, a4, 0x21)
271 LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
272 LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
273 LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
274 LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
275 LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)
283 void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
285 LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
286 LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
287 LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
288 LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
289 LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
290 LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
291 LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
292 LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
293 LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
294 LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
295 LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
296 LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
297 LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
298 LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
299 LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
300 LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
301 LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
302 LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
303 LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
304 LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
305 LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
306 LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
307 LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)
318 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
319 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
321 * Gate counts: 49 44 46 33 48 46 46 41
324 * Several same-gate-count expressions for each S-box are included (for use on
325 * different CPUs/GPUs).
327 * These Boolean expressions corresponding to DES S-boxes have been generated
328 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
329 * John the Ripper password cracker: http://www.openwall.com/john/
330 * Being mathematical formulas, they are not copyrighted and are free for reuse
333 * This file (a specific representation of the S-box expressions, surrounding
334 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
335 * Redistribution and use in source and binary forms, with or without
336 * modification, are permitted. (This is a heavily cut-down "BSD license".)
338 * The effort has been sponsored by Rapid7: http://www.rapid7.com
341 void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
343 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
345 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
346 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
347 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
348 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
349 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
350 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
351 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
352 u32 x00, x01, x10, x11, x20, x21, x30, x31;
354 x55005500 = a1 & ~a5;
355 x5A0F5A0F = a4 ^ x55005500;
358 x22226666 = x3333FFFF & x66666666;
359 x2D2D6969 = a4 ^ x22226666;
360 x25202160 = x2D2D6969 & ~x5A0F5A0F;
363 x33CCCC33 = a3 ^ x00FFFF00;
364 x4803120C = x5A0F5A0F & ~x33CCCC33;
365 x2222FFFF = a6 | x22226666;
366 x6A21EDF3 = x4803120C ^ x2222FFFF;
367 x4A01CC93 = x6A21EDF3 & ~x25202160;
370 x7F75FFFF = x6A21EDF3 | x5555FFFF;
371 x00D20096 = a5 & ~x2D2D6969;
372 x7FA7FF69 = x7F75FFFF ^ x00D20096;
374 x0A0A0000 = a4 & ~x5555FFFF;
375 x0AD80096 = x00D20096 ^ x0A0A0000;
376 x00999900 = x00FFFF00 & ~x66666666;
377 x0AD99996 = x0AD80096 | x00999900;
379 x22332233 = a3 & ~x55005500;
380 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
381 x054885C0 = x257AA5F0 & ~x22332233;
382 xFAB77A3F = ~x054885C0;
383 x2221EDF3 = x3333FFFF & x6A21EDF3;
384 xD89697CC = xFAB77A3F ^ x2221EDF3;
385 x20 = x7FA7FF69 & ~a2;
386 x21 = x20 ^ xD89697CC;
389 x05B77AC0 = x00FFFF00 ^ x054885C0;
390 x05F77AD6 = x00D20096 | x05B77AC0;
391 x36C48529 = x3333FFFF ^ x05F77AD6;
392 x6391D07C = a1 ^ x36C48529;
393 xBB0747B0 = xD89697CC ^ x6391D07C;
394 x00 = x25202160 | a2;
395 x01 = x00 ^ xBB0747B0;
398 x4C460000 = x3333FFFF ^ x7F75FFFF;
399 x4EDF9996 = x0AD99996 | x4C460000;
400 x2D4E49EA = x6391D07C ^ x4EDF9996;
401 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
402 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
403 x10 = x4A01CC93 | a2;
404 x11 = x10 ^ x96B1B65A;
407 x5AFF5AFF = a5 | x5A0F5A0F;
408 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
409 x4201C010 = x4A01CC93 & x6391D07C;
410 x10B0D205 = x52B11215 ^ x4201C010;
411 x30 = x10B0D205 | a2;
412 x31 = x30 ^ x0AD99996;
416 void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
419 u32 x55550000, x00AA00FF, x33BB33FF;
420 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
421 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
422 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
423 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
424 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
425 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
426 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
427 u32 x00, x01, x10, x11, x20, x21, x30, x31;
431 x55550000 = a1 & ~a6;
432 x00AA00FF = a5 & ~x55550000;
433 x33BB33FF = a2 | x00AA00FF;
435 x33CC0000 = x33CC33CC & ~a6;
436 x11441144 = a1 & x33CC33CC;
437 x11BB11BB = a5 ^ x11441144;
438 x003311BB = x11BB11BB & ~x33CC0000;
441 x336600FF = x00AA00FF ^ x33CC0000;
442 x332200FF = x33BB33FF & x336600FF;
443 x332200F0 = x332200FF & ~x00000F0F;
445 x0302000F = a3 & x332200FF;
447 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
448 x33CCCC33 = a6 ^ x33CC33CC;
449 x33CCC030 = x33CCCC33 & ~x00000F0F;
450 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
451 x10 = a4 & ~x332200F0;
452 x11 = x10 ^ x9A646A95;
455 x00333303 = a2 & ~x33CCC030;
456 x118822B8 = x11BB11BB ^ x00333303;
457 xA8208805 = xA9A8AAA5 & ~x118822B8;
458 x3CC3C33C = a3 ^ x33CCCC33;
459 x94E34B39 = xA8208805 ^ x3CC3C33C;
460 x00 = x33BB33FF & ~a4;
461 x01 = x00 ^ x94E34B39;
464 x0331330C = x0302000F ^ x00333303;
465 x3FF3F33C = x3CC3C33C | x0331330C;
466 xA9DF596A = x33BB33FF ^ x9A646A95;
467 xA9DF5F6F = x00000F0F | xA9DF596A;
468 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
470 xA9466A6A = x332200FF ^ x9A646A95;
471 x3DA52153 = x94E34B39 ^ xA9466A6A;
472 x29850143 = xA9DF5F6F & x3DA52153;
473 x33C0330C = x33CC33CC & x3FF3F33C;
474 x1A45324F = x29850143 ^ x33C0330C;
475 x20 = x1A45324F | a4;
476 x21 = x20 ^ x962CAC53;
479 x0A451047 = x1A45324F & ~x118822B8;
480 xBBDFDD7B = x33CCCC33 | xA9DF596A;
481 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
482 x30 = x003311BB | a4;
483 x31 = x30 ^ xB19ACD3C;
487 void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
489 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
490 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
491 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
492 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
493 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
494 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
495 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
496 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
497 u32 x00, x01, x10, x11, x20, x21, x30, x31;
499 x44444444 = a1 & ~a2;
501 x4F4FF4F4 = x44444444 | x0F0FF0F0;
503 x00AAAA00 = x00FFFF00 & ~a1;
504 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
506 x3C3CC3C3 = a2 ^ x0F0FF0F0;
507 x3C3C0000 = x3C3CC3C3 & ~a6;
508 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
509 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
511 x00005EF4 = a6 & x4FE55EF4;
512 x00FF5EFF = a4 | x00005EF4;
513 x00555455 = a1 & x00FF5EFF;
514 x3C699796 = x3C3CC3C3 ^ x00555455;
515 x30 = x4FE55EF4 & ~a5;
516 x31 = x30 ^ x3C699796;
519 x000FF000 = x0F0FF0F0 & x00FFFF00;
521 x26D9A15E = x7373F4F4 ^ x55AA55AA;
522 x2FDFAF5F = a3 | x26D9A15E;
523 x2FD00F5F = x2FDFAF5F & ~x000FF000;
525 x55AAFFAA = x00AAAA00 | x55AA55AA;
526 x28410014 = x3C699796 & ~x55AAFFAA;
528 x000000CC = x000000FF & ~a2;
529 x284100D8 = x28410014 ^ x000000CC;
531 x204100D0 = x7373F4F4 & x284100D8;
532 x3C3CC3FF = x3C3CC3C3 | x000000FF;
533 x1C3CC32F = x3C3CC3FF & ~x204100D0;
534 x4969967A = a1 ^ x1C3CC32F;
535 x10 = x2FD00F5F & a5;
536 x11 = x10 ^ x4969967A;
539 x4CC44CC4 = x4FE55EF4 & ~a2;
540 x40C040C0 = x4CC44CC4 & ~a3;
541 xC3C33C3C = ~x3C3CC3C3;
542 x9669C396 = x55AAFFAA ^ xC3C33C3C;
543 xD6A98356 = x40C040C0 ^ x9669C396;
544 x00 = a5 & ~x0C840A00;
545 x01 = x00 ^ xD6A98356;
548 xD6E9C3D6 = x40C040C0 | x9669C396;
549 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
550 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
551 x001A000B = a4 & ~x4FE55EF4;
552 x9A1F2D1B = x9A072D12 | x001A000B;
553 x20 = a5 & ~x284100D8;
554 x21 = x20 ^ x9A1F2D1B;
558 void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
560 u32 x5A5A5A5A, x0F0FF0F0;
561 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
562 x52FBCA0F, x61C8F93C;
563 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
564 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
565 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
566 u32 x00, x01, x10, x11, x20, x21, x30, x31;
571 x33FFCC00 = a5 ^ x33FF33FF;
572 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
573 x0C0CC0C0 = x0F0FF0F0 & ~a2;
574 x0CF3C03F = a4 ^ x0C0CC0C0;
575 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
576 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
577 x61C8F93C = a2 ^ x52FBCA0F;
579 x00C0C03C = x0CF3C03F & x61C8F93C;
580 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
581 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
582 x30908326 = x3B92A366 & ~x0F0F30C0;
583 x3C90B3D6 = x0C0030F0 ^ x30908326;
586 x0C0CFFFF = a5 | x0C0CC0C0;
587 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
588 x04124C11 = x379E5C99 & ~x33CC33CC;
589 x56E9861E = x52FBCA0F ^ x04124C11;
590 x00 = a6 & ~x3C90B3D6;
591 x01 = x00 ^ x56E9861E;
594 xA91679E1 = ~x56E9861E;
595 x10 = x3C90B3D6 & ~a6;
596 x11 = x10 ^ xA91679E1;
599 x9586CA37 = x3C90B3D6 ^ xA91679E1;
600 x8402C833 = x9586CA37 & ~x33CC33CC;
601 x84C2C83F = x00C0C03C | x8402C833;
602 xB35C94A6 = x379E5C99 ^ x84C2C83F;
603 x20 = x61C8F93C | a6;
604 x21 = x20 ^ xB35C94A6;
607 x30 = a6 & x61C8F93C;
608 x31 = x30 ^ xB35C94A6;
612 void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
614 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
615 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
616 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
617 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
618 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
619 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
620 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
621 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
622 u32 x00, x01, x10, x11, x20, x21, x30, x31;
625 x77770000 = x77777777 & ~a6;
626 x22225555 = a1 ^ x77770000;
627 x11116666 = a3 ^ x22225555;
628 x1F1F6F6F = a4 | x11116666;
630 x70700000 = x77770000 & ~a4;
631 x43433333 = a3 ^ x70700000;
632 x00430033 = a5 & x43433333;
633 x55557777 = a1 | x11116666;
634 x55167744 = x00430033 ^ x55557777;
635 x5A19784B = a4 ^ x55167744;
637 x5A1987B4 = a6 ^ x5A19784B;
638 x7A3BD7F5 = x22225555 | x5A1987B4;
639 x003B00F5 = a5 & x7A3BD7F5;
640 x221955A0 = x22225555 ^ x003B00F5;
641 x05050707 = a4 & x55557777;
642 x271C52A7 = x221955A0 ^ x05050707;
644 x2A2A82A0 = x7A3BD7F5 & ~a1;
645 x6969B193 = x43433333 ^ x2A2A82A0;
646 x1FE06F90 = a5 ^ x1F1F6F6F;
647 x16804E00 = x1FE06F90 & ~x6969B193;
648 xE97FB1FF = ~x16804E00;
649 x20 = xE97FB1FF & ~a2;
650 x21 = x20 ^ x5A19784B;
653 x43403302 = x43433333 & ~x003B00F5;
654 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
655 x37DEFFB7 = x271C52A7 | x35CAED30;
656 x349ECCB5 = x37DEFFB7 & ~x43403302;
657 x0B01234A = x1F1F6F6F & ~x349ECCB5;
659 x101884B4 = x5A1987B4 & x349ECCB5;
660 x0FF8EB24 = x1FE06F90 ^ x101884B4;
661 x41413333 = x43433333 & x55557777;
662 x4FF9FB37 = x0FF8EB24 | x41413333;
663 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
664 x30 = x4FC2FBC2 & a2;
665 x31 = x30 ^ x271C52A7;
668 x22222222 = a1 ^ x77777777;
669 x16BCEE97 = x349ECCB5 ^ x22222222;
670 x0F080B04 = a4 & x0FF8EB24;
671 x19B4E593 = x16BCEE97 ^ x0F080B04;
672 x00 = x0B01234A | a2;
673 x01 = x00 ^ x19B4E593;
676 x5C5C5C5C = x1F1F6F6F ^ x43433333;
677 x4448184C = x5C5C5C5C & ~x19B4E593;
678 x2DDABE71 = x22225555 ^ x0FF8EB24;
679 x6992A63D = x4448184C ^ x2DDABE71;
680 x10 = x1F1F6F6F & a2;
681 x11 = x10 ^ x6992A63D;
685 void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
688 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
689 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
690 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
691 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
692 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
693 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
694 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
695 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
696 u32 x00, x01, x10, x11, x20, x21, x30, x31;
701 x11115555 = a1 & x3333FFFF;
702 x22DD6699 = x33CC33CC ^ x11115555;
703 x22DD9966 = a6 ^ x22DD6699;
704 x00220099 = a5 & ~x22DD9966;
706 x00551144 = a1 & x22DD9966;
707 x33662277 = a2 ^ x00551144;
709 x7B7E7A7F = x33662277 | x5A5A5A5A;
710 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
712 x09030C06 = a3 & x59A31CE6;
713 x09030000 = x09030C06 & ~a6;
714 x336622FF = x00220099 | x33662277;
715 x3A6522FF = x09030000 ^ x336622FF;
716 x30 = x3A6522FF & a4;
717 x31 = x30 ^ x59A31CE6;
720 x484D494C = a2 ^ x7B7E7A7F;
721 x0000B6B3 = a6 & ~x484D494C;
722 x0F0FB9BC = a3 ^ x0000B6B3;
723 x00FC00F9 = a5 & ~x09030C06;
724 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
726 x5DF75DF7 = a1 | x59A31CE6;
727 x116600F7 = x336622FF & x5DF75DF7;
728 x1E69B94B = x0F0FB9BC ^ x116600F7;
729 x1668B94B = x1E69B94B & ~x09030000;
730 x20 = x00220099 | a4;
731 x21 = x20 ^ x1668B94B;
734 x7B7B7B7B = a2 | x5A5A5A5A;
735 x411E5984 = x3A6522FF ^ x7B7B7B7B;
736 x1FFFFDFD = x11115555 | x0FFFB9FD;
737 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
739 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
740 x004B002D = a5 & ~x3CB4DFD2;
741 xB7B2B6B3 = ~x484D494C;
742 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
743 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
744 x10 = xCC82CDE5 & ~a4;
745 x11 = x10 ^ x5EE1A479;
748 x0055EEBB = a6 ^ x00551144;
749 x5A5AECE9 = a1 ^ x0F0FB9BC;
750 x0050ECA9 = x0055EEBB & x5A5AECE9;
751 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
752 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
753 x00 = x0FFFB9FD & ~a4;
754 x01 = x00 ^ xC59A2D67;
758 void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
760 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
761 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
762 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
763 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
764 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
765 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
766 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
767 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
768 u32 x00, x01, x10, x11, x20, x21, x30, x31;
771 x3CC33CC3 = a3 ^ x0FF00FF0;
772 x00003CC3 = a6 & x3CC33CC3;
773 x0F000F00 = a4 & x0FF00FF0;
774 x5A555A55 = a2 ^ x0F000F00;
775 x00001841 = x00003CC3 & x5A555A55;
777 x00000F00 = a6 & x0F000F00;
778 x33333C33 = a3 ^ x00000F00;
779 x7B777E77 = x5A555A55 | x33333C33;
780 x0FF0F00F = a6 ^ x0FF00FF0;
781 x74878E78 = x7B777E77 ^ x0FF0F00F;
782 x30 = a1 & ~x00001841;
783 x31 = x30 ^ x74878E78;
786 x003C003C = a5 & ~x3CC33CC3;
787 x5A7D5A7D = x5A555A55 | x003C003C;
788 x333300F0 = x00003CC3 ^ x33333C33;
789 x694E5A8D = x5A7D5A7D ^ x333300F0;
791 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
792 x000F0303 = a4 & ~x0FF0CCCC;
793 x5A505854 = x5A555A55 & ~x000F0303;
794 x33CC000F = a5 ^ x333300F0;
795 x699C585B = x5A505854 ^ x33CC000F;
797 x7F878F78 = x0F000F00 | x74878E78;
798 x21101013 = a3 & x699C585B;
799 x7F979F7B = x7F878F78 | x21101013;
800 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
801 x4F9493BB = x7F979F7B ^ x30030CC0;
802 x00 = x4F9493BB & ~a1;
803 x01 = x00 ^ x694E5A8D;
806 x6F9CDBFB = x699C585B | x4F9493BB;
807 x0000DBFB = a6 & x6F9CDBFB;
808 x00005151 = a2 & x0000DBFB;
809 x26DAC936 = x694E5A8D ^ x4F9493BB;
810 x26DA9867 = x00005151 ^ x26DAC936;
812 x27DA9877 = x21101013 | x26DA9867;
813 x27DA438C = x0000DBFB ^ x27DA9877;
814 x2625C9C9 = a5 ^ x26DAC936;
815 x27FFCBCD = x27DA438C | x2625C9C9;
816 x20 = x27FFCBCD & a1;
817 x21 = x20 ^ x699C585B;
820 x27FF1036 = x0000DBFB ^ x27FFCBCD;
821 x27FF103E = x003C003C | x27FF1036;
822 xB06B6C44 = ~x4F9493BB;
823 x97947C7A = x27FF103E ^ xB06B6C44;
824 x10 = x97947C7A & ~a1;
825 x11 = x10 ^ x26DA9867;
829 void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
831 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
832 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
833 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
834 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
835 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
836 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
837 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
838 u32 x00, x01, x10, x11, x20, x21, x30, x31;
840 x0C0C0C0C = a3 & ~a2;
841 x0000F0F0 = a5 & ~a3;
842 x00FFF00F = a4 ^ x0000F0F0;
843 x00555005 = a1 & x00FFF00F;
844 x00515001 = x00555005 & ~x0C0C0C0C;
846 x33000330 = a2 & ~x00FFF00F;
847 x77555775 = a1 | x33000330;
848 x30303030 = a2 & ~a3;
849 x3030CFCF = a5 ^ x30303030;
850 x30104745 = x77555775 & x3030CFCF;
851 x30555745 = x00555005 | x30104745;
853 xFF000FF0 = ~x00FFF00F;
854 xCF1048B5 = x30104745 ^ xFF000FF0;
855 x080A080A = a3 & ~x77555775;
856 xC71A40BF = xCF1048B5 ^ x080A080A;
857 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
858 x10 = x00515001 | a6;
859 x11 = x10 ^ xCB164CB3;
862 x9E4319E6 = a1 ^ xCB164CB3;
863 x000019E6 = a5 & x9E4319E6;
864 xF429738C = a2 ^ xC71A40BF;
865 xF4296A6A = x000019E6 ^ xF429738C;
866 xC729695A = x33000330 ^ xF4296A6A;
868 xC47C3D2F = x30555745 ^ xF4296A6A;
869 xF77F3F3F = a2 | xC47C3D2F;
870 x9E43E619 = a5 ^ x9E4319E6;
871 x693CD926 = xF77F3F3F ^ x9E43E619;
872 x20 = x30555745 & a6;
873 x21 = x20 ^ x693CD926;
876 xF719A695 = x3030CFCF ^ xC729695A;
877 xF4FF73FF = a4 | xF429738C;
878 x03E6D56A = xF719A695 ^ xF4FF73FF;
879 x56B3803F = a1 ^ x03E6D56A;
880 x30 = x56B3803F & a6;
881 x31 = x30 ^ xC729695A;
884 xF700A600 = xF719A695 & ~a4;
885 x61008000 = x693CD926 & xF700A600;
886 x03B7856B = x00515001 ^ x03E6D56A;
887 x62B7056B = x61008000 ^ x03B7856B;
888 x00 = x62B7056B | a6;
889 x01 = x00 ^ xC729695A;
896 #if defined IS_AMD || defined IS_GENERIC
899 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
900 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
902 * Gate counts: 49 44 46 33 48 46 46 41
905 * Several same-gate-count expressions for each S-box are included (for use on
906 * different CPUs/GPUs).
908 * These Boolean expressions corresponding to DES S-boxes have been generated
909 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
910 * John the Ripper password cracker: http://www.openwall.com/john/
911 * Being mathematical formulas, they are not copyrighted and are free for reuse
914 * This file (a specific representation of the S-box expressions, surrounding
915 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
916 * Redistribution and use in source and binary forms, with or without
917 * modification, are permitted. (This is a heavily cut-down "BSD license".)
919 * The effort has been sponsored by Rapid7: http://www.rapid7.com
922 void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
924 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
926 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
927 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
928 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
929 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
930 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
931 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
932 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
933 u32 x00, x01, x10, x11, x20, x21, x30, x31;
935 x55005500 = a1 & ~a5;
936 x5A0F5A0F = a4 ^ x55005500;
939 x22226666 = x3333FFFF & x66666666;
940 x2D2D6969 = a4 ^ x22226666;
941 x25202160 = x2D2D6969 & ~x5A0F5A0F;
944 x33CCCC33 = a3 ^ x00FFFF00;
945 x4803120C = x5A0F5A0F & ~x33CCCC33;
946 x2222FFFF = a6 | x22226666;
947 x6A21EDF3 = x4803120C ^ x2222FFFF;
948 x4A01CC93 = x6A21EDF3 & ~x25202160;
951 x7F75FFFF = x6A21EDF3 | x5555FFFF;
952 x00D20096 = a5 & ~x2D2D6969;
953 x7FA7FF69 = x7F75FFFF ^ x00D20096;
955 x0A0A0000 = a4 & ~x5555FFFF;
956 x0AD80096 = x00D20096 ^ x0A0A0000;
957 x00999900 = x00FFFF00 & ~x66666666;
958 x0AD99996 = x0AD80096 | x00999900;
960 x22332233 = a3 & ~x55005500;
961 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
962 x054885C0 = x257AA5F0 & ~x22332233;
963 xFAB77A3F = ~x054885C0;
964 x2221EDF3 = x3333FFFF & x6A21EDF3;
965 xD89697CC = xFAB77A3F ^ x2221EDF3;
966 x20 = x7FA7FF69 & ~a2;
967 x21 = x20 ^ xD89697CC;
970 x05B77AC0 = x00FFFF00 ^ x054885C0;
971 x05F77AD6 = x00D20096 | x05B77AC0;
972 x36C48529 = x3333FFFF ^ x05F77AD6;
973 x6391D07C = a1 ^ x36C48529;
974 xBB0747B0 = xD89697CC ^ x6391D07C;
975 x00 = x25202160 | a2;
976 x01 = x00 ^ xBB0747B0;
979 x4C460000 = x3333FFFF ^ x7F75FFFF;
980 x4EDF9996 = x0AD99996 | x4C460000;
981 x2D4E49EA = x6391D07C ^ x4EDF9996;
982 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
983 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
984 x10 = x4A01CC93 | a2;
985 x11 = x10 ^ x96B1B65A;
988 x5AFF5AFF = a5 | x5A0F5A0F;
989 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
990 x4201C010 = x4A01CC93 & x6391D07C;
991 x10B0D205 = x52B11215 ^ x4201C010;
992 x30 = x10B0D205 | a2;
993 x31 = x30 ^ x0AD99996;
997 void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1000 u32 x55550000, x00AA00FF, x33BB33FF;
1001 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
1002 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
1003 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
1004 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
1005 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
1006 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
1007 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
1008 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1010 x33CC33CC = a2 ^ a5;
1012 x55550000 = a1 & ~a6;
1013 x00AA00FF = a5 & ~x55550000;
1014 x33BB33FF = a2 | x00AA00FF;
1016 x33CC0000 = x33CC33CC & ~a6;
1017 x11441144 = a1 & x33CC33CC;
1018 x11BB11BB = a5 ^ x11441144;
1019 x003311BB = x11BB11BB & ~x33CC0000;
1021 x00000F0F = a3 & a6;
1022 x336600FF = x00AA00FF ^ x33CC0000;
1023 x332200FF = x33BB33FF & x336600FF;
1024 x332200F0 = x332200FF & ~x00000F0F;
1026 x0302000F = a3 & x332200FF;
1028 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
1029 x33CCCC33 = a6 ^ x33CC33CC;
1030 x33CCC030 = x33CCCC33 & ~x00000F0F;
1031 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
1032 x10 = a4 & ~x332200F0;
1033 x11 = x10 ^ x9A646A95;
1036 x00333303 = a2 & ~x33CCC030;
1037 x118822B8 = x11BB11BB ^ x00333303;
1038 xA8208805 = xA9A8AAA5 & ~x118822B8;
1039 x3CC3C33C = a3 ^ x33CCCC33;
1040 x94E34B39 = xA8208805 ^ x3CC3C33C;
1041 x00 = x33BB33FF & ~a4;
1042 x01 = x00 ^ x94E34B39;
1045 x0331330C = x0302000F ^ x00333303;
1046 x3FF3F33C = x3CC3C33C | x0331330C;
1047 xA9DF596A = x33BB33FF ^ x9A646A95;
1048 xA9DF5F6F = x00000F0F | xA9DF596A;
1049 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
1051 xA9466A6A = x332200FF ^ x9A646A95;
1052 x3DA52153 = x94E34B39 ^ xA9466A6A;
1053 x29850143 = xA9DF5F6F & x3DA52153;
1054 x33C0330C = x33CC33CC & x3FF3F33C;
1055 x1A45324F = x29850143 ^ x33C0330C;
1056 x20 = x1A45324F | a4;
1057 x21 = x20 ^ x962CAC53;
1060 x0A451047 = x1A45324F & ~x118822B8;
1061 xBBDFDD7B = x33CCCC33 | xA9DF596A;
1062 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
1063 x30 = x003311BB | a4;
1064 x31 = x30 ^ xB19ACD3C;
1068 void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1070 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
1071 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
1072 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
1073 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
1074 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
1075 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
1076 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
1077 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
1078 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1080 x44444444 = a1 & ~a2;
1081 x0F0FF0F0 = a3 ^ a6;
1082 x4F4FF4F4 = x44444444 | x0F0FF0F0;
1083 x00FFFF00 = a4 ^ a6;
1084 x00AAAA00 = x00FFFF00 & ~a1;
1085 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
1087 x3C3CC3C3 = a2 ^ x0F0FF0F0;
1088 x3C3C0000 = x3C3CC3C3 & ~a6;
1089 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
1090 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
1092 x00005EF4 = a6 & x4FE55EF4;
1093 x00FF5EFF = a4 | x00005EF4;
1094 x00555455 = a1 & x00FF5EFF;
1095 x3C699796 = x3C3CC3C3 ^ x00555455;
1096 x30 = x4FE55EF4 & ~a5;
1097 x31 = x30 ^ x3C699796;
1100 x000FF000 = x0F0FF0F0 & x00FFFF00;
1101 x55AA55AA = a1 ^ a4;
1102 x26D9A15E = x7373F4F4 ^ x55AA55AA;
1103 x2FDFAF5F = a3 | x26D9A15E;
1104 x2FD00F5F = x2FDFAF5F & ~x000FF000;
1106 x55AAFFAA = x00AAAA00 | x55AA55AA;
1107 x28410014 = x3C699796 & ~x55AAFFAA;
1108 x000000FF = a4 & a6;
1109 x000000CC = x000000FF & ~a2;
1110 x284100D8 = x28410014 ^ x000000CC;
1112 x204100D0 = x7373F4F4 & x284100D8;
1113 x3C3CC3FF = x3C3CC3C3 | x000000FF;
1114 x1C3CC32F = x3C3CC3FF & ~x204100D0;
1115 x4969967A = a1 ^ x1C3CC32F;
1116 x10 = x2FD00F5F & a5;
1117 x11 = x10 ^ x4969967A;
1120 x4CC44CC4 = x4FE55EF4 & ~a2;
1121 x40C040C0 = x4CC44CC4 & ~a3;
1122 xC3C33C3C = ~x3C3CC3C3;
1123 x9669C396 = x55AAFFAA ^ xC3C33C3C;
1124 xD6A98356 = x40C040C0 ^ x9669C396;
1125 x00 = a5 & ~x0C840A00;
1126 x01 = x00 ^ xD6A98356;
1129 xD6E9C3D6 = x40C040C0 | x9669C396;
1130 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
1131 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
1132 x001A000B = a4 & ~x4FE55EF4;
1133 x9A1F2D1B = x9A072D12 | x001A000B;
1134 x20 = a5 & ~x284100D8;
1135 x21 = x20 ^ x9A1F2D1B;
1139 void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1141 u32 x5A5A5A5A, x0F0FF0F0;
1142 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
1143 x52FBCA0F, x61C8F93C;
1144 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
1145 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
1146 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
1147 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1149 x5A5A5A5A = a1 ^ a3;
1150 x0F0FF0F0 = a3 ^ a5;
1151 x33FF33FF = a2 | a4;
1152 x33FFCC00 = a5 ^ x33FF33FF;
1153 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
1154 x0C0CC0C0 = x0F0FF0F0 & ~a2;
1155 x0CF3C03F = a4 ^ x0C0CC0C0;
1156 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
1157 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
1158 x61C8F93C = a2 ^ x52FBCA0F;
1160 x00C0C03C = x0CF3C03F & x61C8F93C;
1161 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
1162 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
1163 x30908326 = x3B92A366 & ~x0F0F30C0;
1164 x3C90B3D6 = x0C0030F0 ^ x30908326;
1166 x33CC33CC = a2 ^ a4;
1167 x0C0CFFFF = a5 | x0C0CC0C0;
1168 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
1169 x04124C11 = x379E5C99 & ~x33CC33CC;
1170 x56E9861E = x52FBCA0F ^ x04124C11;
1171 x00 = a6 & ~x3C90B3D6;
1172 x01 = x00 ^ x56E9861E;
1175 xA91679E1 = ~x56E9861E;
1176 x10 = x3C90B3D6 & ~a6;
1177 x11 = x10 ^ xA91679E1;
1180 x9586CA37 = x3C90B3D6 ^ xA91679E1;
1181 x8402C833 = x9586CA37 & ~x33CC33CC;
1182 x84C2C83F = x00C0C03C | x8402C833;
1183 xB35C94A6 = x379E5C99 ^ x84C2C83F;
1184 x20 = x61C8F93C | a6;
1185 x21 = x20 ^ xB35C94A6;
1188 x30 = a6 & x61C8F93C;
1189 x31 = x30 ^ xB35C94A6;
1193 void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1195 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
1196 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
1197 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
1198 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
1199 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
1200 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
1201 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
1202 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
1203 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1205 x77777777 = a1 | a3;
1206 x77770000 = x77777777 & ~a6;
1207 x22225555 = a1 ^ x77770000;
1208 x11116666 = a3 ^ x22225555;
1209 x1F1F6F6F = a4 | x11116666;
1211 x70700000 = x77770000 & ~a4;
1212 x43433333 = a3 ^ x70700000;
1213 x00430033 = a5 & x43433333;
1214 x55557777 = a1 | x11116666;
1215 x55167744 = x00430033 ^ x55557777;
1216 x5A19784B = a4 ^ x55167744;
1218 x5A1987B4 = a6 ^ x5A19784B;
1219 x7A3BD7F5 = x22225555 | x5A1987B4;
1220 x003B00F5 = a5 & x7A3BD7F5;
1221 x221955A0 = x22225555 ^ x003B00F5;
1222 x05050707 = a4 & x55557777;
1223 x271C52A7 = x221955A0 ^ x05050707;
1225 x2A2A82A0 = x7A3BD7F5 & ~a1;
1226 x6969B193 = x43433333 ^ x2A2A82A0;
1227 x1FE06F90 = a5 ^ x1F1F6F6F;
1228 x16804E00 = x1FE06F90 & ~x6969B193;
1229 xE97FB1FF = ~x16804E00;
1230 x20 = xE97FB1FF & ~a2;
1231 x21 = x20 ^ x5A19784B;
1234 x43403302 = x43433333 & ~x003B00F5;
1235 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
1236 x37DEFFB7 = x271C52A7 | x35CAED30;
1237 x349ECCB5 = x37DEFFB7 & ~x43403302;
1238 x0B01234A = x1F1F6F6F & ~x349ECCB5;
1240 x101884B4 = x5A1987B4 & x349ECCB5;
1241 x0FF8EB24 = x1FE06F90 ^ x101884B4;
1242 x41413333 = x43433333 & x55557777;
1243 x4FF9FB37 = x0FF8EB24 | x41413333;
1244 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
1245 x30 = x4FC2FBC2 & a2;
1246 x31 = x30 ^ x271C52A7;
1249 x22222222 = a1 ^ x77777777;
1250 x16BCEE97 = x349ECCB5 ^ x22222222;
1251 x0F080B04 = a4 & x0FF8EB24;
1252 x19B4E593 = x16BCEE97 ^ x0F080B04;
1253 x00 = x0B01234A | a2;
1254 x01 = x00 ^ x19B4E593;
1257 x5C5C5C5C = x1F1F6F6F ^ x43433333;
1258 x4448184C = x5C5C5C5C & ~x19B4E593;
1259 x2DDABE71 = x22225555 ^ x0FF8EB24;
1260 x6992A63D = x4448184C ^ x2DDABE71;
1261 x10 = x1F1F6F6F & a2;
1262 x11 = x10 ^ x6992A63D;
1266 void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1269 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
1270 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
1271 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
1272 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
1273 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
1274 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
1275 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
1276 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
1277 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1279 x33CC33CC = a2 ^ a5;
1281 x3333FFFF = a2 | a6;
1282 x11115555 = a1 & x3333FFFF;
1283 x22DD6699 = x33CC33CC ^ x11115555;
1284 x22DD9966 = a6 ^ x22DD6699;
1285 x00220099 = a5 & ~x22DD9966;
1287 x00551144 = a1 & x22DD9966;
1288 x33662277 = a2 ^ x00551144;
1289 x5A5A5A5A = a1 ^ a3;
1290 x7B7E7A7F = x33662277 | x5A5A5A5A;
1291 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
1293 x09030C06 = a3 & x59A31CE6;
1294 x09030000 = x09030C06 & ~a6;
1295 x336622FF = x00220099 | x33662277;
1296 x3A6522FF = x09030000 ^ x336622FF;
1297 x30 = x3A6522FF & a4;
1298 x31 = x30 ^ x59A31CE6;
1301 x484D494C = a2 ^ x7B7E7A7F;
1302 x0000B6B3 = a6 & ~x484D494C;
1303 x0F0FB9BC = a3 ^ x0000B6B3;
1304 x00FC00F9 = a5 & ~x09030C06;
1305 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
1307 x5DF75DF7 = a1 | x59A31CE6;
1308 x116600F7 = x336622FF & x5DF75DF7;
1309 x1E69B94B = x0F0FB9BC ^ x116600F7;
1310 x1668B94B = x1E69B94B & ~x09030000;
1311 x20 = x00220099 | a4;
1312 x21 = x20 ^ x1668B94B;
1315 x7B7B7B7B = a2 | x5A5A5A5A;
1316 x411E5984 = x3A6522FF ^ x7B7B7B7B;
1317 x1FFFFDFD = x11115555 | x0FFFB9FD;
1318 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
1320 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
1321 x004B002D = a5 & ~x3CB4DFD2;
1322 xB7B2B6B3 = ~x484D494C;
1323 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
1324 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
1325 x10 = xCC82CDE5 & ~a4;
1326 x11 = x10 ^ x5EE1A479;
1329 x0055EEBB = a6 ^ x00551144;
1330 x5A5AECE9 = a1 ^ x0F0FB9BC;
1331 x0050ECA9 = x0055EEBB & x5A5AECE9;
1332 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
1333 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
1334 x00 = x0FFFB9FD & ~a4;
1335 x01 = x00 ^ xC59A2D67;
1339 void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1341 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
1342 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
1343 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
1344 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
1345 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
1346 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
1347 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
1348 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
1349 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1351 x0FF00FF0 = a4 ^ a5;
1352 x3CC33CC3 = a3 ^ x0FF00FF0;
1353 x00003CC3 = a6 & x3CC33CC3;
1354 x0F000F00 = a4 & x0FF00FF0;
1355 x5A555A55 = a2 ^ x0F000F00;
1356 x00001841 = x00003CC3 & x5A555A55;
1358 x00000F00 = a6 & x0F000F00;
1359 x33333C33 = a3 ^ x00000F00;
1360 x7B777E77 = x5A555A55 | x33333C33;
1361 x0FF0F00F = a6 ^ x0FF00FF0;
1362 x74878E78 = x7B777E77 ^ x0FF0F00F;
1363 x30 = a1 & ~x00001841;
1364 x31 = x30 ^ x74878E78;
1367 x003C003C = a5 & ~x3CC33CC3;
1368 x5A7D5A7D = x5A555A55 | x003C003C;
1369 x333300F0 = x00003CC3 ^ x33333C33;
1370 x694E5A8D = x5A7D5A7D ^ x333300F0;
1372 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
1373 x000F0303 = a4 & ~x0FF0CCCC;
1374 x5A505854 = x5A555A55 & ~x000F0303;
1375 x33CC000F = a5 ^ x333300F0;
1376 x699C585B = x5A505854 ^ x33CC000F;
1378 x7F878F78 = x0F000F00 | x74878E78;
1379 x21101013 = a3 & x699C585B;
1380 x7F979F7B = x7F878F78 | x21101013;
1381 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
1382 x4F9493BB = x7F979F7B ^ x30030CC0;
1383 x00 = x4F9493BB & ~a1;
1384 x01 = x00 ^ x694E5A8D;
1387 x6F9CDBFB = x699C585B | x4F9493BB;
1388 x0000DBFB = a6 & x6F9CDBFB;
1389 x00005151 = a2 & x0000DBFB;
1390 x26DAC936 = x694E5A8D ^ x4F9493BB;
1391 x26DA9867 = x00005151 ^ x26DAC936;
1393 x27DA9877 = x21101013 | x26DA9867;
1394 x27DA438C = x0000DBFB ^ x27DA9877;
1395 x2625C9C9 = a5 ^ x26DAC936;
1396 x27FFCBCD = x27DA438C | x2625C9C9;
1397 x20 = x27FFCBCD & a1;
1398 x21 = x20 ^ x699C585B;
1401 x27FF1036 = x0000DBFB ^ x27FFCBCD;
1402 x27FF103E = x003C003C | x27FF1036;
1403 xB06B6C44 = ~x4F9493BB;
1404 x97947C7A = x27FF103E ^ xB06B6C44;
1405 x10 = x97947C7A & ~a1;
1406 x11 = x10 ^ x26DA9867;
1410 void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1412 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
1413 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
1414 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
1415 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
1416 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
1417 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
1418 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
1419 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1421 x0C0C0C0C = a3 & ~a2;
1422 x0000F0F0 = a5 & ~a3;
1423 x00FFF00F = a4 ^ x0000F0F0;
1424 x00555005 = a1 & x00FFF00F;
1425 x00515001 = x00555005 & ~x0C0C0C0C;
1427 x33000330 = a2 & ~x00FFF00F;
1428 x77555775 = a1 | x33000330;
1429 x30303030 = a2 & ~a3;
1430 x3030CFCF = a5 ^ x30303030;
1431 x30104745 = x77555775 & x3030CFCF;
1432 x30555745 = x00555005 | x30104745;
1434 xFF000FF0 = ~x00FFF00F;
1435 xCF1048B5 = x30104745 ^ xFF000FF0;
1436 x080A080A = a3 & ~x77555775;
1437 xC71A40BF = xCF1048B5 ^ x080A080A;
1438 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
1439 x10 = x00515001 | a6;
1440 x11 = x10 ^ xCB164CB3;
1443 x9E4319E6 = a1 ^ xCB164CB3;
1444 x000019E6 = a5 & x9E4319E6;
1445 xF429738C = a2 ^ xC71A40BF;
1446 xF4296A6A = x000019E6 ^ xF429738C;
1447 xC729695A = x33000330 ^ xF4296A6A;
1449 xC47C3D2F = x30555745 ^ xF4296A6A;
1450 xF77F3F3F = a2 | xC47C3D2F;
1451 x9E43E619 = a5 ^ x9E4319E6;
1452 x693CD926 = xF77F3F3F ^ x9E43E619;
1453 x20 = x30555745 & a6;
1454 x21 = x20 ^ x693CD926;
1457 xF719A695 = x3030CFCF ^ xC729695A;
1458 xF4FF73FF = a4 | xF429738C;
1459 x03E6D56A = xF719A695 ^ xF4FF73FF;
1460 x56B3803F = a1 ^ x03E6D56A;
1461 x30 = x56B3803F & a6;
1462 x31 = x30 ^ xC729695A;
1465 xF700A600 = xF719A695 & ~a4;
1466 x61008000 = x693CD926 & xF700A600;
1467 x03B7856B = x00515001 ^ x03E6D56A;
1468 x62B7056B = x61008000 ^ x03B7856B;
1469 x00 = x62B7056B | a6;
1470 x01 = x00 ^ xC729695A;
1476 #define SWAP(a, b) { u32 tmp=*a;*a=*b;*b=tmp; }
1512 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
1513 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
1514 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
1515 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
1516 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
1517 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
1518 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
1519 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
1520 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
1521 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
1522 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
1523 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
1524 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
1525 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
1526 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
1527 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
1544 #ifdef DESCRYPT_SALT
1546 void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
1548 sXXX_DECL u32 s001 = (0x001 & DESCRYPT_SALT) ? 0xffffffff : 0;
1549 sXXX_DECL u32 s002 = (0x002 & DESCRYPT_SALT) ? 0xffffffff : 0;
1550 sXXX_DECL u32 s004 = (0x004 & DESCRYPT_SALT) ? 0xffffffff : 0;
1551 sXXX_DECL u32 s008 = (0x008 & DESCRYPT_SALT) ? 0xffffffff : 0;
1552 sXXX_DECL u32 s010 = (0x010 & DESCRYPT_SALT) ? 0xffffffff : 0;
1553 sXXX_DECL u32 s020 = (0x020 & DESCRYPT_SALT) ? 0xffffffff : 0;
1554 sXXX_DECL u32 s040 = (0x040 & DESCRYPT_SALT) ? 0xffffffff : 0;
1555 sXXX_DECL u32 s080 = (0x080 & DESCRYPT_SALT) ? 0xffffffff : 0;
1556 sXXX_DECL u32 s100 = (0x100 & DESCRYPT_SALT) ? 0xffffffff : 0;
1557 sXXX_DECL u32 s200 = (0x200 & DESCRYPT_SALT) ? 0xffffffff : 0;
1558 sXXX_DECL u32 s400 = (0x400 & DESCRYPT_SALT) ? 0xffffffff : 0;
1559 sXXX_DECL u32 s800 = (0x800 & DESCRYPT_SALT) ? 0xffffffff : 0;
1561 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
1562 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
1563 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
1564 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
1565 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
1566 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
1567 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
1568 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
1570 for (u32 ii = 0; ii < 25; ii++)
1575 for (u32 i = 0; i < 2; i++)
1577 if (i) KEYSET10 else KEYSET00
1579 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1580 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1581 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1582 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1583 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1584 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1585 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1586 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1588 if (i) KEYSET11 else KEYSET01
1590 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1591 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1592 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1593 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1594 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1595 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1596 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1597 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1599 if (i) KEYSET12 else KEYSET02
1601 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1602 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1603 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1604 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1605 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1606 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1607 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1608 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1610 if (i) KEYSET13 else KEYSET03
1612 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1613 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1614 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1615 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1616 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1617 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1618 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1619 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1621 if (i) KEYSET14 else KEYSET04
1623 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1624 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1625 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1626 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1627 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1628 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1629 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1630 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1632 if (i) KEYSET15 else KEYSET05
1634 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1635 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1636 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1637 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1638 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1639 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1640 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1641 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1643 if (i) KEYSET16 else KEYSET06
1645 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1646 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1647 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1648 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1649 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1650 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1651 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1652 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1654 if (i) KEYSET17 else KEYSET07
1656 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1657 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1658 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1659 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1660 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1661 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1662 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1663 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1674 void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
1676 sXXX_DECL u32 s001 = (0x001 & SALT) ? 0xffffffff : 0;
1677 sXXX_DECL u32 s002 = (0x002 & SALT) ? 0xffffffff : 0;
1678 sXXX_DECL u32 s004 = (0x004 & SALT) ? 0xffffffff : 0;
1679 sXXX_DECL u32 s008 = (0x008 & SALT) ? 0xffffffff : 0;
1680 sXXX_DECL u32 s010 = (0x010 & SALT) ? 0xffffffff : 0;
1681 sXXX_DECL u32 s020 = (0x020 & SALT) ? 0xffffffff : 0;
1682 sXXX_DECL u32 s040 = (0x040 & SALT) ? 0xffffffff : 0;
1683 sXXX_DECL u32 s080 = (0x080 & SALT) ? 0xffffffff : 0;
1684 sXXX_DECL u32 s100 = (0x100 & SALT) ? 0xffffffff : 0;
1685 sXXX_DECL u32 s200 = (0x200 & SALT) ? 0xffffffff : 0;
1686 sXXX_DECL u32 s400 = (0x400 & SALT) ? 0xffffffff : 0;
1687 sXXX_DECL u32 s800 = (0x800 & SALT) ? 0xffffffff : 0;
1689 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
1690 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
1691 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
1692 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
1693 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
1694 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
1695 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
1696 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
1698 for (u32 ii = 0; ii < 25; ii++)
1703 for (u32 i = 0; i < 2; i++)
1705 if (i) KEYSET10 else KEYSET00
1707 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1708 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1709 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1710 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1711 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1712 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1713 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1714 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1716 if (i) KEYSET11 else KEYSET01
1718 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1719 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1720 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1721 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1722 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1723 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1724 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1725 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1727 if (i) KEYSET12 else KEYSET02
1729 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1730 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1731 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1732 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1733 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1734 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1735 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1736 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1738 if (i) KEYSET13 else KEYSET03
1740 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1741 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1742 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1743 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1744 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1745 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1746 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1747 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1749 if (i) KEYSET14 else KEYSET04
1751 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1752 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1753 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1754 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1755 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1756 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1757 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1758 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1760 if (i) KEYSET15 else KEYSET05
1762 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1763 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1764 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1765 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1766 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1767 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1768 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1769 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1771 if (i) KEYSET16 else KEYSET06
1773 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1774 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1775 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1776 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1777 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1778 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1779 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1780 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1782 if (i) KEYSET17 else KEYSET07
1784 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1785 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1786 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1787 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1788 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1789 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1790 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1791 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1802 void transpose32c (u32 data[32])
1804 #define swap(x,y,j,m) \
1805 t = ((x) ^ ((y) >> (j))) & (m); \
1807 (y) = (y) ^ (t << (j));
1811 swap (data[ 0], data[16], 16, 0x0000ffff);
1812 swap (data[ 1], data[17], 16, 0x0000ffff);
1813 swap (data[ 2], data[18], 16, 0x0000ffff);
1814 swap (data[ 3], data[19], 16, 0x0000ffff);
1815 swap (data[ 4], data[20], 16, 0x0000ffff);
1816 swap (data[ 5], data[21], 16, 0x0000ffff);
1817 swap (data[ 6], data[22], 16, 0x0000ffff);
1818 swap (data[ 7], data[23], 16, 0x0000ffff);
1819 swap (data[ 8], data[24], 16, 0x0000ffff);
1820 swap (data[ 9], data[25], 16, 0x0000ffff);
1821 swap (data[10], data[26], 16, 0x0000ffff);
1822 swap (data[11], data[27], 16, 0x0000ffff);
1823 swap (data[12], data[28], 16, 0x0000ffff);
1824 swap (data[13], data[29], 16, 0x0000ffff);
1825 swap (data[14], data[30], 16, 0x0000ffff);
1826 swap (data[15], data[31], 16, 0x0000ffff);
1827 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
1828 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
1829 swap (data[ 2], data[10], 8, 0x00ff00ff);
1830 swap (data[ 3], data[11], 8, 0x00ff00ff);
1831 swap (data[ 4], data[12], 8, 0x00ff00ff);
1832 swap (data[ 5], data[13], 8, 0x00ff00ff);
1833 swap (data[ 6], data[14], 8, 0x00ff00ff);
1834 swap (data[ 7], data[15], 8, 0x00ff00ff);
1835 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
1836 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
1837 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
1838 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
1839 swap (data[ 0], data[ 2], 2, 0x33333333);
1840 swap (data[ 1], data[ 3], 2, 0x33333333);
1841 swap (data[ 0], data[ 1], 1, 0x55555555);
1842 swap (data[ 2], data[ 3], 1, 0x55555555);
1843 swap (data[ 4], data[ 6], 2, 0x33333333);
1844 swap (data[ 5], data[ 7], 2, 0x33333333);
1845 swap (data[ 4], data[ 5], 1, 0x55555555);
1846 swap (data[ 6], data[ 7], 1, 0x55555555);
1847 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
1848 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
1849 swap (data[10], data[14], 4, 0x0f0f0f0f);
1850 swap (data[11], data[15], 4, 0x0f0f0f0f);
1851 swap (data[ 8], data[10], 2, 0x33333333);
1852 swap (data[ 9], data[11], 2, 0x33333333);
1853 swap (data[ 8], data[ 9], 1, 0x55555555);
1854 swap (data[10], data[11], 1, 0x55555555);
1855 swap (data[12], data[14], 2, 0x33333333);
1856 swap (data[13], data[15], 2, 0x33333333);
1857 swap (data[12], data[13], 1, 0x55555555);
1858 swap (data[14], data[15], 1, 0x55555555);
1859 swap (data[16], data[24], 8, 0x00ff00ff);
1860 swap (data[17], data[25], 8, 0x00ff00ff);
1861 swap (data[18], data[26], 8, 0x00ff00ff);
1862 swap (data[19], data[27], 8, 0x00ff00ff);
1863 swap (data[20], data[28], 8, 0x00ff00ff);
1864 swap (data[21], data[29], 8, 0x00ff00ff);
1865 swap (data[22], data[30], 8, 0x00ff00ff);
1866 swap (data[23], data[31], 8, 0x00ff00ff);
1867 swap (data[16], data[20], 4, 0x0f0f0f0f);
1868 swap (data[17], data[21], 4, 0x0f0f0f0f);
1869 swap (data[18], data[22], 4, 0x0f0f0f0f);
1870 swap (data[19], data[23], 4, 0x0f0f0f0f);
1871 swap (data[16], data[18], 2, 0x33333333);
1872 swap (data[17], data[19], 2, 0x33333333);
1873 swap (data[16], data[17], 1, 0x55555555);
1874 swap (data[18], data[19], 1, 0x55555555);
1875 swap (data[20], data[22], 2, 0x33333333);
1876 swap (data[21], data[23], 2, 0x33333333);
1877 swap (data[20], data[21], 1, 0x55555555);
1878 swap (data[22], data[23], 1, 0x55555555);
1879 swap (data[24], data[28], 4, 0x0f0f0f0f);
1880 swap (data[25], data[29], 4, 0x0f0f0f0f);
1881 swap (data[26], data[30], 4, 0x0f0f0f0f);
1882 swap (data[27], data[31], 4, 0x0f0f0f0f);
1883 swap (data[24], data[26], 2, 0x33333333);
1884 swap (data[25], data[27], 2, 0x33333333);
1885 swap (data[24], data[25], 1, 0x55555555);
1886 swap (data[26], data[27], 1, 0x55555555);
1887 swap (data[28], data[30], 2, 0x33333333);
1888 swap (data[29], data[31], 2, 0x33333333);
1889 swap (data[28], data[29], 1, 0x55555555);
1890 swap (data[30], data[31], 1, 0x55555555);
1893 void m01500m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
1899 const u32 gid = get_global_id (0);
1900 const u32 lid = get_local_id (0);
1906 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
1912 const u32 w0 = pws[gid].i[0];
1913 const u32 w1 = pws[gid].i[1];
1915 const u32 w0s = (w0 << 1) & 0xfefefefe;
1916 const u32 w1s = (w1 << 1) & 0xfefefefe;
1918 #define K00 (((w0s >> ( 0 + 7)) & 1) ? -1 : 0)
1919 #define K01 (((w0s >> ( 0 + 6)) & 1) ? -1 : 0)
1920 #define K02 (((w0s >> ( 0 + 5)) & 1) ? -1 : 0)
1921 #define K03 (((w0s >> ( 0 + 4)) & 1) ? -1 : 0)
1922 #define K04 (((w0s >> ( 0 + 3)) & 1) ? -1 : 0)
1923 #define K05 (((w0s >> ( 0 + 2)) & 1) ? -1 : 0)
1924 #define K06 (((w0s >> ( 0 + 1)) & 1) ? -1 : 0)
1925 #define K07 (((w0s >> ( 8 + 7)) & 1) ? -1 : 0)
1926 #define K08 (((w0s >> ( 8 + 6)) & 1) ? -1 : 0)
1927 #define K09 (((w0s >> ( 8 + 5)) & 1) ? -1 : 0)
1928 #define K10 (((w0s >> ( 8 + 4)) & 1) ? -1 : 0)
1929 #define K11 (((w0s >> ( 8 + 3)) & 1) ? -1 : 0)
1930 #define K12 (((w0s >> ( 8 + 2)) & 1) ? -1 : 0)
1931 #define K13 (((w0s >> ( 8 + 1)) & 1) ? -1 : 0)
1932 #define K14 (((w0s >> (16 + 7)) & 1) ? -1 : 0)
1933 #define K15 (((w0s >> (16 + 6)) & 1) ? -1 : 0)
1934 #define K16 (((w0s >> (16 + 5)) & 1) ? -1 : 0)
1935 #define K17 (((w0s >> (16 + 4)) & 1) ? -1 : 0)
1936 #define K18 (((w0s >> (16 + 3)) & 1) ? -1 : 0)
1937 #define K19 (((w0s >> (16 + 2)) & 1) ? -1 : 0)
1938 #define K20 (((w0s >> (16 + 1)) & 1) ? -1 : 0)
1939 #define K21 (((w0s >> (24 + 7)) & 1) ? -1 : 0)
1940 #define K22 (((w0s >> (24 + 6)) & 1) ? -1 : 0)
1941 #define K23 (((w0s >> (24 + 5)) & 1) ? -1 : 0)
1942 #define K24 (((w0s >> (24 + 4)) & 1) ? -1 : 0)
1943 #define K25 (((w0s >> (24 + 3)) & 1) ? -1 : 0)
1944 #define K26 (((w0s >> (24 + 2)) & 1) ? -1 : 0)
1945 #define K27 (((w0s >> (24 + 1)) & 1) ? -1 : 0)
1946 #define K28 (((w1s >> ( 0 + 7)) & 1) ? -1 : 0)
1947 #define K29 (((w1s >> ( 0 + 6)) & 1) ? -1 : 0)
1948 #define K30 (((w1s >> ( 0 + 5)) & 1) ? -1 : 0)
1949 #define K31 (((w1s >> ( 0 + 4)) & 1) ? -1 : 0)
1950 #define K32 (((w1s >> ( 0 + 3)) & 1) ? -1 : 0)
1951 #define K33 (((w1s >> ( 0 + 2)) & 1) ? -1 : 0)
1952 #define K34 (((w1s >> ( 0 + 1)) & 1) ? -1 : 0)
1953 #define K35 (((w1s >> ( 8 + 7)) & 1) ? -1 : 0)
1954 #define K36 (((w1s >> ( 8 + 6)) & 1) ? -1 : 0)
1955 #define K37 (((w1s >> ( 8 + 5)) & 1) ? -1 : 0)
1956 #define K38 (((w1s >> ( 8 + 4)) & 1) ? -1 : 0)
1957 #define K39 (((w1s >> ( 8 + 3)) & 1) ? -1 : 0)
1958 #define K40 (((w1s >> ( 8 + 2)) & 1) ? -1 : 0)
1959 #define K41 (((w1s >> ( 8 + 1)) & 1) ? -1 : 0)
1960 #define K42 (((w1s >> (16 + 7)) & 1) ? -1 : 0)
1961 #define K43 (((w1s >> (16 + 6)) & 1) ? -1 : 0)
1962 #define K44 (((w1s >> (16 + 5)) & 1) ? -1 : 0)
1963 #define K45 (((w1s >> (16 + 4)) & 1) ? -1 : 0)
1964 #define K46 (((w1s >> (16 + 3)) & 1) ? -1 : 0)
1965 #define K47 (((w1s >> (16 + 2)) & 1) ? -1 : 0)
1966 #define K48 (((w1s >> (16 + 1)) & 1) ? -1 : 0)
1967 #define K49 (((w1s >> (24 + 7)) & 1) ? -1 : 0)
1968 #define K50 (((w1s >> (24 + 6)) & 1) ? -1 : 0)
1969 #define K51 (((w1s >> (24 + 5)) & 1) ? -1 : 0)
1970 #define K52 (((w1s >> (24 + 4)) & 1) ? -1 : 0)
1971 #define K53 (((w1s >> (24 + 3)) & 1) ? -1 : 0)
1972 #define K54 (((w1s >> (24 + 2)) & 1) ? -1 : 0)
1973 #define K55 (((w1s >> (24 + 1)) & 1) ? -1 : 0)
1979 const u32 pc_pos = get_local_id (1);
1981 const u32 il_pos = pc_pos * 32;
2012 k00 |= words_buf_r[pc_pos].b[ 0];
2013 k01 |= words_buf_r[pc_pos].b[ 1];
2014 k02 |= words_buf_r[pc_pos].b[ 2];
2015 k03 |= words_buf_r[pc_pos].b[ 3];
2016 k04 |= words_buf_r[pc_pos].b[ 4];
2017 k05 |= words_buf_r[pc_pos].b[ 5];
2018 k06 |= words_buf_r[pc_pos].b[ 6];
2019 k07 |= words_buf_r[pc_pos].b[ 7];
2020 k08 |= words_buf_r[pc_pos].b[ 8];
2021 k09 |= words_buf_r[pc_pos].b[ 9];
2022 k10 |= words_buf_r[pc_pos].b[10];
2023 k11 |= words_buf_r[pc_pos].b[11];
2024 k12 |= words_buf_r[pc_pos].b[12];
2025 k13 |= words_buf_r[pc_pos].b[13];
2026 k14 |= words_buf_r[pc_pos].b[14];
2027 k15 |= words_buf_r[pc_pos].b[15];
2028 k16 |= words_buf_r[pc_pos].b[16];
2029 k17 |= words_buf_r[pc_pos].b[17];
2030 k18 |= words_buf_r[pc_pos].b[18];
2031 k19 |= words_buf_r[pc_pos].b[19];
2032 k20 |= words_buf_r[pc_pos].b[20];
2033 k21 |= words_buf_r[pc_pos].b[21];
2034 k22 |= words_buf_r[pc_pos].b[22];
2035 k23 |= words_buf_r[pc_pos].b[23];
2036 k24 |= words_buf_r[pc_pos].b[24];
2037 k25 |= words_buf_r[pc_pos].b[25];
2038 k26 |= words_buf_r[pc_pos].b[26];
2039 k27 |= words_buf_r[pc_pos].b[27];
2109 k00, k01, k02, k03, k04, k05, k06,
2110 k07, k08, k09, k10, k11, k12, k13,
2111 k14, k15, k16, k17, k18, k19, k20,
2112 k21, k22, k23, k24, k25, k26, k27,
2113 K28, K29, K30, K31, K32, K33, K34,
2114 K35, K36, K37, K38, K39, K40, K41,
2115 K42, K43, K44, K45, K46, K47, K48,
2116 K49, K50, K51, K52, K53, K54, K55,
2117 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
2118 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
2119 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
2120 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
2121 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
2122 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
2123 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
2124 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
2194 if (digests_cnt < 16)
2196 for (u32 d = 0; d < digests_cnt; d++)
2198 const u32 final_hash_pos = digests_offset + d;
2200 if (hashes_shown[final_hash_pos]) continue;
2204 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
2205 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
2212 for (int i = 0; i < 32; i++)
2214 const u32 b0 = -((search[0] >> i) & 1);
2215 const u32 b1 = -((search[1] >> i) & 1);
2217 tmpResult |= out[ 0 + i] ^ b0;
2218 tmpResult |= out[32 + i] ^ b1;
2221 if (tmpResult == 0xffffffff) continue;
2223 const u32 slice = 31 - clz (~tmpResult);
2225 const u32 r0 = search[0];
2226 const u32 r1 = search[1];
2241 for (int i = 0; i < 32; i++)
2243 out0[i] = out[ 0 + 31 - i];
2244 out1[i] = out[32 + 31 - i];
2247 transpose32c (out0);
2248 transpose32c (out1);
2253 for (int slice = 0; slice < 32; slice++)
2255 const u32 r0 = out0[31 - slice];
2256 const u32 r1 = out1[31 - slice];
2265 void m01500s (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
2271 const u32 gid = get_global_id (0);
2272 const u32 lid = get_local_id (0);
2278 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
2284 const u32 s0 = digests_buf[0].digest_buf[0];
2285 const u32 s1 = digests_buf[0].digest_buf[1];
2287 #define S00 (((s0 >> 0) & 1) ? -1 : 0)
2288 #define S01 (((s0 >> 1) & 1) ? -1 : 0)
2289 #define S02 (((s0 >> 2) & 1) ? -1 : 0)
2290 #define S03 (((s0 >> 3) & 1) ? -1 : 0)
2291 #define S04 (((s0 >> 4) & 1) ? -1 : 0)
2292 #define S05 (((s0 >> 5) & 1) ? -1 : 0)
2293 #define S06 (((s0 >> 6) & 1) ? -1 : 0)
2294 #define S07 (((s0 >> 7) & 1) ? -1 : 0)
2295 #define S08 (((s0 >> 8) & 1) ? -1 : 0)
2296 #define S09 (((s0 >> 9) & 1) ? -1 : 0)
2297 #define S10 (((s0 >> 10) & 1) ? -1 : 0)
2298 #define S11 (((s0 >> 11) & 1) ? -1 : 0)
2299 #define S12 (((s0 >> 12) & 1) ? -1 : 0)
2300 #define S13 (((s0 >> 13) & 1) ? -1 : 0)
2301 #define S14 (((s0 >> 14) & 1) ? -1 : 0)
2302 #define S15 (((s0 >> 15) & 1) ? -1 : 0)
2303 #define S16 (((s0 >> 16) & 1) ? -1 : 0)
2304 #define S17 (((s0 >> 17) & 1) ? -1 : 0)
2305 #define S18 (((s0 >> 18) & 1) ? -1 : 0)
2306 #define S19 (((s0 >> 19) & 1) ? -1 : 0)
2307 #define S20 (((s0 >> 20) & 1) ? -1 : 0)
2308 #define S21 (((s0 >> 21) & 1) ? -1 : 0)
2309 #define S22 (((s0 >> 22) & 1) ? -1 : 0)
2310 #define S23 (((s0 >> 23) & 1) ? -1 : 0)
2311 #define S24 (((s0 >> 24) & 1) ? -1 : 0)
2312 #define S25 (((s0 >> 25) & 1) ? -1 : 0)
2313 #define S26 (((s0 >> 26) & 1) ? -1 : 0)
2314 #define S27 (((s0 >> 27) & 1) ? -1 : 0)
2315 #define S28 (((s0 >> 28) & 1) ? -1 : 0)
2316 #define S29 (((s0 >> 29) & 1) ? -1 : 0)
2317 #define S30 (((s0 >> 30) & 1) ? -1 : 0)
2318 #define S31 (((s0 >> 31) & 1) ? -1 : 0)
2319 #define S32 (((s1 >> 0) & 1) ? -1 : 0)
2320 #define S33 (((s1 >> 1) & 1) ? -1 : 0)
2321 #define S34 (((s1 >> 2) & 1) ? -1 : 0)
2322 #define S35 (((s1 >> 3) & 1) ? -1 : 0)
2323 #define S36 (((s1 >> 4) & 1) ? -1 : 0)
2324 #define S37 (((s1 >> 5) & 1) ? -1 : 0)
2325 #define S38 (((s1 >> 6) & 1) ? -1 : 0)
2326 #define S39 (((s1 >> 7) & 1) ? -1 : 0)
2327 #define S40 (((s1 >> 8) & 1) ? -1 : 0)
2328 #define S41 (((s1 >> 9) & 1) ? -1 : 0)
2329 #define S42 (((s1 >> 10) & 1) ? -1 : 0)
2330 #define S43 (((s1 >> 11) & 1) ? -1 : 0)
2331 #define S44 (((s1 >> 12) & 1) ? -1 : 0)
2332 #define S45 (((s1 >> 13) & 1) ? -1 : 0)
2333 #define S46 (((s1 >> 14) & 1) ? -1 : 0)
2334 #define S47 (((s1 >> 15) & 1) ? -1 : 0)
2335 #define S48 (((s1 >> 16) & 1) ? -1 : 0)
2336 #define S49 (((s1 >> 17) & 1) ? -1 : 0)
2337 #define S50 (((s1 >> 18) & 1) ? -1 : 0)
2338 #define S51 (((s1 >> 19) & 1) ? -1 : 0)
2339 #define S52 (((s1 >> 20) & 1) ? -1 : 0)
2340 #define S53 (((s1 >> 21) & 1) ? -1 : 0)
2341 #define S54 (((s1 >> 22) & 1) ? -1 : 0)
2342 #define S55 (((s1 >> 23) & 1) ? -1 : 0)
2343 #define S56 (((s1 >> 24) & 1) ? -1 : 0)
2344 #define S57 (((s1 >> 25) & 1) ? -1 : 0)
2345 #define S58 (((s1 >> 26) & 1) ? -1 : 0)
2346 #define S59 (((s1 >> 27) & 1) ? -1 : 0)
2347 #define S60 (((s1 >> 28) & 1) ? -1 : 0)
2348 #define S61 (((s1 >> 29) & 1) ? -1 : 0)
2349 #define S62 (((s1 >> 30) & 1) ? -1 : 0)
2350 #define S63 (((s1 >> 31) & 1) ? -1 : 0)
2356 const u32 w0 = pws[gid].i[0];
2357 const u32 w1 = pws[gid].i[1];
2359 const u32 w0s = (w0 << 1) & 0xfefefefe;
2360 const u32 w1s = (w1 << 1) & 0xfefefefe;
2362 #define K00 (((w0s >> ( 0 + 7)) & 1) ? -1 : 0)
2363 #define K01 (((w0s >> ( 0 + 6)) & 1) ? -1 : 0)
2364 #define K02 (((w0s >> ( 0 + 5)) & 1) ? -1 : 0)
2365 #define K03 (((w0s >> ( 0 + 4)) & 1) ? -1 : 0)
2366 #define K04 (((w0s >> ( 0 + 3)) & 1) ? -1 : 0)
2367 #define K05 (((w0s >> ( 0 + 2)) & 1) ? -1 : 0)
2368 #define K06 (((w0s >> ( 0 + 1)) & 1) ? -1 : 0)
2369 #define K07 (((w0s >> ( 8 + 7)) & 1) ? -1 : 0)
2370 #define K08 (((w0s >> ( 8 + 6)) & 1) ? -1 : 0)
2371 #define K09 (((w0s >> ( 8 + 5)) & 1) ? -1 : 0)
2372 #define K10 (((w0s >> ( 8 + 4)) & 1) ? -1 : 0)
2373 #define K11 (((w0s >> ( 8 + 3)) & 1) ? -1 : 0)
2374 #define K12 (((w0s >> ( 8 + 2)) & 1) ? -1 : 0)
2375 #define K13 (((w0s >> ( 8 + 1)) & 1) ? -1 : 0)
2376 #define K14 (((w0s >> (16 + 7)) & 1) ? -1 : 0)
2377 #define K15 (((w0s >> (16 + 6)) & 1) ? -1 : 0)
2378 #define K16 (((w0s >> (16 + 5)) & 1) ? -1 : 0)
2379 #define K17 (((w0s >> (16 + 4)) & 1) ? -1 : 0)
2380 #define K18 (((w0s >> (16 + 3)) & 1) ? -1 : 0)
2381 #define K19 (((w0s >> (16 + 2)) & 1) ? -1 : 0)
2382 #define K20 (((w0s >> (16 + 1)) & 1) ? -1 : 0)
2383 #define K21 (((w0s >> (24 + 7)) & 1) ? -1 : 0)
2384 #define K22 (((w0s >> (24 + 6)) & 1) ? -1 : 0)
2385 #define K23 (((w0s >> (24 + 5)) & 1) ? -1 : 0)
2386 #define K24 (((w0s >> (24 + 4)) & 1) ? -1 : 0)
2387 #define K25 (((w0s >> (24 + 3)) & 1) ? -1 : 0)
2388 #define K26 (((w0s >> (24 + 2)) & 1) ? -1 : 0)
2389 #define K27 (((w0s >> (24 + 1)) & 1) ? -1 : 0)
2390 #define K28 (((w1s >> ( 0 + 7)) & 1) ? -1 : 0)
2391 #define K29 (((w1s >> ( 0 + 6)) & 1) ? -1 : 0)
2392 #define K30 (((w1s >> ( 0 + 5)) & 1) ? -1 : 0)
2393 #define K31 (((w1s >> ( 0 + 4)) & 1) ? -1 : 0)
2394 #define K32 (((w1s >> ( 0 + 3)) & 1) ? -1 : 0)
2395 #define K33 (((w1s >> ( 0 + 2)) & 1) ? -1 : 0)
2396 #define K34 (((w1s >> ( 0 + 1)) & 1) ? -1 : 0)
2397 #define K35 (((w1s >> ( 8 + 7)) & 1) ? -1 : 0)
2398 #define K36 (((w1s >> ( 8 + 6)) & 1) ? -1 : 0)
2399 #define K37 (((w1s >> ( 8 + 5)) & 1) ? -1 : 0)
2400 #define K38 (((w1s >> ( 8 + 4)) & 1) ? -1 : 0)
2401 #define K39 (((w1s >> ( 8 + 3)) & 1) ? -1 : 0)
2402 #define K40 (((w1s >> ( 8 + 2)) & 1) ? -1 : 0)
2403 #define K41 (((w1s >> ( 8 + 1)) & 1) ? -1 : 0)
2404 #define K42 (((w1s >> (16 + 7)) & 1) ? -1 : 0)
2405 #define K43 (((w1s >> (16 + 6)) & 1) ? -1 : 0)
2406 #define K44 (((w1s >> (16 + 5)) & 1) ? -1 : 0)
2407 #define K45 (((w1s >> (16 + 4)) & 1) ? -1 : 0)
2408 #define K46 (((w1s >> (16 + 3)) & 1) ? -1 : 0)
2409 #define K47 (((w1s >> (16 + 2)) & 1) ? -1 : 0)
2410 #define K48 (((w1s >> (16 + 1)) & 1) ? -1 : 0)
2411 #define K49 (((w1s >> (24 + 7)) & 1) ? -1 : 0)
2412 #define K50 (((w1s >> (24 + 6)) & 1) ? -1 : 0)
2413 #define K51 (((w1s >> (24 + 5)) & 1) ? -1 : 0)
2414 #define K52 (((w1s >> (24 + 4)) & 1) ? -1 : 0)
2415 #define K53 (((w1s >> (24 + 3)) & 1) ? -1 : 0)
2416 #define K54 (((w1s >> (24 + 2)) & 1) ? -1 : 0)
2417 #define K55 (((w1s >> (24 + 1)) & 1) ? -1 : 0)
2423 const u32 pc_pos = get_local_id (1);
2425 const u32 il_pos = pc_pos * 32;
2456 k00 |= words_buf_r[pc_pos].b[ 0];
2457 k01 |= words_buf_r[pc_pos].b[ 1];
2458 k02 |= words_buf_r[pc_pos].b[ 2];
2459 k03 |= words_buf_r[pc_pos].b[ 3];
2460 k04 |= words_buf_r[pc_pos].b[ 4];
2461 k05 |= words_buf_r[pc_pos].b[ 5];
2462 k06 |= words_buf_r[pc_pos].b[ 6];
2463 k07 |= words_buf_r[pc_pos].b[ 7];
2464 k08 |= words_buf_r[pc_pos].b[ 8];
2465 k09 |= words_buf_r[pc_pos].b[ 9];
2466 k10 |= words_buf_r[pc_pos].b[10];
2467 k11 |= words_buf_r[pc_pos].b[11];
2468 k12 |= words_buf_r[pc_pos].b[12];
2469 k13 |= words_buf_r[pc_pos].b[13];
2470 k14 |= words_buf_r[pc_pos].b[14];
2471 k15 |= words_buf_r[pc_pos].b[15];
2472 k16 |= words_buf_r[pc_pos].b[16];
2473 k17 |= words_buf_r[pc_pos].b[17];
2474 k18 |= words_buf_r[pc_pos].b[18];
2475 k19 |= words_buf_r[pc_pos].b[19];
2476 k20 |= words_buf_r[pc_pos].b[20];
2477 k21 |= words_buf_r[pc_pos].b[21];
2478 k22 |= words_buf_r[pc_pos].b[22];
2479 k23 |= words_buf_r[pc_pos].b[23];
2480 k24 |= words_buf_r[pc_pos].b[24];
2481 k25 |= words_buf_r[pc_pos].b[25];
2482 k26 |= words_buf_r[pc_pos].b[26];
2483 k27 |= words_buf_r[pc_pos].b[27];
2553 k00, k01, k02, k03, k04, k05, k06,
2554 k07, k08, k09, k10, k11, k12, k13,
2555 k14, k15, k16, k17, k18, k19, k20,
2556 k21, k22, k23, k24, k25, k26, k27,
2557 K28, K29, K30, K31, K32, K33, K34,
2558 K35, K36, K37, K38, K39, K40, K41,
2559 K42, K43, K44, K45, K46, K47, K48,
2560 K49, K50, K51, K52, K53, K54, K55,
2561 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
2562 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
2563 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
2564 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
2565 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
2566 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
2567 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
2568 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
2573 tmpResult |= D00 ^ S00;
2574 tmpResult |= D01 ^ S01;
2575 tmpResult |= D02 ^ S02;
2576 tmpResult |= D03 ^ S03;
2577 tmpResult |= D04 ^ S04;
2578 tmpResult |= D05 ^ S05;
2579 tmpResult |= D06 ^ S06;
2580 tmpResult |= D07 ^ S07;
2581 tmpResult |= D08 ^ S08;
2582 tmpResult |= D09 ^ S09;
2583 tmpResult |= D10 ^ S10;
2584 tmpResult |= D11 ^ S11;
2585 tmpResult |= D12 ^ S12;
2586 tmpResult |= D13 ^ S13;
2587 tmpResult |= D14 ^ S14;
2588 tmpResult |= D15 ^ S15;
2590 if (tmpResult == 0xffffffff) return;
2592 tmpResult |= D16 ^ S16;
2593 tmpResult |= D17 ^ S17;
2594 tmpResult |= D18 ^ S18;
2595 tmpResult |= D19 ^ S19;
2596 tmpResult |= D20 ^ S20;
2597 tmpResult |= D21 ^ S21;
2598 tmpResult |= D22 ^ S22;
2599 tmpResult |= D23 ^ S23;
2600 tmpResult |= D24 ^ S24;
2601 tmpResult |= D25 ^ S25;
2602 tmpResult |= D26 ^ S26;
2603 tmpResult |= D27 ^ S27;
2604 tmpResult |= D28 ^ S28;
2605 tmpResult |= D29 ^ S29;
2606 tmpResult |= D30 ^ S30;
2607 tmpResult |= D31 ^ S31;
2609 if (tmpResult == 0xffffffff) return;
2611 tmpResult |= D32 ^ S32;
2612 tmpResult |= D33 ^ S33;
2613 tmpResult |= D34 ^ S34;
2614 tmpResult |= D35 ^ S35;
2615 tmpResult |= D36 ^ S36;
2616 tmpResult |= D37 ^ S37;
2617 tmpResult |= D38 ^ S38;
2618 tmpResult |= D39 ^ S39;
2619 tmpResult |= D40 ^ S40;
2620 tmpResult |= D41 ^ S41;
2621 tmpResult |= D42 ^ S42;
2622 tmpResult |= D43 ^ S43;
2623 tmpResult |= D44 ^ S44;
2624 tmpResult |= D45 ^ S45;
2625 tmpResult |= D46 ^ S46;
2626 tmpResult |= D47 ^ S47;
2628 if (tmpResult == 0xffffffff) return;
2630 tmpResult |= D48 ^ S48;
2631 tmpResult |= D49 ^ S49;
2632 tmpResult |= D50 ^ S50;
2633 tmpResult |= D51 ^ S51;
2634 tmpResult |= D52 ^ S52;
2635 tmpResult |= D53 ^ S53;
2636 tmpResult |= D54 ^ S54;
2637 tmpResult |= D55 ^ S55;
2638 tmpResult |= D56 ^ S56;
2639 tmpResult |= D57 ^ S57;
2640 tmpResult |= D58 ^ S58;
2641 tmpResult |= D59 ^ S59;
2642 tmpResult |= D60 ^ S60;
2643 tmpResult |= D61 ^ S61;
2644 tmpResult |= D62 ^ S62;
2645 tmpResult |= D63 ^ S63;
2647 if (tmpResult == 0xffffffff) return;
2649 const u32 slice = 31 - clz (~tmpResult);
2655 // transpose bitslice mod : attention race conditions, need different buffers for *in and *out
2658 __kernel void m01500_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
2660 const u32 gid = get_global_id (0);
2662 const u32 block = gid / 32;
2663 const u32 slice = gid % 32;
2665 const u32 w0 = mod[gid];
2667 const u32 w0s = (w0 << 1) & 0xfefefefe;
2672 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
2674 atomic_or (&words_buf_r[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
2675 atomic_or (&words_buf_r[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice));
2676 atomic_or (&words_buf_r[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice));
2677 atomic_or (&words_buf_r[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice));
2678 atomic_or (&words_buf_r[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice));
2679 atomic_or (&words_buf_r[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice));
2680 atomic_or (&words_buf_r[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice));
2684 __kernel void m01500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2690 const u32 gid = get_global_id (0);
2691 const u32 lid = get_local_id (0);
2693 if (gid >= gid_max) return;
2699 m01500m (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
2702 __kernel void m01500_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2706 __kernel void m01500_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2710 __kernel void m01500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2716 const u32 gid = get_global_id (0);
2717 const u32 lid = get_local_id (0);
2719 if (gid >= gid_max) return;
2725 m01500s (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
2728 __kernel void m01500_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2732 __kernel void m01500_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)