2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes for maxwell were taken from DeepLearningJohnDoe, license below
5 * : sboxes for others were taken from JtR, license below
10 #include "inc_vendor.cl"
11 #include "inc_hash_constants.h"
12 #include "inc_hash_functions.cl"
13 #include "inc_types.cl"
14 #include "inc_common.cl"
// File names of the single-hash and multi-hash bitslice comparison snippets.
// NOTE(review): presumably #include'd further down in the kernels — not visible in this excerpt.
16 #define COMPARE_S "inc_comp_single_bs.cl"
17 #define COMPARE_M "inc_comp_multi_bs.cl"
// myselx(a,b,c): evaluates to (b) when (c) is non-zero, otherwise to (a).
19 #define myselx(a,b,c) ((c) ? (b) : (a))
26 // Bitslice DES S-boxes with LOP3.LUT instructions
27 // For NVIDIA Maxwell architecture and CUDA 7.5 RC
28 // by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
30 // Gate counts: 25 24 25 18 25 24 24 23
32 // Depth: 8 7 7 6 8 10 10 8
35 // Note that same S-box function with a lower gate count isn't necessarily faster.
37 // These Boolean expressions corresponding to DES S-boxes were
38 // discovered by <deeplearningjohndoe at gmail.com>
40 // This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
41 // Redistribution and use in source and binary forms, with or without
42 // modification, are permitted.
44 // The underlying mathematical formulas are NOT copyrighted.
// LUT(a,b,c,d,e): declares a new u32 named `a` and fills it with one inline-asm
// `lop3.b32` instruction taking b, c and d as register inputs; `e` is stringified
// and pasted into the instruction text as its immediate operand.
// Because the macro contains a declaration, each `a` must be a fresh identifier in its scope.
47 #define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));
// s1: bitslice DES S-box 1, lop3.b32 (LUT macro) variant.
// Takes six u32 input words (a1..a6) and four u32 output pointers (out1..out4).
// Every LUT(...) line declares a fresh u32 named by its first argument; the long
// names are intermediates, and x1..x4 appear to be the four result words.
// NOTE(review): the braces and the stores through out1..out4 are not visible in this
// excerpt; presumably x1..x4 are folded into *out1..*out4 — confirm against the full file.
49 void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
51 LUT(xAA55AA5500550055, a1, a4, a6, 0xC1)
52 LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
53 LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
54 LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
55 LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
56 LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
57 LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
// result word x3 (first operand a5)
58 LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
59 LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
60 LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
61 LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
62 LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
63 LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
// result word x2 (first operand a5)
64 LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
65 LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
66 LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
67 LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
68 LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
69 LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
// result word x4 (first operand a5)
70 LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
71 LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
72 LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
73 LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
74 LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
// result word x1 (first operand a5)
75 LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)
// s2: bitslice DES S-box 2, lop3.b32 (LUT macro) variant.
// Takes six u32 input words (a1..a6) and four u32 output pointers (out1..out4).
// Every LUT(...) line declares a fresh u32 named by its first argument; the long
// names are intermediates, and x1..x4 appear to be the four result words.
// NOTE(review): the braces and the stores through out1..out4 are not visible in this
// excerpt; presumably x1..x4 are folded into *out1..*out4 — confirm against the full file.
83 void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
85 LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97)
86 LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
87 LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76)
88 LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
89 LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
90 LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65)
91 LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
92 LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
// result word x3 (first operand a4)
93 LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
94 LUT(x3333CCCC00000000, a2, a5, a6, 0x14)
95 LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
96 LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
97 LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
// result word x2 (first operand a4)
98 LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
99 LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
100 LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
101 LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
102 LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
// result word x1 (first operand a4)
103 LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
// note: a1 is passed as both the first and second LUT input here
104 LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
105 LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
106 LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
107 LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
// result word x4 (first operand a4)
108 LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)
// s3: bitslice DES S-box 3, lop3.b32 (LUT macro) variant.
// Takes six u32 input words (a1..a6) and four u32 output pointers (out1..out4).
// Every LUT(...) line declares a fresh u32 named by its first argument; the long
// names are intermediates, and x1..x4 appear to be the four result words.
// NOTE(review): the braces and the stores through out1..out4 are not visible in this
// excerpt; presumably x1..x4 are folded into *out1..*out4 — confirm against the full file.
116 void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
118 LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
119 LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
120 LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
121 LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
122 LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
123 LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
// result word x1 (first operand a2)
124 LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
125 LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49)
126 LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
127 LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
128 LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
129 LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
// result word x4 (first operand a2)
130 LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
131 LUT(x500F500F500F500F, a1, a3, a4, 0x98)
132 LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
133 LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
134 LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
135 LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
136 LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
// result word x3 (first operand a2)
137 LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
138 LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
139 LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
140 LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
141 LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
// result word x2 (first operand a2)
142 LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)
// s4: bitslice DES S-box 4, lop3.b32 (LUT macro) variant.
// Takes six u32 input words (a1..a6) and four u32 output pointers (out1..out4).
// Every LUT(...) line declares a fresh u32 named by its first argument; the long
// names are intermediates, and x1..x4 appear to be the four result words.
// NOTE(review): the braces and the stores through out1..out4 are not visible in this
// excerpt; presumably x1..x4 are folded into *out1..*out4 — confirm against the full file.
150 void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
152 LUT(x55F055F055F055F0, a1, a3, a4, 0x72)
153 LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
154 LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59)
155 LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
156 LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
157 LUT(x9999666699996666, a1, a2, a5, 0x69)
158 LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
159 LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
// result word x4 (first operand a6)
160 LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
161 LUT(x4848484848484848, a1, a2, a3, 0x12)
162 LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
163 LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
164 LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
165 LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
166 LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
// x2 and x1 share the same three inputs and differ only in the LUT immediate
167 LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
168 LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
// x3 reuses the inputs of x4 with a different LUT immediate
169 LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)
// s5: bitslice DES S-box 5, lop3.b32 (LUT macro) variant.
// Takes six u32 input words (a1..a6) and four u32 output pointers (out1..out4).
// Every LUT(...) line declares a fresh u32 named by its first argument; the long
// names are intermediates, and x1..x4 appear to be the four result words.
// NOTE(review): the braces and the stores through out1..out4 are not visible in this
// excerpt; presumably x1..x4 are folded into *out1..*out4 — confirm against the full file.
177 void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
179 LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
180 LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9)
181 LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
182 LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
183 LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
184 LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
// result word x3 (first operand a4)
185 LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
186 LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
187 LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
188 LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
189 LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
190 LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
// result word x2 (first operand a4)
191 LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
192 LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
193 LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
194 LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
195 LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
196 LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
// result word x1 (first operand a4)
197 LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
198 LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
199 LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
200 LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
201 LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
202 LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
// result word x4 (first operand a4)
203 LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)
// s6: bitslice DES S-box 6, lop3.b32 (LUT macro) variant.
// Takes six u32 input words (a1..a6) and four u32 output pointers (out1..out4).
// Every LUT(...) line declares a fresh u32 named by its first argument; the long
// names are intermediates, and x1..x4 appear to be the four result words.
// NOTE(review): the braces and the stores through out1..out4 are not visible in this
// excerpt; presumably x1..x4 are folded into *out1..*out4 — confirm against the full file.
211 void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
213 LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2)
214 LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
// note: a1 is passed as both the first and second LUT input here
215 LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
216 LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
217 LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
218 LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
219 LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
// result word x4 (first operand a6); x4 is also read again further down
220 LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
221 LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
222 LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
223 LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
224 LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
225 LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
// result word x3 (first operand a6)
226 LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
227 LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
228 LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
229 LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
230 LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
// result word x1 (first operand a6)
231 LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
// this intermediate takes the already-computed x4 as one of its inputs
232 LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
233 LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
234 LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
235 LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
// result word x2 (first operand a6)
236 LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)
// s7: bitslice DES S-box 7, lop3.b32 (LUT macro) variant.
// Takes six u32 input words (a1..a6) and four u32 output pointers (out1..out4).
// Every LUT(...) line declares a fresh u32 named by its first argument; the long
// names are intermediates, and x1..x4 appear to be the four result words.
// NOTE(review): the braces and the stores through out1..out4 are not visible in this
// excerpt; presumably x1..x4 are folded into *out1..*out4 — confirm against the full file.
244 void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
246 LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
247 LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
248 LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
249 LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
250 LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
251 LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
// result word x1 (first operand a6)
252 LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
253 LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
254 LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
255 LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
256 LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
257 LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
// result word x3 (first operand a6)
258 LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
259 LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
260 LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
261 LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23)
262 LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
// result word x4 (first operand a6)
263 LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
264 LUT(xA050A050A050A050, a1, a3, a4, 0x21)
265 LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
266 LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
267 LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
268 LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
// result word x2 (first operand a6)
269 LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)
// s8: bitslice DES S-box 8, lop3.b32 (LUT macro) variant.
// Takes six u32 input words (a1..a6) and four u32 output pointers (out1..out4).
// Every LUT(...) line declares a fresh u32 named by its first argument; the long
// names are intermediates, and x1..x4 appear to be the four result words.
// NOTE(review): the braces and the stores through out1..out4 are not visible in this
// excerpt; presumably x1..x4 are folded into *out1..*out4 — confirm against the full file.
277 void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
279 LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
// note: a1 is passed as both the first and second LUT input here
280 LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
281 LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
282 LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
283 LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
284 LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
285 LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
286 LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
// result word x4 (first operand a6)
287 LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
288 LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
289 LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
290 LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
291 LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
292 LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
// result word x2 (first operand a6)
293 LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
294 LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
295 LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
296 LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
297 LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
// result word x3 (first operand a6)
298 LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
299 LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
300 LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
// result word x1 (first operand a6)
301 LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)
312 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
313 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
315 * Gate counts: 49 44 46 33 48 46 46 41
318 * Several same-gate-count expressions for each S-box are included (for use on
319 * different CPUs/GPUs).
321 * These Boolean expressions corresponding to DES S-boxes have been generated
322 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
323 * John the Ripper password cracker: http://www.openwall.com/john/
324 * Being mathematical formulas, they are not copyrighted and are free for reuse
327 * This file (a specific representation of the S-box expressions, surrounding
328 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
329 * Redistribution and use in source and binary forms, with or without
330 * modification, are permitted. (This is a heavily cut-down "BSD license".)
332 * The effort has been sponsored by Rapid7: http://www.rapid7.com
// s1: bitslice DES S-box 1, gate-based variant (AND, OR, XOR, NOT, AND-NOT on u32 words).
// Takes six u32 input words (a1..a6) and four u32 output pointers (out1..out4).
// Temporaries are plain u32 locals; the xN0/xN1 pairs (x00/x01 .. x30/x31) each combine
// a partial result with a2 and then XOR it with a second partial result.
// NOTE(review): this excerpt is incomplete — braces, some declarations/assignments
// (e.g. x3333FFFF, x66666666, x00FFFF00, x5555FFFF, x25202160) and the stores through
// out1..out4 are not visible; presumably x01/x11/x21/x31 feed *out1..*out4 — confirm.
335 void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
337 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
339 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
340 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
341 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
342 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
343 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
344 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
345 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
346 u32 x00, x01, x10, x11, x20, x21, x30, x31;
348 x55005500 = a1 & ~a5;
349 x5A0F5A0F = a4 ^ x55005500;
352 x22226666 = x3333FFFF & x66666666;
353 x2D2D6969 = a4 ^ x22226666;
354 x25202160 = x2D2D6969 & ~x5A0F5A0F;
357 x33CCCC33 = a3 ^ x00FFFF00;
358 x4803120C = x5A0F5A0F & ~x33CCCC33;
359 x2222FFFF = a6 | x22226666;
360 x6A21EDF3 = x4803120C ^ x2222FFFF;
361 x4A01CC93 = x6A21EDF3 & ~x25202160;
364 x7F75FFFF = x6A21EDF3 | x5555FFFF;
365 x00D20096 = a5 & ~x2D2D6969;
366 x7FA7FF69 = x7F75FFFF ^ x00D20096;
368 x0A0A0000 = a4 & ~x5555FFFF;
369 x0AD80096 = x00D20096 ^ x0A0A0000;
370 x00999900 = x00FFFF00 & ~x66666666;
371 x0AD99996 = x0AD80096 | x00999900;
373 x22332233 = a3 & ~x55005500;
374 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
375 x054885C0 = x257AA5F0 & ~x22332233;
376 xFAB77A3F = ~x054885C0;
377 x2221EDF3 = x3333FFFF & x6A21EDF3;
378 xD89697CC = xFAB77A3F ^ x2221EDF3;
// x20/x21 pair (uses a2)
379 x20 = x7FA7FF69 & ~a2;
380 x21 = x20 ^ xD89697CC;
383 x05B77AC0 = x00FFFF00 ^ x054885C0;
384 x05F77AD6 = x00D20096 | x05B77AC0;
385 x36C48529 = x3333FFFF ^ x05F77AD6;
386 x6391D07C = a1 ^ x36C48529;
387 xBB0747B0 = xD89697CC ^ x6391D07C;
// x00/x01 pair (uses a2)
388 x00 = x25202160 | a2;
389 x01 = x00 ^ xBB0747B0;
392 x4C460000 = x3333FFFF ^ x7F75FFFF;
393 x4EDF9996 = x0AD99996 | x4C460000;
394 x2D4E49EA = x6391D07C ^ x4EDF9996;
395 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
396 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
// x10/x11 pair (uses a2)
397 x10 = x4A01CC93 | a2;
398 x11 = x10 ^ x96B1B65A;
401 x5AFF5AFF = a5 | x5A0F5A0F;
402 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
403 x4201C010 = x4A01CC93 & x6391D07C;
404 x10B0D205 = x52B11215 ^ x4201C010;
// x30/x31 pair (uses a2)
405 x30 = x10B0D205 | a2;
406 x31 = x30 ^ x0AD99996;
// s2: bitslice DES S-box 2, gate-based variant (AND, OR, XOR, NOT, AND-NOT on u32 words).
// Takes six u32 input words (a1..a6) and four u32 output pointers (out1..out4).
// Temporaries are plain u32 locals; the xN0/xN1 pairs (x00/x01 .. x30/x31) each combine
// a partial result with a4 and then XOR it with a second partial result.
// NOTE(review): this excerpt is incomplete — braces, some declarations/assignments
// (e.g. x33CC33CC, x00000F0F, xAAAAAAAA) and the stores through out1..out4 are not
// visible; presumably x01/x11/x21/x31 feed *out1..*out4 — confirm against the full file.
410 void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
413 u32 x55550000, x00AA00FF, x33BB33FF;
414 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
415 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
416 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
417 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
418 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
419 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
420 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
421 u32 x00, x01, x10, x11, x20, x21, x30, x31;
425 x55550000 = a1 & ~a6;
426 x00AA00FF = a5 & ~x55550000;
427 x33BB33FF = a2 | x00AA00FF;
429 x33CC0000 = x33CC33CC & ~a6;
430 x11441144 = a1 & x33CC33CC;
431 x11BB11BB = a5 ^ x11441144;
432 x003311BB = x11BB11BB & ~x33CC0000;
435 x336600FF = x00AA00FF ^ x33CC0000;
436 x332200FF = x33BB33FF & x336600FF;
437 x332200F0 = x332200FF & ~x00000F0F;
439 x0302000F = a3 & x332200FF;
441 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
442 x33CCCC33 = a6 ^ x33CC33CC;
443 x33CCC030 = x33CCCC33 & ~x00000F0F;
444 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
// x10/x11 pair (uses a4)
445 x10 = a4 & ~x332200F0;
446 x11 = x10 ^ x9A646A95;
449 x00333303 = a2 & ~x33CCC030;
450 x118822B8 = x11BB11BB ^ x00333303;
451 xA8208805 = xA9A8AAA5 & ~x118822B8;
452 x3CC3C33C = a3 ^ x33CCCC33;
453 x94E34B39 = xA8208805 ^ x3CC3C33C;
// x00/x01 pair (uses a4)
454 x00 = x33BB33FF & ~a4;
455 x01 = x00 ^ x94E34B39;
458 x0331330C = x0302000F ^ x00333303;
459 x3FF3F33C = x3CC3C33C | x0331330C;
460 xA9DF596A = x33BB33FF ^ x9A646A95;
461 xA9DF5F6F = x00000F0F | xA9DF596A;
462 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
464 xA9466A6A = x332200FF ^ x9A646A95;
465 x3DA52153 = x94E34B39 ^ xA9466A6A;
466 x29850143 = xA9DF5F6F & x3DA52153;
467 x33C0330C = x33CC33CC & x3FF3F33C;
468 x1A45324F = x29850143 ^ x33C0330C;
// x20/x21 pair (uses a4)
469 x20 = x1A45324F | a4;
470 x21 = x20 ^ x962CAC53;
473 x0A451047 = x1A45324F & ~x118822B8;
474 xBBDFDD7B = x33CCCC33 | xA9DF596A;
475 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
// x30/x31 pair (uses a4)
476 x30 = x003311BB | a4;
477 x31 = x30 ^ xB19ACD3C;
// s3: bitslice DES S-box 3, gate-based variant (AND, OR, XOR, NOT, AND-NOT on u32 words).
// Takes six u32 input words (a1..a6) and four u32 output pointers (out1..out4).
// Temporaries are plain u32 locals; the xN0/xN1 pairs (x00/x01 .. x30/x31) each combine
// a partial result with a5 and then XOR it with a second partial result.
// NOTE(review): this excerpt is incomplete — braces, some assignments
// (e.g. x0F0FF0F0, x00FFFF00, x55AA55AA, x000000FF) and the stores through out1..out4
// are not visible; presumably x01/x11/x21/x31 feed *out1..*out4 — confirm.
481 void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
483 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
484 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
485 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
486 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
487 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
488 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
489 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
490 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
491 u32 x00, x01, x10, x11, x20, x21, x30, x31;
493 x44444444 = a1 & ~a2;
495 x4F4FF4F4 = x44444444 | x0F0FF0F0;
497 x00AAAA00 = x00FFFF00 & ~a1;
498 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
500 x3C3CC3C3 = a2 ^ x0F0FF0F0;
501 x3C3C0000 = x3C3CC3C3 & ~a6;
502 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
503 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
505 x00005EF4 = a6 & x4FE55EF4;
506 x00FF5EFF = a4 | x00005EF4;
507 x00555455 = a1 & x00FF5EFF;
508 x3C699796 = x3C3CC3C3 ^ x00555455;
// x30/x31 pair (uses a5)
509 x30 = x4FE55EF4 & ~a5;
510 x31 = x30 ^ x3C699796;
513 x000FF000 = x0F0FF0F0 & x00FFFF00;
515 x26D9A15E = x7373F4F4 ^ x55AA55AA;
516 x2FDFAF5F = a3 | x26D9A15E;
517 x2FD00F5F = x2FDFAF5F & ~x000FF000;
519 x55AAFFAA = x00AAAA00 | x55AA55AA;
520 x28410014 = x3C699796 & ~x55AAFFAA;
522 x000000CC = x000000FF & ~a2;
523 x284100D8 = x28410014 ^ x000000CC;
525 x204100D0 = x7373F4F4 & x284100D8;
526 x3C3CC3FF = x3C3CC3C3 | x000000FF;
527 x1C3CC32F = x3C3CC3FF & ~x204100D0;
528 x4969967A = a1 ^ x1C3CC32F;
// x10/x11 pair (uses a5)
529 x10 = x2FD00F5F & a5;
530 x11 = x10 ^ x4969967A;
533 x4CC44CC4 = x4FE55EF4 & ~a2;
534 x40C040C0 = x4CC44CC4 & ~a3;
535 xC3C33C3C = ~x3C3CC3C3;
536 x9669C396 = x55AAFFAA ^ xC3C33C3C;
537 xD6A98356 = x40C040C0 ^ x9669C396;
// x00/x01 pair (uses a5)
538 x00 = a5 & ~x0C840A00;
539 x01 = x00 ^ xD6A98356;
542 xD6E9C3D6 = x40C040C0 | x9669C396;
543 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
544 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
545 x001A000B = a4 & ~x4FE55EF4;
546 x9A1F2D1B = x9A072D12 | x001A000B;
// x20/x21 pair (uses a5)
547 x20 = a5 & ~x284100D8;
548 x21 = x20 ^ x9A1F2D1B;
// s4: bitslice DES S-box 4, gate-based variant (AND, OR, XOR, NOT, AND-NOT on u32 words).
// Takes six u32 input words (a1..a6) and four u32 output pointers (out1..out4).
// Temporaries are plain u32 locals; the xN0/xN1 pairs (x00/x01 .. x30/x31) each combine
// a partial result with a6 and then XOR it with a second partial result.
// NOTE(review): this excerpt is incomplete — braces, some assignments
// (e.g. x5A5A5A5A, x0F0FF0F0, x33FF33FF, x33CC33CC) and the stores through out1..out4
// are not visible; presumably x01/x11/x21/x31 feed *out1..*out4 — confirm.
552 void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
554 u32 x5A5A5A5A, x0F0FF0F0;
555 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
556 x52FBCA0F, x61C8F93C;
557 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
558 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
559 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
560 u32 x00, x01, x10, x11, x20, x21, x30, x31;
565 x33FFCC00 = a5 ^ x33FF33FF;
566 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
567 x0C0CC0C0 = x0F0FF0F0 & ~a2;
568 x0CF3C03F = a4 ^ x0C0CC0C0;
569 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
570 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
571 x61C8F93C = a2 ^ x52FBCA0F;
573 x00C0C03C = x0CF3C03F & x61C8F93C;
574 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
575 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
576 x30908326 = x3B92A366 & ~x0F0F30C0;
577 x3C90B3D6 = x0C0030F0 ^ x30908326;
580 x0C0CFFFF = a5 | x0C0CC0C0;
581 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
582 x04124C11 = x379E5C99 & ~x33CC33CC;
583 x56E9861E = x52FBCA0F ^ x04124C11;
// x00/x01 pair (uses a6)
584 x00 = a6 & ~x3C90B3D6;
585 x01 = x00 ^ x56E9861E;
// x10/x11 pair (uses a6); XORed with the bitwise complement of x56E9861E
588 xA91679E1 = ~x56E9861E;
589 x10 = x3C90B3D6 & ~a6;
590 x11 = x10 ^ xA91679E1;
593 x9586CA37 = x3C90B3D6 ^ xA91679E1;
594 x8402C833 = x9586CA37 & ~x33CC33CC;
595 x84C2C83F = x00C0C03C | x8402C833;
596 xB35C94A6 = x379E5C99 ^ x84C2C83F;
// x20/x21 pair (uses a6)
597 x20 = x61C8F93C | a6;
598 x21 = x20 ^ xB35C94A6;
// x30/x31 pair (uses a6); same operands as x20/x21 but AND instead of OR
601 x30 = a6 & x61C8F93C;
602 x31 = x30 ^ xB35C94A6;
// s5: bitslice DES S-box 5, gate-based variant (AND, OR, XOR, NOT, AND-NOT on u32 words).
// Takes six u32 input words (a1..a6) and four u32 output pointers (out1..out4).
// Temporaries are plain u32 locals; the xN0/xN1 pairs (x00/x01 .. x30/x31) each combine
// a partial result with a2 and then XOR it with a second partial result.
// NOTE(review): this excerpt is incomplete — braces, some assignments (e.g. x77777777)
// and the stores through out1..out4 are not visible; presumably x01/x11/x21/x31 feed
// *out1..*out4 — confirm against the full file.
606 void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
608 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
609 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
610 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
611 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
612 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
613 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
614 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
615 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
616 u32 x00, x01, x10, x11, x20, x21, x30, x31;
619 x77770000 = x77777777 & ~a6;
620 x22225555 = a1 ^ x77770000;
621 x11116666 = a3 ^ x22225555;
622 x1F1F6F6F = a4 | x11116666;
624 x70700000 = x77770000 & ~a4;
625 x43433333 = a3 ^ x70700000;
626 x00430033 = a5 & x43433333;
627 x55557777 = a1 | x11116666;
628 x55167744 = x00430033 ^ x55557777;
629 x5A19784B = a4 ^ x55167744;
631 x5A1987B4 = a6 ^ x5A19784B;
632 x7A3BD7F5 = x22225555 | x5A1987B4;
633 x003B00F5 = a5 & x7A3BD7F5;
634 x221955A0 = x22225555 ^ x003B00F5;
635 x05050707 = a4 & x55557777;
636 x271C52A7 = x221955A0 ^ x05050707;
638 x2A2A82A0 = x7A3BD7F5 & ~a1;
639 x6969B193 = x43433333 ^ x2A2A82A0;
640 x1FE06F90 = a5 ^ x1F1F6F6F;
641 x16804E00 = x1FE06F90 & ~x6969B193;
642 xE97FB1FF = ~x16804E00;
// x20/x21 pair (uses a2)
643 x20 = xE97FB1FF & ~a2;
644 x21 = x20 ^ x5A19784B;
647 x43403302 = x43433333 & ~x003B00F5;
648 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
649 x37DEFFB7 = x271C52A7 | x35CAED30;
650 x349ECCB5 = x37DEFFB7 & ~x43403302;
651 x0B01234A = x1F1F6F6F & ~x349ECCB5;
653 x101884B4 = x5A1987B4 & x349ECCB5;
654 x0FF8EB24 = x1FE06F90 ^ x101884B4;
655 x41413333 = x43433333 & x55557777;
656 x4FF9FB37 = x0FF8EB24 | x41413333;
657 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
// x30/x31 pair (uses a2)
658 x30 = x4FC2FBC2 & a2;
659 x31 = x30 ^ x271C52A7;
662 x22222222 = a1 ^ x77777777;
663 x16BCEE97 = x349ECCB5 ^ x22222222;
664 x0F080B04 = a4 & x0FF8EB24;
665 x19B4E593 = x16BCEE97 ^ x0F080B04;
// x00/x01 pair (uses a2)
666 x00 = x0B01234A | a2;
667 x01 = x00 ^ x19B4E593;
670 x5C5C5C5C = x1F1F6F6F ^ x43433333;
671 x4448184C = x5C5C5C5C & ~x19B4E593;
672 x2DDABE71 = x22225555 ^ x0FF8EB24;
673 x6992A63D = x4448184C ^ x2DDABE71;
// x10/x11 pair (uses a2)
674 x10 = x1F1F6F6F & a2;
675 x11 = x10 ^ x6992A63D;
// s6: bitslice DES S-box 6, gate-based variant (AND, OR, XOR, NOT, AND-NOT on u32 words).
// Takes six u32 input words (a1..a6) and four u32 output pointers (out1..out4).
// Temporaries are plain u32 locals; the xN0/xN1 pairs (x00/x01 .. x30/x31) each combine
// a partial result with a4 and then XOR it with a second partial result.
// NOTE(review): this excerpt is incomplete — braces, some declarations/assignments
// (e.g. x33CC33CC, x3333FFFF, x5A5A5A5A) and the stores through out1..out4 are not
// visible; presumably x01/x11/x21/x31 feed *out1..*out4 — confirm against the full file.
679 void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
682 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
683 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
684 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
685 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
686 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
687 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
688 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
689 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
690 u32 x00, x01, x10, x11, x20, x21, x30, x31;
695 x11115555 = a1 & x3333FFFF;
696 x22DD6699 = x33CC33CC ^ x11115555;
697 x22DD9966 = a6 ^ x22DD6699;
698 x00220099 = a5 & ~x22DD9966;
700 x00551144 = a1 & x22DD9966;
701 x33662277 = a2 ^ x00551144;
703 x7B7E7A7F = x33662277 | x5A5A5A5A;
704 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
706 x09030C06 = a3 & x59A31CE6;
707 x09030000 = x09030C06 & ~a6;
708 x336622FF = x00220099 | x33662277;
709 x3A6522FF = x09030000 ^ x336622FF;
// x30/x31 pair (uses a4)
710 x30 = x3A6522FF & a4;
711 x31 = x30 ^ x59A31CE6;
714 x484D494C = a2 ^ x7B7E7A7F;
715 x0000B6B3 = a6 & ~x484D494C;
716 x0F0FB9BC = a3 ^ x0000B6B3;
717 x00FC00F9 = a5 & ~x09030C06;
718 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
720 x5DF75DF7 = a1 | x59A31CE6;
721 x116600F7 = x336622FF & x5DF75DF7;
722 x1E69B94B = x0F0FB9BC ^ x116600F7;
723 x1668B94B = x1E69B94B & ~x09030000;
// x20/x21 pair (uses a4)
724 x20 = x00220099 | a4;
725 x21 = x20 ^ x1668B94B;
728 x7B7B7B7B = a2 | x5A5A5A5A;
729 x411E5984 = x3A6522FF ^ x7B7B7B7B;
730 x1FFFFDFD = x11115555 | x0FFFB9FD;
731 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
733 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
734 x004B002D = a5 & ~x3CB4DFD2;
735 xB7B2B6B3 = ~x484D494C;
736 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
737 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
// x10/x11 pair (uses a4)
738 x10 = xCC82CDE5 & ~a4;
739 x11 = x10 ^ x5EE1A479;
742 x0055EEBB = a6 ^ x00551144;
743 x5A5AECE9 = a1 ^ x0F0FB9BC;
744 x0050ECA9 = x0055EEBB & x5A5AECE9;
745 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
746 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
// x00/x01 pair (uses a4)
747 x00 = x0FFFB9FD & ~a4;
748 x01 = x00 ^ xC59A2D67;
// s7: bitslice DES S-box 7, gate-based variant (AND, OR, XOR, NOT, AND-NOT on u32 words).
// Takes six u32 input words (a1..a6) and four u32 output pointers (out1..out4).
// Temporaries are plain u32 locals; the xN0/xN1 pairs (x00/x01 .. x30/x31) each combine
// a partial result with a1 and then XOR it with a second partial result.
// NOTE(review): this excerpt is incomplete — braces, some assignments (e.g. x0FF00FF0)
// and the stores through out1..out4 are not visible; presumably x01/x11/x21/x31 feed
// *out1..*out4 — confirm against the full file.
752 void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
754 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
755 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
756 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
757 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
758 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
759 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
760 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
761 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
762 u32 x00, x01, x10, x11, x20, x21, x30, x31;
765 x3CC33CC3 = a3 ^ x0FF00FF0;
766 x00003CC3 = a6 & x3CC33CC3;
767 x0F000F00 = a4 & x0FF00FF0;
768 x5A555A55 = a2 ^ x0F000F00;
769 x00001841 = x00003CC3 & x5A555A55;
771 x00000F00 = a6 & x0F000F00;
772 x33333C33 = a3 ^ x00000F00;
773 x7B777E77 = x5A555A55 | x33333C33;
774 x0FF0F00F = a6 ^ x0FF00FF0;
775 x74878E78 = x7B777E77 ^ x0FF0F00F;
// x30/x31 pair (uses a1)
776 x30 = a1 & ~x00001841;
777 x31 = x30 ^ x74878E78;
780 x003C003C = a5 & ~x3CC33CC3;
781 x5A7D5A7D = x5A555A55 | x003C003C;
782 x333300F0 = x00003CC3 ^ x33333C33;
783 x694E5A8D = x5A7D5A7D ^ x333300F0;
785 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
786 x000F0303 = a4 & ~x0FF0CCCC;
787 x5A505854 = x5A555A55 & ~x000F0303;
788 x33CC000F = a5 ^ x333300F0;
789 x699C585B = x5A505854 ^ x33CC000F;
791 x7F878F78 = x0F000F00 | x74878E78;
792 x21101013 = a3 & x699C585B;
793 x7F979F7B = x7F878F78 | x21101013;
794 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
795 x4F9493BB = x7F979F7B ^ x30030CC0;
// x00/x01 pair (uses a1)
796 x00 = x4F9493BB & ~a1;
797 x01 = x00 ^ x694E5A8D;
800 x6F9CDBFB = x699C585B | x4F9493BB;
801 x0000DBFB = a6 & x6F9CDBFB;
802 x00005151 = a2 & x0000DBFB;
803 x26DAC936 = x694E5A8D ^ x4F9493BB;
804 x26DA9867 = x00005151 ^ x26DAC936;
806 x27DA9877 = x21101013 | x26DA9867;
807 x27DA438C = x0000DBFB ^ x27DA9877;
808 x2625C9C9 = a5 ^ x26DAC936;
809 x27FFCBCD = x27DA438C | x2625C9C9;
// x20/x21 pair (uses a1)
810 x20 = x27FFCBCD & a1;
811 x21 = x20 ^ x699C585B;
814 x27FF1036 = x0000DBFB ^ x27FFCBCD;
815 x27FF103E = x003C003C | x27FF1036;
816 xB06B6C44 = ~x4F9493BB;
817 x97947C7A = x27FF103E ^ xB06B6C44;
// x10/x11 pair (uses a1)
818 x10 = x97947C7A & ~a1;
819 x11 = x10 ^ x26DA9867;
// s8: bitslice DES S-box 8, gate-based variant (AND, OR, XOR, NOT, AND-NOT on u32 words).
// Takes six u32 input words (a1..a6) and four u32 output pointers (out1..out4).
// Temporaries are plain u32 locals; the xN0/xN1 pairs (x00/x01 .. x30/x31) each combine
// a partial result with a6 and then XOR it with a second partial result.
// NOTE(review): the braces and the stores through out1..out4 are not visible in this
// excerpt; presumably x01/x11/x21/x31 feed *out1..*out4 — confirm against the full file.
823 void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
825 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
826 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
827 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
828 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
829 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
830 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
831 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
832 u32 x00, x01, x10, x11, x20, x21, x30, x31;
834 x0C0C0C0C = a3 & ~a2;
835 x0000F0F0 = a5 & ~a3;
836 x00FFF00F = a4 ^ x0000F0F0;
837 x00555005 = a1 & x00FFF00F;
838 x00515001 = x00555005 & ~x0C0C0C0C;
840 x33000330 = a2 & ~x00FFF00F;
841 x77555775 = a1 | x33000330;
842 x30303030 = a2 & ~a3;
843 x3030CFCF = a5 ^ x30303030;
844 x30104745 = x77555775 & x3030CFCF;
845 x30555745 = x00555005 | x30104745;
847 xFF000FF0 = ~x00FFF00F;
848 xCF1048B5 = x30104745 ^ xFF000FF0;
849 x080A080A = a3 & ~x77555775;
850 xC71A40BF = xCF1048B5 ^ x080A080A;
851 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
// x10/x11 pair (uses a6)
852 x10 = x00515001 | a6;
853 x11 = x10 ^ xCB164CB3;
856 x9E4319E6 = a1 ^ xCB164CB3;
857 x000019E6 = a5 & x9E4319E6;
858 xF429738C = a2 ^ xC71A40BF;
859 xF4296A6A = x000019E6 ^ xF429738C;
860 xC729695A = x33000330 ^ xF4296A6A;
862 xC47C3D2F = x30555745 ^ xF4296A6A;
863 xF77F3F3F = a2 | xC47C3D2F;
864 x9E43E619 = a5 ^ x9E4319E6;
865 x693CD926 = xF77F3F3F ^ x9E43E619;
// x20/x21 pair (uses a6)
866 x20 = x30555745 & a6;
867 x21 = x20 ^ x693CD926;
870 xF719A695 = x3030CFCF ^ xC729695A;
871 xF4FF73FF = a4 | xF429738C;
872 x03E6D56A = xF719A695 ^ xF4FF73FF;
873 x56B3803F = a1 ^ x03E6D56A;
// x30/x31 pair (uses a6)
874 x30 = x56B3803F & a6;
875 x31 = x30 ^ xC729695A;
878 xF700A600 = xF719A695 & ~a4;
879 x61008000 = x693CD926 & xF700A600;
880 x03B7856B = x00515001 ^ x03E6D56A;
881 x62B7056B = x61008000 ^ x03B7856B;
// x00/x01 pair (uses a6)
882 x00 = x62B7056B | a6;
883 x01 = x00 ^ xC729695A;
890 #if defined IS_AMD || defined IS_GENERIC
893 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
894 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
896 * Gate counts: 49 44 46 33 48 46 46 41
899 * Several same-gate-count expressions for each S-box are included (for use on
900 * different CPUs/GPUs).
902 * These Boolean expressions corresponding to DES S-boxes have been generated
903 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
904 * John the Ripper password cracker: http://www.openwall.com/john/
905 * Being mathematical formulas, they are not copyrighted and are free for reuse
908 * This file (a specific representation of the S-box expressions, surrounding
909 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
910 * Redistribution and use in source and binary forms, with or without
911 * modification, are permitted. (This is a heavily cut-down "BSD license".)
913 * The effort has been sponsored by Rapid7: http://www.rapid7.com
// s1: bitslice DES S-box 1 (see the Openwall / John the Ripper notice above),
// written as a straight-line network of &, |, ^ and ~ on u32 words, so every
// bit position of the inputs is processed independently of the others.
//   a1..a6     - the six input words
//   out1..out4 - pointers for the four output words
// NOTE(review): the stores through out1..out4 are not visible here; presumably
// x01, x11, x21 and x31 are the values they receive - confirm.
// NOTE(review): x3333FFFF, x66666666, x00FFFF00 and x5555FFFF are read below
// but no assignment to them is visible, and x25202160 is assigned without a
// visible declaration (the first u32 line ends in a comma) - confirm that the
// missing statements exist in the complete file.
916 void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
918 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
920 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
921 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
922 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
923 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
924 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
925 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
926 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
927 u32 x00, x01, x10, x11, x20, x21, x30, x31;
// Shared sub-expressions. In the statements shown, a2 is applied only in the
// first step of each result pair (x20, x00, x10, x30) further down.
929 x55005500 = a1 & ~a5;
930 x5A0F5A0F = a4 ^ x55005500;
933 x22226666 = x3333FFFF & x66666666;
934 x2D2D6969 = a4 ^ x22226666;
935 x25202160 = x2D2D6969 & ~x5A0F5A0F;
938 x33CCCC33 = a3 ^ x00FFFF00;
939 x4803120C = x5A0F5A0F & ~x33CCCC33;
940 x2222FFFF = a6 | x22226666;
941 x6A21EDF3 = x4803120C ^ x2222FFFF;
942 x4A01CC93 = x6A21EDF3 & ~x25202160;
945 x7F75FFFF = x6A21EDF3 | x5555FFFF;
946 x00D20096 = a5 & ~x2D2D6969;
947 x7FA7FF69 = x7F75FFFF ^ x00D20096;
// x0AD99996 is kept for the x31 result at the end of the function.
949 x0A0A0000 = a4 & ~x5555FFFF;
950 x0AD80096 = x00D20096 ^ x0A0A0000;
951 x00999900 = x00FFFF00 & ~x66666666;
952 x0AD99996 = x0AD80096 | x00999900;
954 x22332233 = a3 & ~x55005500;
955 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
956 x054885C0 = x257AA5F0 & ~x22332233;
957 xFAB77A3F = ~x054885C0;
958 x2221EDF3 = x3333FFFF & x6A21EDF3;
959 xD89697CC = xFAB77A3F ^ x2221EDF3;
// Result pair x20/x21: x7FA7FF69 AND-NOT a2, then XOR xD89697CC.
960 x20 = x7FA7FF69 & ~a2;
961 x21 = x20 ^ xD89697CC;
964 x05B77AC0 = x00FFFF00 ^ x054885C0;
965 x05F77AD6 = x00D20096 | x05B77AC0;
966 x36C48529 = x3333FFFF ^ x05F77AD6;
967 x6391D07C = a1 ^ x36C48529;
968 xBB0747B0 = xD89697CC ^ x6391D07C;
// Result pair x00/x01: x25202160 OR a2, then XOR xBB0747B0.
969 x00 = x25202160 | a2;
970 x01 = x00 ^ xBB0747B0;
973 x4C460000 = x3333FFFF ^ x7F75FFFF;
974 x4EDF9996 = x0AD99996 | x4C460000;
975 x2D4E49EA = x6391D07C ^ x4EDF9996;
976 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
977 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
// Result pair x10/x11: x4A01CC93 OR a2, then XOR x96B1B65A.
978 x10 = x4A01CC93 | a2;
979 x11 = x10 ^ x96B1B65A;
982 x5AFF5AFF = a5 | x5A0F5A0F;
983 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
984 x4201C010 = x4A01CC93 & x6391D07C;
985 x10B0D205 = x52B11215 ^ x4201C010;
// Result pair x30/x31: x10B0D205 OR a2, then XOR x0AD99996.
986 x30 = x10B0D205 | a2;
987 x31 = x30 ^ x0AD99996;
// s2: bitslice DES S-box 2 (see the Openwall / John the Ripper notice above),
// written as a straight-line network of &, |, ^ and ~ on u32 words, so every
// bit position of the inputs is processed independently of the others.
//   a1..a6     - the six input words
//   out1..out4 - pointers for the four output words
// NOTE(review): the stores through out1..out4 are not visible here; presumably
// x01, x11, x21 and x31 are the values they receive - confirm.
// NOTE(review): x33CC33CC is assigned without a visible declaration and
// xAAAAAAAA is read without a visible assignment - confirm that the missing
// statements exist in the complete file.
991 void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
// Locals: one u32 per intermediate gate output, plus x00..x31 for the four
// result pairs.
994 u32 x55550000, x00AA00FF, x33BB33FF;
995 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
996 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
997 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
998 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
999 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
1000 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
1001 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
1002 u32 x00, x01, x10, x11, x20, x21, x30, x31;
// Shared sub-expressions. In the statements shown, a4 is applied only in the
// first step of each result pair (x10, x00, x20, x30) further down.
1004 x33CC33CC = a2 ^ a5;
1006 x55550000 = a1 & ~a6;
1007 x00AA00FF = a5 & ~x55550000;
1008 x33BB33FF = a2 | x00AA00FF;
// x003311BB is kept for the x30 result at the end of the function.
1010 x33CC0000 = x33CC33CC & ~a6;
1011 x11441144 = a1 & x33CC33CC;
1012 x11BB11BB = a5 ^ x11441144;
1013 x003311BB = x11BB11BB & ~x33CC0000;
1015 x00000F0F = a3 & a6;
1016 x336600FF = x00AA00FF ^ x33CC0000;
1017 x332200FF = x33BB33FF & x336600FF;
1018 x332200F0 = x332200FF & ~x00000F0F;
1020 x0302000F = a3 & x332200FF;
1022 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
1023 x33CCCC33 = a6 ^ x33CC33CC;
1024 x33CCC030 = x33CCCC33 & ~x00000F0F;
1025 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
// Result pair x10/x11: a4 AND-NOT x332200F0, then XOR x9A646A95.
1026 x10 = a4 & ~x332200F0;
1027 x11 = x10 ^ x9A646A95;
1030 x00333303 = a2 & ~x33CCC030;
1031 x118822B8 = x11BB11BB ^ x00333303;
1032 xA8208805 = xA9A8AAA5 & ~x118822B8;
1033 x3CC3C33C = a3 ^ x33CCCC33;
1034 x94E34B39 = xA8208805 ^ x3CC3C33C;
// Result pair x00/x01: x33BB33FF AND-NOT a4, then XOR x94E34B39.
1035 x00 = x33BB33FF & ~a4;
1036 x01 = x00 ^ x94E34B39;
1039 x0331330C = x0302000F ^ x00333303;
1040 x3FF3F33C = x3CC3C33C | x0331330C;
1041 xA9DF596A = x33BB33FF ^ x9A646A95;
1042 xA9DF5F6F = x00000F0F | xA9DF596A;
1043 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
1045 xA9466A6A = x332200FF ^ x9A646A95;
1046 x3DA52153 = x94E34B39 ^ xA9466A6A;
1047 x29850143 = xA9DF5F6F & x3DA52153;
1048 x33C0330C = x33CC33CC & x3FF3F33C;
1049 x1A45324F = x29850143 ^ x33C0330C;
// Result pair x20/x21: x1A45324F OR a4, then XOR x962CAC53.
1050 x20 = x1A45324F | a4;
1051 x21 = x20 ^ x962CAC53;
1054 x0A451047 = x1A45324F & ~x118822B8;
1055 xBBDFDD7B = x33CCCC33 | xA9DF596A;
1056 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
// Result pair x30/x31: x003311BB OR a4, then XOR xB19ACD3C.
1057 x30 = x003311BB | a4;
1058 x31 = x30 ^ xB19ACD3C;
// s3: bitslice DES S-box 3 (see the Openwall / John the Ripper notice above),
// written as a straight-line network of &, |, ^ and ~ on u32 words, so every
// bit position of the inputs is processed independently of the others.
// The body has 46 assignments, one gate each, matching the third entry of the
// gate-count list in that notice.
//   a1..a6     - the six input words
//   out1..out4 - pointers for the four output words
// NOTE(review): the stores through out1..out4 are not visible here; presumably
// x01, x11, x21 and x31 are the values they receive - confirm.
1062 void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
// Locals: one u32 per intermediate gate output, plus x00..x31 for the four
// result pairs.
1064 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
1065 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
1066 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
1067 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
1068 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
1069 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
1070 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
1071 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
1072 u32 x00, x01, x10, x11, x20, x21, x30, x31;
// Shared sub-expressions of a1..a4 and a6. a5 is applied only in the first
// step of each result pair (x30, x10, x00, x20) further down.
1074 x44444444 = a1 & ~a2;
1075 x0F0FF0F0 = a3 ^ a6;
1076 x4F4FF4F4 = x44444444 | x0F0FF0F0;
1077 x00FFFF00 = a4 ^ a6;
1078 x00AAAA00 = x00FFFF00 & ~a1;
1079 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
// x0C840A00 is kept for the x00 result further down.
1081 x3C3CC3C3 = a2 ^ x0F0FF0F0;
1082 x3C3C0000 = x3C3CC3C3 & ~a6;
1083 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
1084 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
1086 x00005EF4 = a6 & x4FE55EF4;
1087 x00FF5EFF = a4 | x00005EF4;
1088 x00555455 = a1 & x00FF5EFF;
1089 x3C699796 = x3C3CC3C3 ^ x00555455;
// Result pair x30/x31: x4FE55EF4 AND-NOT a5, then XOR x3C699796.
1090 x30 = x4FE55EF4 & ~a5;
1091 x31 = x30 ^ x3C699796;
1094 x000FF000 = x0F0FF0F0 & x00FFFF00;
1095 x55AA55AA = a1 ^ a4;
1096 x26D9A15E = x7373F4F4 ^ x55AA55AA;
1097 x2FDFAF5F = a3 | x26D9A15E;
1098 x2FD00F5F = x2FDFAF5F & ~x000FF000;
// x284100D8 is used both for the x11 term and, later, for x20.
1100 x55AAFFAA = x00AAAA00 | x55AA55AA;
1101 x28410014 = x3C699796 & ~x55AAFFAA;
1102 x000000FF = a4 & a6;
1103 x000000CC = x000000FF & ~a2;
1104 x284100D8 = x28410014 ^ x000000CC;
1106 x204100D0 = x7373F4F4 & x284100D8;
1107 x3C3CC3FF = x3C3CC3C3 | x000000FF;
1108 x1C3CC32F = x3C3CC3FF & ~x204100D0;
1109 x4969967A = a1 ^ x1C3CC32F;
// Result pair x10/x11: x2FD00F5F AND a5, then XOR x4969967A.
1110 x10 = x2FD00F5F & a5;
1111 x11 = x10 ^ x4969967A;
1114 x4CC44CC4 = x4FE55EF4 & ~a2;
1115 x40C040C0 = x4CC44CC4 & ~a3;
1116 xC3C33C3C = ~x3C3CC3C3;
1117 x9669C396 = x55AAFFAA ^ xC3C33C3C;
1118 xD6A98356 = x40C040C0 ^ x9669C396;
// Result pair x00/x01: a5 AND-NOT x0C840A00, then XOR xD6A98356.
1119 x00 = a5 & ~x0C840A00;
1120 x01 = x00 ^ xD6A98356;
1123 xD6E9C3D6 = x40C040C0 | x9669C396;
1124 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
1125 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
1126 x001A000B = a4 & ~x4FE55EF4;
1127 x9A1F2D1B = x9A072D12 | x001A000B;
// Result pair x20/x21: a5 AND-NOT x284100D8, then XOR x9A1F2D1B.
1128 x20 = a5 & ~x284100D8;
1129 x21 = x20 ^ x9A1F2D1B;
// s4: bitslice DES S-box 4 (see the Openwall / John the Ripper notice above),
// written as a straight-line network of &, |, ^ and ~ on u32 words, so every
// bit position of the inputs is processed independently of the others.
// The body has 33 assignments, one gate each, matching the fourth entry of the
// gate-count list in that notice.
//   a1..a6     - the six input words
//   out1..out4 - pointers for the four output words
// NOTE(review): the stores through out1..out4 are not visible here; presumably
// x01, x11, x21 and x31 are the values they receive - confirm.
1133 void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
// Locals: one u32 per intermediate gate output, plus x00..x31 for the four
// result pairs.
1135 u32 x5A5A5A5A, x0F0FF0F0;
1136 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
1137 x52FBCA0F, x61C8F93C;
1138 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
1139 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
1140 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
1141 u32 x00, x01, x10, x11, x20, x21, x30, x31;
// Shared sub-expressions of a1..a5. a6 is applied only in the first step of
// each result pair (x00, x10, x20, x30) further down.
1143 x5A5A5A5A = a1 ^ a3;
1144 x0F0FF0F0 = a3 ^ a5;
1145 x33FF33FF = a2 | a4;
1146 x33FFCC00 = a5 ^ x33FF33FF;
1147 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
1148 x0C0CC0C0 = x0F0FF0F0 & ~a2;
1149 x0CF3C03F = a4 ^ x0C0CC0C0;
1150 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
1151 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
1152 x61C8F93C = a2 ^ x52FBCA0F;
1154 x00C0C03C = x0CF3C03F & x61C8F93C;
1155 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
1156 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
1157 x30908326 = x3B92A366 & ~x0F0F30C0;
1158 x3C90B3D6 = x0C0030F0 ^ x30908326;
1160 x33CC33CC = a2 ^ a4;
1161 x0C0CFFFF = a5 | x0C0CC0C0;
1162 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
1163 x04124C11 = x379E5C99 & ~x33CC33CC;
1164 x56E9861E = x52FBCA0F ^ x04124C11;
// Result pair x00/x01: a6 AND-NOT x3C90B3D6, then XOR x56E9861E.
1165 x00 = a6 & ~x3C90B3D6;
1166 x01 = x00 ^ x56E9861E;
// Result pair x10/x11 reuses the x01 inputs with the roles inverted:
// x3C90B3D6 AND-NOT a6, then XOR the complement of x56E9861E.
1169 xA91679E1 = ~x56E9861E;
1170 x10 = x3C90B3D6 & ~a6;
1171 x11 = x10 ^ xA91679E1;
1174 x9586CA37 = x3C90B3D6 ^ xA91679E1;
1175 x8402C833 = x9586CA37 & ~x33CC33CC;
1176 x84C2C83F = x00C0C03C | x8402C833;
1177 xB35C94A6 = x379E5C99 ^ x84C2C83F;
// Result pairs x20/x21 and x30/x31 share the XOR term xB35C94A6 and differ
// only in how a6 is combined with x61C8F93C (OR versus AND).
1178 x20 = x61C8F93C | a6;
1179 x21 = x20 ^ xB35C94A6;
1182 x30 = a6 & x61C8F93C;
1183 x31 = x30 ^ xB35C94A6;
// s5: bitslice DES S-box 5 (see the Openwall / John the Ripper notice above),
// written as a straight-line network of &, |, ^ and ~ on u32 words, so every
// bit position of the inputs is processed independently of the others.
// The body has 48 assignments, one gate each, matching the fifth entry of the
// gate-count list in that notice.
//   a1..a6     - the six input words
//   out1..out4 - pointers for the four output words
// NOTE(review): the stores through out1..out4 are not visible here; presumably
// x01, x11, x21 and x31 are the values they receive - confirm.
1187 void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
// Locals: one u32 per intermediate gate output, plus x00..x31 for the four
// result pairs.
1189 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
1190 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
1191 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
1192 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
1193 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
1194 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
1195 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
1196 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
1197 u32 x00, x01, x10, x11, x20, x21, x30, x31;
// Shared sub-expressions of a1 and a3..a6. a2 is applied only in the first
// step of each result pair (x20, x30, x00, x10) further down.
1199 x77777777 = a1 | a3;
1200 x77770000 = x77777777 & ~a6;
1201 x22225555 = a1 ^ x77770000;
1202 x11116666 = a3 ^ x22225555;
1203 x1F1F6F6F = a4 | x11116666;
// x5A19784B is the XOR term of the x21 result.
1205 x70700000 = x77770000 & ~a4;
1206 x43433333 = a3 ^ x70700000;
1207 x00430033 = a5 & x43433333;
1208 x55557777 = a1 | x11116666;
1209 x55167744 = x00430033 ^ x55557777;
1210 x5A19784B = a4 ^ x55167744;
// x271C52A7 is the XOR term of the x31 result.
1212 x5A1987B4 = a6 ^ x5A19784B;
1213 x7A3BD7F5 = x22225555 | x5A1987B4;
1214 x003B00F5 = a5 & x7A3BD7F5;
1215 x221955A0 = x22225555 ^ x003B00F5;
1216 x05050707 = a4 & x55557777;
1217 x271C52A7 = x221955A0 ^ x05050707;
1219 x2A2A82A0 = x7A3BD7F5 & ~a1;
1220 x6969B193 = x43433333 ^ x2A2A82A0;
1221 x1FE06F90 = a5 ^ x1F1F6F6F;
1222 x16804E00 = x1FE06F90 & ~x6969B193;
1223 xE97FB1FF = ~x16804E00;
// Result pair x20/x21: xE97FB1FF AND-NOT a2, then XOR x5A19784B.
1224 x20 = xE97FB1FF & ~a2;
1225 x21 = x20 ^ x5A19784B;
// x0B01234A is kept for the x00 result further down.
1228 x43403302 = x43433333 & ~x003B00F5;
1229 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
1230 x37DEFFB7 = x271C52A7 | x35CAED30;
1231 x349ECCB5 = x37DEFFB7 & ~x43403302;
1232 x0B01234A = x1F1F6F6F & ~x349ECCB5;
1234 x101884B4 = x5A1987B4 & x349ECCB5;
1235 x0FF8EB24 = x1FE06F90 ^ x101884B4;
1236 x41413333 = x43433333 & x55557777;
1237 x4FF9FB37 = x0FF8EB24 | x41413333;
1238 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
// Result pair x30/x31: x4FC2FBC2 AND a2, then XOR x271C52A7.
1239 x30 = x4FC2FBC2 & a2;
1240 x31 = x30 ^ x271C52A7;
1243 x22222222 = a1 ^ x77777777;
1244 x16BCEE97 = x349ECCB5 ^ x22222222;
1245 x0F080B04 = a4 & x0FF8EB24;
1246 x19B4E593 = x16BCEE97 ^ x0F080B04;
// Result pair x00/x01: x0B01234A OR a2, then XOR x19B4E593.
1247 x00 = x0B01234A | a2;
1248 x01 = x00 ^ x19B4E593;
1251 x5C5C5C5C = x1F1F6F6F ^ x43433333;
1252 x4448184C = x5C5C5C5C & ~x19B4E593;
1253 x2DDABE71 = x22225555 ^ x0FF8EB24;
1254 x6992A63D = x4448184C ^ x2DDABE71;
// Result pair x10/x11: x1F1F6F6F AND a2, then XOR x6992A63D.
1255 x10 = x1F1F6F6F & a2;
1256 x11 = x10 ^ x6992A63D;
// s6: bitslice DES S-box 6 (see the Openwall / John the Ripper notice above),
// written as a straight-line network of &, |, ^ and ~ on u32 words, so every
// bit position of the inputs is processed independently of the others.
// The body has 46 assignments, one gate each, matching the sixth entry of the
// gate-count list in that notice.
//   a1..a6     - the six input words
//   out1..out4 - pointers for the four output words
// NOTE(review): the stores through out1..out4 are not visible here; presumably
// x01, x11, x21 and x31 are the values they receive - confirm.
// NOTE(review): x33CC33CC is assigned without a visible declaration - confirm
// that the declaration exists in the complete file.
1260 void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
// Locals: one u32 per intermediate gate output, plus x00..x31 for the four
// result pairs.
1263 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
1264 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
1265 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
1266 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
1267 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
1268 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
1269 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
1270 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
1271 u32 x00, x01, x10, x11, x20, x21, x30, x31;
// Shared sub-expressions of a1..a3, a5 and a6. a4 is applied only in the first
// step of each result pair (x30, x20, x10, x00) further down.
1273 x33CC33CC = a2 ^ a5;
1275 x3333FFFF = a2 | a6;
1276 x11115555 = a1 & x3333FFFF;
1277 x22DD6699 = x33CC33CC ^ x11115555;
1278 x22DD9966 = a6 ^ x22DD6699;
1279 x00220099 = a5 & ~x22DD9966;
// x59A31CE6 is the XOR term of the x31 result.
1281 x00551144 = a1 & x22DD9966;
1282 x33662277 = a2 ^ x00551144;
1283 x5A5A5A5A = a1 ^ a3;
1284 x7B7E7A7F = x33662277 | x5A5A5A5A;
1285 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
1287 x09030C06 = a3 & x59A31CE6;
1288 x09030000 = x09030C06 & ~a6;
1289 x336622FF = x00220099 | x33662277;
1290 x3A6522FF = x09030000 ^ x336622FF;
// Result pair x30/x31: x3A6522FF AND a4, then XOR x59A31CE6.
1291 x30 = x3A6522FF & a4;
1292 x31 = x30 ^ x59A31CE6;
// x0FFFB9FD is kept for the x00 result at the end of the function.
1295 x484D494C = a2 ^ x7B7E7A7F;
1296 x0000B6B3 = a6 & ~x484D494C;
1297 x0F0FB9BC = a3 ^ x0000B6B3;
1298 x00FC00F9 = a5 & ~x09030C06;
1299 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
1301 x5DF75DF7 = a1 | x59A31CE6;
1302 x116600F7 = x336622FF & x5DF75DF7;
1303 x1E69B94B = x0F0FB9BC ^ x116600F7;
1304 x1668B94B = x1E69B94B & ~x09030000;
// Result pair x20/x21: x00220099 OR a4, then XOR x1668B94B.
1305 x20 = x00220099 | a4;
1306 x21 = x20 ^ x1668B94B;
1309 x7B7B7B7B = a2 | x5A5A5A5A;
1310 x411E5984 = x3A6522FF ^ x7B7B7B7B;
1311 x1FFFFDFD = x11115555 | x0FFFB9FD;
1312 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
1314 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
1315 x004B002D = a5 & ~x3CB4DFD2;
1316 xB7B2B6B3 = ~x484D494C;
1317 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
1318 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
// Result pair x10/x11: xCC82CDE5 AND-NOT a4, then XOR x5EE1A479.
1319 x10 = xCC82CDE5 & ~a4;
1320 x11 = x10 ^ x5EE1A479;
1323 x0055EEBB = a6 ^ x00551144;
1324 x5A5AECE9 = a1 ^ x0F0FB9BC;
1325 x0050ECA9 = x0055EEBB & x5A5AECE9;
1326 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
1327 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
// Result pair x00/x01: x0FFFB9FD AND-NOT a4, then XOR xC59A2D67.
1328 x00 = x0FFFB9FD & ~a4;
1329 x01 = x00 ^ xC59A2D67;
// s7: bitslice DES S-box 7 (see the Openwall / John the Ripper notice above),
// written as a straight-line network of &, |, ^ and ~ on u32 words, so every
// bit position of the inputs is processed independently of the others.
// The body has 46 assignments, one gate each, matching the seventh entry of
// the gate-count list in that notice.
//   a1..a6     - the six input words
//   out1..out4 - pointers for the four output words
// NOTE(review): the stores through out1..out4 are not visible here; presumably
// x01, x11, x21 and x31 are the values they receive - confirm.
1333 void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
// Locals: one u32 per intermediate gate output, plus x00..x31 for the four
// result pairs.
1335 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
1336 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
1337 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
1338 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
1339 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
1340 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
1341 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
1342 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
1343 u32 x00, x01, x10, x11, x20, x21, x30, x31;
// Shared sub-expressions of a2..a6. a1 is applied only in the first step of
// each result pair (x30, x00, x20, x10) further down.
1345 x0FF00FF0 = a4 ^ a5;
1346 x3CC33CC3 = a3 ^ x0FF00FF0;
1347 x00003CC3 = a6 & x3CC33CC3;
1348 x0F000F00 = a4 & x0FF00FF0;
1349 x5A555A55 = a2 ^ x0F000F00;
1350 x00001841 = x00003CC3 & x5A555A55;
1352 x00000F00 = a6 & x0F000F00;
1353 x33333C33 = a3 ^ x00000F00;
1354 x7B777E77 = x5A555A55 | x33333C33;
1355 x0FF0F00F = a6 ^ x0FF00FF0;
1356 x74878E78 = x7B777E77 ^ x0FF0F00F;
// Result pair x30/x31: a1 AND-NOT x00001841, then XOR x74878E78.
1357 x30 = a1 & ~x00001841;
1358 x31 = x30 ^ x74878E78;
// x694E5A8D is the XOR term of the x01 result.
1361 x003C003C = a5 & ~x3CC33CC3;
1362 x5A7D5A7D = x5A555A55 | x003C003C;
1363 x333300F0 = x00003CC3 ^ x33333C33;
1364 x694E5A8D = x5A7D5A7D ^ x333300F0;
// x699C585B is the XOR term of the x21 result.
1366 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
1367 x000F0303 = a4 & ~x0FF0CCCC;
1368 x5A505854 = x5A555A55 & ~x000F0303;
1369 x33CC000F = a5 ^ x333300F0;
1370 x699C585B = x5A505854 ^ x33CC000F;
1372 x7F878F78 = x0F000F00 | x74878E78;
1373 x21101013 = a3 & x699C585B;
1374 x7F979F7B = x7F878F78 | x21101013;
1375 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
1376 x4F9493BB = x7F979F7B ^ x30030CC0;
// Result pair x00/x01: x4F9493BB AND-NOT a1, then XOR x694E5A8D.
1377 x00 = x4F9493BB & ~a1;
1378 x01 = x00 ^ x694E5A8D;
// x26DA9867 is the XOR term of the x11 result.
1381 x6F9CDBFB = x699C585B | x4F9493BB;
1382 x0000DBFB = a6 & x6F9CDBFB;
1383 x00005151 = a2 & x0000DBFB;
1384 x26DAC936 = x694E5A8D ^ x4F9493BB;
1385 x26DA9867 = x00005151 ^ x26DAC936;
1387 x27DA9877 = x21101013 | x26DA9867;
1388 x27DA438C = x0000DBFB ^ x27DA9877;
1389 x2625C9C9 = a5 ^ x26DAC936;
1390 x27FFCBCD = x27DA438C | x2625C9C9;
// Result pair x20/x21: x27FFCBCD AND a1, then XOR x699C585B.
1391 x20 = x27FFCBCD & a1;
1392 x21 = x20 ^ x699C585B;
1395 x27FF1036 = x0000DBFB ^ x27FFCBCD;
1396 x27FF103E = x003C003C | x27FF1036;
1397 xB06B6C44 = ~x4F9493BB;
1398 x97947C7A = x27FF103E ^ xB06B6C44;
// Result pair x10/x11: x97947C7A AND-NOT a1, then XOR x26DA9867.
1399 x10 = x97947C7A & ~a1;
1400 x11 = x10 ^ x26DA9867;
// s8: bitslice DES S-box 8 (see the Openwall / John the Ripper notice above),
// written as a straight-line network of &, |, ^ and ~ on u32 words, so every
// bit position of the inputs is processed independently of the others.
// The body has 41 assignments, one gate each, matching the eighth entry of the
// gate-count list in that notice.
//   a1..a6     - the six input words
//   out1..out4 - pointers for the four output words
// NOTE(review): the stores through out1..out4 are not visible here; presumably
// x01, x11, x21 and x31 are the values they receive - confirm.
1404 void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
// Locals: one u32 per intermediate gate output, plus x00..x31 for the four
// result pairs.
1406 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
1407 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
1408 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
1409 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
1410 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
1411 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
1412 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
1413 u32 x00, x01, x10, x11, x20, x21, x30, x31;
// Shared sub-expressions of a1..a5. a6 is applied only in the first step of
// each result pair (x10, x20, x30, x00) further down.
1415 x0C0C0C0C = a3 & ~a2;
1416 x0000F0F0 = a5 & ~a3;
1417 x00FFF00F = a4 ^ x0000F0F0;
1418 x00555005 = a1 & x00FFF00F;
1419 x00515001 = x00555005 & ~x0C0C0C0C;
1421 x33000330 = a2 & ~x00FFF00F;
1422 x77555775 = a1 | x33000330;
1423 x30303030 = a2 & ~a3;
1424 x3030CFCF = a5 ^ x30303030;
1425 x30104745 = x77555775 & x3030CFCF;
1426 x30555745 = x00555005 | x30104745;
1428 xFF000FF0 = ~x00FFF00F;
1429 xCF1048B5 = x30104745 ^ xFF000FF0;
1430 x080A080A = a3 & ~x77555775;
1431 xC71A40BF = xCF1048B5 ^ x080A080A;
1432 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
// Result pair x10/x11: x00515001 OR a6, then XOR xCB164CB3.
1433 x10 = x00515001 | a6;
1434 x11 = x10 ^ xCB164CB3;
// xC729695A computed here is reused by both the x31 and the x01 results.
1437 x9E4319E6 = a1 ^ xCB164CB3;
1438 x000019E6 = a5 & x9E4319E6;
1439 xF429738C = a2 ^ xC71A40BF;
1440 xF4296A6A = x000019E6 ^ xF429738C;
1441 xC729695A = x33000330 ^ xF4296A6A;
1443 xC47C3D2F = x30555745 ^ xF4296A6A;
1444 xF77F3F3F = a2 | xC47C3D2F;
1445 x9E43E619 = a5 ^ x9E4319E6;
1446 x693CD926 = xF77F3F3F ^ x9E43E619;
// Result pair x20/x21: x30555745 AND a6, then XOR x693CD926.
1447 x20 = x30555745 & a6;
1448 x21 = x20 ^ x693CD926;
1451 xF719A695 = x3030CFCF ^ xC729695A;
1452 xF4FF73FF = a4 | xF429738C;
1453 x03E6D56A = xF719A695 ^ xF4FF73FF;
1454 x56B3803F = a1 ^ x03E6D56A;
// Result pair x30/x31: x56B3803F AND a6, then XOR xC729695A.
1455 x30 = x56B3803F & a6;
1456 x31 = x30 ^ xC729695A;
1459 xF700A600 = xF719A695 & ~a4;
1460 x61008000 = x693CD926 & xF700A600;
1461 x03B7856B = x00515001 ^ x03E6D56A;
1462 x62B7056B = x61008000 ^ x03B7856B;
// Result pair x00/x01: x62B7056B OR a6, then XOR xC729695A.
1463 x00 = x62B7056B | a6;
1464 x01 = x00 ^ xC729695A;
// SWAP: exchange the two u32 values that the pointers `a` and `b` refer to.
// Both arguments are parenthesised before being dereferenced, so pointer
// expressions such as `p + 1` are dereferenced as a whole rather than as
// `*p + 1`. The expansion is a brace-enclosed block that declares a local
// `tmp`; each argument is evaluated twice, so pass expressions without side
// effects.
#define SWAP(a, b) { u32 tmp = *(a); *(a) = *(b); *(b) = tmp; }
// KEYSETin: load the 48 working words k00..k47 from a fixed selection of the
// 56 words K00..K55. The macros define neither set of names; both must be in
// scope where a macro is expanded (DESCrypt takes K00..K55 as parameters and
// declares k00..k47 as locals).
// Each macro is a brace-enclosed block, so it can stand directly as a branch
// of `if (i) KEYSET1n else KEYSET0n` without a trailing semicolon. In the uses
// visible in DESCrypt the first digit of the name is the value of `i` that
// selects the macro and the second digit is its position within the loop body.
// NOTE(review): presumably each macro is the subkey bit selection for one of
// the 16 DES rounds (round = 8 * first digit + second digit) - confirm.
1506 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
1507 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
1508 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
1509 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
1510 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
1511 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
1512 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
1513 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
1514 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
1515 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
1516 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
1517 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
1518 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
1519 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
1520 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
1521 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
1538 #ifdef DESCRYPT_SALT
1540 void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
1542 sXXX_DECL u32 s001 = (0x001 & DESCRYPT_SALT) ? 0xffffffff : 0;
1543 sXXX_DECL u32 s002 = (0x002 & DESCRYPT_SALT) ? 0xffffffff : 0;
1544 sXXX_DECL u32 s004 = (0x004 & DESCRYPT_SALT) ? 0xffffffff : 0;
1545 sXXX_DECL u32 s008 = (0x008 & DESCRYPT_SALT) ? 0xffffffff : 0;
1546 sXXX_DECL u32 s010 = (0x010 & DESCRYPT_SALT) ? 0xffffffff : 0;
1547 sXXX_DECL u32 s020 = (0x020 & DESCRYPT_SALT) ? 0xffffffff : 0;
1548 sXXX_DECL u32 s040 = (0x040 & DESCRYPT_SALT) ? 0xffffffff : 0;
1549 sXXX_DECL u32 s080 = (0x080 & DESCRYPT_SALT) ? 0xffffffff : 0;
1550 sXXX_DECL u32 s100 = (0x100 & DESCRYPT_SALT) ? 0xffffffff : 0;
1551 sXXX_DECL u32 s200 = (0x200 & DESCRYPT_SALT) ? 0xffffffff : 0;
1552 sXXX_DECL u32 s400 = (0x400 & DESCRYPT_SALT) ? 0xffffffff : 0;
1553 sXXX_DECL u32 s800 = (0x800 & DESCRYPT_SALT) ? 0xffffffff : 0;
1555 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
1556 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
1557 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
1558 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
1559 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
1560 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
1561 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
1562 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
1564 for (u32 ii = 0; ii < 25; ii++)
1569 for (u32 i = 0; i < 2; i++)
1571 if (i) KEYSET10 else KEYSET00
1573 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1574 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1575 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1576 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1577 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1578 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1579 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1580 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1582 if (i) KEYSET11 else KEYSET01
1584 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1585 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1586 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1587 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1588 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1589 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1590 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1591 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1593 if (i) KEYSET12 else KEYSET02
1595 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1596 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1597 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1598 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1599 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1600 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1601 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1602 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1604 if (i) KEYSET13 else KEYSET03
1606 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1607 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1608 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1609 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1610 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1611 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1612 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1613 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1615 if (i) KEYSET14 else KEYSET04
1617 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1618 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1619 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1620 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1621 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1622 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1623 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1624 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1626 if (i) KEYSET15 else KEYSET05
1628 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1629 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1630 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1631 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1632 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1633 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1634 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1635 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1637 if (i) KEYSET16 else KEYSET06
1639 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1640 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1641 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1642 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1643 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1644 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1645 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1646 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1648 if (i) KEYSET17 else KEYSET07
1650 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1651 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1652 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1653 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1654 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1655 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1656 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1657 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
// DESCrypt: runs the salted, bitsliced DES round network in place on the 64
// data words reachable through D00..D63.
//
//   SALT      - only the 12 low bits (masks 0x001..0x800) are consumed; each
//               bit is widened into an all-ones / all-zero word s001..s800.
//   K00..K55  - 56 key words. They are not referenced directly in the lines
//               below; presumably the KEYSETxx macros (defined elsewhere in
//               the file) select from them to fill k00..k47 - TODO confirm.
//   D00..D63  - data words; each one is both dereferenced as an S-box input
//               and handed to the S-boxes as an output pointer.
1668 void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
// Widen each of the 12 salt bits into a full-word selector: 0xffffffff when
// the bit is set in SALT, 0 otherwise. myselx (a, b, c) yields b when c is
// non-zero and a otherwise, so a set salt bit makes the S-box read the second
// listed data word instead of the first.
// sXXX_DECL is a macro defined outside this view (presumably a storage/type
// qualifier - TODO confirm).
1670 sXXX_DECL u32 s001 = (0x001 & SALT) ? 0xffffffff : 0;
1671 sXXX_DECL u32 s002 = (0x002 & SALT) ? 0xffffffff : 0;
1672 sXXX_DECL u32 s004 = (0x004 & SALT) ? 0xffffffff : 0;
1673 sXXX_DECL u32 s008 = (0x008 & SALT) ? 0xffffffff : 0;
1674 sXXX_DECL u32 s010 = (0x010 & SALT) ? 0xffffffff : 0;
1675 sXXX_DECL u32 s020 = (0x020 & SALT) ? 0xffffffff : 0;
1676 sXXX_DECL u32 s040 = (0x040 & SALT) ? 0xffffffff : 0;
1677 sXXX_DECL u32 s080 = (0x080 & SALT) ? 0xffffffff : 0;
1678 sXXX_DECL u32 s100 = (0x100 & SALT) ? 0xffffffff : 0;
1679 sXXX_DECL u32 s200 = (0x200 & SALT) ? 0xffffffff : 0;
1680 sXXX_DECL u32 s400 = (0x400 & SALT) ? 0xffffffff : 0;
1681 sXXX_DECL u32 s800 = (0x800 & SALT) ? 0xffffffff : 0;
// 48 per-round key words, six per S-box call. They are declared without an
// initialiser here; the KEYSETxx macro invoked before every group of S-box
// calls is presumably what assigns them - verify against the macro bodies.
1683 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
1684 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
1685 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
1686 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
1687 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
1688 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
1689 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
1690 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
// Outer loop: 25 passes over the whole round network (presumably the 25 DES
// iterations of traditional crypt(3) - TODO confirm).
1692 for (u32 ii = 0; ii < 25; ii++)
// Inner loop: two passes, each holding eight KEYSET + s1..s8 groups, i.e. 16
// groups per outer pass. `i` picks the KEYSET1x macros on the second pass and
// the KEYSET0x macros on the first.
1697 for (u32 i = 0; i < 2; i++)
// Group layout (repeated below): even-numbered groups read D32..D63 and pass
// D00..D31 as S-box output pointers; odd-numbered groups do the opposite.
// Only s1/s2/s5/s6 take salt-selected inputs (s1/s5 use s001..s020, s2/s6 use
// s040..s800, with the two data operands swapped between s1 and s5 and between
// s2 and s6); s3/s4/s7/s8 read their data words directly.
1699 if (i) KEYSET10 else KEYSET00
1701 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1702 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1703 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1704 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1705 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1706 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1707 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1708 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
// Odd group: inputs come from D00..D31, output pointers are in D32..D63.
1710 if (i) KEYSET11 else KEYSET01
1712 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1713 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1714 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1715 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1716 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1717 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1718 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1719 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1721 if (i) KEYSET12 else KEYSET02
1723 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1724 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1725 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1726 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1727 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1728 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1729 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1730 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1732 if (i) KEYSET13 else KEYSET03
1734 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1735 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1736 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1737 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1738 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1739 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1740 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1741 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1743 if (i) KEYSET14 else KEYSET04
1745 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1746 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1747 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1748 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1749 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1750 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1751 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1752 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1754 if (i) KEYSET15 else KEYSET05
1756 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1757 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1758 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1759 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1760 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1761 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1762 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1763 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1765 if (i) KEYSET16 else KEYSET06
1767 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1768 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1769 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1770 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1771 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1772 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1773 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1774 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1776 if (i) KEYSET17 else KEYSET07
1778 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1779 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1780 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1781 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1782 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1783 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1784 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1785 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
// transpose32c: rearranges the 32 words of `data` in place by applying a
// fixed, fully unrolled sequence of `swap` steps with shift distances
// 16, 8, 4, 2, 1 and the matching masks 0x0000ffff, 0x00ff00ff, 0x0f0f0f0f,
// 0x33333333, 0x55555555.
// Judging by the name and the shift/mask ladder this is presumably a 32x32
// bit-matrix transpose - TODO confirm against the complete macro body.
//
//   data - array of 32 words, modified in place; nothing is returned.
//
// The `swap` helper below takes two words x and y, a shift j and a mask m. In
// the lines visible here it computes t = (x ^ (y >> j)) & m and folds t << j
// back into y. `swap` is a plain #define, so it stays defined for code that
// follows this function.
1796 void transpose32c (u32 data[32])
1798 #define swap(x,y,j,m) \
1799 t = ((x) ^ ((y) >> (j))) & (m); \
1801 (y) = (y) ^ (t << (j));
// Distance 16: pair word n with word n + 16 for n = 0..15.
1805 swap (data[ 0], data[16], 16, 0x0000ffff);
1806 swap (data[ 1], data[17], 16, 0x0000ffff);
1807 swap (data[ 2], data[18], 16, 0x0000ffff);
1808 swap (data[ 3], data[19], 16, 0x0000ffff);
1809 swap (data[ 4], data[20], 16, 0x0000ffff);
1810 swap (data[ 5], data[21], 16, 0x0000ffff);
1811 swap (data[ 6], data[22], 16, 0x0000ffff);
1812 swap (data[ 7], data[23], 16, 0x0000ffff);
1813 swap (data[ 8], data[24], 16, 0x0000ffff);
1814 swap (data[ 9], data[25], 16, 0x0000ffff);
1815 swap (data[10], data[26], 16, 0x0000ffff);
1816 swap (data[11], data[27], 16, 0x0000ffff);
1817 swap (data[12], data[28], 16, 0x0000ffff);
1818 swap (data[13], data[29], 16, 0x0000ffff);
1819 swap (data[14], data[30], 16, 0x0000ffff);
1820 swap (data[15], data[31], 16, 0x0000ffff);
// Lower half (words 0..15): distance 8 first, then distances 4, 2, 1 within
// words 0..7 followed by the same ladder within words 8..15.
1821 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
1822 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
1823 swap (data[ 2], data[10], 8, 0x00ff00ff);
1824 swap (data[ 3], data[11], 8, 0x00ff00ff);
1825 swap (data[ 4], data[12], 8, 0x00ff00ff);
1826 swap (data[ 5], data[13], 8, 0x00ff00ff);
1827 swap (data[ 6], data[14], 8, 0x00ff00ff);
1828 swap (data[ 7], data[15], 8, 0x00ff00ff);
1829 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
1830 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
1831 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
1832 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
1833 swap (data[ 0], data[ 2], 2, 0x33333333);
1834 swap (data[ 1], data[ 3], 2, 0x33333333);
1835 swap (data[ 0], data[ 1], 1, 0x55555555);
1836 swap (data[ 2], data[ 3], 1, 0x55555555);
1837 swap (data[ 4], data[ 6], 2, 0x33333333);
1838 swap (data[ 5], data[ 7], 2, 0x33333333);
1839 swap (data[ 4], data[ 5], 1, 0x55555555);
1840 swap (data[ 6], data[ 7], 1, 0x55555555);
1841 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
1842 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
1843 swap (data[10], data[14], 4, 0x0f0f0f0f);
1844 swap (data[11], data[15], 4, 0x0f0f0f0f);
1845 swap (data[ 8], data[10], 2, 0x33333333);
1846 swap (data[ 9], data[11], 2, 0x33333333);
1847 swap (data[ 8], data[ 9], 1, 0x55555555);
1848 swap (data[10], data[11], 1, 0x55555555);
1849 swap (data[12], data[14], 2, 0x33333333);
1850 swap (data[13], data[15], 2, 0x33333333);
1851 swap (data[12], data[13], 1, 0x55555555);
1852 swap (data[14], data[15], 1, 0x55555555);
// Upper half (words 16..31): the same distance 8, 4, 2, 1 pattern as above.
1853 swap (data[16], data[24], 8, 0x00ff00ff);
1854 swap (data[17], data[25], 8, 0x00ff00ff);
1855 swap (data[18], data[26], 8, 0x00ff00ff);
1856 swap (data[19], data[27], 8, 0x00ff00ff);
1857 swap (data[20], data[28], 8, 0x00ff00ff);
1858 swap (data[21], data[29], 8, 0x00ff00ff);
1859 swap (data[22], data[30], 8, 0x00ff00ff);
1860 swap (data[23], data[31], 8, 0x00ff00ff);
1861 swap (data[16], data[20], 4, 0x0f0f0f0f);
1862 swap (data[17], data[21], 4, 0x0f0f0f0f);
1863 swap (data[18], data[22], 4, 0x0f0f0f0f);
1864 swap (data[19], data[23], 4, 0x0f0f0f0f);
1865 swap (data[16], data[18], 2, 0x33333333);
1866 swap (data[17], data[19], 2, 0x33333333);
1867 swap (data[16], data[17], 1, 0x55555555);
1868 swap (data[18], data[19], 1, 0x55555555);
1869 swap (data[20], data[22], 2, 0x33333333);
1870 swap (data[21], data[23], 2, 0x33333333);
1871 swap (data[20], data[21], 1, 0x55555555);
1872 swap (data[22], data[23], 1, 0x55555555);
1873 swap (data[24], data[28], 4, 0x0f0f0f0f);
1874 swap (data[25], data[29], 4, 0x0f0f0f0f);
1875 swap (data[26], data[30], 4, 0x0f0f0f0f);
1876 swap (data[27], data[31], 4, 0x0f0f0f0f);
1877 swap (data[24], data[26], 2, 0x33333333);
1878 swap (data[25], data[27], 2, 0x33333333);
1879 swap (data[24], data[25], 1, 0x55555555);
1880 swap (data[26], data[27], 1, 0x55555555);
1881 swap (data[28], data[30], 2, 0x33333333);
1882 swap (data[29], data[31], 2, 0x33333333);
1883 swap (data[28], data[29], 1, 0x55555555);
1884 swap (data[30], data[31], 1, 0x55555555);
// m01500m: per-work-item worker that derives bitsliced key words from one
// password candidate, feeds them (together with the salt and 64 data words)
// to the DES routine, and compares the result against the target digests.
// Presumably the multi-hash counterpart of m01500s - TODO confirm.
//
// Buffers read in the visible lines: pws (candidate words), words_buf_r
// (bitsliced words OR-ed into k00..k27), salt_bufs, digests_buf and
// hashes_shown. The scalars salt_pos, digests_cnt and digests_offset select
// the salt and the digest range. The other parameters are not referenced in
// the lines shown here.
1887 void m01500m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
// Work-item coordinates in dimension 0.
1893 const u32 gid = get_global_id (0);
1894 const u32 lid = get_local_id (0);
// Salt word for the salt currently being processed.
1900 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
// First two 32-bit words of this work-item's password candidate.
1906 const u32 w0 = pws[gid].i[0];
1907 const u32 w1 = pws[gid].i[1];
// Shift every byte left by one and clear bit 0 of each byte, so that bits
// 7..1 of each byte hold the original low seven bits of that byte.
1909 const u32 w0s = (w0 << 1) & 0xfefefefe;
1910 const u32 w1s = (w1 << 1) & 0xfefefefe;
// K00..K55: one macro per key bit. Each expands to -1 (all ones) when the
// selected bit of w0s / w1s is set and to 0 otherwise. Bits 7 down to 1 of
// every byte are sampled, byte 0 first: K00..K27 come from w0s, K28..K55 from
// w1s. Bit 0 of each byte is never sampled.
1912 #define K00 (((w0s >> ( 0 + 7)) & 1) ? -1 : 0)
1913 #define K01 (((w0s >> ( 0 + 6)) & 1) ? -1 : 0)
1914 #define K02 (((w0s >> ( 0 + 5)) & 1) ? -1 : 0)
1915 #define K03 (((w0s >> ( 0 + 4)) & 1) ? -1 : 0)
1916 #define K04 (((w0s >> ( 0 + 3)) & 1) ? -1 : 0)
1917 #define K05 (((w0s >> ( 0 + 2)) & 1) ? -1 : 0)
1918 #define K06 (((w0s >> ( 0 + 1)) & 1) ? -1 : 0)
1919 #define K07 (((w0s >> ( 8 + 7)) & 1) ? -1 : 0)
1920 #define K08 (((w0s >> ( 8 + 6)) & 1) ? -1 : 0)
1921 #define K09 (((w0s >> ( 8 + 5)) & 1) ? -1 : 0)
1922 #define K10 (((w0s >> ( 8 + 4)) & 1) ? -1 : 0)
1923 #define K11 (((w0s >> ( 8 + 3)) & 1) ? -1 : 0)
1924 #define K12 (((w0s >> ( 8 + 2)) & 1) ? -1 : 0)
1925 #define K13 (((w0s >> ( 8 + 1)) & 1) ? -1 : 0)
1926 #define K14 (((w0s >> (16 + 7)) & 1) ? -1 : 0)
1927 #define K15 (((w0s >> (16 + 6)) & 1) ? -1 : 0)
1928 #define K16 (((w0s >> (16 + 5)) & 1) ? -1 : 0)
1929 #define K17 (((w0s >> (16 + 4)) & 1) ? -1 : 0)
1930 #define K18 (((w0s >> (16 + 3)) & 1) ? -1 : 0)
1931 #define K19 (((w0s >> (16 + 2)) & 1) ? -1 : 0)
1932 #define K20 (((w0s >> (16 + 1)) & 1) ? -1 : 0)
1933 #define K21 (((w0s >> (24 + 7)) & 1) ? -1 : 0)
1934 #define K22 (((w0s >> (24 + 6)) & 1) ? -1 : 0)
1935 #define K23 (((w0s >> (24 + 5)) & 1) ? -1 : 0)
1936 #define K24 (((w0s >> (24 + 4)) & 1) ? -1 : 0)
1937 #define K25 (((w0s >> (24 + 3)) & 1) ? -1 : 0)
1938 #define K26 (((w0s >> (24 + 2)) & 1) ? -1 : 0)
1939 #define K27 (((w0s >> (24 + 1)) & 1) ? -1 : 0)
1940 #define K28 (((w1s >> ( 0 + 7)) & 1) ? -1 : 0)
1941 #define K29 (((w1s >> ( 0 + 6)) & 1) ? -1 : 0)
1942 #define K30 (((w1s >> ( 0 + 5)) & 1) ? -1 : 0)
1943 #define K31 (((w1s >> ( 0 + 4)) & 1) ? -1 : 0)
1944 #define K32 (((w1s >> ( 0 + 3)) & 1) ? -1 : 0)
1945 #define K33 (((w1s >> ( 0 + 2)) & 1) ? -1 : 0)
1946 #define K34 (((w1s >> ( 0 + 1)) & 1) ? -1 : 0)
1947 #define K35 (((w1s >> ( 8 + 7)) & 1) ? -1 : 0)
1948 #define K36 (((w1s >> ( 8 + 6)) & 1) ? -1 : 0)
1949 #define K37 (((w1s >> ( 8 + 5)) & 1) ? -1 : 0)
1950 #define K38 (((w1s >> ( 8 + 4)) & 1) ? -1 : 0)
1951 #define K39 (((w1s >> ( 8 + 3)) & 1) ? -1 : 0)
1952 #define K40 (((w1s >> ( 8 + 2)) & 1) ? -1 : 0)
1953 #define K41 (((w1s >> ( 8 + 1)) & 1) ? -1 : 0)
1954 #define K42 (((w1s >> (16 + 7)) & 1) ? -1 : 0)
1955 #define K43 (((w1s >> (16 + 6)) & 1) ? -1 : 0)
1956 #define K44 (((w1s >> (16 + 5)) & 1) ? -1 : 0)
1957 #define K45 (((w1s >> (16 + 4)) & 1) ? -1 : 0)
1958 #define K46 (((w1s >> (16 + 3)) & 1) ? -1 : 0)
1959 #define K47 (((w1s >> (16 + 2)) & 1) ? -1 : 0)
1960 #define K48 (((w1s >> (16 + 1)) & 1) ? -1 : 0)
1961 #define K49 (((w1s >> (24 + 7)) & 1) ? -1 : 0)
1962 #define K50 (((w1s >> (24 + 6)) & 1) ? -1 : 0)
1963 #define K51 (((w1s >> (24 + 5)) & 1) ? -1 : 0)
1964 #define K52 (((w1s >> (24 + 4)) & 1) ? -1 : 0)
1965 #define K53 (((w1s >> (24 + 3)) & 1) ? -1 : 0)
1966 #define K54 (((w1s >> (24 + 2)) & 1) ? -1 : 0)
1967 #define K55 (((w1s >> (24 + 1)) & 1) ? -1 : 0)
// The local id in dimension 1 selects which words_buf_r entry this work-item
// combines with its candidate; il_pos is that index scaled by 32
// (presumably because each entry packs 32 candidates - TODO confirm).
1973 const u32 pc_pos = get_local_id (1);
1975 const u32 il_pos = pc_pos * 32;
// OR the bitsliced words of the selected words_buf_r entry into the first 28
// key words. The declaration / initial value of k00..k27 is outside the lines
// shown here.
2006 k00 |= words_buf_r[pc_pos].b[ 0];
2007 k01 |= words_buf_r[pc_pos].b[ 1];
2008 k02 |= words_buf_r[pc_pos].b[ 2];
2009 k03 |= words_buf_r[pc_pos].b[ 3];
2010 k04 |= words_buf_r[pc_pos].b[ 4];
2011 k05 |= words_buf_r[pc_pos].b[ 5];
2012 k06 |= words_buf_r[pc_pos].b[ 6];
2013 k07 |= words_buf_r[pc_pos].b[ 7];
2014 k08 |= words_buf_r[pc_pos].b[ 8];
2015 k09 |= words_buf_r[pc_pos].b[ 9];
2016 k10 |= words_buf_r[pc_pos].b[10];
2017 k11 |= words_buf_r[pc_pos].b[11];
2018 k12 |= words_buf_r[pc_pos].b[12];
2019 k13 |= words_buf_r[pc_pos].b[13];
2020 k14 |= words_buf_r[pc_pos].b[14];
2021 k15 |= words_buf_r[pc_pos].b[15];
2022 k16 |= words_buf_r[pc_pos].b[16];
2023 k17 |= words_buf_r[pc_pos].b[17];
2024 k18 |= words_buf_r[pc_pos].b[18];
2025 k19 |= words_buf_r[pc_pos].b[19];
2026 k20 |= words_buf_r[pc_pos].b[20];
2027 k21 |= words_buf_r[pc_pos].b[21];
2028 k22 |= words_buf_r[pc_pos].b[22];
2029 k23 |= words_buf_r[pc_pos].b[23];
2030 k24 |= words_buf_r[pc_pos].b[24];
2031 k25 |= words_buf_r[pc_pos].b[25];
2032 k26 |= words_buf_r[pc_pos].b[26];
2033 k27 |= words_buf_r[pc_pos].b[27];
// Key and data arguments of a call whose head is outside the lines shown here
// (presumably DESCrypt - TODO confirm): the first 28 key words are the
// combined k00..k27, the remaining 28 are the K28..K55 macros taken from the
// candidate alone, followed by the addresses of the 64 data words.
2103 k00, k01, k02, k03, k04, k05, k06,
2104 k07, k08, k09, k10, k11, k12, k13,
2105 k14, k15, k16, k17, k18, k19, k20,
2106 k21, k22, k23, k24, k25, k26, k27,
2107 K28, K29, K30, K31, K32, K33, K34,
2108 K35, K36, K37, K38, K39, K40, K41,
2109 K42, K43, K44, K45, K46, K47, K48,
2110 K49, K50, K51, K52, K53, K54, K55,
2111 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
2112 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
2113 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
2114 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
2115 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
2116 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
2117 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
2118 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
// Fewer than 16 digests: compare the bitsliced output against each digest
// directly, without transposing it.
2188 if (digests_cnt < 16)
2190 for (u32 d = 0; d < digests_cnt; d++)
2192 const u32 final_hash_pos = digests_offset + d;
// Skip digests that already have a non-zero hashes_shown entry.
2194 if (hashes_shown[final_hash_pos]) continue;
2198 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
2199 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
// For each of the 64 target bits, widen the bit into an all-ones / all-zero
// word and XOR it with the matching output word; OR-ing the results leaves a
// 1 in every bit position (slice) where at least one output bit differs.
2206 for (int i = 0; i < 32; i++)
2208 const u32 b0 = -((search[0] >> i) & 1);
2209 const u32 b1 = -((search[1] >> i) & 1);
2211 tmpResult |= out[ 0 + i] ^ b0;
2212 tmpResult |= out[32 + i] ^ b1;
// All 32 slices mismatched: nothing to report for this digest.
2215 if (tmpResult == 0xffffffff) continue;
// Index of the most significant zero bit of tmpResult, i.e. the highest
// slice in which all 64 bits agreed with the target.
2217 const u32 slice = 31 - clz (~tmpResult);
2219 const u32 r0 = search[0];
2220 const u32 r1 = search[1];
// Second comparison path: copy the two 32-word halves of `out` in reversed
// word order, run transpose32c on each half, then walk the 32 slices reading
// the rows back in reversed order to obtain (r0, r1) per slice.
2235 for (int i = 0; i < 32; i++)
2237 out0[i] = out[ 0 + 31 - i];
2238 out1[i] = out[32 + 31 - i];
2241 transpose32c (out0);
2242 transpose32c (out1);
2247 for (int slice = 0; slice < 32; slice++)
2249 const u32 r0 = out0[31 - slice];
2250 const u32 r1 = out1[31 - slice];
// m01500s: single-target search routine, called by the m01500_s04 kernel wrapper.
// In the lines visible here it
//   - reads one salt word for salt_pos and the two target words of digests_buf[0],
//   - reads two candidate words from pws[gid] and pre-shifts them,
//   - ORs the 28 words of words_buf_r[pc_pos] into k00..k27,
//   - compares the 64 result words D00..D63 against the target bits S00..S63.
// NOTE(review): the declarations of k00..k27, D00..D63 and tmpResult, and the
// call that consumes the argument list further down, are not part of this
// listing - confirm against the full file.
2259 void m01500s (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
2265 const u32 gid = get_global_id (0);
2266 const u32 lid = get_local_id (0);
// First salt word of the salt entry selected by salt_pos.
2272 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
// The two target words are read from digest index 0 (digests_offset is not applied here).
2278 const u32 s0 = digests_buf[0].digest_buf[0];
2279 const u32 s1 = digests_buf[0].digest_buf[1];
// S00..S63: bit n of the 64-bit target (S00..S31 from s0, S32..S63 from s1),
// expanded to all-ones (-1) when the bit is set and to 0 otherwise.
2281 #define S00 (((s0 >> 0) & 1) ? -1 : 0)
2282 #define S01 (((s0 >> 1) & 1) ? -1 : 0)
2283 #define S02 (((s0 >> 2) & 1) ? -1 : 0)
2284 #define S03 (((s0 >> 3) & 1) ? -1 : 0)
2285 #define S04 (((s0 >> 4) & 1) ? -1 : 0)
2286 #define S05 (((s0 >> 5) & 1) ? -1 : 0)
2287 #define S06 (((s0 >> 6) & 1) ? -1 : 0)
2288 #define S07 (((s0 >> 7) & 1) ? -1 : 0)
2289 #define S08 (((s0 >> 8) & 1) ? -1 : 0)
2290 #define S09 (((s0 >> 9) & 1) ? -1 : 0)
2291 #define S10 (((s0 >> 10) & 1) ? -1 : 0)
2292 #define S11 (((s0 >> 11) & 1) ? -1 : 0)
2293 #define S12 (((s0 >> 12) & 1) ? -1 : 0)
2294 #define S13 (((s0 >> 13) & 1) ? -1 : 0)
2295 #define S14 (((s0 >> 14) & 1) ? -1 : 0)
2296 #define S15 (((s0 >> 15) & 1) ? -1 : 0)
2297 #define S16 (((s0 >> 16) & 1) ? -1 : 0)
2298 #define S17 (((s0 >> 17) & 1) ? -1 : 0)
2299 #define S18 (((s0 >> 18) & 1) ? -1 : 0)
2300 #define S19 (((s0 >> 19) & 1) ? -1 : 0)
2301 #define S20 (((s0 >> 20) & 1) ? -1 : 0)
2302 #define S21 (((s0 >> 21) & 1) ? -1 : 0)
2303 #define S22 (((s0 >> 22) & 1) ? -1 : 0)
2304 #define S23 (((s0 >> 23) & 1) ? -1 : 0)
2305 #define S24 (((s0 >> 24) & 1) ? -1 : 0)
2306 #define S25 (((s0 >> 25) & 1) ? -1 : 0)
2307 #define S26 (((s0 >> 26) & 1) ? -1 : 0)
2308 #define S27 (((s0 >> 27) & 1) ? -1 : 0)
2309 #define S28 (((s0 >> 28) & 1) ? -1 : 0)
2310 #define S29 (((s0 >> 29) & 1) ? -1 : 0)
2311 #define S30 (((s0 >> 30) & 1) ? -1 : 0)
2312 #define S31 (((s0 >> 31) & 1) ? -1 : 0)
2313 #define S32 (((s1 >> 0) & 1) ? -1 : 0)
2314 #define S33 (((s1 >> 1) & 1) ? -1 : 0)
2315 #define S34 (((s1 >> 2) & 1) ? -1 : 0)
2316 #define S35 (((s1 >> 3) & 1) ? -1 : 0)
2317 #define S36 (((s1 >> 4) & 1) ? -1 : 0)
2318 #define S37 (((s1 >> 5) & 1) ? -1 : 0)
2319 #define S38 (((s1 >> 6) & 1) ? -1 : 0)
2320 #define S39 (((s1 >> 7) & 1) ? -1 : 0)
2321 #define S40 (((s1 >> 8) & 1) ? -1 : 0)
2322 #define S41 (((s1 >> 9) & 1) ? -1 : 0)
2323 #define S42 (((s1 >> 10) & 1) ? -1 : 0)
2324 #define S43 (((s1 >> 11) & 1) ? -1 : 0)
2325 #define S44 (((s1 >> 12) & 1) ? -1 : 0)
2326 #define S45 (((s1 >> 13) & 1) ? -1 : 0)
2327 #define S46 (((s1 >> 14) & 1) ? -1 : 0)
2328 #define S47 (((s1 >> 15) & 1) ? -1 : 0)
2329 #define S48 (((s1 >> 16) & 1) ? -1 : 0)
2330 #define S49 (((s1 >> 17) & 1) ? -1 : 0)
2331 #define S50 (((s1 >> 18) & 1) ? -1 : 0)
2332 #define S51 (((s1 >> 19) & 1) ? -1 : 0)
2333 #define S52 (((s1 >> 20) & 1) ? -1 : 0)
2334 #define S53 (((s1 >> 21) & 1) ? -1 : 0)
2335 #define S54 (((s1 >> 22) & 1) ? -1 : 0)
2336 #define S55 (((s1 >> 23) & 1) ? -1 : 0)
2337 #define S56 (((s1 >> 24) & 1) ? -1 : 0)
2338 #define S57 (((s1 >> 25) & 1) ? -1 : 0)
2339 #define S58 (((s1 >> 26) & 1) ? -1 : 0)
2340 #define S59 (((s1 >> 27) & 1) ? -1 : 0)
2341 #define S60 (((s1 >> 28) & 1) ? -1 : 0)
2342 #define S61 (((s1 >> 29) & 1) ? -1 : 0)
2343 #define S62 (((s1 >> 30) & 1) ? -1 : 0)
2344 #define S63 (((s1 >> 31) & 1) ? -1 : 0)
// First two 32-bit words of the candidate stored at index gid.
2350 const u32 w0 = pws[gid].i[0];
2351 const u32 w1 = pws[gid].i[1];
// Shift left by one and clear bit 0 of every byte, so each byte becomes (byte << 1) & 0xfe.
2353 const u32 w0s = (w0 << 1) & 0xfefefefe;
2354 const u32 w1s = (w1 << 1) & 0xfefefefe;
// K00..K55: seven bits per byte (bits 7 down to 1), lowest byte first, taken
// from w0s (K00..K27) and w1s (K28..K55); each expands to all-ones (-1) or 0.
// Bit 0 of every byte is never selected.
2356 #define K00 (((w0s >> ( 0 + 7)) & 1) ? -1 : 0)
2357 #define K01 (((w0s >> ( 0 + 6)) & 1) ? -1 : 0)
2358 #define K02 (((w0s >> ( 0 + 5)) & 1) ? -1 : 0)
2359 #define K03 (((w0s >> ( 0 + 4)) & 1) ? -1 : 0)
2360 #define K04 (((w0s >> ( 0 + 3)) & 1) ? -1 : 0)
2361 #define K05 (((w0s >> ( 0 + 2)) & 1) ? -1 : 0)
2362 #define K06 (((w0s >> ( 0 + 1)) & 1) ? -1 : 0)
2363 #define K07 (((w0s >> ( 8 + 7)) & 1) ? -1 : 0)
2364 #define K08 (((w0s >> ( 8 + 6)) & 1) ? -1 : 0)
2365 #define K09 (((w0s >> ( 8 + 5)) & 1) ? -1 : 0)
2366 #define K10 (((w0s >> ( 8 + 4)) & 1) ? -1 : 0)
2367 #define K11 (((w0s >> ( 8 + 3)) & 1) ? -1 : 0)
2368 #define K12 (((w0s >> ( 8 + 2)) & 1) ? -1 : 0)
2369 #define K13 (((w0s >> ( 8 + 1)) & 1) ? -1 : 0)
2370 #define K14 (((w0s >> (16 + 7)) & 1) ? -1 : 0)
2371 #define K15 (((w0s >> (16 + 6)) & 1) ? -1 : 0)
2372 #define K16 (((w0s >> (16 + 5)) & 1) ? -1 : 0)
2373 #define K17 (((w0s >> (16 + 4)) & 1) ? -1 : 0)
2374 #define K18 (((w0s >> (16 + 3)) & 1) ? -1 : 0)
2375 #define K19 (((w0s >> (16 + 2)) & 1) ? -1 : 0)
2376 #define K20 (((w0s >> (16 + 1)) & 1) ? -1 : 0)
2377 #define K21 (((w0s >> (24 + 7)) & 1) ? -1 : 0)
2378 #define K22 (((w0s >> (24 + 6)) & 1) ? -1 : 0)
2379 #define K23 (((w0s >> (24 + 5)) & 1) ? -1 : 0)
2380 #define K24 (((w0s >> (24 + 4)) & 1) ? -1 : 0)
2381 #define K25 (((w0s >> (24 + 3)) & 1) ? -1 : 0)
2382 #define K26 (((w0s >> (24 + 2)) & 1) ? -1 : 0)
2383 #define K27 (((w0s >> (24 + 1)) & 1) ? -1 : 0)
2384 #define K28 (((w1s >> ( 0 + 7)) & 1) ? -1 : 0)
2385 #define K29 (((w1s >> ( 0 + 6)) & 1) ? -1 : 0)
2386 #define K30 (((w1s >> ( 0 + 5)) & 1) ? -1 : 0)
2387 #define K31 (((w1s >> ( 0 + 4)) & 1) ? -1 : 0)
2388 #define K32 (((w1s >> ( 0 + 3)) & 1) ? -1 : 0)
2389 #define K33 (((w1s >> ( 0 + 2)) & 1) ? -1 : 0)
2390 #define K34 (((w1s >> ( 0 + 1)) & 1) ? -1 : 0)
2391 #define K35 (((w1s >> ( 8 + 7)) & 1) ? -1 : 0)
2392 #define K36 (((w1s >> ( 8 + 6)) & 1) ? -1 : 0)
2393 #define K37 (((w1s >> ( 8 + 5)) & 1) ? -1 : 0)
2394 #define K38 (((w1s >> ( 8 + 4)) & 1) ? -1 : 0)
2395 #define K39 (((w1s >> ( 8 + 3)) & 1) ? -1 : 0)
2396 #define K40 (((w1s >> ( 8 + 2)) & 1) ? -1 : 0)
2397 #define K41 (((w1s >> ( 8 + 1)) & 1) ? -1 : 0)
2398 #define K42 (((w1s >> (16 + 7)) & 1) ? -1 : 0)
2399 #define K43 (((w1s >> (16 + 6)) & 1) ? -1 : 0)
2400 #define K44 (((w1s >> (16 + 5)) & 1) ? -1 : 0)
2401 #define K45 (((w1s >> (16 + 4)) & 1) ? -1 : 0)
2402 #define K46 (((w1s >> (16 + 3)) & 1) ? -1 : 0)
2403 #define K47 (((w1s >> (16 + 2)) & 1) ? -1 : 0)
2404 #define K48 (((w1s >> (16 + 1)) & 1) ? -1 : 0)
2405 #define K49 (((w1s >> (24 + 7)) & 1) ? -1 : 0)
2406 #define K50 (((w1s >> (24 + 6)) & 1) ? -1 : 0)
2407 #define K51 (((w1s >> (24 + 5)) & 1) ? -1 : 0)
2408 #define K52 (((w1s >> (24 + 4)) & 1) ? -1 : 0)
2409 #define K53 (((w1s >> (24 + 3)) & 1) ? -1 : 0)
2410 #define K54 (((w1s >> (24 + 2)) & 1) ? -1 : 0)
2411 #define K55 (((w1s >> (24 + 1)) & 1) ? -1 : 0)
// The local id in dimension 1 selects the words_buf_r entry; il_pos advances
// by 32 for each pc_pos.
2417 const u32 pc_pos = get_local_id (1);
2419 const u32 il_pos = pc_pos * 32;
// OR the 28 words b[0..27] of words_buf_r[pc_pos] into k00..k27.
2450 k00 |= words_buf_r[pc_pos].b[ 0];
2451 k01 |= words_buf_r[pc_pos].b[ 1];
2452 k02 |= words_buf_r[pc_pos].b[ 2];
2453 k03 |= words_buf_r[pc_pos].b[ 3];
2454 k04 |= words_buf_r[pc_pos].b[ 4];
2455 k05 |= words_buf_r[pc_pos].b[ 5];
2456 k06 |= words_buf_r[pc_pos].b[ 6];
2457 k07 |= words_buf_r[pc_pos].b[ 7];
2458 k08 |= words_buf_r[pc_pos].b[ 8];
2459 k09 |= words_buf_r[pc_pos].b[ 9];
2460 k10 |= words_buf_r[pc_pos].b[10];
2461 k11 |= words_buf_r[pc_pos].b[11];
2462 k12 |= words_buf_r[pc_pos].b[12];
2463 k13 |= words_buf_r[pc_pos].b[13];
2464 k14 |= words_buf_r[pc_pos].b[14];
2465 k15 |= words_buf_r[pc_pos].b[15];
2466 k16 |= words_buf_r[pc_pos].b[16];
2467 k17 |= words_buf_r[pc_pos].b[17];
2468 k18 |= words_buf_r[pc_pos].b[18];
2469 k19 |= words_buf_r[pc_pos].b[19];
2470 k20 |= words_buf_r[pc_pos].b[20];
2471 k21 |= words_buf_r[pc_pos].b[21];
2472 k22 |= words_buf_r[pc_pos].b[22];
2473 k23 |= words_buf_r[pc_pos].b[23];
2474 k24 |= words_buf_r[pc_pos].b[24];
2475 k25 |= words_buf_r[pc_pos].b[25];
2476 k26 |= words_buf_r[pc_pos].b[26];
2477 k27 |= words_buf_r[pc_pos].b[27];
// Argument list of a call whose callee line is not shown in this listing:
// the 28 merged words k00..k27, the macro values K28..K55, and the addresses
// of D00..D63.
2547 k00, k01, k02, k03, k04, k05, k06,
2548 k07, k08, k09, k10, k11, k12, k13,
2549 k14, k15, k16, k17, k18, k19, k20,
2550 k21, k22, k23, k24, k25, k26, k27,
2551 K28, K29, K30, K31, K32, K33, K34,
2552 K35, K36, K37, K38, K39, K40, K41,
2553 K42, K43, K44, K45, K46, K47, K48,
2554 K49, K50, K51, K52, K53, K54, K55,
2555 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
2556 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
2557 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
2558 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
2559 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
2560 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
2561 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
2562 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
// Accumulate mismatches: a bit of tmpResult becomes 1 as soon as any Dnn
// differs from the expanded target bit Snn in that bit position. After each
// group of 16 words, return early once all 32 bit positions have mismatched.
2567 tmpResult |= D00 ^ S00;
2568 tmpResult |= D01 ^ S01;
2569 tmpResult |= D02 ^ S02;
2570 tmpResult |= D03 ^ S03;
2571 tmpResult |= D04 ^ S04;
2572 tmpResult |= D05 ^ S05;
2573 tmpResult |= D06 ^ S06;
2574 tmpResult |= D07 ^ S07;
2575 tmpResult |= D08 ^ S08;
2576 tmpResult |= D09 ^ S09;
2577 tmpResult |= D10 ^ S10;
2578 tmpResult |= D11 ^ S11;
2579 tmpResult |= D12 ^ S12;
2580 tmpResult |= D13 ^ S13;
2581 tmpResult |= D14 ^ S14;
2582 tmpResult |= D15 ^ S15;
2584 if (tmpResult == 0xffffffff) return;
2586 tmpResult |= D16 ^ S16;
2587 tmpResult |= D17 ^ S17;
2588 tmpResult |= D18 ^ S18;
2589 tmpResult |= D19 ^ S19;
2590 tmpResult |= D20 ^ S20;
2591 tmpResult |= D21 ^ S21;
2592 tmpResult |= D22 ^ S22;
2593 tmpResult |= D23 ^ S23;
2594 tmpResult |= D24 ^ S24;
2595 tmpResult |= D25 ^ S25;
2596 tmpResult |= D26 ^ S26;
2597 tmpResult |= D27 ^ S27;
2598 tmpResult |= D28 ^ S28;
2599 tmpResult |= D29 ^ S29;
2600 tmpResult |= D30 ^ S30;
2601 tmpResult |= D31 ^ S31;
2603 if (tmpResult == 0xffffffff) return;
2605 tmpResult |= D32 ^ S32;
2606 tmpResult |= D33 ^ S33;
2607 tmpResult |= D34 ^ S34;
2608 tmpResult |= D35 ^ S35;
2609 tmpResult |= D36 ^ S36;
2610 tmpResult |= D37 ^ S37;
2611 tmpResult |= D38 ^ S38;
2612 tmpResult |= D39 ^ S39;
2613 tmpResult |= D40 ^ S40;
2614 tmpResult |= D41 ^ S41;
2615 tmpResult |= D42 ^ S42;
2616 tmpResult |= D43 ^ S43;
2617 tmpResult |= D44 ^ S44;
2618 tmpResult |= D45 ^ S45;
2619 tmpResult |= D46 ^ S46;
2620 tmpResult |= D47 ^ S47;
2622 if (tmpResult == 0xffffffff) return;
2624 tmpResult |= D48 ^ S48;
2625 tmpResult |= D49 ^ S49;
2626 tmpResult |= D50 ^ S50;
2627 tmpResult |= D51 ^ S51;
2628 tmpResult |= D52 ^ S52;
2629 tmpResult |= D53 ^ S53;
2630 tmpResult |= D54 ^ S54;
2631 tmpResult |= D55 ^ S55;
2632 tmpResult |= D56 ^ S56;
2633 tmpResult |= D57 ^ S57;
2634 tmpResult |= D58 ^ S58;
2635 tmpResult |= D59 ^ S59;
2636 tmpResult |= D60 ^ S60;
2637 tmpResult |= D61 ^ S61;
2638 tmpResult |= D62 ^ S62;
2639 tmpResult |= D63 ^ S63;
2641 if (tmpResult == 0xffffffff) return;
// Index of the highest bit of tmpResult that is still 0, i.e. a bit position
// with no recorded mismatch. ~tmpResult is non-zero here because of the
// early return just above.
2643 const u32 slice = 31 - clz (~tmpResult);
2649 // transpose bitslice mod: beware of race conditions, the *in and *out buffers must be different
// m01500_tm: each work-item reads one word mod[gid], pre-shifts it, and
// scatters 28 of its bits into words_buf_r[gid / 32].b[0..27], placing every
// bit at bit position gid % 32 of the destination word.
//   mod         - input words, one per work-item
//   words_buf_r - output; only ever ORed into by this kernel
// NOTE(review): since the destination is only ORed, it presumably has to be
// zeroed before launch - confirm on the host side.
2652 __kernel void m01500_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
2654 const u32 gid = get_global_id (0);
// 32 consecutive work-items share one output entry; slice is this item's bit position in it.
2656 const u32 block = gid / 32;
2657 const u32 slice = gid % 32;
2659 const u32 w0 = mod[gid];
// Shift left by one and clear bit 0 of every byte, so each byte becomes (byte << 1) & 0xfe.
2661 const u32 w0s = (w0 << 1) & 0xfefefefe;
// i walks the four bytes of w0s (bit offsets 0, 8, 16, 24); j walks the output
// index in steps of 7. Bits i+7 down to i+1 are emitted, bit i+0 is skipped.
2666 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
// atomic_or: the work-items that share `block` update the same destination words.
2668 atomic_or (&words_buf_r[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
2669 atomic_or (&words_buf_r[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice));
2670 atomic_or (&words_buf_r[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice));
2671 atomic_or (&words_buf_r[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice));
2672 atomic_or (&words_buf_r[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice));
2673 atomic_or (&words_buf_r[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice));
2674 atomic_or (&words_buf_r[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice));
// m01500_m04: kernel entry point that forwards to m01500m.
// Work-items with gid >= gid_max return immediately; all others pass every
// parameter except combs_mode and gid_max straight through to m01500m.
// lid is read here but not used in the lines shown.
2678 __kernel void m01500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2684 const u32 gid = get_global_id (0);
2685 const u32 lid = get_local_id (0);
// Skip work-items beyond the number of valid entries.
2687 if (gid >= gid_max) return;
2693 m01500m (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
// m01500_m08: kernel entry point with the same parameter list as m01500_m04.
// No body statements appear for it in this listing.
2696 __kernel void m01500_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// m01500_m16: kernel entry point with the same parameter list as m01500_m04.
// No body statements appear for it in this listing.
2700 __kernel void m01500_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// m01500_s04: kernel entry point that forwards to m01500s.
// Work-items with gid >= gid_max return immediately; all others pass every
// parameter except combs_mode and gid_max straight through to m01500s.
// lid is read here but not used in the lines shown.
2704 __kernel void m01500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2710 const u32 gid = get_global_id (0);
2711 const u32 lid = get_local_id (0);
// Skip work-items beyond the number of valid entries.
2713 if (gid >= gid_max) return;
2719 m01500s (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
// m01500_s08: kernel entry point with the same parameter list as m01500_s04.
// No body statements appear for it in this listing.
2722 __kernel void m01500_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// m01500_s16: kernel entry point with the same parameter list as m01500_s04.
// NOTE(review): no body statements are visible for it in this listing - confirm
// against the full file whether the body is empty.
2726 __kernel void m01500_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)