2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes were taken from JtR, license below
9 #include "inc_vendor.cl"
10 #include "inc_hash_constants.h"
11 #include "inc_hash_functions.cl"
12 #include "inc_types.cl"
13 #include "inc_common.cl"
15 #define COMPARE_S "inc_comp_single_bs.cl"
16 #define COMPARE_M "inc_comp_multi_bs.cl"
35 // Bitslice DES S-boxes with LOP3.LUT instructions
36 // For NVIDIA Maxwell architecture and CUDA 7.5 RC
37 // by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
39 // Gate counts: 25 24 25 18 25 24 24 23
41 // Depth: 8 7 7 6 8 10 10 8
44 // Note that the same S-box function with a lower gate count isn't necessarily faster.
46 // These Boolean expressions corresponding to DES S-boxes were
47 // discovered by <deeplearningjohndoe at gmail.com>
49 // This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
50 // Redistribution and use in source and binary forms, with or without
51 // modification, are permitted.
53 // The underlying mathematical formulas are NOT copyrighted.
// LUT(a, b, c, d, e): declares a new u32 named `a` and sets it from the PTX
// instruction lop3.b32 applied to the three u32 inputs b, c and d. The last
// macro argument `e` is stringified into the instruction text as its
// immediate operand. Because the expansion contains a declaration, a given
// result name can be introduced only once per scope.
56 #define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));
// s1 (LOP3 variant): a chain of LUT() steps over the six inputs a1..a6.
// Each LUT line introduces one u32 temporary from three operands. The four
// results are produced in the order x3, x2, x4, x1; each of those final
// steps takes a5 as its first operand plus two earlier temporaries.
// NOTE(review): the enclosing braces and the statements that consume x1..x4
// (presumably stores through out1..out4) are not among the lines reviewed
// here - confirm against the full file.
58 void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
60 LUT(xAA55AA5500550055, a1, a4, a6, 0xC1)
61 LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
62 LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
63 LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
64 LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
65 LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
66 LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
67 LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
68 LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
69 LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
70 LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
71 LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
72 LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
73 LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
74 LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
75 LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
76 LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
77 LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
78 LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
79 LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
80 LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
81 LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
82 LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
83 LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
84 LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)
// s2 (LOP3 variant): a chain of LUT() steps over the six inputs a1..a6.
// Each LUT line introduces one u32 temporary from three operands. The four
// results are produced in the order x3, x2, x1, x4; each of those final
// steps takes a4 as its first operand plus two earlier temporaries.
// The xB1B1BBBBCCCCA5A5 step passes a1 for two of its three operands.
// NOTE(review): the enclosing braces and the statements that consume x1..x4
// (presumably stores through out1..out4) are not among the lines reviewed
// here - confirm against the full file.
92 void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
94 LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97)
95 LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
96 LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76)
97 LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
98 LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
99 LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65)
100 LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
101 LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
102 LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
103 LUT(x3333CCCC00000000, a2, a5, a6, 0x14)
104 LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
105 LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
106 LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
107 LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
108 LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
109 LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
110 LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
111 LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
112 LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
113 LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
114 LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
115 LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
116 LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
117 LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)
// s3 (LOP3 variant): a chain of LUT() steps over the six inputs a1..a6.
// Each LUT line introduces one u32 temporary from three operands. The four
// results are produced in the order x1, x4, x3, x2; each of those final
// steps takes a2 as its first operand plus two earlier temporaries.
// NOTE(review): the enclosing braces and the statements that consume x1..x4
// (presumably stores through out1..out4) are not among the lines reviewed
// here - confirm against the full file.
125 void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
127 LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
128 LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
129 LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
130 LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
131 LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
132 LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
133 LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
134 LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49)
135 LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
136 LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
137 LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
138 LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
139 LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
140 LUT(x500F500F500F500F, a1, a3, a4, 0x98)
141 LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
142 LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
143 LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
144 LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
145 LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
146 LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
147 LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
148 LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
149 LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
150 LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
151 LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)
// s4 (LOP3 variant): a chain of LUT() steps over the inputs. Only a1..a5
// feed the intermediate temporaries; a6 appears solely as the first operand
// of the four final steps, which are produced in the order x4, x2, x1, x3.
// x1 reuses the operands of x2 and x3 reuses the operands of x4; only the
// immediate passed to LUT() differs within each pair.
// NOTE(review): the enclosing braces and the statements that consume x1..x4
// (presumably stores through out1..out4) are not among the lines reviewed
// here - confirm against the full file.
159 void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
161 LUT(x55F055F055F055F0, a1, a3, a4, 0x72)
162 LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
163 LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59)
164 LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
165 LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
166 LUT(x9999666699996666, a1, a2, a5, 0x69)
167 LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
168 LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
169 LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
170 LUT(x4848484848484848, a1, a2, a3, 0x12)
171 LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
172 LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
173 LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
174 LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
175 LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
176 LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
177 LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
178 LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)
// s5 (LOP3 variant): a chain of LUT() steps over the six inputs a1..a6.
// Each LUT line introduces one u32 temporary from three operands. The four
// results are produced in the order x3, x2, x1, x4; each of those final
// steps takes a4 as its first operand plus two earlier temporaries.
// NOTE(review): the enclosing braces and the statements that consume x1..x4
// (presumably stores through out1..out4) are not among the lines reviewed
// here - confirm against the full file.
186 void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
188 LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
189 LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9)
190 LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
191 LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
192 LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
193 LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
194 LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
195 LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
196 LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
197 LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
198 LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
199 LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
200 LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
201 LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
202 LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
203 LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
204 LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
205 LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
206 LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
207 LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
208 LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
209 LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
210 LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
211 LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
212 LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)
// s6 (LOP3 variant): a chain of LUT() steps over the six inputs a1..a6.
// Each LUT line introduces one u32 temporary from three operands. The four
// results are produced in the order x4, x3, x1, x2; each of those final
// steps takes a6 as its first operand plus two earlier temporaries.
// Unlike the other results, x4 is also read again as an operand of the
// x6A40D4ED6F4DD4EE step, so it must stay ahead of that line.
// The xAAAA5555AAAA5555 step passes a1 for two of its three operands.
// NOTE(review): the enclosing braces and the statements that consume x1..x4
// (presumably stores through out1..out4) are not among the lines reviewed
// here - confirm against the full file.
220 void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
222 LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2)
223 LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
224 LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
225 LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
226 LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
227 LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
228 LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
229 LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
230 LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
231 LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
232 LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
233 LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
234 LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
235 LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
236 LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
237 LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
238 LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
239 LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
240 LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
241 LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
242 LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
243 LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
244 LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
245 LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)
// s7 (LOP3 variant): a chain of LUT() steps over the inputs. Only a1..a5
// feed the intermediate temporaries; a6 appears solely as the first operand
// of the four final steps, which are produced in the order x1, x3, x4, x2.
// NOTE(review): the enclosing braces and the statements that consume x1..x4
// (presumably stores through out1..out4) are not among the lines reviewed
// here - confirm against the full file.
253 void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
255 LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
256 LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
257 LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
258 LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
259 LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
260 LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
261 LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
262 LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
263 LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
264 LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
265 LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
266 LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
267 LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
268 LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
269 LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
270 LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23)
271 LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
272 LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
273 LUT(xA050A050A050A050, a1, a3, a4, 0x21)
274 LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
275 LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
276 LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
277 LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
278 LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)
// s8 (LOP3 variant): a chain of LUT() steps over the inputs. Only a1..a5
// feed the intermediate temporaries; a6 appears solely as the first operand
// of the four final steps, which are produced in the order x4, x2, x3, x1.
// The xBBBBBBBBBBBBBBBB step passes a1 for two of its three operands.
// NOTE(review): the enclosing braces and the statements that consume x1..x4
// (presumably stores through out1..out4) are not among the lines reviewed
// here - confirm against the full file.
286 void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
288 LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
289 LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
290 LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
291 LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
292 LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
293 LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
294 LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
295 LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
296 LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
297 LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
298 LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
299 LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
300 LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
301 LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
302 LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
303 LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
304 LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
305 LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
306 LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
307 LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
308 LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
309 LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
310 LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)
321 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
322 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
324 * Gate counts: 49 44 46 33 48 46 46 41
327 * Several same-gate-count expressions for each S-box are included (for use on
328 * different CPUs/GPUs).
330 * These Boolean expressions corresponding to DES S-boxes have been generated
331 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
332 * John the Ripper password cracker: http://www.openwall.com/john/
333 * Being mathematical formulas, they are not copyrighted and are free for reuse
336 * This file (a specific representation of the S-box expressions, surrounding
337 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
338 * Redistribution and use in source and binary forms, with or without
339 * modification, are permitted. (This is a heavily cut-down "BSD license".)
341 * The effort has been sponsored by Rapid7: http://www.rapid7.com
// s1 (plain gate variant): builds four result words x21, x01, x11 and x31
// from the inputs a1..a6 using only &, |, ^ and ~ on named u32 temporaries.
// Each result is formed in two steps: xN0 combines one temporary with a2
// (or ~a2), then xN1 = xN0 ^ another temporary.
// NOTE(review): the enclosing braces, the continuation of the first
// declaration line (it ends in a comma), the assignments of some temporaries
// that are read below (e.g. x3333FFFF, x66666666) and the statements that
// consume x01..x31 (presumably stores through out1..out4) are not among the
// lines reviewed here - confirm against the full file.
344 void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
346 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
348 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
349 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
350 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
351 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
352 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
353 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
354 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
355 u32 x00, x01, x10, x11, x20, x21, x30, x31;
357 x55005500 = a1 & ~a5;
358 x5A0F5A0F = a4 ^ x55005500;
361 x22226666 = x3333FFFF & x66666666;
362 x2D2D6969 = a4 ^ x22226666;
363 x25202160 = x2D2D6969 & ~x5A0F5A0F;
366 x33CCCC33 = a3 ^ x00FFFF00;
367 x4803120C = x5A0F5A0F & ~x33CCCC33;
368 x2222FFFF = a6 | x22226666;
369 x6A21EDF3 = x4803120C ^ x2222FFFF;
370 x4A01CC93 = x6A21EDF3 & ~x25202160;
373 x7F75FFFF = x6A21EDF3 | x5555FFFF;
374 x00D20096 = a5 & ~x2D2D6969;
375 x7FA7FF69 = x7F75FFFF ^ x00D20096;
377 x0A0A0000 = a4 & ~x5555FFFF;
378 x0AD80096 = x00D20096 ^ x0A0A0000;
379 x00999900 = x00FFFF00 & ~x66666666;
380 x0AD99996 = x0AD80096 | x00999900;
382 x22332233 = a3 & ~x55005500;
383 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
384 x054885C0 = x257AA5F0 & ~x22332233;
385 xFAB77A3F = ~x054885C0;
386 x2221EDF3 = x3333FFFF & x6A21EDF3;
387 xD89697CC = xFAB77A3F ^ x2221EDF3;
// result x21: x7FA7FF69 masked by ~a2, then XORed with xD89697CC
388 x20 = x7FA7FF69 & ~a2;
389 x21 = x20 ^ xD89697CC;
392 x05B77AC0 = x00FFFF00 ^ x054885C0;
393 x05F77AD6 = x00D20096 | x05B77AC0;
394 x36C48529 = x3333FFFF ^ x05F77AD6;
395 x6391D07C = a1 ^ x36C48529;
396 xBB0747B0 = xD89697CC ^ x6391D07C;
// result x01: x25202160 ORed with a2, then XORed with xBB0747B0
397 x00 = x25202160 | a2;
398 x01 = x00 ^ xBB0747B0;
401 x4C460000 = x3333FFFF ^ x7F75FFFF;
402 x4EDF9996 = x0AD99996 | x4C460000;
403 x2D4E49EA = x6391D07C ^ x4EDF9996;
404 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
405 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
// result x11: x4A01CC93 ORed with a2, then XORed with x96B1B65A
406 x10 = x4A01CC93 | a2;
407 x11 = x10 ^ x96B1B65A;
410 x5AFF5AFF = a5 | x5A0F5A0F;
411 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
412 x4201C010 = x4A01CC93 & x6391D07C;
413 x10B0D205 = x52B11215 ^ x4201C010;
// result x31: x10B0D205 ORed with a2, then XORed with x0AD99996
414 x30 = x10B0D205 | a2;
415 x31 = x30 ^ x0AD99996;
// s2 (plain gate variant): builds four result words x11, x01, x21 and x31
// from the inputs a1..a6 using only &, |, ^ and ~ on named u32 temporaries.
// Each result is formed in two steps: xN0 combines one temporary with a4
// (or ~a4), then xN1 = xN0 ^ another temporary.
// NOTE(review): the enclosing braces, the declaration and assignment of
// x33CC33CC, the assignments of some other temporaries that are read below
// (e.g. x00000F0F, xAAAAAAAA) and the statements that consume x01..x31
// (presumably stores through out1..out4) are not among the lines reviewed
// here - confirm against the full file.
419 void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
422 u32 x55550000, x00AA00FF, x33BB33FF;
423 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
424 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
425 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
426 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
427 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
428 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
429 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
430 u32 x00, x01, x10, x11, x20, x21, x30, x31;
434 x55550000 = a1 & ~a6;
435 x00AA00FF = a5 & ~x55550000;
436 x33BB33FF = a2 | x00AA00FF;
438 x33CC0000 = x33CC33CC & ~a6;
439 x11441144 = a1 & x33CC33CC;
440 x11BB11BB = a5 ^ x11441144;
441 x003311BB = x11BB11BB & ~x33CC0000;
444 x336600FF = x00AA00FF ^ x33CC0000;
445 x332200FF = x33BB33FF & x336600FF;
446 x332200F0 = x332200FF & ~x00000F0F;
448 x0302000F = a3 & x332200FF;
450 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
451 x33CCCC33 = a6 ^ x33CC33CC;
452 x33CCC030 = x33CCCC33 & ~x00000F0F;
453 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
// result x11: a4 masked by ~x332200F0, then XORed with x9A646A95
454 x10 = a4 & ~x332200F0;
455 x11 = x10 ^ x9A646A95;
458 x00333303 = a2 & ~x33CCC030;
459 x118822B8 = x11BB11BB ^ x00333303;
460 xA8208805 = xA9A8AAA5 & ~x118822B8;
461 x3CC3C33C = a3 ^ x33CCCC33;
462 x94E34B39 = xA8208805 ^ x3CC3C33C;
// result x01: x33BB33FF masked by ~a4, then XORed with x94E34B39
463 x00 = x33BB33FF & ~a4;
464 x01 = x00 ^ x94E34B39;
467 x0331330C = x0302000F ^ x00333303;
468 x3FF3F33C = x3CC3C33C | x0331330C;
469 xA9DF596A = x33BB33FF ^ x9A646A95;
470 xA9DF5F6F = x00000F0F | xA9DF596A;
471 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
473 xA9466A6A = x332200FF ^ x9A646A95;
474 x3DA52153 = x94E34B39 ^ xA9466A6A;
475 x29850143 = xA9DF5F6F & x3DA52153;
476 x33C0330C = x33CC33CC & x3FF3F33C;
477 x1A45324F = x29850143 ^ x33C0330C;
// result x21: x1A45324F ORed with a4, then XORed with x962CAC53
478 x20 = x1A45324F | a4;
479 x21 = x20 ^ x962CAC53;
482 x0A451047 = x1A45324F & ~x118822B8;
483 xBBDFDD7B = x33CCCC33 | xA9DF596A;
484 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
// result x31: x003311BB ORed with a4, then XORed with xB19ACD3C
485 x30 = x003311BB | a4;
486 x31 = x30 ^ xB19ACD3C;
// s3 (plain gate variant): builds four result words x31, x11, x01 and x21
// from the inputs a1..a6 using only &, |, ^ and ~ on named u32 temporaries.
// Each result is formed in two steps: xN0 combines one temporary (or its
// complement) with a5 (or ~a5), then xN1 = xN0 ^ another temporary.
// NOTE(review): the enclosing braces, the assignments of some temporaries
// that are read below (e.g. x0F0FF0F0, x00FFFF00, x55AA55AA, x000000FF) and
// the statements that consume x01..x31 (presumably stores through
// out1..out4) are not among the lines reviewed here - confirm against the
// full file.
490 void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
492 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
493 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
494 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
495 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
496 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
497 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
498 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
499 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
500 u32 x00, x01, x10, x11, x20, x21, x30, x31;
502 x44444444 = a1 & ~a2;
504 x4F4FF4F4 = x44444444 | x0F0FF0F0;
506 x00AAAA00 = x00FFFF00 & ~a1;
507 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
509 x3C3CC3C3 = a2 ^ x0F0FF0F0;
510 x3C3C0000 = x3C3CC3C3 & ~a6;
511 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
512 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
514 x00005EF4 = a6 & x4FE55EF4;
515 x00FF5EFF = a4 | x00005EF4;
516 x00555455 = a1 & x00FF5EFF;
517 x3C699796 = x3C3CC3C3 ^ x00555455;
// result x31: x4FE55EF4 masked by ~a5, then XORed with x3C699796
518 x30 = x4FE55EF4 & ~a5;
519 x31 = x30 ^ x3C699796;
522 x000FF000 = x0F0FF0F0 & x00FFFF00;
524 x26D9A15E = x7373F4F4 ^ x55AA55AA;
525 x2FDFAF5F = a3 | x26D9A15E;
526 x2FD00F5F = x2FDFAF5F & ~x000FF000;
528 x55AAFFAA = x00AAAA00 | x55AA55AA;
529 x28410014 = x3C699796 & ~x55AAFFAA;
531 x000000CC = x000000FF & ~a2;
532 x284100D8 = x28410014 ^ x000000CC;
534 x204100D0 = x7373F4F4 & x284100D8;
535 x3C3CC3FF = x3C3CC3C3 | x000000FF;
536 x1C3CC32F = x3C3CC3FF & ~x204100D0;
537 x4969967A = a1 ^ x1C3CC32F;
// result x11: x2FD00F5F ANDed with a5, then XORed with x4969967A
538 x10 = x2FD00F5F & a5;
539 x11 = x10 ^ x4969967A;
542 x4CC44CC4 = x4FE55EF4 & ~a2;
543 x40C040C0 = x4CC44CC4 & ~a3;
544 xC3C33C3C = ~x3C3CC3C3;
545 x9669C396 = x55AAFFAA ^ xC3C33C3C;
546 xD6A98356 = x40C040C0 ^ x9669C396;
// result x01: a5 masked by ~x0C840A00, then XORed with xD6A98356
547 x00 = a5 & ~x0C840A00;
548 x01 = x00 ^ xD6A98356;
551 xD6E9C3D6 = x40C040C0 | x9669C396;
552 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
553 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
554 x001A000B = a4 & ~x4FE55EF4;
555 x9A1F2D1B = x9A072D12 | x001A000B;
// result x21: a5 masked by ~x284100D8, then XORed with x9A1F2D1B
556 x20 = a5 & ~x284100D8;
557 x21 = x20 ^ x9A1F2D1B;
// s4 (plain gate variant): builds four result words x01, x11, x21 and x31
// using only &, |, ^ and ~ on named u32 temporaries. In the visible lines
// a6 is used only in the four xN0 steps; the temporaries depend on the
// other inputs. Each result is xN1 = xN0 ^ temporary, where xN0 combines
// a6 (or ~a6) with a temporary (or its complement).
// x11 is XORed with xA91679E1, the bitwise complement of the x56E9861E
// term used for x01; x21 and x31 share x61C8F93C and xB35C94A6 and differ
// only in OR versus AND with a6.
// NOTE(review): the enclosing braces, the assignments of some temporaries
// that are read below (e.g. x5A5A5A5A, x0F0FF0F0, x33FF33FF, x33CC33CC) and
// the statements that consume x01..x31 (presumably stores through
// out1..out4) are not among the lines reviewed here - confirm against the
// full file.
561 void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
563 u32 x5A5A5A5A, x0F0FF0F0;
564 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
565 x52FBCA0F, x61C8F93C;
566 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
567 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
568 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
569 u32 x00, x01, x10, x11, x20, x21, x30, x31;
574 x33FFCC00 = a5 ^ x33FF33FF;
575 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
576 x0C0CC0C0 = x0F0FF0F0 & ~a2;
577 x0CF3C03F = a4 ^ x0C0CC0C0;
578 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
579 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
580 x61C8F93C = a2 ^ x52FBCA0F;
582 x00C0C03C = x0CF3C03F & x61C8F93C;
583 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
584 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
585 x30908326 = x3B92A366 & ~x0F0F30C0;
586 x3C90B3D6 = x0C0030F0 ^ x30908326;
589 x0C0CFFFF = a5 | x0C0CC0C0;
590 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
591 x04124C11 = x379E5C99 & ~x33CC33CC;
592 x56E9861E = x52FBCA0F ^ x04124C11;
// result x01: a6 masked by ~x3C90B3D6, then XORed with x56E9861E
593 x00 = a6 & ~x3C90B3D6;
594 x01 = x00 ^ x56E9861E;
597 xA91679E1 = ~x56E9861E;
// result x11: x3C90B3D6 masked by ~a6, then XORed with xA91679E1
598 x10 = x3C90B3D6 & ~a6;
599 x11 = x10 ^ xA91679E1;
602 x9586CA37 = x3C90B3D6 ^ xA91679E1;
603 x8402C833 = x9586CA37 & ~x33CC33CC;
604 x84C2C83F = x00C0C03C | x8402C833;
605 xB35C94A6 = x379E5C99 ^ x84C2C83F;
// result x21: x61C8F93C ORed with a6, then XORed with xB35C94A6
606 x20 = x61C8F93C | a6;
607 x21 = x20 ^ xB35C94A6;
// result x31: a6 ANDed with x61C8F93C, then XORed with xB35C94A6
610 x30 = a6 & x61C8F93C;
611 x31 = x30 ^ xB35C94A6;
// s5 (plain gate variant): builds four result words x21, x31, x01 and x11
// from the inputs a1..a6 using only &, |, ^ and ~ on named u32 temporaries.
// Each result is formed in two steps: xN0 combines one temporary with a2
// (or ~a2), then xN1 = xN0 ^ another temporary.
// NOTE(review): the enclosing braces, the assignment of x77777777 (read
// below) and the statements that consume x01..x31 (presumably stores
// through out1..out4) are not among the lines reviewed here - confirm
// against the full file.
615 void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
617 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
618 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
619 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
620 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
621 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
622 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
623 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
624 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
625 u32 x00, x01, x10, x11, x20, x21, x30, x31;
628 x77770000 = x77777777 & ~a6;
629 x22225555 = a1 ^ x77770000;
630 x11116666 = a3 ^ x22225555;
631 x1F1F6F6F = a4 | x11116666;
633 x70700000 = x77770000 & ~a4;
634 x43433333 = a3 ^ x70700000;
635 x00430033 = a5 & x43433333;
636 x55557777 = a1 | x11116666;
637 x55167744 = x00430033 ^ x55557777;
638 x5A19784B = a4 ^ x55167744;
640 x5A1987B4 = a6 ^ x5A19784B;
641 x7A3BD7F5 = x22225555 | x5A1987B4;
642 x003B00F5 = a5 & x7A3BD7F5;
643 x221955A0 = x22225555 ^ x003B00F5;
644 x05050707 = a4 & x55557777;
645 x271C52A7 = x221955A0 ^ x05050707;
647 x2A2A82A0 = x7A3BD7F5 & ~a1;
648 x6969B193 = x43433333 ^ x2A2A82A0;
649 x1FE06F90 = a5 ^ x1F1F6F6F;
650 x16804E00 = x1FE06F90 & ~x6969B193;
651 xE97FB1FF = ~x16804E00;
// result x21: xE97FB1FF masked by ~a2, then XORed with x5A19784B
652 x20 = xE97FB1FF & ~a2;
653 x21 = x20 ^ x5A19784B;
656 x43403302 = x43433333 & ~x003B00F5;
657 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
658 x37DEFFB7 = x271C52A7 | x35CAED30;
659 x349ECCB5 = x37DEFFB7 & ~x43403302;
660 x0B01234A = x1F1F6F6F & ~x349ECCB5;
662 x101884B4 = x5A1987B4 & x349ECCB5;
663 x0FF8EB24 = x1FE06F90 ^ x101884B4;
664 x41413333 = x43433333 & x55557777;
665 x4FF9FB37 = x0FF8EB24 | x41413333;
666 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
// result x31: x4FC2FBC2 ANDed with a2, then XORed with x271C52A7
667 x30 = x4FC2FBC2 & a2;
668 x31 = x30 ^ x271C52A7;
671 x22222222 = a1 ^ x77777777;
672 x16BCEE97 = x349ECCB5 ^ x22222222;
673 x0F080B04 = a4 & x0FF8EB24;
674 x19B4E593 = x16BCEE97 ^ x0F080B04;
// result x01: x0B01234A ORed with a2, then XORed with x19B4E593
675 x00 = x0B01234A | a2;
676 x01 = x00 ^ x19B4E593;
679 x5C5C5C5C = x1F1F6F6F ^ x43433333;
680 x4448184C = x5C5C5C5C & ~x19B4E593;
681 x2DDABE71 = x22225555 ^ x0FF8EB24;
682 x6992A63D = x4448184C ^ x2DDABE71;
// result x11: x1F1F6F6F ANDed with a2, then XORed with x6992A63D
683 x10 = x1F1F6F6F & a2;
684 x11 = x10 ^ x6992A63D;
// s6 (plain gate variant): builds four result words x31, x21, x11 and x01
// from the inputs a1..a6 using only &, |, ^ and ~ on named u32 temporaries.
// Each result is formed in two steps: xN0 combines one temporary with a4
// (or ~a4), then xN1 = xN0 ^ another temporary.
// NOTE(review): the enclosing braces, the declaration and assignment of
// x33CC33CC, the assignments of some other temporaries that are read below
// (e.g. x3333FFFF, x5A5A5A5A) and the statements that consume x01..x31
// (presumably stores through out1..out4) are not among the lines reviewed
// here - confirm against the full file.
688 void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
691 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
692 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
693 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
694 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
695 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
696 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
697 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
698 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
699 u32 x00, x01, x10, x11, x20, x21, x30, x31;
704 x11115555 = a1 & x3333FFFF;
705 x22DD6699 = x33CC33CC ^ x11115555;
706 x22DD9966 = a6 ^ x22DD6699;
707 x00220099 = a5 & ~x22DD9966;
709 x00551144 = a1 & x22DD9966;
710 x33662277 = a2 ^ x00551144;
712 x7B7E7A7F = x33662277 | x5A5A5A5A;
713 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
715 x09030C06 = a3 & x59A31CE6;
716 x09030000 = x09030C06 & ~a6;
717 x336622FF = x00220099 | x33662277;
718 x3A6522FF = x09030000 ^ x336622FF;
// result x31: x3A6522FF ANDed with a4, then XORed with x59A31CE6
719 x30 = x3A6522FF & a4;
720 x31 = x30 ^ x59A31CE6;
723 x484D494C = a2 ^ x7B7E7A7F;
724 x0000B6B3 = a6 & ~x484D494C;
725 x0F0FB9BC = a3 ^ x0000B6B3;
726 x00FC00F9 = a5 & ~x09030C06;
727 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
729 x5DF75DF7 = a1 | x59A31CE6;
730 x116600F7 = x336622FF & x5DF75DF7;
731 x1E69B94B = x0F0FB9BC ^ x116600F7;
732 x1668B94B = x1E69B94B & ~x09030000;
// result x21: x00220099 ORed with a4, then XORed with x1668B94B
733 x20 = x00220099 | a4;
734 x21 = x20 ^ x1668B94B;
737 x7B7B7B7B = a2 | x5A5A5A5A;
738 x411E5984 = x3A6522FF ^ x7B7B7B7B;
739 x1FFFFDFD = x11115555 | x0FFFB9FD;
740 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
742 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
743 x004B002D = a5 & ~x3CB4DFD2;
744 xB7B2B6B3 = ~x484D494C;
745 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
746 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
// result x11: xCC82CDE5 masked by ~a4, then XORed with x5EE1A479
747 x10 = xCC82CDE5 & ~a4;
748 x11 = x10 ^ x5EE1A479;
751 x0055EEBB = a6 ^ x00551144;
752 x5A5AECE9 = a1 ^ x0F0FB9BC;
753 x0050ECA9 = x0055EEBB & x5A5AECE9;
754 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
755 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
// result x01: x0FFFB9FD masked by ~a4, then XORed with xC59A2D67
756 x00 = x0FFFB9FD & ~a4;
757 x01 = x00 ^ xC59A2D67;
// s7 (plain gate variant): builds four result words x31, x01, x21 and x11
// using only &, |, ^ and ~ on named u32 temporaries. In the visible lines
// a1 is used only in the four xN0 steps. Each result is xN1 = xN0 ^
// temporary, where xN0 combines a1 (or ~a1) with a temporary (or its
// complement).
// NOTE(review): the enclosing braces, the assignment of x0FF00FF0 (read
// below) and the statements that consume x01..x31 (presumably stores
// through out1..out4) are not among the lines reviewed here - confirm
// against the full file.
761 void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
763 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
764 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
765 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
766 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
767 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
768 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
769 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
770 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
771 u32 x00, x01, x10, x11, x20, x21, x30, x31;
774 x3CC33CC3 = a3 ^ x0FF00FF0;
775 x00003CC3 = a6 & x3CC33CC3;
776 x0F000F00 = a4 & x0FF00FF0;
777 x5A555A55 = a2 ^ x0F000F00;
778 x00001841 = x00003CC3 & x5A555A55;
780 x00000F00 = a6 & x0F000F00;
781 x33333C33 = a3 ^ x00000F00;
782 x7B777E77 = x5A555A55 | x33333C33;
783 x0FF0F00F = a6 ^ x0FF00FF0;
784 x74878E78 = x7B777E77 ^ x0FF0F00F;
// result x31: a1 masked by ~x00001841, then XORed with x74878E78
785 x30 = a1 & ~x00001841;
786 x31 = x30 ^ x74878E78;
789 x003C003C = a5 & ~x3CC33CC3;
790 x5A7D5A7D = x5A555A55 | x003C003C;
791 x333300F0 = x00003CC3 ^ x33333C33;
792 x694E5A8D = x5A7D5A7D ^ x333300F0;
794 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
795 x000F0303 = a4 & ~x0FF0CCCC;
796 x5A505854 = x5A555A55 & ~x000F0303;
797 x33CC000F = a5 ^ x333300F0;
798 x699C585B = x5A505854 ^ x33CC000F;
800 x7F878F78 = x0F000F00 | x74878E78;
801 x21101013 = a3 & x699C585B;
802 x7F979F7B = x7F878F78 | x21101013;
803 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
804 x4F9493BB = x7F979F7B ^ x30030CC0;
// result x01: x4F9493BB masked by ~a1, then XORed with x694E5A8D
805 x00 = x4F9493BB & ~a1;
806 x01 = x00 ^ x694E5A8D;
809 x6F9CDBFB = x699C585B | x4F9493BB;
810 x0000DBFB = a6 & x6F9CDBFB;
811 x00005151 = a2 & x0000DBFB;
812 x26DAC936 = x694E5A8D ^ x4F9493BB;
813 x26DA9867 = x00005151 ^ x26DAC936;
815 x27DA9877 = x21101013 | x26DA9867;
816 x27DA438C = x0000DBFB ^ x27DA9877;
817 x2625C9C9 = a5 ^ x26DAC936;
818 x27FFCBCD = x27DA438C | x2625C9C9;
// result x21: x27FFCBCD ANDed with a1, then XORed with x699C585B
819 x20 = x27FFCBCD & a1;
820 x21 = x20 ^ x699C585B;
823 x27FF1036 = x0000DBFB ^ x27FFCBCD;
824 x27FF103E = x003C003C | x27FF1036;
825 xB06B6C44 = ~x4F9493BB;
826 x97947C7A = x27FF103E ^ xB06B6C44;
// result x11: x97947C7A masked by ~a1, then XORed with x26DA9867
827 x10 = x97947C7A & ~a1;
828 x11 = x10 ^ x26DA9867;
// s8 (plain gate variant): builds four result words x11, x21, x31 and x01
// using only &, |, ^ and ~ on named u32 temporaries. a6 is used only in
// the four xN0 steps; all temporaries are derived from a1..a5. Each result
// is xN1 = xN0 ^ temporary, where xN0 is a temporary ORed or ANDed with a6.
// x31 and x01 are both XORed with the same term, xC729695A.
// NOTE(review): the enclosing braces and the statements that consume
// x01..x31 (presumably stores through out1..out4) are not among the lines
// reviewed here - confirm against the full file.
832 void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
834 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
835 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
836 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
837 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
838 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
839 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
840 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
841 u32 x00, x01, x10, x11, x20, x21, x30, x31;
843 x0C0C0C0C = a3 & ~a2;
844 x0000F0F0 = a5 & ~a3;
845 x00FFF00F = a4 ^ x0000F0F0;
846 x00555005 = a1 & x00FFF00F;
847 x00515001 = x00555005 & ~x0C0C0C0C;
849 x33000330 = a2 & ~x00FFF00F;
850 x77555775 = a1 | x33000330;
851 x30303030 = a2 & ~a3;
852 x3030CFCF = a5 ^ x30303030;
853 x30104745 = x77555775 & x3030CFCF;
854 x30555745 = x00555005 | x30104745;
856 xFF000FF0 = ~x00FFF00F;
857 xCF1048B5 = x30104745 ^ xFF000FF0;
858 x080A080A = a3 & ~x77555775;
859 xC71A40BF = xCF1048B5 ^ x080A080A;
860 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
// result x11: x00515001 ORed with a6, then XORed with xCB164CB3
861 x10 = x00515001 | a6;
862 x11 = x10 ^ xCB164CB3;
865 x9E4319E6 = a1 ^ xCB164CB3;
866 x000019E6 = a5 & x9E4319E6;
867 xF429738C = a2 ^ xC71A40BF;
868 xF4296A6A = x000019E6 ^ xF429738C;
869 xC729695A = x33000330 ^ xF4296A6A;
871 xC47C3D2F = x30555745 ^ xF4296A6A;
872 xF77F3F3F = a2 | xC47C3D2F;
873 x9E43E619 = a5 ^ x9E4319E6;
874 x693CD926 = xF77F3F3F ^ x9E43E619;
// result x21: x30555745 ANDed with a6, then XORed with x693CD926
875 x20 = x30555745 & a6;
876 x21 = x20 ^ x693CD926;
879 xF719A695 = x3030CFCF ^ xC729695A;
880 xF4FF73FF = a4 | xF429738C;
881 x03E6D56A = xF719A695 ^ xF4FF73FF;
882 x56B3803F = a1 ^ x03E6D56A;
// result x31: x56B3803F ANDed with a6, then XORed with xC729695A
883 x30 = x56B3803F & a6;
884 x31 = x30 ^ xC729695A;
887 xF700A600 = xF719A695 & ~a4;
888 x61008000 = x693CD926 & xF700A600;
889 x03B7856B = x00515001 ^ x03E6D56A;
890 x62B7056B = x61008000 ^ x03B7856B;
// result x01: x62B7056B ORed with a6, then XORed with xC729695A
891 x00 = x62B7056B | a6;
892 x01 = x00 ^ xC729695A;
899 #if defined IS_AMD || defined IS_GENERIC
902 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
903 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
905 * Gate counts: 49 44 46 33 48 46 46 41
908 * Several same-gate-count expressions for each S-box are included (for use on
909 * different CPUs/GPUs).
911 * These Boolean expressions corresponding to DES S-boxes have been generated
912 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
913 * John the Ripper password cracker: http://www.openwall.com/john/
914 * Being mathematical formulas, they are not copyrighted and are free for reuse
917 * This file (a specific representation of the S-box expressions, surrounding
918 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
919 * Redistribution and use in source and binary forms, with or without
920 * modification, are permitted. (This is a heavily cut-down "BSD license".)
922 * The effort has been sponsored by Rapid7: http://www.rapid7.com
// s1: bitslice DES S-box 1 for the IS_AMD / IS_GENERIC build, expressed as a
// straight-line network of AND, OR, XOR, NOT and AND-NOT gates on u32 words.
//   a1..a6     - the six input words, passed by value.
//   out1..out4 - pointers for the four result words. No store through them is
//                visible in this listing; presumably each xN1 computed below
//                is folded into the matching *outN on a line not shown here -
//                TODO confirm against the full file.
// a2 is used only in the final xN0 step of each of the four groups; every
// other gate depends on a1, a3, a4, a5 and a6.
925 void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
// NOTE(review): this declaration list ends in a comma and its continuation is
// not part of this excerpt. x25202160, which is assigned and read below, is
// presumably declared on that missing line - confirm.
927 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
929 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
930 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
931 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
932 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
933 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
934 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
935 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
// xN0 / xN1: the last two gates of each of the four result groups.
936 u32 x00, x01, x10, x11, x20, x21, x30, x31;
938 x55005500 = a1 & ~a5;
939 x5A0F5A0F = a4 ^ x55005500;
// NOTE(review): x3333FFFF and x66666666 are read on the next line, but no
// assignment to either is visible in this excerpt - confirm they are set on
// the lines that are not shown.
942 x22226666 = x3333FFFF & x66666666;
943 x2D2D6969 = a4 ^ x22226666;
944 x25202160 = x2D2D6969 & ~x5A0F5A0F;
// NOTE(review): x00FFFF00 is read here without a visible assignment - confirm.
947 x33CCCC33 = a3 ^ x00FFFF00;
948 x4803120C = x5A0F5A0F & ~x33CCCC33;
949 x2222FFFF = a6 | x22226666;
950 x6A21EDF3 = x4803120C ^ x2222FFFF;
951 x4A01CC93 = x6A21EDF3 & ~x25202160;
// NOTE(review): x5555FFFF is read here without a visible assignment - confirm.
954 x7F75FFFF = x6A21EDF3 | x5555FFFF;
955 x00D20096 = a5 & ~x2D2D6969;
956 x7FA7FF69 = x7F75FFFF ^ x00D20096;
// x0AD99996 is not consumed until the final XOR of group x30/x31.
958 x0A0A0000 = a4 & ~x5555FFFF;
959 x0AD80096 = x00D20096 ^ x0A0A0000;
960 x00999900 = x00FFFF00 & ~x66666666;
961 x0AD99996 = x0AD80096 | x00999900;
// Group x20/x21: (x7FA7FF69 & ~a2) ^ xD89697CC.
963 x22332233 = a3 & ~x55005500;
964 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
965 x054885C0 = x257AA5F0 & ~x22332233;
966 xFAB77A3F = ~x054885C0;
967 x2221EDF3 = x3333FFFF & x6A21EDF3;
968 xD89697CC = xFAB77A3F ^ x2221EDF3;
969 x20 = x7FA7FF69 & ~a2;
970 x21 = x20 ^ xD89697CC;
// Group x00/x01: (x25202160 | a2) ^ xBB0747B0.
973 x05B77AC0 = x00FFFF00 ^ x054885C0;
974 x05F77AD6 = x00D20096 | x05B77AC0;
975 x36C48529 = x3333FFFF ^ x05F77AD6;
976 x6391D07C = a1 ^ x36C48529;
977 xBB0747B0 = xD89697CC ^ x6391D07C;
978 x00 = x25202160 | a2;
979 x01 = x00 ^ xBB0747B0;
// Group x10/x11: (x4A01CC93 | a2) ^ x96B1B65A.
982 x4C460000 = x3333FFFF ^ x7F75FFFF;
983 x4EDF9996 = x0AD99996 | x4C460000;
984 x2D4E49EA = x6391D07C ^ x4EDF9996;
985 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
986 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
987 x10 = x4A01CC93 | a2;
988 x11 = x10 ^ x96B1B65A;
// Group x30/x31: (x10B0D205 | a2) ^ x0AD99996.
991 x5AFF5AFF = a5 | x5A0F5A0F;
992 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
993 x4201C010 = x4A01CC93 & x6391D07C;
994 x10B0D205 = x52B11215 ^ x4201C010;
995 x30 = x10B0D205 | a2;
996 x31 = x30 ^ x0AD99996;
// s2: bitslice DES S-box 2 for the IS_AMD / IS_GENERIC build, expressed as a
// straight-line network of AND, OR, XOR, NOT and AND-NOT gates on u32 words.
//   a1..a6     - the six input words, passed by value.
//   out1..out4 - pointers for the four result words. No store through them is
//                visible in this listing; presumably each xN1 computed below
//                is folded into the matching *outN on a line not shown here -
//                TODO confirm against the full file.
// a4 is used only in the final xN0 step of each of the four groups.
1000 void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
// NOTE(review): x33CC33CC is assigned below, but none of the declaration
// lists visible here names it - presumably it is declared on a line missing
// from this excerpt; confirm.
1003 u32 x55550000, x00AA00FF, x33BB33FF;
1004 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
1005 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
1006 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
1007 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
1008 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
1009 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
1010 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
// xN0 / xN1: the last two gates of each of the four result groups.
1011 u32 x00, x01, x10, x11, x20, x21, x30, x31;
// Shared sub-terms built from a1, a2, a3, a5 and a6.
1013 x33CC33CC = a2 ^ a5;
1015 x55550000 = a1 & ~a6;
1016 x00AA00FF = a5 & ~x55550000;
1017 x33BB33FF = a2 | x00AA00FF;
// x003311BB is not consumed until the xN0 step of group x30/x31.
1019 x33CC0000 = x33CC33CC & ~a6;
1020 x11441144 = a1 & x33CC33CC;
1021 x11BB11BB = a5 ^ x11441144;
1022 x003311BB = x11BB11BB & ~x33CC0000;
1024 x00000F0F = a3 & a6;
1025 x336600FF = x00AA00FF ^ x33CC0000;
1026 x332200FF = x33BB33FF & x336600FF;
1027 x332200F0 = x332200FF & ~x00000F0F;
1029 x0302000F = a3 & x332200FF;
// NOTE(review): xAAAAAAAA is read on the next line, but no assignment to it is
// visible in this excerpt - confirm it is set on a line that is not shown.
1031 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
1032 x33CCCC33 = a6 ^ x33CC33CC;
1033 x33CCC030 = x33CCCC33 & ~x00000F0F;
1034 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
// Group x10/x11: (a4 & ~x332200F0) ^ x9A646A95.
1035 x10 = a4 & ~x332200F0;
1036 x11 = x10 ^ x9A646A95;
// Group x00/x01: (x33BB33FF & ~a4) ^ x94E34B39.
1039 x00333303 = a2 & ~x33CCC030;
1040 x118822B8 = x11BB11BB ^ x00333303;
1041 xA8208805 = xA9A8AAA5 & ~x118822B8;
1042 x3CC3C33C = a3 ^ x33CCCC33;
1043 x94E34B39 = xA8208805 ^ x3CC3C33C;
1044 x00 = x33BB33FF & ~a4;
1045 x01 = x00 ^ x94E34B39;
// Group x20/x21: (x1A45324F | a4) ^ x962CAC53.
1048 x0331330C = x0302000F ^ x00333303;
1049 x3FF3F33C = x3CC3C33C | x0331330C;
1050 xA9DF596A = x33BB33FF ^ x9A646A95;
1051 xA9DF5F6F = x00000F0F | xA9DF596A;
1052 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
1054 xA9466A6A = x332200FF ^ x9A646A95;
1055 x3DA52153 = x94E34B39 ^ xA9466A6A;
1056 x29850143 = xA9DF5F6F & x3DA52153;
1057 x33C0330C = x33CC33CC & x3FF3F33C;
1058 x1A45324F = x29850143 ^ x33C0330C;
1059 x20 = x1A45324F | a4;
1060 x21 = x20 ^ x962CAC53;
// Group x30/x31: (x003311BB | a4) ^ xB19ACD3C.
1063 x0A451047 = x1A45324F & ~x118822B8;
1064 xBBDFDD7B = x33CCCC33 | xA9DF596A;
1065 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
1066 x30 = x003311BB | a4;
1067 x31 = x30 ^ xB19ACD3C;
// s3: bitslice DES S-box 3 for the IS_AMD / IS_GENERIC build, expressed as a
// straight-line network of AND, OR, XOR, NOT and AND-NOT gates on u32 words.
//   a1..a6     - the six input words, passed by value.
//   out1..out4 - pointers for the four result words. No store through them is
//                visible in this listing; presumably each xN1 computed below
//                is folded into the matching *outN on a line not shown here -
//                TODO confirm against the full file.
// a5 is used only in the final xN0 step of each of the four groups.
1071 void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
// One scratch word per gate. The hex names appear to encode the truth table of
// the value held (JtR naming convention) - treat that as a hint, not a contract.
1073 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
1074 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
1075 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
1076 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
1077 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
1078 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
1079 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
1080 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
// xN0 / xN1: the last two gates of each of the four result groups.
1081 u32 x00, x01, x10, x11, x20, x21, x30, x31;
// Shared sub-terms built from a1, a2, a3, a4 and a6.
1083 x44444444 = a1 & ~a2;
1084 x0F0FF0F0 = a3 ^ a6;
1085 x4F4FF4F4 = x44444444 | x0F0FF0F0;
1086 x00FFFF00 = a4 ^ a6;
1087 x00AAAA00 = x00FFFF00 & ~a1;
1088 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
// x0C840A00 is not consumed until the xN0 step of group x00/x01.
1090 x3C3CC3C3 = a2 ^ x0F0FF0F0;
1091 x3C3C0000 = x3C3CC3C3 & ~a6;
1092 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
1093 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
// Group x30/x31: (x4FE55EF4 & ~a5) ^ x3C699796.
1095 x00005EF4 = a6 & x4FE55EF4;
1096 x00FF5EFF = a4 | x00005EF4;
1097 x00555455 = a1 & x00FF5EFF;
1098 x3C699796 = x3C3CC3C3 ^ x00555455;
1099 x30 = x4FE55EF4 & ~a5;
1100 x31 = x30 ^ x3C699796;
// Group x10/x11: (x2FD00F5F & a5) ^ x4969967A.
1103 x000FF000 = x0F0FF0F0 & x00FFFF00;
1104 x55AA55AA = a1 ^ a4;
1105 x26D9A15E = x7373F4F4 ^ x55AA55AA;
1106 x2FDFAF5F = a3 | x26D9A15E;
1107 x2FD00F5F = x2FDFAF5F & ~x000FF000;
// x284100D8 feeds this group and is reused in the xN0 step of group x20/x21.
1109 x55AAFFAA = x00AAAA00 | x55AA55AA;
1110 x28410014 = x3C699796 & ~x55AAFFAA;
1111 x000000FF = a4 & a6;
1112 x000000CC = x000000FF & ~a2;
1113 x284100D8 = x28410014 ^ x000000CC;
1115 x204100D0 = x7373F4F4 & x284100D8;
1116 x3C3CC3FF = x3C3CC3C3 | x000000FF;
1117 x1C3CC32F = x3C3CC3FF & ~x204100D0;
1118 x4969967A = a1 ^ x1C3CC32F;
1119 x10 = x2FD00F5F & a5;
1120 x11 = x10 ^ x4969967A;
// Group x00/x01: (a5 & ~x0C840A00) ^ xD6A98356.
1123 x4CC44CC4 = x4FE55EF4 & ~a2;
1124 x40C040C0 = x4CC44CC4 & ~a3;
1125 xC3C33C3C = ~x3C3CC3C3;
1126 x9669C396 = x55AAFFAA ^ xC3C33C3C;
1127 xD6A98356 = x40C040C0 ^ x9669C396;
1128 x00 = a5 & ~x0C840A00;
1129 x01 = x00 ^ xD6A98356;
// Group x20/x21: (a5 & ~x284100D8) ^ x9A1F2D1B.
1132 xD6E9C3D6 = x40C040C0 | x9669C396;
1133 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
1134 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
1135 x001A000B = a4 & ~x4FE55EF4;
1136 x9A1F2D1B = x9A072D12 | x001A000B;
1137 x20 = a5 & ~x284100D8;
1138 x21 = x20 ^ x9A1F2D1B;
// s4: bitslice DES S-box 4 for the IS_AMD / IS_GENERIC build, expressed as a
// straight-line network of AND, OR, XOR, NOT and AND-NOT gates on u32 words.
//   a1..a6     - the six input words, passed by value.
//   out1..out4 - pointers for the four result words. No store through them is
//                visible in this listing; presumably each xN1 computed below
//                is folded into the matching *outN on a line not shown here -
//                TODO confirm against the full file.
// a6 is used only in the final xN0 step of each of the four groups.
1142 void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
// One scratch word per gate. The hex names appear to encode the truth table of
// the value held (JtR naming convention) - treat that as a hint, not a contract.
1144 u32 x5A5A5A5A, x0F0FF0F0;
// This declaration continues onto the following line.
1145 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
1146 x52FBCA0F, x61C8F93C;
1147 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
1148 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
1149 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
// xN0 / xN1: the last two gates of each of the four result groups.
1150 u32 x00, x01, x10, x11, x20, x21, x30, x31;
// Shared sub-terms built from a1..a5.
1152 x5A5A5A5A = a1 ^ a3;
1153 x0F0FF0F0 = a3 ^ a5;
1154 x33FF33FF = a2 | a4;
1155 x33FFCC00 = a5 ^ x33FF33FF;
1156 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
1157 x0C0CC0C0 = x0F0FF0F0 & ~a2;
1158 x0CF3C03F = a4 ^ x0C0CC0C0;
1159 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
1160 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
1161 x61C8F93C = a2 ^ x52FBCA0F;
// x3C90B3D6 supplies the xN0 step of both group x00/x01 and group x10/x11.
1163 x00C0C03C = x0CF3C03F & x61C8F93C;
1164 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
1165 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
1166 x30908326 = x3B92A366 & ~x0F0F30C0;
1167 x3C90B3D6 = x0C0030F0 ^ x30908326;
// Group x00/x01: (a6 & ~x3C90B3D6) ^ x56E9861E.
1169 x33CC33CC = a2 ^ a4;
1170 x0C0CFFFF = a5 | x0C0CC0C0;
1171 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
1172 x04124C11 = x379E5C99 & ~x33CC33CC;
1173 x56E9861E = x52FBCA0F ^ x04124C11;
1174 x00 = a6 & ~x3C90B3D6;
1175 x01 = x00 ^ x56E9861E;
// Group x10/x11: (x3C90B3D6 & ~a6) ^ ~x56E9861E.
1178 xA91679E1 = ~x56E9861E;
1179 x10 = x3C90B3D6 & ~a6;
1180 x11 = x10 ^ xA91679E1;
// Group x20/x21: (x61C8F93C | a6) ^ xB35C94A6.
1183 x9586CA37 = x3C90B3D6 ^ xA91679E1;
1184 x8402C833 = x9586CA37 & ~x33CC33CC;
1185 x84C2C83F = x00C0C03C | x8402C833;
1186 xB35C94A6 = x379E5C99 ^ x84C2C83F;
1187 x20 = x61C8F93C | a6;
1188 x21 = x20 ^ xB35C94A6;
// Group x30/x31: (a6 & x61C8F93C) ^ xB35C94A6. It uses the same two operands
// as group x20/x21, with AND in place of OR.
1191 x30 = a6 & x61C8F93C;
1192 x31 = x30 ^ xB35C94A6;
// s5: bitslice DES S-box 5 for the IS_AMD / IS_GENERIC build, expressed as a
// straight-line network of AND, OR, XOR, NOT and AND-NOT gates on u32 words.
//   a1..a6     - the six input words, passed by value.
//   out1..out4 - pointers for the four result words. No store through them is
//                visible in this listing; presumably each xN1 computed below
//                is folded into the matching *outN on a line not shown here -
//                TODO confirm against the full file.
// a2 is used only in the final xN0 step of each of the four groups.
1196 void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
// One scratch word per gate. The hex names appear to encode the truth table of
// the value held (JtR naming convention) - treat that as a hint, not a contract.
1198 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
1199 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
1200 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
1201 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
1202 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
1203 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
1204 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
1205 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
// xN0 / xN1: the last two gates of each of the four result groups.
1206 u32 x00, x01, x10, x11, x20, x21, x30, x31;
// Shared sub-terms built from a1, a3, a4, a5 and a6.
1208 x77777777 = a1 | a3;
1209 x77770000 = x77777777 & ~a6;
1210 x22225555 = a1 ^ x77770000;
1211 x11116666 = a3 ^ x22225555;
1212 x1F1F6F6F = a4 | x11116666;
// x5A19784B is the final XOR operand of group x20/x21.
1214 x70700000 = x77770000 & ~a4;
1215 x43433333 = a3 ^ x70700000;
1216 x00430033 = a5 & x43433333;
1217 x55557777 = a1 | x11116666;
1218 x55167744 = x00430033 ^ x55557777;
1219 x5A19784B = a4 ^ x55167744;
// x271C52A7 is the final XOR operand of group x30/x31.
1221 x5A1987B4 = a6 ^ x5A19784B;
1222 x7A3BD7F5 = x22225555 | x5A1987B4;
1223 x003B00F5 = a5 & x7A3BD7F5;
1224 x221955A0 = x22225555 ^ x003B00F5;
1225 x05050707 = a4 & x55557777;
1226 x271C52A7 = x221955A0 ^ x05050707;
// Group x20/x21: (xE97FB1FF & ~a2) ^ x5A19784B.
1228 x2A2A82A0 = x7A3BD7F5 & ~a1;
1229 x6969B193 = x43433333 ^ x2A2A82A0;
1230 x1FE06F90 = a5 ^ x1F1F6F6F;
1231 x16804E00 = x1FE06F90 & ~x6969B193;
1232 xE97FB1FF = ~x16804E00;
1233 x20 = xE97FB1FF & ~a2;
1234 x21 = x20 ^ x5A19784B;
// x0B01234A is not consumed until the xN0 step of group x00/x01.
1237 x43403302 = x43433333 & ~x003B00F5;
1238 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
1239 x37DEFFB7 = x271C52A7 | x35CAED30;
1240 x349ECCB5 = x37DEFFB7 & ~x43403302;
1241 x0B01234A = x1F1F6F6F & ~x349ECCB5;
// Group x30/x31: (x4FC2FBC2 & a2) ^ x271C52A7.
1243 x101884B4 = x5A1987B4 & x349ECCB5;
1244 x0FF8EB24 = x1FE06F90 ^ x101884B4;
1245 x41413333 = x43433333 & x55557777;
1246 x4FF9FB37 = x0FF8EB24 | x41413333;
1247 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
1248 x30 = x4FC2FBC2 & a2;
1249 x31 = x30 ^ x271C52A7;
// Group x00/x01: (x0B01234A | a2) ^ x19B4E593.
1252 x22222222 = a1 ^ x77777777;
1253 x16BCEE97 = x349ECCB5 ^ x22222222;
1254 x0F080B04 = a4 & x0FF8EB24;
1255 x19B4E593 = x16BCEE97 ^ x0F080B04;
1256 x00 = x0B01234A | a2;
1257 x01 = x00 ^ x19B4E593;
// Group x10/x11: (x1F1F6F6F & a2) ^ x6992A63D.
1260 x5C5C5C5C = x1F1F6F6F ^ x43433333;
1261 x4448184C = x5C5C5C5C & ~x19B4E593;
1262 x2DDABE71 = x22225555 ^ x0FF8EB24;
1263 x6992A63D = x4448184C ^ x2DDABE71;
1264 x10 = x1F1F6F6F & a2;
1265 x11 = x10 ^ x6992A63D;
// s6: bitslice DES S-box 6 for the IS_AMD / IS_GENERIC build, expressed as a
// straight-line network of AND, OR, XOR, NOT and AND-NOT gates on u32 words.
//   a1..a6     - the six input words, passed by value.
//   out1..out4 - pointers for the four result words. No store through them is
//                visible in this listing; presumably each xN1 computed below
//                is folded into the matching *outN on a line not shown here -
//                TODO confirm against the full file.
// a4 is used only in the final xN0 step of each of the four groups.
1269 void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
// NOTE(review): x33CC33CC is assigned below, but none of the declaration
// lists visible here names it - presumably it is declared on a line missing
// from this excerpt; confirm.
1272 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
1273 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
1274 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
1275 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
1276 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
1277 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
1278 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
1279 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
// xN0 / xN1: the last two gates of each of the four result groups.
1280 u32 x00, x01, x10, x11, x20, x21, x30, x31;
// Shared sub-terms built from a1, a2, a3, a5 and a6.
1282 x33CC33CC = a2 ^ a5;
1284 x3333FFFF = a2 | a6;
1285 x11115555 = a1 & x3333FFFF;
1286 x22DD6699 = x33CC33CC ^ x11115555;
1287 x22DD9966 = a6 ^ x22DD6699;
1288 x00220099 = a5 & ~x22DD9966;
// x59A31CE6 is the final XOR operand of group x30/x31.
1290 x00551144 = a1 & x22DD9966;
1291 x33662277 = a2 ^ x00551144;
1292 x5A5A5A5A = a1 ^ a3;
1293 x7B7E7A7F = x33662277 | x5A5A5A5A;
1294 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
// Group x30/x31: (x3A6522FF & a4) ^ x59A31CE6.
1296 x09030C06 = a3 & x59A31CE6;
1297 x09030000 = x09030C06 & ~a6;
1298 x336622FF = x00220099 | x33662277;
1299 x3A6522FF = x09030000 ^ x336622FF;
1300 x30 = x3A6522FF & a4;
1301 x31 = x30 ^ x59A31CE6;
// x0FFFB9FD is not consumed until the xN0 step of group x00/x01.
1304 x484D494C = a2 ^ x7B7E7A7F;
1305 x0000B6B3 = a6 & ~x484D494C;
1306 x0F0FB9BC = a3 ^ x0000B6B3;
1307 x00FC00F9 = a5 & ~x09030C06;
1308 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
// Group x20/x21: (x00220099 | a4) ^ x1668B94B.
1310 x5DF75DF7 = a1 | x59A31CE6;
1311 x116600F7 = x336622FF & x5DF75DF7;
1312 x1E69B94B = x0F0FB9BC ^ x116600F7;
1313 x1668B94B = x1E69B94B & ~x09030000;
1314 x20 = x00220099 | a4;
1315 x21 = x20 ^ x1668B94B;
// Group x10/x11: (xCC82CDE5 & ~a4) ^ x5EE1A479.
1318 x7B7B7B7B = a2 | x5A5A5A5A;
1319 x411E5984 = x3A6522FF ^ x7B7B7B7B;
1320 x1FFFFDFD = x11115555 | x0FFFB9FD;
1321 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
1323 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
1324 x004B002D = a5 & ~x3CB4DFD2;
1325 xB7B2B6B3 = ~x484D494C;
1326 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
1327 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
1328 x10 = xCC82CDE5 & ~a4;
1329 x11 = x10 ^ x5EE1A479;
// Group x00/x01: (x0FFFB9FD & ~a4) ^ xC59A2D67.
1332 x0055EEBB = a6 ^ x00551144;
1333 x5A5AECE9 = a1 ^ x0F0FB9BC;
1334 x0050ECA9 = x0055EEBB & x5A5AECE9;
1335 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
1336 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
1337 x00 = x0FFFB9FD & ~a4;
1338 x01 = x00 ^ xC59A2D67;
// s7: bitslice DES S-box 7 for the IS_AMD / IS_GENERIC build, expressed as a
// straight-line network of AND, OR, XOR, NOT and AND-NOT gates on u32 words.
//   a1..a6     - the six input words, passed by value.
//   out1..out4 - pointers for the four result words. No store through them is
//                visible in this listing; presumably each xN1 computed below
//                is folded into the matching *outN on a line not shown here -
//                TODO confirm against the full file.
// a1 is used only in the final xN0 step of each of the four groups.
1342 void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
// One scratch word per gate. The hex names appear to encode the truth table of
// the value held (JtR naming convention) - treat that as a hint, not a contract.
1344 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
1345 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
1346 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
1347 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
1348 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
1349 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
1350 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
1351 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
// xN0 / xN1: the last two gates of each of the four result groups.
1352 u32 x00, x01, x10, x11, x20, x21, x30, x31;
// Shared sub-terms built from a2..a6.
1354 x0FF00FF0 = a4 ^ a5;
1355 x3CC33CC3 = a3 ^ x0FF00FF0;
1356 x00003CC3 = a6 & x3CC33CC3;
1357 x0F000F00 = a4 & x0FF00FF0;
1358 x5A555A55 = a2 ^ x0F000F00;
1359 x00001841 = x00003CC3 & x5A555A55;
// Group x30/x31: (a1 & ~x00001841) ^ x74878E78.
1361 x00000F00 = a6 & x0F000F00;
1362 x33333C33 = a3 ^ x00000F00;
1363 x7B777E77 = x5A555A55 | x33333C33;
1364 x0FF0F00F = a6 ^ x0FF00FF0;
1365 x74878E78 = x7B777E77 ^ x0FF0F00F;
1366 x30 = a1 & ~x00001841;
1367 x31 = x30 ^ x74878E78;
// x694E5A8D is the final XOR operand of group x00/x01.
1370 x003C003C = a5 & ~x3CC33CC3;
1371 x5A7D5A7D = x5A555A55 | x003C003C;
1372 x333300F0 = x00003CC3 ^ x33333C33;
1373 x694E5A8D = x5A7D5A7D ^ x333300F0;
// x699C585B is the final XOR operand of group x20/x21.
1375 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
1376 x000F0303 = a4 & ~x0FF0CCCC;
1377 x5A505854 = x5A555A55 & ~x000F0303;
1378 x33CC000F = a5 ^ x333300F0;
1379 x699C585B = x5A505854 ^ x33CC000F;
// Group x00/x01: (x4F9493BB & ~a1) ^ x694E5A8D.
1381 x7F878F78 = x0F000F00 | x74878E78;
1382 x21101013 = a3 & x699C585B;
1383 x7F979F7B = x7F878F78 | x21101013;
1384 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
1385 x4F9493BB = x7F979F7B ^ x30030CC0;
1386 x00 = x4F9493BB & ~a1;
1387 x01 = x00 ^ x694E5A8D;
// x26DA9867 is the final XOR operand of group x10/x11.
1390 x6F9CDBFB = x699C585B | x4F9493BB;
1391 x0000DBFB = a6 & x6F9CDBFB;
1392 x00005151 = a2 & x0000DBFB;
1393 x26DAC936 = x694E5A8D ^ x4F9493BB;
1394 x26DA9867 = x00005151 ^ x26DAC936;
// Group x20/x21: (x27FFCBCD & a1) ^ x699C585B.
1396 x27DA9877 = x21101013 | x26DA9867;
1397 x27DA438C = x0000DBFB ^ x27DA9877;
1398 x2625C9C9 = a5 ^ x26DAC936;
1399 x27FFCBCD = x27DA438C | x2625C9C9;
1400 x20 = x27FFCBCD & a1;
1401 x21 = x20 ^ x699C585B;
// Group x10/x11: (x97947C7A & ~a1) ^ x26DA9867.
1404 x27FF1036 = x0000DBFB ^ x27FFCBCD;
1405 x27FF103E = x003C003C | x27FF1036;
1406 xB06B6C44 = ~x4F9493BB;
1407 x97947C7A = x27FF103E ^ xB06B6C44;
1408 x10 = x97947C7A & ~a1;
1409 x11 = x10 ^ x26DA9867;
// s8: bitslice DES S-box 8 for the IS_AMD / IS_GENERIC build, expressed as a
// straight-line network of AND, OR, XOR, NOT and AND-NOT gates on u32 words.
//   a1..a6     - the six input words, passed by value.
//   out1..out4 - pointers for the four result words. No store through them is
//                visible in this listing; presumably each xN1 computed below
//                is folded into the matching *outN on a line not shown here -
//                TODO confirm against the full file.
// a6 is used only in the final xN0 step of each of the four groups.
1413 void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
// One scratch word per gate. The hex names appear to encode the truth table of
// the value held (JtR naming convention) - treat that as a hint, not a contract.
1415 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
1416 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
1417 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
1418 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
1419 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
1420 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
1421 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
// xN0 / xN1: the last two gates of each of the four result groups.
1422 u32 x00, x01, x10, x11, x20, x21, x30, x31;
// Shared sub-terms built from a1..a5.
1424 x0C0C0C0C = a3 & ~a2;
1425 x0000F0F0 = a5 & ~a3;
1426 x00FFF00F = a4 ^ x0000F0F0;
1427 x00555005 = a1 & x00FFF00F;
1428 x00515001 = x00555005 & ~x0C0C0C0C;
// x30555745 is not consumed until group x20/x21.
1430 x33000330 = a2 & ~x00FFF00F;
1431 x77555775 = a1 | x33000330;
1432 x30303030 = a2 & ~a3;
1433 x3030CFCF = a5 ^ x30303030;
1434 x30104745 = x77555775 & x3030CFCF;
1435 x30555745 = x00555005 | x30104745;
// Group x10/x11: (x00515001 | a6) ^ xCB164CB3.
1437 xFF000FF0 = ~x00FFF00F;
1438 xCF1048B5 = x30104745 ^ xFF000FF0;
1439 x080A080A = a3 & ~x77555775;
1440 xC71A40BF = xCF1048B5 ^ x080A080A;
1441 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
1442 x10 = x00515001 | a6;
1443 x11 = x10 ^ xCB164CB3;
// xC729695A is the final XOR operand of both group x30/x31 and group x00/x01.
1446 x9E4319E6 = a1 ^ xCB164CB3;
1447 x000019E6 = a5 & x9E4319E6;
1448 xF429738C = a2 ^ xC71A40BF;
1449 xF4296A6A = x000019E6 ^ xF429738C;
1450 xC729695A = x33000330 ^ xF4296A6A;
// Group x20/x21: (x30555745 & a6) ^ x693CD926.
1452 xC47C3D2F = x30555745 ^ xF4296A6A;
1453 xF77F3F3F = a2 | xC47C3D2F;
1454 x9E43E619 = a5 ^ x9E4319E6;
1455 x693CD926 = xF77F3F3F ^ x9E43E619;
1456 x20 = x30555745 & a6;
1457 x21 = x20 ^ x693CD926;
// Group x30/x31: (x56B3803F & a6) ^ xC729695A.
1460 xF719A695 = x3030CFCF ^ xC729695A;
1461 xF4FF73FF = a4 | xF429738C;
1462 x03E6D56A = xF719A695 ^ xF4FF73FF;
1463 x56B3803F = a1 ^ x03E6D56A;
1464 x30 = x56B3803F & a6;
1465 x31 = x30 ^ xC729695A;
// Group x00/x01: (x62B7056B | a6) ^ xC729695A.
1468 xF700A600 = xF719A695 & ~a4;
1469 x61008000 = x693CD926 & xF700A600;
1470 x03B7856B = x00515001 ^ x03E6D56A;
1471 x62B7056B = x61008000 ^ x03B7856B;
1472 x00 = x62B7056B | a6;
1473 x01 = x00 ^ xC729695A;
// SWAP(a, b): exchange the two u32 values pointed to by `a` and `b`.
//   a, b - pointer expressions to u32. Each is expanded twice, so they must
//          be free of side effects (no `p++`).
// The parameters are parenthesized so that compound arguments such as
// `p + 1` dereference the whole expression rather than just `p`.
// The expansion is deliberately a bare { ... } block, not do { } while (0):
// it is valid with or without a trailing semicolon, matching how the other
// braced macros in this file are used.
#define SWAP(a, b) { u32 tmp = *(a); *(a) = *(b); *(b) = tmp; }
// KEYSETxr (x = 0 or 1, r = 0..7): sixteen selection tables.
// Each macro expands to a braced list of 48 assignments that load k00..k47
// from a fixed choice of the K00..K55 identifiers; DES() declares K00..K55 as
// parameters and k00..k47 as locals.
// In the DES() loop (`for (u32 i = 0; i < 2; i++)`) the visible call sites are
// `if (i) KEYSET1r else KEYSET0r`, so the KEYSET0r tables serve the i == 0
// pass and the KEYSET1r tables the i == 1 pass.
// NOTE(review): presumably each table is one DES round's 48-of-56 subkey
// selection (round 8 * x + r) - confirm against the DES key schedule.
// The expansions must stay bare { ... } blocks with no trailing semicolon:
// the `if (i) KEYSET1r else KEYSET0r` form only parses that way, so do not
// convert them to do { } while (0).
1515 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
1516 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
1517 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
1518 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
1519 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
1520 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
1521 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
1522 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
1523 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
1524 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
1525 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
1526 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
1527 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
1528 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
1529 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
1530 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
1532 void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
1534 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
1535 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
1536 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
1537 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
1538 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
1539 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
1540 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
1541 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
1546 for (u32 i = 0; i < 2; i++)
1548 if (i) KEYSET10 else KEYSET00
1550 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1551 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1552 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1553 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1554 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1555 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1556 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1557 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1559 if (i) KEYSET11 else KEYSET01
1561 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1562 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1563 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1564 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1565 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1566 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1567 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1568 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1570 if (i) KEYSET12 else KEYSET02
1572 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1573 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1574 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1575 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1576 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1577 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1578 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1579 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1581 if (i) KEYSET13 else KEYSET03
1583 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1584 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1585 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1586 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1587 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1588 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1589 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1590 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1592 if (i) KEYSET14 else KEYSET04
1594 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1595 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1596 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1597 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1598 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1599 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1600 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1601 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1603 if (i) KEYSET15 else KEYSET05
1605 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1606 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1607 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1608 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1609 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1610 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1611 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1612 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1614 if (i) KEYSET16 else KEYSET06
1616 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1617 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1618 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1619 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1620 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1621 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1622 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1623 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1625 if (i) KEYSET17 else KEYSET07
1627 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1628 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1629 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1630 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1631 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1632 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1633 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1634 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
// transpose32c: reshuffles the bits of the 32 words in data[] in place, using a
// fixed sequence of masked exchange steps (see the swap macro below).
// NOTE(review): the name and the 16/8/4/2/1 step pattern suggest a 32x32
// bit-matrix transpose - confirm against the callers.
1638 void transpose32c (u32 data[32])
// swap(x,y,j,m): t holds the bits, selected by mask m, in which x differs from
// y shifted right by j; the last visible line flips those bits (moved back up
// by j) in y.
// NOTE(review): no declaration of t is visible here, and the embedded numbering
// skips one line inside the macro body - check the full file for both.
1640 #define swap(x,y,j,m) \
1641 t = ((x) ^ ((y) >> (j))) & (m); \
1643 (y) = (y) ^ (t << (j));
// distance 16: each of words 0..15 is paired with the word 16 places above it
1647 swap (data[ 0], data[16], 16, 0x0000ffff);
1648 swap (data[ 1], data[17], 16, 0x0000ffff);
1649 swap (data[ 2], data[18], 16, 0x0000ffff);
1650 swap (data[ 3], data[19], 16, 0x0000ffff);
1651 swap (data[ 4], data[20], 16, 0x0000ffff);
1652 swap (data[ 5], data[21], 16, 0x0000ffff);
1653 swap (data[ 6], data[22], 16, 0x0000ffff);
1654 swap (data[ 7], data[23], 16, 0x0000ffff);
1655 swap (data[ 8], data[24], 16, 0x0000ffff);
1656 swap (data[ 9], data[25], 16, 0x0000ffff);
1657 swap (data[10], data[26], 16, 0x0000ffff);
1658 swap (data[11], data[27], 16, 0x0000ffff);
1659 swap (data[12], data[28], 16, 0x0000ffff);
1660 swap (data[13], data[29], 16, 0x0000ffff);
1661 swap (data[14], data[30], 16, 0x0000ffff);
1662 swap (data[15], data[31], 16, 0x0000ffff);
// words 0..15: distance 8 between words 0..7 and words 8..15 ...
1663 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
1664 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
1665 swap (data[ 2], data[10], 8, 0x00ff00ff);
1666 swap (data[ 3], data[11], 8, 0x00ff00ff);
1667 swap (data[ 4], data[12], 8, 0x00ff00ff);
1668 swap (data[ 5], data[13], 8, 0x00ff00ff);
1669 swap (data[ 6], data[14], 8, 0x00ff00ff);
1670 swap (data[ 7], data[15], 8, 0x00ff00ff);
// ... then distances 4, 2 and 1 inside words 0..7
1671 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
1672 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
1673 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
1674 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
1675 swap (data[ 0], data[ 2], 2, 0x33333333);
1676 swap (data[ 1], data[ 3], 2, 0x33333333);
1677 swap (data[ 0], data[ 1], 1, 0x55555555);
1678 swap (data[ 2], data[ 3], 1, 0x55555555);
1679 swap (data[ 4], data[ 6], 2, 0x33333333);
1680 swap (data[ 5], data[ 7], 2, 0x33333333);
1681 swap (data[ 4], data[ 5], 1, 0x55555555);
1682 swap (data[ 6], data[ 7], 1, 0x55555555);
// the same distance 4, 2 and 1 steps inside words 8..15
1683 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
1684 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
1685 swap (data[10], data[14], 4, 0x0f0f0f0f);
1686 swap (data[11], data[15], 4, 0x0f0f0f0f);
1687 swap (data[ 8], data[10], 2, 0x33333333);
1688 swap (data[ 9], data[11], 2, 0x33333333);
1689 swap (data[ 8], data[ 9], 1, 0x55555555);
1690 swap (data[10], data[11], 1, 0x55555555);
1691 swap (data[12], data[14], 2, 0x33333333);
1692 swap (data[13], data[15], 2, 0x33333333);
1693 swap (data[12], data[13], 1, 0x55555555);
1694 swap (data[14], data[15], 1, 0x55555555);
// words 16..31: the same sequence of steps as for words 0..15
1695 swap (data[16], data[24], 8, 0x00ff00ff);
1696 swap (data[17], data[25], 8, 0x00ff00ff);
1697 swap (data[18], data[26], 8, 0x00ff00ff);
1698 swap (data[19], data[27], 8, 0x00ff00ff);
1699 swap (data[20], data[28], 8, 0x00ff00ff);
1700 swap (data[21], data[29], 8, 0x00ff00ff);
1701 swap (data[22], data[30], 8, 0x00ff00ff);
1702 swap (data[23], data[31], 8, 0x00ff00ff);
1703 swap (data[16], data[20], 4, 0x0f0f0f0f);
1704 swap (data[17], data[21], 4, 0x0f0f0f0f);
1705 swap (data[18], data[22], 4, 0x0f0f0f0f);
1706 swap (data[19], data[23], 4, 0x0f0f0f0f);
1707 swap (data[16], data[18], 2, 0x33333333);
1708 swap (data[17], data[19], 2, 0x33333333);
1709 swap (data[16], data[17], 1, 0x55555555);
1710 swap (data[18], data[19], 1, 0x55555555);
1711 swap (data[20], data[22], 2, 0x33333333);
1712 swap (data[21], data[23], 2, 0x33333333);
1713 swap (data[20], data[21], 1, 0x55555555);
1714 swap (data[22], data[23], 1, 0x55555555);
1715 swap (data[24], data[28], 4, 0x0f0f0f0f);
1716 swap (data[25], data[29], 4, 0x0f0f0f0f);
1717 swap (data[26], data[30], 4, 0x0f0f0f0f);
1718 swap (data[27], data[31], 4, 0x0f0f0f0f);
1719 swap (data[24], data[26], 2, 0x33333333);
1720 swap (data[25], data[27], 2, 0x33333333);
1721 swap (data[24], data[25], 1, 0x55555555);
1722 swap (data[26], data[27], 1, 0x55555555);
1723 swap (data[28], data[30], 2, 0x33333333);
1724 swap (data[29], data[31], 2, 0x33333333);
1725 swap (data[28], data[29], 1, 0x55555555);
1726 swap (data[30], data[31], 1, 0x55555555);
// m03000m: worker behind the kernel entry point m03000_m04, which forwards its
// arguments here. It builds 56 key words, passes them together with pointers to
// D00..D63 to a call, and then compares result words against the digests in
// digests_buf.
// The lines visible in this listing read pws, words_buf_r, digests_buf,
// hashes_shown, digests_cnt and digests_offset; the remaining parameters are
// not referenced on any visible line.
1729 void m03000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
1735 const u32 gid = get_global_id (0);
1736 const u32 lid = get_local_id (0);
// first two 32-bit words of the pws entry that belongs to this work-item
1742 const u32 w0s = pws[gid].i[0];
1743 const u32 w1s = pws[gid].i[1];
// K00..K55: each macro tests a single bit of w0s / w1s and yields -1 (all bits
// set) when that bit is set, otherwise 0. Inside every byte the bits are taken
// from bit 7 down to bit 0. K00..K31 cover the four bytes of w0s, K32..K55 the
// three low bytes of w1s.
1745 #define K00 (((w0s >> ( 0 + 7)) & 1) ? -1 : 0)
1746 #define K01 (((w0s >> ( 0 + 6)) & 1) ? -1 : 0)
1747 #define K02 (((w0s >> ( 0 + 5)) & 1) ? -1 : 0)
1748 #define K03 (((w0s >> ( 0 + 4)) & 1) ? -1 : 0)
1749 #define K04 (((w0s >> ( 0 + 3)) & 1) ? -1 : 0)
1750 #define K05 (((w0s >> ( 0 + 2)) & 1) ? -1 : 0)
1751 #define K06 (((w0s >> ( 0 + 1)) & 1) ? -1 : 0)
1752 #define K07 (((w0s >> ( 0 + 0)) & 1) ? -1 : 0)
1753 #define K08 (((w0s >> ( 8 + 7)) & 1) ? -1 : 0)
1754 #define K09 (((w0s >> ( 8 + 6)) & 1) ? -1 : 0)
1755 #define K10 (((w0s >> ( 8 + 5)) & 1) ? -1 : 0)
1756 #define K11 (((w0s >> ( 8 + 4)) & 1) ? -1 : 0)
1757 #define K12 (((w0s >> ( 8 + 3)) & 1) ? -1 : 0)
1758 #define K13 (((w0s >> ( 8 + 2)) & 1) ? -1 : 0)
1759 #define K14 (((w0s >> ( 8 + 1)) & 1) ? -1 : 0)
1760 #define K15 (((w0s >> ( 8 + 0)) & 1) ? -1 : 0)
1761 #define K16 (((w0s >> (16 + 7)) & 1) ? -1 : 0)
1762 #define K17 (((w0s >> (16 + 6)) & 1) ? -1 : 0)
1763 #define K18 (((w0s >> (16 + 5)) & 1) ? -1 : 0)
1764 #define K19 (((w0s >> (16 + 4)) & 1) ? -1 : 0)
1765 #define K20 (((w0s >> (16 + 3)) & 1) ? -1 : 0)
1766 #define K21 (((w0s >> (16 + 2)) & 1) ? -1 : 0)
1767 #define K22 (((w0s >> (16 + 1)) & 1) ? -1 : 0)
1768 #define K23 (((w0s >> (16 + 0)) & 1) ? -1 : 0)
1769 #define K24 (((w0s >> (24 + 7)) & 1) ? -1 : 0)
1770 #define K25 (((w0s >> (24 + 6)) & 1) ? -1 : 0)
1771 #define K26 (((w0s >> (24 + 5)) & 1) ? -1 : 0)
1772 #define K27 (((w0s >> (24 + 4)) & 1) ? -1 : 0)
1773 #define K28 (((w0s >> (24 + 3)) & 1) ? -1 : 0)
1774 #define K29 (((w0s >> (24 + 2)) & 1) ? -1 : 0)
1775 #define K30 (((w0s >> (24 + 1)) & 1) ? -1 : 0)
1776 #define K31 (((w0s >> (24 + 0)) & 1) ? -1 : 0)
1777 #define K32 (((w1s >> ( 0 + 7)) & 1) ? -1 : 0)
1778 #define K33 (((w1s >> ( 0 + 6)) & 1) ? -1 : 0)
1779 #define K34 (((w1s >> ( 0 + 5)) & 1) ? -1 : 0)
1780 #define K35 (((w1s >> ( 0 + 4)) & 1) ? -1 : 0)
1781 #define K36 (((w1s >> ( 0 + 3)) & 1) ? -1 : 0)
1782 #define K37 (((w1s >> ( 0 + 2)) & 1) ? -1 : 0)
1783 #define K38 (((w1s >> ( 0 + 1)) & 1) ? -1 : 0)
1784 #define K39 (((w1s >> ( 0 + 0)) & 1) ? -1 : 0)
1785 #define K40 (((w1s >> ( 8 + 7)) & 1) ? -1 : 0)
1786 #define K41 (((w1s >> ( 8 + 6)) & 1) ? -1 : 0)
1787 #define K42 (((w1s >> ( 8 + 5)) & 1) ? -1 : 0)
1788 #define K43 (((w1s >> ( 8 + 4)) & 1) ? -1 : 0)
1789 #define K44 (((w1s >> ( 8 + 3)) & 1) ? -1 : 0)
1790 #define K45 (((w1s >> ( 8 + 2)) & 1) ? -1 : 0)
1791 #define K46 (((w1s >> ( 8 + 1)) & 1) ? -1 : 0)
1792 #define K47 (((w1s >> ( 8 + 0)) & 1) ? -1 : 0)
1793 #define K48 (((w1s >> (16 + 7)) & 1) ? -1 : 0)
1794 #define K49 (((w1s >> (16 + 6)) & 1) ? -1 : 0)
1795 #define K50 (((w1s >> (16 + 5)) & 1) ? -1 : 0)
1796 #define K51 (((w1s >> (16 + 4)) & 1) ? -1 : 0)
1797 #define K52 (((w1s >> (16 + 3)) & 1) ? -1 : 0)
1798 #define K53 (((w1s >> (16 + 2)) & 1) ? -1 : 0)
1799 #define K54 (((w1s >> (16 + 1)) & 1) ? -1 : 0)
1800 #define K55 (((w1s >> (16 + 0)) & 1) ? -1 : 0)
// the local id along dimension 1 selects the words_buf_r entry; il_pos is that
// index multiplied by 32
1806 const u32 pc_pos = get_local_id (1);
1808 const u32 il_pos = pc_pos * 32;
// OR the 32 words of words_buf_r[pc_pos] into k00..k31.
// NOTE(review): the declarations of k00..k31 are not visible in this listing -
// presumably they are seeded from K00..K31; confirm against the full file.
1843 k00 |= words_buf_r[pc_pos].b[ 0];
1844 k01 |= words_buf_r[pc_pos].b[ 1];
1845 k02 |= words_buf_r[pc_pos].b[ 2];
1846 k03 |= words_buf_r[pc_pos].b[ 3];
1847 k04 |= words_buf_r[pc_pos].b[ 4];
1848 k05 |= words_buf_r[pc_pos].b[ 5];
1849 k06 |= words_buf_r[pc_pos].b[ 6];
1850 k07 |= words_buf_r[pc_pos].b[ 7];
1851 k08 |= words_buf_r[pc_pos].b[ 8];
1852 k09 |= words_buf_r[pc_pos].b[ 9];
1853 k10 |= words_buf_r[pc_pos].b[10];
1854 k11 |= words_buf_r[pc_pos].b[11];
1855 k12 |= words_buf_r[pc_pos].b[12];
1856 k13 |= words_buf_r[pc_pos].b[13];
1857 k14 |= words_buf_r[pc_pos].b[14];
1858 k15 |= words_buf_r[pc_pos].b[15];
1859 k16 |= words_buf_r[pc_pos].b[16];
1860 k17 |= words_buf_r[pc_pos].b[17];
1861 k18 |= words_buf_r[pc_pos].b[18];
1862 k19 |= words_buf_r[pc_pos].b[19];
1863 k20 |= words_buf_r[pc_pos].b[20];
1864 k21 |= words_buf_r[pc_pos].b[21];
1865 k22 |= words_buf_r[pc_pos].b[22];
1866 k23 |= words_buf_r[pc_pos].b[23];
1867 k24 |= words_buf_r[pc_pos].b[24];
1868 k25 |= words_buf_r[pc_pos].b[25];
1869 k26 |= words_buf_r[pc_pos].b[26];
1870 k27 |= words_buf_r[pc_pos].b[27];
1871 k28 |= words_buf_r[pc_pos].b[28];
1872 k29 |= words_buf_r[pc_pos].b[29];
1873 k30 |= words_buf_r[pc_pos].b[30];
1874 k31 |= words_buf_r[pc_pos].b[31];
1876 // KGS!@#$% including IP
// Only the D words that start out as all-ones are visible below; the initial
// values of the other D words are on lines this listing omits.
1881 u32 D03 = 0xffffffff;
1883 u32 D05 = 0xffffffff;
1884 u32 D06 = 0xffffffff;
1885 u32 D07 = 0xffffffff;
1891 u32 D13 = 0xffffffff;
1894 u32 D16 = 0xffffffff;
1895 u32 D17 = 0xffffffff;
1900 u32 D22 = 0xffffffff;
1902 u32 D24 = 0xffffffff;
1904 u32 D26 = 0xffffffff;
1906 u32 D28 = 0xffffffff;
1907 u32 D29 = 0xffffffff;
1908 u32 D30 = 0xffffffff;
1909 u32 D31 = 0xffffffff;
1918 u32 D40 = 0xffffffff;
1919 u32 D41 = 0xffffffff;
1920 u32 D42 = 0xffffffff;
1922 u32 D44 = 0xffffffff;
1933 u32 D55 = 0xffffffff;
1936 u32 D58 = 0xffffffff;
1939 u32 D61 = 0xffffffff;
1940 u32 D62 = 0xffffffff;
1941 u32 D63 = 0xffffffff;
// Argument list of a call whose callee name sits on an omitted line: 56 key
// words (k00..k31 followed by K32..K55) and pointers to D00..D63.
1945 k00, k01, k02, k03, k04, k05, k06,
1946 k07, k08, k09, k10, k11, k12, k13,
1947 k14, k15, k16, k17, k18, k19, k20,
1948 k21, k22, k23, k24, k25, k26, k27,
1949 k28, k29, k30, k31, K32, K33, K34,
1950 K35, K36, K37, K38, K39, K40, K41,
1951 K42, K43, K44, K45, K46, K47, K48,
1952 K49, K50, K51, K52, K53, K54, K55,
1953 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
1954 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
1955 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
1956 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
1957 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
1958 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
1959 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
1960 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
// fewer than 16 digests: test each digest in turn
2030 if (digests_cnt < 16)
2032 for (u32 d = 0; d < digests_cnt; d++)
2034 const u32 final_hash_pos = digests_offset + d;
// digests with a non-zero hashes_shown entry are skipped
2036 if (hashes_shown[final_hash_pos]) continue;
2040 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
2041 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
// b0 / b1 are 0 or 0xffffffff according to bit i of the two digest words.
// tmpResult accumulates, per bit position, every difference between out[] and
// the digest, so a bit position stays 0 only if it matched for all i.
// NOTE(review): out[], search[] and tmpResult are declared on lines not visible
// here - presumably out[] holds the 64 result words; confirm.
2048 for (int i = 0; i < 32; i++)
2050 const u32 b0 = -((search[0] >> i) & 1);
2051 const u32 b1 = -((search[1] >> i) & 1);
2053 tmpResult |= out[ 0 + i] ^ b0;
2054 tmpResult |= out[32 + i] ^ b1;
// every bit position differed somewhere: nothing found for this digest
2057 if (tmpResult == 0xffffffff) continue;
// index of the highest bit of tmpResult that is still zero
2059 const u32 slice = 31 - clz (~tmpResult);
2061 const u32 r0 = search[0];
2062 const u32 r1 = search[1];
// NOTE(review): presumably the branch for 16 or more digests (the else line is
// not visible): out[] is copied in reversed order into out0 / out1, both arrays
// go through transpose32c, and each of the 32 entries is then read back with
// the index reversed again.
2077 for (int i = 0; i < 32; i++)
2079 out0[i] = out[ 0 + 31 - i];
2080 out1[i] = out[32 + 31 - i];
2083 transpose32c (out0);
2084 transpose32c (out1);
2089 for (int slice = 0; slice < 32; slice++)
2091 const u32 r0 = out0[31 - slice];
2092 const u32 r1 = out1[31 - slice];
// m03000s: worker behind the kernel entry point m03000_s04, which forwards its
// arguments here. It builds 56 key words, passes them together with pointers to
// D00..D63 to a call, and then compares D00..D63 against the two digest words
// read from digests_buf[0].
// The lines visible in this listing read only pws, words_buf_r and digests_buf;
// the remaining parameters are not referenced on any visible line.
2101 void m03000s (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
2107 const u32 gid = get_global_id (0);
2108 const u32 lid = get_local_id (0);
// the digest searched for: words 0 and 1 of entry 0 of digests_buf
2114 const u32 s0 = digests_buf[0].digest_buf[0];
2115 const u32 s1 = digests_buf[0].digest_buf[1];
// S00..S63: each macro tests a single bit of the digest and yields -1 (all bits
// set) when it is set, otherwise 0. S00..S31 are bits 0..31 of s0, S32..S63
// are bits 0..31 of s1.
2117 #define S00 (((s0 >> 0) & 1) ? -1 : 0)
2118 #define S01 (((s0 >> 1) & 1) ? -1 : 0)
2119 #define S02 (((s0 >> 2) & 1) ? -1 : 0)
2120 #define S03 (((s0 >> 3) & 1) ? -1 : 0)
2121 #define S04 (((s0 >> 4) & 1) ? -1 : 0)
2122 #define S05 (((s0 >> 5) & 1) ? -1 : 0)
2123 #define S06 (((s0 >> 6) & 1) ? -1 : 0)
2124 #define S07 (((s0 >> 7) & 1) ? -1 : 0)
2125 #define S08 (((s0 >> 8) & 1) ? -1 : 0)
2126 #define S09 (((s0 >> 9) & 1) ? -1 : 0)
2127 #define S10 (((s0 >> 10) & 1) ? -1 : 0)
2128 #define S11 (((s0 >> 11) & 1) ? -1 : 0)
2129 #define S12 (((s0 >> 12) & 1) ? -1 : 0)
2130 #define S13 (((s0 >> 13) & 1) ? -1 : 0)
2131 #define S14 (((s0 >> 14) & 1) ? -1 : 0)
2132 #define S15 (((s0 >> 15) & 1) ? -1 : 0)
2133 #define S16 (((s0 >> 16) & 1) ? -1 : 0)
2134 #define S17 (((s0 >> 17) & 1) ? -1 : 0)
2135 #define S18 (((s0 >> 18) & 1) ? -1 : 0)
2136 #define S19 (((s0 >> 19) & 1) ? -1 : 0)
2137 #define S20 (((s0 >> 20) & 1) ? -1 : 0)
2138 #define S21 (((s0 >> 21) & 1) ? -1 : 0)
2139 #define S22 (((s0 >> 22) & 1) ? -1 : 0)
2140 #define S23 (((s0 >> 23) & 1) ? -1 : 0)
2141 #define S24 (((s0 >> 24) & 1) ? -1 : 0)
2142 #define S25 (((s0 >> 25) & 1) ? -1 : 0)
2143 #define S26 (((s0 >> 26) & 1) ? -1 : 0)
2144 #define S27 (((s0 >> 27) & 1) ? -1 : 0)
2145 #define S28 (((s0 >> 28) & 1) ? -1 : 0)
2146 #define S29 (((s0 >> 29) & 1) ? -1 : 0)
2147 #define S30 (((s0 >> 30) & 1) ? -1 : 0)
2148 #define S31 (((s0 >> 31) & 1) ? -1 : 0)
2149 #define S32 (((s1 >> 0) & 1) ? -1 : 0)
2150 #define S33 (((s1 >> 1) & 1) ? -1 : 0)
2151 #define S34 (((s1 >> 2) & 1) ? -1 : 0)
2152 #define S35 (((s1 >> 3) & 1) ? -1 : 0)
2153 #define S36 (((s1 >> 4) & 1) ? -1 : 0)
2154 #define S37 (((s1 >> 5) & 1) ? -1 : 0)
2155 #define S38 (((s1 >> 6) & 1) ? -1 : 0)
2156 #define S39 (((s1 >> 7) & 1) ? -1 : 0)
2157 #define S40 (((s1 >> 8) & 1) ? -1 : 0)
2158 #define S41 (((s1 >> 9) & 1) ? -1 : 0)
2159 #define S42 (((s1 >> 10) & 1) ? -1 : 0)
2160 #define S43 (((s1 >> 11) & 1) ? -1 : 0)
2161 #define S44 (((s1 >> 12) & 1) ? -1 : 0)
2162 #define S45 (((s1 >> 13) & 1) ? -1 : 0)
2163 #define S46 (((s1 >> 14) & 1) ? -1 : 0)
2164 #define S47 (((s1 >> 15) & 1) ? -1 : 0)
2165 #define S48 (((s1 >> 16) & 1) ? -1 : 0)
2166 #define S49 (((s1 >> 17) & 1) ? -1 : 0)
2167 #define S50 (((s1 >> 18) & 1) ? -1 : 0)
2168 #define S51 (((s1 >> 19) & 1) ? -1 : 0)
2169 #define S52 (((s1 >> 20) & 1) ? -1 : 0)
2170 #define S53 (((s1 >> 21) & 1) ? -1 : 0)
2171 #define S54 (((s1 >> 22) & 1) ? -1 : 0)
2172 #define S55 (((s1 >> 23) & 1) ? -1 : 0)
2173 #define S56 (((s1 >> 24) & 1) ? -1 : 0)
2174 #define S57 (((s1 >> 25) & 1) ? -1 : 0)
2175 #define S58 (((s1 >> 26) & 1) ? -1 : 0)
2176 #define S59 (((s1 >> 27) & 1) ? -1 : 0)
2177 #define S60 (((s1 >> 28) & 1) ? -1 : 0)
2178 #define S61 (((s1 >> 29) & 1) ? -1 : 0)
2179 #define S62 (((s1 >> 30) & 1) ? -1 : 0)
2180 #define S63 (((s1 >> 31) & 1) ? -1 : 0)
// first two 32-bit words of the pws entry that belongs to this work-item
2186 const u32 w0s = pws[gid].i[0];
2187 const u32 w1s = pws[gid].i[1];
// K00..K55: each macro tests a single bit of w0s / w1s and yields -1 (all bits
// set) when that bit is set, otherwise 0. Inside every byte the bits are taken
// from bit 7 down to bit 0. K00..K31 cover the four bytes of w0s, K32..K55 the
// three low bytes of w1s.
2189 #define K00 (((w0s >> ( 0 + 7)) & 1) ? -1 : 0)
2190 #define K01 (((w0s >> ( 0 + 6)) & 1) ? -1 : 0)
2191 #define K02 (((w0s >> ( 0 + 5)) & 1) ? -1 : 0)
2192 #define K03 (((w0s >> ( 0 + 4)) & 1) ? -1 : 0)
2193 #define K04 (((w0s >> ( 0 + 3)) & 1) ? -1 : 0)
2194 #define K05 (((w0s >> ( 0 + 2)) & 1) ? -1 : 0)
2195 #define K06 (((w0s >> ( 0 + 1)) & 1) ? -1 : 0)
2196 #define K07 (((w0s >> ( 0 + 0)) & 1) ? -1 : 0)
2197 #define K08 (((w0s >> ( 8 + 7)) & 1) ? -1 : 0)
2198 #define K09 (((w0s >> ( 8 + 6)) & 1) ? -1 : 0)
2199 #define K10 (((w0s >> ( 8 + 5)) & 1) ? -1 : 0)
2200 #define K11 (((w0s >> ( 8 + 4)) & 1) ? -1 : 0)
2201 #define K12 (((w0s >> ( 8 + 3)) & 1) ? -1 : 0)
2202 #define K13 (((w0s >> ( 8 + 2)) & 1) ? -1 : 0)
2203 #define K14 (((w0s >> ( 8 + 1)) & 1) ? -1 : 0)
2204 #define K15 (((w0s >> ( 8 + 0)) & 1) ? -1 : 0)
2205 #define K16 (((w0s >> (16 + 7)) & 1) ? -1 : 0)
2206 #define K17 (((w0s >> (16 + 6)) & 1) ? -1 : 0)
2207 #define K18 (((w0s >> (16 + 5)) & 1) ? -1 : 0)
2208 #define K19 (((w0s >> (16 + 4)) & 1) ? -1 : 0)
2209 #define K20 (((w0s >> (16 + 3)) & 1) ? -1 : 0)
2210 #define K21 (((w0s >> (16 + 2)) & 1) ? -1 : 0)
2211 #define K22 (((w0s >> (16 + 1)) & 1) ? -1 : 0)
2212 #define K23 (((w0s >> (16 + 0)) & 1) ? -1 : 0)
2213 #define K24 (((w0s >> (24 + 7)) & 1) ? -1 : 0)
2214 #define K25 (((w0s >> (24 + 6)) & 1) ? -1 : 0)
2215 #define K26 (((w0s >> (24 + 5)) & 1) ? -1 : 0)
2216 #define K27 (((w0s >> (24 + 4)) & 1) ? -1 : 0)
2217 #define K28 (((w0s >> (24 + 3)) & 1) ? -1 : 0)
2218 #define K29 (((w0s >> (24 + 2)) & 1) ? -1 : 0)
2219 #define K30 (((w0s >> (24 + 1)) & 1) ? -1 : 0)
2220 #define K31 (((w0s >> (24 + 0)) & 1) ? -1 : 0)
2221 #define K32 (((w1s >> ( 0 + 7)) & 1) ? -1 : 0)
2222 #define K33 (((w1s >> ( 0 + 6)) & 1) ? -1 : 0)
2223 #define K34 (((w1s >> ( 0 + 5)) & 1) ? -1 : 0)
2224 #define K35 (((w1s >> ( 0 + 4)) & 1) ? -1 : 0)
2225 #define K36 (((w1s >> ( 0 + 3)) & 1) ? -1 : 0)
2226 #define K37 (((w1s >> ( 0 + 2)) & 1) ? -1 : 0)
2227 #define K38 (((w1s >> ( 0 + 1)) & 1) ? -1 : 0)
2228 #define K39 (((w1s >> ( 0 + 0)) & 1) ? -1 : 0)
2229 #define K40 (((w1s >> ( 8 + 7)) & 1) ? -1 : 0)
2230 #define K41 (((w1s >> ( 8 + 6)) & 1) ? -1 : 0)
2231 #define K42 (((w1s >> ( 8 + 5)) & 1) ? -1 : 0)
2232 #define K43 (((w1s >> ( 8 + 4)) & 1) ? -1 : 0)
2233 #define K44 (((w1s >> ( 8 + 3)) & 1) ? -1 : 0)
2234 #define K45 (((w1s >> ( 8 + 2)) & 1) ? -1 : 0)
2235 #define K46 (((w1s >> ( 8 + 1)) & 1) ? -1 : 0)
2236 #define K47 (((w1s >> ( 8 + 0)) & 1) ? -1 : 0)
2237 #define K48 (((w1s >> (16 + 7)) & 1) ? -1 : 0)
2238 #define K49 (((w1s >> (16 + 6)) & 1) ? -1 : 0)
2239 #define K50 (((w1s >> (16 + 5)) & 1) ? -1 : 0)
2240 #define K51 (((w1s >> (16 + 4)) & 1) ? -1 : 0)
2241 #define K52 (((w1s >> (16 + 3)) & 1) ? -1 : 0)
2242 #define K53 (((w1s >> (16 + 2)) & 1) ? -1 : 0)
2243 #define K54 (((w1s >> (16 + 1)) & 1) ? -1 : 0)
2244 #define K55 (((w1s >> (16 + 0)) & 1) ? -1 : 0)
// the local id along dimension 1 selects the words_buf_r entry; il_pos is that
// index multiplied by 32
2250 const u32 pc_pos = get_local_id (1);
2252 const u32 il_pos = pc_pos * 32;
// OR the 32 words of words_buf_r[pc_pos] into k00..k31.
// NOTE(review): the declarations of k00..k31 are not visible in this listing -
// presumably they are seeded from K00..K31; confirm against the full file.
2287 k00 |= words_buf_r[pc_pos].b[ 0];
2288 k01 |= words_buf_r[pc_pos].b[ 1];
2289 k02 |= words_buf_r[pc_pos].b[ 2];
2290 k03 |= words_buf_r[pc_pos].b[ 3];
2291 k04 |= words_buf_r[pc_pos].b[ 4];
2292 k05 |= words_buf_r[pc_pos].b[ 5];
2293 k06 |= words_buf_r[pc_pos].b[ 6];
2294 k07 |= words_buf_r[pc_pos].b[ 7];
2295 k08 |= words_buf_r[pc_pos].b[ 8];
2296 k09 |= words_buf_r[pc_pos].b[ 9];
2297 k10 |= words_buf_r[pc_pos].b[10];
2298 k11 |= words_buf_r[pc_pos].b[11];
2299 k12 |= words_buf_r[pc_pos].b[12];
2300 k13 |= words_buf_r[pc_pos].b[13];
2301 k14 |= words_buf_r[pc_pos].b[14];
2302 k15 |= words_buf_r[pc_pos].b[15];
2303 k16 |= words_buf_r[pc_pos].b[16];
2304 k17 |= words_buf_r[pc_pos].b[17];
2305 k18 |= words_buf_r[pc_pos].b[18];
2306 k19 |= words_buf_r[pc_pos].b[19];
2307 k20 |= words_buf_r[pc_pos].b[20];
2308 k21 |= words_buf_r[pc_pos].b[21];
2309 k22 |= words_buf_r[pc_pos].b[22];
2310 k23 |= words_buf_r[pc_pos].b[23];
2311 k24 |= words_buf_r[pc_pos].b[24];
2312 k25 |= words_buf_r[pc_pos].b[25];
2313 k26 |= words_buf_r[pc_pos].b[26];
2314 k27 |= words_buf_r[pc_pos].b[27];
2315 k28 |= words_buf_r[pc_pos].b[28];
2316 k29 |= words_buf_r[pc_pos].b[29];
2317 k30 |= words_buf_r[pc_pos].b[30];
2318 k31 |= words_buf_r[pc_pos].b[31];
2320 // KGS!@#$% including IP
// Only the D words that start out as all-ones are visible below; the initial
// values of the other D words are on lines this listing omits.
2325 u32 D03 = 0xffffffff;
2327 u32 D05 = 0xffffffff;
2328 u32 D06 = 0xffffffff;
2329 u32 D07 = 0xffffffff;
2335 u32 D13 = 0xffffffff;
2338 u32 D16 = 0xffffffff;
2339 u32 D17 = 0xffffffff;
2344 u32 D22 = 0xffffffff;
2346 u32 D24 = 0xffffffff;
2348 u32 D26 = 0xffffffff;
2350 u32 D28 = 0xffffffff;
2351 u32 D29 = 0xffffffff;
2352 u32 D30 = 0xffffffff;
2353 u32 D31 = 0xffffffff;
2362 u32 D40 = 0xffffffff;
2363 u32 D41 = 0xffffffff;
2364 u32 D42 = 0xffffffff;
2366 u32 D44 = 0xffffffff;
2377 u32 D55 = 0xffffffff;
2380 u32 D58 = 0xffffffff;
2383 u32 D61 = 0xffffffff;
2384 u32 D62 = 0xffffffff;
2385 u32 D63 = 0xffffffff;
// Argument list of a call whose callee name sits on an omitted line: 56 key
// words (k00..k31 followed by K32..K55) and pointers to D00..D63.
2389 k00, k01, k02, k03, k04, k05, k06,
2390 k07, k08, k09, k10, k11, k12, k13,
2391 k14, k15, k16, k17, k18, k19, k20,
2392 k21, k22, k23, k24, k25, k26, k27,
2393 k28, k29, k30, k31, K32, K33, K34,
2394 K35, K36, K37, K38, K39, K40, K41,
2395 K42, K43, K44, K45, K46, K47, K48,
2396 K49, K50, K51, K52, K53, K54, K55,
2397 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
2398 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
2399 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
2400 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
2401 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
2402 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
2403 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
2404 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
// Compare D00..D63 with the expanded digest bits in four groups of 16 words.
// tmpResult accumulates every difference per bit position; once it is all-ones
// no bit position can still match, so the function returns early.
// NOTE(review): the declaration of tmpResult is on a line not visible here.
2409 tmpResult |= D00 ^ S00;
2410 tmpResult |= D01 ^ S01;
2411 tmpResult |= D02 ^ S02;
2412 tmpResult |= D03 ^ S03;
2413 tmpResult |= D04 ^ S04;
2414 tmpResult |= D05 ^ S05;
2415 tmpResult |= D06 ^ S06;
2416 tmpResult |= D07 ^ S07;
2417 tmpResult |= D08 ^ S08;
2418 tmpResult |= D09 ^ S09;
2419 tmpResult |= D10 ^ S10;
2420 tmpResult |= D11 ^ S11;
2421 tmpResult |= D12 ^ S12;
2422 tmpResult |= D13 ^ S13;
2423 tmpResult |= D14 ^ S14;
2424 tmpResult |= D15 ^ S15;
2426 if (tmpResult == 0xffffffff) return;
2428 tmpResult |= D16 ^ S16;
2429 tmpResult |= D17 ^ S17;
2430 tmpResult |= D18 ^ S18;
2431 tmpResult |= D19 ^ S19;
2432 tmpResult |= D20 ^ S20;
2433 tmpResult |= D21 ^ S21;
2434 tmpResult |= D22 ^ S22;
2435 tmpResult |= D23 ^ S23;
2436 tmpResult |= D24 ^ S24;
2437 tmpResult |= D25 ^ S25;
2438 tmpResult |= D26 ^ S26;
2439 tmpResult |= D27 ^ S27;
2440 tmpResult |= D28 ^ S28;
2441 tmpResult |= D29 ^ S29;
2442 tmpResult |= D30 ^ S30;
2443 tmpResult |= D31 ^ S31;
2445 if (tmpResult == 0xffffffff) return;
2447 tmpResult |= D32 ^ S32;
2448 tmpResult |= D33 ^ S33;
2449 tmpResult |= D34 ^ S34;
2450 tmpResult |= D35 ^ S35;
2451 tmpResult |= D36 ^ S36;
2452 tmpResult |= D37 ^ S37;
2453 tmpResult |= D38 ^ S38;
2454 tmpResult |= D39 ^ S39;
2455 tmpResult |= D40 ^ S40;
2456 tmpResult |= D41 ^ S41;
2457 tmpResult |= D42 ^ S42;
2458 tmpResult |= D43 ^ S43;
2459 tmpResult |= D44 ^ S44;
2460 tmpResult |= D45 ^ S45;
2461 tmpResult |= D46 ^ S46;
2462 tmpResult |= D47 ^ S47;
2464 if (tmpResult == 0xffffffff) return;
2466 tmpResult |= D48 ^ S48;
2467 tmpResult |= D49 ^ S49;
2468 tmpResult |= D50 ^ S50;
2469 tmpResult |= D51 ^ S51;
2470 tmpResult |= D52 ^ S52;
2471 tmpResult |= D53 ^ S53;
2472 tmpResult |= D54 ^ S54;
2473 tmpResult |= D55 ^ S55;
2474 tmpResult |= D56 ^ S56;
2475 tmpResult |= D57 ^ S57;
2476 tmpResult |= D58 ^ S58;
2477 tmpResult |= D59 ^ S59;
2478 tmpResult |= D60 ^ S60;
2479 tmpResult |= D61 ^ S61;
2480 tmpResult |= D62 ^ S62;
2481 tmpResult |= D63 ^ S63;
2483 if (tmpResult == 0xffffffff) return;
// index of the highest bit of tmpResult that is still zero
2485 const u32 slice = 31 - clz (~tmpResult);
2491 // transpose bitslice mod: beware of race conditions, *in and *out need to be different buffers
// m03000_tm: each work-item takes one 32-bit word of mod[] and spreads its bits
// over the 32 words of one words_buf_r entry.
// gid / 32 selects the words_buf_r entry, gid % 32 the bit position that this
// work-item writes inside each of that entry's words.
2494 __kernel void m03000_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
2496 const u32 gid = get_global_id (0);
2498 const u32 block = gid / 32;
2499 const u32 slice = gid % 32;
2501 const u32 w0 = mod[gid];
// i steps over the four bytes of w0; within a byte, bit 7 goes to b[i + 0] and
// bit 0 goes to b[i + 7]. The bits are only OR-ed in (atomically), never
// cleared, so whatever was already set in the destination words stays set.
2503 for (int i = 0; i < 32; i += 8)
2505 atomic_or (&words_buf_r[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice));
2506 atomic_or (&words_buf_r[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice));
2507 atomic_or (&words_buf_r[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice));
2508 atomic_or (&words_buf_r[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice));
2509 atomic_or (&words_buf_r[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice));
2510 atomic_or (&words_buf_r[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice));
2511 atomic_or (&words_buf_r[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice));
2512 atomic_or (&words_buf_r[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice));
// m03000_m04: kernel entry point. Work-items whose global id is at or beyond
// gid_max return immediately; all others forward every argument except
// combs_mode and gid_max to m03000m.
2516 __kernel void m03000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2522 const u32 gid = get_global_id (0);
// lid is not used on any line visible in this listing
2523 const u32 lid = get_local_id (0);
2525 if (gid >= gid_max) return;
2531 m03000m (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
// m03000_m08: kernel entry point with the same parameter list as m03000_m04.
// No statements belonging to its body are visible in this listing.
2534 __kernel void m03000_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// m03000_m16: kernel entry point with the same parameter list as m03000_m04.
// No statements belonging to its body are visible in this listing.
2538 __kernel void m03000_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Kernel entry point m03000_s04: bounds-checks the work-item against gid_max
// and then hands the kernel arguments to the m03000s helper.
// NOTE(review): the "_s" suffix and the m03000s helper presumably select the
// single-hash comparison path (cf. COMPARE_S at the top of the file) -- confirm.
2542 __kernel void m03000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
// Global work-item index in dimension 0.
2548 const u32 gid = get_global_id (0);
// Work-item index within its work-group in dimension 0.
// NOTE(review): lid is not referenced by any later statement visible in this
// kernel; it looks removable -- confirm no macro depends on it.
2549 const u32 lid = get_local_id (0);
// Work-items whose global index is at or beyond gid_max do no work.
2551 if (gid >= gid_max) return;
// Forward the kernel arguments unchanged to m03000s. combs_mode and gid_max
// are not part of the call; every other kernel parameter is passed through.
2557 m03000s (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
// Kernel entry point m03000_s08. Its parameter list is identical to that of
// m03000_s04; no body statements are visible for it in this part of the file.
// NOTE(review): presumably an intentionally empty variant that this bitslice
// implementation does not use -- confirm against the host-side kernel selection.
2560 __kernel void m03000_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2564 __kernel void m03000_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)