2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes were taken from JtR, license below
9 #include "include/constants.h"
10 #include "include/kernel_vendor.h"
17 #include "include/kernel_functions.c"
18 #include "OpenCL/types_ocl.c"
19 #include "OpenCL/common.c"
21 #define COMPARE_S "OpenCL/check_single_comp4_bs.c"
22 #define COMPARE_M "OpenCL/check_multi_comp4_bs.c"
41 // Bitslice DES S-boxes with LOP3.LUT instructions
42 // For NVIDIA Maxwell architecture and CUDA 7.5 RC
43 // by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
45 // Gate counts: 25 24 25 18 25 24 24 23
47 // Depth: 8 7 7 6 8 10 10 8
50 // Note that same S-box function with a lower gate count isn't necessarily faster.
52 // These Boolean expressions corresponding to DES S-boxes were
53 // discovered by <deeplearningjohndoe at gmail.com>
55 // This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
56 // Redistribution and use in source and binary forms, with or without
57 // modification, are permitted.
59 // The underlying mathematical formulas are NOT copyrighted.
62 #define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));
64 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
66 LUT(xAA55AA5500550055, a1, a4, a6, 0xC1)
67 LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
68 LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
69 LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
70 LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
71 LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
72 LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
73 LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
74 LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
75 LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
76 LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
77 LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
78 LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
79 LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
80 LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
81 LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
82 LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
83 LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
84 LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
85 LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
86 LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
87 LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
88 LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
89 LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
90 LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)
98 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
100 LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97)
101 LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
102 LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76)
103 LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
104 LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
105 LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65)
106 LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
107 LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
108 LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
109 LUT(x3333CCCC00000000, a2, a5, a6, 0x14)
110 LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
111 LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
112 LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
113 LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
114 LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
115 LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
116 LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
117 LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
118 LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
119 LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
120 LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
121 LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
122 LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
123 LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)
131 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
133 LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
134 LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
135 LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
136 LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
137 LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
138 LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
139 LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
140 LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49)
141 LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
142 LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
143 LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
144 LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
145 LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
146 LUT(x500F500F500F500F, a1, a3, a4, 0x98)
147 LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
148 LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
149 LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
150 LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
151 LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
152 LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
153 LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
154 LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
155 LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
156 LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
157 LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)
165 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
167 LUT(x55F055F055F055F0, a1, a3, a4, 0x72)
168 LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
169 LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59)
170 LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
171 LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
172 LUT(x9999666699996666, a1, a2, a5, 0x69)
173 LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
174 LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
175 LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
176 LUT(x4848484848484848, a1, a2, a3, 0x12)
177 LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
178 LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
179 LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
180 LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
181 LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
182 LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
183 LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
184 LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)
192 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
194 LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
195 LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9)
196 LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
197 LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
198 LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
199 LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
200 LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
201 LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
202 LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
203 LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
204 LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
205 LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
206 LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
207 LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
208 LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
209 LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
210 LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
211 LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
212 LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
213 LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
214 LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
215 LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
216 LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
217 LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
218 LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)
226 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
228 LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2)
229 LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
230 LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
231 LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
232 LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
233 LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
234 LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
235 LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
236 LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
237 LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
238 LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
239 LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
240 LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
241 LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
242 LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
243 LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
244 LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
245 LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
246 LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
247 LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
248 LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
249 LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
250 LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
251 LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)
259 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
261 LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
262 LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
263 LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
264 LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
265 LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
266 LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
267 LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
268 LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
269 LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
270 LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
271 LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
272 LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
273 LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
274 LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
275 LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
276 LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23)
277 LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
278 LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
279 LUT(xA050A050A050A050, a1, a3, a4, 0x21)
280 LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
281 LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
282 LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
283 LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
284 LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)
292 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
294 LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
295 LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
296 LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
297 LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
298 LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
299 LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
300 LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
301 LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
302 LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
303 LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
304 LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
305 LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
306 LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
307 LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
308 LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
309 LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
310 LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
311 LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
312 LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
313 LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
314 LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
315 LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
316 LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)
327 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
328 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
330 * Gate counts: 49 44 46 33 48 46 46 41
333 * Several same-gate-count expressions for each S-box are included (for use on
334 * different CPUs/GPUs).
336 * These Boolean expressions corresponding to DES S-boxes have been generated
337 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
338 * John the Ripper password cracker: http://www.openwall.com/john/
339 * Being mathematical formulas, they are not copyrighted and are free for reuse
342 * This file (a specific representation of the S-box expressions, surrounding
343 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
344 * Redistribution and use in source and binary forms, with or without
345 * modification, are permitted. (This is a heavily cut-down "BSD license".)
347 * The effort has been sponsored by Rapid7: http://www.rapid7.com
350 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
352 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
354 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
355 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
356 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
357 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
358 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
359 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
360 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
361 u32 x00, x01, x10, x11, x20, x21, x30, x31;
363 x55005500 = a1 & ~a5;
364 x5A0F5A0F = a4 ^ x55005500;
367 x22226666 = x3333FFFF & x66666666;
368 x2D2D6969 = a4 ^ x22226666;
369 x25202160 = x2D2D6969 & ~x5A0F5A0F;
372 x33CCCC33 = a3 ^ x00FFFF00;
373 x4803120C = x5A0F5A0F & ~x33CCCC33;
374 x2222FFFF = a6 | x22226666;
375 x6A21EDF3 = x4803120C ^ x2222FFFF;
376 x4A01CC93 = x6A21EDF3 & ~x25202160;
379 x7F75FFFF = x6A21EDF3 | x5555FFFF;
380 x00D20096 = a5 & ~x2D2D6969;
381 x7FA7FF69 = x7F75FFFF ^ x00D20096;
383 x0A0A0000 = a4 & ~x5555FFFF;
384 x0AD80096 = x00D20096 ^ x0A0A0000;
385 x00999900 = x00FFFF00 & ~x66666666;
386 x0AD99996 = x0AD80096 | x00999900;
388 x22332233 = a3 & ~x55005500;
389 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
390 x054885C0 = x257AA5F0 & ~x22332233;
391 xFAB77A3F = ~x054885C0;
392 x2221EDF3 = x3333FFFF & x6A21EDF3;
393 xD89697CC = xFAB77A3F ^ x2221EDF3;
394 x20 = x7FA7FF69 & ~a2;
395 x21 = x20 ^ xD89697CC;
398 x05B77AC0 = x00FFFF00 ^ x054885C0;
399 x05F77AD6 = x00D20096 | x05B77AC0;
400 x36C48529 = x3333FFFF ^ x05F77AD6;
401 x6391D07C = a1 ^ x36C48529;
402 xBB0747B0 = xD89697CC ^ x6391D07C;
403 x00 = x25202160 | a2;
404 x01 = x00 ^ xBB0747B0;
407 x4C460000 = x3333FFFF ^ x7F75FFFF;
408 x4EDF9996 = x0AD99996 | x4C460000;
409 x2D4E49EA = x6391D07C ^ x4EDF9996;
410 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
411 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
412 x10 = x4A01CC93 | a2;
413 x11 = x10 ^ x96B1B65A;
416 x5AFF5AFF = a5 | x5A0F5A0F;
417 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
418 x4201C010 = x4A01CC93 & x6391D07C;
419 x10B0D205 = x52B11215 ^ x4201C010;
420 x30 = x10B0D205 | a2;
421 x31 = x30 ^ x0AD99996;
425 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
428 u32 x55550000, x00AA00FF, x33BB33FF;
429 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
430 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
431 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
432 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
433 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
434 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
435 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
436 u32 x00, x01, x10, x11, x20, x21, x30, x31;
440 x55550000 = a1 & ~a6;
441 x00AA00FF = a5 & ~x55550000;
442 x33BB33FF = a2 | x00AA00FF;
444 x33CC0000 = x33CC33CC & ~a6;
445 x11441144 = a1 & x33CC33CC;
446 x11BB11BB = a5 ^ x11441144;
447 x003311BB = x11BB11BB & ~x33CC0000;
450 x336600FF = x00AA00FF ^ x33CC0000;
451 x332200FF = x33BB33FF & x336600FF;
452 x332200F0 = x332200FF & ~x00000F0F;
454 x0302000F = a3 & x332200FF;
456 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
457 x33CCCC33 = a6 ^ x33CC33CC;
458 x33CCC030 = x33CCCC33 & ~x00000F0F;
459 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
460 x10 = a4 & ~x332200F0;
461 x11 = x10 ^ x9A646A95;
464 x00333303 = a2 & ~x33CCC030;
465 x118822B8 = x11BB11BB ^ x00333303;
466 xA8208805 = xA9A8AAA5 & ~x118822B8;
467 x3CC3C33C = a3 ^ x33CCCC33;
468 x94E34B39 = xA8208805 ^ x3CC3C33C;
469 x00 = x33BB33FF & ~a4;
470 x01 = x00 ^ x94E34B39;
473 x0331330C = x0302000F ^ x00333303;
474 x3FF3F33C = x3CC3C33C | x0331330C;
475 xA9DF596A = x33BB33FF ^ x9A646A95;
476 xA9DF5F6F = x00000F0F | xA9DF596A;
477 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
479 xA9466A6A = x332200FF ^ x9A646A95;
480 x3DA52153 = x94E34B39 ^ xA9466A6A;
481 x29850143 = xA9DF5F6F & x3DA52153;
482 x33C0330C = x33CC33CC & x3FF3F33C;
483 x1A45324F = x29850143 ^ x33C0330C;
484 x20 = x1A45324F | a4;
485 x21 = x20 ^ x962CAC53;
488 x0A451047 = x1A45324F & ~x118822B8;
489 xBBDFDD7B = x33CCCC33 | xA9DF596A;
490 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
491 x30 = x003311BB | a4;
492 x31 = x30 ^ xB19ACD3C;
496 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
498 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
499 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
500 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
501 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
502 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
503 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
504 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
505 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
506 u32 x00, x01, x10, x11, x20, x21, x30, x31;
508 x44444444 = a1 & ~a2;
510 x4F4FF4F4 = x44444444 | x0F0FF0F0;
512 x00AAAA00 = x00FFFF00 & ~a1;
513 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
515 x3C3CC3C3 = a2 ^ x0F0FF0F0;
516 x3C3C0000 = x3C3CC3C3 & ~a6;
517 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
518 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
520 x00005EF4 = a6 & x4FE55EF4;
521 x00FF5EFF = a4 | x00005EF4;
522 x00555455 = a1 & x00FF5EFF;
523 x3C699796 = x3C3CC3C3 ^ x00555455;
524 x30 = x4FE55EF4 & ~a5;
525 x31 = x30 ^ x3C699796;
528 x000FF000 = x0F0FF0F0 & x00FFFF00;
530 x26D9A15E = x7373F4F4 ^ x55AA55AA;
531 x2FDFAF5F = a3 | x26D9A15E;
532 x2FD00F5F = x2FDFAF5F & ~x000FF000;
534 x55AAFFAA = x00AAAA00 | x55AA55AA;
535 x28410014 = x3C699796 & ~x55AAFFAA;
537 x000000CC = x000000FF & ~a2;
538 x284100D8 = x28410014 ^ x000000CC;
540 x204100D0 = x7373F4F4 & x284100D8;
541 x3C3CC3FF = x3C3CC3C3 | x000000FF;
542 x1C3CC32F = x3C3CC3FF & ~x204100D0;
543 x4969967A = a1 ^ x1C3CC32F;
544 x10 = x2FD00F5F & a5;
545 x11 = x10 ^ x4969967A;
548 x4CC44CC4 = x4FE55EF4 & ~a2;
549 x40C040C0 = x4CC44CC4 & ~a3;
550 xC3C33C3C = ~x3C3CC3C3;
551 x9669C396 = x55AAFFAA ^ xC3C33C3C;
552 xD6A98356 = x40C040C0 ^ x9669C396;
553 x00 = a5 & ~x0C840A00;
554 x01 = x00 ^ xD6A98356;
557 xD6E9C3D6 = x40C040C0 | x9669C396;
558 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
559 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
560 x001A000B = a4 & ~x4FE55EF4;
561 x9A1F2D1B = x9A072D12 | x001A000B;
562 x20 = a5 & ~x284100D8;
563 x21 = x20 ^ x9A1F2D1B;
567 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
569 u32 x5A5A5A5A, x0F0FF0F0;
570 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
571 x52FBCA0F, x61C8F93C;
572 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
573 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
574 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
575 u32 x00, x01, x10, x11, x20, x21, x30, x31;
580 x33FFCC00 = a5 ^ x33FF33FF;
581 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
582 x0C0CC0C0 = x0F0FF0F0 & ~a2;
583 x0CF3C03F = a4 ^ x0C0CC0C0;
584 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
585 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
586 x61C8F93C = a2 ^ x52FBCA0F;
588 x00C0C03C = x0CF3C03F & x61C8F93C;
589 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
590 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
591 x30908326 = x3B92A366 & ~x0F0F30C0;
592 x3C90B3D6 = x0C0030F0 ^ x30908326;
595 x0C0CFFFF = a5 | x0C0CC0C0;
596 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
597 x04124C11 = x379E5C99 & ~x33CC33CC;
598 x56E9861E = x52FBCA0F ^ x04124C11;
599 x00 = a6 & ~x3C90B3D6;
600 x01 = x00 ^ x56E9861E;
603 xA91679E1 = ~x56E9861E;
604 x10 = x3C90B3D6 & ~a6;
605 x11 = x10 ^ xA91679E1;
608 x9586CA37 = x3C90B3D6 ^ xA91679E1;
609 x8402C833 = x9586CA37 & ~x33CC33CC;
610 x84C2C83F = x00C0C03C | x8402C833;
611 xB35C94A6 = x379E5C99 ^ x84C2C83F;
612 x20 = x61C8F93C | a6;
613 x21 = x20 ^ xB35C94A6;
616 x30 = a6 & x61C8F93C;
617 x31 = x30 ^ xB35C94A6;
621 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
623 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
624 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
625 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
626 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
627 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
628 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
629 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
630 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
631 u32 x00, x01, x10, x11, x20, x21, x30, x31;
634 x77770000 = x77777777 & ~a6;
635 x22225555 = a1 ^ x77770000;
636 x11116666 = a3 ^ x22225555;
637 x1F1F6F6F = a4 | x11116666;
639 x70700000 = x77770000 & ~a4;
640 x43433333 = a3 ^ x70700000;
641 x00430033 = a5 & x43433333;
642 x55557777 = a1 | x11116666;
643 x55167744 = x00430033 ^ x55557777;
644 x5A19784B = a4 ^ x55167744;
646 x5A1987B4 = a6 ^ x5A19784B;
647 x7A3BD7F5 = x22225555 | x5A1987B4;
648 x003B00F5 = a5 & x7A3BD7F5;
649 x221955A0 = x22225555 ^ x003B00F5;
650 x05050707 = a4 & x55557777;
651 x271C52A7 = x221955A0 ^ x05050707;
653 x2A2A82A0 = x7A3BD7F5 & ~a1;
654 x6969B193 = x43433333 ^ x2A2A82A0;
655 x1FE06F90 = a5 ^ x1F1F6F6F;
656 x16804E00 = x1FE06F90 & ~x6969B193;
657 xE97FB1FF = ~x16804E00;
658 x20 = xE97FB1FF & ~a2;
659 x21 = x20 ^ x5A19784B;
662 x43403302 = x43433333 & ~x003B00F5;
663 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
664 x37DEFFB7 = x271C52A7 | x35CAED30;
665 x349ECCB5 = x37DEFFB7 & ~x43403302;
666 x0B01234A = x1F1F6F6F & ~x349ECCB5;
668 x101884B4 = x5A1987B4 & x349ECCB5;
669 x0FF8EB24 = x1FE06F90 ^ x101884B4;
670 x41413333 = x43433333 & x55557777;
671 x4FF9FB37 = x0FF8EB24 | x41413333;
672 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
673 x30 = x4FC2FBC2 & a2;
674 x31 = x30 ^ x271C52A7;
677 x22222222 = a1 ^ x77777777;
678 x16BCEE97 = x349ECCB5 ^ x22222222;
679 x0F080B04 = a4 & x0FF8EB24;
680 x19B4E593 = x16BCEE97 ^ x0F080B04;
681 x00 = x0B01234A | a2;
682 x01 = x00 ^ x19B4E593;
685 x5C5C5C5C = x1F1F6F6F ^ x43433333;
686 x4448184C = x5C5C5C5C & ~x19B4E593;
687 x2DDABE71 = x22225555 ^ x0FF8EB24;
688 x6992A63D = x4448184C ^ x2DDABE71;
689 x10 = x1F1F6F6F & a2;
690 x11 = x10 ^ x6992A63D;
694 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
697 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
698 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
699 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
700 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
701 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
702 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
703 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
704 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
705 u32 x00, x01, x10, x11, x20, x21, x30, x31;
710 x11115555 = a1 & x3333FFFF;
711 x22DD6699 = x33CC33CC ^ x11115555;
712 x22DD9966 = a6 ^ x22DD6699;
713 x00220099 = a5 & ~x22DD9966;
715 x00551144 = a1 & x22DD9966;
716 x33662277 = a2 ^ x00551144;
718 x7B7E7A7F = x33662277 | x5A5A5A5A;
719 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
721 x09030C06 = a3 & x59A31CE6;
722 x09030000 = x09030C06 & ~a6;
723 x336622FF = x00220099 | x33662277;
724 x3A6522FF = x09030000 ^ x336622FF;
725 x30 = x3A6522FF & a4;
726 x31 = x30 ^ x59A31CE6;
729 x484D494C = a2 ^ x7B7E7A7F;
730 x0000B6B3 = a6 & ~x484D494C;
731 x0F0FB9BC = a3 ^ x0000B6B3;
732 x00FC00F9 = a5 & ~x09030C06;
733 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
735 x5DF75DF7 = a1 | x59A31CE6;
736 x116600F7 = x336622FF & x5DF75DF7;
737 x1E69B94B = x0F0FB9BC ^ x116600F7;
738 x1668B94B = x1E69B94B & ~x09030000;
739 x20 = x00220099 | a4;
740 x21 = x20 ^ x1668B94B;
743 x7B7B7B7B = a2 | x5A5A5A5A;
744 x411E5984 = x3A6522FF ^ x7B7B7B7B;
745 x1FFFFDFD = x11115555 | x0FFFB9FD;
746 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
748 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
749 x004B002D = a5 & ~x3CB4DFD2;
750 xB7B2B6B3 = ~x484D494C;
751 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
752 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
753 x10 = xCC82CDE5 & ~a4;
754 x11 = x10 ^ x5EE1A479;
757 x0055EEBB = a6 ^ x00551144;
758 x5A5AECE9 = a1 ^ x0F0FB9BC;
759 x0050ECA9 = x0055EEBB & x5A5AECE9;
760 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
761 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
762 x00 = x0FFFB9FD & ~a4;
763 x01 = x00 ^ xC59A2D67;
767 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
769 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
770 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
771 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
772 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
773 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
774 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
775 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
776 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
777 u32 x00, x01, x10, x11, x20, x21, x30, x31;
780 x3CC33CC3 = a3 ^ x0FF00FF0;
781 x00003CC3 = a6 & x3CC33CC3;
782 x0F000F00 = a4 & x0FF00FF0;
783 x5A555A55 = a2 ^ x0F000F00;
784 x00001841 = x00003CC3 & x5A555A55;
786 x00000F00 = a6 & x0F000F00;
787 x33333C33 = a3 ^ x00000F00;
788 x7B777E77 = x5A555A55 | x33333C33;
789 x0FF0F00F = a6 ^ x0FF00FF0;
790 x74878E78 = x7B777E77 ^ x0FF0F00F;
791 x30 = a1 & ~x00001841;
792 x31 = x30 ^ x74878E78;
795 x003C003C = a5 & ~x3CC33CC3;
796 x5A7D5A7D = x5A555A55 | x003C003C;
797 x333300F0 = x00003CC3 ^ x33333C33;
798 x694E5A8D = x5A7D5A7D ^ x333300F0;
800 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
801 x000F0303 = a4 & ~x0FF0CCCC;
802 x5A505854 = x5A555A55 & ~x000F0303;
803 x33CC000F = a5 ^ x333300F0;
804 x699C585B = x5A505854 ^ x33CC000F;
806 x7F878F78 = x0F000F00 | x74878E78;
807 x21101013 = a3 & x699C585B;
808 x7F979F7B = x7F878F78 | x21101013;
809 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
810 x4F9493BB = x7F979F7B ^ x30030CC0;
811 x00 = x4F9493BB & ~a1;
812 x01 = x00 ^ x694E5A8D;
815 x6F9CDBFB = x699C585B | x4F9493BB;
816 x0000DBFB = a6 & x6F9CDBFB;
817 x00005151 = a2 & x0000DBFB;
818 x26DAC936 = x694E5A8D ^ x4F9493BB;
819 x26DA9867 = x00005151 ^ x26DAC936;
821 x27DA9877 = x21101013 | x26DA9867;
822 x27DA438C = x0000DBFB ^ x27DA9877;
823 x2625C9C9 = a5 ^ x26DAC936;
824 x27FFCBCD = x27DA438C | x2625C9C9;
825 x20 = x27FFCBCD & a1;
826 x21 = x20 ^ x699C585B;
829 x27FF1036 = x0000DBFB ^ x27FFCBCD;
830 x27FF103E = x003C003C | x27FF1036;
831 xB06B6C44 = ~x4F9493BB;
832 x97947C7A = x27FF103E ^ xB06B6C44;
833 x10 = x97947C7A & ~a1;
834 x11 = x10 ^ x26DA9867;
838 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
840 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
841 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
842 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
843 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
844 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
845 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
846 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
847 u32 x00, x01, x10, x11, x20, x21, x30, x31;
849 x0C0C0C0C = a3 & ~a2;
850 x0000F0F0 = a5 & ~a3;
851 x00FFF00F = a4 ^ x0000F0F0;
852 x00555005 = a1 & x00FFF00F;
853 x00515001 = x00555005 & ~x0C0C0C0C;
855 x33000330 = a2 & ~x00FFF00F;
856 x77555775 = a1 | x33000330;
857 x30303030 = a2 & ~a3;
858 x3030CFCF = a5 ^ x30303030;
859 x30104745 = x77555775 & x3030CFCF;
860 x30555745 = x00555005 | x30104745;
862 xFF000FF0 = ~x00FFF00F;
863 xCF1048B5 = x30104745 ^ xFF000FF0;
864 x080A080A = a3 & ~x77555775;
865 xC71A40BF = xCF1048B5 ^ x080A080A;
866 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
867 x10 = x00515001 | a6;
868 x11 = x10 ^ xCB164CB3;
871 x9E4319E6 = a1 ^ xCB164CB3;
872 x000019E6 = a5 & x9E4319E6;
873 xF429738C = a2 ^ xC71A40BF;
874 xF4296A6A = x000019E6 ^ xF429738C;
875 xC729695A = x33000330 ^ xF4296A6A;
877 xC47C3D2F = x30555745 ^ xF4296A6A;
878 xF77F3F3F = a2 | xC47C3D2F;
879 x9E43E619 = a5 ^ x9E4319E6;
880 x693CD926 = xF77F3F3F ^ x9E43E619;
881 x20 = x30555745 & a6;
882 x21 = x20 ^ x693CD926;
885 xF719A695 = x3030CFCF ^ xC729695A;
886 xF4FF73FF = a4 | xF429738C;
887 x03E6D56A = xF719A695 ^ xF4FF73FF;
888 x56B3803F = a1 ^ x03E6D56A;
889 x30 = x56B3803F & a6;
890 x31 = x30 ^ xC729695A;
893 xF700A600 = xF719A695 & ~a4;
894 x61008000 = x693CD926 & xF700A600;
895 x03B7856B = x00515001 ^ x03E6D56A;
896 x62B7056B = x61008000 ^ x03B7856B;
897 x00 = x62B7056B | a6;
898 x01 = x00 ^ xC729695A;
905 #if defined IS_AMD || defined IS_GENERIC
908 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
909 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
911 * Gate counts: 49 44 46 33 48 46 46 41
914 * Several same-gate-count expressions for each S-box are included (for use on
915 * different CPUs/GPUs).
917 * These Boolean expressions corresponding to DES S-boxes have been generated
918 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
919 * John the Ripper password cracker: http://www.openwall.com/john/
920 * Being mathematical formulas, they are not copyrighted and are free for reuse
923 * This file (a specific representation of the S-box expressions, surrounding
924 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
925 * Redistribution and use in source and binary forms, with or without
926 * modification, are permitted. (This is a heavily cut-down "BSD license".)
928 * The effort has been sponsored by Rapid7: http://www.rapid7.com
931 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
933 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
935 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
936 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
937 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
938 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
939 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
940 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
941 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
942 u32 x00, x01, x10, x11, x20, x21, x30, x31;
944 x55005500 = a1 & ~a5;
945 x5A0F5A0F = a4 ^ x55005500;
948 x22226666 = x3333FFFF & x66666666;
949 x2D2D6969 = a4 ^ x22226666;
950 x25202160 = x2D2D6969 & ~x5A0F5A0F;
953 x33CCCC33 = a3 ^ x00FFFF00;
954 x4803120C = x5A0F5A0F & ~x33CCCC33;
955 x2222FFFF = a6 | x22226666;
956 x6A21EDF3 = x4803120C ^ x2222FFFF;
957 x4A01CC93 = x6A21EDF3 & ~x25202160;
960 x7F75FFFF = x6A21EDF3 | x5555FFFF;
961 x00D20096 = a5 & ~x2D2D6969;
962 x7FA7FF69 = x7F75FFFF ^ x00D20096;
964 x0A0A0000 = a4 & ~x5555FFFF;
965 x0AD80096 = x00D20096 ^ x0A0A0000;
966 x00999900 = x00FFFF00 & ~x66666666;
967 x0AD99996 = x0AD80096 | x00999900;
969 x22332233 = a3 & ~x55005500;
970 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
971 x054885C0 = x257AA5F0 & ~x22332233;
972 xFAB77A3F = ~x054885C0;
973 x2221EDF3 = x3333FFFF & x6A21EDF3;
974 xD89697CC = xFAB77A3F ^ x2221EDF3;
975 x20 = x7FA7FF69 & ~a2;
976 x21 = x20 ^ xD89697CC;
979 x05B77AC0 = x00FFFF00 ^ x054885C0;
980 x05F77AD6 = x00D20096 | x05B77AC0;
981 x36C48529 = x3333FFFF ^ x05F77AD6;
982 x6391D07C = a1 ^ x36C48529;
983 xBB0747B0 = xD89697CC ^ x6391D07C;
984 x00 = x25202160 | a2;
985 x01 = x00 ^ xBB0747B0;
988 x4C460000 = x3333FFFF ^ x7F75FFFF;
989 x4EDF9996 = x0AD99996 | x4C460000;
990 x2D4E49EA = x6391D07C ^ x4EDF9996;
991 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
992 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
993 x10 = x4A01CC93 | a2;
994 x11 = x10 ^ x96B1B65A;
997 x5AFF5AFF = a5 | x5A0F5A0F;
998 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
999 x4201C010 = x4A01CC93 & x6391D07C;
1000 x10B0D205 = x52B11215 ^ x4201C010;
1001 x30 = x10B0D205 | a2;
1002 x31 = x30 ^ x0AD99996;
1006 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1009 u32 x55550000, x00AA00FF, x33BB33FF;
1010 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
1011 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
1012 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
1013 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
1014 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
1015 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
1016 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
1017 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1019 x33CC33CC = a2 ^ a5;
1021 x55550000 = a1 & ~a6;
1022 x00AA00FF = a5 & ~x55550000;
1023 x33BB33FF = a2 | x00AA00FF;
1025 x33CC0000 = x33CC33CC & ~a6;
1026 x11441144 = a1 & x33CC33CC;
1027 x11BB11BB = a5 ^ x11441144;
1028 x003311BB = x11BB11BB & ~x33CC0000;
1030 x00000F0F = a3 & a6;
1031 x336600FF = x00AA00FF ^ x33CC0000;
1032 x332200FF = x33BB33FF & x336600FF;
1033 x332200F0 = x332200FF & ~x00000F0F;
1035 x0302000F = a3 & x332200FF;
1037 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
1038 x33CCCC33 = a6 ^ x33CC33CC;
1039 x33CCC030 = x33CCCC33 & ~x00000F0F;
1040 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
1041 x10 = a4 & ~x332200F0;
1042 x11 = x10 ^ x9A646A95;
1045 x00333303 = a2 & ~x33CCC030;
1046 x118822B8 = x11BB11BB ^ x00333303;
1047 xA8208805 = xA9A8AAA5 & ~x118822B8;
1048 x3CC3C33C = a3 ^ x33CCCC33;
1049 x94E34B39 = xA8208805 ^ x3CC3C33C;
1050 x00 = x33BB33FF & ~a4;
1051 x01 = x00 ^ x94E34B39;
1054 x0331330C = x0302000F ^ x00333303;
1055 x3FF3F33C = x3CC3C33C | x0331330C;
1056 xA9DF596A = x33BB33FF ^ x9A646A95;
1057 xA9DF5F6F = x00000F0F | xA9DF596A;
1058 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
1060 xA9466A6A = x332200FF ^ x9A646A95;
1061 x3DA52153 = x94E34B39 ^ xA9466A6A;
1062 x29850143 = xA9DF5F6F & x3DA52153;
1063 x33C0330C = x33CC33CC & x3FF3F33C;
1064 x1A45324F = x29850143 ^ x33C0330C;
1065 x20 = x1A45324F | a4;
1066 x21 = x20 ^ x962CAC53;
1069 x0A451047 = x1A45324F & ~x118822B8;
1070 xBBDFDD7B = x33CCCC33 | xA9DF596A;
1071 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
1072 x30 = x003311BB | a4;
1073 x31 = x30 ^ xB19ACD3C;
1077 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1079 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
1080 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
1081 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
1082 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
1083 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
1084 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
1085 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
1086 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
1087 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1089 x44444444 = a1 & ~a2;
1090 x0F0FF0F0 = a3 ^ a6;
1091 x4F4FF4F4 = x44444444 | x0F0FF0F0;
1092 x00FFFF00 = a4 ^ a6;
1093 x00AAAA00 = x00FFFF00 & ~a1;
1094 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
1096 x3C3CC3C3 = a2 ^ x0F0FF0F0;
1097 x3C3C0000 = x3C3CC3C3 & ~a6;
1098 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
1099 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
1101 x00005EF4 = a6 & x4FE55EF4;
1102 x00FF5EFF = a4 | x00005EF4;
1103 x00555455 = a1 & x00FF5EFF;
1104 x3C699796 = x3C3CC3C3 ^ x00555455;
1105 x30 = x4FE55EF4 & ~a5;
1106 x31 = x30 ^ x3C699796;
1109 x000FF000 = x0F0FF0F0 & x00FFFF00;
1110 x55AA55AA = a1 ^ a4;
1111 x26D9A15E = x7373F4F4 ^ x55AA55AA;
1112 x2FDFAF5F = a3 | x26D9A15E;
1113 x2FD00F5F = x2FDFAF5F & ~x000FF000;
1115 x55AAFFAA = x00AAAA00 | x55AA55AA;
1116 x28410014 = x3C699796 & ~x55AAFFAA;
1117 x000000FF = a4 & a6;
1118 x000000CC = x000000FF & ~a2;
1119 x284100D8 = x28410014 ^ x000000CC;
1121 x204100D0 = x7373F4F4 & x284100D8;
1122 x3C3CC3FF = x3C3CC3C3 | x000000FF;
1123 x1C3CC32F = x3C3CC3FF & ~x204100D0;
1124 x4969967A = a1 ^ x1C3CC32F;
1125 x10 = x2FD00F5F & a5;
1126 x11 = x10 ^ x4969967A;
1129 x4CC44CC4 = x4FE55EF4 & ~a2;
1130 x40C040C0 = x4CC44CC4 & ~a3;
1131 xC3C33C3C = ~x3C3CC3C3;
1132 x9669C396 = x55AAFFAA ^ xC3C33C3C;
1133 xD6A98356 = x40C040C0 ^ x9669C396;
1134 x00 = a5 & ~x0C840A00;
1135 x01 = x00 ^ xD6A98356;
1138 xD6E9C3D6 = x40C040C0 | x9669C396;
1139 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
1140 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
1141 x001A000B = a4 & ~x4FE55EF4;
1142 x9A1F2D1B = x9A072D12 | x001A000B;
1143 x20 = a5 & ~x284100D8;
1144 x21 = x20 ^ x9A1F2D1B;
1148 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1150 u32 x5A5A5A5A, x0F0FF0F0;
1151 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
1152 x52FBCA0F, x61C8F93C;
1153 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
1154 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
1155 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
1156 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1158 x5A5A5A5A = a1 ^ a3;
1159 x0F0FF0F0 = a3 ^ a5;
1160 x33FF33FF = a2 | a4;
1161 x33FFCC00 = a5 ^ x33FF33FF;
1162 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
1163 x0C0CC0C0 = x0F0FF0F0 & ~a2;
1164 x0CF3C03F = a4 ^ x0C0CC0C0;
1165 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
1166 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
1167 x61C8F93C = a2 ^ x52FBCA0F;
1169 x00C0C03C = x0CF3C03F & x61C8F93C;
1170 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
1171 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
1172 x30908326 = x3B92A366 & ~x0F0F30C0;
1173 x3C90B3D6 = x0C0030F0 ^ x30908326;
1175 x33CC33CC = a2 ^ a4;
1176 x0C0CFFFF = a5 | x0C0CC0C0;
1177 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
1178 x04124C11 = x379E5C99 & ~x33CC33CC;
1179 x56E9861E = x52FBCA0F ^ x04124C11;
1180 x00 = a6 & ~x3C90B3D6;
1181 x01 = x00 ^ x56E9861E;
1184 xA91679E1 = ~x56E9861E;
1185 x10 = x3C90B3D6 & ~a6;
1186 x11 = x10 ^ xA91679E1;
1189 x9586CA37 = x3C90B3D6 ^ xA91679E1;
1190 x8402C833 = x9586CA37 & ~x33CC33CC;
1191 x84C2C83F = x00C0C03C | x8402C833;
1192 xB35C94A6 = x379E5C99 ^ x84C2C83F;
1193 x20 = x61C8F93C | a6;
1194 x21 = x20 ^ xB35C94A6;
1197 x30 = a6 & x61C8F93C;
1198 x31 = x30 ^ xB35C94A6;
1202 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1204 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
1205 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
1206 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
1207 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
1208 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
1209 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
1210 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
1211 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
1212 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1214 x77777777 = a1 | a3;
1215 x77770000 = x77777777 & ~a6;
1216 x22225555 = a1 ^ x77770000;
1217 x11116666 = a3 ^ x22225555;
1218 x1F1F6F6F = a4 | x11116666;
1220 x70700000 = x77770000 & ~a4;
1221 x43433333 = a3 ^ x70700000;
1222 x00430033 = a5 & x43433333;
1223 x55557777 = a1 | x11116666;
1224 x55167744 = x00430033 ^ x55557777;
1225 x5A19784B = a4 ^ x55167744;
1227 x5A1987B4 = a6 ^ x5A19784B;
1228 x7A3BD7F5 = x22225555 | x5A1987B4;
1229 x003B00F5 = a5 & x7A3BD7F5;
1230 x221955A0 = x22225555 ^ x003B00F5;
1231 x05050707 = a4 & x55557777;
1232 x271C52A7 = x221955A0 ^ x05050707;
1234 x2A2A82A0 = x7A3BD7F5 & ~a1;
1235 x6969B193 = x43433333 ^ x2A2A82A0;
1236 x1FE06F90 = a5 ^ x1F1F6F6F;
1237 x16804E00 = x1FE06F90 & ~x6969B193;
1238 xE97FB1FF = ~x16804E00;
1239 x20 = xE97FB1FF & ~a2;
1240 x21 = x20 ^ x5A19784B;
1243 x43403302 = x43433333 & ~x003B00F5;
1244 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
1245 x37DEFFB7 = x271C52A7 | x35CAED30;
1246 x349ECCB5 = x37DEFFB7 & ~x43403302;
1247 x0B01234A = x1F1F6F6F & ~x349ECCB5;
1249 x101884B4 = x5A1987B4 & x349ECCB5;
1250 x0FF8EB24 = x1FE06F90 ^ x101884B4;
1251 x41413333 = x43433333 & x55557777;
1252 x4FF9FB37 = x0FF8EB24 | x41413333;
1253 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
1254 x30 = x4FC2FBC2 & a2;
1255 x31 = x30 ^ x271C52A7;
1258 x22222222 = a1 ^ x77777777;
1259 x16BCEE97 = x349ECCB5 ^ x22222222;
1260 x0F080B04 = a4 & x0FF8EB24;
1261 x19B4E593 = x16BCEE97 ^ x0F080B04;
1262 x00 = x0B01234A | a2;
1263 x01 = x00 ^ x19B4E593;
1266 x5C5C5C5C = x1F1F6F6F ^ x43433333;
1267 x4448184C = x5C5C5C5C & ~x19B4E593;
1268 x2DDABE71 = x22225555 ^ x0FF8EB24;
1269 x6992A63D = x4448184C ^ x2DDABE71;
1270 x10 = x1F1F6F6F & a2;
1271 x11 = x10 ^ x6992A63D;
1275 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1278 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
1279 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
1280 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
1281 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
1282 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
1283 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
1284 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
1285 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
1286 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1288 x33CC33CC = a2 ^ a5;
1290 x3333FFFF = a2 | a6;
1291 x11115555 = a1 & x3333FFFF;
1292 x22DD6699 = x33CC33CC ^ x11115555;
1293 x22DD9966 = a6 ^ x22DD6699;
1294 x00220099 = a5 & ~x22DD9966;
1296 x00551144 = a1 & x22DD9966;
1297 x33662277 = a2 ^ x00551144;
1298 x5A5A5A5A = a1 ^ a3;
1299 x7B7E7A7F = x33662277 | x5A5A5A5A;
1300 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
1302 x09030C06 = a3 & x59A31CE6;
1303 x09030000 = x09030C06 & ~a6;
1304 x336622FF = x00220099 | x33662277;
1305 x3A6522FF = x09030000 ^ x336622FF;
1306 x30 = x3A6522FF & a4;
1307 x31 = x30 ^ x59A31CE6;
1310 x484D494C = a2 ^ x7B7E7A7F;
1311 x0000B6B3 = a6 & ~x484D494C;
1312 x0F0FB9BC = a3 ^ x0000B6B3;
1313 x00FC00F9 = a5 & ~x09030C06;
1314 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
1316 x5DF75DF7 = a1 | x59A31CE6;
1317 x116600F7 = x336622FF & x5DF75DF7;
1318 x1E69B94B = x0F0FB9BC ^ x116600F7;
1319 x1668B94B = x1E69B94B & ~x09030000;
1320 x20 = x00220099 | a4;
1321 x21 = x20 ^ x1668B94B;
1324 x7B7B7B7B = a2 | x5A5A5A5A;
1325 x411E5984 = x3A6522FF ^ x7B7B7B7B;
1326 x1FFFFDFD = x11115555 | x0FFFB9FD;
1327 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
1329 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
1330 x004B002D = a5 & ~x3CB4DFD2;
1331 xB7B2B6B3 = ~x484D494C;
1332 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
1333 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
1334 x10 = xCC82CDE5 & ~a4;
1335 x11 = x10 ^ x5EE1A479;
1338 x0055EEBB = a6 ^ x00551144;
1339 x5A5AECE9 = a1 ^ x0F0FB9BC;
1340 x0050ECA9 = x0055EEBB & x5A5AECE9;
1341 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
1342 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
1343 x00 = x0FFFB9FD & ~a4;
1344 x01 = x00 ^ xC59A2D67;
1348 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1350 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
1351 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
1352 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
1353 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
1354 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
1355 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
1356 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
1357 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
1358 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1360 x0FF00FF0 = a4 ^ a5;
1361 x3CC33CC3 = a3 ^ x0FF00FF0;
1362 x00003CC3 = a6 & x3CC33CC3;
1363 x0F000F00 = a4 & x0FF00FF0;
1364 x5A555A55 = a2 ^ x0F000F00;
1365 x00001841 = x00003CC3 & x5A555A55;
1367 x00000F00 = a6 & x0F000F00;
1368 x33333C33 = a3 ^ x00000F00;
1369 x7B777E77 = x5A555A55 | x33333C33;
1370 x0FF0F00F = a6 ^ x0FF00FF0;
1371 x74878E78 = x7B777E77 ^ x0FF0F00F;
1372 x30 = a1 & ~x00001841;
1373 x31 = x30 ^ x74878E78;
1376 x003C003C = a5 & ~x3CC33CC3;
1377 x5A7D5A7D = x5A555A55 | x003C003C;
1378 x333300F0 = x00003CC3 ^ x33333C33;
1379 x694E5A8D = x5A7D5A7D ^ x333300F0;
1381 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
1382 x000F0303 = a4 & ~x0FF0CCCC;
1383 x5A505854 = x5A555A55 & ~x000F0303;
1384 x33CC000F = a5 ^ x333300F0;
1385 x699C585B = x5A505854 ^ x33CC000F;
1387 x7F878F78 = x0F000F00 | x74878E78;
1388 x21101013 = a3 & x699C585B;
1389 x7F979F7B = x7F878F78 | x21101013;
1390 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
1391 x4F9493BB = x7F979F7B ^ x30030CC0;
1392 x00 = x4F9493BB & ~a1;
1393 x01 = x00 ^ x694E5A8D;
1396 x6F9CDBFB = x699C585B | x4F9493BB;
1397 x0000DBFB = a6 & x6F9CDBFB;
1398 x00005151 = a2 & x0000DBFB;
1399 x26DAC936 = x694E5A8D ^ x4F9493BB;
1400 x26DA9867 = x00005151 ^ x26DAC936;
1402 x27DA9877 = x21101013 | x26DA9867;
1403 x27DA438C = x0000DBFB ^ x27DA9877;
1404 x2625C9C9 = a5 ^ x26DAC936;
1405 x27FFCBCD = x27DA438C | x2625C9C9;
1406 x20 = x27FFCBCD & a1;
1407 x21 = x20 ^ x699C585B;
1410 x27FF1036 = x0000DBFB ^ x27FFCBCD;
1411 x27FF103E = x003C003C | x27FF1036;
1412 xB06B6C44 = ~x4F9493BB;
1413 x97947C7A = x27FF103E ^ xB06B6C44;
1414 x10 = x97947C7A & ~a1;
1415 x11 = x10 ^ x26DA9867;
1419 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1421 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
1422 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
1423 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
1424 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
1425 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
1426 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
1427 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
1428 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1430 x0C0C0C0C = a3 & ~a2;
1431 x0000F0F0 = a5 & ~a3;
1432 x00FFF00F = a4 ^ x0000F0F0;
1433 x00555005 = a1 & x00FFF00F;
1434 x00515001 = x00555005 & ~x0C0C0C0C;
1436 x33000330 = a2 & ~x00FFF00F;
1437 x77555775 = a1 | x33000330;
1438 x30303030 = a2 & ~a3;
1439 x3030CFCF = a5 ^ x30303030;
1440 x30104745 = x77555775 & x3030CFCF;
1441 x30555745 = x00555005 | x30104745;
1443 xFF000FF0 = ~x00FFF00F;
1444 xCF1048B5 = x30104745 ^ xFF000FF0;
1445 x080A080A = a3 & ~x77555775;
1446 xC71A40BF = xCF1048B5 ^ x080A080A;
1447 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
1448 x10 = x00515001 | a6;
1449 x11 = x10 ^ xCB164CB3;
1452 x9E4319E6 = a1 ^ xCB164CB3;
1453 x000019E6 = a5 & x9E4319E6;
1454 xF429738C = a2 ^ xC71A40BF;
1455 xF4296A6A = x000019E6 ^ xF429738C;
1456 xC729695A = x33000330 ^ xF4296A6A;
1458 xC47C3D2F = x30555745 ^ xF4296A6A;
1459 xF77F3F3F = a2 | xC47C3D2F;
1460 x9E43E619 = a5 ^ x9E4319E6;
1461 x693CD926 = xF77F3F3F ^ x9E43E619;
1462 x20 = x30555745 & a6;
1463 x21 = x20 ^ x693CD926;
1466 xF719A695 = x3030CFCF ^ xC729695A;
1467 xF4FF73FF = a4 | xF429738C;
1468 x03E6D56A = xF719A695 ^ xF4FF73FF;
1469 x56B3803F = a1 ^ x03E6D56A;
1470 x30 = x56B3803F & a6;
1471 x31 = x30 ^ xC729695A;
1474 xF700A600 = xF719A695 & ~a4;
1475 x61008000 = x693CD926 & xF700A600;
1476 x03B7856B = x00515001 ^ x03E6D56A;
1477 x62B7056B = x61008000 ^ x03B7856B;
1478 x00 = x62B7056B | a6;
1479 x01 = x00 ^ xC729695A;
1485 #define SWAP(a, b) { u32 tmp=*a;*a=*b;*b=tmp; }
1521 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
1522 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
1523 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
1524 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
1525 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
1526 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
1527 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
1528 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
1529 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
1530 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
1531 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
1532 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
1533 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
1534 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
1535 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
1536 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
1538 static void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
1540 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
1541 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
1542 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
1543 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
1544 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
1545 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
1546 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
1547 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
1550 #if CUDA_ARCH >= 500
1564 for (u32 i = 0; i < 2; i++)
1566 if (i) KEYSET10 else KEYSET00
1568 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1569 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1570 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1571 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1572 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1573 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1574 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1575 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1577 if (i) KEYSET11 else KEYSET01
1579 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1580 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1581 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1582 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1583 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1584 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1585 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1586 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1588 if (i) KEYSET12 else KEYSET02
1590 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1591 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1592 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1593 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1594 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1595 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1596 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1597 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1599 if (i) KEYSET13 else KEYSET03
1601 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1602 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1603 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1604 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1605 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1606 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1607 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1608 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1610 if (i) KEYSET14 else KEYSET04
1612 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1613 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1614 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1615 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1616 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1617 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1618 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1619 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1621 if (i) KEYSET15 else KEYSET05
1623 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1624 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1625 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1626 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1627 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1628 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1629 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1630 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1632 if (i) KEYSET16 else KEYSET06
1634 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1635 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1636 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1637 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1638 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1639 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1640 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1641 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1643 if (i) KEYSET17 else KEYSET07
1645 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1646 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1647 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1648 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1649 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1650 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1651 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1652 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1656 static void transpose32c (u32 data[32])
1658 #define swap(x,y,j,m) \
1659 t = ((x) ^ ((y) >> (j))) & (m); \
1661 (y) = (y) ^ (t << (j));
1665 swap (data[ 0], data[16], 16, 0x0000ffff);
1666 swap (data[ 1], data[17], 16, 0x0000ffff);
1667 swap (data[ 2], data[18], 16, 0x0000ffff);
1668 swap (data[ 3], data[19], 16, 0x0000ffff);
1669 swap (data[ 4], data[20], 16, 0x0000ffff);
1670 swap (data[ 5], data[21], 16, 0x0000ffff);
1671 swap (data[ 6], data[22], 16, 0x0000ffff);
1672 swap (data[ 7], data[23], 16, 0x0000ffff);
1673 swap (data[ 8], data[24], 16, 0x0000ffff);
1674 swap (data[ 9], data[25], 16, 0x0000ffff);
1675 swap (data[10], data[26], 16, 0x0000ffff);
1676 swap (data[11], data[27], 16, 0x0000ffff);
1677 swap (data[12], data[28], 16, 0x0000ffff);
1678 swap (data[13], data[29], 16, 0x0000ffff);
1679 swap (data[14], data[30], 16, 0x0000ffff);
1680 swap (data[15], data[31], 16, 0x0000ffff);
1681 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
1682 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
1683 swap (data[ 2], data[10], 8, 0x00ff00ff);
1684 swap (data[ 3], data[11], 8, 0x00ff00ff);
1685 swap (data[ 4], data[12], 8, 0x00ff00ff);
1686 swap (data[ 5], data[13], 8, 0x00ff00ff);
1687 swap (data[ 6], data[14], 8, 0x00ff00ff);
1688 swap (data[ 7], data[15], 8, 0x00ff00ff);
1689 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
1690 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
1691 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
1692 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
1693 swap (data[ 0], data[ 2], 2, 0x33333333);
1694 swap (data[ 1], data[ 3], 2, 0x33333333);
1695 swap (data[ 0], data[ 1], 1, 0x55555555);
1696 swap (data[ 2], data[ 3], 1, 0x55555555);
1697 swap (data[ 4], data[ 6], 2, 0x33333333);
1698 swap (data[ 5], data[ 7], 2, 0x33333333);
1699 swap (data[ 4], data[ 5], 1, 0x55555555);
1700 swap (data[ 6], data[ 7], 1, 0x55555555);
1701 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
1702 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
1703 swap (data[10], data[14], 4, 0x0f0f0f0f);
1704 swap (data[11], data[15], 4, 0x0f0f0f0f);
1705 swap (data[ 8], data[10], 2, 0x33333333);
1706 swap (data[ 9], data[11], 2, 0x33333333);
1707 swap (data[ 8], data[ 9], 1, 0x55555555);
1708 swap (data[10], data[11], 1, 0x55555555);
1709 swap (data[12], data[14], 2, 0x33333333);
1710 swap (data[13], data[15], 2, 0x33333333);
1711 swap (data[12], data[13], 1, 0x55555555);
1712 swap (data[14], data[15], 1, 0x55555555);
1713 swap (data[16], data[24], 8, 0x00ff00ff);
1714 swap (data[17], data[25], 8, 0x00ff00ff);
1715 swap (data[18], data[26], 8, 0x00ff00ff);
1716 swap (data[19], data[27], 8, 0x00ff00ff);
1717 swap (data[20], data[28], 8, 0x00ff00ff);
1718 swap (data[21], data[29], 8, 0x00ff00ff);
1719 swap (data[22], data[30], 8, 0x00ff00ff);
1720 swap (data[23], data[31], 8, 0x00ff00ff);
1721 swap (data[16], data[20], 4, 0x0f0f0f0f);
1722 swap (data[17], data[21], 4, 0x0f0f0f0f);
1723 swap (data[18], data[22], 4, 0x0f0f0f0f);
1724 swap (data[19], data[23], 4, 0x0f0f0f0f);
1725 swap (data[16], data[18], 2, 0x33333333);
1726 swap (data[17], data[19], 2, 0x33333333);
1727 swap (data[16], data[17], 1, 0x55555555);
1728 swap (data[18], data[19], 1, 0x55555555);
1729 swap (data[20], data[22], 2, 0x33333333);
1730 swap (data[21], data[23], 2, 0x33333333);
1731 swap (data[20], data[21], 1, 0x55555555);
1732 swap (data[22], data[23], 1, 0x55555555);
1733 swap (data[24], data[28], 4, 0x0f0f0f0f);
1734 swap (data[25], data[29], 4, 0x0f0f0f0f);
1735 swap (data[26], data[30], 4, 0x0f0f0f0f);
1736 swap (data[27], data[31], 4, 0x0f0f0f0f);
1737 swap (data[24], data[26], 2, 0x33333333);
1738 swap (data[25], data[27], 2, 0x33333333);
1739 swap (data[24], data[25], 1, 0x55555555);
1740 swap (data[26], data[27], 1, 0x55555555);
1741 swap (data[28], data[30], 2, 0x33333333);
1742 swap (data[29], data[31], 2, 0x33333333);
1743 swap (data[28], data[29], 1, 0x55555555);
1744 swap (data[30], data[31], 1, 0x55555555);
1747 static void m03000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
1753 const u32 gid = get_global_id (0);
1754 const u32 lid = get_local_id (0);
1760 const u32 w0s = pws[gid].i[0];
1761 const u32 w1s = pws[gid].i[1];
1763 const u32 K00 = -((w0s >> ( 0 + 7)) & 1);
1764 const u32 K01 = -((w0s >> ( 0 + 6)) & 1);
1765 const u32 K02 = -((w0s >> ( 0 + 5)) & 1);
1766 const u32 K03 = -((w0s >> ( 0 + 4)) & 1);
1767 const u32 K04 = -((w0s >> ( 0 + 3)) & 1);
1768 const u32 K05 = -((w0s >> ( 0 + 2)) & 1);
1769 const u32 K06 = -((w0s >> ( 0 + 1)) & 1);
1770 const u32 K07 = -((w0s >> ( 0 + 0)) & 1);
1771 const u32 K08 = -((w0s >> ( 8 + 7)) & 1);
1772 const u32 K09 = -((w0s >> ( 8 + 6)) & 1);
1773 const u32 K10 = -((w0s >> ( 8 + 5)) & 1);
1774 const u32 K11 = -((w0s >> ( 8 + 4)) & 1);
1775 const u32 K12 = -((w0s >> ( 8 + 3)) & 1);
1776 const u32 K13 = -((w0s >> ( 8 + 2)) & 1);
1777 const u32 K14 = -((w0s >> ( 8 + 1)) & 1);
1778 const u32 K15 = -((w0s >> ( 8 + 0)) & 1);
1779 const u32 K16 = -((w0s >> (16 + 7)) & 1);
1780 const u32 K17 = -((w0s >> (16 + 6)) & 1);
1781 const u32 K18 = -((w0s >> (16 + 5)) & 1);
1782 const u32 K19 = -((w0s >> (16 + 4)) & 1);
1783 const u32 K20 = -((w0s >> (16 + 3)) & 1);
1784 const u32 K21 = -((w0s >> (16 + 2)) & 1);
1785 const u32 K22 = -((w0s >> (16 + 1)) & 1);
1786 const u32 K23 = -((w0s >> (16 + 0)) & 1);
1787 const u32 K24 = -((w0s >> (24 + 7)) & 1);
1788 const u32 K25 = -((w0s >> (24 + 6)) & 1);
1789 const u32 K26 = -((w0s >> (24 + 5)) & 1);
1790 const u32 K27 = -((w0s >> (24 + 4)) & 1);
1791 const u32 K28 = -((w0s >> (24 + 3)) & 1);
1792 const u32 K29 = -((w0s >> (24 + 2)) & 1);
1793 const u32 K30 = -((w0s >> (24 + 1)) & 1);
1794 const u32 K31 = -((w0s >> (24 + 0)) & 1);
1795 const u32 K32 = -((w1s >> ( 0 + 7)) & 1);
1796 const u32 K33 = -((w1s >> ( 0 + 6)) & 1);
1797 const u32 K34 = -((w1s >> ( 0 + 5)) & 1);
1798 const u32 K35 = -((w1s >> ( 0 + 4)) & 1);
1799 const u32 K36 = -((w1s >> ( 0 + 3)) & 1);
1800 const u32 K37 = -((w1s >> ( 0 + 2)) & 1);
1801 const u32 K38 = -((w1s >> ( 0 + 1)) & 1);
1802 const u32 K39 = -((w1s >> ( 0 + 0)) & 1);
1803 const u32 K40 = -((w1s >> ( 8 + 7)) & 1);
1804 const u32 K41 = -((w1s >> ( 8 + 6)) & 1);
1805 const u32 K42 = -((w1s >> ( 8 + 5)) & 1);
1806 const u32 K43 = -((w1s >> ( 8 + 4)) & 1);
1807 const u32 K44 = -((w1s >> ( 8 + 3)) & 1);
1808 const u32 K45 = -((w1s >> ( 8 + 2)) & 1);
1809 const u32 K46 = -((w1s >> ( 8 + 1)) & 1);
1810 const u32 K47 = -((w1s >> ( 8 + 0)) & 1);
1811 const u32 K48 = -((w1s >> (16 + 7)) & 1);
1812 const u32 K49 = -((w1s >> (16 + 6)) & 1);
1813 const u32 K50 = -((w1s >> (16 + 5)) & 1);
1814 const u32 K51 = -((w1s >> (16 + 4)) & 1);
1815 const u32 K52 = -((w1s >> (16 + 3)) & 1);
1816 const u32 K53 = -((w1s >> (16 + 2)) & 1);
1817 const u32 K54 = -((w1s >> (16 + 1)) & 1);
1818 const u32 K55 = -((w1s >> (16 + 0)) & 1);
1824 const u32 pc_pos = get_local_id (1);
1826 const u32 il_pos = pc_pos * 32;
1861 k00 |= words_buf_r[pc_pos].b[ 0];
1862 k01 |= words_buf_r[pc_pos].b[ 1];
1863 k02 |= words_buf_r[pc_pos].b[ 2];
1864 k03 |= words_buf_r[pc_pos].b[ 3];
1865 k04 |= words_buf_r[pc_pos].b[ 4];
1866 k05 |= words_buf_r[pc_pos].b[ 5];
1867 k06 |= words_buf_r[pc_pos].b[ 6];
1868 k07 |= words_buf_r[pc_pos].b[ 7];
1869 k08 |= words_buf_r[pc_pos].b[ 8];
1870 k09 |= words_buf_r[pc_pos].b[ 9];
1871 k10 |= words_buf_r[pc_pos].b[10];
1872 k11 |= words_buf_r[pc_pos].b[11];
1873 k12 |= words_buf_r[pc_pos].b[12];
1874 k13 |= words_buf_r[pc_pos].b[13];
1875 k14 |= words_buf_r[pc_pos].b[14];
1876 k15 |= words_buf_r[pc_pos].b[15];
1877 k16 |= words_buf_r[pc_pos].b[16];
1878 k17 |= words_buf_r[pc_pos].b[17];
1879 k18 |= words_buf_r[pc_pos].b[18];
1880 k19 |= words_buf_r[pc_pos].b[19];
1881 k20 |= words_buf_r[pc_pos].b[20];
1882 k21 |= words_buf_r[pc_pos].b[21];
1883 k22 |= words_buf_r[pc_pos].b[22];
1884 k23 |= words_buf_r[pc_pos].b[23];
1885 k24 |= words_buf_r[pc_pos].b[24];
1886 k25 |= words_buf_r[pc_pos].b[25];
1887 k26 |= words_buf_r[pc_pos].b[26];
1888 k27 |= words_buf_r[pc_pos].b[27];
1889 k28 |= words_buf_r[pc_pos].b[28];
1890 k29 |= words_buf_r[pc_pos].b[29];
1891 k30 |= words_buf_r[pc_pos].b[30];
1892 k31 |= words_buf_r[pc_pos].b[31];
1894 // KGS!@#$% including IP
1899 u32 D03 = 0xffffffff;
1901 u32 D05 = 0xffffffff;
1902 u32 D06 = 0xffffffff;
1903 u32 D07 = 0xffffffff;
1909 u32 D13 = 0xffffffff;
1912 u32 D16 = 0xffffffff;
1913 u32 D17 = 0xffffffff;
1918 u32 D22 = 0xffffffff;
1920 u32 D24 = 0xffffffff;
1922 u32 D26 = 0xffffffff;
1924 u32 D28 = 0xffffffff;
1925 u32 D29 = 0xffffffff;
1926 u32 D30 = 0xffffffff;
1927 u32 D31 = 0xffffffff;
1936 u32 D40 = 0xffffffff;
1937 u32 D41 = 0xffffffff;
1938 u32 D42 = 0xffffffff;
1940 u32 D44 = 0xffffffff;
1951 u32 D55 = 0xffffffff;
1954 u32 D58 = 0xffffffff;
1957 u32 D61 = 0xffffffff;
1958 u32 D62 = 0xffffffff;
1959 u32 D63 = 0xffffffff;
1963 k00, k01, k02, k03, k04, k05, k06,
1964 k07, k08, k09, k10, k11, k12, k13,
1965 k14, k15, k16, k17, k18, k19, k20,
1966 k21, k22, k23, k24, k25, k26, k27,
1967 k28, k29, k30, k31, K32, K33, K34,
1968 K35, K36, K37, K38, K39, K40, K41,
1969 K42, K43, K44, K45, K46, K47, K48,
1970 K49, K50, K51, K52, K53, K54, K55,
1971 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
1972 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
1973 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
1974 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
1975 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
1976 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
1977 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
1978 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
2048 if (digests_cnt < 16)
2050 for (u32 d = 0; d < digests_cnt; d++)
2052 const u32 final_hash_pos = digests_offset + d;
2054 if (hashes_shown[final_hash_pos]) continue;
2058 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
2059 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
2064 for (int i = 0; i < 32; i++)
2066 const u32 b0 = -((search[0] >> i) & 1);
2067 const u32 b1 = -((search[1] >> i) & 1);
2069 tmpResult |= out[ 0 + i] ^ b0;
2070 tmpResult |= out[32 + i] ^ b1;
2073 if (tmpResult == 0xffffffff) continue;
2075 const u32 slice = 31 - clz (~tmpResult);
2077 const u32 r0 = search[0];
2078 const u32 r1 = search[1];
2091 for (int i = 0; i < 32; i++)
2093 out0[i] = out[ 0 + 31 - i];
2094 out1[i] = out[32 + 31 - i];
2097 transpose32c (out0);
2098 transpose32c (out1);
2101 for (int slice = 0; slice < 32; slice++)
2103 const u32 r0 = out0[31 - slice];
2104 const u32 r1 = out1[31 - slice];
2113 static void m03000s (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
2119 const u32 gid = get_global_id (0);
2120 const u32 lid = get_local_id (0);
2126 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
2127 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
2129 const u32 S00 = -((s0 >> 0) & 1);
2130 const u32 S01 = -((s0 >> 1) & 1);
2131 const u32 S02 = -((s0 >> 2) & 1);
2132 const u32 S03 = -((s0 >> 3) & 1);
2133 const u32 S04 = -((s0 >> 4) & 1);
2134 const u32 S05 = -((s0 >> 5) & 1);
2135 const u32 S06 = -((s0 >> 6) & 1);
2136 const u32 S07 = -((s0 >> 7) & 1);
2137 const u32 S08 = -((s0 >> 8) & 1);
2138 const u32 S09 = -((s0 >> 9) & 1);
2139 const u32 S10 = -((s0 >> 10) & 1);
2140 const u32 S11 = -((s0 >> 11) & 1);
2141 const u32 S12 = -((s0 >> 12) & 1);
2142 const u32 S13 = -((s0 >> 13) & 1);
2143 const u32 S14 = -((s0 >> 14) & 1);
2144 const u32 S15 = -((s0 >> 15) & 1);
2145 const u32 S16 = -((s0 >> 16) & 1);
2146 const u32 S17 = -((s0 >> 17) & 1);
2147 const u32 S18 = -((s0 >> 18) & 1);
2148 const u32 S19 = -((s0 >> 19) & 1);
2149 const u32 S20 = -((s0 >> 20) & 1);
2150 const u32 S21 = -((s0 >> 21) & 1);
2151 const u32 S22 = -((s0 >> 22) & 1);
2152 const u32 S23 = -((s0 >> 23) & 1);
2153 const u32 S24 = -((s0 >> 24) & 1);
2154 const u32 S25 = -((s0 >> 25) & 1);
2155 const u32 S26 = -((s0 >> 26) & 1);
2156 const u32 S27 = -((s0 >> 27) & 1);
2157 const u32 S28 = -((s0 >> 28) & 1);
2158 const u32 S29 = -((s0 >> 29) & 1);
2159 const u32 S30 = -((s0 >> 30) & 1);
2160 const u32 S31 = -((s0 >> 31) & 1);
2161 const u32 S32 = -((s1 >> 0) & 1);
2162 const u32 S33 = -((s1 >> 1) & 1);
2163 const u32 S34 = -((s1 >> 2) & 1);
2164 const u32 S35 = -((s1 >> 3) & 1);
2165 const u32 S36 = -((s1 >> 4) & 1);
2166 const u32 S37 = -((s1 >> 5) & 1);
2167 const u32 S38 = -((s1 >> 6) & 1);
2168 const u32 S39 = -((s1 >> 7) & 1);
2169 const u32 S40 = -((s1 >> 8) & 1);
2170 const u32 S41 = -((s1 >> 9) & 1);
2171 const u32 S42 = -((s1 >> 10) & 1);
2172 const u32 S43 = -((s1 >> 11) & 1);
2173 const u32 S44 = -((s1 >> 12) & 1);
2174 const u32 S45 = -((s1 >> 13) & 1);
2175 const u32 S46 = -((s1 >> 14) & 1);
2176 const u32 S47 = -((s1 >> 15) & 1);
2177 const u32 S48 = -((s1 >> 16) & 1);
2178 const u32 S49 = -((s1 >> 17) & 1);
2179 const u32 S50 = -((s1 >> 18) & 1);
2180 const u32 S51 = -((s1 >> 19) & 1);
2181 const u32 S52 = -((s1 >> 20) & 1);
2182 const u32 S53 = -((s1 >> 21) & 1);
2183 const u32 S54 = -((s1 >> 22) & 1);
2184 const u32 S55 = -((s1 >> 23) & 1);
2185 const u32 S56 = -((s1 >> 24) & 1);
2186 const u32 S57 = -((s1 >> 25) & 1);
2187 const u32 S58 = -((s1 >> 26) & 1);
2188 const u32 S59 = -((s1 >> 27) & 1);
2189 const u32 S60 = -((s1 >> 28) & 1);
2190 const u32 S61 = -((s1 >> 29) & 1);
2191 const u32 S62 = -((s1 >> 30) & 1);
2192 const u32 S63 = -((s1 >> 31) & 1);
2198 const u32 w0s = pws[gid].i[0];
2199 const u32 w1s = pws[gid].i[1];
2201 const u32 K00 = -((w0s >> ( 0 + 7)) & 1);
2202 const u32 K01 = -((w0s >> ( 0 + 6)) & 1);
2203 const u32 K02 = -((w0s >> ( 0 + 5)) & 1);
2204 const u32 K03 = -((w0s >> ( 0 + 4)) & 1);
2205 const u32 K04 = -((w0s >> ( 0 + 3)) & 1);
2206 const u32 K05 = -((w0s >> ( 0 + 2)) & 1);
2207 const u32 K06 = -((w0s >> ( 0 + 1)) & 1);
2208 const u32 K07 = -((w0s >> ( 0 + 0)) & 1);
2209 const u32 K08 = -((w0s >> ( 8 + 7)) & 1);
2210 const u32 K09 = -((w0s >> ( 8 + 6)) & 1);
2211 const u32 K10 = -((w0s >> ( 8 + 5)) & 1);
2212 const u32 K11 = -((w0s >> ( 8 + 4)) & 1);
2213 const u32 K12 = -((w0s >> ( 8 + 3)) & 1);
2214 const u32 K13 = -((w0s >> ( 8 + 2)) & 1);
2215 const u32 K14 = -((w0s >> ( 8 + 1)) & 1);
2216 const u32 K15 = -((w0s >> ( 8 + 0)) & 1);
2217 const u32 K16 = -((w0s >> (16 + 7)) & 1);
2218 const u32 K17 = -((w0s >> (16 + 6)) & 1);
2219 const u32 K18 = -((w0s >> (16 + 5)) & 1);
2220 const u32 K19 = -((w0s >> (16 + 4)) & 1);
2221 const u32 K20 = -((w0s >> (16 + 3)) & 1);
2222 const u32 K21 = -((w0s >> (16 + 2)) & 1);
2223 const u32 K22 = -((w0s >> (16 + 1)) & 1);
2224 const u32 K23 = -((w0s >> (16 + 0)) & 1);
2225 const u32 K24 = -((w0s >> (24 + 7)) & 1);
2226 const u32 K25 = -((w0s >> (24 + 6)) & 1);
2227 const u32 K26 = -((w0s >> (24 + 5)) & 1);
2228 const u32 K27 = -((w0s >> (24 + 4)) & 1);
2229 const u32 K28 = -((w0s >> (24 + 3)) & 1);
2230 const u32 K29 = -((w0s >> (24 + 2)) & 1);
2231 const u32 K30 = -((w0s >> (24 + 1)) & 1);
2232 const u32 K31 = -((w0s >> (24 + 0)) & 1);
2233 const u32 K32 = -((w1s >> ( 0 + 7)) & 1);
2234 const u32 K33 = -((w1s >> ( 0 + 6)) & 1);
2235 const u32 K34 = -((w1s >> ( 0 + 5)) & 1);
2236 const u32 K35 = -((w1s >> ( 0 + 4)) & 1);
2237 const u32 K36 = -((w1s >> ( 0 + 3)) & 1);
2238 const u32 K37 = -((w1s >> ( 0 + 2)) & 1);
2239 const u32 K38 = -((w1s >> ( 0 + 1)) & 1);
2240 const u32 K39 = -((w1s >> ( 0 + 0)) & 1);
2241 const u32 K40 = -((w1s >> ( 8 + 7)) & 1);
2242 const u32 K41 = -((w1s >> ( 8 + 6)) & 1);
2243 const u32 K42 = -((w1s >> ( 8 + 5)) & 1);
2244 const u32 K43 = -((w1s >> ( 8 + 4)) & 1);
2245 const u32 K44 = -((w1s >> ( 8 + 3)) & 1);
2246 const u32 K45 = -((w1s >> ( 8 + 2)) & 1);
2247 const u32 K46 = -((w1s >> ( 8 + 1)) & 1);
2248 const u32 K47 = -((w1s >> ( 8 + 0)) & 1);
2249 const u32 K48 = -((w1s >> (16 + 7)) & 1);
2250 const u32 K49 = -((w1s >> (16 + 6)) & 1);
2251 const u32 K50 = -((w1s >> (16 + 5)) & 1);
2252 const u32 K51 = -((w1s >> (16 + 4)) & 1);
2253 const u32 K52 = -((w1s >> (16 + 3)) & 1);
2254 const u32 K53 = -((w1s >> (16 + 2)) & 1);
2255 const u32 K54 = -((w1s >> (16 + 1)) & 1);
2256 const u32 K55 = -((w1s >> (16 + 0)) & 1);
2262 const u32 pc_pos = get_local_id (1);
2264 const u32 il_pos = pc_pos * 32;
2299 k00 |= words_buf_r[pc_pos].b[ 0];
2300 k01 |= words_buf_r[pc_pos].b[ 1];
2301 k02 |= words_buf_r[pc_pos].b[ 2];
2302 k03 |= words_buf_r[pc_pos].b[ 3];
2303 k04 |= words_buf_r[pc_pos].b[ 4];
2304 k05 |= words_buf_r[pc_pos].b[ 5];
2305 k06 |= words_buf_r[pc_pos].b[ 6];
2306 k07 |= words_buf_r[pc_pos].b[ 7];
2307 k08 |= words_buf_r[pc_pos].b[ 8];
2308 k09 |= words_buf_r[pc_pos].b[ 9];
2309 k10 |= words_buf_r[pc_pos].b[10];
2310 k11 |= words_buf_r[pc_pos].b[11];
2311 k12 |= words_buf_r[pc_pos].b[12];
2312 k13 |= words_buf_r[pc_pos].b[13];
2313 k14 |= words_buf_r[pc_pos].b[14];
2314 k15 |= words_buf_r[pc_pos].b[15];
2315 k16 |= words_buf_r[pc_pos].b[16];
2316 k17 |= words_buf_r[pc_pos].b[17];
2317 k18 |= words_buf_r[pc_pos].b[18];
2318 k19 |= words_buf_r[pc_pos].b[19];
2319 k20 |= words_buf_r[pc_pos].b[20];
2320 k21 |= words_buf_r[pc_pos].b[21];
2321 k22 |= words_buf_r[pc_pos].b[22];
2322 k23 |= words_buf_r[pc_pos].b[23];
2323 k24 |= words_buf_r[pc_pos].b[24];
2324 k25 |= words_buf_r[pc_pos].b[25];
2325 k26 |= words_buf_r[pc_pos].b[26];
2326 k27 |= words_buf_r[pc_pos].b[27];
2327 k28 |= words_buf_r[pc_pos].b[28];
2328 k29 |= words_buf_r[pc_pos].b[29];
2329 k30 |= words_buf_r[pc_pos].b[30];
2330 k31 |= words_buf_r[pc_pos].b[31];
2332 // KGS!@#$% including IP
2337 u32 D03 = 0xffffffff;
2339 u32 D05 = 0xffffffff;
2340 u32 D06 = 0xffffffff;
2341 u32 D07 = 0xffffffff;
2347 u32 D13 = 0xffffffff;
2350 u32 D16 = 0xffffffff;
2351 u32 D17 = 0xffffffff;
2356 u32 D22 = 0xffffffff;
2358 u32 D24 = 0xffffffff;
2360 u32 D26 = 0xffffffff;
2362 u32 D28 = 0xffffffff;
2363 u32 D29 = 0xffffffff;
2364 u32 D30 = 0xffffffff;
2365 u32 D31 = 0xffffffff;
2374 u32 D40 = 0xffffffff;
2375 u32 D41 = 0xffffffff;
2376 u32 D42 = 0xffffffff;
2378 u32 D44 = 0xffffffff;
2389 u32 D55 = 0xffffffff;
2392 u32 D58 = 0xffffffff;
2395 u32 D61 = 0xffffffff;
2396 u32 D62 = 0xffffffff;
2397 u32 D63 = 0xffffffff;
2401 k00, k01, k02, k03, k04, k05, k06,
2402 k07, k08, k09, k10, k11, k12, k13,
2403 k14, k15, k16, k17, k18, k19, k20,
2404 k21, k22, k23, k24, k25, k26, k27,
2405 k28, k29, k30, k31, K32, K33, K34,
2406 K35, K36, K37, K38, K39, K40, K41,
2407 K42, K43, K44, K45, K46, K47, K48,
2408 K49, K50, K51, K52, K53, K54, K55,
2409 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
2410 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
2411 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
2412 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
2413 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
2414 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
2415 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
2416 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
2421 tmpResult |= D00 ^ S00;
2422 tmpResult |= D01 ^ S01;
2423 tmpResult |= D02 ^ S02;
2424 tmpResult |= D03 ^ S03;
2425 tmpResult |= D04 ^ S04;
2426 tmpResult |= D05 ^ S05;
2427 tmpResult |= D06 ^ S06;
2428 tmpResult |= D07 ^ S07;
2429 tmpResult |= D08 ^ S08;
2430 tmpResult |= D09 ^ S09;
2431 tmpResult |= D10 ^ S10;
2432 tmpResult |= D11 ^ S11;
2433 tmpResult |= D12 ^ S12;
2434 tmpResult |= D13 ^ S13;
2435 tmpResult |= D14 ^ S14;
2436 tmpResult |= D15 ^ S15;
2438 if (tmpResult == 0xffffffff) return;
2440 tmpResult |= D16 ^ S16;
2441 tmpResult |= D17 ^ S17;
2442 tmpResult |= D18 ^ S18;
2443 tmpResult |= D19 ^ S19;
2444 tmpResult |= D20 ^ S20;
2445 tmpResult |= D21 ^ S21;
2446 tmpResult |= D22 ^ S22;
2447 tmpResult |= D23 ^ S23;
2448 tmpResult |= D24 ^ S24;
2449 tmpResult |= D25 ^ S25;
2450 tmpResult |= D26 ^ S26;
2451 tmpResult |= D27 ^ S27;
2452 tmpResult |= D28 ^ S28;
2453 tmpResult |= D29 ^ S29;
2454 tmpResult |= D30 ^ S30;
2455 tmpResult |= D31 ^ S31;
2457 if (tmpResult == 0xffffffff) return;
2459 tmpResult |= D32 ^ S32;
2460 tmpResult |= D33 ^ S33;
2461 tmpResult |= D34 ^ S34;
2462 tmpResult |= D35 ^ S35;
2463 tmpResult |= D36 ^ S36;
2464 tmpResult |= D37 ^ S37;
2465 tmpResult |= D38 ^ S38;
2466 tmpResult |= D39 ^ S39;
2467 tmpResult |= D40 ^ S40;
2468 tmpResult |= D41 ^ S41;
2469 tmpResult |= D42 ^ S42;
2470 tmpResult |= D43 ^ S43;
2471 tmpResult |= D44 ^ S44;
2472 tmpResult |= D45 ^ S45;
2473 tmpResult |= D46 ^ S46;
2474 tmpResult |= D47 ^ S47;
2476 if (tmpResult == 0xffffffff) return;
2478 tmpResult |= D48 ^ S48;
2479 tmpResult |= D49 ^ S49;
2480 tmpResult |= D50 ^ S50;
2481 tmpResult |= D51 ^ S51;
2482 tmpResult |= D52 ^ S52;
2483 tmpResult |= D53 ^ S53;
2484 tmpResult |= D54 ^ S54;
2485 tmpResult |= D55 ^ S55;
2486 tmpResult |= D56 ^ S56;
2487 tmpResult |= D57 ^ S57;
2488 tmpResult |= D58 ^ S58;
2489 tmpResult |= D59 ^ S59;
2490 tmpResult |= D60 ^ S60;
2491 tmpResult |= D61 ^ S61;
2492 tmpResult |= D62 ^ S62;
2493 tmpResult |= D63 ^ S63;
2495 if (tmpResult == 0xffffffff) return;
2497 const u32 slice = 31 - clz (~tmpResult);
2503 // transpose bitslice mod : attention race conditions, need different buffers for *in and *out
2506 __kernel void m03000_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
2508 const u32 gid = get_global_id (0);
2510 const u32 block = gid / 32;
2511 const u32 slice = gid % 32;
2513 const u32 w0 = mod[gid];
2516 for (int i = 0; i < 32; i += 8)
2518 atomic_or (&words_buf_r[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice));
2519 atomic_or (&words_buf_r[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice));
2520 atomic_or (&words_buf_r[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice));
2521 atomic_or (&words_buf_r[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice));
2522 atomic_or (&words_buf_r[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice));
2523 atomic_or (&words_buf_r[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice));
2524 atomic_or (&words_buf_r[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice));
2525 atomic_or (&words_buf_r[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice));
2529 __kernel void m03000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2535 const u32 gid = get_global_id (0);
2536 const u32 lid = get_local_id (0);
2538 if (gid >= gid_max) return;
2544 m03000m (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
2547 __kernel void m03000_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2551 __kernel void m03000_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2555 __kernel void m03000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2561 const u32 gid = get_global_id (0);
2562 const u32 lid = get_local_id (0);
2564 if (gid >= gid_max) return;
2570 m03000s (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
2573 __kernel void m03000_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2577 __kernel void m03000_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)