2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes were taken from JtR, license below
9 #include "include/constants.h"
10 #include "include/kernel_vendor.h"
17 #include "include/kernel_functions.c"
18 #include "OpenCL/types_ocl.c"
19 #include "OpenCL/common.c"
21 #define COMPARE_S "OpenCL/check_single_comp4_bs.c"
22 #define COMPARE_M "OpenCL/check_multi_comp4_bs.c"
41 // Bitslice DES S-boxes with LOP3.LUT instructions
42 // For NVIDIA Maxwell architecture and CUDA 7.5 RC
43 // by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
45 // Gate counts: 25 24 25 18 25 24 24 23
47 // Depth: 8 7 7 6 8 10 10 8
50 // Note that same S-box function with a lower gate count isn't necessarily faster.
52 // These Boolean expressions corresponding to DES S-boxes were
53 // discovered by <deeplearningjohndoe at gmail.com>
55 // This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
56 // Redistribution and use in source and binary forms, with or without
57 // modification, are permitted.
59 // The underlying mathematical formulas are NOT copyrighted.
62 #define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));
64 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
66 LUT(xAA55AA5500550055, a1, a4, a6, 0xC1)
67 LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
68 LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
69 LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
70 LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
71 LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
72 LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
73 LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
74 LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
75 LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
76 LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
77 LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
78 LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
79 LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
80 LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
81 LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
82 LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
83 LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
84 LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
85 LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
86 LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
87 LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
88 LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
89 LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
90 LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)
98 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
100 LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97)
101 LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
102 LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76)
103 LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
104 LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
105 LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65)
106 LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
107 LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
108 LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
109 LUT(x3333CCCC00000000, a2, a5, a6, 0x14)
110 LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
111 LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
112 LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
113 LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
114 LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
115 LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
116 LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
117 LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
118 LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
119 LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
120 LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
121 LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
122 LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
123 LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)
131 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
133 LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
134 LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
135 LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
136 LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
137 LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
138 LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
139 LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
140 LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49)
141 LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
142 LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
143 LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
144 LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
145 LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
146 LUT(x500F500F500F500F, a1, a3, a4, 0x98)
147 LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
148 LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
149 LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
150 LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
151 LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
152 LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
153 LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
154 LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
155 LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
156 LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
157 LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)
165 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
167 LUT(x55F055F055F055F0, a1, a3, a4, 0x72)
168 LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
169 LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59)
170 LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
171 LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
172 LUT(x9999666699996666, a1, a2, a5, 0x69)
173 LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
174 LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
175 LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
176 LUT(x4848484848484848, a1, a2, a3, 0x12)
177 LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
178 LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
179 LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
180 LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
181 LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
182 LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
183 LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
184 LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)
192 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
194 LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
195 LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9)
196 LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
197 LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
198 LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
199 LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
200 LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
201 LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
202 LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
203 LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
204 LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
205 LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
206 LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
207 LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
208 LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
209 LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
210 LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
211 LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
212 LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
213 LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
214 LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
215 LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
216 LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
217 LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
218 LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)
226 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
228 LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2)
229 LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
230 LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
231 LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
232 LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
233 LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
234 LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
235 LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
236 LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
237 LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
238 LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
239 LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
240 LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
241 LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
242 LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
243 LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
244 LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
245 LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
246 LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
247 LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
248 LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
249 LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
250 LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
251 LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)
259 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
261 LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
262 LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
263 LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
264 LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
265 LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
266 LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
267 LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
268 LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
269 LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
270 LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
271 LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
272 LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
273 LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
274 LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
275 LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
276 LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23)
277 LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
278 LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
279 LUT(xA050A050A050A050, a1, a3, a4, 0x21)
280 LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
281 LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
282 LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
283 LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
284 LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)
292 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
294 LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
295 LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
296 LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
297 LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
298 LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
299 LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
300 LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
301 LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
302 LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
303 LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
304 LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
305 LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
306 LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
307 LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
308 LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
309 LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
310 LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
311 LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
312 LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
313 LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
314 LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
315 LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
316 LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)
327 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
328 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
330 * Gate counts: 49 44 46 33 48 46 46 41
333 * Several same-gate-count expressions for each S-box are included (for use on
334 * different CPUs/GPUs).
336 * These Boolean expressions corresponding to DES S-boxes have been generated
337 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
338 * John the Ripper password cracker: http://www.openwall.com/john/
339 * Being mathematical formulas, they are not copyrighted and are free for reuse
342 * This file (a specific representation of the S-box expressions, surrounding
343 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
344 * Redistribution and use in source and binary forms, with or without
345 * modification, are permitted. (This is a heavily cut-down "BSD license".)
347 * The effort has been sponsored by Rapid7: http://www.rapid7.com
350 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
352 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
354 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
355 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
356 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
357 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
358 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
359 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
360 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
361 u32 x00, x01, x10, x11, x20, x21, x30, x31;
363 x55005500 = a1 & ~a5;
364 x5A0F5A0F = a4 ^ x55005500;
367 x22226666 = x3333FFFF & x66666666;
368 x2D2D6969 = a4 ^ x22226666;
369 x25202160 = x2D2D6969 & ~x5A0F5A0F;
372 x33CCCC33 = a3 ^ x00FFFF00;
373 x4803120C = x5A0F5A0F & ~x33CCCC33;
374 x2222FFFF = a6 | x22226666;
375 x6A21EDF3 = x4803120C ^ x2222FFFF;
376 x4A01CC93 = x6A21EDF3 & ~x25202160;
379 x7F75FFFF = x6A21EDF3 | x5555FFFF;
380 x00D20096 = a5 & ~x2D2D6969;
381 x7FA7FF69 = x7F75FFFF ^ x00D20096;
383 x0A0A0000 = a4 & ~x5555FFFF;
384 x0AD80096 = x00D20096 ^ x0A0A0000;
385 x00999900 = x00FFFF00 & ~x66666666;
386 x0AD99996 = x0AD80096 | x00999900;
388 x22332233 = a3 & ~x55005500;
389 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
390 x054885C0 = x257AA5F0 & ~x22332233;
391 xFAB77A3F = ~x054885C0;
392 x2221EDF3 = x3333FFFF & x6A21EDF3;
393 xD89697CC = xFAB77A3F ^ x2221EDF3;
394 x20 = x7FA7FF69 & ~a2;
395 x21 = x20 ^ xD89697CC;
398 x05B77AC0 = x00FFFF00 ^ x054885C0;
399 x05F77AD6 = x00D20096 | x05B77AC0;
400 x36C48529 = x3333FFFF ^ x05F77AD6;
401 x6391D07C = a1 ^ x36C48529;
402 xBB0747B0 = xD89697CC ^ x6391D07C;
403 x00 = x25202160 | a2;
404 x01 = x00 ^ xBB0747B0;
407 x4C460000 = x3333FFFF ^ x7F75FFFF;
408 x4EDF9996 = x0AD99996 | x4C460000;
409 x2D4E49EA = x6391D07C ^ x4EDF9996;
410 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
411 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
412 x10 = x4A01CC93 | a2;
413 x11 = x10 ^ x96B1B65A;
416 x5AFF5AFF = a5 | x5A0F5A0F;
417 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
418 x4201C010 = x4A01CC93 & x6391D07C;
419 x10B0D205 = x52B11215 ^ x4201C010;
420 x30 = x10B0D205 | a2;
421 x31 = x30 ^ x0AD99996;
425 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
428 u32 x55550000, x00AA00FF, x33BB33FF;
429 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
430 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
431 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
432 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
433 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
434 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
435 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
436 u32 x00, x01, x10, x11, x20, x21, x30, x31;
440 x55550000 = a1 & ~a6;
441 x00AA00FF = a5 & ~x55550000;
442 x33BB33FF = a2 | x00AA00FF;
444 x33CC0000 = x33CC33CC & ~a6;
445 x11441144 = a1 & x33CC33CC;
446 x11BB11BB = a5 ^ x11441144;
447 x003311BB = x11BB11BB & ~x33CC0000;
450 x336600FF = x00AA00FF ^ x33CC0000;
451 x332200FF = x33BB33FF & x336600FF;
452 x332200F0 = x332200FF & ~x00000F0F;
454 x0302000F = a3 & x332200FF;
456 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
457 x33CCCC33 = a6 ^ x33CC33CC;
458 x33CCC030 = x33CCCC33 & ~x00000F0F;
459 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
460 x10 = a4 & ~x332200F0;
461 x11 = x10 ^ x9A646A95;
464 x00333303 = a2 & ~x33CCC030;
465 x118822B8 = x11BB11BB ^ x00333303;
466 xA8208805 = xA9A8AAA5 & ~x118822B8;
467 x3CC3C33C = a3 ^ x33CCCC33;
468 x94E34B39 = xA8208805 ^ x3CC3C33C;
469 x00 = x33BB33FF & ~a4;
470 x01 = x00 ^ x94E34B39;
473 x0331330C = x0302000F ^ x00333303;
474 x3FF3F33C = x3CC3C33C | x0331330C;
475 xA9DF596A = x33BB33FF ^ x9A646A95;
476 xA9DF5F6F = x00000F0F | xA9DF596A;
477 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
479 xA9466A6A = x332200FF ^ x9A646A95;
480 x3DA52153 = x94E34B39 ^ xA9466A6A;
481 x29850143 = xA9DF5F6F & x3DA52153;
482 x33C0330C = x33CC33CC & x3FF3F33C;
483 x1A45324F = x29850143 ^ x33C0330C;
484 x20 = x1A45324F | a4;
485 x21 = x20 ^ x962CAC53;
488 x0A451047 = x1A45324F & ~x118822B8;
489 xBBDFDD7B = x33CCCC33 | xA9DF596A;
490 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
491 x30 = x003311BB | a4;
492 x31 = x30 ^ xB19ACD3C;
496 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
498 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
499 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
500 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
501 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
502 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
503 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
504 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
505 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
506 u32 x00, x01, x10, x11, x20, x21, x30, x31;
508 x44444444 = a1 & ~a2;
510 x4F4FF4F4 = x44444444 | x0F0FF0F0;
512 x00AAAA00 = x00FFFF00 & ~a1;
513 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
515 x3C3CC3C3 = a2 ^ x0F0FF0F0;
516 x3C3C0000 = x3C3CC3C3 & ~a6;
517 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
518 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
520 x00005EF4 = a6 & x4FE55EF4;
521 x00FF5EFF = a4 | x00005EF4;
522 x00555455 = a1 & x00FF5EFF;
523 x3C699796 = x3C3CC3C3 ^ x00555455;
524 x30 = x4FE55EF4 & ~a5;
525 x31 = x30 ^ x3C699796;
528 x000FF000 = x0F0FF0F0 & x00FFFF00;
530 x26D9A15E = x7373F4F4 ^ x55AA55AA;
531 x2FDFAF5F = a3 | x26D9A15E;
532 x2FD00F5F = x2FDFAF5F & ~x000FF000;
534 x55AAFFAA = x00AAAA00 | x55AA55AA;
535 x28410014 = x3C699796 & ~x55AAFFAA;
537 x000000CC = x000000FF & ~a2;
538 x284100D8 = x28410014 ^ x000000CC;
540 x204100D0 = x7373F4F4 & x284100D8;
541 x3C3CC3FF = x3C3CC3C3 | x000000FF;
542 x1C3CC32F = x3C3CC3FF & ~x204100D0;
543 x4969967A = a1 ^ x1C3CC32F;
544 x10 = x2FD00F5F & a5;
545 x11 = x10 ^ x4969967A;
548 x4CC44CC4 = x4FE55EF4 & ~a2;
549 x40C040C0 = x4CC44CC4 & ~a3;
550 xC3C33C3C = ~x3C3CC3C3;
551 x9669C396 = x55AAFFAA ^ xC3C33C3C;
552 xD6A98356 = x40C040C0 ^ x9669C396;
553 x00 = a5 & ~x0C840A00;
554 x01 = x00 ^ xD6A98356;
557 xD6E9C3D6 = x40C040C0 | x9669C396;
558 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
559 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
560 x001A000B = a4 & ~x4FE55EF4;
561 x9A1F2D1B = x9A072D12 | x001A000B;
562 x20 = a5 & ~x284100D8;
563 x21 = x20 ^ x9A1F2D1B;
567 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
569 u32 x5A5A5A5A, x0F0FF0F0;
570 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
571 x52FBCA0F, x61C8F93C;
572 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
573 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
574 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
575 u32 x00, x01, x10, x11, x20, x21, x30, x31;
580 x33FFCC00 = a5 ^ x33FF33FF;
581 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
582 x0C0CC0C0 = x0F0FF0F0 & ~a2;
583 x0CF3C03F = a4 ^ x0C0CC0C0;
584 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
585 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
586 x61C8F93C = a2 ^ x52FBCA0F;
588 x00C0C03C = x0CF3C03F & x61C8F93C;
589 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
590 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
591 x30908326 = x3B92A366 & ~x0F0F30C0;
592 x3C90B3D6 = x0C0030F0 ^ x30908326;
595 x0C0CFFFF = a5 | x0C0CC0C0;
596 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
597 x04124C11 = x379E5C99 & ~x33CC33CC;
598 x56E9861E = x52FBCA0F ^ x04124C11;
599 x00 = a6 & ~x3C90B3D6;
600 x01 = x00 ^ x56E9861E;
603 xA91679E1 = ~x56E9861E;
604 x10 = x3C90B3D6 & ~a6;
605 x11 = x10 ^ xA91679E1;
608 x9586CA37 = x3C90B3D6 ^ xA91679E1;
609 x8402C833 = x9586CA37 & ~x33CC33CC;
610 x84C2C83F = x00C0C03C | x8402C833;
611 xB35C94A6 = x379E5C99 ^ x84C2C83F;
612 x20 = x61C8F93C | a6;
613 x21 = x20 ^ xB35C94A6;
616 x30 = a6 & x61C8F93C;
617 x31 = x30 ^ xB35C94A6;
621 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
623 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
624 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
625 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
626 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
627 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
628 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
629 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
630 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
631 u32 x00, x01, x10, x11, x20, x21, x30, x31;
634 x77770000 = x77777777 & ~a6;
635 x22225555 = a1 ^ x77770000;
636 x11116666 = a3 ^ x22225555;
637 x1F1F6F6F = a4 | x11116666;
639 x70700000 = x77770000 & ~a4;
640 x43433333 = a3 ^ x70700000;
641 x00430033 = a5 & x43433333;
642 x55557777 = a1 | x11116666;
643 x55167744 = x00430033 ^ x55557777;
644 x5A19784B = a4 ^ x55167744;
646 x5A1987B4 = a6 ^ x5A19784B;
647 x7A3BD7F5 = x22225555 | x5A1987B4;
648 x003B00F5 = a5 & x7A3BD7F5;
649 x221955A0 = x22225555 ^ x003B00F5;
650 x05050707 = a4 & x55557777;
651 x271C52A7 = x221955A0 ^ x05050707;
653 x2A2A82A0 = x7A3BD7F5 & ~a1;
654 x6969B193 = x43433333 ^ x2A2A82A0;
655 x1FE06F90 = a5 ^ x1F1F6F6F;
656 x16804E00 = x1FE06F90 & ~x6969B193;
657 xE97FB1FF = ~x16804E00;
658 x20 = xE97FB1FF & ~a2;
659 x21 = x20 ^ x5A19784B;
662 x43403302 = x43433333 & ~x003B00F5;
663 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
664 x37DEFFB7 = x271C52A7 | x35CAED30;
665 x349ECCB5 = x37DEFFB7 & ~x43403302;
666 x0B01234A = x1F1F6F6F & ~x349ECCB5;
668 x101884B4 = x5A1987B4 & x349ECCB5;
669 x0FF8EB24 = x1FE06F90 ^ x101884B4;
670 x41413333 = x43433333 & x55557777;
671 x4FF9FB37 = x0FF8EB24 | x41413333;
672 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
673 x30 = x4FC2FBC2 & a2;
674 x31 = x30 ^ x271C52A7;
677 x22222222 = a1 ^ x77777777;
678 x16BCEE97 = x349ECCB5 ^ x22222222;
679 x0F080B04 = a4 & x0FF8EB24;
680 x19B4E593 = x16BCEE97 ^ x0F080B04;
681 x00 = x0B01234A | a2;
682 x01 = x00 ^ x19B4E593;
685 x5C5C5C5C = x1F1F6F6F ^ x43433333;
686 x4448184C = x5C5C5C5C & ~x19B4E593;
687 x2DDABE71 = x22225555 ^ x0FF8EB24;
688 x6992A63D = x4448184C ^ x2DDABE71;
689 x10 = x1F1F6F6F & a2;
690 x11 = x10 ^ x6992A63D;
694 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
697 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
698 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
699 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
700 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
701 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
702 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
703 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
704 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
705 u32 x00, x01, x10, x11, x20, x21, x30, x31;
710 x11115555 = a1 & x3333FFFF;
711 x22DD6699 = x33CC33CC ^ x11115555;
712 x22DD9966 = a6 ^ x22DD6699;
713 x00220099 = a5 & ~x22DD9966;
715 x00551144 = a1 & x22DD9966;
716 x33662277 = a2 ^ x00551144;
718 x7B7E7A7F = x33662277 | x5A5A5A5A;
719 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
721 x09030C06 = a3 & x59A31CE6;
722 x09030000 = x09030C06 & ~a6;
723 x336622FF = x00220099 | x33662277;
724 x3A6522FF = x09030000 ^ x336622FF;
725 x30 = x3A6522FF & a4;
726 x31 = x30 ^ x59A31CE6;
729 x484D494C = a2 ^ x7B7E7A7F;
730 x0000B6B3 = a6 & ~x484D494C;
731 x0F0FB9BC = a3 ^ x0000B6B3;
732 x00FC00F9 = a5 & ~x09030C06;
733 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
735 x5DF75DF7 = a1 | x59A31CE6;
736 x116600F7 = x336622FF & x5DF75DF7;
737 x1E69B94B = x0F0FB9BC ^ x116600F7;
738 x1668B94B = x1E69B94B & ~x09030000;
739 x20 = x00220099 | a4;
740 x21 = x20 ^ x1668B94B;
743 x7B7B7B7B = a2 | x5A5A5A5A;
744 x411E5984 = x3A6522FF ^ x7B7B7B7B;
745 x1FFFFDFD = x11115555 | x0FFFB9FD;
746 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
748 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
749 x004B002D = a5 & ~x3CB4DFD2;
750 xB7B2B6B3 = ~x484D494C;
751 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
752 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
753 x10 = xCC82CDE5 & ~a4;
754 x11 = x10 ^ x5EE1A479;
757 x0055EEBB = a6 ^ x00551144;
758 x5A5AECE9 = a1 ^ x0F0FB9BC;
759 x0050ECA9 = x0055EEBB & x5A5AECE9;
760 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
761 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
762 x00 = x0FFFB9FD & ~a4;
763 x01 = x00 ^ xC59A2D67;
767 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
769 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
770 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
771 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
772 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
773 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
774 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
775 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
776 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
777 u32 x00, x01, x10, x11, x20, x21, x30, x31;
780 x3CC33CC3 = a3 ^ x0FF00FF0;
781 x00003CC3 = a6 & x3CC33CC3;
782 x0F000F00 = a4 & x0FF00FF0;
783 x5A555A55 = a2 ^ x0F000F00;
784 x00001841 = x00003CC3 & x5A555A55;
786 x00000F00 = a6 & x0F000F00;
787 x33333C33 = a3 ^ x00000F00;
788 x7B777E77 = x5A555A55 | x33333C33;
789 x0FF0F00F = a6 ^ x0FF00FF0;
790 x74878E78 = x7B777E77 ^ x0FF0F00F;
791 x30 = a1 & ~x00001841;
792 x31 = x30 ^ x74878E78;
795 x003C003C = a5 & ~x3CC33CC3;
796 x5A7D5A7D = x5A555A55 | x003C003C;
797 x333300F0 = x00003CC3 ^ x33333C33;
798 x694E5A8D = x5A7D5A7D ^ x333300F0;
800 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
801 x000F0303 = a4 & ~x0FF0CCCC;
802 x5A505854 = x5A555A55 & ~x000F0303;
803 x33CC000F = a5 ^ x333300F0;
804 x699C585B = x5A505854 ^ x33CC000F;
806 x7F878F78 = x0F000F00 | x74878E78;
807 x21101013 = a3 & x699C585B;
808 x7F979F7B = x7F878F78 | x21101013;
809 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
810 x4F9493BB = x7F979F7B ^ x30030CC0;
811 x00 = x4F9493BB & ~a1;
812 x01 = x00 ^ x694E5A8D;
815 x6F9CDBFB = x699C585B | x4F9493BB;
816 x0000DBFB = a6 & x6F9CDBFB;
817 x00005151 = a2 & x0000DBFB;
818 x26DAC936 = x694E5A8D ^ x4F9493BB;
819 x26DA9867 = x00005151 ^ x26DAC936;
821 x27DA9877 = x21101013 | x26DA9867;
822 x27DA438C = x0000DBFB ^ x27DA9877;
823 x2625C9C9 = a5 ^ x26DAC936;
824 x27FFCBCD = x27DA438C | x2625C9C9;
825 x20 = x27FFCBCD & a1;
826 x21 = x20 ^ x699C585B;
829 x27FF1036 = x0000DBFB ^ x27FFCBCD;
830 x27FF103E = x003C003C | x27FF1036;
831 xB06B6C44 = ~x4F9493BB;
832 x97947C7A = x27FF103E ^ xB06B6C44;
833 x10 = x97947C7A & ~a1;
834 x11 = x10 ^ x26DA9867;
838 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
840 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
841 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
842 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
843 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
844 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
845 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
846 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
847 u32 x00, x01, x10, x11, x20, x21, x30, x31;
849 x0C0C0C0C = a3 & ~a2;
850 x0000F0F0 = a5 & ~a3;
851 x00FFF00F = a4 ^ x0000F0F0;
852 x00555005 = a1 & x00FFF00F;
853 x00515001 = x00555005 & ~x0C0C0C0C;
855 x33000330 = a2 & ~x00FFF00F;
856 x77555775 = a1 | x33000330;
857 x30303030 = a2 & ~a3;
858 x3030CFCF = a5 ^ x30303030;
859 x30104745 = x77555775 & x3030CFCF;
860 x30555745 = x00555005 | x30104745;
862 xFF000FF0 = ~x00FFF00F;
863 xCF1048B5 = x30104745 ^ xFF000FF0;
864 x080A080A = a3 & ~x77555775;
865 xC71A40BF = xCF1048B5 ^ x080A080A;
866 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
867 x10 = x00515001 | a6;
868 x11 = x10 ^ xCB164CB3;
871 x9E4319E6 = a1 ^ xCB164CB3;
872 x000019E6 = a5 & x9E4319E6;
873 xF429738C = a2 ^ xC71A40BF;
874 xF4296A6A = x000019E6 ^ xF429738C;
875 xC729695A = x33000330 ^ xF4296A6A;
877 xC47C3D2F = x30555745 ^ xF4296A6A;
878 xF77F3F3F = a2 | xC47C3D2F;
879 x9E43E619 = a5 ^ x9E4319E6;
880 x693CD926 = xF77F3F3F ^ x9E43E619;
881 x20 = x30555745 & a6;
882 x21 = x20 ^ x693CD926;
885 xF719A695 = x3030CFCF ^ xC729695A;
886 xF4FF73FF = a4 | xF429738C;
887 x03E6D56A = xF719A695 ^ xF4FF73FF;
888 x56B3803F = a1 ^ x03E6D56A;
889 x30 = x56B3803F & a6;
890 x31 = x30 ^ xC729695A;
893 xF700A600 = xF719A695 & ~a4;
894 x61008000 = x693CD926 & xF700A600;
895 x03B7856B = x00515001 ^ x03E6D56A;
896 x62B7056B = x61008000 ^ x03B7856B;
897 x00 = x62B7056B | a6;
898 x01 = x00 ^ xC729695A;
905 #if defined IS_AMD || defined IS_GENERIC
908 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
909 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
911 * Gate counts: 49 44 46 33 48 46 46 41
914 * Several same-gate-count expressions for each S-box are included (for use on
915 * different CPUs/GPUs).
917 * These Boolean expressions corresponding to DES S-boxes have been generated
918 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
919 * John the Ripper password cracker: http://www.openwall.com/john/
920 * Being mathematical formulas, they are not copyrighted and are free for reuse
923 * This file (a specific representation of the S-box expressions, surrounding
924 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
925 * Redistribution and use in source and binary forms, with or without
926 * modification, are permitted. (This is a heavily cut-down "BSD license".)
928 * The effort has been sponsored by Rapid7: http://www.rapid7.com
931 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
933 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
935 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
936 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
937 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
938 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
939 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
940 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
941 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
942 u32 x00, x01, x10, x11, x20, x21, x30, x31;
944 x55005500 = a1 & ~a5;
945 x5A0F5A0F = a4 ^ x55005500;
948 x22226666 = x3333FFFF & x66666666;
949 x2D2D6969 = a4 ^ x22226666;
950 x25202160 = x2D2D6969 & ~x5A0F5A0F;
953 x33CCCC33 = a3 ^ x00FFFF00;
954 x4803120C = x5A0F5A0F & ~x33CCCC33;
955 x2222FFFF = a6 | x22226666;
956 x6A21EDF3 = x4803120C ^ x2222FFFF;
957 x4A01CC93 = x6A21EDF3 & ~x25202160;
960 x7F75FFFF = x6A21EDF3 | x5555FFFF;
961 x00D20096 = a5 & ~x2D2D6969;
962 x7FA7FF69 = x7F75FFFF ^ x00D20096;
964 x0A0A0000 = a4 & ~x5555FFFF;
965 x0AD80096 = x00D20096 ^ x0A0A0000;
966 x00999900 = x00FFFF00 & ~x66666666;
967 x0AD99996 = x0AD80096 | x00999900;
969 x22332233 = a3 & ~x55005500;
970 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
971 x054885C0 = x257AA5F0 & ~x22332233;
972 xFAB77A3F = ~x054885C0;
973 x2221EDF3 = x3333FFFF & x6A21EDF3;
974 xD89697CC = xFAB77A3F ^ x2221EDF3;
975 x20 = x7FA7FF69 & ~a2;
976 x21 = x20 ^ xD89697CC;
979 x05B77AC0 = x00FFFF00 ^ x054885C0;
980 x05F77AD6 = x00D20096 | x05B77AC0;
981 x36C48529 = x3333FFFF ^ x05F77AD6;
982 x6391D07C = a1 ^ x36C48529;
983 xBB0747B0 = xD89697CC ^ x6391D07C;
984 x00 = x25202160 | a2;
985 x01 = x00 ^ xBB0747B0;
988 x4C460000 = x3333FFFF ^ x7F75FFFF;
989 x4EDF9996 = x0AD99996 | x4C460000;
990 x2D4E49EA = x6391D07C ^ x4EDF9996;
991 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
992 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
993 x10 = x4A01CC93 | a2;
994 x11 = x10 ^ x96B1B65A;
997 x5AFF5AFF = a5 | x5A0F5A0F;
998 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
999 x4201C010 = x4A01CC93 & x6391D07C;
1000 x10B0D205 = x52B11215 ^ x4201C010;
1001 x30 = x10B0D205 | a2;
1002 x31 = x30 ^ x0AD99996;
1006 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1009 u32 x55550000, x00AA00FF, x33BB33FF;
1010 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
1011 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
1012 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
1013 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
1014 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
1015 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
1016 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
1017 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1019 x33CC33CC = a2 ^ a5;
1021 x55550000 = a1 & ~a6;
1022 x00AA00FF = a5 & ~x55550000;
1023 x33BB33FF = a2 | x00AA00FF;
1025 x33CC0000 = x33CC33CC & ~a6;
1026 x11441144 = a1 & x33CC33CC;
1027 x11BB11BB = a5 ^ x11441144;
1028 x003311BB = x11BB11BB & ~x33CC0000;
1030 x00000F0F = a3 & a6;
1031 x336600FF = x00AA00FF ^ x33CC0000;
1032 x332200FF = x33BB33FF & x336600FF;
1033 x332200F0 = x332200FF & ~x00000F0F;
1035 x0302000F = a3 & x332200FF;
1037 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
1038 x33CCCC33 = a6 ^ x33CC33CC;
1039 x33CCC030 = x33CCCC33 & ~x00000F0F;
1040 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
1041 x10 = a4 & ~x332200F0;
1042 x11 = x10 ^ x9A646A95;
1045 x00333303 = a2 & ~x33CCC030;
1046 x118822B8 = x11BB11BB ^ x00333303;
1047 xA8208805 = xA9A8AAA5 & ~x118822B8;
1048 x3CC3C33C = a3 ^ x33CCCC33;
1049 x94E34B39 = xA8208805 ^ x3CC3C33C;
1050 x00 = x33BB33FF & ~a4;
1051 x01 = x00 ^ x94E34B39;
1054 x0331330C = x0302000F ^ x00333303;
1055 x3FF3F33C = x3CC3C33C | x0331330C;
1056 xA9DF596A = x33BB33FF ^ x9A646A95;
1057 xA9DF5F6F = x00000F0F | xA9DF596A;
1058 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
1060 xA9466A6A = x332200FF ^ x9A646A95;
1061 x3DA52153 = x94E34B39 ^ xA9466A6A;
1062 x29850143 = xA9DF5F6F & x3DA52153;
1063 x33C0330C = x33CC33CC & x3FF3F33C;
1064 x1A45324F = x29850143 ^ x33C0330C;
1065 x20 = x1A45324F | a4;
1066 x21 = x20 ^ x962CAC53;
1069 x0A451047 = x1A45324F & ~x118822B8;
1070 xBBDFDD7B = x33CCCC33 | xA9DF596A;
1071 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
1072 x30 = x003311BB | a4;
1073 x31 = x30 ^ xB19ACD3C;
1077 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1079 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
1080 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
1081 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
1082 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
1083 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
1084 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
1085 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
1086 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
1087 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1089 x44444444 = a1 & ~a2;
1090 x0F0FF0F0 = a3 ^ a6;
1091 x4F4FF4F4 = x44444444 | x0F0FF0F0;
1092 x00FFFF00 = a4 ^ a6;
1093 x00AAAA00 = x00FFFF00 & ~a1;
1094 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
1096 x3C3CC3C3 = a2 ^ x0F0FF0F0;
1097 x3C3C0000 = x3C3CC3C3 & ~a6;
1098 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
1099 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
1101 x00005EF4 = a6 & x4FE55EF4;
1102 x00FF5EFF = a4 | x00005EF4;
1103 x00555455 = a1 & x00FF5EFF;
1104 x3C699796 = x3C3CC3C3 ^ x00555455;
1105 x30 = x4FE55EF4 & ~a5;
1106 x31 = x30 ^ x3C699796;
1109 x000FF000 = x0F0FF0F0 & x00FFFF00;
1110 x55AA55AA = a1 ^ a4;
1111 x26D9A15E = x7373F4F4 ^ x55AA55AA;
1112 x2FDFAF5F = a3 | x26D9A15E;
1113 x2FD00F5F = x2FDFAF5F & ~x000FF000;
1115 x55AAFFAA = x00AAAA00 | x55AA55AA;
1116 x28410014 = x3C699796 & ~x55AAFFAA;
1117 x000000FF = a4 & a6;
1118 x000000CC = x000000FF & ~a2;
1119 x284100D8 = x28410014 ^ x000000CC;
1121 x204100D0 = x7373F4F4 & x284100D8;
1122 x3C3CC3FF = x3C3CC3C3 | x000000FF;
1123 x1C3CC32F = x3C3CC3FF & ~x204100D0;
1124 x4969967A = a1 ^ x1C3CC32F;
1125 x10 = x2FD00F5F & a5;
1126 x11 = x10 ^ x4969967A;
1129 x4CC44CC4 = x4FE55EF4 & ~a2;
1130 x40C040C0 = x4CC44CC4 & ~a3;
1131 xC3C33C3C = ~x3C3CC3C3;
1132 x9669C396 = x55AAFFAA ^ xC3C33C3C;
1133 xD6A98356 = x40C040C0 ^ x9669C396;
1134 x00 = a5 & ~x0C840A00;
1135 x01 = x00 ^ xD6A98356;
1138 xD6E9C3D6 = x40C040C0 | x9669C396;
1139 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
1140 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
1141 x001A000B = a4 & ~x4FE55EF4;
1142 x9A1F2D1B = x9A072D12 | x001A000B;
1143 x20 = a5 & ~x284100D8;
1144 x21 = x20 ^ x9A1F2D1B;
1148 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1150 u32 x5A5A5A5A, x0F0FF0F0;
1151 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
1152 x52FBCA0F, x61C8F93C;
1153 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
1154 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
1155 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
1156 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1158 x5A5A5A5A = a1 ^ a3;
1159 x0F0FF0F0 = a3 ^ a5;
1160 x33FF33FF = a2 | a4;
1161 x33FFCC00 = a5 ^ x33FF33FF;
1162 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
1163 x0C0CC0C0 = x0F0FF0F0 & ~a2;
1164 x0CF3C03F = a4 ^ x0C0CC0C0;
1165 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
1166 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
1167 x61C8F93C = a2 ^ x52FBCA0F;
1169 x00C0C03C = x0CF3C03F & x61C8F93C;
1170 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
1171 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
1172 x30908326 = x3B92A366 & ~x0F0F30C0;
1173 x3C90B3D6 = x0C0030F0 ^ x30908326;
1175 x33CC33CC = a2 ^ a4;
1176 x0C0CFFFF = a5 | x0C0CC0C0;
1177 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
1178 x04124C11 = x379E5C99 & ~x33CC33CC;
1179 x56E9861E = x52FBCA0F ^ x04124C11;
1180 x00 = a6 & ~x3C90B3D6;
1181 x01 = x00 ^ x56E9861E;
1184 xA91679E1 = ~x56E9861E;
1185 x10 = x3C90B3D6 & ~a6;
1186 x11 = x10 ^ xA91679E1;
1189 x9586CA37 = x3C90B3D6 ^ xA91679E1;
1190 x8402C833 = x9586CA37 & ~x33CC33CC;
1191 x84C2C83F = x00C0C03C | x8402C833;
1192 xB35C94A6 = x379E5C99 ^ x84C2C83F;
1193 x20 = x61C8F93C | a6;
1194 x21 = x20 ^ xB35C94A6;
1197 x30 = a6 & x61C8F93C;
1198 x31 = x30 ^ xB35C94A6;
1202 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1204 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
1205 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
1206 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
1207 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
1208 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
1209 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
1210 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
1211 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
1212 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1214 x77777777 = a1 | a3;
1215 x77770000 = x77777777 & ~a6;
1216 x22225555 = a1 ^ x77770000;
1217 x11116666 = a3 ^ x22225555;
1218 x1F1F6F6F = a4 | x11116666;
1220 x70700000 = x77770000 & ~a4;
1221 x43433333 = a3 ^ x70700000;
1222 x00430033 = a5 & x43433333;
1223 x55557777 = a1 | x11116666;
1224 x55167744 = x00430033 ^ x55557777;
1225 x5A19784B = a4 ^ x55167744;
1227 x5A1987B4 = a6 ^ x5A19784B;
1228 x7A3BD7F5 = x22225555 | x5A1987B4;
1229 x003B00F5 = a5 & x7A3BD7F5;
1230 x221955A0 = x22225555 ^ x003B00F5;
1231 x05050707 = a4 & x55557777;
1232 x271C52A7 = x221955A0 ^ x05050707;
1234 x2A2A82A0 = x7A3BD7F5 & ~a1;
1235 x6969B193 = x43433333 ^ x2A2A82A0;
1236 x1FE06F90 = a5 ^ x1F1F6F6F;
1237 x16804E00 = x1FE06F90 & ~x6969B193;
1238 xE97FB1FF = ~x16804E00;
1239 x20 = xE97FB1FF & ~a2;
1240 x21 = x20 ^ x5A19784B;
1243 x43403302 = x43433333 & ~x003B00F5;
1244 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
1245 x37DEFFB7 = x271C52A7 | x35CAED30;
1246 x349ECCB5 = x37DEFFB7 & ~x43403302;
1247 x0B01234A = x1F1F6F6F & ~x349ECCB5;
1249 x101884B4 = x5A1987B4 & x349ECCB5;
1250 x0FF8EB24 = x1FE06F90 ^ x101884B4;
1251 x41413333 = x43433333 & x55557777;
1252 x4FF9FB37 = x0FF8EB24 | x41413333;
1253 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
1254 x30 = x4FC2FBC2 & a2;
1255 x31 = x30 ^ x271C52A7;
1258 x22222222 = a1 ^ x77777777;
1259 x16BCEE97 = x349ECCB5 ^ x22222222;
1260 x0F080B04 = a4 & x0FF8EB24;
1261 x19B4E593 = x16BCEE97 ^ x0F080B04;
1262 x00 = x0B01234A | a2;
1263 x01 = x00 ^ x19B4E593;
1266 x5C5C5C5C = x1F1F6F6F ^ x43433333;
1267 x4448184C = x5C5C5C5C & ~x19B4E593;
1268 x2DDABE71 = x22225555 ^ x0FF8EB24;
1269 x6992A63D = x4448184C ^ x2DDABE71;
1270 x10 = x1F1F6F6F & a2;
1271 x11 = x10 ^ x6992A63D;
1275 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1278 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
1279 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
1280 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
1281 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
1282 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
1283 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
1284 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
1285 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
1286 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1288 x33CC33CC = a2 ^ a5;
1290 x3333FFFF = a2 | a6;
1291 x11115555 = a1 & x3333FFFF;
1292 x22DD6699 = x33CC33CC ^ x11115555;
1293 x22DD9966 = a6 ^ x22DD6699;
1294 x00220099 = a5 & ~x22DD9966;
1296 x00551144 = a1 & x22DD9966;
1297 x33662277 = a2 ^ x00551144;
1298 x5A5A5A5A = a1 ^ a3;
1299 x7B7E7A7F = x33662277 | x5A5A5A5A;
1300 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
1302 x09030C06 = a3 & x59A31CE6;
1303 x09030000 = x09030C06 & ~a6;
1304 x336622FF = x00220099 | x33662277;
1305 x3A6522FF = x09030000 ^ x336622FF;
1306 x30 = x3A6522FF & a4;
1307 x31 = x30 ^ x59A31CE6;
1310 x484D494C = a2 ^ x7B7E7A7F;
1311 x0000B6B3 = a6 & ~x484D494C;
1312 x0F0FB9BC = a3 ^ x0000B6B3;
1313 x00FC00F9 = a5 & ~x09030C06;
1314 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
1316 x5DF75DF7 = a1 | x59A31CE6;
1317 x116600F7 = x336622FF & x5DF75DF7;
1318 x1E69B94B = x0F0FB9BC ^ x116600F7;
1319 x1668B94B = x1E69B94B & ~x09030000;
1320 x20 = x00220099 | a4;
1321 x21 = x20 ^ x1668B94B;
1324 x7B7B7B7B = a2 | x5A5A5A5A;
1325 x411E5984 = x3A6522FF ^ x7B7B7B7B;
1326 x1FFFFDFD = x11115555 | x0FFFB9FD;
1327 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
1329 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
1330 x004B002D = a5 & ~x3CB4DFD2;
1331 xB7B2B6B3 = ~x484D494C;
1332 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
1333 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
1334 x10 = xCC82CDE5 & ~a4;
1335 x11 = x10 ^ x5EE1A479;
1338 x0055EEBB = a6 ^ x00551144;
1339 x5A5AECE9 = a1 ^ x0F0FB9BC;
1340 x0050ECA9 = x0055EEBB & x5A5AECE9;
1341 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
1342 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
1343 x00 = x0FFFB9FD & ~a4;
1344 x01 = x00 ^ xC59A2D67;
1348 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1350 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
1351 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
1352 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
1353 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
1354 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
1355 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
1356 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
1357 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
1358 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1360 x0FF00FF0 = a4 ^ a5;
1361 x3CC33CC3 = a3 ^ x0FF00FF0;
1362 x00003CC3 = a6 & x3CC33CC3;
1363 x0F000F00 = a4 & x0FF00FF0;
1364 x5A555A55 = a2 ^ x0F000F00;
1365 x00001841 = x00003CC3 & x5A555A55;
1367 x00000F00 = a6 & x0F000F00;
1368 x33333C33 = a3 ^ x00000F00;
1369 x7B777E77 = x5A555A55 | x33333C33;
1370 x0FF0F00F = a6 ^ x0FF00FF0;
1371 x74878E78 = x7B777E77 ^ x0FF0F00F;
1372 x30 = a1 & ~x00001841;
1373 x31 = x30 ^ x74878E78;
1376 x003C003C = a5 & ~x3CC33CC3;
1377 x5A7D5A7D = x5A555A55 | x003C003C;
1378 x333300F0 = x00003CC3 ^ x33333C33;
1379 x694E5A8D = x5A7D5A7D ^ x333300F0;
1381 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
1382 x000F0303 = a4 & ~x0FF0CCCC;
1383 x5A505854 = x5A555A55 & ~x000F0303;
1384 x33CC000F = a5 ^ x333300F0;
1385 x699C585B = x5A505854 ^ x33CC000F;
1387 x7F878F78 = x0F000F00 | x74878E78;
1388 x21101013 = a3 & x699C585B;
1389 x7F979F7B = x7F878F78 | x21101013;
1390 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
1391 x4F9493BB = x7F979F7B ^ x30030CC0;
1392 x00 = x4F9493BB & ~a1;
1393 x01 = x00 ^ x694E5A8D;
1396 x6F9CDBFB = x699C585B | x4F9493BB;
1397 x0000DBFB = a6 & x6F9CDBFB;
1398 x00005151 = a2 & x0000DBFB;
1399 x26DAC936 = x694E5A8D ^ x4F9493BB;
1400 x26DA9867 = x00005151 ^ x26DAC936;
1402 x27DA9877 = x21101013 | x26DA9867;
1403 x27DA438C = x0000DBFB ^ x27DA9877;
1404 x2625C9C9 = a5 ^ x26DAC936;
1405 x27FFCBCD = x27DA438C | x2625C9C9;
1406 x20 = x27FFCBCD & a1;
1407 x21 = x20 ^ x699C585B;
1410 x27FF1036 = x0000DBFB ^ x27FFCBCD;
1411 x27FF103E = x003C003C | x27FF1036;
1412 xB06B6C44 = ~x4F9493BB;
1413 x97947C7A = x27FF103E ^ xB06B6C44;
1414 x10 = x97947C7A & ~a1;
1415 x11 = x10 ^ x26DA9867;
1419 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
1421 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
1422 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
1423 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
1424 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
1425 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
1426 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
1427 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
1428 u32 x00, x01, x10, x11, x20, x21, x30, x31;
1430 x0C0C0C0C = a3 & ~a2;
1431 x0000F0F0 = a5 & ~a3;
1432 x00FFF00F = a4 ^ x0000F0F0;
1433 x00555005 = a1 & x00FFF00F;
1434 x00515001 = x00555005 & ~x0C0C0C0C;
1436 x33000330 = a2 & ~x00FFF00F;
1437 x77555775 = a1 | x33000330;
1438 x30303030 = a2 & ~a3;
1439 x3030CFCF = a5 ^ x30303030;
1440 x30104745 = x77555775 & x3030CFCF;
1441 x30555745 = x00555005 | x30104745;
1443 xFF000FF0 = ~x00FFF00F;
1444 xCF1048B5 = x30104745 ^ xFF000FF0;
1445 x080A080A = a3 & ~x77555775;
1446 xC71A40BF = xCF1048B5 ^ x080A080A;
1447 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
1448 x10 = x00515001 | a6;
1449 x11 = x10 ^ xCB164CB3;
1452 x9E4319E6 = a1 ^ xCB164CB3;
1453 x000019E6 = a5 & x9E4319E6;
1454 xF429738C = a2 ^ xC71A40BF;
1455 xF4296A6A = x000019E6 ^ xF429738C;
1456 xC729695A = x33000330 ^ xF4296A6A;
1458 xC47C3D2F = x30555745 ^ xF4296A6A;
1459 xF77F3F3F = a2 | xC47C3D2F;
1460 x9E43E619 = a5 ^ x9E4319E6;
1461 x693CD926 = xF77F3F3F ^ x9E43E619;
1462 x20 = x30555745 & a6;
1463 x21 = x20 ^ x693CD926;
1466 xF719A695 = x3030CFCF ^ xC729695A;
1467 xF4FF73FF = a4 | xF429738C;
1468 x03E6D56A = xF719A695 ^ xF4FF73FF;
1469 x56B3803F = a1 ^ x03E6D56A;
1470 x30 = x56B3803F & a6;
1471 x31 = x30 ^ xC729695A;
1474 xF700A600 = xF719A695 & ~a4;
1475 x61008000 = x693CD926 & xF700A600;
1476 x03B7856B = x00515001 ^ x03E6D56A;
1477 x62B7056B = x61008000 ^ x03B7856B;
1478 x00 = x62B7056B | a6;
1479 x01 = x00 ^ xC729695A;
1485 #define SWAP(a, b) { u32 tmp=*a;*a=*b;*b=tmp; }
1521 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
1522 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
1523 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
1524 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
1525 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
1526 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
1527 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
1528 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
1529 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
1530 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
1531 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
1532 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
1533 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
1534 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
1535 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
1536 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
1538 static void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
1540 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
1541 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
1542 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
1543 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
1544 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
1545 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
1546 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
1547 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
1550 #if CUDA_ARCH >= 500
1564 for (u32 i = 0; i < 2; i++)
1566 if (i) KEYSET10 else KEYSET00
1568 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1569 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1570 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1571 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1572 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1573 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1574 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1575 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1577 if (i) KEYSET11 else KEYSET01
1579 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1580 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1581 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1582 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1583 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1584 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1585 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1586 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1588 if (i) KEYSET12 else KEYSET02
1590 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1591 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1592 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1593 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1594 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1595 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1596 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1597 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1599 if (i) KEYSET13 else KEYSET03
1601 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1602 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1603 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1604 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1605 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1606 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1607 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1608 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1610 if (i) KEYSET14 else KEYSET04
1612 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1613 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1614 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1615 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1616 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1617 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1618 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1619 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1621 if (i) KEYSET15 else KEYSET05
1623 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1624 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1625 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1626 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1627 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1628 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1629 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1630 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1632 if (i) KEYSET16 else KEYSET06
1634 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1635 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1636 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1637 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1638 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1639 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1640 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1641 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1643 if (i) KEYSET17 else KEYSET07
1645 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1646 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1647 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1648 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1649 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1650 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1651 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1652 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1656 static void transpose32c (u32 data[32])
1658 #define swap(x,y,j,m) \
1659 t = ((x) ^ ((y) >> (j))) & (m); \
1661 (y) = (y) ^ (t << (j));
1665 swap (data[ 0], data[16], 16, 0x0000ffff);
1666 swap (data[ 1], data[17], 16, 0x0000ffff);
1667 swap (data[ 2], data[18], 16, 0x0000ffff);
1668 swap (data[ 3], data[19], 16, 0x0000ffff);
1669 swap (data[ 4], data[20], 16, 0x0000ffff);
1670 swap (data[ 5], data[21], 16, 0x0000ffff);
1671 swap (data[ 6], data[22], 16, 0x0000ffff);
1672 swap (data[ 7], data[23], 16, 0x0000ffff);
1673 swap (data[ 8], data[24], 16, 0x0000ffff);
1674 swap (data[ 9], data[25], 16, 0x0000ffff);
1675 swap (data[10], data[26], 16, 0x0000ffff);
1676 swap (data[11], data[27], 16, 0x0000ffff);
1677 swap (data[12], data[28], 16, 0x0000ffff);
1678 swap (data[13], data[29], 16, 0x0000ffff);
1679 swap (data[14], data[30], 16, 0x0000ffff);
1680 swap (data[15], data[31], 16, 0x0000ffff);
1681 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
1682 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
1683 swap (data[ 2], data[10], 8, 0x00ff00ff);
1684 swap (data[ 3], data[11], 8, 0x00ff00ff);
1685 swap (data[ 4], data[12], 8, 0x00ff00ff);
1686 swap (data[ 5], data[13], 8, 0x00ff00ff);
1687 swap (data[ 6], data[14], 8, 0x00ff00ff);
1688 swap (data[ 7], data[15], 8, 0x00ff00ff);
1689 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
1690 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
1691 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
1692 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
1693 swap (data[ 0], data[ 2], 2, 0x33333333);
1694 swap (data[ 1], data[ 3], 2, 0x33333333);
1695 swap (data[ 0], data[ 1], 1, 0x55555555);
1696 swap (data[ 2], data[ 3], 1, 0x55555555);
1697 swap (data[ 4], data[ 6], 2, 0x33333333);
1698 swap (data[ 5], data[ 7], 2, 0x33333333);
1699 swap (data[ 4], data[ 5], 1, 0x55555555);
1700 swap (data[ 6], data[ 7], 1, 0x55555555);
1701 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
1702 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
1703 swap (data[10], data[14], 4, 0x0f0f0f0f);
1704 swap (data[11], data[15], 4, 0x0f0f0f0f);
1705 swap (data[ 8], data[10], 2, 0x33333333);
1706 swap (data[ 9], data[11], 2, 0x33333333);
1707 swap (data[ 8], data[ 9], 1, 0x55555555);
1708 swap (data[10], data[11], 1, 0x55555555);
1709 swap (data[12], data[14], 2, 0x33333333);
1710 swap (data[13], data[15], 2, 0x33333333);
1711 swap (data[12], data[13], 1, 0x55555555);
1712 swap (data[14], data[15], 1, 0x55555555);
1713 swap (data[16], data[24], 8, 0x00ff00ff);
1714 swap (data[17], data[25], 8, 0x00ff00ff);
1715 swap (data[18], data[26], 8, 0x00ff00ff);
1716 swap (data[19], data[27], 8, 0x00ff00ff);
1717 swap (data[20], data[28], 8, 0x00ff00ff);
1718 swap (data[21], data[29], 8, 0x00ff00ff);
1719 swap (data[22], data[30], 8, 0x00ff00ff);
1720 swap (data[23], data[31], 8, 0x00ff00ff);
1721 swap (data[16], data[20], 4, 0x0f0f0f0f);
1722 swap (data[17], data[21], 4, 0x0f0f0f0f);
1723 swap (data[18], data[22], 4, 0x0f0f0f0f);
1724 swap (data[19], data[23], 4, 0x0f0f0f0f);
1725 swap (data[16], data[18], 2, 0x33333333);
1726 swap (data[17], data[19], 2, 0x33333333);
1727 swap (data[16], data[17], 1, 0x55555555);
1728 swap (data[18], data[19], 1, 0x55555555);
1729 swap (data[20], data[22], 2, 0x33333333);
1730 swap (data[21], data[23], 2, 0x33333333);
1731 swap (data[20], data[21], 1, 0x55555555);
1732 swap (data[22], data[23], 1, 0x55555555);
1733 swap (data[24], data[28], 4, 0x0f0f0f0f);
1734 swap (data[25], data[29], 4, 0x0f0f0f0f);
1735 swap (data[26], data[30], 4, 0x0f0f0f0f);
1736 swap (data[27], data[31], 4, 0x0f0f0f0f);
1737 swap (data[24], data[26], 2, 0x33333333);
1738 swap (data[25], data[27], 2, 0x33333333);
1739 swap (data[24], data[25], 1, 0x55555555);
1740 swap (data[26], data[27], 1, 0x55555555);
1741 swap (data[28], data[30], 2, 0x33333333);
1742 swap (data[29], data[31], 2, 0x33333333);
1743 swap (data[28], data[29], 1, 0x55555555);
1744 swap (data[30], data[31], 1, 0x55555555);
1747 static void m03000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1753 const u32 gid = get_global_id (0);
1754 const u32 lid = get_local_id (0);
1760 const u32 K00 = pws[gid].i[ 0];
1761 const u32 K01 = pws[gid].i[ 1];
1762 const u32 K02 = pws[gid].i[ 2];
1763 const u32 K03 = pws[gid].i[ 3];
1764 const u32 K04 = pws[gid].i[ 4];
1765 const u32 K05 = pws[gid].i[ 5];
1766 const u32 K06 = pws[gid].i[ 6];
1767 const u32 K07 = pws[gid].i[ 7];
1768 const u32 K08 = pws[gid].i[ 8];
1769 const u32 K09 = pws[gid].i[ 9];
1770 const u32 K10 = pws[gid].i[10];
1771 const u32 K11 = pws[gid].i[11];
1772 const u32 K12 = pws[gid].i[12];
1773 const u32 K13 = pws[gid].i[13];
1774 const u32 K14 = pws[gid].i[14];
1775 const u32 K15 = pws[gid].i[15];
1776 const u32 K16 = pws[gid].i[16];
1777 const u32 K17 = pws[gid].i[17];
1778 const u32 K18 = pws[gid].i[18];
1779 const u32 K19 = pws[gid].i[19];
1780 const u32 K20 = pws[gid].i[20];
1781 const u32 K21 = pws[gid].i[21];
1782 const u32 K22 = pws[gid].i[22];
1783 const u32 K23 = pws[gid].i[23];
1784 const u32 K24 = pws[gid].i[24];
1785 const u32 K25 = pws[gid].i[25];
1786 const u32 K26 = pws[gid].i[26];
1787 const u32 K27 = pws[gid].i[27];
1788 const u32 K28 = pws[gid].i[28];
1789 const u32 K29 = pws[gid].i[29];
1790 const u32 K30 = pws[gid].i[30];
1791 const u32 K31 = pws[gid].i[31];
1792 const u32 K32 = pws[gid].i[32];
1793 const u32 K33 = pws[gid].i[33];
1794 const u32 K34 = pws[gid].i[34];
1795 const u32 K35 = pws[gid].i[35];
1796 const u32 K36 = pws[gid].i[36];
1797 const u32 K37 = pws[gid].i[37];
1798 const u32 K38 = pws[gid].i[38];
1799 const u32 K39 = pws[gid].i[39];
1800 const u32 K40 = pws[gid].i[40];
1801 const u32 K41 = pws[gid].i[41];
1802 const u32 K42 = pws[gid].i[42];
1803 const u32 K43 = pws[gid].i[43];
1804 const u32 K44 = pws[gid].i[44];
1805 const u32 K45 = pws[gid].i[45];
1806 const u32 K46 = pws[gid].i[46];
1807 const u32 K47 = pws[gid].i[47];
1808 const u32 K48 = pws[gid].i[48];
1809 const u32 K49 = pws[gid].i[49];
1810 const u32 K50 = pws[gid].i[50];
1811 const u32 K51 = pws[gid].i[51];
1812 const u32 K52 = pws[gid].i[52];
1813 const u32 K53 = pws[gid].i[53];
1814 const u32 K54 = pws[gid].i[54];
1815 const u32 K55 = pws[gid].i[55];
1817 const u32 pc_pos = get_local_id (1);
1819 const u32 il_pos = pc_pos * 32;
1854 k00 |= words_buf_r[pc_pos].b[ 0];
1855 k01 |= words_buf_r[pc_pos].b[ 1];
1856 k02 |= words_buf_r[pc_pos].b[ 2];
1857 k03 |= words_buf_r[pc_pos].b[ 3];
1858 k04 |= words_buf_r[pc_pos].b[ 4];
1859 k05 |= words_buf_r[pc_pos].b[ 5];
1860 k06 |= words_buf_r[pc_pos].b[ 6];
1861 k07 |= words_buf_r[pc_pos].b[ 7];
1862 k08 |= words_buf_r[pc_pos].b[ 8];
1863 k09 |= words_buf_r[pc_pos].b[ 9];
1864 k10 |= words_buf_r[pc_pos].b[10];
1865 k11 |= words_buf_r[pc_pos].b[11];
1866 k12 |= words_buf_r[pc_pos].b[12];
1867 k13 |= words_buf_r[pc_pos].b[13];
1868 k14 |= words_buf_r[pc_pos].b[14];
1869 k15 |= words_buf_r[pc_pos].b[15];
1870 k16 |= words_buf_r[pc_pos].b[16];
1871 k17 |= words_buf_r[pc_pos].b[17];
1872 k18 |= words_buf_r[pc_pos].b[18];
1873 k19 |= words_buf_r[pc_pos].b[19];
1874 k20 |= words_buf_r[pc_pos].b[20];
1875 k21 |= words_buf_r[pc_pos].b[21];
1876 k22 |= words_buf_r[pc_pos].b[22];
1877 k23 |= words_buf_r[pc_pos].b[23];
1878 k24 |= words_buf_r[pc_pos].b[24];
1879 k25 |= words_buf_r[pc_pos].b[25];
1880 k26 |= words_buf_r[pc_pos].b[26];
1881 k27 |= words_buf_r[pc_pos].b[27];
1882 k28 |= words_buf_r[pc_pos].b[28];
1883 k29 |= words_buf_r[pc_pos].b[29];
1884 k30 |= words_buf_r[pc_pos].b[30];
1885 k31 |= words_buf_r[pc_pos].b[31];
1887 // KGS!@#$% including IP
1892 u32 D03 = 0xffffffff;
1894 u32 D05 = 0xffffffff;
1895 u32 D06 = 0xffffffff;
1896 u32 D07 = 0xffffffff;
1902 u32 D13 = 0xffffffff;
1905 u32 D16 = 0xffffffff;
1906 u32 D17 = 0xffffffff;
1911 u32 D22 = 0xffffffff;
1913 u32 D24 = 0xffffffff;
1915 u32 D26 = 0xffffffff;
1917 u32 D28 = 0xffffffff;
1918 u32 D29 = 0xffffffff;
1919 u32 D30 = 0xffffffff;
1920 u32 D31 = 0xffffffff;
1929 u32 D40 = 0xffffffff;
1930 u32 D41 = 0xffffffff;
1931 u32 D42 = 0xffffffff;
1933 u32 D44 = 0xffffffff;
1944 u32 D55 = 0xffffffff;
1947 u32 D58 = 0xffffffff;
1950 u32 D61 = 0xffffffff;
1951 u32 D62 = 0xffffffff;
1952 u32 D63 = 0xffffffff;
1956 k00, k01, k02, k03, k04, k05, k06,
1957 k07, k08, k09, k10, k11, k12, k13,
1958 k14, k15, k16, k17, k18, k19, k20,
1959 k21, k22, k23, k24, k25, k26, k27,
1960 k28, k29, k30, k31, K32, K33, K34,
1961 K35, K36, K37, K38, K39, K40, K41,
1962 K42, K43, K44, K45, K46, K47, K48,
1963 K49, K50, K51, K52, K53, K54, K55,
1964 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
1965 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
1966 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
1967 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
1968 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
1969 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
1970 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
1971 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
2041 if (digests_cnt < 16)
2043 for (u32 d = 0; d < digests_cnt; d++)
2045 const u32 final_hash_pos = digests_offset + d;
2047 if (hashes_shown[final_hash_pos]) continue;
2051 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
2052 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
2057 for (int i = 0; i < 32; i++)
2059 const u32 b0 = -((search[0] >> i) & 1);
2060 const u32 b1 = -((search[1] >> i) & 1);
2062 tmpResult |= out[ 0 + i] ^ b0;
2063 tmpResult |= out[32 + i] ^ b1;
2066 if (tmpResult == 0xffffffff) continue;
2068 const u32 slice = 31 - clz (~tmpResult);
2070 const u32 r0 = search[0];
2071 const u32 r1 = search[1];
2084 for (int i = 0; i < 32; i++)
2086 out0[i] = out[ 0 + 31 - i];
2087 out1[i] = out[32 + 31 - i];
2090 transpose32c (out0);
2091 transpose32c (out1);
2094 for (int slice = 0; slice < 32; slice++)
2096 const u32 r0 = out0[31 - slice];
2097 const u32 r1 = out1[31 - slice];
2106 static void m03000s (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
2112 const u32 gid = get_global_id (0);
2113 const u32 lid = get_local_id (0);
2119 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
2120 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
2122 const u32 S00 = -((s0 >> 0) & 1);
2123 const u32 S01 = -((s0 >> 1) & 1);
2124 const u32 S02 = -((s0 >> 2) & 1);
2125 const u32 S03 = -((s0 >> 3) & 1);
2126 const u32 S04 = -((s0 >> 4) & 1);
2127 const u32 S05 = -((s0 >> 5) & 1);
2128 const u32 S06 = -((s0 >> 6) & 1);
2129 const u32 S07 = -((s0 >> 7) & 1);
2130 const u32 S08 = -((s0 >> 8) & 1);
2131 const u32 S09 = -((s0 >> 9) & 1);
2132 const u32 S10 = -((s0 >> 10) & 1);
2133 const u32 S11 = -((s0 >> 11) & 1);
2134 const u32 S12 = -((s0 >> 12) & 1);
2135 const u32 S13 = -((s0 >> 13) & 1);
2136 const u32 S14 = -((s0 >> 14) & 1);
2137 const u32 S15 = -((s0 >> 15) & 1);
2138 const u32 S16 = -((s0 >> 16) & 1);
2139 const u32 S17 = -((s0 >> 17) & 1);
2140 const u32 S18 = -((s0 >> 18) & 1);
2141 const u32 S19 = -((s0 >> 19) & 1);
2142 const u32 S20 = -((s0 >> 20) & 1);
2143 const u32 S21 = -((s0 >> 21) & 1);
2144 const u32 S22 = -((s0 >> 22) & 1);
2145 const u32 S23 = -((s0 >> 23) & 1);
2146 const u32 S24 = -((s0 >> 24) & 1);
2147 const u32 S25 = -((s0 >> 25) & 1);
2148 const u32 S26 = -((s0 >> 26) & 1);
2149 const u32 S27 = -((s0 >> 27) & 1);
2150 const u32 S28 = -((s0 >> 28) & 1);
2151 const u32 S29 = -((s0 >> 29) & 1);
2152 const u32 S30 = -((s0 >> 30) & 1);
2153 const u32 S31 = -((s0 >> 31) & 1);
2154 const u32 S32 = -((s1 >> 0) & 1);
2155 const u32 S33 = -((s1 >> 1) & 1);
2156 const u32 S34 = -((s1 >> 2) & 1);
2157 const u32 S35 = -((s1 >> 3) & 1);
2158 const u32 S36 = -((s1 >> 4) & 1);
2159 const u32 S37 = -((s1 >> 5) & 1);
2160 const u32 S38 = -((s1 >> 6) & 1);
2161 const u32 S39 = -((s1 >> 7) & 1);
2162 const u32 S40 = -((s1 >> 8) & 1);
2163 const u32 S41 = -((s1 >> 9) & 1);
2164 const u32 S42 = -((s1 >> 10) & 1);
2165 const u32 S43 = -((s1 >> 11) & 1);
2166 const u32 S44 = -((s1 >> 12) & 1);
2167 const u32 S45 = -((s1 >> 13) & 1);
2168 const u32 S46 = -((s1 >> 14) & 1);
2169 const u32 S47 = -((s1 >> 15) & 1);
2170 const u32 S48 = -((s1 >> 16) & 1);
2171 const u32 S49 = -((s1 >> 17) & 1);
2172 const u32 S50 = -((s1 >> 18) & 1);
2173 const u32 S51 = -((s1 >> 19) & 1);
2174 const u32 S52 = -((s1 >> 20) & 1);
2175 const u32 S53 = -((s1 >> 21) & 1);
2176 const u32 S54 = -((s1 >> 22) & 1);
2177 const u32 S55 = -((s1 >> 23) & 1);
2178 const u32 S56 = -((s1 >> 24) & 1);
2179 const u32 S57 = -((s1 >> 25) & 1);
2180 const u32 S58 = -((s1 >> 26) & 1);
2181 const u32 S59 = -((s1 >> 27) & 1);
2182 const u32 S60 = -((s1 >> 28) & 1);
2183 const u32 S61 = -((s1 >> 29) & 1);
2184 const u32 S62 = -((s1 >> 30) & 1);
2185 const u32 S63 = -((s1 >> 31) & 1);
2191 const u32 K00 = pws[gid].i[ 0];
2192 const u32 K01 = pws[gid].i[ 1];
2193 const u32 K02 = pws[gid].i[ 2];
2194 const u32 K03 = pws[gid].i[ 3];
2195 const u32 K04 = pws[gid].i[ 4];
2196 const u32 K05 = pws[gid].i[ 5];
2197 const u32 K06 = pws[gid].i[ 6];
2198 const u32 K07 = pws[gid].i[ 7];
2199 const u32 K08 = pws[gid].i[ 8];
2200 const u32 K09 = pws[gid].i[ 9];
2201 const u32 K10 = pws[gid].i[10];
2202 const u32 K11 = pws[gid].i[11];
2203 const u32 K12 = pws[gid].i[12];
2204 const u32 K13 = pws[gid].i[13];
2205 const u32 K14 = pws[gid].i[14];
2206 const u32 K15 = pws[gid].i[15];
2207 const u32 K16 = pws[gid].i[16];
2208 const u32 K17 = pws[gid].i[17];
2209 const u32 K18 = pws[gid].i[18];
2210 const u32 K19 = pws[gid].i[19];
2211 const u32 K20 = pws[gid].i[20];
2212 const u32 K21 = pws[gid].i[21];
2213 const u32 K22 = pws[gid].i[22];
2214 const u32 K23 = pws[gid].i[23];
2215 const u32 K24 = pws[gid].i[24];
2216 const u32 K25 = pws[gid].i[25];
2217 const u32 K26 = pws[gid].i[26];
2218 const u32 K27 = pws[gid].i[27];
2219 const u32 K28 = pws[gid].i[28];
2220 const u32 K29 = pws[gid].i[29];
2221 const u32 K30 = pws[gid].i[30];
2222 const u32 K31 = pws[gid].i[31];
2223 const u32 K32 = pws[gid].i[32];
2224 const u32 K33 = pws[gid].i[33];
2225 const u32 K34 = pws[gid].i[34];
2226 const u32 K35 = pws[gid].i[35];
2227 const u32 K36 = pws[gid].i[36];
2228 const u32 K37 = pws[gid].i[37];
2229 const u32 K38 = pws[gid].i[38];
2230 const u32 K39 = pws[gid].i[39];
2231 const u32 K40 = pws[gid].i[40];
2232 const u32 K41 = pws[gid].i[41];
2233 const u32 K42 = pws[gid].i[42];
2234 const u32 K43 = pws[gid].i[43];
2235 const u32 K44 = pws[gid].i[44];
2236 const u32 K45 = pws[gid].i[45];
2237 const u32 K46 = pws[gid].i[46];
2238 const u32 K47 = pws[gid].i[47];
2239 const u32 K48 = pws[gid].i[48];
2240 const u32 K49 = pws[gid].i[49];
2241 const u32 K50 = pws[gid].i[50];
2242 const u32 K51 = pws[gid].i[51];
2243 const u32 K52 = pws[gid].i[52];
2244 const u32 K53 = pws[gid].i[53];
2245 const u32 K54 = pws[gid].i[54];
2246 const u32 K55 = pws[gid].i[55];
2248 const u32 pc_pos = get_local_id (1);
2250 const u32 il_pos = pc_pos * 32;
2285 k00 |= words_buf_r[pc_pos].b[ 0];
2286 k01 |= words_buf_r[pc_pos].b[ 1];
2287 k02 |= words_buf_r[pc_pos].b[ 2];
2288 k03 |= words_buf_r[pc_pos].b[ 3];
2289 k04 |= words_buf_r[pc_pos].b[ 4];
2290 k05 |= words_buf_r[pc_pos].b[ 5];
2291 k06 |= words_buf_r[pc_pos].b[ 6];
2292 k07 |= words_buf_r[pc_pos].b[ 7];
2293 k08 |= words_buf_r[pc_pos].b[ 8];
2294 k09 |= words_buf_r[pc_pos].b[ 9];
2295 k10 |= words_buf_r[pc_pos].b[10];
2296 k11 |= words_buf_r[pc_pos].b[11];
2297 k12 |= words_buf_r[pc_pos].b[12];
2298 k13 |= words_buf_r[pc_pos].b[13];
2299 k14 |= words_buf_r[pc_pos].b[14];
2300 k15 |= words_buf_r[pc_pos].b[15];
2301 k16 |= words_buf_r[pc_pos].b[16];
2302 k17 |= words_buf_r[pc_pos].b[17];
2303 k18 |= words_buf_r[pc_pos].b[18];
2304 k19 |= words_buf_r[pc_pos].b[19];
2305 k20 |= words_buf_r[pc_pos].b[20];
2306 k21 |= words_buf_r[pc_pos].b[21];
2307 k22 |= words_buf_r[pc_pos].b[22];
2308 k23 |= words_buf_r[pc_pos].b[23];
2309 k24 |= words_buf_r[pc_pos].b[24];
2310 k25 |= words_buf_r[pc_pos].b[25];
2311 k26 |= words_buf_r[pc_pos].b[26];
2312 k27 |= words_buf_r[pc_pos].b[27];
2313 k28 |= words_buf_r[pc_pos].b[28];
2314 k29 |= words_buf_r[pc_pos].b[29];
2315 k30 |= words_buf_r[pc_pos].b[30];
2316 k31 |= words_buf_r[pc_pos].b[31];
2318 // KGS!@#$% including IP
2323 u32 D03 = 0xffffffff;
2325 u32 D05 = 0xffffffff;
2326 u32 D06 = 0xffffffff;
2327 u32 D07 = 0xffffffff;
2333 u32 D13 = 0xffffffff;
2336 u32 D16 = 0xffffffff;
2337 u32 D17 = 0xffffffff;
2342 u32 D22 = 0xffffffff;
2344 u32 D24 = 0xffffffff;
2346 u32 D26 = 0xffffffff;
2348 u32 D28 = 0xffffffff;
2349 u32 D29 = 0xffffffff;
2350 u32 D30 = 0xffffffff;
2351 u32 D31 = 0xffffffff;
2360 u32 D40 = 0xffffffff;
2361 u32 D41 = 0xffffffff;
2362 u32 D42 = 0xffffffff;
2364 u32 D44 = 0xffffffff;
2375 u32 D55 = 0xffffffff;
2378 u32 D58 = 0xffffffff;
2381 u32 D61 = 0xffffffff;
2382 u32 D62 = 0xffffffff;
2383 u32 D63 = 0xffffffff;
2387 k00, k01, k02, k03, k04, k05, k06,
2388 k07, k08, k09, k10, k11, k12, k13,
2389 k14, k15, k16, k17, k18, k19, k20,
2390 k21, k22, k23, k24, k25, k26, k27,
2391 k28, k29, k30, k31, K32, K33, K34,
2392 K35, K36, K37, K38, K39, K40, K41,
2393 K42, K43, K44, K45, K46, K47, K48,
2394 K49, K50, K51, K52, K53, K54, K55,
2395 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
2396 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
2397 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
2398 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
2399 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
2400 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
2401 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
2402 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
2407 tmpResult |= D00 ^ S00;
2408 tmpResult |= D01 ^ S01;
2409 tmpResult |= D02 ^ S02;
2410 tmpResult |= D03 ^ S03;
2411 tmpResult |= D04 ^ S04;
2412 tmpResult |= D05 ^ S05;
2413 tmpResult |= D06 ^ S06;
2414 tmpResult |= D07 ^ S07;
2415 tmpResult |= D08 ^ S08;
2416 tmpResult |= D09 ^ S09;
2417 tmpResult |= D10 ^ S10;
2418 tmpResult |= D11 ^ S11;
2419 tmpResult |= D12 ^ S12;
2420 tmpResult |= D13 ^ S13;
2421 tmpResult |= D14 ^ S14;
2422 tmpResult |= D15 ^ S15;
2424 if (tmpResult == 0xffffffff) return;
2426 tmpResult |= D16 ^ S16;
2427 tmpResult |= D17 ^ S17;
2428 tmpResult |= D18 ^ S18;
2429 tmpResult |= D19 ^ S19;
2430 tmpResult |= D20 ^ S20;
2431 tmpResult |= D21 ^ S21;
2432 tmpResult |= D22 ^ S22;
2433 tmpResult |= D23 ^ S23;
2434 tmpResult |= D24 ^ S24;
2435 tmpResult |= D25 ^ S25;
2436 tmpResult |= D26 ^ S26;
2437 tmpResult |= D27 ^ S27;
2438 tmpResult |= D28 ^ S28;
2439 tmpResult |= D29 ^ S29;
2440 tmpResult |= D30 ^ S30;
2441 tmpResult |= D31 ^ S31;
2443 if (tmpResult == 0xffffffff) return;
2445 tmpResult |= D32 ^ S32;
2446 tmpResult |= D33 ^ S33;
2447 tmpResult |= D34 ^ S34;
2448 tmpResult |= D35 ^ S35;
2449 tmpResult |= D36 ^ S36;
2450 tmpResult |= D37 ^ S37;
2451 tmpResult |= D38 ^ S38;
2452 tmpResult |= D39 ^ S39;
2453 tmpResult |= D40 ^ S40;
2454 tmpResult |= D41 ^ S41;
2455 tmpResult |= D42 ^ S42;
2456 tmpResult |= D43 ^ S43;
2457 tmpResult |= D44 ^ S44;
2458 tmpResult |= D45 ^ S45;
2459 tmpResult |= D46 ^ S46;
2460 tmpResult |= D47 ^ S47;
2462 if (tmpResult == 0xffffffff) return;
2464 tmpResult |= D48 ^ S48;
2465 tmpResult |= D49 ^ S49;
2466 tmpResult |= D50 ^ S50;
2467 tmpResult |= D51 ^ S51;
2468 tmpResult |= D52 ^ S52;
2469 tmpResult |= D53 ^ S53;
2470 tmpResult |= D54 ^ S54;
2471 tmpResult |= D55 ^ S55;
2472 tmpResult |= D56 ^ S56;
2473 tmpResult |= D57 ^ S57;
2474 tmpResult |= D58 ^ S58;
2475 tmpResult |= D59 ^ S59;
2476 tmpResult |= D60 ^ S60;
2477 tmpResult |= D61 ^ S61;
2478 tmpResult |= D62 ^ S62;
2479 tmpResult |= D63 ^ S63;
2481 if (tmpResult == 0xffffffff) return;
2483 const u32 slice = 31 - clz (~tmpResult);
2489 // transpose bitslice base : easy because no overlapping buffers
2490 // mod : attention race conditions, need different buffers for *in and *out
2493 __kernel void m03000_tb (__global pw_t *pws)
2495 const u32 gid = get_global_id (0);
2497 const u32 w0s = pws[gid].i[0];
2498 const u32 w1s = pws[gid].i[1];
2501 for (int i = 0; i < 32; i += 8)
2503 pws[gid].i[i + 0 + 0] = -((w0s >> (i + 7)) & 1);
2504 pws[gid].i[i + 0 + 1] = -((w0s >> (i + 6)) & 1);
2505 pws[gid].i[i + 0 + 2] = -((w0s >> (i + 5)) & 1);
2506 pws[gid].i[i + 0 + 3] = -((w0s >> (i + 4)) & 1);
2507 pws[gid].i[i + 0 + 4] = -((w0s >> (i + 3)) & 1);
2508 pws[gid].i[i + 0 + 5] = -((w0s >> (i + 2)) & 1);
2509 pws[gid].i[i + 0 + 6] = -((w0s >> (i + 1)) & 1);
2510 pws[gid].i[i + 0 + 7] = -((w0s >> (i + 0)) & 1);
2514 for (int i = 0; i < 24; i += 8)
2516 pws[gid].i[i + 32 + 0] = -((w1s >> (i + 7)) & 1);
2517 pws[gid].i[i + 32 + 1] = -((w1s >> (i + 6)) & 1);
2518 pws[gid].i[i + 32 + 2] = -((w1s >> (i + 5)) & 1);
2519 pws[gid].i[i + 32 + 3] = -((w1s >> (i + 4)) & 1);
2520 pws[gid].i[i + 32 + 4] = -((w1s >> (i + 3)) & 1);
2521 pws[gid].i[i + 32 + 5] = -((w1s >> (i + 2)) & 1);
2522 pws[gid].i[i + 32 + 6] = -((w1s >> (i + 1)) & 1);
2523 pws[gid].i[i + 32 + 7] = -((w1s >> (i + 0)) & 1);
2527 __kernel void m03000_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
2529 const u32 gid = get_global_id (0);
2531 const u32 block = gid / 32;
2532 const u32 slice = gid % 32;
2534 const u32 w0 = mod[gid];
2537 for (int i = 0; i < 32; i += 8)
2539 atomic_or (&words_buf_r[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice));
2540 atomic_or (&words_buf_r[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice));
2541 atomic_or (&words_buf_r[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice));
2542 atomic_or (&words_buf_r[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice));
2543 atomic_or (&words_buf_r[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice));
2544 atomic_or (&words_buf_r[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice));
2545 atomic_or (&words_buf_r[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice));
2546 atomic_or (&words_buf_r[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice));
2550 __kernel void m03000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2556 const u32 gid = get_global_id (0);
2557 const u32 lid = get_local_id (0);
2559 if (gid >= gid_max) return;
2565 m03000m (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2568 __kernel void m03000_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2572 __kernel void m03000_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2576 __kernel void m03000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2582 const u32 gid = get_global_id (0);
2583 const u32 lid = get_local_id (0);
2585 if (gid >= gid_max) return;
2591 m03000s (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2594 __kernel void m03000_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2598 __kernel void m03000_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)