2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes were taken from JtR, license below
9 #include "include/constants.h"
10 #include "include/kernel_vendor.h"
17 #include "include/kernel_functions.c"
18 #include "OpenCL/types_ocl.c"
19 #include "OpenCL/common.c"
21 #define COMPARE_S "OpenCL/check_single_comp4_bs.c"
22 #define COMPARE_M "OpenCL/check_multi_comp4_bs.c"
29 #define KXX_DECL volatile
41 // Bitslice DES S-boxes with LOP3.LUT instructions
42 // For NVIDIA Maxwell architecture and CUDA 7.5 RC
43 // by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
45 // Gate counts: 25 24 25 18 25 24 24 23
47 // Depth: 8 7 7 6 8 10 10 8
50 // Note that same S-box function with a lower gate count isn't necessarily faster.
52 // These Boolean expressions corresponding to DES S-boxes were
53 // discovered by <deeplearningjohndoe at gmail.com>
55 // This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
56 // Redistribution and use in source and binary forms, with or without
57 // modification, are permitted.
59 // The underlying mathematical formulas are NOT copyrighted.
62 #define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));
64 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
66 LUT(xAA55AA5500550055, a1, a4, a6, 0xC1)
67 LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
68 LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
69 LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
70 LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
71 LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
72 LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
73 LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
74 LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
75 LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
76 LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
77 LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
78 LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
79 LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
80 LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
81 LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
82 LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
83 LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
84 LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
85 LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
86 LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
87 LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
88 LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
89 LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
90 LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)
98 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
100 LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97)
101 LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
102 LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76)
103 LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
104 LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
105 LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65)
106 LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
107 LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
108 LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
109 LUT(x3333CCCC00000000, a2, a5, a6, 0x14)
110 LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
111 LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
112 LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
113 LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
114 LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
115 LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
116 LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
117 LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
118 LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
119 LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
120 LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
121 LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
122 LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
123 LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)
131 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
133 LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
134 LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
135 LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
136 LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
137 LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
138 LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
139 LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
140 LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49)
141 LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
142 LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
143 LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
144 LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
145 LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
146 LUT(x500F500F500F500F, a1, a3, a4, 0x98)
147 LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
148 LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
149 LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
150 LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
151 LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
152 LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
153 LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
154 LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
155 LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
156 LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
157 LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)
165 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
167 LUT(x55F055F055F055F0, a1, a3, a4, 0x72)
168 LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
169 LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59)
170 LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
171 LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
172 LUT(x9999666699996666, a1, a2, a5, 0x69)
173 LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
174 LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
175 LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
176 LUT(x4848484848484848, a1, a2, a3, 0x12)
177 LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
178 LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
179 LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
180 LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
181 LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
182 LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
183 LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
184 LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)
192 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
194 LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
195 LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9)
196 LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
197 LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
198 LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
199 LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
200 LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
201 LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
202 LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
203 LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
204 LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
205 LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
206 LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
207 LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
208 LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
209 LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
210 LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
211 LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
212 LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
213 LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
214 LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
215 LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
216 LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
217 LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
218 LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)
226 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
228 LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2)
229 LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
230 LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
231 LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
232 LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
233 LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
234 LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
235 LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
236 LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
237 LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
238 LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
239 LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
240 LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
241 LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
242 LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
243 LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
244 LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
245 LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
246 LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
247 LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
248 LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
249 LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
250 LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
251 LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)
259 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
261 LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
262 LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
263 LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
264 LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
265 LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
266 LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
267 LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
268 LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
269 LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
270 LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
271 LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
272 LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
273 LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
274 LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
275 LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
276 LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23)
277 LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
278 LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
279 LUT(xA050A050A050A050, a1, a3, a4, 0x21)
280 LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
281 LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
282 LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
283 LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
284 LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)
292 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
294 LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
295 LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
296 LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
297 LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
298 LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
299 LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
300 LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
301 LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
302 LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
303 LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
304 LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
305 LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
306 LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
307 LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
308 LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
309 LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
310 LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
311 LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
312 LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
313 LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
314 LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
315 LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
316 LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)
327 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
328 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
330 * Gate counts: 49 44 46 33 48 46 46 41
333 * Several same-gate-count expressions for each S-box are included (for use on
334 * different CPUs/GPUs).
336 * These Boolean expressions corresponding to DES S-boxes have been generated
337 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
338 * John the Ripper password cracker: http://www.openwall.com/john/
339 * Being mathematical formulas, they are not copyrighted and are free for reuse
342 * This file (a specific representation of the S-box expressions, surrounding
343 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
344 * Redistribution and use in source and binary forms, with or without
345 * modification, are permitted. (This is a heavily cut-down "BSD license".)
347 * The effort has been sponsored by Rapid7: http://www.rapid7.com
350 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
352 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
354 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
355 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
356 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
357 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
358 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
359 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
360 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
361 u32 x00, x01, x10, x11, x20, x21, x30, x31;
363 x55005500 = a1 & ~a5;
364 x5A0F5A0F = a4 ^ x55005500;
367 x22226666 = x3333FFFF & x66666666;
368 x2D2D6969 = a4 ^ x22226666;
369 x25202160 = x2D2D6969 & ~x5A0F5A0F;
372 x33CCCC33 = a3 ^ x00FFFF00;
373 x4803120C = x5A0F5A0F & ~x33CCCC33;
374 x2222FFFF = a6 | x22226666;
375 x6A21EDF3 = x4803120C ^ x2222FFFF;
376 x4A01CC93 = x6A21EDF3 & ~x25202160;
379 x7F75FFFF = x6A21EDF3 | x5555FFFF;
380 x00D20096 = a5 & ~x2D2D6969;
381 x7FA7FF69 = x7F75FFFF ^ x00D20096;
383 x0A0A0000 = a4 & ~x5555FFFF;
384 x0AD80096 = x00D20096 ^ x0A0A0000;
385 x00999900 = x00FFFF00 & ~x66666666;
386 x0AD99996 = x0AD80096 | x00999900;
388 x22332233 = a3 & ~x55005500;
389 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
390 x054885C0 = x257AA5F0 & ~x22332233;
391 xFAB77A3F = ~x054885C0;
392 x2221EDF3 = x3333FFFF & x6A21EDF3;
393 xD89697CC = xFAB77A3F ^ x2221EDF3;
394 x20 = x7FA7FF69 & ~a2;
395 x21 = x20 ^ xD89697CC;
398 x05B77AC0 = x00FFFF00 ^ x054885C0;
399 x05F77AD6 = x00D20096 | x05B77AC0;
400 x36C48529 = x3333FFFF ^ x05F77AD6;
401 x6391D07C = a1 ^ x36C48529;
402 xBB0747B0 = xD89697CC ^ x6391D07C;
403 x00 = x25202160 | a2;
404 x01 = x00 ^ xBB0747B0;
407 x4C460000 = x3333FFFF ^ x7F75FFFF;
408 x4EDF9996 = x0AD99996 | x4C460000;
409 x2D4E49EA = x6391D07C ^ x4EDF9996;
410 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
411 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
412 x10 = x4A01CC93 | a2;
413 x11 = x10 ^ x96B1B65A;
416 x5AFF5AFF = a5 | x5A0F5A0F;
417 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
418 x4201C010 = x4A01CC93 & x6391D07C;
419 x10B0D205 = x52B11215 ^ x4201C010;
420 x30 = x10B0D205 | a2;
421 x31 = x30 ^ x0AD99996;
425 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
428 u32 x55550000, x00AA00FF, x33BB33FF;
429 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
430 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
431 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
432 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
433 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
434 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
435 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
436 u32 x00, x01, x10, x11, x20, x21, x30, x31;
440 x55550000 = a1 & ~a6;
441 x00AA00FF = a5 & ~x55550000;
442 x33BB33FF = a2 | x00AA00FF;
444 x33CC0000 = x33CC33CC & ~a6;
445 x11441144 = a1 & x33CC33CC;
446 x11BB11BB = a5 ^ x11441144;
447 x003311BB = x11BB11BB & ~x33CC0000;
450 x336600FF = x00AA00FF ^ x33CC0000;
451 x332200FF = x33BB33FF & x336600FF;
452 x332200F0 = x332200FF & ~x00000F0F;
454 x0302000F = a3 & x332200FF;
456 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
457 x33CCCC33 = a6 ^ x33CC33CC;
458 x33CCC030 = x33CCCC33 & ~x00000F0F;
459 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
460 x10 = a4 & ~x332200F0;
461 x11 = x10 ^ x9A646A95;
464 x00333303 = a2 & ~x33CCC030;
465 x118822B8 = x11BB11BB ^ x00333303;
466 xA8208805 = xA9A8AAA5 & ~x118822B8;
467 x3CC3C33C = a3 ^ x33CCCC33;
468 x94E34B39 = xA8208805 ^ x3CC3C33C;
469 x00 = x33BB33FF & ~a4;
470 x01 = x00 ^ x94E34B39;
473 x0331330C = x0302000F ^ x00333303;
474 x3FF3F33C = x3CC3C33C | x0331330C;
475 xA9DF596A = x33BB33FF ^ x9A646A95;
476 xA9DF5F6F = x00000F0F | xA9DF596A;
477 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
479 xA9466A6A = x332200FF ^ x9A646A95;
480 x3DA52153 = x94E34B39 ^ xA9466A6A;
481 x29850143 = xA9DF5F6F & x3DA52153;
482 x33C0330C = x33CC33CC & x3FF3F33C;
483 x1A45324F = x29850143 ^ x33C0330C;
484 x20 = x1A45324F | a4;
485 x21 = x20 ^ x962CAC53;
488 x0A451047 = x1A45324F & ~x118822B8;
489 xBBDFDD7B = x33CCCC33 | xA9DF596A;
490 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
491 x30 = x003311BB | a4;
492 x31 = x30 ^ xB19ACD3C;
496 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
498 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
499 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
500 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
501 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
502 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
503 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
504 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
505 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
506 u32 x00, x01, x10, x11, x20, x21, x30, x31;
508 x44444444 = a1 & ~a2;
510 x4F4FF4F4 = x44444444 | x0F0FF0F0;
512 x00AAAA00 = x00FFFF00 & ~a1;
513 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
515 x3C3CC3C3 = a2 ^ x0F0FF0F0;
516 x3C3C0000 = x3C3CC3C3 & ~a6;
517 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
518 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
520 x00005EF4 = a6 & x4FE55EF4;
521 x00FF5EFF = a4 | x00005EF4;
522 x00555455 = a1 & x00FF5EFF;
523 x3C699796 = x3C3CC3C3 ^ x00555455;
524 x30 = x4FE55EF4 & ~a5;
525 x31 = x30 ^ x3C699796;
528 x000FF000 = x0F0FF0F0 & x00FFFF00;
530 x26D9A15E = x7373F4F4 ^ x55AA55AA;
531 x2FDFAF5F = a3 | x26D9A15E;
532 x2FD00F5F = x2FDFAF5F & ~x000FF000;
534 x55AAFFAA = x00AAAA00 | x55AA55AA;
535 x28410014 = x3C699796 & ~x55AAFFAA;
537 x000000CC = x000000FF & ~a2;
538 x284100D8 = x28410014 ^ x000000CC;
540 x204100D0 = x7373F4F4 & x284100D8;
541 x3C3CC3FF = x3C3CC3C3 | x000000FF;
542 x1C3CC32F = x3C3CC3FF & ~x204100D0;
543 x4969967A = a1 ^ x1C3CC32F;
544 x10 = x2FD00F5F & a5;
545 x11 = x10 ^ x4969967A;
548 x4CC44CC4 = x4FE55EF4 & ~a2;
549 x40C040C0 = x4CC44CC4 & ~a3;
550 xC3C33C3C = ~x3C3CC3C3;
551 x9669C396 = x55AAFFAA ^ xC3C33C3C;
552 xD6A98356 = x40C040C0 ^ x9669C396;
553 x00 = a5 & ~x0C840A00;
554 x01 = x00 ^ xD6A98356;
557 xD6E9C3D6 = x40C040C0 | x9669C396;
558 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
559 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
560 x001A000B = a4 & ~x4FE55EF4;
561 x9A1F2D1B = x9A072D12 | x001A000B;
562 x20 = a5 & ~x284100D8;
563 x21 = x20 ^ x9A1F2D1B;
567 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
569 u32 x5A5A5A5A, x0F0FF0F0;
570 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
571 x52FBCA0F, x61C8F93C;
572 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
573 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
574 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
575 u32 x00, x01, x10, x11, x20, x21, x30, x31;
580 x33FFCC00 = a5 ^ x33FF33FF;
581 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
582 x0C0CC0C0 = x0F0FF0F0 & ~a2;
583 x0CF3C03F = a4 ^ x0C0CC0C0;
584 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
585 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
586 x61C8F93C = a2 ^ x52FBCA0F;
588 x00C0C03C = x0CF3C03F & x61C8F93C;
589 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
590 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
591 x30908326 = x3B92A366 & ~x0F0F30C0;
592 x3C90B3D6 = x0C0030F0 ^ x30908326;
595 x0C0CFFFF = a5 | x0C0CC0C0;
596 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
597 x04124C11 = x379E5C99 & ~x33CC33CC;
598 x56E9861E = x52FBCA0F ^ x04124C11;
599 x00 = a6 & ~x3C90B3D6;
600 x01 = x00 ^ x56E9861E;
603 xA91679E1 = ~x56E9861E;
604 x10 = x3C90B3D6 & ~a6;
605 x11 = x10 ^ xA91679E1;
608 x9586CA37 = x3C90B3D6 ^ xA91679E1;
609 x8402C833 = x9586CA37 & ~x33CC33CC;
610 x84C2C83F = x00C0C03C | x8402C833;
611 xB35C94A6 = x379E5C99 ^ x84C2C83F;
612 x20 = x61C8F93C | a6;
613 x21 = x20 ^ xB35C94A6;
616 x30 = a6 & x61C8F93C;
617 x31 = x30 ^ xB35C94A6;
621 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
623 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
624 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
625 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
626 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
627 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
628 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
629 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
630 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
631 u32 x00, x01, x10, x11, x20, x21, x30, x31;
634 x77770000 = x77777777 & ~a6;
635 x22225555 = a1 ^ x77770000;
636 x11116666 = a3 ^ x22225555;
637 x1F1F6F6F = a4 | x11116666;
639 x70700000 = x77770000 & ~a4;
640 x43433333 = a3 ^ x70700000;
641 x00430033 = a5 & x43433333;
642 x55557777 = a1 | x11116666;
643 x55167744 = x00430033 ^ x55557777;
644 x5A19784B = a4 ^ x55167744;
646 x5A1987B4 = a6 ^ x5A19784B;
647 x7A3BD7F5 = x22225555 | x5A1987B4;
648 x003B00F5 = a5 & x7A3BD7F5;
649 x221955A0 = x22225555 ^ x003B00F5;
650 x05050707 = a4 & x55557777;
651 x271C52A7 = x221955A0 ^ x05050707;
653 x2A2A82A0 = x7A3BD7F5 & ~a1;
654 x6969B193 = x43433333 ^ x2A2A82A0;
655 x1FE06F90 = a5 ^ x1F1F6F6F;
656 x16804E00 = x1FE06F90 & ~x6969B193;
657 xE97FB1FF = ~x16804E00;
658 x20 = xE97FB1FF & ~a2;
659 x21 = x20 ^ x5A19784B;
662 x43403302 = x43433333 & ~x003B00F5;
663 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
664 x37DEFFB7 = x271C52A7 | x35CAED30;
665 x349ECCB5 = x37DEFFB7 & ~x43403302;
666 x0B01234A = x1F1F6F6F & ~x349ECCB5;
668 x101884B4 = x5A1987B4 & x349ECCB5;
669 x0FF8EB24 = x1FE06F90 ^ x101884B4;
670 x41413333 = x43433333 & x55557777;
671 x4FF9FB37 = x0FF8EB24 | x41413333;
672 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
673 x30 = x4FC2FBC2 & a2;
674 x31 = x30 ^ x271C52A7;
677 x22222222 = a1 ^ x77777777;
678 x16BCEE97 = x349ECCB5 ^ x22222222;
679 x0F080B04 = a4 & x0FF8EB24;
680 x19B4E593 = x16BCEE97 ^ x0F080B04;
681 x00 = x0B01234A | a2;
682 x01 = x00 ^ x19B4E593;
685 x5C5C5C5C = x1F1F6F6F ^ x43433333;
686 x4448184C = x5C5C5C5C & ~x19B4E593;
687 x2DDABE71 = x22225555 ^ x0FF8EB24;
688 x6992A63D = x4448184C ^ x2DDABE71;
689 x10 = x1F1F6F6F & a2;
690 x11 = x10 ^ x6992A63D;
694 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
697 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
698 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
699 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
700 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
701 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
702 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
703 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
704 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
705 u32 x00, x01, x10, x11, x20, x21, x30, x31;
710 x11115555 = a1 & x3333FFFF;
711 x22DD6699 = x33CC33CC ^ x11115555;
712 x22DD9966 = a6 ^ x22DD6699;
713 x00220099 = a5 & ~x22DD9966;
715 x00551144 = a1 & x22DD9966;
716 x33662277 = a2 ^ x00551144;
718 x7B7E7A7F = x33662277 | x5A5A5A5A;
719 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
721 x09030C06 = a3 & x59A31CE6;
722 x09030000 = x09030C06 & ~a6;
723 x336622FF = x00220099 | x33662277;
724 x3A6522FF = x09030000 ^ x336622FF;
725 x30 = x3A6522FF & a4;
726 x31 = x30 ^ x59A31CE6;
729 x484D494C = a2 ^ x7B7E7A7F;
730 x0000B6B3 = a6 & ~x484D494C;
731 x0F0FB9BC = a3 ^ x0000B6B3;
732 x00FC00F9 = a5 & ~x09030C06;
733 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
735 x5DF75DF7 = a1 | x59A31CE6;
736 x116600F7 = x336622FF & x5DF75DF7;
737 x1E69B94B = x0F0FB9BC ^ x116600F7;
738 x1668B94B = x1E69B94B & ~x09030000;
739 x20 = x00220099 | a4;
740 x21 = x20 ^ x1668B94B;
743 x7B7B7B7B = a2 | x5A5A5A5A;
744 x411E5984 = x3A6522FF ^ x7B7B7B7B;
745 x1FFFFDFD = x11115555 | x0FFFB9FD;
746 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
748 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
749 x004B002D = a5 & ~x3CB4DFD2;
750 xB7B2B6B3 = ~x484D494C;
751 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
752 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
753 x10 = xCC82CDE5 & ~a4;
754 x11 = x10 ^ x5EE1A479;
757 x0055EEBB = a6 ^ x00551144;
758 x5A5AECE9 = a1 ^ x0F0FB9BC;
759 x0050ECA9 = x0055EEBB & x5A5AECE9;
760 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
761 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
762 x00 = x0FFFB9FD & ~a4;
763 x01 = x00 ^ xC59A2D67;
767 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
769 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
770 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
771 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
772 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
773 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
774 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
775 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
776 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
777 u32 x00, x01, x10, x11, x20, x21, x30, x31;
780 x3CC33CC3 = a3 ^ x0FF00FF0;
781 x00003CC3 = a6 & x3CC33CC3;
782 x0F000F00 = a4 & x0FF00FF0;
783 x5A555A55 = a2 ^ x0F000F00;
784 x00001841 = x00003CC3 & x5A555A55;
786 x00000F00 = a6 & x0F000F00;
787 x33333C33 = a3 ^ x00000F00;
788 x7B777E77 = x5A555A55 | x33333C33;
789 x0FF0F00F = a6 ^ x0FF00FF0;
790 x74878E78 = x7B777E77 ^ x0FF0F00F;
791 x30 = a1 & ~x00001841;
792 x31 = x30 ^ x74878E78;
795 x003C003C = a5 & ~x3CC33CC3;
796 x5A7D5A7D = x5A555A55 | x003C003C;
797 x333300F0 = x00003CC3 ^ x33333C33;
798 x694E5A8D = x5A7D5A7D ^ x333300F0;
800 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
801 x000F0303 = a4 & ~x0FF0CCCC;
802 x5A505854 = x5A555A55 & ~x000F0303;
803 x33CC000F = a5 ^ x333300F0;
804 x699C585B = x5A505854 ^ x33CC000F;
806 x7F878F78 = x0F000F00 | x74878E78;
807 x21101013 = a3 & x699C585B;
808 x7F979F7B = x7F878F78 | x21101013;
809 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
810 x4F9493BB = x7F979F7B ^ x30030CC0;
811 x00 = x4F9493BB & ~a1;
812 x01 = x00 ^ x694E5A8D;
815 x6F9CDBFB = x699C585B | x4F9493BB;
816 x0000DBFB = a6 & x6F9CDBFB;
817 x00005151 = a2 & x0000DBFB;
818 x26DAC936 = x694E5A8D ^ x4F9493BB;
819 x26DA9867 = x00005151 ^ x26DAC936;
821 x27DA9877 = x21101013 | x26DA9867;
822 x27DA438C = x0000DBFB ^ x27DA9877;
823 x2625C9C9 = a5 ^ x26DAC936;
824 x27FFCBCD = x27DA438C | x2625C9C9;
825 x20 = x27FFCBCD & a1;
826 x21 = x20 ^ x699C585B;
829 x27FF1036 = x0000DBFB ^ x27FFCBCD;
830 x27FF103E = x003C003C | x27FF1036;
831 xB06B6C44 = ~x4F9493BB;
832 x97947C7A = x27FF103E ^ xB06B6C44;
833 x10 = x97947C7A & ~a1;
834 x11 = x10 ^ x26DA9867;
838 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
840 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
841 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
842 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
843 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
844 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
845 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
846 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
847 u32 x00, x01, x10, x11, x20, x21, x30, x31;
849 x0C0C0C0C = a3 & ~a2;
850 x0000F0F0 = a5 & ~a3;
851 x00FFF00F = a4 ^ x0000F0F0;
852 x00555005 = a1 & x00FFF00F;
853 x00515001 = x00555005 & ~x0C0C0C0C;
855 x33000330 = a2 & ~x00FFF00F;
856 x77555775 = a1 | x33000330;
857 x30303030 = a2 & ~a3;
858 x3030CFCF = a5 ^ x30303030;
859 x30104745 = x77555775 & x3030CFCF;
860 x30555745 = x00555005 | x30104745;
862 xFF000FF0 = ~x00FFF00F;
863 xCF1048B5 = x30104745 ^ xFF000FF0;
864 x080A080A = a3 & ~x77555775;
865 xC71A40BF = xCF1048B5 ^ x080A080A;
866 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
867 x10 = x00515001 | a6;
868 x11 = x10 ^ xCB164CB3;
871 x9E4319E6 = a1 ^ xCB164CB3;
872 x000019E6 = a5 & x9E4319E6;
873 xF429738C = a2 ^ xC71A40BF;
874 xF4296A6A = x000019E6 ^ xF429738C;
875 xC729695A = x33000330 ^ xF4296A6A;
877 xC47C3D2F = x30555745 ^ xF4296A6A;
878 xF77F3F3F = a2 | xC47C3D2F;
879 x9E43E619 = a5 ^ x9E4319E6;
880 x693CD926 = xF77F3F3F ^ x9E43E619;
881 x20 = x30555745 & a6;
882 x21 = x20 ^ x693CD926;
885 xF719A695 = x3030CFCF ^ xC729695A;
886 xF4FF73FF = a4 | xF429738C;
887 x03E6D56A = xF719A695 ^ xF4FF73FF;
888 x56B3803F = a1 ^ x03E6D56A;
889 x30 = x56B3803F & a6;
890 x31 = x30 ^ xC729695A;
893 xF700A600 = xF719A695 & ~a4;
894 x61008000 = x693CD926 & xF700A600;
895 x03B7856B = x00515001 ^ x03E6D56A;
896 x62B7056B = x61008000 ^ x03B7856B;
897 x00 = x62B7056B | a6;
898 x01 = x00 ^ xC729695A;
905 #if defined IS_AMD || defined IS_GENERIC
908 * Bitslice DES S-boxes making use of a vector conditional select operation
909 * (e.g., vsel on PowerPC with AltiVec).
911 * Gate counts: 36 33 33 26 35 34 34 32
914 * Several same-gate-count expressions for each S-box are included (for use on
915 * different CPUs/GPUs).
917 * These Boolean expressions corresponding to DES S-boxes have been generated
918 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
919 * John the Ripper password cracker: http://www.openwall.com/john/
920 * Being mathematical formulas, they are not copyrighted and are free for reuse
923 * This file (a specific representation of the S-box expressions, surrounding
924 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
925 * Redistribution and use in source and binary forms, with or without
926 * modification, are permitted. (This is a heavily cut-down "BSD license".)
928 * The effort has been sponsored by Rapid7: http://www.rapid7.com
931 #define vnot(dst, a) (dst) = ~(a)
932 #define vand(dst, a, b) (dst) = (a) & (b)
933 #define vor(dst, a, b) (dst) = (a) | (b)
934 #define vandn(dst, a, b) (dst) = (a) & ~(b)
935 #define vxor(dst, a, b) (dst) = (a) ^ (b)
936 #define vsel(dst, a, b, c) (dst) = bitselect((a),(b),(c))
939 s1(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
940 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
942 u32 x0F0F3333, x3C3C3C3C, x55FF55FF, x69C369C3, x0903B73F, x09FCB7C0,
944 u32 x55AFD1B7, x3C3C69C3, x6993B874;
945 u32 x5CEDE59F, x09FCE295, x5D91A51E, x529E962D;
946 u32 x29EEADC0, x4B8771A3, x428679F3, x6B68D433;
947 u32 x5BA7E193, x026F12F3, x6B27C493, x94D83B6C;
948 u32 x965E0B0F, x3327A113, x847F0A1F, xD6E19C32;
949 u32 x0DBCE883, x3A25A215, x37994A96;
950 u32 x8A487EA7, x8B480F07, xB96C2D16;
953 vsel(x0F0F3333, a3, a2, a5);
954 vxor(x3C3C3C3C, a2, a3);
955 vor(x55FF55FF, a1, a4);
956 vxor(x69C369C3, x3C3C3C3C, x55FF55FF);
957 vsel(x0903B73F, a5, x0F0F3333, x69C369C3);
958 vxor(x09FCB7C0, a4, x0903B73F);
959 vxor(x5CA9E295, a1, x09FCB7C0);
961 vsel(x55AFD1B7, x5CA9E295, x55FF55FF, x0F0F3333);
962 vsel(x3C3C69C3, x3C3C3C3C, x69C369C3, a5);
963 vxor(x6993B874, x55AFD1B7, x3C3C69C3);
965 vsel(x5CEDE59F, x55FF55FF, x5CA9E295, x6993B874);
966 vsel(x09FCE295, x09FCB7C0, x5CA9E295, a5);
967 vsel(x5D91A51E, x5CEDE59F, x6993B874, x09FCE295);
968 vxor(x529E962D, x0F0F3333, x5D91A51E);
970 vsel(x29EEADC0, x69C369C3, x09FCB7C0, x5CEDE59F);
971 vsel(x4B8771A3, x0F0F3333, x69C369C3, x5CA9E295);
972 vsel(x428679F3, a5, x4B8771A3, x529E962D);
973 vxor(x6B68D433, x29EEADC0, x428679F3);
975 vsel(x5BA7E193, x5CA9E295, x4B8771A3, a3);
976 vsel(x026F12F3, a4, x0F0F3333, x529E962D);
977 vsel(x6B27C493, x6B68D433, x5BA7E193, x026F12F3);
978 vnot(x94D83B6C, x6B27C493);
979 vsel(x0, x94D83B6C, x6B68D433, a6);
980 vxor(*out1, *out1, x0);
982 vsel(x965E0B0F, x94D83B6C, a3, x428679F3);
983 vsel(x3327A113, x5BA7E193, a2, x69C369C3);
984 vsel(x847F0A1F, x965E0B0F, a4, x3327A113);
985 vxor(xD6E19C32, x529E962D, x847F0A1F);
986 vsel(x1, xD6E19C32, x5CA9E295, a6);
987 vxor(*out2, *out2, x1);
989 vsel(x0DBCE883, x09FCE295, x3C3C69C3, x847F0A1F);
990 vsel(x3A25A215, x3327A113, x5CA9E295, x0903B73F);
991 vxor(x37994A96, x0DBCE883, x3A25A215);
992 vsel(x3, x37994A96, x529E962D, a6);
993 vxor(*out4, *out4, x3);
995 vxor(x8A487EA7, x5CA9E295, xD6E19C32);
996 vsel(x8B480F07, a3, x8A487EA7, x847F0A1F);
997 vsel(xB96C2D16, x8B480F07, x3C3C3C3C, x3A25A215);
998 vsel(x2, xB96C2D16, x6993B874, a6);
999 vxor(*out3, *out3, x2);
1003 s2(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1004 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1006 u32 x55553333, x0055FF33, x33270F03, x66725A56, x00FFFF00, x668DA556;
1007 u32 x0F0F5A56, xF0F0A5A9, xA5A5969A, xA55A699A;
1008 u32 x0F5AF03C, x6600FF56, x87A5F09C;
1009 u32 xA55A963C, x3C69C30F, xB44BC32D;
1010 u32 x66D7CC56, x0F4B0F2D, x699CC37B, x996C66D2;
1011 u32 xB46C662D, x278DB412, xB66CB43B;
1012 u32 xD2DC4E52, x27993333, xD2994E33;
1013 u32 x278D0F2D, x2E0E547B, x09976748;
1016 vsel(x55553333, a1, a3, a6);
1017 vsel(x0055FF33, a6, x55553333, a5);
1018 vsel(x33270F03, a3, a4, x0055FF33);
1019 vxor(x66725A56, a1, x33270F03);
1020 vxor(x00FFFF00, a5, a6);
1021 vxor(x668DA556, x66725A56, x00FFFF00);
1023 vsel(x0F0F5A56, a4, x66725A56, a6);
1024 vnot(xF0F0A5A9, x0F0F5A56);
1025 vxor(xA5A5969A, x55553333, xF0F0A5A9);
1026 vxor(xA55A699A, x00FFFF00, xA5A5969A);
1027 vsel(x1, xA55A699A, x668DA556, a2);
1028 vxor(*out2, *out2, x1);
1030 vxor(x0F5AF03C, a4, x0055FF33);
1031 vsel(x6600FF56, x66725A56, a6, x00FFFF00);
1032 vsel(x87A5F09C, xA5A5969A, x0F5AF03C, x6600FF56);
1034 vsel(xA55A963C, xA5A5969A, x0F5AF03C, a5);
1035 vxor(x3C69C30F, a3, x0F5AF03C);
1036 vsel(xB44BC32D, xA55A963C, x3C69C30F, a1);
1038 vsel(x66D7CC56, x66725A56, x668DA556, xA5A5969A);
1039 vsel(x0F4B0F2D, a4, xB44BC32D, a5);
1040 vxor(x699CC37B, x66D7CC56, x0F4B0F2D);
1041 vxor(x996C66D2, xF0F0A5A9, x699CC37B);
1042 vsel(x0, x996C66D2, xB44BC32D, a2);
1043 vxor(*out1, *out1, x0);
1045 vsel(xB46C662D, xB44BC32D, x996C66D2, x00FFFF00);
1046 vsel(x278DB412, x668DA556, xA5A5969A, a1);
1047 vsel(xB66CB43B, xB46C662D, x278DB412, x6600FF56);
1049 vsel(xD2DC4E52, x66D7CC56, x996C66D2, xB44BC32D);
1050 vsel(x27993333, x278DB412, a3, x0055FF33);
1051 vsel(xD2994E33, xD2DC4E52, x27993333, a5);
1052 vsel(x3, x87A5F09C, xD2994E33, a2);
1053 vxor(*out4, *out4, x3);
1055 vsel(x278D0F2D, x278DB412, x0F4B0F2D, a6);
1056 vsel(x2E0E547B, x0F0F5A56, xB66CB43B, x278D0F2D);
1057 vxor(x09976748, x27993333, x2E0E547B);
1058 vsel(x2, xB66CB43B, x09976748, a2);
1059 vxor(*out3, *out3, x2);
1063 s3(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1064 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1066 u32 x0F330F33, x0F33F0CC, x5A66A599;
1067 u32 x2111B7BB, x03FF3033, x05BB50EE, x074F201F, x265E97A4;
1068 u32 x556BA09E, x665A93AC, x99A56C53;
1069 u32 x25A1A797, x5713754C, x66559355, x47B135C6;
1070 u32 x9A5A5C60, xD07AF8F8, x87698DB4, xE13C1EE1;
1071 u32 x9E48CDE4, x655B905E, x00A55CFF, x9E49915E;
1072 u32 xD6599874, x05330022, xD2699876;
1073 u32 x665F9364, xD573F0F2, xB32C6396;
1076 vsel(x0F330F33, a4, a3, a5);
1077 vxor(x0F33F0CC, a6, x0F330F33);
1078 vxor(x5A66A599, a2, x0F33F0CC);
1080 vsel(x2111B7BB, a3, a6, x5A66A599);
1081 vsel(x03FF3033, a5, a3, x0F33F0CC);
1082 vsel(x05BB50EE, a5, x0F33F0CC, a2);
1083 vsel(x074F201F, x03FF3033, a4, x05BB50EE);
1084 vxor(x265E97A4, x2111B7BB, x074F201F);
1086 vsel(x556BA09E, x5A66A599, x05BB50EE, a4);
1087 vsel(x665A93AC, x556BA09E, x265E97A4, a3);
1088 vnot(x99A56C53, x665A93AC);
1089 vsel(x1, x265E97A4, x99A56C53, a1);
1090 vxor(*out2, *out2, x1);
1092 vxor(x25A1A797, x03FF3033, x265E97A4);
1093 vsel(x5713754C, a2, x0F33F0CC, x074F201F);
1094 vsel(x66559355, x665A93AC, a2, a5);
1095 vsel(x47B135C6, x25A1A797, x5713754C, x66559355);
1097 vxor(x9A5A5C60, x03FF3033, x99A56C53);
1098 vsel(xD07AF8F8, x9A5A5C60, x556BA09E, x5A66A599);
1099 vxor(x87698DB4, x5713754C, xD07AF8F8);
1100 vxor(xE13C1EE1, x66559355, x87698DB4);
1102 vsel(x9E48CDE4, x9A5A5C60, x87698DB4, x265E97A4);
1103 vsel(x655B905E, x66559355, x05BB50EE, a4);
1104 vsel(x00A55CFF, a5, a6, x9A5A5C60);
1105 vsel(x9E49915E, x9E48CDE4, x655B905E, x00A55CFF);
1106 vsel(x0, x9E49915E, xE13C1EE1, a1);
1107 vxor(*out1, *out1, x0);
1109 vsel(xD6599874, xD07AF8F8, x66559355, x0F33F0CC);
1110 vand(x05330022, x0F330F33, x05BB50EE);
1111 vsel(xD2699876, xD6599874, x00A55CFF, x05330022);
1112 vsel(x3, x5A66A599, xD2699876, a1);
1113 vxor(*out4, *out4, x3);
1115 vsel(x665F9364, x265E97A4, x66559355, x47B135C6);
1116 vsel(xD573F0F2, xD07AF8F8, x05330022, a4);
1117 vxor(xB32C6396, x665F9364, xD573F0F2);
1118 vsel(x2, xB32C6396, x47B135C6, a1);
1119 vxor(*out3, *out3, x2);
1123 s4(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1124 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1126 u32 x0505AFAF, x0555AF55, x0A5AA05A, x46566456, x0A0A5F5F, x0AF55FA0,
1127 x0AF50F0F, x4CA36B59;
1129 u32 x01BB23BB, x5050FAFA, xA31C26BE, xA91679E1;
1131 u32 x50E9FA1E, x0AF55F00, x827D9784, xD2946D9A;
1132 u32 x31F720B3, x11FB21B3, x4712A7AD, x9586CA37;
1135 vsel(x0505AFAF, a5, a3, a1);
1136 vsel(x0555AF55, x0505AFAF, a1, a4);
1137 vxor(x0A5AA05A, a3, x0555AF55);
1138 vsel(x46566456, a1, x0A5AA05A, a2);
1139 vsel(x0A0A5F5F, a3, a5, a1);
1140 vxor(x0AF55FA0, a4, x0A0A5F5F);
1141 vsel(x0AF50F0F, x0AF55FA0, a3, a5);
1142 vxor(x4CA36B59, x46566456, x0AF50F0F);
1144 vnot(xB35C94A6, x4CA36B59);
1146 vsel(x01BB23BB, a4, a2, x0555AF55);
1147 vxor(x5050FAFA, a1, x0505AFAF);
1148 vsel(xA31C26BE, xB35C94A6, x01BB23BB, x5050FAFA);
1149 vxor(xA91679E1, x0A0A5F5F, xA31C26BE);
1151 vnot(x56E9861E, xA91679E1);
1153 vsel(x50E9FA1E, x5050FAFA, x56E9861E, a4);
1154 vsel(x0AF55F00, x0AF50F0F, x0AF55FA0, x0A0A5F5F);
1155 vsel(x827D9784, xB35C94A6, x0AF55F00, a2);
1156 vxor(xD2946D9A, x50E9FA1E, x827D9784);
1157 vsel(x2, xD2946D9A, x4CA36B59, a6);
1158 vxor(*out3, *out3, x2);
1159 vsel(x3, xB35C94A6, xD2946D9A, a6);
1160 vxor(*out4, *out4, x3);
1162 vsel(x31F720B3, a2, a4, x0AF55FA0);
1163 vsel(x11FB21B3, x01BB23BB, x31F720B3, x5050FAFA);
1164 vxor(x4712A7AD, x56E9861E, x11FB21B3);
1165 vxor(x9586CA37, xD2946D9A, x4712A7AD);
1166 vsel(x0, x56E9861E, x9586CA37, a6);
1167 vxor(*out1, *out1, x0);
1168 vsel(x1, x9586CA37, xA91679E1, a6);
1169 vxor(*out2, *out2, x1);
1173 s5(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1174 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1176 u32 x550F550F, xAAF0AAF0, xA5F5A5F5, x96C696C6, x00FFFF00, x963969C6;
1177 u32 x2E3C2E3C, xB73121F7, x1501DF0F, x00558A5F, x2E69A463;
1178 u32 x0679ED42, x045157FD, xB32077FF, x9D49D39C;
1179 u32 xAC81CFB2, xF72577AF, x5BA4B81D;
1180 u32 x5BA477AF, x4895469F, x3A35273A, x1A35669A;
1181 u32 x12E6283D, x9E47D3D4, x1A676AB4;
1182 u32 x891556DF, xE5E77F82, x6CF2295D;
1183 u32 x2E3CA5F5, x9697C1C6, x369CC1D6;
1186 vsel(x550F550F, a1, a3, a5);
1187 vnot(xAAF0AAF0, x550F550F);
1188 vsel(xA5F5A5F5, xAAF0AAF0, a1, a3);
1189 vxor(x96C696C6, a2, xA5F5A5F5);
1190 vxor(x00FFFF00, a5, a6);
1191 vxor(x963969C6, x96C696C6, x00FFFF00);
1193 vsel(x2E3C2E3C, a3, xAAF0AAF0, a2);
1194 vsel(xB73121F7, a2, x963969C6, x96C696C6);
1195 vsel(x1501DF0F, a6, x550F550F, xB73121F7);
1196 vsel(x00558A5F, x1501DF0F, a5, a1);
1197 vxor(x2E69A463, x2E3C2E3C, x00558A5F);
1199 vsel(x0679ED42, x00FFFF00, x2E69A463, x96C696C6);
1200 vsel(x045157FD, a6, a1, x0679ED42);
1201 vsel(xB32077FF, xB73121F7, a6, x045157FD);
1202 vxor(x9D49D39C, x2E69A463, xB32077FF);
1203 vsel(x2, x9D49D39C, x2E69A463, a4);
1204 vxor(*out3, *out3, x2);
1206 vsel(xAC81CFB2, xAAF0AAF0, x1501DF0F, x0679ED42);
1207 vsel(xF72577AF, xB32077FF, x550F550F, a1);
1208 vxor(x5BA4B81D, xAC81CFB2, xF72577AF);
1209 vsel(x1, x5BA4B81D, x963969C6, a4);
1210 vxor(*out2, *out2, x1);
1212 vsel(x5BA477AF, x5BA4B81D, xF72577AF, a6);
1213 vsel(x4895469F, x5BA477AF, x00558A5F, a2);
1214 vsel(x3A35273A, x2E3C2E3C, a2, x963969C6);
1215 vsel(x1A35669A, x4895469F, x3A35273A, x5BA4B81D);
1217 vsel(x12E6283D, a5, x5BA4B81D, x963969C6);
1218 vsel(x9E47D3D4, x96C696C6, x9D49D39C, xAC81CFB2);
1219 vsel(x1A676AB4, x12E6283D, x9E47D3D4, x4895469F);
1221 vsel(x891556DF, xB32077FF, x4895469F, x3A35273A);
1222 vsel(xE5E77F82, xF72577AF, x00FFFF00, x12E6283D);
1223 vxor(x6CF2295D, x891556DF, xE5E77F82);
1224 vsel(x3, x1A35669A, x6CF2295D, a4);
1225 vxor(*out4, *out4, x3);
1227 vsel(x2E3CA5F5, x2E3C2E3C, xA5F5A5F5, a6);
1228 vsel(x9697C1C6, x96C696C6, x963969C6, x045157FD);
1229 vsel(x369CC1D6, x2E3CA5F5, x9697C1C6, x5BA477AF);
1230 vsel(x0, x369CC1D6, x1A676AB4, a4);
1231 vxor(*out1, *out1, x0);
1235 s6(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1236 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1238 u32 x555500FF, x666633CC, x606F30CF, x353A659A, x353A9A65, xCAC5659A;
1239 u32 x353A6565, x0A3F0A6F, x6C5939A3, x5963A3C6;
1240 u32 x35FF659A, x3AF06A95, x05CF0A9F, x16E94A97;
1241 u32 x86CD4C9B, x12E0FFFD, x942D9A67;
1242 u32 x142956AB, x455D45DF, x1C3EE619;
1243 u32 x2AEA70D5, x20CF7A9F, x3CF19C86, x69A49C79;
1244 u32 x840DBB67, x6DA19C1E, x925E63E1;
1245 u32 x9C3CA761, x257A75D5, xB946D2B4;
1248 vsel(x555500FF, a1, a4, a5);
1249 vxor(x666633CC, a2, x555500FF);
1250 vsel(x606F30CF, x666633CC, a4, a3);
1251 vxor(x353A659A, a1, x606F30CF);
1252 vxor(x353A9A65, a5, x353A659A);
1253 vnot(xCAC5659A, x353A9A65);
1255 vsel(x353A6565, x353A659A, x353A9A65, a4);
1256 vsel(x0A3F0A6F, a3, a4, x353A6565);
1257 vxor(x6C5939A3, x666633CC, x0A3F0A6F);
1258 vxor(x5963A3C6, x353A9A65, x6C5939A3);
1260 vsel(x35FF659A, a4, x353A659A, x353A6565);
1261 vxor(x3AF06A95, a3, x35FF659A);
1262 vsel(x05CF0A9F, a4, a3, x353A9A65);
1263 vsel(x16E94A97, x3AF06A95, x05CF0A9F, x6C5939A3);
1265 vsel(x86CD4C9B, xCAC5659A, x05CF0A9F, x6C5939A3);
1266 vsel(x12E0FFFD, a5, x3AF06A95, x16E94A97);
1267 vsel(x942D9A67, x86CD4C9B, x353A9A65, x12E0FFFD);
1268 vsel(x0, xCAC5659A, x942D9A67, a6);
1269 vxor(*out1, *out1, x0);
1271 vsel(x142956AB, x353A659A, x942D9A67, a2);
1272 vsel(x455D45DF, a1, x86CD4C9B, x142956AB);
1273 vxor(x1C3EE619, x5963A3C6, x455D45DF);
1274 vsel(x3, x5963A3C6, x1C3EE619, a6);
1275 vxor(*out4, *out4, x3);
1277 vsel(x2AEA70D5, x3AF06A95, x606F30CF, x353A9A65);
1278 vsel(x20CF7A9F, x2AEA70D5, x05CF0A9F, x0A3F0A6F);
1279 vxor(x3CF19C86, x1C3EE619, x20CF7A9F);
1280 vxor(x69A49C79, x555500FF, x3CF19C86);
1282 vsel(x840DBB67, a5, x942D9A67, x86CD4C9B);
1283 vsel(x6DA19C1E, x69A49C79, x3CF19C86, x840DBB67);
1284 vnot(x925E63E1, x6DA19C1E);
1285 vsel(x1, x925E63E1, x69A49C79, a6);
1286 vxor(*out2, *out2, x1);
1288 vsel(x9C3CA761, x840DBB67, x1C3EE619, x3CF19C86);
1289 vsel(x257A75D5, x455D45DF, x2AEA70D5, x606F30CF);
1290 vxor(xB946D2B4, x9C3CA761, x257A75D5);
1291 vsel(x2, x16E94A97, xB946D2B4, a6);
1292 vxor(*out3, *out3, x2);
1296 s7(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1297 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1299 u32 x44447777, x4B4B7878, x22772277, x0505F5F5, x220522F5, x694E5A8D;
1300 u32 x00FFFF00, x66666666, x32353235, x26253636, x26DAC936;
1301 u32 x738F9C63, x11EF9867, x26DA9867;
1302 u32 x4B4B9C63, x4B666663, x4E639396;
1303 u32 x4E4B393C, xFF00FF00, xFF05DD21, xB14EE41D;
1304 u32 xD728827B, x6698807B, x699C585B;
1305 u32 x738C847B, xA4A71E18, x74878E78;
1306 u32 x333D9639, x74879639, x8B7869C6;
1309 vsel(x44447777, a2, a6, a3);
1310 vxor(x4B4B7878, a4, x44447777);
1311 vsel(x22772277, a3, a5, a2);
1312 vsel(x0505F5F5, a6, a2, a4);
1313 vsel(x220522F5, x22772277, x0505F5F5, a5);
1314 vxor(x694E5A8D, x4B4B7878, x220522F5);
1316 vxor(x00FFFF00, a5, a6);
1317 vxor(x66666666, a2, a3);
1318 vsel(x32353235, a3, x220522F5, a4);
1319 vsel(x26253636, x66666666, x32353235, x4B4B7878);
1320 vxor(x26DAC936, x00FFFF00, x26253636);
1321 vsel(x0, x26DAC936, x694E5A8D, a1);
1322 vxor(*out1, *out1, x0);
1324 vxor(x738F9C63, a2, x26DAC936);
1325 vsel(x11EF9867, x738F9C63, a5, x66666666);
1326 vsel(x26DA9867, x26DAC936, x11EF9867, a6);
1328 vsel(x4B4B9C63, x4B4B7878, x738F9C63, a6);
1329 vsel(x4B666663, x4B4B9C63, x66666666, x00FFFF00);
1330 vxor(x4E639396, x0505F5F5, x4B666663);
1332 vsel(x4E4B393C, x4B4B7878, x4E639396, a2);
1333 vnot(xFF00FF00, a5);
1334 vsel(xFF05DD21, xFF00FF00, x738F9C63, x32353235);
1335 vxor(xB14EE41D, x4E4B393C, xFF05DD21);
1336 vsel(x1, xB14EE41D, x26DA9867, a1);
1337 vxor(*out2, *out2, x1);
1339 vxor(xD728827B, x66666666, xB14EE41D);
1340 vsel(x6698807B, x26DA9867, xD728827B, x4E4B393C);
1341 vsel(x699C585B, x6698807B, x694E5A8D, xFF05DD21);
1342 vsel(x2, x699C585B, x4E639396, a1);
1343 vxor(*out3, *out3, x2);
1345 vsel(x738C847B, x738F9C63, xD728827B, x4B4B7878);
1346 vxor(xA4A71E18, x738F9C63, xD728827B);
1347 vsel(x74878E78, x738C847B, xA4A71E18, a4);
1349 vsel(x333D9639, x32353235, x738C847B, xB14EE41D);
1350 vsel(x74879639, x74878E78, x333D9639, a6);
1351 vnot(x8B7869C6, x74879639);
1352 vsel(x3, x74878E78, x8B7869C6, a1);
1353 vxor(*out4, *out4, x3);
1357 s8(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1358 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1360 u32 x0505F5F5, x05FAF50A, x0F0F00FF, x22227777, x07DA807F, x34E9B34C;
1361 u32 x00FFF00F, x0033FCCF, x5565B15C, x0C0C3F3F, x59698E63;
1362 u32 x3001F74E, x30555745, x693CD926;
1363 u32 x0C0CD926, x0C3F25E9, x38D696A5;
1365 u32 x03D2117B, xC778395B, xCB471CB2;
1366 u32 x5425B13F, x56B3803F, x919AE965;
1367 u32 x17B3023F, x75555755, x62E6556A, xA59E6C31;
1370 vsel(x0505F5F5, a5, a1, a3);
1371 vxor(x05FAF50A, a4, x0505F5F5);
1372 vsel(x0F0F00FF, a3, a4, a5);
1373 vsel(x22227777, a2, a5, a1);
1374 vsel(x07DA807F, x05FAF50A, x0F0F00FF, x22227777);
1375 vxor(x34E9B34C, a2, x07DA807F);
1377 vsel(x00FFF00F, x05FAF50A, a4, a3);
1378 vsel(x0033FCCF, a5, x00FFF00F, a2);
1379 vsel(x5565B15C, a1, x34E9B34C, x0033FCCF);
1380 vsel(x0C0C3F3F, a3, a5, a2);
1381 vxor(x59698E63, x5565B15C, x0C0C3F3F);
1383 vsel(x3001F74E, x34E9B34C, a5, x05FAF50A);
1384 vsel(x30555745, x3001F74E, a1, x00FFF00F);
1385 vxor(x693CD926, x59698E63, x30555745);
1386 vsel(x2, x693CD926, x59698E63, a6);
1387 vxor(*out3, *out3, x2);
1389 vsel(x0C0CD926, x0C0C3F3F, x693CD926, a5);
1390 vxor(x0C3F25E9, x0033FCCF, x0C0CD926);
1391 vxor(x38D696A5, x34E9B34C, x0C3F25E9);
1393 vnot(xC729695A, x38D696A5);
1395 vsel(x03D2117B, x07DA807F, a2, x0C0CD926);
1396 vsel(xC778395B, xC729695A, x03D2117B, x30555745);
1397 vxor(xCB471CB2, x0C3F25E9, xC778395B);
1398 vsel(x1, xCB471CB2, x34E9B34C, a6);
1399 vxor(*out2, *out2, x1);
1401 vsel(x5425B13F, x5565B15C, x0C0C3F3F, x03D2117B);
1402 vsel(x56B3803F, x07DA807F, x5425B13F, x59698E63);
1403 vxor(x919AE965, xC729695A, x56B3803F);
1404 vsel(x3, xC729695A, x919AE965, a6);
1405 vxor(*out4, *out4, x3);
1407 vsel(x17B3023F, x07DA807F, a2, x59698E63);
1408 vor(x75555755, a1, x30555745);
1409 vxor(x62E6556A, x17B3023F, x75555755);
1410 vxor(xA59E6C31, xC778395B, x62E6556A);
1411 vsel(x0, xA59E6C31, x38D696A5, a6);
1412 vxor(*out1, *out1, x0);
1416 #define SWAP(a, b) { u32 tmp=*a;*a=*b;*b=tmp; }
1452 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
1453 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
1454 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
1455 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
1456 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
1457 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
1458 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
1459 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
1460 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
1461 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
1462 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
1463 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
1464 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
1465 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
1466 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
1467 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
1469 static void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
1471 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
1472 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
1473 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
1474 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
1475 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
1476 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
1477 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
1478 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
1481 #if CUDA_ARCH >= 500
1495 for (u32 i = 0; i < 2; i++)
1497 if (i) KEYSET10 else KEYSET00
1499 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1500 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1501 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1502 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1503 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1504 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1505 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1506 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1508 if (i) KEYSET11 else KEYSET01
1510 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1511 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1512 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1513 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1514 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1515 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1516 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1517 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1519 if (i) KEYSET12 else KEYSET02
1521 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1522 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1523 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1524 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1525 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1526 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1527 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1528 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1530 if (i) KEYSET13 else KEYSET03
1532 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1533 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1534 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1535 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1536 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1537 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1538 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1539 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1541 if (i) KEYSET14 else KEYSET04
1543 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1544 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1545 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1546 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1547 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1548 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1549 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1550 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1552 if (i) KEYSET15 else KEYSET05
1554 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1555 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1556 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1557 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1558 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1559 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1560 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1561 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1563 if (i) KEYSET16 else KEYSET06
1565 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1566 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1567 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1568 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1569 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1570 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1571 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1572 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1574 if (i) KEYSET17 else KEYSET07
1576 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1577 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1578 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1579 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1580 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1581 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1582 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1583 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1587 static void transpose32c (u32 data[32])
1589 #define swap(x,y,j,m) \
1590 t = ((x) ^ ((y) >> (j))) & (m); \
1592 (y) = (y) ^ (t << (j));
1596 swap (data[ 0], data[16], 16, 0x0000ffff);
1597 swap (data[ 1], data[17], 16, 0x0000ffff);
1598 swap (data[ 2], data[18], 16, 0x0000ffff);
1599 swap (data[ 3], data[19], 16, 0x0000ffff);
1600 swap (data[ 4], data[20], 16, 0x0000ffff);
1601 swap (data[ 5], data[21], 16, 0x0000ffff);
1602 swap (data[ 6], data[22], 16, 0x0000ffff);
1603 swap (data[ 7], data[23], 16, 0x0000ffff);
1604 swap (data[ 8], data[24], 16, 0x0000ffff);
1605 swap (data[ 9], data[25], 16, 0x0000ffff);
1606 swap (data[10], data[26], 16, 0x0000ffff);
1607 swap (data[11], data[27], 16, 0x0000ffff);
1608 swap (data[12], data[28], 16, 0x0000ffff);
1609 swap (data[13], data[29], 16, 0x0000ffff);
1610 swap (data[14], data[30], 16, 0x0000ffff);
1611 swap (data[15], data[31], 16, 0x0000ffff);
1612 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
1613 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
1614 swap (data[ 2], data[10], 8, 0x00ff00ff);
1615 swap (data[ 3], data[11], 8, 0x00ff00ff);
1616 swap (data[ 4], data[12], 8, 0x00ff00ff);
1617 swap (data[ 5], data[13], 8, 0x00ff00ff);
1618 swap (data[ 6], data[14], 8, 0x00ff00ff);
1619 swap (data[ 7], data[15], 8, 0x00ff00ff);
1620 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
1621 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
1622 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
1623 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
1624 swap (data[ 0], data[ 2], 2, 0x33333333);
1625 swap (data[ 1], data[ 3], 2, 0x33333333);
1626 swap (data[ 0], data[ 1], 1, 0x55555555);
1627 swap (data[ 2], data[ 3], 1, 0x55555555);
1628 swap (data[ 4], data[ 6], 2, 0x33333333);
1629 swap (data[ 5], data[ 7], 2, 0x33333333);
1630 swap (data[ 4], data[ 5], 1, 0x55555555);
1631 swap (data[ 6], data[ 7], 1, 0x55555555);
1632 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
1633 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
1634 swap (data[10], data[14], 4, 0x0f0f0f0f);
1635 swap (data[11], data[15], 4, 0x0f0f0f0f);
1636 swap (data[ 8], data[10], 2, 0x33333333);
1637 swap (data[ 9], data[11], 2, 0x33333333);
1638 swap (data[ 8], data[ 9], 1, 0x55555555);
1639 swap (data[10], data[11], 1, 0x55555555);
1640 swap (data[12], data[14], 2, 0x33333333);
1641 swap (data[13], data[15], 2, 0x33333333);
1642 swap (data[12], data[13], 1, 0x55555555);
1643 swap (data[14], data[15], 1, 0x55555555);
1644 swap (data[16], data[24], 8, 0x00ff00ff);
1645 swap (data[17], data[25], 8, 0x00ff00ff);
1646 swap (data[18], data[26], 8, 0x00ff00ff);
1647 swap (data[19], data[27], 8, 0x00ff00ff);
1648 swap (data[20], data[28], 8, 0x00ff00ff);
1649 swap (data[21], data[29], 8, 0x00ff00ff);
1650 swap (data[22], data[30], 8, 0x00ff00ff);
1651 swap (data[23], data[31], 8, 0x00ff00ff);
1652 swap (data[16], data[20], 4, 0x0f0f0f0f);
1653 swap (data[17], data[21], 4, 0x0f0f0f0f);
1654 swap (data[18], data[22], 4, 0x0f0f0f0f);
1655 swap (data[19], data[23], 4, 0x0f0f0f0f);
1656 swap (data[16], data[18], 2, 0x33333333);
1657 swap (data[17], data[19], 2, 0x33333333);
1658 swap (data[16], data[17], 1, 0x55555555);
1659 swap (data[18], data[19], 1, 0x55555555);
1660 swap (data[20], data[22], 2, 0x33333333);
1661 swap (data[21], data[23], 2, 0x33333333);
1662 swap (data[20], data[21], 1, 0x55555555);
1663 swap (data[22], data[23], 1, 0x55555555);
1664 swap (data[24], data[28], 4, 0x0f0f0f0f);
1665 swap (data[25], data[29], 4, 0x0f0f0f0f);
1666 swap (data[26], data[30], 4, 0x0f0f0f0f);
1667 swap (data[27], data[31], 4, 0x0f0f0f0f);
1668 swap (data[24], data[26], 2, 0x33333333);
1669 swap (data[25], data[27], 2, 0x33333333);
1670 swap (data[24], data[25], 1, 0x55555555);
1671 swap (data[26], data[27], 1, 0x55555555);
1672 swap (data[28], data[30], 2, 0x33333333);
1673 swap (data[29], data[31], 2, 0x33333333);
1674 swap (data[28], data[29], 1, 0x55555555);
1675 swap (data[30], data[31], 1, 0x55555555);
1678 static void m03000m (__local u32 *s_S, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1684 const u32 gid = get_global_id (0);
1685 const u32 lid = get_local_id (0);
1691 const u32 K00 = pws[gid].i[ 0];
1692 const u32 K01 = pws[gid].i[ 1];
1693 const u32 K02 = pws[gid].i[ 2];
1694 const u32 K03 = pws[gid].i[ 3];
1695 const u32 K04 = pws[gid].i[ 4];
1696 const u32 K05 = pws[gid].i[ 5];
1697 const u32 K06 = pws[gid].i[ 6];
1698 const u32 K07 = pws[gid].i[ 7];
1699 const u32 K08 = pws[gid].i[ 8];
1700 const u32 K09 = pws[gid].i[ 9];
1701 const u32 K10 = pws[gid].i[10];
1702 const u32 K11 = pws[gid].i[11];
1703 const u32 K12 = pws[gid].i[12];
1704 const u32 K13 = pws[gid].i[13];
1705 const u32 K14 = pws[gid].i[14];
1706 const u32 K15 = pws[gid].i[15];
1707 const u32 K16 = pws[gid].i[16];
1708 const u32 K17 = pws[gid].i[17];
1709 const u32 K18 = pws[gid].i[18];
1710 const u32 K19 = pws[gid].i[19];
1711 const u32 K20 = pws[gid].i[20];
1712 const u32 K21 = pws[gid].i[21];
1713 const u32 K22 = pws[gid].i[22];
1714 const u32 K23 = pws[gid].i[23];
1715 const u32 K24 = pws[gid].i[24];
1716 const u32 K25 = pws[gid].i[25];
1717 const u32 K26 = pws[gid].i[26];
1718 const u32 K27 = pws[gid].i[27];
1719 const u32 K28 = pws[gid].i[28];
1720 const u32 K29 = pws[gid].i[29];
1721 const u32 K30 = pws[gid].i[30];
1722 const u32 K31 = pws[gid].i[31];
1723 const u32 K32 = pws[gid].i[32];
1724 const u32 K33 = pws[gid].i[33];
1725 const u32 K34 = pws[gid].i[34];
1726 const u32 K35 = pws[gid].i[35];
1727 const u32 K36 = pws[gid].i[36];
1728 const u32 K37 = pws[gid].i[37];
1729 const u32 K38 = pws[gid].i[38];
1730 const u32 K39 = pws[gid].i[39];
1731 const u32 K40 = pws[gid].i[40];
1732 const u32 K41 = pws[gid].i[41];
1733 const u32 K42 = pws[gid].i[42];
1734 const u32 K43 = pws[gid].i[43];
1735 const u32 K44 = pws[gid].i[44];
1736 const u32 K45 = pws[gid].i[45];
1737 const u32 K46 = pws[gid].i[46];
1738 const u32 K47 = pws[gid].i[47];
1739 const u32 K48 = pws[gid].i[48];
1740 const u32 K49 = pws[gid].i[49];
1741 const u32 K50 = pws[gid].i[50];
1742 const u32 K51 = pws[gid].i[51];
1743 const u32 K52 = pws[gid].i[52];
1744 const u32 K53 = pws[gid].i[53];
1745 const u32 K54 = pws[gid].i[54];
1746 const u32 K55 = pws[gid].i[55];
1748 const u32 pc_pos = get_local_id (1);
1750 const u32 il_pos = pc_pos * 32;
1785 k00 |= words_buf_r[pc_pos].b[ 0];
1786 k01 |= words_buf_r[pc_pos].b[ 1];
1787 k02 |= words_buf_r[pc_pos].b[ 2];
1788 k03 |= words_buf_r[pc_pos].b[ 3];
1789 k04 |= words_buf_r[pc_pos].b[ 4];
1790 k05 |= words_buf_r[pc_pos].b[ 5];
1791 k06 |= words_buf_r[pc_pos].b[ 6];
1792 k07 |= words_buf_r[pc_pos].b[ 7];
1793 k08 |= words_buf_r[pc_pos].b[ 8];
1794 k09 |= words_buf_r[pc_pos].b[ 9];
1795 k10 |= words_buf_r[pc_pos].b[10];
1796 k11 |= words_buf_r[pc_pos].b[11];
1797 k12 |= words_buf_r[pc_pos].b[12];
1798 k13 |= words_buf_r[pc_pos].b[13];
1799 k14 |= words_buf_r[pc_pos].b[14];
1800 k15 |= words_buf_r[pc_pos].b[15];
1801 k16 |= words_buf_r[pc_pos].b[16];
1802 k17 |= words_buf_r[pc_pos].b[17];
1803 k18 |= words_buf_r[pc_pos].b[18];
1804 k19 |= words_buf_r[pc_pos].b[19];
1805 k20 |= words_buf_r[pc_pos].b[20];
1806 k21 |= words_buf_r[pc_pos].b[21];
1807 k22 |= words_buf_r[pc_pos].b[22];
1808 k23 |= words_buf_r[pc_pos].b[23];
1809 k24 |= words_buf_r[pc_pos].b[24];
1810 k25 |= words_buf_r[pc_pos].b[25];
1811 k26 |= words_buf_r[pc_pos].b[26];
1812 k27 |= words_buf_r[pc_pos].b[27];
1813 k28 |= words_buf_r[pc_pos].b[28];
1814 k29 |= words_buf_r[pc_pos].b[29];
1815 k30 |= words_buf_r[pc_pos].b[30];
1816 k31 |= words_buf_r[pc_pos].b[31];
1818 // KGS!@#$% including IP
1823 u32 D03 = 0xffffffff;
1825 u32 D05 = 0xffffffff;
1826 u32 D06 = 0xffffffff;
1827 u32 D07 = 0xffffffff;
1833 u32 D13 = 0xffffffff;
1836 u32 D16 = 0xffffffff;
1837 u32 D17 = 0xffffffff;
1842 u32 D22 = 0xffffffff;
1844 u32 D24 = 0xffffffff;
1846 u32 D26 = 0xffffffff;
1848 u32 D28 = 0xffffffff;
1849 u32 D29 = 0xffffffff;
1850 u32 D30 = 0xffffffff;
1851 u32 D31 = 0xffffffff;
1860 u32 D40 = 0xffffffff;
1861 u32 D41 = 0xffffffff;
1862 u32 D42 = 0xffffffff;
1864 u32 D44 = 0xffffffff;
1875 u32 D55 = 0xffffffff;
1878 u32 D58 = 0xffffffff;
1881 u32 D61 = 0xffffffff;
1882 u32 D62 = 0xffffffff;
1883 u32 D63 = 0xffffffff;
1887 k00, k01, k02, k03, k04, k05, k06,
1888 k07, k08, k09, k10, k11, k12, k13,
1889 k14, k15, k16, k17, k18, k19, k20,
1890 k21, k22, k23, k24, k25, k26, k27,
1891 k28, k29, k30, k31, K32, K33, K34,
1892 K35, K36, K37, K38, K39, K40, K41,
1893 K42, K43, K44, K45, K46, K47, K48,
1894 K49, K50, K51, K52, K53, K54, K55,
1895 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
1896 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
1897 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
1898 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
1899 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
1900 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
1901 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
1902 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
1972 if (digests_cnt < 16)
1974 for (u32 d = 0; d < digests_cnt; d++)
1976 const u32 final_hash_pos = digests_offset + d;
1978 if (hashes_shown[final_hash_pos]) continue;
1982 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
1983 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
1988 for (int i = 0; i < 32; i++)
1990 const u32 b0 = -((search[0] >> i) & 1);
1991 const u32 b1 = -((search[1] >> i) & 1);
1993 tmpResult |= out[ 0 + i] ^ b0;
1994 tmpResult |= out[32 + i] ^ b1;
1997 if (tmpResult == 0xffffffff) continue;
1999 const u32 slice = 31 - clz (~tmpResult);
2001 const u32 r0 = search[0];
2002 const u32 r1 = search[1];
2015 for (int i = 0; i < 32; i++)
2017 out0[i] = out[ 0 + 31 - i];
2018 out1[i] = out[32 + 31 - i];
2021 transpose32c (out0);
2022 transpose32c (out1);
2025 for (int slice = 0; slice < 32; slice++)
2027 const u32 r0 = out0[31 - slice];
2028 const u32 r1 = out1[31 - slice];
2037 static void m03000s (__local u32 *s_S, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
2043 const u32 gid = get_global_id (0);
2044 const u32 lid = get_local_id (0);
2119 const u32 K00 = pws[gid].i[ 0];
2120 const u32 K01 = pws[gid].i[ 1];
2121 const u32 K02 = pws[gid].i[ 2];
2122 const u32 K03 = pws[gid].i[ 3];
2123 const u32 K04 = pws[gid].i[ 4];
2124 const u32 K05 = pws[gid].i[ 5];
2125 const u32 K06 = pws[gid].i[ 6];
2126 const u32 K07 = pws[gid].i[ 7];
2127 const u32 K08 = pws[gid].i[ 8];
2128 const u32 K09 = pws[gid].i[ 9];
2129 const u32 K10 = pws[gid].i[10];
2130 const u32 K11 = pws[gid].i[11];
2131 const u32 K12 = pws[gid].i[12];
2132 const u32 K13 = pws[gid].i[13];
2133 const u32 K14 = pws[gid].i[14];
2134 const u32 K15 = pws[gid].i[15];
2135 const u32 K16 = pws[gid].i[16];
2136 const u32 K17 = pws[gid].i[17];
2137 const u32 K18 = pws[gid].i[18];
2138 const u32 K19 = pws[gid].i[19];
2139 const u32 K20 = pws[gid].i[20];
2140 const u32 K21 = pws[gid].i[21];
2141 const u32 K22 = pws[gid].i[22];
2142 const u32 K23 = pws[gid].i[23];
2143 const u32 K24 = pws[gid].i[24];
2144 const u32 K25 = pws[gid].i[25];
2145 const u32 K26 = pws[gid].i[26];
2146 const u32 K27 = pws[gid].i[27];
2147 const u32 K28 = pws[gid].i[28];
2148 const u32 K29 = pws[gid].i[29];
2149 const u32 K30 = pws[gid].i[30];
2150 const u32 K31 = pws[gid].i[31];
2151 const u32 K32 = pws[gid].i[32];
2152 const u32 K33 = pws[gid].i[33];
2153 const u32 K34 = pws[gid].i[34];
2154 const u32 K35 = pws[gid].i[35];
2155 const u32 K36 = pws[gid].i[36];
2156 const u32 K37 = pws[gid].i[37];
2157 const u32 K38 = pws[gid].i[38];
2158 const u32 K39 = pws[gid].i[39];
2159 const u32 K40 = pws[gid].i[40];
2160 const u32 K41 = pws[gid].i[41];
2161 const u32 K42 = pws[gid].i[42];
2162 const u32 K43 = pws[gid].i[43];
2163 const u32 K44 = pws[gid].i[44];
2164 const u32 K45 = pws[gid].i[45];
2165 const u32 K46 = pws[gid].i[46];
2166 const u32 K47 = pws[gid].i[47];
2167 const u32 K48 = pws[gid].i[48];
2168 const u32 K49 = pws[gid].i[49];
2169 const u32 K50 = pws[gid].i[50];
2170 const u32 K51 = pws[gid].i[51];
2171 const u32 K52 = pws[gid].i[52];
2172 const u32 K53 = pws[gid].i[53];
2173 const u32 K54 = pws[gid].i[54];
2174 const u32 K55 = pws[gid].i[55];
2176 const u32 pc_pos = get_local_id (1);
2178 const u32 il_pos = pc_pos * 32;
2213 k00 |= words_buf_r[pc_pos].b[ 0];
2214 k01 |= words_buf_r[pc_pos].b[ 1];
2215 k02 |= words_buf_r[pc_pos].b[ 2];
2216 k03 |= words_buf_r[pc_pos].b[ 3];
2217 k04 |= words_buf_r[pc_pos].b[ 4];
2218 k05 |= words_buf_r[pc_pos].b[ 5];
2219 k06 |= words_buf_r[pc_pos].b[ 6];
2220 k07 |= words_buf_r[pc_pos].b[ 7];
2221 k08 |= words_buf_r[pc_pos].b[ 8];
2222 k09 |= words_buf_r[pc_pos].b[ 9];
2223 k10 |= words_buf_r[pc_pos].b[10];
2224 k11 |= words_buf_r[pc_pos].b[11];
2225 k12 |= words_buf_r[pc_pos].b[12];
2226 k13 |= words_buf_r[pc_pos].b[13];
2227 k14 |= words_buf_r[pc_pos].b[14];
2228 k15 |= words_buf_r[pc_pos].b[15];
2229 k16 |= words_buf_r[pc_pos].b[16];
2230 k17 |= words_buf_r[pc_pos].b[17];
2231 k18 |= words_buf_r[pc_pos].b[18];
2232 k19 |= words_buf_r[pc_pos].b[19];
2233 k20 |= words_buf_r[pc_pos].b[20];
2234 k21 |= words_buf_r[pc_pos].b[21];
2235 k22 |= words_buf_r[pc_pos].b[22];
2236 k23 |= words_buf_r[pc_pos].b[23];
2237 k24 |= words_buf_r[pc_pos].b[24];
2238 k25 |= words_buf_r[pc_pos].b[25];
2239 k26 |= words_buf_r[pc_pos].b[26];
2240 k27 |= words_buf_r[pc_pos].b[27];
2241 k28 |= words_buf_r[pc_pos].b[28];
2242 k29 |= words_buf_r[pc_pos].b[29];
2243 k30 |= words_buf_r[pc_pos].b[30];
2244 k31 |= words_buf_r[pc_pos].b[31];
2246 // KGS!@#$% including IP
2251 u32 D03 = 0xffffffff;
2253 u32 D05 = 0xffffffff;
2254 u32 D06 = 0xffffffff;
2255 u32 D07 = 0xffffffff;
2261 u32 D13 = 0xffffffff;
2264 u32 D16 = 0xffffffff;
2265 u32 D17 = 0xffffffff;
2270 u32 D22 = 0xffffffff;
2272 u32 D24 = 0xffffffff;
2274 u32 D26 = 0xffffffff;
2276 u32 D28 = 0xffffffff;
2277 u32 D29 = 0xffffffff;
2278 u32 D30 = 0xffffffff;
2279 u32 D31 = 0xffffffff;
2288 u32 D40 = 0xffffffff;
2289 u32 D41 = 0xffffffff;
2290 u32 D42 = 0xffffffff;
2292 u32 D44 = 0xffffffff;
2303 u32 D55 = 0xffffffff;
2306 u32 D58 = 0xffffffff;
2309 u32 D61 = 0xffffffff;
2310 u32 D62 = 0xffffffff;
2311 u32 D63 = 0xffffffff;
2315 k00, k01, k02, k03, k04, k05, k06,
2316 k07, k08, k09, k10, k11, k12, k13,
2317 k14, k15, k16, k17, k18, k19, k20,
2318 k21, k22, k23, k24, k25, k26, k27,
2319 k28, k29, k30, k31, K32, K33, K34,
2320 K35, K36, K37, K38, K39, K40, K41,
2321 K42, K43, K44, K45, K46, K47, K48,
2322 K49, K50, K51, K52, K53, K54, K55,
2323 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
2324 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
2325 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
2326 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
2327 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
2328 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
2329 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
2330 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
2335 tmpResult |= D00 ^ S00;
2336 tmpResult |= D01 ^ S01;
2337 tmpResult |= D02 ^ S02;
2338 tmpResult |= D03 ^ S03;
2339 tmpResult |= D04 ^ S04;
2340 tmpResult |= D05 ^ S05;
2341 tmpResult |= D06 ^ S06;
2342 tmpResult |= D07 ^ S07;
2343 tmpResult |= D08 ^ S08;
2344 tmpResult |= D09 ^ S09;
2345 tmpResult |= D10 ^ S10;
2346 tmpResult |= D11 ^ S11;
2347 tmpResult |= D12 ^ S12;
2348 tmpResult |= D13 ^ S13;
2349 tmpResult |= D14 ^ S14;
2350 tmpResult |= D15 ^ S15;
2352 if (tmpResult == 0xffffffff) return;
2354 tmpResult |= D16 ^ S16;
2355 tmpResult |= D17 ^ S17;
2356 tmpResult |= D18 ^ S18;
2357 tmpResult |= D19 ^ S19;
2358 tmpResult |= D20 ^ S20;
2359 tmpResult |= D21 ^ S21;
2360 tmpResult |= D22 ^ S22;
2361 tmpResult |= D23 ^ S23;
2362 tmpResult |= D24 ^ S24;
2363 tmpResult |= D25 ^ S25;
2364 tmpResult |= D26 ^ S26;
2365 tmpResult |= D27 ^ S27;
2366 tmpResult |= D28 ^ S28;
2367 tmpResult |= D29 ^ S29;
2368 tmpResult |= D30 ^ S30;
2369 tmpResult |= D31 ^ S31;
2371 if (tmpResult == 0xffffffff) return;
2373 tmpResult |= D32 ^ S32;
2374 tmpResult |= D33 ^ S33;
2375 tmpResult |= D34 ^ S34;
2376 tmpResult |= D35 ^ S35;
2377 tmpResult |= D36 ^ S36;
2378 tmpResult |= D37 ^ S37;
2379 tmpResult |= D38 ^ S38;
2380 tmpResult |= D39 ^ S39;
2381 tmpResult |= D40 ^ S40;
2382 tmpResult |= D41 ^ S41;
2383 tmpResult |= D42 ^ S42;
2384 tmpResult |= D43 ^ S43;
2385 tmpResult |= D44 ^ S44;
2386 tmpResult |= D45 ^ S45;
2387 tmpResult |= D46 ^ S46;
2388 tmpResult |= D47 ^ S47;
2390 if (tmpResult == 0xffffffff) return;
2392 tmpResult |= D48 ^ S48;
2393 tmpResult |= D49 ^ S49;
2394 tmpResult |= D50 ^ S50;
2395 tmpResult |= D51 ^ S51;
2396 tmpResult |= D52 ^ S52;
2397 tmpResult |= D53 ^ S53;
2398 tmpResult |= D54 ^ S54;
2399 tmpResult |= D55 ^ S55;
2400 tmpResult |= D56 ^ S56;
2401 tmpResult |= D57 ^ S57;
2402 tmpResult |= D58 ^ S58;
2403 tmpResult |= D59 ^ S59;
2404 tmpResult |= D60 ^ S60;
2405 tmpResult |= D61 ^ S61;
2406 tmpResult |= D62 ^ S62;
2407 tmpResult |= D63 ^ S63;
2409 if (tmpResult == 0xffffffff) return;
2411 const u32 slice = 31 - clz (~tmpResult);
2417 // transpose bitslice base : easy because no overlapping buffers
2418 // mod : attention race conditions, need different buffers for *in and *out
2421 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03000_tb (__global pw_t *pws)
2423 const u32 gid = get_global_id (0);
2425 const u32 w0s = pws[gid].i[0];
2426 const u32 w1s = pws[gid].i[1];
2429 for (int i = 0; i < 32; i += 8)
2431 pws[gid].i[i + 0 + 0] = -((w0s >> (i + 7)) & 1);
2432 pws[gid].i[i + 0 + 1] = -((w0s >> (i + 6)) & 1);
2433 pws[gid].i[i + 0 + 2] = -((w0s >> (i + 5)) & 1);
2434 pws[gid].i[i + 0 + 3] = -((w0s >> (i + 4)) & 1);
2435 pws[gid].i[i + 0 + 4] = -((w0s >> (i + 3)) & 1);
2436 pws[gid].i[i + 0 + 5] = -((w0s >> (i + 2)) & 1);
2437 pws[gid].i[i + 0 + 6] = -((w0s >> (i + 1)) & 1);
2438 pws[gid].i[i + 0 + 7] = -((w0s >> (i + 0)) & 1);
2442 for (int i = 0; i < 24; i += 8)
2444 pws[gid].i[i + 32 + 0] = -((w1s >> (i + 7)) & 1);
2445 pws[gid].i[i + 32 + 1] = -((w1s >> (i + 6)) & 1);
2446 pws[gid].i[i + 32 + 2] = -((w1s >> (i + 5)) & 1);
2447 pws[gid].i[i + 32 + 3] = -((w1s >> (i + 4)) & 1);
2448 pws[gid].i[i + 32 + 4] = -((w1s >> (i + 3)) & 1);
2449 pws[gid].i[i + 32 + 5] = -((w1s >> (i + 2)) & 1);
2450 pws[gid].i[i + 32 + 6] = -((w1s >> (i + 1)) & 1);
2451 pws[gid].i[i + 32 + 7] = -((w1s >> (i + 0)) & 1);
2455 __kernel void __attribute__((reqd_work_group_size (32, 1, 1))) m03000_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
2457 const u32 gid = get_global_id (0);
2459 const u32 block = gid / 32;
2460 const u32 slice = gid % 32;
2462 const u32 w0 = mod[gid];
2465 for (int i = 0; i < 32; i += 8)
2467 atomic_or (&words_buf_r[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice));
2468 atomic_or (&words_buf_r[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice));
2469 atomic_or (&words_buf_r[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice));
2470 atomic_or (&words_buf_r[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice));
2471 atomic_or (&words_buf_r[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice));
2472 atomic_or (&words_buf_r[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice));
2473 atomic_or (&words_buf_r[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice));
2474 atomic_or (&words_buf_r[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice));
2478 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2484 const u32 gid = get_global_id (0);
2485 const u32 lid = get_local_id (0);
2486 const u32 vid = get_local_id (1);
2488 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
2489 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
2491 __local u32 s_S[64];
2495 s_S[ 0 + vid] = -((s0 >> vid) & 1);
2499 s_S[32 + vid] = -((s1 >> vid) & 1);
2502 barrier (CLK_LOCAL_MEM_FENCE);
2504 if (gid >= gid_max) return;
2510 m03000m (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2513 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2517 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2521 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2527 const u32 gid = get_global_id (0);
2528 const u32 lid = get_local_id (0);
2529 const u32 vid = get_local_id (1);
2531 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
2532 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
2534 __local u32 s_S[64];
2538 s_S[ 0 + vid] = -((s0 >> vid) & 1);
2542 s_S[32 + vid] = -((s1 >> vid) & 1);
2545 barrier (CLK_LOCAL_MEM_FENCE);
2547 if (gid >= gid_max) return;
2553 m03000s (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2556 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2560 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)