2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes were taken from JtR, license below
9 #include "include/constants.h"
10 #include "include/kernel_vendor.h"
17 #include "include/kernel_functions.c"
18 #include "OpenCL/types_ocl.c"
19 #include "OpenCL/common.c"
21 #define COMPARE_S "OpenCL/check_single_comp4_bs.c"
22 #define COMPARE_M "OpenCL/check_multi_comp4_bs.c"
29 #define KXX_DECL volatile
37 // Bitslice DES S-boxes with LOP3.LUT instructions
38 // For NVIDIA Maxwell architecture and CUDA 7.5 RC
39 // by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
41 // Gate counts: 25 24 25 18 25 24 24 23
43 // Depth: 8 7 7 6 8 10 10 8
46 // Note that same S-box function with a lower gate count isn't necessarily faster.
48 // These Boolean expressions corresponding to DES S-boxes were
49 // discovered by <deeplearningjohndoe at gmail.com>
51 // This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
52 // Redistribution and use in source and binary forms, with or without
53 // modification, are permitted.
55 // The underlying mathematical formulas are NOT copyrighted.
58 #define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));
60 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
62 LUT(xAA55AA5500550055, a1, a4, a6, 0xC1)
63 LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
64 LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
65 LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
66 LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
67 LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
68 LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
69 LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
70 LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
71 LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
72 LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
73 LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
74 LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
75 LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
76 LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
77 LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
78 LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
79 LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
80 LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
81 LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
82 LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
83 LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
84 LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
85 LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
86 LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)
94 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
96 LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97)
97 LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
98 LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76)
99 LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
100 LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
101 LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65)
102 LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
103 LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
104 LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
105 LUT(x3333CCCC00000000, a2, a5, a6, 0x14)
106 LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
107 LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
108 LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
109 LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
110 LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
111 LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
112 LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
113 LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
114 LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
115 LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
116 LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
117 LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
118 LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
119 LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)
127 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
129 LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
130 LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
131 LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
132 LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
133 LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
134 LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
135 LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
136 LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49)
137 LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
138 LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
139 LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
140 LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
141 LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
142 LUT(x500F500F500F500F, a1, a3, a4, 0x98)
143 LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
144 LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
145 LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
146 LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
147 LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
148 LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
149 LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
150 LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
151 LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
152 LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
153 LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)
161 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
163 LUT(x55F055F055F055F0, a1, a3, a4, 0x72)
164 LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
165 LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59)
166 LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
167 LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
168 LUT(x9999666699996666, a1, a2, a5, 0x69)
169 LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
170 LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
171 LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
172 LUT(x4848484848484848, a1, a2, a3, 0x12)
173 LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
174 LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
175 LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
176 LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
177 LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
178 LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
179 LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
180 LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)
188 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
190 LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
191 LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9)
192 LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
193 LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
194 LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
195 LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
196 LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
197 LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
198 LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
199 LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
200 LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
201 LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
202 LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
203 LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
204 LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
205 LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
206 LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
207 LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
208 LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
209 LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
210 LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
211 LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
212 LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
213 LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
214 LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)
222 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
224 LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2)
225 LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
226 LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
227 LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
228 LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
229 LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
230 LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
231 LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
232 LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
233 LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
234 LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
235 LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
236 LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
237 LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
238 LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
239 LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
240 LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
241 LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
242 LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
243 LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
244 LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
245 LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
246 LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
247 LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)
255 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
257 LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
258 LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
259 LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
260 LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
261 LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
262 LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
263 LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
264 LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
265 LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
266 LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
267 LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
268 LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
269 LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
270 LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
271 LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
272 LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23)
273 LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
274 LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
275 LUT(xA050A050A050A050, a1, a3, a4, 0x21)
276 LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
277 LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
278 LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
279 LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
280 LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)
288 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
290 LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
291 LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
292 LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
293 LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
294 LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
295 LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
296 LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
297 LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
298 LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
299 LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
300 LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
301 LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
302 LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
303 LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
304 LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
305 LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
306 LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
307 LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
308 LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
309 LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
310 LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
311 LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
312 LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)
323 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
324 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
326 * Gate counts: 49 44 46 33 48 46 46 41
329 * Several same-gate-count expressions for each S-box are included (for use on
330 * different CPUs/GPUs).
332 * These Boolean expressions corresponding to DES S-boxes have been generated
333 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
334 * John the Ripper password cracker: http://www.openwall.com/john/
335 * Being mathematical formulas, they are not copyrighted and are free for reuse
338 * This file (a specific representation of the S-box expressions, surrounding
339 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
340 * Redistribution and use in source and binary forms, with or without
341 * modification, are permitted. (This is a heavily cut-down "BSD license".)
343 * The effort has been sponsored by Rapid7: http://www.rapid7.com
346 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
348 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
350 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
351 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
352 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
353 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
354 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
355 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
356 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
357 u32 x00, x01, x10, x11, x20, x21, x30, x31;
359 x55005500 = a1 & ~a5;
360 x5A0F5A0F = a4 ^ x55005500;
363 x22226666 = x3333FFFF & x66666666;
364 x2D2D6969 = a4 ^ x22226666;
365 x25202160 = x2D2D6969 & ~x5A0F5A0F;
368 x33CCCC33 = a3 ^ x00FFFF00;
369 x4803120C = x5A0F5A0F & ~x33CCCC33;
370 x2222FFFF = a6 | x22226666;
371 x6A21EDF3 = x4803120C ^ x2222FFFF;
372 x4A01CC93 = x6A21EDF3 & ~x25202160;
375 x7F75FFFF = x6A21EDF3 | x5555FFFF;
376 x00D20096 = a5 & ~x2D2D6969;
377 x7FA7FF69 = x7F75FFFF ^ x00D20096;
379 x0A0A0000 = a4 & ~x5555FFFF;
380 x0AD80096 = x00D20096 ^ x0A0A0000;
381 x00999900 = x00FFFF00 & ~x66666666;
382 x0AD99996 = x0AD80096 | x00999900;
384 x22332233 = a3 & ~x55005500;
385 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
386 x054885C0 = x257AA5F0 & ~x22332233;
387 xFAB77A3F = ~x054885C0;
388 x2221EDF3 = x3333FFFF & x6A21EDF3;
389 xD89697CC = xFAB77A3F ^ x2221EDF3;
390 x20 = x7FA7FF69 & ~a2;
391 x21 = x20 ^ xD89697CC;
394 x05B77AC0 = x00FFFF00 ^ x054885C0;
395 x05F77AD6 = x00D20096 | x05B77AC0;
396 x36C48529 = x3333FFFF ^ x05F77AD6;
397 x6391D07C = a1 ^ x36C48529;
398 xBB0747B0 = xD89697CC ^ x6391D07C;
399 x00 = x25202160 | a2;
400 x01 = x00 ^ xBB0747B0;
403 x4C460000 = x3333FFFF ^ x7F75FFFF;
404 x4EDF9996 = x0AD99996 | x4C460000;
405 x2D4E49EA = x6391D07C ^ x4EDF9996;
406 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
407 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
408 x10 = x4A01CC93 | a2;
409 x11 = x10 ^ x96B1B65A;
412 x5AFF5AFF = a5 | x5A0F5A0F;
413 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
414 x4201C010 = x4A01CC93 & x6391D07C;
415 x10B0D205 = x52B11215 ^ x4201C010;
416 x30 = x10B0D205 | a2;
417 x31 = x30 ^ x0AD99996;
421 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
424 u32 x55550000, x00AA00FF, x33BB33FF;
425 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
426 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
427 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
428 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
429 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
430 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
431 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
432 u32 x00, x01, x10, x11, x20, x21, x30, x31;
436 x55550000 = a1 & ~a6;
437 x00AA00FF = a5 & ~x55550000;
438 x33BB33FF = a2 | x00AA00FF;
440 x33CC0000 = x33CC33CC & ~a6;
441 x11441144 = a1 & x33CC33CC;
442 x11BB11BB = a5 ^ x11441144;
443 x003311BB = x11BB11BB & ~x33CC0000;
446 x336600FF = x00AA00FF ^ x33CC0000;
447 x332200FF = x33BB33FF & x336600FF;
448 x332200F0 = x332200FF & ~x00000F0F;
450 x0302000F = a3 & x332200FF;
452 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
453 x33CCCC33 = a6 ^ x33CC33CC;
454 x33CCC030 = x33CCCC33 & ~x00000F0F;
455 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
456 x10 = a4 & ~x332200F0;
457 x11 = x10 ^ x9A646A95;
460 x00333303 = a2 & ~x33CCC030;
461 x118822B8 = x11BB11BB ^ x00333303;
462 xA8208805 = xA9A8AAA5 & ~x118822B8;
463 x3CC3C33C = a3 ^ x33CCCC33;
464 x94E34B39 = xA8208805 ^ x3CC3C33C;
465 x00 = x33BB33FF & ~a4;
466 x01 = x00 ^ x94E34B39;
469 x0331330C = x0302000F ^ x00333303;
470 x3FF3F33C = x3CC3C33C | x0331330C;
471 xA9DF596A = x33BB33FF ^ x9A646A95;
472 xA9DF5F6F = x00000F0F | xA9DF596A;
473 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
475 xA9466A6A = x332200FF ^ x9A646A95;
476 x3DA52153 = x94E34B39 ^ xA9466A6A;
477 x29850143 = xA9DF5F6F & x3DA52153;
478 x33C0330C = x33CC33CC & x3FF3F33C;
479 x1A45324F = x29850143 ^ x33C0330C;
480 x20 = x1A45324F | a4;
481 x21 = x20 ^ x962CAC53;
484 x0A451047 = x1A45324F & ~x118822B8;
485 xBBDFDD7B = x33CCCC33 | xA9DF596A;
486 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
487 x30 = x003311BB | a4;
488 x31 = x30 ^ xB19ACD3C;
492 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
494 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
495 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
496 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
497 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
498 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
499 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
500 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
501 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
502 u32 x00, x01, x10, x11, x20, x21, x30, x31;
504 x44444444 = a1 & ~a2;
506 x4F4FF4F4 = x44444444 | x0F0FF0F0;
508 x00AAAA00 = x00FFFF00 & ~a1;
509 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
511 x3C3CC3C3 = a2 ^ x0F0FF0F0;
512 x3C3C0000 = x3C3CC3C3 & ~a6;
513 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
514 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
516 x00005EF4 = a6 & x4FE55EF4;
517 x00FF5EFF = a4 | x00005EF4;
518 x00555455 = a1 & x00FF5EFF;
519 x3C699796 = x3C3CC3C3 ^ x00555455;
520 x30 = x4FE55EF4 & ~a5;
521 x31 = x30 ^ x3C699796;
524 x000FF000 = x0F0FF0F0 & x00FFFF00;
526 x26D9A15E = x7373F4F4 ^ x55AA55AA;
527 x2FDFAF5F = a3 | x26D9A15E;
528 x2FD00F5F = x2FDFAF5F & ~x000FF000;
530 x55AAFFAA = x00AAAA00 | x55AA55AA;
531 x28410014 = x3C699796 & ~x55AAFFAA;
533 x000000CC = x000000FF & ~a2;
534 x284100D8 = x28410014 ^ x000000CC;
536 x204100D0 = x7373F4F4 & x284100D8;
537 x3C3CC3FF = x3C3CC3C3 | x000000FF;
538 x1C3CC32F = x3C3CC3FF & ~x204100D0;
539 x4969967A = a1 ^ x1C3CC32F;
540 x10 = x2FD00F5F & a5;
541 x11 = x10 ^ x4969967A;
544 x4CC44CC4 = x4FE55EF4 & ~a2;
545 x40C040C0 = x4CC44CC4 & ~a3;
546 xC3C33C3C = ~x3C3CC3C3;
547 x9669C396 = x55AAFFAA ^ xC3C33C3C;
548 xD6A98356 = x40C040C0 ^ x9669C396;
549 x00 = a5 & ~x0C840A00;
550 x01 = x00 ^ xD6A98356;
553 xD6E9C3D6 = x40C040C0 | x9669C396;
554 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
555 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
556 x001A000B = a4 & ~x4FE55EF4;
557 x9A1F2D1B = x9A072D12 | x001A000B;
558 x20 = a5 & ~x284100D8;
559 x21 = x20 ^ x9A1F2D1B;
563 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
565 u32 x5A5A5A5A, x0F0FF0F0;
566 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
567 x52FBCA0F, x61C8F93C;
568 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
569 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
570 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
571 u32 x00, x01, x10, x11, x20, x21, x30, x31;
576 x33FFCC00 = a5 ^ x33FF33FF;
577 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
578 x0C0CC0C0 = x0F0FF0F0 & ~a2;
579 x0CF3C03F = a4 ^ x0C0CC0C0;
580 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
581 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
582 x61C8F93C = a2 ^ x52FBCA0F;
584 x00C0C03C = x0CF3C03F & x61C8F93C;
585 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
586 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
587 x30908326 = x3B92A366 & ~x0F0F30C0;
588 x3C90B3D6 = x0C0030F0 ^ x30908326;
591 x0C0CFFFF = a5 | x0C0CC0C0;
592 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
593 x04124C11 = x379E5C99 & ~x33CC33CC;
594 x56E9861E = x52FBCA0F ^ x04124C11;
595 x00 = a6 & ~x3C90B3D6;
596 x01 = x00 ^ x56E9861E;
599 xA91679E1 = ~x56E9861E;
600 x10 = x3C90B3D6 & ~a6;
601 x11 = x10 ^ xA91679E1;
604 x9586CA37 = x3C90B3D6 ^ xA91679E1;
605 x8402C833 = x9586CA37 & ~x33CC33CC;
606 x84C2C83F = x00C0C03C | x8402C833;
607 xB35C94A6 = x379E5C99 ^ x84C2C83F;
608 x20 = x61C8F93C | a6;
609 x21 = x20 ^ xB35C94A6;
612 x30 = a6 & x61C8F93C;
613 x31 = x30 ^ xB35C94A6;
617 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
619 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
620 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
621 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
622 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
623 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
624 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
625 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
626 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
627 u32 x00, x01, x10, x11, x20, x21, x30, x31;
630 x77770000 = x77777777 & ~a6;
631 x22225555 = a1 ^ x77770000;
632 x11116666 = a3 ^ x22225555;
633 x1F1F6F6F = a4 | x11116666;
635 x70700000 = x77770000 & ~a4;
636 x43433333 = a3 ^ x70700000;
637 x00430033 = a5 & x43433333;
638 x55557777 = a1 | x11116666;
639 x55167744 = x00430033 ^ x55557777;
640 x5A19784B = a4 ^ x55167744;
642 x5A1987B4 = a6 ^ x5A19784B;
643 x7A3BD7F5 = x22225555 | x5A1987B4;
644 x003B00F5 = a5 & x7A3BD7F5;
645 x221955A0 = x22225555 ^ x003B00F5;
646 x05050707 = a4 & x55557777;
647 x271C52A7 = x221955A0 ^ x05050707;
649 x2A2A82A0 = x7A3BD7F5 & ~a1;
650 x6969B193 = x43433333 ^ x2A2A82A0;
651 x1FE06F90 = a5 ^ x1F1F6F6F;
652 x16804E00 = x1FE06F90 & ~x6969B193;
653 xE97FB1FF = ~x16804E00;
654 x20 = xE97FB1FF & ~a2;
655 x21 = x20 ^ x5A19784B;
658 x43403302 = x43433333 & ~x003B00F5;
659 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
660 x37DEFFB7 = x271C52A7 | x35CAED30;
661 x349ECCB5 = x37DEFFB7 & ~x43403302;
662 x0B01234A = x1F1F6F6F & ~x349ECCB5;
664 x101884B4 = x5A1987B4 & x349ECCB5;
665 x0FF8EB24 = x1FE06F90 ^ x101884B4;
666 x41413333 = x43433333 & x55557777;
667 x4FF9FB37 = x0FF8EB24 | x41413333;
668 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
669 x30 = x4FC2FBC2 & a2;
670 x31 = x30 ^ x271C52A7;
673 x22222222 = a1 ^ x77777777;
674 x16BCEE97 = x349ECCB5 ^ x22222222;
675 x0F080B04 = a4 & x0FF8EB24;
676 x19B4E593 = x16BCEE97 ^ x0F080B04;
677 x00 = x0B01234A | a2;
678 x01 = x00 ^ x19B4E593;
681 x5C5C5C5C = x1F1F6F6F ^ x43433333;
682 x4448184C = x5C5C5C5C & ~x19B4E593;
683 x2DDABE71 = x22225555 ^ x0FF8EB24;
684 x6992A63D = x4448184C ^ x2DDABE71;
685 x10 = x1F1F6F6F & a2;
686 x11 = x10 ^ x6992A63D;
690 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
693 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
694 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
695 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
696 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
697 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
698 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
699 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
700 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
701 u32 x00, x01, x10, x11, x20, x21, x30, x31;
706 x11115555 = a1 & x3333FFFF;
707 x22DD6699 = x33CC33CC ^ x11115555;
708 x22DD9966 = a6 ^ x22DD6699;
709 x00220099 = a5 & ~x22DD9966;
711 x00551144 = a1 & x22DD9966;
712 x33662277 = a2 ^ x00551144;
714 x7B7E7A7F = x33662277 | x5A5A5A5A;
715 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
717 x09030C06 = a3 & x59A31CE6;
718 x09030000 = x09030C06 & ~a6;
719 x336622FF = x00220099 | x33662277;
720 x3A6522FF = x09030000 ^ x336622FF;
721 x30 = x3A6522FF & a4;
722 x31 = x30 ^ x59A31CE6;
725 x484D494C = a2 ^ x7B7E7A7F;
726 x0000B6B3 = a6 & ~x484D494C;
727 x0F0FB9BC = a3 ^ x0000B6B3;
728 x00FC00F9 = a5 & ~x09030C06;
729 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
731 x5DF75DF7 = a1 | x59A31CE6;
732 x116600F7 = x336622FF & x5DF75DF7;
733 x1E69B94B = x0F0FB9BC ^ x116600F7;
734 x1668B94B = x1E69B94B & ~x09030000;
735 x20 = x00220099 | a4;
736 x21 = x20 ^ x1668B94B;
739 x7B7B7B7B = a2 | x5A5A5A5A;
740 x411E5984 = x3A6522FF ^ x7B7B7B7B;
741 x1FFFFDFD = x11115555 | x0FFFB9FD;
742 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
744 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
745 x004B002D = a5 & ~x3CB4DFD2;
746 xB7B2B6B3 = ~x484D494C;
747 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
748 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
749 x10 = xCC82CDE5 & ~a4;
750 x11 = x10 ^ x5EE1A479;
753 x0055EEBB = a6 ^ x00551144;
754 x5A5AECE9 = a1 ^ x0F0FB9BC;
755 x0050ECA9 = x0055EEBB & x5A5AECE9;
756 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
757 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
758 x00 = x0FFFB9FD & ~a4;
759 x01 = x00 ^ xC59A2D67;
763 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
765 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
766 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
767 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
768 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
769 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
770 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
771 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
772 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
773 u32 x00, x01, x10, x11, x20, x21, x30, x31;
776 x3CC33CC3 = a3 ^ x0FF00FF0;
777 x00003CC3 = a6 & x3CC33CC3;
778 x0F000F00 = a4 & x0FF00FF0;
779 x5A555A55 = a2 ^ x0F000F00;
780 x00001841 = x00003CC3 & x5A555A55;
782 x00000F00 = a6 & x0F000F00;
783 x33333C33 = a3 ^ x00000F00;
784 x7B777E77 = x5A555A55 | x33333C33;
785 x0FF0F00F = a6 ^ x0FF00FF0;
786 x74878E78 = x7B777E77 ^ x0FF0F00F;
787 x30 = a1 & ~x00001841;
788 x31 = x30 ^ x74878E78;
791 x003C003C = a5 & ~x3CC33CC3;
792 x5A7D5A7D = x5A555A55 | x003C003C;
793 x333300F0 = x00003CC3 ^ x33333C33;
794 x694E5A8D = x5A7D5A7D ^ x333300F0;
796 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
797 x000F0303 = a4 & ~x0FF0CCCC;
798 x5A505854 = x5A555A55 & ~x000F0303;
799 x33CC000F = a5 ^ x333300F0;
800 x699C585B = x5A505854 ^ x33CC000F;
802 x7F878F78 = x0F000F00 | x74878E78;
803 x21101013 = a3 & x699C585B;
804 x7F979F7B = x7F878F78 | x21101013;
805 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
806 x4F9493BB = x7F979F7B ^ x30030CC0;
807 x00 = x4F9493BB & ~a1;
808 x01 = x00 ^ x694E5A8D;
811 x6F9CDBFB = x699C585B | x4F9493BB;
812 x0000DBFB = a6 & x6F9CDBFB;
813 x00005151 = a2 & x0000DBFB;
814 x26DAC936 = x694E5A8D ^ x4F9493BB;
815 x26DA9867 = x00005151 ^ x26DAC936;
817 x27DA9877 = x21101013 | x26DA9867;
818 x27DA438C = x0000DBFB ^ x27DA9877;
819 x2625C9C9 = a5 ^ x26DAC936;
820 x27FFCBCD = x27DA438C | x2625C9C9;
821 x20 = x27FFCBCD & a1;
822 x21 = x20 ^ x699C585B;
825 x27FF1036 = x0000DBFB ^ x27FFCBCD;
826 x27FF103E = x003C003C | x27FF1036;
827 xB06B6C44 = ~x4F9493BB;
828 x97947C7A = x27FF103E ^ xB06B6C44;
829 x10 = x97947C7A & ~a1;
830 x11 = x10 ^ x26DA9867;
834 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
836 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
837 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
838 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
839 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
840 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
841 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
842 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
843 u32 x00, x01, x10, x11, x20, x21, x30, x31;
845 x0C0C0C0C = a3 & ~a2;
846 x0000F0F0 = a5 & ~a3;
847 x00FFF00F = a4 ^ x0000F0F0;
848 x00555005 = a1 & x00FFF00F;
849 x00515001 = x00555005 & ~x0C0C0C0C;
851 x33000330 = a2 & ~x00FFF00F;
852 x77555775 = a1 | x33000330;
853 x30303030 = a2 & ~a3;
854 x3030CFCF = a5 ^ x30303030;
855 x30104745 = x77555775 & x3030CFCF;
856 x30555745 = x00555005 | x30104745;
858 xFF000FF0 = ~x00FFF00F;
859 xCF1048B5 = x30104745 ^ xFF000FF0;
860 x080A080A = a3 & ~x77555775;
861 xC71A40BF = xCF1048B5 ^ x080A080A;
862 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
863 x10 = x00515001 | a6;
864 x11 = x10 ^ xCB164CB3;
867 x9E4319E6 = a1 ^ xCB164CB3;
868 x000019E6 = a5 & x9E4319E6;
869 xF429738C = a2 ^ xC71A40BF;
870 xF4296A6A = x000019E6 ^ xF429738C;
871 xC729695A = x33000330 ^ xF4296A6A;
873 xC47C3D2F = x30555745 ^ xF4296A6A;
874 xF77F3F3F = a2 | xC47C3D2F;
875 x9E43E619 = a5 ^ x9E4319E6;
876 x693CD926 = xF77F3F3F ^ x9E43E619;
877 x20 = x30555745 & a6;
878 x21 = x20 ^ x693CD926;
881 xF719A695 = x3030CFCF ^ xC729695A;
882 xF4FF73FF = a4 | xF429738C;
883 x03E6D56A = xF719A695 ^ xF4FF73FF;
884 x56B3803F = a1 ^ x03E6D56A;
885 x30 = x56B3803F & a6;
886 x31 = x30 ^ xC729695A;
889 xF700A600 = xF719A695 & ~a4;
890 x61008000 = x693CD926 & xF700A600;
891 x03B7856B = x00515001 ^ x03E6D56A;
892 x62B7056B = x61008000 ^ x03B7856B;
893 x00 = x62B7056B | a6;
894 x01 = x00 ^ xC729695A;
904 * Bitslice DES S-boxes making use of a vector conditional select operation
905 * (e.g., vsel on PowerPC with AltiVec).
907 * Gate counts: 36 33 33 26 35 34 34 32
910 * Several same-gate-count expressions for each S-box are included (for use on
911 * different CPUs/GPUs).
913 * These Boolean expressions corresponding to DES S-boxes have been generated
914 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
915 * John the Ripper password cracker: http://www.openwall.com/john/
916 * Being mathematical formulas, they are not copyrighted and are free for reuse
919 * This file (a specific representation of the S-box expressions, surrounding
920 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
921 * Redistribution and use in source and binary forms, with or without
922 * modification, are permitted. (This is a heavily cut-down "BSD license".)
924 * The effort has been sponsored by Rapid7: http://www.rapid7.com
927 #define vnot(dst, a) (dst) = ~(a)
928 #define vand(dst, a, b) (dst) = (a) & (b)
929 #define vor(dst, a, b) (dst) = (a) | (b)
930 #define vandn(dst, a, b) (dst) = (a) & ~(b)
931 #define vxor(dst, a, b) (dst) = (a) ^ (b)
932 #define vsel(dst, a, b, c) (dst) = bitselect((a),(b),(c))
935 s1(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
936 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
938 u32 x0F0F3333, x3C3C3C3C, x55FF55FF, x69C369C3, x0903B73F, x09FCB7C0,
940 u32 x55AFD1B7, x3C3C69C3, x6993B874;
941 u32 x5CEDE59F, x09FCE295, x5D91A51E, x529E962D;
942 u32 x29EEADC0, x4B8771A3, x428679F3, x6B68D433;
943 u32 x5BA7E193, x026F12F3, x6B27C493, x94D83B6C;
944 u32 x965E0B0F, x3327A113, x847F0A1F, xD6E19C32;
945 u32 x0DBCE883, x3A25A215, x37994A96;
946 u32 x8A487EA7, x8B480F07, xB96C2D16;
949 vsel(x0F0F3333, a3, a2, a5);
950 vxor(x3C3C3C3C, a2, a3);
951 vor(x55FF55FF, a1, a4);
952 vxor(x69C369C3, x3C3C3C3C, x55FF55FF);
953 vsel(x0903B73F, a5, x0F0F3333, x69C369C3);
954 vxor(x09FCB7C0, a4, x0903B73F);
955 vxor(x5CA9E295, a1, x09FCB7C0);
957 vsel(x55AFD1B7, x5CA9E295, x55FF55FF, x0F0F3333);
958 vsel(x3C3C69C3, x3C3C3C3C, x69C369C3, a5);
959 vxor(x6993B874, x55AFD1B7, x3C3C69C3);
961 vsel(x5CEDE59F, x55FF55FF, x5CA9E295, x6993B874);
962 vsel(x09FCE295, x09FCB7C0, x5CA9E295, a5);
963 vsel(x5D91A51E, x5CEDE59F, x6993B874, x09FCE295);
964 vxor(x529E962D, x0F0F3333, x5D91A51E);
966 vsel(x29EEADC0, x69C369C3, x09FCB7C0, x5CEDE59F);
967 vsel(x4B8771A3, x0F0F3333, x69C369C3, x5CA9E295);
968 vsel(x428679F3, a5, x4B8771A3, x529E962D);
969 vxor(x6B68D433, x29EEADC0, x428679F3);
971 vsel(x5BA7E193, x5CA9E295, x4B8771A3, a3);
972 vsel(x026F12F3, a4, x0F0F3333, x529E962D);
973 vsel(x6B27C493, x6B68D433, x5BA7E193, x026F12F3);
974 vnot(x94D83B6C, x6B27C493);
975 vsel(x0, x94D83B6C, x6B68D433, a6);
976 vxor(*out1, *out1, x0);
978 vsel(x965E0B0F, x94D83B6C, a3, x428679F3);
979 vsel(x3327A113, x5BA7E193, a2, x69C369C3);
980 vsel(x847F0A1F, x965E0B0F, a4, x3327A113);
981 vxor(xD6E19C32, x529E962D, x847F0A1F);
982 vsel(x1, xD6E19C32, x5CA9E295, a6);
983 vxor(*out2, *out2, x1);
985 vsel(x0DBCE883, x09FCE295, x3C3C69C3, x847F0A1F);
986 vsel(x3A25A215, x3327A113, x5CA9E295, x0903B73F);
987 vxor(x37994A96, x0DBCE883, x3A25A215);
988 vsel(x3, x37994A96, x529E962D, a6);
989 vxor(*out4, *out4, x3);
991 vxor(x8A487EA7, x5CA9E295, xD6E19C32);
992 vsel(x8B480F07, a3, x8A487EA7, x847F0A1F);
993 vsel(xB96C2D16, x8B480F07, x3C3C3C3C, x3A25A215);
994 vsel(x2, xB96C2D16, x6993B874, a6);
995 vxor(*out3, *out3, x2);
999 s2(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1000 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1002 u32 x55553333, x0055FF33, x33270F03, x66725A56, x00FFFF00, x668DA556;
1003 u32 x0F0F5A56, xF0F0A5A9, xA5A5969A, xA55A699A;
1004 u32 x0F5AF03C, x6600FF56, x87A5F09C;
1005 u32 xA55A963C, x3C69C30F, xB44BC32D;
1006 u32 x66D7CC56, x0F4B0F2D, x699CC37B, x996C66D2;
1007 u32 xB46C662D, x278DB412, xB66CB43B;
1008 u32 xD2DC4E52, x27993333, xD2994E33;
1009 u32 x278D0F2D, x2E0E547B, x09976748;
1012 vsel(x55553333, a1, a3, a6);
1013 vsel(x0055FF33, a6, x55553333, a5);
1014 vsel(x33270F03, a3, a4, x0055FF33);
1015 vxor(x66725A56, a1, x33270F03);
1016 vxor(x00FFFF00, a5, a6);
1017 vxor(x668DA556, x66725A56, x00FFFF00);
1019 vsel(x0F0F5A56, a4, x66725A56, a6);
1020 vnot(xF0F0A5A9, x0F0F5A56);
1021 vxor(xA5A5969A, x55553333, xF0F0A5A9);
1022 vxor(xA55A699A, x00FFFF00, xA5A5969A);
1023 vsel(x1, xA55A699A, x668DA556, a2);
1024 vxor(*out2, *out2, x1);
1026 vxor(x0F5AF03C, a4, x0055FF33);
1027 vsel(x6600FF56, x66725A56, a6, x00FFFF00);
1028 vsel(x87A5F09C, xA5A5969A, x0F5AF03C, x6600FF56);
1030 vsel(xA55A963C, xA5A5969A, x0F5AF03C, a5);
1031 vxor(x3C69C30F, a3, x0F5AF03C);
1032 vsel(xB44BC32D, xA55A963C, x3C69C30F, a1);
1034 vsel(x66D7CC56, x66725A56, x668DA556, xA5A5969A);
1035 vsel(x0F4B0F2D, a4, xB44BC32D, a5);
1036 vxor(x699CC37B, x66D7CC56, x0F4B0F2D);
1037 vxor(x996C66D2, xF0F0A5A9, x699CC37B);
1038 vsel(x0, x996C66D2, xB44BC32D, a2);
1039 vxor(*out1, *out1, x0);
1041 vsel(xB46C662D, xB44BC32D, x996C66D2, x00FFFF00);
1042 vsel(x278DB412, x668DA556, xA5A5969A, a1);
1043 vsel(xB66CB43B, xB46C662D, x278DB412, x6600FF56);
1045 vsel(xD2DC4E52, x66D7CC56, x996C66D2, xB44BC32D);
1046 vsel(x27993333, x278DB412, a3, x0055FF33);
1047 vsel(xD2994E33, xD2DC4E52, x27993333, a5);
1048 vsel(x3, x87A5F09C, xD2994E33, a2);
1049 vxor(*out4, *out4, x3);
1051 vsel(x278D0F2D, x278DB412, x0F4B0F2D, a6);
1052 vsel(x2E0E547B, x0F0F5A56, xB66CB43B, x278D0F2D);
1053 vxor(x09976748, x27993333, x2E0E547B);
1054 vsel(x2, xB66CB43B, x09976748, a2);
1055 vxor(*out3, *out3, x2);
1059 s3(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1060 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1062 u32 x0F330F33, x0F33F0CC, x5A66A599;
1063 u32 x2111B7BB, x03FF3033, x05BB50EE, x074F201F, x265E97A4;
1064 u32 x556BA09E, x665A93AC, x99A56C53;
1065 u32 x25A1A797, x5713754C, x66559355, x47B135C6;
1066 u32 x9A5A5C60, xD07AF8F8, x87698DB4, xE13C1EE1;
1067 u32 x9E48CDE4, x655B905E, x00A55CFF, x9E49915E;
1068 u32 xD6599874, x05330022, xD2699876;
1069 u32 x665F9364, xD573F0F2, xB32C6396;
1072 vsel(x0F330F33, a4, a3, a5);
1073 vxor(x0F33F0CC, a6, x0F330F33);
1074 vxor(x5A66A599, a2, x0F33F0CC);
1076 vsel(x2111B7BB, a3, a6, x5A66A599);
1077 vsel(x03FF3033, a5, a3, x0F33F0CC);
1078 vsel(x05BB50EE, a5, x0F33F0CC, a2);
1079 vsel(x074F201F, x03FF3033, a4, x05BB50EE);
1080 vxor(x265E97A4, x2111B7BB, x074F201F);
1082 vsel(x556BA09E, x5A66A599, x05BB50EE, a4);
1083 vsel(x665A93AC, x556BA09E, x265E97A4, a3);
1084 vnot(x99A56C53, x665A93AC);
1085 vsel(x1, x265E97A4, x99A56C53, a1);
1086 vxor(*out2, *out2, x1);
1088 vxor(x25A1A797, x03FF3033, x265E97A4);
1089 vsel(x5713754C, a2, x0F33F0CC, x074F201F);
1090 vsel(x66559355, x665A93AC, a2, a5);
1091 vsel(x47B135C6, x25A1A797, x5713754C, x66559355);
1093 vxor(x9A5A5C60, x03FF3033, x99A56C53);
1094 vsel(xD07AF8F8, x9A5A5C60, x556BA09E, x5A66A599);
1095 vxor(x87698DB4, x5713754C, xD07AF8F8);
1096 vxor(xE13C1EE1, x66559355, x87698DB4);
1098 vsel(x9E48CDE4, x9A5A5C60, x87698DB4, x265E97A4);
1099 vsel(x655B905E, x66559355, x05BB50EE, a4);
1100 vsel(x00A55CFF, a5, a6, x9A5A5C60);
1101 vsel(x9E49915E, x9E48CDE4, x655B905E, x00A55CFF);
1102 vsel(x0, x9E49915E, xE13C1EE1, a1);
1103 vxor(*out1, *out1, x0);
1105 vsel(xD6599874, xD07AF8F8, x66559355, x0F33F0CC);
1106 vand(x05330022, x0F330F33, x05BB50EE);
1107 vsel(xD2699876, xD6599874, x00A55CFF, x05330022);
1108 vsel(x3, x5A66A599, xD2699876, a1);
1109 vxor(*out4, *out4, x3);
1111 vsel(x665F9364, x265E97A4, x66559355, x47B135C6);
1112 vsel(xD573F0F2, xD07AF8F8, x05330022, a4);
1113 vxor(xB32C6396, x665F9364, xD573F0F2);
1114 vsel(x2, xB32C6396, x47B135C6, a1);
1115 vxor(*out3, *out3, x2);
1119 s4(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1120 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1122 u32 x0505AFAF, x0555AF55, x0A5AA05A, x46566456, x0A0A5F5F, x0AF55FA0,
1123 x0AF50F0F, x4CA36B59;
1125 u32 x01BB23BB, x5050FAFA, xA31C26BE, xA91679E1;
1127 u32 x50E9FA1E, x0AF55F00, x827D9784, xD2946D9A;
1128 u32 x31F720B3, x11FB21B3, x4712A7AD, x9586CA37;
1131 vsel(x0505AFAF, a5, a3, a1);
1132 vsel(x0555AF55, x0505AFAF, a1, a4);
1133 vxor(x0A5AA05A, a3, x0555AF55);
1134 vsel(x46566456, a1, x0A5AA05A, a2);
1135 vsel(x0A0A5F5F, a3, a5, a1);
1136 vxor(x0AF55FA0, a4, x0A0A5F5F);
1137 vsel(x0AF50F0F, x0AF55FA0, a3, a5);
1138 vxor(x4CA36B59, x46566456, x0AF50F0F);
1140 vnot(xB35C94A6, x4CA36B59);
1142 vsel(x01BB23BB, a4, a2, x0555AF55);
1143 vxor(x5050FAFA, a1, x0505AFAF);
1144 vsel(xA31C26BE, xB35C94A6, x01BB23BB, x5050FAFA);
1145 vxor(xA91679E1, x0A0A5F5F, xA31C26BE);
1147 vnot(x56E9861E, xA91679E1);
1149 vsel(x50E9FA1E, x5050FAFA, x56E9861E, a4);
1150 vsel(x0AF55F00, x0AF50F0F, x0AF55FA0, x0A0A5F5F);
1151 vsel(x827D9784, xB35C94A6, x0AF55F00, a2);
1152 vxor(xD2946D9A, x50E9FA1E, x827D9784);
1153 vsel(x2, xD2946D9A, x4CA36B59, a6);
1154 vxor(*out3, *out3, x2);
1155 vsel(x3, xB35C94A6, xD2946D9A, a6);
1156 vxor(*out4, *out4, x3);
1158 vsel(x31F720B3, a2, a4, x0AF55FA0);
1159 vsel(x11FB21B3, x01BB23BB, x31F720B3, x5050FAFA);
1160 vxor(x4712A7AD, x56E9861E, x11FB21B3);
1161 vxor(x9586CA37, xD2946D9A, x4712A7AD);
1162 vsel(x0, x56E9861E, x9586CA37, a6);
1163 vxor(*out1, *out1, x0);
1164 vsel(x1, x9586CA37, xA91679E1, a6);
1165 vxor(*out2, *out2, x1);
1169 s5(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1170 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1172 u32 x550F550F, xAAF0AAF0, xA5F5A5F5, x96C696C6, x00FFFF00, x963969C6;
1173 u32 x2E3C2E3C, xB73121F7, x1501DF0F, x00558A5F, x2E69A463;
1174 u32 x0679ED42, x045157FD, xB32077FF, x9D49D39C;
1175 u32 xAC81CFB2, xF72577AF, x5BA4B81D;
1176 u32 x5BA477AF, x4895469F, x3A35273A, x1A35669A;
1177 u32 x12E6283D, x9E47D3D4, x1A676AB4;
1178 u32 x891556DF, xE5E77F82, x6CF2295D;
1179 u32 x2E3CA5F5, x9697C1C6, x369CC1D6;
1182 vsel(x550F550F, a1, a3, a5);
1183 vnot(xAAF0AAF0, x550F550F);
1184 vsel(xA5F5A5F5, xAAF0AAF0, a1, a3);
1185 vxor(x96C696C6, a2, xA5F5A5F5);
1186 vxor(x00FFFF00, a5, a6);
1187 vxor(x963969C6, x96C696C6, x00FFFF00);
1189 vsel(x2E3C2E3C, a3, xAAF0AAF0, a2);
1190 vsel(xB73121F7, a2, x963969C6, x96C696C6);
1191 vsel(x1501DF0F, a6, x550F550F, xB73121F7);
1192 vsel(x00558A5F, x1501DF0F, a5, a1);
1193 vxor(x2E69A463, x2E3C2E3C, x00558A5F);
1195 vsel(x0679ED42, x00FFFF00, x2E69A463, x96C696C6);
1196 vsel(x045157FD, a6, a1, x0679ED42);
1197 vsel(xB32077FF, xB73121F7, a6, x045157FD);
1198 vxor(x9D49D39C, x2E69A463, xB32077FF);
1199 vsel(x2, x9D49D39C, x2E69A463, a4);
1200 vxor(*out3, *out3, x2);
1202 vsel(xAC81CFB2, xAAF0AAF0, x1501DF0F, x0679ED42);
1203 vsel(xF72577AF, xB32077FF, x550F550F, a1);
1204 vxor(x5BA4B81D, xAC81CFB2, xF72577AF);
1205 vsel(x1, x5BA4B81D, x963969C6, a4);
1206 vxor(*out2, *out2, x1);
1208 vsel(x5BA477AF, x5BA4B81D, xF72577AF, a6);
1209 vsel(x4895469F, x5BA477AF, x00558A5F, a2);
1210 vsel(x3A35273A, x2E3C2E3C, a2, x963969C6);
1211 vsel(x1A35669A, x4895469F, x3A35273A, x5BA4B81D);
1213 vsel(x12E6283D, a5, x5BA4B81D, x963969C6);
1214 vsel(x9E47D3D4, x96C696C6, x9D49D39C, xAC81CFB2);
1215 vsel(x1A676AB4, x12E6283D, x9E47D3D4, x4895469F);
1217 vsel(x891556DF, xB32077FF, x4895469F, x3A35273A);
1218 vsel(xE5E77F82, xF72577AF, x00FFFF00, x12E6283D);
1219 vxor(x6CF2295D, x891556DF, xE5E77F82);
1220 vsel(x3, x1A35669A, x6CF2295D, a4);
1221 vxor(*out4, *out4, x3);
1223 vsel(x2E3CA5F5, x2E3C2E3C, xA5F5A5F5, a6);
1224 vsel(x9697C1C6, x96C696C6, x963969C6, x045157FD);
1225 vsel(x369CC1D6, x2E3CA5F5, x9697C1C6, x5BA477AF);
1226 vsel(x0, x369CC1D6, x1A676AB4, a4);
1227 vxor(*out1, *out1, x0);
1231 s6(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1232 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1234 u32 x555500FF, x666633CC, x606F30CF, x353A659A, x353A9A65, xCAC5659A;
1235 u32 x353A6565, x0A3F0A6F, x6C5939A3, x5963A3C6;
1236 u32 x35FF659A, x3AF06A95, x05CF0A9F, x16E94A97;
1237 u32 x86CD4C9B, x12E0FFFD, x942D9A67;
1238 u32 x142956AB, x455D45DF, x1C3EE619;
1239 u32 x2AEA70D5, x20CF7A9F, x3CF19C86, x69A49C79;
1240 u32 x840DBB67, x6DA19C1E, x925E63E1;
1241 u32 x9C3CA761, x257A75D5, xB946D2B4;
1244 vsel(x555500FF, a1, a4, a5);
1245 vxor(x666633CC, a2, x555500FF);
1246 vsel(x606F30CF, x666633CC, a4, a3);
1247 vxor(x353A659A, a1, x606F30CF);
1248 vxor(x353A9A65, a5, x353A659A);
1249 vnot(xCAC5659A, x353A9A65);
1251 vsel(x353A6565, x353A659A, x353A9A65, a4);
1252 vsel(x0A3F0A6F, a3, a4, x353A6565);
1253 vxor(x6C5939A3, x666633CC, x0A3F0A6F);
1254 vxor(x5963A3C6, x353A9A65, x6C5939A3);
1256 vsel(x35FF659A, a4, x353A659A, x353A6565);
1257 vxor(x3AF06A95, a3, x35FF659A);
1258 vsel(x05CF0A9F, a4, a3, x353A9A65);
1259 vsel(x16E94A97, x3AF06A95, x05CF0A9F, x6C5939A3);
1261 vsel(x86CD4C9B, xCAC5659A, x05CF0A9F, x6C5939A3);
1262 vsel(x12E0FFFD, a5, x3AF06A95, x16E94A97);
1263 vsel(x942D9A67, x86CD4C9B, x353A9A65, x12E0FFFD);
1264 vsel(x0, xCAC5659A, x942D9A67, a6);
1265 vxor(*out1, *out1, x0);
1267 vsel(x142956AB, x353A659A, x942D9A67, a2);
1268 vsel(x455D45DF, a1, x86CD4C9B, x142956AB);
1269 vxor(x1C3EE619, x5963A3C6, x455D45DF);
1270 vsel(x3, x5963A3C6, x1C3EE619, a6);
1271 vxor(*out4, *out4, x3);
1273 vsel(x2AEA70D5, x3AF06A95, x606F30CF, x353A9A65);
1274 vsel(x20CF7A9F, x2AEA70D5, x05CF0A9F, x0A3F0A6F);
1275 vxor(x3CF19C86, x1C3EE619, x20CF7A9F);
1276 vxor(x69A49C79, x555500FF, x3CF19C86);
1278 vsel(x840DBB67, a5, x942D9A67, x86CD4C9B);
1279 vsel(x6DA19C1E, x69A49C79, x3CF19C86, x840DBB67);
1280 vnot(x925E63E1, x6DA19C1E);
1281 vsel(x1, x925E63E1, x69A49C79, a6);
1282 vxor(*out2, *out2, x1);
1284 vsel(x9C3CA761, x840DBB67, x1C3EE619, x3CF19C86);
1285 vsel(x257A75D5, x455D45DF, x2AEA70D5, x606F30CF);
1286 vxor(xB946D2B4, x9C3CA761, x257A75D5);
1287 vsel(x2, x16E94A97, xB946D2B4, a6);
1288 vxor(*out3, *out3, x2);
1292 s7(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1293 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1295 u32 x44447777, x4B4B7878, x22772277, x0505F5F5, x220522F5, x694E5A8D;
1296 u32 x00FFFF00, x66666666, x32353235, x26253636, x26DAC936;
1297 u32 x738F9C63, x11EF9867, x26DA9867;
1298 u32 x4B4B9C63, x4B666663, x4E639396;
1299 u32 x4E4B393C, xFF00FF00, xFF05DD21, xB14EE41D;
1300 u32 xD728827B, x6698807B, x699C585B;
1301 u32 x738C847B, xA4A71E18, x74878E78;
1302 u32 x333D9639, x74879639, x8B7869C6;
1305 vsel(x44447777, a2, a6, a3);
1306 vxor(x4B4B7878, a4, x44447777);
1307 vsel(x22772277, a3, a5, a2);
1308 vsel(x0505F5F5, a6, a2, a4);
1309 vsel(x220522F5, x22772277, x0505F5F5, a5);
1310 vxor(x694E5A8D, x4B4B7878, x220522F5);
1312 vxor(x00FFFF00, a5, a6);
1313 vxor(x66666666, a2, a3);
1314 vsel(x32353235, a3, x220522F5, a4);
1315 vsel(x26253636, x66666666, x32353235, x4B4B7878);
1316 vxor(x26DAC936, x00FFFF00, x26253636);
1317 vsel(x0, x26DAC936, x694E5A8D, a1);
1318 vxor(*out1, *out1, x0);
1320 vxor(x738F9C63, a2, x26DAC936);
1321 vsel(x11EF9867, x738F9C63, a5, x66666666);
1322 vsel(x26DA9867, x26DAC936, x11EF9867, a6);
1324 vsel(x4B4B9C63, x4B4B7878, x738F9C63, a6);
1325 vsel(x4B666663, x4B4B9C63, x66666666, x00FFFF00);
1326 vxor(x4E639396, x0505F5F5, x4B666663);
1328 vsel(x4E4B393C, x4B4B7878, x4E639396, a2);
1329 vnot(xFF00FF00, a5);
1330 vsel(xFF05DD21, xFF00FF00, x738F9C63, x32353235);
1331 vxor(xB14EE41D, x4E4B393C, xFF05DD21);
1332 vsel(x1, xB14EE41D, x26DA9867, a1);
1333 vxor(*out2, *out2, x1);
1335 vxor(xD728827B, x66666666, xB14EE41D);
1336 vsel(x6698807B, x26DA9867, xD728827B, x4E4B393C);
1337 vsel(x699C585B, x6698807B, x694E5A8D, xFF05DD21);
1338 vsel(x2, x699C585B, x4E639396, a1);
1339 vxor(*out3, *out3, x2);
1341 vsel(x738C847B, x738F9C63, xD728827B, x4B4B7878);
1342 vxor(xA4A71E18, x738F9C63, xD728827B);
1343 vsel(x74878E78, x738C847B, xA4A71E18, a4);
1345 vsel(x333D9639, x32353235, x738C847B, xB14EE41D);
1346 vsel(x74879639, x74878E78, x333D9639, a6);
1347 vnot(x8B7869C6, x74879639);
1348 vsel(x3, x74878E78, x8B7869C6, a1);
1349 vxor(*out4, *out4, x3);
1353 s8(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1354 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1356 u32 x0505F5F5, x05FAF50A, x0F0F00FF, x22227777, x07DA807F, x34E9B34C;
1357 u32 x00FFF00F, x0033FCCF, x5565B15C, x0C0C3F3F, x59698E63;
1358 u32 x3001F74E, x30555745, x693CD926;
1359 u32 x0C0CD926, x0C3F25E9, x38D696A5;
1361 u32 x03D2117B, xC778395B, xCB471CB2;
1362 u32 x5425B13F, x56B3803F, x919AE965;
1363 u32 x17B3023F, x75555755, x62E6556A, xA59E6C31;
1366 vsel(x0505F5F5, a5, a1, a3);
1367 vxor(x05FAF50A, a4, x0505F5F5);
1368 vsel(x0F0F00FF, a3, a4, a5);
1369 vsel(x22227777, a2, a5, a1);
1370 vsel(x07DA807F, x05FAF50A, x0F0F00FF, x22227777);
1371 vxor(x34E9B34C, a2, x07DA807F);
1373 vsel(x00FFF00F, x05FAF50A, a4, a3);
1374 vsel(x0033FCCF, a5, x00FFF00F, a2);
1375 vsel(x5565B15C, a1, x34E9B34C, x0033FCCF);
1376 vsel(x0C0C3F3F, a3, a5, a2);
1377 vxor(x59698E63, x5565B15C, x0C0C3F3F);
1379 vsel(x3001F74E, x34E9B34C, a5, x05FAF50A);
1380 vsel(x30555745, x3001F74E, a1, x00FFF00F);
1381 vxor(x693CD926, x59698E63, x30555745);
1382 vsel(x2, x693CD926, x59698E63, a6);
1383 vxor(*out3, *out3, x2);
1385 vsel(x0C0CD926, x0C0C3F3F, x693CD926, a5);
1386 vxor(x0C3F25E9, x0033FCCF, x0C0CD926);
1387 vxor(x38D696A5, x34E9B34C, x0C3F25E9);
1389 vnot(xC729695A, x38D696A5);
1391 vsel(x03D2117B, x07DA807F, a2, x0C0CD926);
1392 vsel(xC778395B, xC729695A, x03D2117B, x30555745);
1393 vxor(xCB471CB2, x0C3F25E9, xC778395B);
1394 vsel(x1, xCB471CB2, x34E9B34C, a6);
1395 vxor(*out2, *out2, x1);
1397 vsel(x5425B13F, x5565B15C, x0C0C3F3F, x03D2117B);
1398 vsel(x56B3803F, x07DA807F, x5425B13F, x59698E63);
1399 vxor(x919AE965, xC729695A, x56B3803F);
1400 vsel(x3, xC729695A, x919AE965, a6);
1401 vxor(*out4, *out4, x3);
1403 vsel(x17B3023F, x07DA807F, a2, x59698E63);
1404 vor(x75555755, a1, x30555745);
1405 vxor(x62E6556A, x17B3023F, x75555755);
1406 vxor(xA59E6C31, xC778395B, x62E6556A);
1407 vsel(x0, xA59E6C31, x38D696A5, a6);
1408 vxor(*out1, *out1, x0);
1412 #define SWAP(a, b) { u32 tmp=*a;*a=*b;*b=tmp; }
1448 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
1449 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
1450 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
1451 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
1452 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
1453 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
1454 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
1455 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
1456 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
1457 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
1458 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
1459 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
1460 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
1461 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
1462 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
1463 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
1465 static void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
1467 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
1468 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
1469 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
1470 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
1471 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
1472 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
1473 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
1474 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
1477 #if CUDA_ARCH >= 500
1487 for (u32 i = 0; i < 2; i++)
1489 if (i) KEYSET10 else KEYSET00
1491 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1492 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1493 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1494 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1495 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1496 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1497 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1498 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1500 if (i) KEYSET11 else KEYSET01
1502 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1503 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1504 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1505 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1506 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1507 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1508 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1509 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1511 if (i) KEYSET12 else KEYSET02
1513 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1514 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1515 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1516 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1517 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1518 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1519 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1520 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1522 if (i) KEYSET13 else KEYSET03
1524 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1525 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1526 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1527 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1528 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1529 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1530 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1531 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1533 if (i) KEYSET14 else KEYSET04
1535 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1536 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1537 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1538 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1539 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1540 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1541 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1542 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1544 if (i) KEYSET15 else KEYSET05
1546 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1547 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1548 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1549 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1550 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1551 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1552 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1553 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1555 if (i) KEYSET16 else KEYSET06
1557 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30);
1558 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17);
1559 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1560 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1561 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02);
1562 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18);
1563 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1564 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1566 if (i) KEYSET17 else KEYSET07
1568 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62);
1569 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49);
1570 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1571 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1572 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34);
1573 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50);
1574 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1575 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1579 static void transpose32c (u32 data[32])
1581 #define swap(x,y,j,m) \
1582 t = ((x) ^ ((y) >> (j))) & (m); \
1584 (y) = (y) ^ (t << (j));
1588 swap (data[ 0], data[16], 16, 0x0000ffff);
1589 swap (data[ 1], data[17], 16, 0x0000ffff);
1590 swap (data[ 2], data[18], 16, 0x0000ffff);
1591 swap (data[ 3], data[19], 16, 0x0000ffff);
1592 swap (data[ 4], data[20], 16, 0x0000ffff);
1593 swap (data[ 5], data[21], 16, 0x0000ffff);
1594 swap (data[ 6], data[22], 16, 0x0000ffff);
1595 swap (data[ 7], data[23], 16, 0x0000ffff);
1596 swap (data[ 8], data[24], 16, 0x0000ffff);
1597 swap (data[ 9], data[25], 16, 0x0000ffff);
1598 swap (data[10], data[26], 16, 0x0000ffff);
1599 swap (data[11], data[27], 16, 0x0000ffff);
1600 swap (data[12], data[28], 16, 0x0000ffff);
1601 swap (data[13], data[29], 16, 0x0000ffff);
1602 swap (data[14], data[30], 16, 0x0000ffff);
1603 swap (data[15], data[31], 16, 0x0000ffff);
1604 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
1605 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
1606 swap (data[ 2], data[10], 8, 0x00ff00ff);
1607 swap (data[ 3], data[11], 8, 0x00ff00ff);
1608 swap (data[ 4], data[12], 8, 0x00ff00ff);
1609 swap (data[ 5], data[13], 8, 0x00ff00ff);
1610 swap (data[ 6], data[14], 8, 0x00ff00ff);
1611 swap (data[ 7], data[15], 8, 0x00ff00ff);
1612 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
1613 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
1614 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
1615 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
1616 swap (data[ 0], data[ 2], 2, 0x33333333);
1617 swap (data[ 1], data[ 3], 2, 0x33333333);
1618 swap (data[ 0], data[ 1], 1, 0x55555555);
1619 swap (data[ 2], data[ 3], 1, 0x55555555);
1620 swap (data[ 4], data[ 6], 2, 0x33333333);
1621 swap (data[ 5], data[ 7], 2, 0x33333333);
1622 swap (data[ 4], data[ 5], 1, 0x55555555);
1623 swap (data[ 6], data[ 7], 1, 0x55555555);
1624 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
1625 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
1626 swap (data[10], data[14], 4, 0x0f0f0f0f);
1627 swap (data[11], data[15], 4, 0x0f0f0f0f);
1628 swap (data[ 8], data[10], 2, 0x33333333);
1629 swap (data[ 9], data[11], 2, 0x33333333);
1630 swap (data[ 8], data[ 9], 1, 0x55555555);
1631 swap (data[10], data[11], 1, 0x55555555);
1632 swap (data[12], data[14], 2, 0x33333333);
1633 swap (data[13], data[15], 2, 0x33333333);
1634 swap (data[12], data[13], 1, 0x55555555);
1635 swap (data[14], data[15], 1, 0x55555555);
1636 swap (data[16], data[24], 8, 0x00ff00ff);
1637 swap (data[17], data[25], 8, 0x00ff00ff);
1638 swap (data[18], data[26], 8, 0x00ff00ff);
1639 swap (data[19], data[27], 8, 0x00ff00ff);
1640 swap (data[20], data[28], 8, 0x00ff00ff);
1641 swap (data[21], data[29], 8, 0x00ff00ff);
1642 swap (data[22], data[30], 8, 0x00ff00ff);
1643 swap (data[23], data[31], 8, 0x00ff00ff);
1644 swap (data[16], data[20], 4, 0x0f0f0f0f);
1645 swap (data[17], data[21], 4, 0x0f0f0f0f);
1646 swap (data[18], data[22], 4, 0x0f0f0f0f);
1647 swap (data[19], data[23], 4, 0x0f0f0f0f);
1648 swap (data[16], data[18], 2, 0x33333333);
1649 swap (data[17], data[19], 2, 0x33333333);
1650 swap (data[16], data[17], 1, 0x55555555);
1651 swap (data[18], data[19], 1, 0x55555555);
1652 swap (data[20], data[22], 2, 0x33333333);
1653 swap (data[21], data[23], 2, 0x33333333);
1654 swap (data[20], data[21], 1, 0x55555555);
1655 swap (data[22], data[23], 1, 0x55555555);
1656 swap (data[24], data[28], 4, 0x0f0f0f0f);
1657 swap (data[25], data[29], 4, 0x0f0f0f0f);
1658 swap (data[26], data[30], 4, 0x0f0f0f0f);
1659 swap (data[27], data[31], 4, 0x0f0f0f0f);
1660 swap (data[24], data[26], 2, 0x33333333);
1661 swap (data[25], data[27], 2, 0x33333333);
1662 swap (data[24], data[25], 1, 0x55555555);
1663 swap (data[26], data[27], 1, 0x55555555);
1664 swap (data[28], data[30], 2, 0x33333333);
1665 swap (data[29], data[31], 2, 0x33333333);
1666 swap (data[28], data[29], 1, 0x55555555);
1667 swap (data[30], data[31], 1, 0x55555555);
1670 static void m03000m (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1676 const u32 gid = get_global_id (0);
1677 const u32 lid = get_local_id (0);
1683 const u32 K00 = pws[gid].i[ 0];
1684 const u32 K01 = pws[gid].i[ 1];
1685 const u32 K02 = pws[gid].i[ 2];
1686 const u32 K03 = pws[gid].i[ 3];
1687 const u32 K04 = pws[gid].i[ 4];
1688 const u32 K05 = pws[gid].i[ 5];
1689 const u32 K06 = pws[gid].i[ 6];
1690 const u32 K07 = pws[gid].i[ 7];
1691 const u32 K08 = pws[gid].i[ 8];
1692 const u32 K09 = pws[gid].i[ 9];
1693 const u32 K10 = pws[gid].i[10];
1694 const u32 K11 = pws[gid].i[11];
1695 const u32 K12 = pws[gid].i[12];
1696 const u32 K13 = pws[gid].i[13];
1697 const u32 K14 = pws[gid].i[14];
1698 const u32 K15 = pws[gid].i[15];
1699 const u32 K16 = pws[gid].i[16];
1700 const u32 K17 = pws[gid].i[17];
1701 const u32 K18 = pws[gid].i[18];
1702 const u32 K19 = pws[gid].i[19];
1703 const u32 K20 = pws[gid].i[20];
1704 const u32 K21 = pws[gid].i[21];
1705 const u32 K22 = pws[gid].i[22];
1706 const u32 K23 = pws[gid].i[23];
1707 const u32 K24 = pws[gid].i[24];
1708 const u32 K25 = pws[gid].i[25];
1709 const u32 K26 = pws[gid].i[26];
1710 const u32 K27 = pws[gid].i[27];
1711 const u32 K28 = pws[gid].i[28];
1712 const u32 K29 = pws[gid].i[29];
1713 const u32 K30 = pws[gid].i[30];
1714 const u32 K31 = pws[gid].i[31];
1715 const u32 K32 = pws[gid].i[32];
1716 const u32 K33 = pws[gid].i[33];
1717 const u32 K34 = pws[gid].i[34];
1718 const u32 K35 = pws[gid].i[35];
1719 const u32 K36 = pws[gid].i[36];
1720 const u32 K37 = pws[gid].i[37];
1721 const u32 K38 = pws[gid].i[38];
1722 const u32 K39 = pws[gid].i[39];
1723 const u32 K40 = pws[gid].i[40];
1724 const u32 K41 = pws[gid].i[41];
1725 const u32 K42 = pws[gid].i[42];
1726 const u32 K43 = pws[gid].i[43];
1727 const u32 K44 = pws[gid].i[44];
1728 const u32 K45 = pws[gid].i[45];
1729 const u32 K46 = pws[gid].i[46];
1730 const u32 K47 = pws[gid].i[47];
1731 const u32 K48 = pws[gid].i[48];
1732 const u32 K49 = pws[gid].i[49];
1733 const u32 K50 = pws[gid].i[50];
1734 const u32 K51 = pws[gid].i[51];
1735 const u32 K52 = pws[gid].i[52];
1736 const u32 K53 = pws[gid].i[53];
1737 const u32 K54 = pws[gid].i[54];
1738 const u32 K55 = pws[gid].i[55];
1740 const u32 pc_pos = get_local_id (1);
1742 const u32 il_pos = pc_pos * 32;
1777 k00 |= words_buf_r[pc_pos].b[ 0];
1778 k01 |= words_buf_r[pc_pos].b[ 1];
1779 k02 |= words_buf_r[pc_pos].b[ 2];
1780 k03 |= words_buf_r[pc_pos].b[ 3];
1781 k04 |= words_buf_r[pc_pos].b[ 4];
1782 k05 |= words_buf_r[pc_pos].b[ 5];
1783 k06 |= words_buf_r[pc_pos].b[ 6];
1784 k07 |= words_buf_r[pc_pos].b[ 7];
1785 k08 |= words_buf_r[pc_pos].b[ 8];
1786 k09 |= words_buf_r[pc_pos].b[ 9];
1787 k10 |= words_buf_r[pc_pos].b[10];
1788 k11 |= words_buf_r[pc_pos].b[11];
1789 k12 |= words_buf_r[pc_pos].b[12];
1790 k13 |= words_buf_r[pc_pos].b[13];
1791 k14 |= words_buf_r[pc_pos].b[14];
1792 k15 |= words_buf_r[pc_pos].b[15];
1793 k16 |= words_buf_r[pc_pos].b[16];
1794 k17 |= words_buf_r[pc_pos].b[17];
1795 k18 |= words_buf_r[pc_pos].b[18];
1796 k19 |= words_buf_r[pc_pos].b[19];
1797 k20 |= words_buf_r[pc_pos].b[20];
1798 k21 |= words_buf_r[pc_pos].b[21];
1799 k22 |= words_buf_r[pc_pos].b[22];
1800 k23 |= words_buf_r[pc_pos].b[23];
1801 k24 |= words_buf_r[pc_pos].b[24];
1802 k25 |= words_buf_r[pc_pos].b[25];
1803 k26 |= words_buf_r[pc_pos].b[26];
1804 k27 |= words_buf_r[pc_pos].b[27];
1805 k28 |= words_buf_r[pc_pos].b[28];
1806 k29 |= words_buf_r[pc_pos].b[29];
1807 k30 |= words_buf_r[pc_pos].b[30];
1808 k31 |= words_buf_r[pc_pos].b[31];
1810 // KGS!@#$% including IP
1815 u32 D03 = 0xffffffff;
1817 u32 D05 = 0xffffffff;
1818 u32 D06 = 0xffffffff;
1819 u32 D07 = 0xffffffff;
1825 u32 D13 = 0xffffffff;
1828 u32 D16 = 0xffffffff;
1829 u32 D17 = 0xffffffff;
1834 u32 D22 = 0xffffffff;
1836 u32 D24 = 0xffffffff;
1838 u32 D26 = 0xffffffff;
1840 u32 D28 = 0xffffffff;
1841 u32 D29 = 0xffffffff;
1842 u32 D30 = 0xffffffff;
1843 u32 D31 = 0xffffffff;
1852 u32 D40 = 0xffffffff;
1853 u32 D41 = 0xffffffff;
1854 u32 D42 = 0xffffffff;
1856 u32 D44 = 0xffffffff;
1867 u32 D55 = 0xffffffff;
1870 u32 D58 = 0xffffffff;
1873 u32 D61 = 0xffffffff;
1874 u32 D62 = 0xffffffff;
1875 u32 D63 = 0xffffffff;
1879 k00, k01, k02, k03, k04, k05, k06,
1880 k07, k08, k09, k10, k11, k12, k13,
1881 k14, k15, k16, k17, k18, k19, k20,
1882 k21, k22, k23, k24, k25, k26, k27,
1883 k28, k29, k30, k31, K32, K33, K34,
1884 K35, K36, K37, K38, K39, K40, K41,
1885 K42, K43, K44, K45, K46, K47, K48,
1886 K49, K50, K51, K52, K53, K54, K55,
1887 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
1888 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
1889 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
1890 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
1891 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
1892 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
1893 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
1894 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
1964 if (digests_cnt < 16)
1966 for (u32 d = 0; d < digests_cnt; d++)
1968 const u32 final_hash_pos = digests_offset + d;
1970 if (hashes_shown[final_hash_pos]) continue;
1974 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
1975 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
1980 for (int i = 0; i < 32; i++)
1982 const u32 b0 = -((search[0] >> i) & 1);
1983 const u32 b1 = -((search[1] >> i) & 1);
1985 tmpResult |= out[ 0 + i] ^ b0;
1986 tmpResult |= out[32 + i] ^ b1;
1989 if (tmpResult == 0xffffffff) continue;
1991 const u32 slice = 31 - clz (~tmpResult);
1993 const u32 r0 = search[0];
1994 const u32 r1 = search[1];
2007 for (int i = 0; i < 32; i++)
2009 out0[i] = out[ 0 + 31 - i];
2010 out1[i] = out[32 + 31 - i];
2013 transpose32c (out0);
2014 transpose32c (out1);
2017 for (int slice = 0; slice < 32; slice++)
2019 const u32 r0 = out0[31 - slice];
2020 const u32 r1 = out1[31 - slice];
2029 static void m03000s (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
2035 const u32 gid = get_global_id (0);
2036 const u32 lid = get_local_id (0);
2111 const u32 K00 = pws[gid].i[ 0];
2112 const u32 K01 = pws[gid].i[ 1];
2113 const u32 K02 = pws[gid].i[ 2];
2114 const u32 K03 = pws[gid].i[ 3];
2115 const u32 K04 = pws[gid].i[ 4];
2116 const u32 K05 = pws[gid].i[ 5];
2117 const u32 K06 = pws[gid].i[ 6];
2118 const u32 K07 = pws[gid].i[ 7];
2119 const u32 K08 = pws[gid].i[ 8];
2120 const u32 K09 = pws[gid].i[ 9];
2121 const u32 K10 = pws[gid].i[10];
2122 const u32 K11 = pws[gid].i[11];
2123 const u32 K12 = pws[gid].i[12];
2124 const u32 K13 = pws[gid].i[13];
2125 const u32 K14 = pws[gid].i[14];
2126 const u32 K15 = pws[gid].i[15];
2127 const u32 K16 = pws[gid].i[16];
2128 const u32 K17 = pws[gid].i[17];
2129 const u32 K18 = pws[gid].i[18];
2130 const u32 K19 = pws[gid].i[19];
2131 const u32 K20 = pws[gid].i[20];
2132 const u32 K21 = pws[gid].i[21];
2133 const u32 K22 = pws[gid].i[22];
2134 const u32 K23 = pws[gid].i[23];
2135 const u32 K24 = pws[gid].i[24];
2136 const u32 K25 = pws[gid].i[25];
2137 const u32 K26 = pws[gid].i[26];
2138 const u32 K27 = pws[gid].i[27];
2139 const u32 K28 = pws[gid].i[28];
2140 const u32 K29 = pws[gid].i[29];
2141 const u32 K30 = pws[gid].i[30];
2142 const u32 K31 = pws[gid].i[31];
2143 const u32 K32 = pws[gid].i[32];
2144 const u32 K33 = pws[gid].i[33];
2145 const u32 K34 = pws[gid].i[34];
2146 const u32 K35 = pws[gid].i[35];
2147 const u32 K36 = pws[gid].i[36];
2148 const u32 K37 = pws[gid].i[37];
2149 const u32 K38 = pws[gid].i[38];
2150 const u32 K39 = pws[gid].i[39];
2151 const u32 K40 = pws[gid].i[40];
2152 const u32 K41 = pws[gid].i[41];
2153 const u32 K42 = pws[gid].i[42];
2154 const u32 K43 = pws[gid].i[43];
2155 const u32 K44 = pws[gid].i[44];
2156 const u32 K45 = pws[gid].i[45];
2157 const u32 K46 = pws[gid].i[46];
2158 const u32 K47 = pws[gid].i[47];
2159 const u32 K48 = pws[gid].i[48];
2160 const u32 K49 = pws[gid].i[49];
2161 const u32 K50 = pws[gid].i[50];
2162 const u32 K51 = pws[gid].i[51];
2163 const u32 K52 = pws[gid].i[52];
2164 const u32 K53 = pws[gid].i[53];
2165 const u32 K54 = pws[gid].i[54];
2166 const u32 K55 = pws[gid].i[55];
2168 const u32 pc_pos = get_local_id (1);
2170 const u32 il_pos = pc_pos * 32;
2205 k00 |= words_buf_r[pc_pos].b[ 0];
2206 k01 |= words_buf_r[pc_pos].b[ 1];
2207 k02 |= words_buf_r[pc_pos].b[ 2];
2208 k03 |= words_buf_r[pc_pos].b[ 3];
2209 k04 |= words_buf_r[pc_pos].b[ 4];
2210 k05 |= words_buf_r[pc_pos].b[ 5];
2211 k06 |= words_buf_r[pc_pos].b[ 6];
2212 k07 |= words_buf_r[pc_pos].b[ 7];
2213 k08 |= words_buf_r[pc_pos].b[ 8];
2214 k09 |= words_buf_r[pc_pos].b[ 9];
2215 k10 |= words_buf_r[pc_pos].b[10];
2216 k11 |= words_buf_r[pc_pos].b[11];
2217 k12 |= words_buf_r[pc_pos].b[12];
2218 k13 |= words_buf_r[pc_pos].b[13];
2219 k14 |= words_buf_r[pc_pos].b[14];
2220 k15 |= words_buf_r[pc_pos].b[15];
2221 k16 |= words_buf_r[pc_pos].b[16];
2222 k17 |= words_buf_r[pc_pos].b[17];
2223 k18 |= words_buf_r[pc_pos].b[18];
2224 k19 |= words_buf_r[pc_pos].b[19];
2225 k20 |= words_buf_r[pc_pos].b[20];
2226 k21 |= words_buf_r[pc_pos].b[21];
2227 k22 |= words_buf_r[pc_pos].b[22];
2228 k23 |= words_buf_r[pc_pos].b[23];
2229 k24 |= words_buf_r[pc_pos].b[24];
2230 k25 |= words_buf_r[pc_pos].b[25];
2231 k26 |= words_buf_r[pc_pos].b[26];
2232 k27 |= words_buf_r[pc_pos].b[27];
2233 k28 |= words_buf_r[pc_pos].b[28];
2234 k29 |= words_buf_r[pc_pos].b[29];
2235 k30 |= words_buf_r[pc_pos].b[30];
2236 k31 |= words_buf_r[pc_pos].b[31];
2238 // KGS!@#$% including IP
2243 u32 D03 = 0xffffffff;
2245 u32 D05 = 0xffffffff;
2246 u32 D06 = 0xffffffff;
2247 u32 D07 = 0xffffffff;
2253 u32 D13 = 0xffffffff;
2256 u32 D16 = 0xffffffff;
2257 u32 D17 = 0xffffffff;
2262 u32 D22 = 0xffffffff;
2264 u32 D24 = 0xffffffff;
2266 u32 D26 = 0xffffffff;
2268 u32 D28 = 0xffffffff;
2269 u32 D29 = 0xffffffff;
2270 u32 D30 = 0xffffffff;
2271 u32 D31 = 0xffffffff;
2280 u32 D40 = 0xffffffff;
2281 u32 D41 = 0xffffffff;
2282 u32 D42 = 0xffffffff;
2284 u32 D44 = 0xffffffff;
2295 u32 D55 = 0xffffffff;
2298 u32 D58 = 0xffffffff;
2301 u32 D61 = 0xffffffff;
2302 u32 D62 = 0xffffffff;
2303 u32 D63 = 0xffffffff;
2307 k00, k01, k02, k03, k04, k05, k06,
2308 k07, k08, k09, k10, k11, k12, k13,
2309 k14, k15, k16, k17, k18, k19, k20,
2310 k21, k22, k23, k24, k25, k26, k27,
2311 k28, k29, k30, k31, K32, K33, K34,
2312 K35, K36, K37, K38, K39, K40, K41,
2313 K42, K43, K44, K45, K46, K47, K48,
2314 K49, K50, K51, K52, K53, K54, K55,
2315 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
2316 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
2317 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
2318 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
2319 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
2320 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
2321 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
2322 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
2327 tmpResult |= D00 ^ S00;
2328 tmpResult |= D01 ^ S01;
2329 tmpResult |= D02 ^ S02;
2330 tmpResult |= D03 ^ S03;
2331 tmpResult |= D04 ^ S04;
2332 tmpResult |= D05 ^ S05;
2333 tmpResult |= D06 ^ S06;
2334 tmpResult |= D07 ^ S07;
2335 tmpResult |= D08 ^ S08;
2336 tmpResult |= D09 ^ S09;
2337 tmpResult |= D10 ^ S10;
2338 tmpResult |= D11 ^ S11;
2339 tmpResult |= D12 ^ S12;
2340 tmpResult |= D13 ^ S13;
2341 tmpResult |= D14 ^ S14;
2342 tmpResult |= D15 ^ S15;
2344 if (tmpResult == 0xffffffff) return;
2346 tmpResult |= D16 ^ S16;
2347 tmpResult |= D17 ^ S17;
2348 tmpResult |= D18 ^ S18;
2349 tmpResult |= D19 ^ S19;
2350 tmpResult |= D20 ^ S20;
2351 tmpResult |= D21 ^ S21;
2352 tmpResult |= D22 ^ S22;
2353 tmpResult |= D23 ^ S23;
2354 tmpResult |= D24 ^ S24;
2355 tmpResult |= D25 ^ S25;
2356 tmpResult |= D26 ^ S26;
2357 tmpResult |= D27 ^ S27;
2358 tmpResult |= D28 ^ S28;
2359 tmpResult |= D29 ^ S29;
2360 tmpResult |= D30 ^ S30;
2361 tmpResult |= D31 ^ S31;
2363 if (tmpResult == 0xffffffff) return;
2365 tmpResult |= D32 ^ S32;
2366 tmpResult |= D33 ^ S33;
2367 tmpResult |= D34 ^ S34;
2368 tmpResult |= D35 ^ S35;
2369 tmpResult |= D36 ^ S36;
2370 tmpResult |= D37 ^ S37;
2371 tmpResult |= D38 ^ S38;
2372 tmpResult |= D39 ^ S39;
2373 tmpResult |= D40 ^ S40;
2374 tmpResult |= D41 ^ S41;
2375 tmpResult |= D42 ^ S42;
2376 tmpResult |= D43 ^ S43;
2377 tmpResult |= D44 ^ S44;
2378 tmpResult |= D45 ^ S45;
2379 tmpResult |= D46 ^ S46;
2380 tmpResult |= D47 ^ S47;
2382 if (tmpResult == 0xffffffff) return;
2384 tmpResult |= D48 ^ S48;
2385 tmpResult |= D49 ^ S49;
2386 tmpResult |= D50 ^ S50;
2387 tmpResult |= D51 ^ S51;
2388 tmpResult |= D52 ^ S52;
2389 tmpResult |= D53 ^ S53;
2390 tmpResult |= D54 ^ S54;
2391 tmpResult |= D55 ^ S55;
2392 tmpResult |= D56 ^ S56;
2393 tmpResult |= D57 ^ S57;
2394 tmpResult |= D58 ^ S58;
2395 tmpResult |= D59 ^ S59;
2396 tmpResult |= D60 ^ S60;
2397 tmpResult |= D61 ^ S61;
2398 tmpResult |= D62 ^ S62;
2399 tmpResult |= D63 ^ S63;
2401 if (tmpResult == 0xffffffff) return;
2403 const u32 slice = 31 - clz (~tmpResult);
2409 // transpose bitslice base : easy because no overlapping buffers
2410 // mod : attention race conditions, need different buffers for *in and *out
2413 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03000_tb (__global pw_t *pws)
2415 const u32 gid = get_global_id (0);
2417 const u32 w0s = pws[gid].i[0];
2418 const u32 w1s = pws[gid].i[1];
2421 for (int i = 0; i < 32; i += 8)
2423 pws[gid].i[i + 0 + 0] = -((w0s >> (i + 7)) & 1);
2424 pws[gid].i[i + 0 + 1] = -((w0s >> (i + 6)) & 1);
2425 pws[gid].i[i + 0 + 2] = -((w0s >> (i + 5)) & 1);
2426 pws[gid].i[i + 0 + 3] = -((w0s >> (i + 4)) & 1);
2427 pws[gid].i[i + 0 + 4] = -((w0s >> (i + 3)) & 1);
2428 pws[gid].i[i + 0 + 5] = -((w0s >> (i + 2)) & 1);
2429 pws[gid].i[i + 0 + 6] = -((w0s >> (i + 1)) & 1);
2430 pws[gid].i[i + 0 + 7] = -((w0s >> (i + 0)) & 1);
2434 for (int i = 0; i < 24; i += 8)
2436 pws[gid].i[i + 32 + 0] = -((w1s >> (i + 7)) & 1);
2437 pws[gid].i[i + 32 + 1] = -((w1s >> (i + 6)) & 1);
2438 pws[gid].i[i + 32 + 2] = -((w1s >> (i + 5)) & 1);
2439 pws[gid].i[i + 32 + 3] = -((w1s >> (i + 4)) & 1);
2440 pws[gid].i[i + 32 + 4] = -((w1s >> (i + 3)) & 1);
2441 pws[gid].i[i + 32 + 5] = -((w1s >> (i + 2)) & 1);
2442 pws[gid].i[i + 32 + 6] = -((w1s >> (i + 1)) & 1);
2443 pws[gid].i[i + 32 + 7] = -((w1s >> (i + 0)) & 1);
2447 __kernel void __attribute__((reqd_work_group_size (32, 1, 1))) m03000_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
2449 const u32 gid = get_global_id (0);
2451 const u32 block = gid / 32;
2452 const u32 slice = gid % 32;
2454 const u32 w0 = mod[gid];
2457 for (int i = 0; i < 32; i += 8)
2459 atomic_or (&words_buf_r[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice));
2460 atomic_or (&words_buf_r[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice));
2461 atomic_or (&words_buf_r[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice));
2462 atomic_or (&words_buf_r[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice));
2463 atomic_or (&words_buf_r[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice));
2464 atomic_or (&words_buf_r[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice));
2465 atomic_or (&words_buf_r[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice));
2466 atomic_or (&words_buf_r[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice));
2470 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2476 const u32 gid = get_global_id (0);
2477 const u32 lid = get_local_id (0);
2478 const u32 vid = get_local_id (1);
2480 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
2481 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
2483 __local u32 s_S[64];
2487 s_S[ 0 + vid] = -((s0 >> vid) & 1);
2491 s_S[32 + vid] = -((s1 >> vid) & 1);
2494 barrier (CLK_LOCAL_MEM_FENCE);
2496 if (gid >= gid_max) return;
2502 m03000m (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2505 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2509 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2513 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2519 const u32 gid = get_global_id (0);
2520 const u32 lid = get_local_id (0);
2521 const u32 vid = get_local_id (1);
2523 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
2524 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
2526 __local u32 s_S[64];
2530 s_S[ 0 + vid] = -((s0 >> vid) & 1);
2534 s_S[32 + vid] = -((s1 >> vid) & 1);
2537 barrier (CLK_LOCAL_MEM_FENCE);
2539 if (gid >= gid_max) return;
2545 m03000s (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2548 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2552 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m03000_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)