2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes for maxwell were taken from DeepLearningJohnDoe, license below
5 * : sboxes for others were takes fron JtR, license below
10 #include "include/constants.h"
11 #include "include/kernel_vendor.h"
18 #include "include/kernel_functions.c"
19 #include "types_ocl.c"
22 #define COMPARE_S "check_single_comp4_bs.c"
23 #define COMPARE_M "check_multi_comp4_bs.c"
33 // Bitslice DES S-boxes with LOP3.LUT instructions
34 // For NVIDIA Maxwell architecture and CUDA 7.5 RC
35 // by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
37 // Gate counts: 25 24 25 18 25 24 24 23
39 // Depth: 8 7 7 6 8 10 10 8
42 // Note that same S-box function with a lower gate count isn't necessarily faster.
44 // These Boolean expressions corresponding to DES S-boxes were
45 // discovered by <deeplearningjohndoe at gmail.com>
47 // This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
48 // Redistribution and use in source and binary forms, with or without
49 // modification, are permitted.
51 // The underlying mathematical formulas are NOT copyrighted.
54 #define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));
56 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
58 LUT(xAA55AA5500550055, a1, a4, a6, 0xC1)
59 LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
60 LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
61 LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
62 LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
63 LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
64 LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
65 LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
66 LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
67 LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
68 LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
69 LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
70 LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
71 LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
72 LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
73 LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
74 LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
75 LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
76 LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
77 LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
78 LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
79 LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
80 LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
81 LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
82 LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)
90 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
92 LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97)
93 LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
94 LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76)
95 LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
96 LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
97 LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65)
98 LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
99 LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
100 LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
101 LUT(x3333CCCC00000000, a2, a5, a6, 0x14)
102 LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
103 LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
104 LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
105 LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
106 LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
107 LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
108 LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
109 LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
110 LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
111 LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
112 LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
113 LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
114 LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
115 LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)
123 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
125 LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
126 LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
127 LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
128 LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
129 LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
130 LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
131 LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
132 LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49)
133 LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
134 LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
135 LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
136 LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
137 LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
138 LUT(x500F500F500F500F, a1, a3, a4, 0x98)
139 LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
140 LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
141 LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
142 LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
143 LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
144 LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
145 LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
146 LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
147 LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
148 LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
149 LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)
157 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
159 LUT(x55F055F055F055F0, a1, a3, a4, 0x72)
160 LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
161 LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59)
162 LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
163 LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
164 LUT(x9999666699996666, a1, a2, a5, 0x69)
165 LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
166 LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
167 LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
168 LUT(x4848484848484848, a1, a2, a3, 0x12)
169 LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
170 LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
171 LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
172 LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
173 LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
174 LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
175 LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
176 LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)
184 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
186 LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
187 LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9)
188 LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
189 LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
190 LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
191 LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
192 LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
193 LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
194 LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
195 LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
196 LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
197 LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
198 LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
199 LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
200 LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
201 LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
202 LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
203 LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
204 LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
205 LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
206 LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
207 LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
208 LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
209 LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
210 LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)
218 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
220 LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2)
221 LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
222 LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
223 LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
224 LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
225 LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
226 LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
227 LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
228 LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
229 LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
230 LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
231 LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
232 LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
233 LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
234 LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
235 LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
236 LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
237 LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
238 LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
239 LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
240 LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
241 LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
242 LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
243 LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)
251 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
253 LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
254 LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
255 LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
256 LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
257 LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
258 LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
259 LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
260 LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
261 LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
262 LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
263 LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
264 LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
265 LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
266 LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
267 LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
268 LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23)
269 LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
270 LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
271 LUT(xA050A050A050A050, a1, a3, a4, 0x21)
272 LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
273 LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
274 LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
275 LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
276 LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)
284 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
286 LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
287 LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
288 LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
289 LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
290 LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
291 LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
292 LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
293 LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
294 LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
295 LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
296 LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
297 LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
298 LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
299 LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
300 LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
301 LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
302 LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
303 LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
304 LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
305 LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
306 LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
307 LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
308 LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)
319 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
320 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
322 * Gate counts: 49 44 46 33 48 46 46 41
325 * Several same-gate-count expressions for each S-box are included (for use on
326 * different CPUs/GPUs).
328 * These Boolean expressions corresponding to DES S-boxes have been generated
329 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
330 * John the Ripper password cracker: http://www.openwall.com/john/
331 * Being mathematical formulas, they are not copyrighted and are free for reuse
334 * This file (a specific representation of the S-box expressions, surrounding
335 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
336 * Redistribution and use in source and binary forms, with or without
337 * modification, are permitted. (This is a heavily cut-down "BSD license".)
339 * The effort has been sponsored by Rapid7: http://www.rapid7.com
342 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
344 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
346 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
347 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
348 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
349 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
350 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
351 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
352 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
353 u32 x00, x01, x10, x11, x20, x21, x30, x31;
355 x55005500 = a1 & ~a5;
356 x5A0F5A0F = a4 ^ x55005500;
359 x22226666 = x3333FFFF & x66666666;
360 x2D2D6969 = a4 ^ x22226666;
361 x25202160 = x2D2D6969 & ~x5A0F5A0F;
364 x33CCCC33 = a3 ^ x00FFFF00;
365 x4803120C = x5A0F5A0F & ~x33CCCC33;
366 x2222FFFF = a6 | x22226666;
367 x6A21EDF3 = x4803120C ^ x2222FFFF;
368 x4A01CC93 = x6A21EDF3 & ~x25202160;
371 x7F75FFFF = x6A21EDF3 | x5555FFFF;
372 x00D20096 = a5 & ~x2D2D6969;
373 x7FA7FF69 = x7F75FFFF ^ x00D20096;
375 x0A0A0000 = a4 & ~x5555FFFF;
376 x0AD80096 = x00D20096 ^ x0A0A0000;
377 x00999900 = x00FFFF00 & ~x66666666;
378 x0AD99996 = x0AD80096 | x00999900;
380 x22332233 = a3 & ~x55005500;
381 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
382 x054885C0 = x257AA5F0 & ~x22332233;
383 xFAB77A3F = ~x054885C0;
384 x2221EDF3 = x3333FFFF & x6A21EDF3;
385 xD89697CC = xFAB77A3F ^ x2221EDF3;
386 x20 = x7FA7FF69 & ~a2;
387 x21 = x20 ^ xD89697CC;
390 x05B77AC0 = x00FFFF00 ^ x054885C0;
391 x05F77AD6 = x00D20096 | x05B77AC0;
392 x36C48529 = x3333FFFF ^ x05F77AD6;
393 x6391D07C = a1 ^ x36C48529;
394 xBB0747B0 = xD89697CC ^ x6391D07C;
395 x00 = x25202160 | a2;
396 x01 = x00 ^ xBB0747B0;
399 x4C460000 = x3333FFFF ^ x7F75FFFF;
400 x4EDF9996 = x0AD99996 | x4C460000;
401 x2D4E49EA = x6391D07C ^ x4EDF9996;
402 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
403 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
404 x10 = x4A01CC93 | a2;
405 x11 = x10 ^ x96B1B65A;
408 x5AFF5AFF = a5 | x5A0F5A0F;
409 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
410 x4201C010 = x4A01CC93 & x6391D07C;
411 x10B0D205 = x52B11215 ^ x4201C010;
412 x30 = x10B0D205 | a2;
413 x31 = x30 ^ x0AD99996;
417 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
420 u32 x55550000, x00AA00FF, x33BB33FF;
421 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
422 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
423 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
424 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
425 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
426 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
427 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
428 u32 x00, x01, x10, x11, x20, x21, x30, x31;
432 x55550000 = a1 & ~a6;
433 x00AA00FF = a5 & ~x55550000;
434 x33BB33FF = a2 | x00AA00FF;
436 x33CC0000 = x33CC33CC & ~a6;
437 x11441144 = a1 & x33CC33CC;
438 x11BB11BB = a5 ^ x11441144;
439 x003311BB = x11BB11BB & ~x33CC0000;
442 x336600FF = x00AA00FF ^ x33CC0000;
443 x332200FF = x33BB33FF & x336600FF;
444 x332200F0 = x332200FF & ~x00000F0F;
446 x0302000F = a3 & x332200FF;
448 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
449 x33CCCC33 = a6 ^ x33CC33CC;
450 x33CCC030 = x33CCCC33 & ~x00000F0F;
451 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
452 x10 = a4 & ~x332200F0;
453 x11 = x10 ^ x9A646A95;
456 x00333303 = a2 & ~x33CCC030;
457 x118822B8 = x11BB11BB ^ x00333303;
458 xA8208805 = xA9A8AAA5 & ~x118822B8;
459 x3CC3C33C = a3 ^ x33CCCC33;
460 x94E34B39 = xA8208805 ^ x3CC3C33C;
461 x00 = x33BB33FF & ~a4;
462 x01 = x00 ^ x94E34B39;
465 x0331330C = x0302000F ^ x00333303;
466 x3FF3F33C = x3CC3C33C | x0331330C;
467 xA9DF596A = x33BB33FF ^ x9A646A95;
468 xA9DF5F6F = x00000F0F | xA9DF596A;
469 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
471 xA9466A6A = x332200FF ^ x9A646A95;
472 x3DA52153 = x94E34B39 ^ xA9466A6A;
473 x29850143 = xA9DF5F6F & x3DA52153;
474 x33C0330C = x33CC33CC & x3FF3F33C;
475 x1A45324F = x29850143 ^ x33C0330C;
476 x20 = x1A45324F | a4;
477 x21 = x20 ^ x962CAC53;
480 x0A451047 = x1A45324F & ~x118822B8;
481 xBBDFDD7B = x33CCCC33 | xA9DF596A;
482 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
483 x30 = x003311BB | a4;
484 x31 = x30 ^ xB19ACD3C;
488 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
490 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
491 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
492 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
493 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
494 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
495 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
496 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
497 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
498 u32 x00, x01, x10, x11, x20, x21, x30, x31;
500 x44444444 = a1 & ~a2;
502 x4F4FF4F4 = x44444444 | x0F0FF0F0;
504 x00AAAA00 = x00FFFF00 & ~a1;
505 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
507 x3C3CC3C3 = a2 ^ x0F0FF0F0;
508 x3C3C0000 = x3C3CC3C3 & ~a6;
509 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
510 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
512 x00005EF4 = a6 & x4FE55EF4;
513 x00FF5EFF = a4 | x00005EF4;
514 x00555455 = a1 & x00FF5EFF;
515 x3C699796 = x3C3CC3C3 ^ x00555455;
516 x30 = x4FE55EF4 & ~a5;
517 x31 = x30 ^ x3C699796;
520 x000FF000 = x0F0FF0F0 & x00FFFF00;
522 x26D9A15E = x7373F4F4 ^ x55AA55AA;
523 x2FDFAF5F = a3 | x26D9A15E;
524 x2FD00F5F = x2FDFAF5F & ~x000FF000;
526 x55AAFFAA = x00AAAA00 | x55AA55AA;
527 x28410014 = x3C699796 & ~x55AAFFAA;
529 x000000CC = x000000FF & ~a2;
530 x284100D8 = x28410014 ^ x000000CC;
532 x204100D0 = x7373F4F4 & x284100D8;
533 x3C3CC3FF = x3C3CC3C3 | x000000FF;
534 x1C3CC32F = x3C3CC3FF & ~x204100D0;
535 x4969967A = a1 ^ x1C3CC32F;
536 x10 = x2FD00F5F & a5;
537 x11 = x10 ^ x4969967A;
540 x4CC44CC4 = x4FE55EF4 & ~a2;
541 x40C040C0 = x4CC44CC4 & ~a3;
542 xC3C33C3C = ~x3C3CC3C3;
543 x9669C396 = x55AAFFAA ^ xC3C33C3C;
544 xD6A98356 = x40C040C0 ^ x9669C396;
545 x00 = a5 & ~x0C840A00;
546 x01 = x00 ^ xD6A98356;
549 xD6E9C3D6 = x40C040C0 | x9669C396;
550 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
551 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
552 x001A000B = a4 & ~x4FE55EF4;
553 x9A1F2D1B = x9A072D12 | x001A000B;
554 x20 = a5 & ~x284100D8;
555 x21 = x20 ^ x9A1F2D1B;
559 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
561 u32 x5A5A5A5A, x0F0FF0F0;
562 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
563 x52FBCA0F, x61C8F93C;
564 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
565 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
566 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
567 u32 x00, x01, x10, x11, x20, x21, x30, x31;
572 x33FFCC00 = a5 ^ x33FF33FF;
573 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
574 x0C0CC0C0 = x0F0FF0F0 & ~a2;
575 x0CF3C03F = a4 ^ x0C0CC0C0;
576 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
577 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
578 x61C8F93C = a2 ^ x52FBCA0F;
580 x00C0C03C = x0CF3C03F & x61C8F93C;
581 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
582 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
583 x30908326 = x3B92A366 & ~x0F0F30C0;
584 x3C90B3D6 = x0C0030F0 ^ x30908326;
587 x0C0CFFFF = a5 | x0C0CC0C0;
588 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
589 x04124C11 = x379E5C99 & ~x33CC33CC;
590 x56E9861E = x52FBCA0F ^ x04124C11;
591 x00 = a6 & ~x3C90B3D6;
592 x01 = x00 ^ x56E9861E;
595 xA91679E1 = ~x56E9861E;
596 x10 = x3C90B3D6 & ~a6;
597 x11 = x10 ^ xA91679E1;
600 x9586CA37 = x3C90B3D6 ^ xA91679E1;
601 x8402C833 = x9586CA37 & ~x33CC33CC;
602 x84C2C83F = x00C0C03C | x8402C833;
603 xB35C94A6 = x379E5C99 ^ x84C2C83F;
604 x20 = x61C8F93C | a6;
605 x21 = x20 ^ xB35C94A6;
608 x30 = a6 & x61C8F93C;
609 x31 = x30 ^ xB35C94A6;
613 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
615 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
616 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
617 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
618 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
619 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
620 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
621 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
622 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
623 u32 x00, x01, x10, x11, x20, x21, x30, x31;
626 x77770000 = x77777777 & ~a6;
627 x22225555 = a1 ^ x77770000;
628 x11116666 = a3 ^ x22225555;
629 x1F1F6F6F = a4 | x11116666;
631 x70700000 = x77770000 & ~a4;
632 x43433333 = a3 ^ x70700000;
633 x00430033 = a5 & x43433333;
634 x55557777 = a1 | x11116666;
635 x55167744 = x00430033 ^ x55557777;
636 x5A19784B = a4 ^ x55167744;
638 x5A1987B4 = a6 ^ x5A19784B;
639 x7A3BD7F5 = x22225555 | x5A1987B4;
640 x003B00F5 = a5 & x7A3BD7F5;
641 x221955A0 = x22225555 ^ x003B00F5;
642 x05050707 = a4 & x55557777;
643 x271C52A7 = x221955A0 ^ x05050707;
645 x2A2A82A0 = x7A3BD7F5 & ~a1;
646 x6969B193 = x43433333 ^ x2A2A82A0;
647 x1FE06F90 = a5 ^ x1F1F6F6F;
648 x16804E00 = x1FE06F90 & ~x6969B193;
649 xE97FB1FF = ~x16804E00;
650 x20 = xE97FB1FF & ~a2;
651 x21 = x20 ^ x5A19784B;
654 x43403302 = x43433333 & ~x003B00F5;
655 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
656 x37DEFFB7 = x271C52A7 | x35CAED30;
657 x349ECCB5 = x37DEFFB7 & ~x43403302;
658 x0B01234A = x1F1F6F6F & ~x349ECCB5;
660 x101884B4 = x5A1987B4 & x349ECCB5;
661 x0FF8EB24 = x1FE06F90 ^ x101884B4;
662 x41413333 = x43433333 & x55557777;
663 x4FF9FB37 = x0FF8EB24 | x41413333;
664 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
665 x30 = x4FC2FBC2 & a2;
666 x31 = x30 ^ x271C52A7;
669 x22222222 = a1 ^ x77777777;
670 x16BCEE97 = x349ECCB5 ^ x22222222;
671 x0F080B04 = a4 & x0FF8EB24;
672 x19B4E593 = x16BCEE97 ^ x0F080B04;
673 x00 = x0B01234A | a2;
674 x01 = x00 ^ x19B4E593;
677 x5C5C5C5C = x1F1F6F6F ^ x43433333;
678 x4448184C = x5C5C5C5C & ~x19B4E593;
679 x2DDABE71 = x22225555 ^ x0FF8EB24;
680 x6992A63D = x4448184C ^ x2DDABE71;
681 x10 = x1F1F6F6F & a2;
682 x11 = x10 ^ x6992A63D;
686 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
689 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
690 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
691 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
692 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
693 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
694 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
695 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
696 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
697 u32 x00, x01, x10, x11, x20, x21, x30, x31;
702 x11115555 = a1 & x3333FFFF;
703 x22DD6699 = x33CC33CC ^ x11115555;
704 x22DD9966 = a6 ^ x22DD6699;
705 x00220099 = a5 & ~x22DD9966;
707 x00551144 = a1 & x22DD9966;
708 x33662277 = a2 ^ x00551144;
710 x7B7E7A7F = x33662277 | x5A5A5A5A;
711 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
713 x09030C06 = a3 & x59A31CE6;
714 x09030000 = x09030C06 & ~a6;
715 x336622FF = x00220099 | x33662277;
716 x3A6522FF = x09030000 ^ x336622FF;
717 x30 = x3A6522FF & a4;
718 x31 = x30 ^ x59A31CE6;
721 x484D494C = a2 ^ x7B7E7A7F;
722 x0000B6B3 = a6 & ~x484D494C;
723 x0F0FB9BC = a3 ^ x0000B6B3;
724 x00FC00F9 = a5 & ~x09030C06;
725 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
727 x5DF75DF7 = a1 | x59A31CE6;
728 x116600F7 = x336622FF & x5DF75DF7;
729 x1E69B94B = x0F0FB9BC ^ x116600F7;
730 x1668B94B = x1E69B94B & ~x09030000;
731 x20 = x00220099 | a4;
732 x21 = x20 ^ x1668B94B;
735 x7B7B7B7B = a2 | x5A5A5A5A;
736 x411E5984 = x3A6522FF ^ x7B7B7B7B;
737 x1FFFFDFD = x11115555 | x0FFFB9FD;
738 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
740 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
741 x004B002D = a5 & ~x3CB4DFD2;
742 xB7B2B6B3 = ~x484D494C;
743 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
744 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
745 x10 = xCC82CDE5 & ~a4;
746 x11 = x10 ^ x5EE1A479;
749 x0055EEBB = a6 ^ x00551144;
750 x5A5AECE9 = a1 ^ x0F0FB9BC;
751 x0050ECA9 = x0055EEBB & x5A5AECE9;
752 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
753 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
754 x00 = x0FFFB9FD & ~a4;
755 x01 = x00 ^ xC59A2D67;
759 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
761 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
762 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
763 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
764 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
765 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
766 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
767 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
768 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
769 u32 x00, x01, x10, x11, x20, x21, x30, x31;
772 x3CC33CC3 = a3 ^ x0FF00FF0;
773 x00003CC3 = a6 & x3CC33CC3;
774 x0F000F00 = a4 & x0FF00FF0;
775 x5A555A55 = a2 ^ x0F000F00;
776 x00001841 = x00003CC3 & x5A555A55;
778 x00000F00 = a6 & x0F000F00;
779 x33333C33 = a3 ^ x00000F00;
780 x7B777E77 = x5A555A55 | x33333C33;
781 x0FF0F00F = a6 ^ x0FF00FF0;
782 x74878E78 = x7B777E77 ^ x0FF0F00F;
783 x30 = a1 & ~x00001841;
784 x31 = x30 ^ x74878E78;
787 x003C003C = a5 & ~x3CC33CC3;
788 x5A7D5A7D = x5A555A55 | x003C003C;
789 x333300F0 = x00003CC3 ^ x33333C33;
790 x694E5A8D = x5A7D5A7D ^ x333300F0;
792 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
793 x000F0303 = a4 & ~x0FF0CCCC;
794 x5A505854 = x5A555A55 & ~x000F0303;
795 x33CC000F = a5 ^ x333300F0;
796 x699C585B = x5A505854 ^ x33CC000F;
798 x7F878F78 = x0F000F00 | x74878E78;
799 x21101013 = a3 & x699C585B;
800 x7F979F7B = x7F878F78 | x21101013;
801 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
802 x4F9493BB = x7F979F7B ^ x30030CC0;
803 x00 = x4F9493BB & ~a1;
804 x01 = x00 ^ x694E5A8D;
807 x6F9CDBFB = x699C585B | x4F9493BB;
808 x0000DBFB = a6 & x6F9CDBFB;
809 x00005151 = a2 & x0000DBFB;
810 x26DAC936 = x694E5A8D ^ x4F9493BB;
811 x26DA9867 = x00005151 ^ x26DAC936;
813 x27DA9877 = x21101013 | x26DA9867;
814 x27DA438C = x0000DBFB ^ x27DA9877;
815 x2625C9C9 = a5 ^ x26DAC936;
816 x27FFCBCD = x27DA438C | x2625C9C9;
817 x20 = x27FFCBCD & a1;
818 x21 = x20 ^ x699C585B;
821 x27FF1036 = x0000DBFB ^ x27FFCBCD;
822 x27FF103E = x003C003C | x27FF1036;
823 xB06B6C44 = ~x4F9493BB;
824 x97947C7A = x27FF103E ^ xB06B6C44;
825 x10 = x97947C7A & ~a1;
826 x11 = x10 ^ x26DA9867;
830 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
832 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
833 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
834 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
835 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
836 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
837 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
838 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
839 u32 x00, x01, x10, x11, x20, x21, x30, x31;
841 x0C0C0C0C = a3 & ~a2;
842 x0000F0F0 = a5 & ~a3;
843 x00FFF00F = a4 ^ x0000F0F0;
844 x00555005 = a1 & x00FFF00F;
845 x00515001 = x00555005 & ~x0C0C0C0C;
847 x33000330 = a2 & ~x00FFF00F;
848 x77555775 = a1 | x33000330;
849 x30303030 = a2 & ~a3;
850 x3030CFCF = a5 ^ x30303030;
851 x30104745 = x77555775 & x3030CFCF;
852 x30555745 = x00555005 | x30104745;
854 xFF000FF0 = ~x00FFF00F;
855 xCF1048B5 = x30104745 ^ xFF000FF0;
856 x080A080A = a3 & ~x77555775;
857 xC71A40BF = xCF1048B5 ^ x080A080A;
858 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
859 x10 = x00515001 | a6;
860 x11 = x10 ^ xCB164CB3;
863 x9E4319E6 = a1 ^ xCB164CB3;
864 x000019E6 = a5 & x9E4319E6;
865 xF429738C = a2 ^ xC71A40BF;
866 xF4296A6A = x000019E6 ^ xF429738C;
867 xC729695A = x33000330 ^ xF4296A6A;
869 xC47C3D2F = x30555745 ^ xF4296A6A;
870 xF77F3F3F = a2 | xC47C3D2F;
871 x9E43E619 = a5 ^ x9E4319E6;
872 x693CD926 = xF77F3F3F ^ x9E43E619;
873 x20 = x30555745 & a6;
874 x21 = x20 ^ x693CD926;
877 xF719A695 = x3030CFCF ^ xC729695A;
878 xF4FF73FF = a4 | xF429738C;
879 x03E6D56A = xF719A695 ^ xF4FF73FF;
880 x56B3803F = a1 ^ x03E6D56A;
881 x30 = x56B3803F & a6;
882 x31 = x30 ^ xC729695A;
885 xF700A600 = xF719A695 & ~a4;
886 x61008000 = x693CD926 & xF700A600;
887 x03B7856B = x00515001 ^ x03E6D56A;
888 x62B7056B = x61008000 ^ x03B7856B;
889 x00 = x62B7056B | a6;
890 x01 = x00 ^ xC729695A;
899 #define KXX_DECL volatile
900 #define sXXX_DECL volatile
903 * Bitslice DES S-boxes making use of a vector conditional select operation
904 * (e.g., vsel on PowerPC with AltiVec).
906 * Gate counts: 36 33 33 26 35 34 34 32
909 * Several same-gate-count expressions for each S-box are included (for use on
910 * different CPUs/GPUs).
912 * These Boolean expressions corresponding to DES S-boxes have been generated
913 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
914 * John the Ripper password cracker: http://www.openwall.com/john/
915 * Being mathematical formulas, they are not copyrighted and are free for reuse
918 * This file (a specific representation of the S-box expressions, surrounding
919 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
920 * Redistribution and use in source and binary forms, with or without
921 * modification, are permitted. (This is a heavily cut-down "BSD license".)
923 * The effort has been sponsored by Rapid7: http://www.rapid7.com
926 #define vnot(dst, a) (dst) = ~(a)
927 #define vand(dst, a, b) (dst) = (a) & (b)
928 #define vor(dst, a, b) (dst) = (a) | (b)
929 #define vandn(dst, a, b) (dst) = (a) & ~(b)
930 #define vxor(dst, a, b) (dst) = (a) ^ (b)
931 #define vsel(dst, a, b, c) (dst) = bitselect((a),(b),(c))
934 s1(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
935 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
937 u32 x0F0F3333, x3C3C3C3C, x55FF55FF, x69C369C3, x0903B73F, x09FCB7C0,
939 u32 x55AFD1B7, x3C3C69C3, x6993B874;
940 u32 x5CEDE59F, x09FCE295, x5D91A51E, x529E962D;
941 u32 x29EEADC0, x4B8771A3, x428679F3, x6B68D433;
942 u32 x5BA7E193, x026F12F3, x6B27C493, x94D83B6C;
943 u32 x965E0B0F, x3327A113, x847F0A1F, xD6E19C32;
944 u32 x0DBCE883, x3A25A215, x37994A96;
945 u32 x8A487EA7, x8B480F07, xB96C2D16;
948 vsel(x0F0F3333, a3, a2, a5);
949 vxor(x3C3C3C3C, a2, a3);
950 vor(x55FF55FF, a1, a4);
951 vxor(x69C369C3, x3C3C3C3C, x55FF55FF);
952 vsel(x0903B73F, a5, x0F0F3333, x69C369C3);
953 vxor(x09FCB7C0, a4, x0903B73F);
954 vxor(x5CA9E295, a1, x09FCB7C0);
956 vsel(x55AFD1B7, x5CA9E295, x55FF55FF, x0F0F3333);
957 vsel(x3C3C69C3, x3C3C3C3C, x69C369C3, a5);
958 vxor(x6993B874, x55AFD1B7, x3C3C69C3);
960 vsel(x5CEDE59F, x55FF55FF, x5CA9E295, x6993B874);
961 vsel(x09FCE295, x09FCB7C0, x5CA9E295, a5);
962 vsel(x5D91A51E, x5CEDE59F, x6993B874, x09FCE295);
963 vxor(x529E962D, x0F0F3333, x5D91A51E);
965 vsel(x29EEADC0, x69C369C3, x09FCB7C0, x5CEDE59F);
966 vsel(x4B8771A3, x0F0F3333, x69C369C3, x5CA9E295);
967 vsel(x428679F3, a5, x4B8771A3, x529E962D);
968 vxor(x6B68D433, x29EEADC0, x428679F3);
970 vsel(x5BA7E193, x5CA9E295, x4B8771A3, a3);
971 vsel(x026F12F3, a4, x0F0F3333, x529E962D);
972 vsel(x6B27C493, x6B68D433, x5BA7E193, x026F12F3);
973 vnot(x94D83B6C, x6B27C493);
974 vsel(x0, x94D83B6C, x6B68D433, a6);
975 vxor(*out1, *out1, x0);
977 vsel(x965E0B0F, x94D83B6C, a3, x428679F3);
978 vsel(x3327A113, x5BA7E193, a2, x69C369C3);
979 vsel(x847F0A1F, x965E0B0F, a4, x3327A113);
980 vxor(xD6E19C32, x529E962D, x847F0A1F);
981 vsel(x1, xD6E19C32, x5CA9E295, a6);
982 vxor(*out2, *out2, x1);
984 vsel(x0DBCE883, x09FCE295, x3C3C69C3, x847F0A1F);
985 vsel(x3A25A215, x3327A113, x5CA9E295, x0903B73F);
986 vxor(x37994A96, x0DBCE883, x3A25A215);
987 vsel(x3, x37994A96, x529E962D, a6);
988 vxor(*out4, *out4, x3);
990 vxor(x8A487EA7, x5CA9E295, xD6E19C32);
991 vsel(x8B480F07, a3, x8A487EA7, x847F0A1F);
992 vsel(xB96C2D16, x8B480F07, x3C3C3C3C, x3A25A215);
993 vsel(x2, xB96C2D16, x6993B874, a6);
994 vxor(*out3, *out3, x2);
998 s2(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
999 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1001 u32 x55553333, x0055FF33, x33270F03, x66725A56, x00FFFF00, x668DA556;
1002 u32 x0F0F5A56, xF0F0A5A9, xA5A5969A, xA55A699A;
1003 u32 x0F5AF03C, x6600FF56, x87A5F09C;
1004 u32 xA55A963C, x3C69C30F, xB44BC32D;
1005 u32 x66D7CC56, x0F4B0F2D, x699CC37B, x996C66D2;
1006 u32 xB46C662D, x278DB412, xB66CB43B;
1007 u32 xD2DC4E52, x27993333, xD2994E33;
1008 u32 x278D0F2D, x2E0E547B, x09976748;
1011 vsel(x55553333, a1, a3, a6);
1012 vsel(x0055FF33, a6, x55553333, a5);
1013 vsel(x33270F03, a3, a4, x0055FF33);
1014 vxor(x66725A56, a1, x33270F03);
1015 vxor(x00FFFF00, a5, a6);
1016 vxor(x668DA556, x66725A56, x00FFFF00);
1018 vsel(x0F0F5A56, a4, x66725A56, a6);
1019 vnot(xF0F0A5A9, x0F0F5A56);
1020 vxor(xA5A5969A, x55553333, xF0F0A5A9);
1021 vxor(xA55A699A, x00FFFF00, xA5A5969A);
1022 vsel(x1, xA55A699A, x668DA556, a2);
1023 vxor(*out2, *out2, x1);
1025 vxor(x0F5AF03C, a4, x0055FF33);
1026 vsel(x6600FF56, x66725A56, a6, x00FFFF00);
1027 vsel(x87A5F09C, xA5A5969A, x0F5AF03C, x6600FF56);
1029 vsel(xA55A963C, xA5A5969A, x0F5AF03C, a5);
1030 vxor(x3C69C30F, a3, x0F5AF03C);
1031 vsel(xB44BC32D, xA55A963C, x3C69C30F, a1);
1033 vsel(x66D7CC56, x66725A56, x668DA556, xA5A5969A);
1034 vsel(x0F4B0F2D, a4, xB44BC32D, a5);
1035 vxor(x699CC37B, x66D7CC56, x0F4B0F2D);
1036 vxor(x996C66D2, xF0F0A5A9, x699CC37B);
1037 vsel(x0, x996C66D2, xB44BC32D, a2);
1038 vxor(*out1, *out1, x0);
1040 vsel(xB46C662D, xB44BC32D, x996C66D2, x00FFFF00);
1041 vsel(x278DB412, x668DA556, xA5A5969A, a1);
1042 vsel(xB66CB43B, xB46C662D, x278DB412, x6600FF56);
1044 vsel(xD2DC4E52, x66D7CC56, x996C66D2, xB44BC32D);
1045 vsel(x27993333, x278DB412, a3, x0055FF33);
1046 vsel(xD2994E33, xD2DC4E52, x27993333, a5);
1047 vsel(x3, x87A5F09C, xD2994E33, a2);
1048 vxor(*out4, *out4, x3);
1050 vsel(x278D0F2D, x278DB412, x0F4B0F2D, a6);
1051 vsel(x2E0E547B, x0F0F5A56, xB66CB43B, x278D0F2D);
1052 vxor(x09976748, x27993333, x2E0E547B);
1053 vsel(x2, xB66CB43B, x09976748, a2);
1054 vxor(*out3, *out3, x2);
1058 s3(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1059 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1061 u32 x0F330F33, x0F33F0CC, x5A66A599;
1062 u32 x2111B7BB, x03FF3033, x05BB50EE, x074F201F, x265E97A4;
1063 u32 x556BA09E, x665A93AC, x99A56C53;
1064 u32 x25A1A797, x5713754C, x66559355, x47B135C6;
1065 u32 x9A5A5C60, xD07AF8F8, x87698DB4, xE13C1EE1;
1066 u32 x9E48CDE4, x655B905E, x00A55CFF, x9E49915E;
1067 u32 xD6599874, x05330022, xD2699876;
1068 u32 x665F9364, xD573F0F2, xB32C6396;
1071 vsel(x0F330F33, a4, a3, a5);
1072 vxor(x0F33F0CC, a6, x0F330F33);
1073 vxor(x5A66A599, a2, x0F33F0CC);
1075 vsel(x2111B7BB, a3, a6, x5A66A599);
1076 vsel(x03FF3033, a5, a3, x0F33F0CC);
1077 vsel(x05BB50EE, a5, x0F33F0CC, a2);
1078 vsel(x074F201F, x03FF3033, a4, x05BB50EE);
1079 vxor(x265E97A4, x2111B7BB, x074F201F);
1081 vsel(x556BA09E, x5A66A599, x05BB50EE, a4);
1082 vsel(x665A93AC, x556BA09E, x265E97A4, a3);
1083 vnot(x99A56C53, x665A93AC);
1084 vsel(x1, x265E97A4, x99A56C53, a1);
1085 vxor(*out2, *out2, x1);
1087 vxor(x25A1A797, x03FF3033, x265E97A4);
1088 vsel(x5713754C, a2, x0F33F0CC, x074F201F);
1089 vsel(x66559355, x665A93AC, a2, a5);
1090 vsel(x47B135C6, x25A1A797, x5713754C, x66559355);
1092 vxor(x9A5A5C60, x03FF3033, x99A56C53);
1093 vsel(xD07AF8F8, x9A5A5C60, x556BA09E, x5A66A599);
1094 vxor(x87698DB4, x5713754C, xD07AF8F8);
1095 vxor(xE13C1EE1, x66559355, x87698DB4);
1097 vsel(x9E48CDE4, x9A5A5C60, x87698DB4, x265E97A4);
1098 vsel(x655B905E, x66559355, x05BB50EE, a4);
1099 vsel(x00A55CFF, a5, a6, x9A5A5C60);
1100 vsel(x9E49915E, x9E48CDE4, x655B905E, x00A55CFF);
1101 vsel(x0, x9E49915E, xE13C1EE1, a1);
1102 vxor(*out1, *out1, x0);
1104 vsel(xD6599874, xD07AF8F8, x66559355, x0F33F0CC);
1105 vand(x05330022, x0F330F33, x05BB50EE);
1106 vsel(xD2699876, xD6599874, x00A55CFF, x05330022);
1107 vsel(x3, x5A66A599, xD2699876, a1);
1108 vxor(*out4, *out4, x3);
1110 vsel(x665F9364, x265E97A4, x66559355, x47B135C6);
1111 vsel(xD573F0F2, xD07AF8F8, x05330022, a4);
1112 vxor(xB32C6396, x665F9364, xD573F0F2);
1113 vsel(x2, xB32C6396, x47B135C6, a1);
1114 vxor(*out3, *out3, x2);
1118 s4(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1119 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1121 u32 x0505AFAF, x0555AF55, x0A5AA05A, x46566456, x0A0A5F5F, x0AF55FA0,
1122 x0AF50F0F, x4CA36B59;
1124 u32 x01BB23BB, x5050FAFA, xA31C26BE, xA91679E1;
1126 u32 x50E9FA1E, x0AF55F00, x827D9784, xD2946D9A;
1127 u32 x31F720B3, x11FB21B3, x4712A7AD, x9586CA37;
1130 vsel(x0505AFAF, a5, a3, a1);
1131 vsel(x0555AF55, x0505AFAF, a1, a4);
1132 vxor(x0A5AA05A, a3, x0555AF55);
1133 vsel(x46566456, a1, x0A5AA05A, a2);
1134 vsel(x0A0A5F5F, a3, a5, a1);
1135 vxor(x0AF55FA0, a4, x0A0A5F5F);
1136 vsel(x0AF50F0F, x0AF55FA0, a3, a5);
1137 vxor(x4CA36B59, x46566456, x0AF50F0F);
1139 vnot(xB35C94A6, x4CA36B59);
1141 vsel(x01BB23BB, a4, a2, x0555AF55);
1142 vxor(x5050FAFA, a1, x0505AFAF);
1143 vsel(xA31C26BE, xB35C94A6, x01BB23BB, x5050FAFA);
1144 vxor(xA91679E1, x0A0A5F5F, xA31C26BE);
1146 vnot(x56E9861E, xA91679E1);
1148 vsel(x50E9FA1E, x5050FAFA, x56E9861E, a4);
1149 vsel(x0AF55F00, x0AF50F0F, x0AF55FA0, x0A0A5F5F);
1150 vsel(x827D9784, xB35C94A6, x0AF55F00, a2);
1151 vxor(xD2946D9A, x50E9FA1E, x827D9784);
1152 vsel(x2, xD2946D9A, x4CA36B59, a6);
1153 vxor(*out3, *out3, x2);
1154 vsel(x3, xB35C94A6, xD2946D9A, a6);
1155 vxor(*out4, *out4, x3);
1157 vsel(x31F720B3, a2, a4, x0AF55FA0);
1158 vsel(x11FB21B3, x01BB23BB, x31F720B3, x5050FAFA);
1159 vxor(x4712A7AD, x56E9861E, x11FB21B3);
1160 vxor(x9586CA37, xD2946D9A, x4712A7AD);
1161 vsel(x0, x56E9861E, x9586CA37, a6);
1162 vxor(*out1, *out1, x0);
1163 vsel(x1, x9586CA37, xA91679E1, a6);
1164 vxor(*out2, *out2, x1);
1168 s5(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1169 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1171 u32 x550F550F, xAAF0AAF0, xA5F5A5F5, x96C696C6, x00FFFF00, x963969C6;
1172 u32 x2E3C2E3C, xB73121F7, x1501DF0F, x00558A5F, x2E69A463;
1173 u32 x0679ED42, x045157FD, xB32077FF, x9D49D39C;
1174 u32 xAC81CFB2, xF72577AF, x5BA4B81D;
1175 u32 x5BA477AF, x4895469F, x3A35273A, x1A35669A;
1176 u32 x12E6283D, x9E47D3D4, x1A676AB4;
1177 u32 x891556DF, xE5E77F82, x6CF2295D;
1178 u32 x2E3CA5F5, x9697C1C6, x369CC1D6;
1181 vsel(x550F550F, a1, a3, a5);
1182 vnot(xAAF0AAF0, x550F550F);
1183 vsel(xA5F5A5F5, xAAF0AAF0, a1, a3);
1184 vxor(x96C696C6, a2, xA5F5A5F5);
1185 vxor(x00FFFF00, a5, a6);
1186 vxor(x963969C6, x96C696C6, x00FFFF00);
1188 vsel(x2E3C2E3C, a3, xAAF0AAF0, a2);
1189 vsel(xB73121F7, a2, x963969C6, x96C696C6);
1190 vsel(x1501DF0F, a6, x550F550F, xB73121F7);
1191 vsel(x00558A5F, x1501DF0F, a5, a1);
1192 vxor(x2E69A463, x2E3C2E3C, x00558A5F);
1194 vsel(x0679ED42, x00FFFF00, x2E69A463, x96C696C6);
1195 vsel(x045157FD, a6, a1, x0679ED42);
1196 vsel(xB32077FF, xB73121F7, a6, x045157FD);
1197 vxor(x9D49D39C, x2E69A463, xB32077FF);
1198 vsel(x2, x9D49D39C, x2E69A463, a4);
1199 vxor(*out3, *out3, x2);
1201 vsel(xAC81CFB2, xAAF0AAF0, x1501DF0F, x0679ED42);
1202 vsel(xF72577AF, xB32077FF, x550F550F, a1);
1203 vxor(x5BA4B81D, xAC81CFB2, xF72577AF);
1204 vsel(x1, x5BA4B81D, x963969C6, a4);
1205 vxor(*out2, *out2, x1);
1207 vsel(x5BA477AF, x5BA4B81D, xF72577AF, a6);
1208 vsel(x4895469F, x5BA477AF, x00558A5F, a2);
1209 vsel(x3A35273A, x2E3C2E3C, a2, x963969C6);
1210 vsel(x1A35669A, x4895469F, x3A35273A, x5BA4B81D);
1212 vsel(x12E6283D, a5, x5BA4B81D, x963969C6);
1213 vsel(x9E47D3D4, x96C696C6, x9D49D39C, xAC81CFB2);
1214 vsel(x1A676AB4, x12E6283D, x9E47D3D4, x4895469F);
1216 vsel(x891556DF, xB32077FF, x4895469F, x3A35273A);
1217 vsel(xE5E77F82, xF72577AF, x00FFFF00, x12E6283D);
1218 vxor(x6CF2295D, x891556DF, xE5E77F82);
1219 vsel(x3, x1A35669A, x6CF2295D, a4);
1220 vxor(*out4, *out4, x3);
1222 vsel(x2E3CA5F5, x2E3C2E3C, xA5F5A5F5, a6);
1223 vsel(x9697C1C6, x96C696C6, x963969C6, x045157FD);
1224 vsel(x369CC1D6, x2E3CA5F5, x9697C1C6, x5BA477AF);
1225 vsel(x0, x369CC1D6, x1A676AB4, a4);
1226 vxor(*out1, *out1, x0);
1230 s6(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1231 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1233 u32 x555500FF, x666633CC, x606F30CF, x353A659A, x353A9A65, xCAC5659A;
1234 u32 x353A6565, x0A3F0A6F, x6C5939A3, x5963A3C6;
1235 u32 x35FF659A, x3AF06A95, x05CF0A9F, x16E94A97;
1236 u32 x86CD4C9B, x12E0FFFD, x942D9A67;
1237 u32 x142956AB, x455D45DF, x1C3EE619;
1238 u32 x2AEA70D5, x20CF7A9F, x3CF19C86, x69A49C79;
1239 u32 x840DBB67, x6DA19C1E, x925E63E1;
1240 u32 x9C3CA761, x257A75D5, xB946D2B4;
1243 vsel(x555500FF, a1, a4, a5);
1244 vxor(x666633CC, a2, x555500FF);
1245 vsel(x606F30CF, x666633CC, a4, a3);
1246 vxor(x353A659A, a1, x606F30CF);
1247 vxor(x353A9A65, a5, x353A659A);
1248 vnot(xCAC5659A, x353A9A65);
1250 vsel(x353A6565, x353A659A, x353A9A65, a4);
1251 vsel(x0A3F0A6F, a3, a4, x353A6565);
1252 vxor(x6C5939A3, x666633CC, x0A3F0A6F);
1253 vxor(x5963A3C6, x353A9A65, x6C5939A3);
1255 vsel(x35FF659A, a4, x353A659A, x353A6565);
1256 vxor(x3AF06A95, a3, x35FF659A);
1257 vsel(x05CF0A9F, a4, a3, x353A9A65);
1258 vsel(x16E94A97, x3AF06A95, x05CF0A9F, x6C5939A3);
1260 vsel(x86CD4C9B, xCAC5659A, x05CF0A9F, x6C5939A3);
1261 vsel(x12E0FFFD, a5, x3AF06A95, x16E94A97);
1262 vsel(x942D9A67, x86CD4C9B, x353A9A65, x12E0FFFD);
1263 vsel(x0, xCAC5659A, x942D9A67, a6);
1264 vxor(*out1, *out1, x0);
1266 vsel(x142956AB, x353A659A, x942D9A67, a2);
1267 vsel(x455D45DF, a1, x86CD4C9B, x142956AB);
1268 vxor(x1C3EE619, x5963A3C6, x455D45DF);
1269 vsel(x3, x5963A3C6, x1C3EE619, a6);
1270 vxor(*out4, *out4, x3);
1272 vsel(x2AEA70D5, x3AF06A95, x606F30CF, x353A9A65);
1273 vsel(x20CF7A9F, x2AEA70D5, x05CF0A9F, x0A3F0A6F);
1274 vxor(x3CF19C86, x1C3EE619, x20CF7A9F);
1275 vxor(x69A49C79, x555500FF, x3CF19C86);
1277 vsel(x840DBB67, a5, x942D9A67, x86CD4C9B);
1278 vsel(x6DA19C1E, x69A49C79, x3CF19C86, x840DBB67);
1279 vnot(x925E63E1, x6DA19C1E);
1280 vsel(x1, x925E63E1, x69A49C79, a6);
1281 vxor(*out2, *out2, x1);
1283 vsel(x9C3CA761, x840DBB67, x1C3EE619, x3CF19C86);
1284 vsel(x257A75D5, x455D45DF, x2AEA70D5, x606F30CF);
1285 vxor(xB946D2B4, x9C3CA761, x257A75D5);
1286 vsel(x2, x16E94A97, xB946D2B4, a6);
1287 vxor(*out3, *out3, x2);
1291 s7(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1292 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1294 u32 x44447777, x4B4B7878, x22772277, x0505F5F5, x220522F5, x694E5A8D;
1295 u32 x00FFFF00, x66666666, x32353235, x26253636, x26DAC936;
1296 u32 x738F9C63, x11EF9867, x26DA9867;
1297 u32 x4B4B9C63, x4B666663, x4E639396;
1298 u32 x4E4B393C, xFF00FF00, xFF05DD21, xB14EE41D;
1299 u32 xD728827B, x6698807B, x699C585B;
1300 u32 x738C847B, xA4A71E18, x74878E78;
1301 u32 x333D9639, x74879639, x8B7869C6;
1304 vsel(x44447777, a2, a6, a3);
1305 vxor(x4B4B7878, a4, x44447777);
1306 vsel(x22772277, a3, a5, a2);
1307 vsel(x0505F5F5, a6, a2, a4);
1308 vsel(x220522F5, x22772277, x0505F5F5, a5);
1309 vxor(x694E5A8D, x4B4B7878, x220522F5);
1311 vxor(x00FFFF00, a5, a6);
1312 vxor(x66666666, a2, a3);
1313 vsel(x32353235, a3, x220522F5, a4);
1314 vsel(x26253636, x66666666, x32353235, x4B4B7878);
1315 vxor(x26DAC936, x00FFFF00, x26253636);
1316 vsel(x0, x26DAC936, x694E5A8D, a1);
1317 vxor(*out1, *out1, x0);
1319 vxor(x738F9C63, a2, x26DAC936);
1320 vsel(x11EF9867, x738F9C63, a5, x66666666);
1321 vsel(x26DA9867, x26DAC936, x11EF9867, a6);
1323 vsel(x4B4B9C63, x4B4B7878, x738F9C63, a6);
1324 vsel(x4B666663, x4B4B9C63, x66666666, x00FFFF00);
1325 vxor(x4E639396, x0505F5F5, x4B666663);
1327 vsel(x4E4B393C, x4B4B7878, x4E639396, a2);
1328 vnot(xFF00FF00, a5);
1329 vsel(xFF05DD21, xFF00FF00, x738F9C63, x32353235);
1330 vxor(xB14EE41D, x4E4B393C, xFF05DD21);
1331 vsel(x1, xB14EE41D, x26DA9867, a1);
1332 vxor(*out2, *out2, x1);
1334 vxor(xD728827B, x66666666, xB14EE41D);
1335 vsel(x6698807B, x26DA9867, xD728827B, x4E4B393C);
1336 vsel(x699C585B, x6698807B, x694E5A8D, xFF05DD21);
1337 vsel(x2, x699C585B, x4E639396, a1);
1338 vxor(*out3, *out3, x2);
1340 vsel(x738C847B, x738F9C63, xD728827B, x4B4B7878);
1341 vxor(xA4A71E18, x738F9C63, xD728827B);
1342 vsel(x74878E78, x738C847B, xA4A71E18, a4);
1344 vsel(x333D9639, x32353235, x738C847B, xB14EE41D);
1345 vsel(x74879639, x74878E78, x333D9639, a6);
1346 vnot(x8B7869C6, x74879639);
1347 vsel(x3, x74878E78, x8B7869C6, a1);
1348 vxor(*out4, *out4, x3);
1352 s8(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1353 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1355 u32 x0505F5F5, x05FAF50A, x0F0F00FF, x22227777, x07DA807F, x34E9B34C;
1356 u32 x00FFF00F, x0033FCCF, x5565B15C, x0C0C3F3F, x59698E63;
1357 u32 x3001F74E, x30555745, x693CD926;
1358 u32 x0C0CD926, x0C3F25E9, x38D696A5;
1360 u32 x03D2117B, xC778395B, xCB471CB2;
1361 u32 x5425B13F, x56B3803F, x919AE965;
1362 u32 x17B3023F, x75555755, x62E6556A, xA59E6C31;
1365 vsel(x0505F5F5, a5, a1, a3);
1366 vxor(x05FAF50A, a4, x0505F5F5);
1367 vsel(x0F0F00FF, a3, a4, a5);
1368 vsel(x22227777, a2, a5, a1);
1369 vsel(x07DA807F, x05FAF50A, x0F0F00FF, x22227777);
1370 vxor(x34E9B34C, a2, x07DA807F);
1372 vsel(x00FFF00F, x05FAF50A, a4, a3);
1373 vsel(x0033FCCF, a5, x00FFF00F, a2);
1374 vsel(x5565B15C, a1, x34E9B34C, x0033FCCF);
1375 vsel(x0C0C3F3F, a3, a5, a2);
1376 vxor(x59698E63, x5565B15C, x0C0C3F3F);
1378 vsel(x3001F74E, x34E9B34C, a5, x05FAF50A);
1379 vsel(x30555745, x3001F74E, a1, x00FFF00F);
1380 vxor(x693CD926, x59698E63, x30555745);
1381 vsel(x2, x693CD926, x59698E63, a6);
1382 vxor(*out3, *out3, x2);
1384 vsel(x0C0CD926, x0C0C3F3F, x693CD926, a5);
1385 vxor(x0C3F25E9, x0033FCCF, x0C0CD926);
1386 vxor(x38D696A5, x34E9B34C, x0C3F25E9);
1388 vnot(xC729695A, x38D696A5);
1390 vsel(x03D2117B, x07DA807F, a2, x0C0CD926);
1391 vsel(xC778395B, xC729695A, x03D2117B, x30555745);
1392 vxor(xCB471CB2, x0C3F25E9, xC778395B);
1393 vsel(x1, xCB471CB2, x34E9B34C, a6);
1394 vxor(*out2, *out2, x1);
1396 vsel(x5425B13F, x5565B15C, x0C0C3F3F, x03D2117B);
1397 vsel(x56B3803F, x07DA807F, x5425B13F, x59698E63);
1398 vxor(x919AE965, xC729695A, x56B3803F);
1399 vsel(x3, xC729695A, x919AE965, a6);
1400 vxor(*out4, *out4, x3);
1402 vsel(x17B3023F, x07DA807F, a2, x59698E63);
1403 vor(x75555755, a1, x30555745);
1404 vxor(x62E6556A, x17B3023F, x75555755);
1405 vxor(xA59E6C31, xC778395B, x62E6556A);
1406 vsel(x0, xA59E6C31, x38D696A5, a6);
1407 vxor(*out1, *out1, x0);
1411 #define SWAP(a, b) { u32 tmp=*a;*a=*b;*b=tmp; }
1447 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
1448 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
1449 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
1450 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
1451 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
1452 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
1453 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
1454 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
1455 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
1456 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
1457 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
1458 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
1459 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
1460 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
1461 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
1462 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
1464 #ifdef DESCRYPT_SALT
1466 static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
1468 #define myselx(a,b,c) ((c) ? (b) : (a))
1470 sXXX_DECL u32 s001 = (0x001 & DESCRYPT_SALT) ? 0xffffffff : 0;
1471 sXXX_DECL u32 s002 = (0x002 & DESCRYPT_SALT) ? 0xffffffff : 0;
1472 sXXX_DECL u32 s004 = (0x004 & DESCRYPT_SALT) ? 0xffffffff : 0;
1473 sXXX_DECL u32 s008 = (0x008 & DESCRYPT_SALT) ? 0xffffffff : 0;
1474 sXXX_DECL u32 s010 = (0x010 & DESCRYPT_SALT) ? 0xffffffff : 0;
1475 sXXX_DECL u32 s020 = (0x020 & DESCRYPT_SALT) ? 0xffffffff : 0;
1476 sXXX_DECL u32 s040 = (0x040 & DESCRYPT_SALT) ? 0xffffffff : 0;
1477 sXXX_DECL u32 s080 = (0x080 & DESCRYPT_SALT) ? 0xffffffff : 0;
1478 sXXX_DECL u32 s100 = (0x100 & DESCRYPT_SALT) ? 0xffffffff : 0;
1479 sXXX_DECL u32 s200 = (0x200 & DESCRYPT_SALT) ? 0xffffffff : 0;
1480 sXXX_DECL u32 s400 = (0x400 & DESCRYPT_SALT) ? 0xffffffff : 0;
1481 sXXX_DECL u32 s800 = (0x800 & DESCRYPT_SALT) ? 0xffffffff : 0;
1483 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
1484 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
1485 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
1486 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
1487 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
1488 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
1489 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
1490 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
1492 for (u32 ii = 0; ii < 25; ii++)
1494 #if CUDA_ARCH >= 500
1500 for (u32 i = 0; i < 2; i++)
1502 if (i) KEYSET10 else KEYSET00
1504 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1505 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1506 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1507 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1508 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1509 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1510 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1511 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1513 if (i) KEYSET11 else KEYSET01
1515 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1516 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1517 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1518 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1519 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1520 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1521 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1522 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1524 if (i) KEYSET12 else KEYSET02
1526 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1527 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1528 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1529 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1530 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1531 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1532 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1533 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1535 if (i) KEYSET13 else KEYSET03
1537 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1538 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1539 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1540 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1541 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1542 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1543 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1544 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1546 if (i) KEYSET14 else KEYSET04
1548 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1549 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1550 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1551 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1552 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1553 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1554 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1555 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1557 if (i) KEYSET15 else KEYSET05
1559 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1560 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1561 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1562 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1563 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1564 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1565 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1566 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1568 if (i) KEYSET16 else KEYSET06
1570 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1571 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1572 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1573 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1574 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1575 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1576 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1577 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1579 if (i) KEYSET17 else KEYSET07
1581 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1582 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1583 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1584 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1585 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1586 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1587 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1588 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1599 static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
1601 #define myselx(a,b,c) ((c) ? (b) : (a))
1602 //#define myselx(a,b,c) ((b & c) | (a & ~c))
1603 //#define myselx(a,b,c) bitselect ((a), (b), (c))
1605 sXXX_DECL u32 s001 = (0x001 & SALT) ? 0xffffffff : 0;
1606 sXXX_DECL u32 s002 = (0x002 & SALT) ? 0xffffffff : 0;
1607 sXXX_DECL u32 s004 = (0x004 & SALT) ? 0xffffffff : 0;
1608 sXXX_DECL u32 s008 = (0x008 & SALT) ? 0xffffffff : 0;
1609 sXXX_DECL u32 s010 = (0x010 & SALT) ? 0xffffffff : 0;
1610 sXXX_DECL u32 s020 = (0x020 & SALT) ? 0xffffffff : 0;
1611 sXXX_DECL u32 s040 = (0x040 & SALT) ? 0xffffffff : 0;
1612 sXXX_DECL u32 s080 = (0x080 & SALT) ? 0xffffffff : 0;
1613 sXXX_DECL u32 s100 = (0x100 & SALT) ? 0xffffffff : 0;
1614 sXXX_DECL u32 s200 = (0x200 & SALT) ? 0xffffffff : 0;
1615 sXXX_DECL u32 s400 = (0x400 & SALT) ? 0xffffffff : 0;
1616 sXXX_DECL u32 s800 = (0x800 & SALT) ? 0xffffffff : 0;
1618 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
1619 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
1620 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
1621 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
1622 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
1623 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
1624 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
1625 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
1627 for (u32 ii = 0; ii < 25; ii++)
1629 #if CUDA_ARCH >= 500
1635 for (u32 i = 0; i < 2; i++)
1637 if (i) KEYSET10 else KEYSET00
1639 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1640 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1641 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1642 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1643 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1644 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1645 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1646 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1648 if (i) KEYSET11 else KEYSET01
1650 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1651 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1652 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1653 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1654 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1655 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1656 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1657 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1659 if (i) KEYSET12 else KEYSET02
1661 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1662 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1663 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1664 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1665 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1666 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1667 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1668 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1670 if (i) KEYSET13 else KEYSET03
1672 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1673 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1674 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1675 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1676 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1677 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1678 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1679 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1681 if (i) KEYSET14 else KEYSET04
1683 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1684 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1685 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1686 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1687 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1688 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1689 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1690 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1692 if (i) KEYSET15 else KEYSET05
1694 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1695 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1696 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1697 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1698 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1699 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1700 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1701 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1703 if (i) KEYSET16 else KEYSET06
1705 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1706 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1707 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1708 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1709 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1710 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1711 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1712 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1714 if (i) KEYSET17 else KEYSET07
1716 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1717 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1718 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1719 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1720 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1721 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1722 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1723 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1734 static void transpose32c (u32 data[32])
1736 #define swap(x,y,j,m) \
1737 t = ((x) ^ ((y) >> (j))) & (m); \
1739 (y) = (y) ^ (t << (j));
1743 swap (data[ 0], data[16], 16, 0x0000ffff);
1744 swap (data[ 1], data[17], 16, 0x0000ffff);
1745 swap (data[ 2], data[18], 16, 0x0000ffff);
1746 swap (data[ 3], data[19], 16, 0x0000ffff);
1747 swap (data[ 4], data[20], 16, 0x0000ffff);
1748 swap (data[ 5], data[21], 16, 0x0000ffff);
1749 swap (data[ 6], data[22], 16, 0x0000ffff);
1750 swap (data[ 7], data[23], 16, 0x0000ffff);
1751 swap (data[ 8], data[24], 16, 0x0000ffff);
1752 swap (data[ 9], data[25], 16, 0x0000ffff);
1753 swap (data[10], data[26], 16, 0x0000ffff);
1754 swap (data[11], data[27], 16, 0x0000ffff);
1755 swap (data[12], data[28], 16, 0x0000ffff);
1756 swap (data[13], data[29], 16, 0x0000ffff);
1757 swap (data[14], data[30], 16, 0x0000ffff);
1758 swap (data[15], data[31], 16, 0x0000ffff);
1759 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
1760 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
1761 swap (data[ 2], data[10], 8, 0x00ff00ff);
1762 swap (data[ 3], data[11], 8, 0x00ff00ff);
1763 swap (data[ 4], data[12], 8, 0x00ff00ff);
1764 swap (data[ 5], data[13], 8, 0x00ff00ff);
1765 swap (data[ 6], data[14], 8, 0x00ff00ff);
1766 swap (data[ 7], data[15], 8, 0x00ff00ff);
1767 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
1768 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
1769 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
1770 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
1771 swap (data[ 0], data[ 2], 2, 0x33333333);
1772 swap (data[ 1], data[ 3], 2, 0x33333333);
1773 swap (data[ 0], data[ 1], 1, 0x55555555);
1774 swap (data[ 2], data[ 3], 1, 0x55555555);
1775 swap (data[ 4], data[ 6], 2, 0x33333333);
1776 swap (data[ 5], data[ 7], 2, 0x33333333);
1777 swap (data[ 4], data[ 5], 1, 0x55555555);
1778 swap (data[ 6], data[ 7], 1, 0x55555555);
1779 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
1780 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
1781 swap (data[10], data[14], 4, 0x0f0f0f0f);
1782 swap (data[11], data[15], 4, 0x0f0f0f0f);
1783 swap (data[ 8], data[10], 2, 0x33333333);
1784 swap (data[ 9], data[11], 2, 0x33333333);
1785 swap (data[ 8], data[ 9], 1, 0x55555555);
1786 swap (data[10], data[11], 1, 0x55555555);
1787 swap (data[12], data[14], 2, 0x33333333);
1788 swap (data[13], data[15], 2, 0x33333333);
1789 swap (data[12], data[13], 1, 0x55555555);
1790 swap (data[14], data[15], 1, 0x55555555);
1791 swap (data[16], data[24], 8, 0x00ff00ff);
1792 swap (data[17], data[25], 8, 0x00ff00ff);
1793 swap (data[18], data[26], 8, 0x00ff00ff);
1794 swap (data[19], data[27], 8, 0x00ff00ff);
1795 swap (data[20], data[28], 8, 0x00ff00ff);
1796 swap (data[21], data[29], 8, 0x00ff00ff);
1797 swap (data[22], data[30], 8, 0x00ff00ff);
1798 swap (data[23], data[31], 8, 0x00ff00ff);
1799 swap (data[16], data[20], 4, 0x0f0f0f0f);
1800 swap (data[17], data[21], 4, 0x0f0f0f0f);
1801 swap (data[18], data[22], 4, 0x0f0f0f0f);
1802 swap (data[19], data[23], 4, 0x0f0f0f0f);
1803 swap (data[16], data[18], 2, 0x33333333);
1804 swap (data[17], data[19], 2, 0x33333333);
1805 swap (data[16], data[17], 1, 0x55555555);
1806 swap (data[18], data[19], 1, 0x55555555);
1807 swap (data[20], data[22], 2, 0x33333333);
1808 swap (data[21], data[23], 2, 0x33333333);
1809 swap (data[20], data[21], 1, 0x55555555);
1810 swap (data[22], data[23], 1, 0x55555555);
1811 swap (data[24], data[28], 4, 0x0f0f0f0f);
1812 swap (data[25], data[29], 4, 0x0f0f0f0f);
1813 swap (data[26], data[30], 4, 0x0f0f0f0f);
1814 swap (data[27], data[31], 4, 0x0f0f0f0f);
1815 swap (data[24], data[26], 2, 0x33333333);
1816 swap (data[25], data[27], 2, 0x33333333);
1817 swap (data[24], data[25], 1, 0x55555555);
1818 swap (data[26], data[27], 1, 0x55555555);
1819 swap (data[28], data[30], 2, 0x33333333);
1820 swap (data[29], data[31], 2, 0x33333333);
1821 swap (data[28], data[29], 1, 0x55555555);
1822 swap (data[30], data[31], 1, 0x55555555);
1825 static void m01500m (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1831 const u32 gid = get_global_id (0);
1832 const u32 lid = get_local_id (0);
1838 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
1844 const u32 K00 = pws[gid].i[ 0];
1845 const u32 K01 = pws[gid].i[ 1];
1846 const u32 K02 = pws[gid].i[ 2];
1847 const u32 K03 = pws[gid].i[ 3];
1848 const u32 K04 = pws[gid].i[ 4];
1849 const u32 K05 = pws[gid].i[ 5];
1850 const u32 K06 = pws[gid].i[ 6];
1851 const u32 K07 = pws[gid].i[ 7];
1852 const u32 K08 = pws[gid].i[ 8];
1853 const u32 K09 = pws[gid].i[ 9];
1854 const u32 K10 = pws[gid].i[10];
1855 const u32 K11 = pws[gid].i[11];
1856 const u32 K12 = pws[gid].i[12];
1857 const u32 K13 = pws[gid].i[13];
1858 const u32 K14 = pws[gid].i[14];
1859 const u32 K15 = pws[gid].i[15];
1860 const u32 K16 = pws[gid].i[16];
1861 const u32 K17 = pws[gid].i[17];
1862 const u32 K18 = pws[gid].i[18];
1863 const u32 K19 = pws[gid].i[19];
1864 const u32 K20 = pws[gid].i[20];
1865 const u32 K21 = pws[gid].i[21];
1866 const u32 K22 = pws[gid].i[22];
1867 const u32 K23 = pws[gid].i[23];
1868 const u32 K24 = pws[gid].i[24];
1869 const u32 K25 = pws[gid].i[25];
1870 const u32 K26 = pws[gid].i[26];
1871 const u32 K27 = pws[gid].i[27];
1872 const u32 K28 = pws[gid].i[28];
1873 const u32 K29 = pws[gid].i[29];
1874 const u32 K30 = pws[gid].i[30];
1875 const u32 K31 = pws[gid].i[31];
1876 const u32 K32 = pws[gid].i[32];
1877 const u32 K33 = pws[gid].i[33];
1878 const u32 K34 = pws[gid].i[34];
1879 const u32 K35 = pws[gid].i[35];
1880 const u32 K36 = pws[gid].i[36];
1881 const u32 K37 = pws[gid].i[37];
1882 const u32 K38 = pws[gid].i[38];
1883 const u32 K39 = pws[gid].i[39];
1884 const u32 K40 = pws[gid].i[40];
1885 const u32 K41 = pws[gid].i[41];
1886 const u32 K42 = pws[gid].i[42];
1887 const u32 K43 = pws[gid].i[43];
1888 const u32 K44 = pws[gid].i[44];
1889 const u32 K45 = pws[gid].i[45];
1890 const u32 K46 = pws[gid].i[46];
1891 const u32 K47 = pws[gid].i[47];
1892 const u32 K48 = pws[gid].i[48];
1893 const u32 K49 = pws[gid].i[49];
1894 const u32 K50 = pws[gid].i[50];
1895 const u32 K51 = pws[gid].i[51];
1896 const u32 K52 = pws[gid].i[52];
1897 const u32 K53 = pws[gid].i[53];
1898 const u32 K54 = pws[gid].i[54];
1899 const u32 K55 = pws[gid].i[55];
1905 const u32 pc_pos = get_local_id (1);
1907 const u32 il_pos = pc_pos * 32;
1938 k00 |= words_buf_r[pc_pos].b[ 0];
1939 k01 |= words_buf_r[pc_pos].b[ 1];
1940 k02 |= words_buf_r[pc_pos].b[ 2];
1941 k03 |= words_buf_r[pc_pos].b[ 3];
1942 k04 |= words_buf_r[pc_pos].b[ 4];
1943 k05 |= words_buf_r[pc_pos].b[ 5];
1944 k06 |= words_buf_r[pc_pos].b[ 6];
1945 k07 |= words_buf_r[pc_pos].b[ 7];
1946 k08 |= words_buf_r[pc_pos].b[ 8];
1947 k09 |= words_buf_r[pc_pos].b[ 9];
1948 k10 |= words_buf_r[pc_pos].b[10];
1949 k11 |= words_buf_r[pc_pos].b[11];
1950 k12 |= words_buf_r[pc_pos].b[12];
1951 k13 |= words_buf_r[pc_pos].b[13];
1952 k14 |= words_buf_r[pc_pos].b[14];
1953 k15 |= words_buf_r[pc_pos].b[15];
1954 k16 |= words_buf_r[pc_pos].b[16];
1955 k17 |= words_buf_r[pc_pos].b[17];
1956 k18 |= words_buf_r[pc_pos].b[18];
1957 k19 |= words_buf_r[pc_pos].b[19];
1958 k20 |= words_buf_r[pc_pos].b[20];
1959 k21 |= words_buf_r[pc_pos].b[21];
1960 k22 |= words_buf_r[pc_pos].b[22];
1961 k23 |= words_buf_r[pc_pos].b[23];
1962 k24 |= words_buf_r[pc_pos].b[24];
1963 k25 |= words_buf_r[pc_pos].b[25];
1964 k26 |= words_buf_r[pc_pos].b[26];
1965 k27 |= words_buf_r[pc_pos].b[27];
2035 k00, k01, k02, k03, k04, k05, k06,
2036 k07, k08, k09, k10, k11, k12, k13,
2037 k14, k15, k16, k17, k18, k19, k20,
2038 k21, k22, k23, k24, k25, k26, k27,
2039 K28, K29, K30, K31, K32, K33, K34,
2040 K35, K36, K37, K38, K39, K40, K41,
2041 K42, K43, K44, K45, K46, K47, K48,
2042 K49, K50, K51, K52, K53, K54, K55,
2043 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
2044 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
2045 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
2046 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
2047 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
2048 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
2049 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
2050 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
2120 if (digests_cnt < 16)
2122 for (u32 d = 0; d < digests_cnt; d++)
2124 const u32 final_hash_pos = digests_offset + d;
2126 if (hashes_shown[final_hash_pos]) continue;
2130 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
2131 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
2136 for (int i = 0; i < 32; i++)
2138 const u32 b0 = -((search[0] >> i) & 1);
2139 const u32 b1 = -((search[1] >> i) & 1);
2141 tmpResult |= out[ 0 + i] ^ b0;
2142 tmpResult |= out[32 + i] ^ b1;
2145 if (tmpResult == 0xffffffff) continue;
2147 const u32 slice = 31 - clz (~tmpResult);
2149 const u32 r0 = search[0];
2150 const u32 r1 = search[1];
2163 for (int i = 0; i < 32; i++)
2165 out0[i] = out[ 0 + 31 - i];
2166 out1[i] = out[32 + 31 - i];
2169 transpose32c (out0);
2170 transpose32c (out1);
2173 for (int slice = 0; slice < 32; slice++)
2175 const u32 r0 = out0[31 - slice];
2176 const u32 r1 = out1[31 - slice];
2185 static void m01500s (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
2191 const u32 gid = get_global_id (0);
2192 const u32 lid = get_local_id (0);
2198 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
2273 const u32 K00 = pws[gid].i[ 0];
2274 const u32 K01 = pws[gid].i[ 1];
2275 const u32 K02 = pws[gid].i[ 2];
2276 const u32 K03 = pws[gid].i[ 3];
2277 const u32 K04 = pws[gid].i[ 4];
2278 const u32 K05 = pws[gid].i[ 5];
2279 const u32 K06 = pws[gid].i[ 6];
2280 const u32 K07 = pws[gid].i[ 7];
2281 const u32 K08 = pws[gid].i[ 8];
2282 const u32 K09 = pws[gid].i[ 9];
2283 const u32 K10 = pws[gid].i[10];
2284 const u32 K11 = pws[gid].i[11];
2285 const u32 K12 = pws[gid].i[12];
2286 const u32 K13 = pws[gid].i[13];
2287 const u32 K14 = pws[gid].i[14];
2288 const u32 K15 = pws[gid].i[15];
2289 const u32 K16 = pws[gid].i[16];
2290 const u32 K17 = pws[gid].i[17];
2291 const u32 K18 = pws[gid].i[18];
2292 const u32 K19 = pws[gid].i[19];
2293 const u32 K20 = pws[gid].i[20];
2294 const u32 K21 = pws[gid].i[21];
2295 const u32 K22 = pws[gid].i[22];
2296 const u32 K23 = pws[gid].i[23];
2297 const u32 K24 = pws[gid].i[24];
2298 const u32 K25 = pws[gid].i[25];
2299 const u32 K26 = pws[gid].i[26];
2300 const u32 K27 = pws[gid].i[27];
2301 const u32 K28 = pws[gid].i[28];
2302 const u32 K29 = pws[gid].i[29];
2303 const u32 K30 = pws[gid].i[30];
2304 const u32 K31 = pws[gid].i[31];
2305 const u32 K32 = pws[gid].i[32];
2306 const u32 K33 = pws[gid].i[33];
2307 const u32 K34 = pws[gid].i[34];
2308 const u32 K35 = pws[gid].i[35];
2309 const u32 K36 = pws[gid].i[36];
2310 const u32 K37 = pws[gid].i[37];
2311 const u32 K38 = pws[gid].i[38];
2312 const u32 K39 = pws[gid].i[39];
2313 const u32 K40 = pws[gid].i[40];
2314 const u32 K41 = pws[gid].i[41];
2315 const u32 K42 = pws[gid].i[42];
2316 const u32 K43 = pws[gid].i[43];
2317 const u32 K44 = pws[gid].i[44];
2318 const u32 K45 = pws[gid].i[45];
2319 const u32 K46 = pws[gid].i[46];
2320 const u32 K47 = pws[gid].i[47];
2321 const u32 K48 = pws[gid].i[48];
2322 const u32 K49 = pws[gid].i[49];
2323 const u32 K50 = pws[gid].i[50];
2324 const u32 K51 = pws[gid].i[51];
2325 const u32 K52 = pws[gid].i[52];
2326 const u32 K53 = pws[gid].i[53];
2327 const u32 K54 = pws[gid].i[54];
2328 const u32 K55 = pws[gid].i[55];
2334 const u32 pc_pos = get_local_id (1);
2336 const u32 il_pos = pc_pos * 32;
2367 k00 |= words_buf_r[pc_pos].b[ 0];
2368 k01 |= words_buf_r[pc_pos].b[ 1];
2369 k02 |= words_buf_r[pc_pos].b[ 2];
2370 k03 |= words_buf_r[pc_pos].b[ 3];
2371 k04 |= words_buf_r[pc_pos].b[ 4];
2372 k05 |= words_buf_r[pc_pos].b[ 5];
2373 k06 |= words_buf_r[pc_pos].b[ 6];
2374 k07 |= words_buf_r[pc_pos].b[ 7];
2375 k08 |= words_buf_r[pc_pos].b[ 8];
2376 k09 |= words_buf_r[pc_pos].b[ 9];
2377 k10 |= words_buf_r[pc_pos].b[10];
2378 k11 |= words_buf_r[pc_pos].b[11];
2379 k12 |= words_buf_r[pc_pos].b[12];
2380 k13 |= words_buf_r[pc_pos].b[13];
2381 k14 |= words_buf_r[pc_pos].b[14];
2382 k15 |= words_buf_r[pc_pos].b[15];
2383 k16 |= words_buf_r[pc_pos].b[16];
2384 k17 |= words_buf_r[pc_pos].b[17];
2385 k18 |= words_buf_r[pc_pos].b[18];
2386 k19 |= words_buf_r[pc_pos].b[19];
2387 k20 |= words_buf_r[pc_pos].b[20];
2388 k21 |= words_buf_r[pc_pos].b[21];
2389 k22 |= words_buf_r[pc_pos].b[22];
2390 k23 |= words_buf_r[pc_pos].b[23];
2391 k24 |= words_buf_r[pc_pos].b[24];
2392 k25 |= words_buf_r[pc_pos].b[25];
2393 k26 |= words_buf_r[pc_pos].b[26];
2394 k27 |= words_buf_r[pc_pos].b[27];
2464 k00, k01, k02, k03, k04, k05, k06,
2465 k07, k08, k09, k10, k11, k12, k13,
2466 k14, k15, k16, k17, k18, k19, k20,
2467 k21, k22, k23, k24, k25, k26, k27,
2468 K28, K29, K30, K31, K32, K33, K34,
2469 K35, K36, K37, K38, K39, K40, K41,
2470 K42, K43, K44, K45, K46, K47, K48,
2471 K49, K50, K51, K52, K53, K54, K55,
2472 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
2473 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
2474 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
2475 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
2476 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
2477 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
2478 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
2479 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
2484 tmpResult |= D00 ^ S00;
2485 tmpResult |= D01 ^ S01;
2486 tmpResult |= D02 ^ S02;
2487 tmpResult |= D03 ^ S03;
2488 tmpResult |= D04 ^ S04;
2489 tmpResult |= D05 ^ S05;
2490 tmpResult |= D06 ^ S06;
2491 tmpResult |= D07 ^ S07;
2492 tmpResult |= D08 ^ S08;
2493 tmpResult |= D09 ^ S09;
2494 tmpResult |= D10 ^ S10;
2495 tmpResult |= D11 ^ S11;
2496 tmpResult |= D12 ^ S12;
2497 tmpResult |= D13 ^ S13;
2498 tmpResult |= D14 ^ S14;
2499 tmpResult |= D15 ^ S15;
2500 tmpResult |= D16 ^ S16;
2501 tmpResult |= D17 ^ S17;
2502 tmpResult |= D18 ^ S18;
2503 tmpResult |= D19 ^ S19;
2504 tmpResult |= D20 ^ S20;
2505 tmpResult |= D21 ^ S21;
2506 tmpResult |= D22 ^ S22;
2507 tmpResult |= D23 ^ S23;
2508 tmpResult |= D24 ^ S24;
2509 tmpResult |= D25 ^ S25;
2510 tmpResult |= D26 ^ S26;
2511 tmpResult |= D27 ^ S27;
2512 tmpResult |= D28 ^ S28;
2513 tmpResult |= D29 ^ S29;
2514 tmpResult |= D30 ^ S30;
2515 tmpResult |= D31 ^ S31;
2516 tmpResult |= D32 ^ S32;
2517 tmpResult |= D33 ^ S33;
2518 tmpResult |= D34 ^ S34;
2519 tmpResult |= D35 ^ S35;
2520 tmpResult |= D36 ^ S36;
2521 tmpResult |= D37 ^ S37;
2522 tmpResult |= D38 ^ S38;
2523 tmpResult |= D39 ^ S39;
2524 tmpResult |= D40 ^ S40;
2525 tmpResult |= D41 ^ S41;
2526 tmpResult |= D42 ^ S42;
2527 tmpResult |= D43 ^ S43;
2528 tmpResult |= D44 ^ S44;
2529 tmpResult |= D45 ^ S45;
2530 tmpResult |= D46 ^ S46;
2531 tmpResult |= D47 ^ S47;
2533 if (tmpResult == 0xffffffff) return;
2535 tmpResult |= D48 ^ S48;
2536 tmpResult |= D49 ^ S49;
2537 tmpResult |= D50 ^ S50;
2538 tmpResult |= D51 ^ S51;
2539 tmpResult |= D52 ^ S52;
2540 tmpResult |= D53 ^ S53;
2541 tmpResult |= D54 ^ S54;
2542 tmpResult |= D55 ^ S55;
2543 tmpResult |= D56 ^ S56;
2544 tmpResult |= D57 ^ S57;
2545 tmpResult |= D58 ^ S58;
2546 tmpResult |= D59 ^ S59;
2547 tmpResult |= D60 ^ S60;
2548 tmpResult |= D61 ^ S61;
2549 tmpResult |= D62 ^ S62;
2550 tmpResult |= D63 ^ S63;
2552 if (tmpResult == 0xffffffff) return;
2554 const u32 slice = 31 - clz (~tmpResult);
2560 // transpose bitslice base : easy because no overlapping buffers
2561 // mod : attention race conditions, need different buffers for *in and *out
2564 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01500_tb (__global pw_t *pws)
2566 const u32 gid = get_global_id (0);
2568 const u32 w0 = pws[gid].i[0];
2569 const u32 w1 = pws[gid].i[1];
2571 const u32 w0s = (w0 << 1) & 0xfefefefe;
2572 const u32 w1s = (w1 << 1) & 0xfefefefe;
2575 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
2577 pws[gid].i[j + 0 + 0] = -((w0s >> (i + 7)) & 1);
2578 pws[gid].i[j + 0 + 1] = -((w0s >> (i + 6)) & 1);
2579 pws[gid].i[j + 0 + 2] = -((w0s >> (i + 5)) & 1);
2580 pws[gid].i[j + 0 + 3] = -((w0s >> (i + 4)) & 1);
2581 pws[gid].i[j + 0 + 4] = -((w0s >> (i + 3)) & 1);
2582 pws[gid].i[j + 0 + 5] = -((w0s >> (i + 2)) & 1);
2583 pws[gid].i[j + 0 + 6] = -((w0s >> (i + 1)) & 1);
2587 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
2589 pws[gid].i[j + 28 + 0] = -((w1s >> (i + 7)) & 1);
2590 pws[gid].i[j + 28 + 1] = -((w1s >> (i + 6)) & 1);
2591 pws[gid].i[j + 28 + 2] = -((w1s >> (i + 5)) & 1);
2592 pws[gid].i[j + 28 + 3] = -((w1s >> (i + 4)) & 1);
2593 pws[gid].i[j + 28 + 4] = -((w1s >> (i + 3)) & 1);
2594 pws[gid].i[j + 28 + 5] = -((w1s >> (i + 2)) & 1);
2595 pws[gid].i[j + 28 + 6] = -((w1s >> (i + 1)) & 1);
2599 __kernel void __attribute__((reqd_work_group_size (32, 1, 1))) m01500_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
2601 const u32 gid = get_global_id (0);
2602 const u32 lid = get_local_id (0);
2604 const u32 block = gid / 32;
2605 const u32 slice = gid % 32;
2607 const u32 w0 = mod[gid];
2609 const u32 w0s = (w0 << 1) & 0xfefefefe;
2612 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
2614 atomic_or (&words_buf_r[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
2615 atomic_or (&words_buf_r[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice));
2616 atomic_or (&words_buf_r[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice));
2617 atomic_or (&words_buf_r[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice));
2618 atomic_or (&words_buf_r[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice));
2619 atomic_or (&words_buf_r[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice));
2620 atomic_or (&words_buf_r[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice));
2624 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2630 const u32 gid = get_global_id (0);
2631 const u32 lid = get_local_id (0);
2632 const u32 vid = get_local_id (1);
2634 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
2635 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
2637 __local u32 s_S[64];
2641 s_S[ 0 + vid] = -((s0 >> vid) & 1);
2645 s_S[32 + vid] = -((s1 >> vid) & 1);
2648 barrier (CLK_LOCAL_MEM_FENCE);
2650 if (gid >= gid_max) return;
2656 m01500m (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2659 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2663 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2667 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2673 const u32 gid = get_global_id (0);
2674 const u32 lid = get_local_id (0);
2675 const u32 vid = get_local_id (1);
2677 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
2678 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
2680 __local u32 s_S[64];
2684 s_S[ 0 + vid] = -((s0 >> vid) & 1);
2688 s_S[32 + vid] = -((s1 >> vid) & 1);
2691 barrier (CLK_LOCAL_MEM_FENCE);
2693 if (gid >= gid_max) return;
2699 m01500s (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2702 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2706 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)