2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes for maxwell were taken from DeepLearningJohnDoe, license below
5 * : sboxes for others were takes fron JtR, license below
10 #include "include/constants.h"
11 #include "include/kernel_vendor.h"
18 #include "include/kernel_functions.c"
19 #include "OpenCL/types_ocl.c"
20 #include "OpenCL/common.c"
22 #define COMPARE_S "OpenCL/check_single_comp4_bs.c"
23 #define COMPARE_M "OpenCL/check_multi_comp4_bs.c"
25 #define myselx(a,b,c) ((c) ? (b) : (a))
32 // Bitslice DES S-boxes with LOP3.LUT instructions
33 // For NVIDIA Maxwell architecture and CUDA 7.5 RC
34 // by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
36 // Gate counts: 25 24 25 18 25 24 24 23
38 // Depth: 8 7 7 6 8 10 10 8
41 // Note that same S-box function with a lower gate count isn't necessarily faster.
43 // These Boolean expressions corresponding to DES S-boxes were
44 // discovered by <deeplearningjohndoe at gmail.com>
46 // This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
47 // Redistribution and use in source and binary forms, with or without
48 // modification, are permitted.
50 // The underlying mathematical formulas are NOT copyrighted.
53 #define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));
55 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
57 LUT(xAA55AA5500550055, a1, a4, a6, 0xC1)
58 LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
59 LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
60 LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
61 LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
62 LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
63 LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
64 LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
65 LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
66 LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
67 LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
68 LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
69 LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
70 LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
71 LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
72 LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
73 LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
74 LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
75 LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
76 LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
77 LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
78 LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
79 LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
80 LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
81 LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)
89 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
91 LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97)
92 LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
93 LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76)
94 LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
95 LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
96 LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65)
97 LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
98 LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
99 LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
100 LUT(x3333CCCC00000000, a2, a5, a6, 0x14)
101 LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
102 LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
103 LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
104 LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
105 LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
106 LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
107 LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
108 LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
109 LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
110 LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
111 LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
112 LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
113 LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
114 LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)
122 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
124 LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
125 LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
126 LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
127 LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
128 LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
129 LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
130 LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
131 LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49)
132 LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
133 LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
134 LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
135 LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
136 LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
137 LUT(x500F500F500F500F, a1, a3, a4, 0x98)
138 LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
139 LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
140 LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
141 LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
142 LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
143 LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
144 LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
145 LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
146 LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
147 LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
148 LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)
156 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
158 LUT(x55F055F055F055F0, a1, a3, a4, 0x72)
159 LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
160 LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59)
161 LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
162 LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
163 LUT(x9999666699996666, a1, a2, a5, 0x69)
164 LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
165 LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
166 LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
167 LUT(x4848484848484848, a1, a2, a3, 0x12)
168 LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
169 LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
170 LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
171 LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
172 LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
173 LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
174 LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
175 LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)
183 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
185 LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
186 LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9)
187 LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
188 LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
189 LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
190 LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
191 LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
192 LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
193 LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
194 LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
195 LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
196 LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
197 LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
198 LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
199 LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
200 LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
201 LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
202 LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
203 LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
204 LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
205 LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
206 LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
207 LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
208 LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
209 LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)
217 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
219 LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2)
220 LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
221 LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
222 LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
223 LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
224 LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
225 LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
226 LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
227 LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
228 LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
229 LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
230 LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
231 LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
232 LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
233 LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
234 LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
235 LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
236 LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
237 LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
238 LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
239 LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
240 LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
241 LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
242 LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)
250 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
252 LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
253 LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
254 LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
255 LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
256 LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
257 LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
258 LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
259 LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
260 LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
261 LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
262 LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
263 LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
264 LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
265 LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
266 LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
267 LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23)
268 LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
269 LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
270 LUT(xA050A050A050A050, a1, a3, a4, 0x21)
271 LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
272 LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
273 LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
274 LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
275 LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)
283 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
285 LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
286 LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
287 LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
288 LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
289 LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
290 LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
291 LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
292 LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
293 LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
294 LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
295 LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
296 LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
297 LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
298 LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
299 LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
300 LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
301 LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
302 LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
303 LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
304 LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
305 LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
306 LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
307 LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)
318 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
319 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
321 * Gate counts: 49 44 46 33 48 46 46 41
324 * Several same-gate-count expressions for each S-box are included (for use on
325 * different CPUs/GPUs).
327 * These Boolean expressions corresponding to DES S-boxes have been generated
328 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
329 * John the Ripper password cracker: http://www.openwall.com/john/
330 * Being mathematical formulas, they are not copyrighted and are free for reuse
333 * This file (a specific representation of the S-box expressions, surrounding
334 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
335 * Redistribution and use in source and binary forms, with or without
336 * modification, are permitted. (This is a heavily cut-down "BSD license".)
338 * The effort has been sponsored by Rapid7: http://www.rapid7.com
341 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
343 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
345 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
346 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
347 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
348 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
349 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
350 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
351 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
352 u32 x00, x01, x10, x11, x20, x21, x30, x31;
354 x55005500 = a1 & ~a5;
355 x5A0F5A0F = a4 ^ x55005500;
358 x22226666 = x3333FFFF & x66666666;
359 x2D2D6969 = a4 ^ x22226666;
360 x25202160 = x2D2D6969 & ~x5A0F5A0F;
363 x33CCCC33 = a3 ^ x00FFFF00;
364 x4803120C = x5A0F5A0F & ~x33CCCC33;
365 x2222FFFF = a6 | x22226666;
366 x6A21EDF3 = x4803120C ^ x2222FFFF;
367 x4A01CC93 = x6A21EDF3 & ~x25202160;
370 x7F75FFFF = x6A21EDF3 | x5555FFFF;
371 x00D20096 = a5 & ~x2D2D6969;
372 x7FA7FF69 = x7F75FFFF ^ x00D20096;
374 x0A0A0000 = a4 & ~x5555FFFF;
375 x0AD80096 = x00D20096 ^ x0A0A0000;
376 x00999900 = x00FFFF00 & ~x66666666;
377 x0AD99996 = x0AD80096 | x00999900;
379 x22332233 = a3 & ~x55005500;
380 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
381 x054885C0 = x257AA5F0 & ~x22332233;
382 xFAB77A3F = ~x054885C0;
383 x2221EDF3 = x3333FFFF & x6A21EDF3;
384 xD89697CC = xFAB77A3F ^ x2221EDF3;
385 x20 = x7FA7FF69 & ~a2;
386 x21 = x20 ^ xD89697CC;
389 x05B77AC0 = x00FFFF00 ^ x054885C0;
390 x05F77AD6 = x00D20096 | x05B77AC0;
391 x36C48529 = x3333FFFF ^ x05F77AD6;
392 x6391D07C = a1 ^ x36C48529;
393 xBB0747B0 = xD89697CC ^ x6391D07C;
394 x00 = x25202160 | a2;
395 x01 = x00 ^ xBB0747B0;
398 x4C460000 = x3333FFFF ^ x7F75FFFF;
399 x4EDF9996 = x0AD99996 | x4C460000;
400 x2D4E49EA = x6391D07C ^ x4EDF9996;
401 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
402 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
403 x10 = x4A01CC93 | a2;
404 x11 = x10 ^ x96B1B65A;
407 x5AFF5AFF = a5 | x5A0F5A0F;
408 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
409 x4201C010 = x4A01CC93 & x6391D07C;
410 x10B0D205 = x52B11215 ^ x4201C010;
411 x30 = x10B0D205 | a2;
412 x31 = x30 ^ x0AD99996;
416 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
419 u32 x55550000, x00AA00FF, x33BB33FF;
420 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
421 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
422 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
423 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
424 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
425 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
426 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
427 u32 x00, x01, x10, x11, x20, x21, x30, x31;
431 x55550000 = a1 & ~a6;
432 x00AA00FF = a5 & ~x55550000;
433 x33BB33FF = a2 | x00AA00FF;
435 x33CC0000 = x33CC33CC & ~a6;
436 x11441144 = a1 & x33CC33CC;
437 x11BB11BB = a5 ^ x11441144;
438 x003311BB = x11BB11BB & ~x33CC0000;
441 x336600FF = x00AA00FF ^ x33CC0000;
442 x332200FF = x33BB33FF & x336600FF;
443 x332200F0 = x332200FF & ~x00000F0F;
445 x0302000F = a3 & x332200FF;
447 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
448 x33CCCC33 = a6 ^ x33CC33CC;
449 x33CCC030 = x33CCCC33 & ~x00000F0F;
450 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
451 x10 = a4 & ~x332200F0;
452 x11 = x10 ^ x9A646A95;
455 x00333303 = a2 & ~x33CCC030;
456 x118822B8 = x11BB11BB ^ x00333303;
457 xA8208805 = xA9A8AAA5 & ~x118822B8;
458 x3CC3C33C = a3 ^ x33CCCC33;
459 x94E34B39 = xA8208805 ^ x3CC3C33C;
460 x00 = x33BB33FF & ~a4;
461 x01 = x00 ^ x94E34B39;
464 x0331330C = x0302000F ^ x00333303;
465 x3FF3F33C = x3CC3C33C | x0331330C;
466 xA9DF596A = x33BB33FF ^ x9A646A95;
467 xA9DF5F6F = x00000F0F | xA9DF596A;
468 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
470 xA9466A6A = x332200FF ^ x9A646A95;
471 x3DA52153 = x94E34B39 ^ xA9466A6A;
472 x29850143 = xA9DF5F6F & x3DA52153;
473 x33C0330C = x33CC33CC & x3FF3F33C;
474 x1A45324F = x29850143 ^ x33C0330C;
475 x20 = x1A45324F | a4;
476 x21 = x20 ^ x962CAC53;
479 x0A451047 = x1A45324F & ~x118822B8;
480 xBBDFDD7B = x33CCCC33 | xA9DF596A;
481 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
482 x30 = x003311BB | a4;
483 x31 = x30 ^ xB19ACD3C;
487 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
489 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
490 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
491 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
492 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
493 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
494 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
495 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
496 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
497 u32 x00, x01, x10, x11, x20, x21, x30, x31;
499 x44444444 = a1 & ~a2;
501 x4F4FF4F4 = x44444444 | x0F0FF0F0;
503 x00AAAA00 = x00FFFF00 & ~a1;
504 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
506 x3C3CC3C3 = a2 ^ x0F0FF0F0;
507 x3C3C0000 = x3C3CC3C3 & ~a6;
508 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
509 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
511 x00005EF4 = a6 & x4FE55EF4;
512 x00FF5EFF = a4 | x00005EF4;
513 x00555455 = a1 & x00FF5EFF;
514 x3C699796 = x3C3CC3C3 ^ x00555455;
515 x30 = x4FE55EF4 & ~a5;
516 x31 = x30 ^ x3C699796;
519 x000FF000 = x0F0FF0F0 & x00FFFF00;
521 x26D9A15E = x7373F4F4 ^ x55AA55AA;
522 x2FDFAF5F = a3 | x26D9A15E;
523 x2FD00F5F = x2FDFAF5F & ~x000FF000;
525 x55AAFFAA = x00AAAA00 | x55AA55AA;
526 x28410014 = x3C699796 & ~x55AAFFAA;
528 x000000CC = x000000FF & ~a2;
529 x284100D8 = x28410014 ^ x000000CC;
531 x204100D0 = x7373F4F4 & x284100D8;
532 x3C3CC3FF = x3C3CC3C3 | x000000FF;
533 x1C3CC32F = x3C3CC3FF & ~x204100D0;
534 x4969967A = a1 ^ x1C3CC32F;
535 x10 = x2FD00F5F & a5;
536 x11 = x10 ^ x4969967A;
539 x4CC44CC4 = x4FE55EF4 & ~a2;
540 x40C040C0 = x4CC44CC4 & ~a3;
541 xC3C33C3C = ~x3C3CC3C3;
542 x9669C396 = x55AAFFAA ^ xC3C33C3C;
543 xD6A98356 = x40C040C0 ^ x9669C396;
544 x00 = a5 & ~x0C840A00;
545 x01 = x00 ^ xD6A98356;
548 xD6E9C3D6 = x40C040C0 | x9669C396;
549 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
550 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
551 x001A000B = a4 & ~x4FE55EF4;
552 x9A1F2D1B = x9A072D12 | x001A000B;
553 x20 = a5 & ~x284100D8;
554 x21 = x20 ^ x9A1F2D1B;
558 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
560 u32 x5A5A5A5A, x0F0FF0F0;
561 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
562 x52FBCA0F, x61C8F93C;
563 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
564 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
565 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
566 u32 x00, x01, x10, x11, x20, x21, x30, x31;
571 x33FFCC00 = a5 ^ x33FF33FF;
572 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
573 x0C0CC0C0 = x0F0FF0F0 & ~a2;
574 x0CF3C03F = a4 ^ x0C0CC0C0;
575 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
576 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
577 x61C8F93C = a2 ^ x52FBCA0F;
579 x00C0C03C = x0CF3C03F & x61C8F93C;
580 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
581 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
582 x30908326 = x3B92A366 & ~x0F0F30C0;
583 x3C90B3D6 = x0C0030F0 ^ x30908326;
586 x0C0CFFFF = a5 | x0C0CC0C0;
587 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
588 x04124C11 = x379E5C99 & ~x33CC33CC;
589 x56E9861E = x52FBCA0F ^ x04124C11;
590 x00 = a6 & ~x3C90B3D6;
591 x01 = x00 ^ x56E9861E;
594 xA91679E1 = ~x56E9861E;
595 x10 = x3C90B3D6 & ~a6;
596 x11 = x10 ^ xA91679E1;
599 x9586CA37 = x3C90B3D6 ^ xA91679E1;
600 x8402C833 = x9586CA37 & ~x33CC33CC;
601 x84C2C83F = x00C0C03C | x8402C833;
602 xB35C94A6 = x379E5C99 ^ x84C2C83F;
603 x20 = x61C8F93C | a6;
604 x21 = x20 ^ xB35C94A6;
607 x30 = a6 & x61C8F93C;
608 x31 = x30 ^ xB35C94A6;
612 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
614 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
615 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
616 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
617 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
618 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
619 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
620 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
621 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
622 u32 x00, x01, x10, x11, x20, x21, x30, x31;
625 x77770000 = x77777777 & ~a6;
626 x22225555 = a1 ^ x77770000;
627 x11116666 = a3 ^ x22225555;
628 x1F1F6F6F = a4 | x11116666;
630 x70700000 = x77770000 & ~a4;
631 x43433333 = a3 ^ x70700000;
632 x00430033 = a5 & x43433333;
633 x55557777 = a1 | x11116666;
634 x55167744 = x00430033 ^ x55557777;
635 x5A19784B = a4 ^ x55167744;
637 x5A1987B4 = a6 ^ x5A19784B;
638 x7A3BD7F5 = x22225555 | x5A1987B4;
639 x003B00F5 = a5 & x7A3BD7F5;
640 x221955A0 = x22225555 ^ x003B00F5;
641 x05050707 = a4 & x55557777;
642 x271C52A7 = x221955A0 ^ x05050707;
644 x2A2A82A0 = x7A3BD7F5 & ~a1;
645 x6969B193 = x43433333 ^ x2A2A82A0;
646 x1FE06F90 = a5 ^ x1F1F6F6F;
647 x16804E00 = x1FE06F90 & ~x6969B193;
648 xE97FB1FF = ~x16804E00;
649 x20 = xE97FB1FF & ~a2;
650 x21 = x20 ^ x5A19784B;
653 x43403302 = x43433333 & ~x003B00F5;
654 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
655 x37DEFFB7 = x271C52A7 | x35CAED30;
656 x349ECCB5 = x37DEFFB7 & ~x43403302;
657 x0B01234A = x1F1F6F6F & ~x349ECCB5;
659 x101884B4 = x5A1987B4 & x349ECCB5;
660 x0FF8EB24 = x1FE06F90 ^ x101884B4;
661 x41413333 = x43433333 & x55557777;
662 x4FF9FB37 = x0FF8EB24 | x41413333;
663 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
664 x30 = x4FC2FBC2 & a2;
665 x31 = x30 ^ x271C52A7;
668 x22222222 = a1 ^ x77777777;
669 x16BCEE97 = x349ECCB5 ^ x22222222;
670 x0F080B04 = a4 & x0FF8EB24;
671 x19B4E593 = x16BCEE97 ^ x0F080B04;
672 x00 = x0B01234A | a2;
673 x01 = x00 ^ x19B4E593;
676 x5C5C5C5C = x1F1F6F6F ^ x43433333;
677 x4448184C = x5C5C5C5C & ~x19B4E593;
678 x2DDABE71 = x22225555 ^ x0FF8EB24;
679 x6992A63D = x4448184C ^ x2DDABE71;
680 x10 = x1F1F6F6F & a2;
681 x11 = x10 ^ x6992A63D;
685 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
688 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
689 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
690 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
691 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
692 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
693 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
694 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
695 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
696 u32 x00, x01, x10, x11, x20, x21, x30, x31;
701 x11115555 = a1 & x3333FFFF;
702 x22DD6699 = x33CC33CC ^ x11115555;
703 x22DD9966 = a6 ^ x22DD6699;
704 x00220099 = a5 & ~x22DD9966;
706 x00551144 = a1 & x22DD9966;
707 x33662277 = a2 ^ x00551144;
709 x7B7E7A7F = x33662277 | x5A5A5A5A;
710 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
712 x09030C06 = a3 & x59A31CE6;
713 x09030000 = x09030C06 & ~a6;
714 x336622FF = x00220099 | x33662277;
715 x3A6522FF = x09030000 ^ x336622FF;
716 x30 = x3A6522FF & a4;
717 x31 = x30 ^ x59A31CE6;
720 x484D494C = a2 ^ x7B7E7A7F;
721 x0000B6B3 = a6 & ~x484D494C;
722 x0F0FB9BC = a3 ^ x0000B6B3;
723 x00FC00F9 = a5 & ~x09030C06;
724 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
726 x5DF75DF7 = a1 | x59A31CE6;
727 x116600F7 = x336622FF & x5DF75DF7;
728 x1E69B94B = x0F0FB9BC ^ x116600F7;
729 x1668B94B = x1E69B94B & ~x09030000;
730 x20 = x00220099 | a4;
731 x21 = x20 ^ x1668B94B;
734 x7B7B7B7B = a2 | x5A5A5A5A;
735 x411E5984 = x3A6522FF ^ x7B7B7B7B;
736 x1FFFFDFD = x11115555 | x0FFFB9FD;
737 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
739 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
740 x004B002D = a5 & ~x3CB4DFD2;
741 xB7B2B6B3 = ~x484D494C;
742 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
743 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
744 x10 = xCC82CDE5 & ~a4;
745 x11 = x10 ^ x5EE1A479;
748 x0055EEBB = a6 ^ x00551144;
749 x5A5AECE9 = a1 ^ x0F0FB9BC;
750 x0050ECA9 = x0055EEBB & x5A5AECE9;
751 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
752 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
753 x00 = x0FFFB9FD & ~a4;
754 x01 = x00 ^ xC59A2D67;
758 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
760 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
761 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
762 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
763 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
764 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
765 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
766 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
767 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
768 u32 x00, x01, x10, x11, x20, x21, x30, x31;
771 x3CC33CC3 = a3 ^ x0FF00FF0;
772 x00003CC3 = a6 & x3CC33CC3;
773 x0F000F00 = a4 & x0FF00FF0;
774 x5A555A55 = a2 ^ x0F000F00;
775 x00001841 = x00003CC3 & x5A555A55;
777 x00000F00 = a6 & x0F000F00;
778 x33333C33 = a3 ^ x00000F00;
779 x7B777E77 = x5A555A55 | x33333C33;
780 x0FF0F00F = a6 ^ x0FF00FF0;
781 x74878E78 = x7B777E77 ^ x0FF0F00F;
782 x30 = a1 & ~x00001841;
783 x31 = x30 ^ x74878E78;
786 x003C003C = a5 & ~x3CC33CC3;
787 x5A7D5A7D = x5A555A55 | x003C003C;
788 x333300F0 = x00003CC3 ^ x33333C33;
789 x694E5A8D = x5A7D5A7D ^ x333300F0;
791 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
792 x000F0303 = a4 & ~x0FF0CCCC;
793 x5A505854 = x5A555A55 & ~x000F0303;
794 x33CC000F = a5 ^ x333300F0;
795 x699C585B = x5A505854 ^ x33CC000F;
797 x7F878F78 = x0F000F00 | x74878E78;
798 x21101013 = a3 & x699C585B;
799 x7F979F7B = x7F878F78 | x21101013;
800 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
801 x4F9493BB = x7F979F7B ^ x30030CC0;
802 x00 = x4F9493BB & ~a1;
803 x01 = x00 ^ x694E5A8D;
806 x6F9CDBFB = x699C585B | x4F9493BB;
807 x0000DBFB = a6 & x6F9CDBFB;
808 x00005151 = a2 & x0000DBFB;
809 x26DAC936 = x694E5A8D ^ x4F9493BB;
810 x26DA9867 = x00005151 ^ x26DAC936;
812 x27DA9877 = x21101013 | x26DA9867;
813 x27DA438C = x0000DBFB ^ x27DA9877;
814 x2625C9C9 = a5 ^ x26DAC936;
815 x27FFCBCD = x27DA438C | x2625C9C9;
816 x20 = x27FFCBCD & a1;
817 x21 = x20 ^ x699C585B;
820 x27FF1036 = x0000DBFB ^ x27FFCBCD;
821 x27FF103E = x003C003C | x27FF1036;
822 xB06B6C44 = ~x4F9493BB;
823 x97947C7A = x27FF103E ^ xB06B6C44;
824 x10 = x97947C7A & ~a1;
825 x11 = x10 ^ x26DA9867;
829 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
831 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
832 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
833 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
834 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
835 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
836 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
837 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
838 u32 x00, x01, x10, x11, x20, x21, x30, x31;
840 x0C0C0C0C = a3 & ~a2;
841 x0000F0F0 = a5 & ~a3;
842 x00FFF00F = a4 ^ x0000F0F0;
843 x00555005 = a1 & x00FFF00F;
844 x00515001 = x00555005 & ~x0C0C0C0C;
846 x33000330 = a2 & ~x00FFF00F;
847 x77555775 = a1 | x33000330;
848 x30303030 = a2 & ~a3;
849 x3030CFCF = a5 ^ x30303030;
850 x30104745 = x77555775 & x3030CFCF;
851 x30555745 = x00555005 | x30104745;
853 xFF000FF0 = ~x00FFF00F;
854 xCF1048B5 = x30104745 ^ xFF000FF0;
855 x080A080A = a3 & ~x77555775;
856 xC71A40BF = xCF1048B5 ^ x080A080A;
857 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
858 x10 = x00515001 | a6;
859 x11 = x10 ^ xCB164CB3;
862 x9E4319E6 = a1 ^ xCB164CB3;
863 x000019E6 = a5 & x9E4319E6;
864 xF429738C = a2 ^ xC71A40BF;
865 xF4296A6A = x000019E6 ^ xF429738C;
866 xC729695A = x33000330 ^ xF4296A6A;
868 xC47C3D2F = x30555745 ^ xF4296A6A;
869 xF77F3F3F = a2 | xC47C3D2F;
870 x9E43E619 = a5 ^ x9E4319E6;
871 x693CD926 = xF77F3F3F ^ x9E43E619;
872 x20 = x30555745 & a6;
873 x21 = x20 ^ x693CD926;
876 xF719A695 = x3030CFCF ^ xC729695A;
877 xF4FF73FF = a4 | xF429738C;
878 x03E6D56A = xF719A695 ^ xF4FF73FF;
879 x56B3803F = a1 ^ x03E6D56A;
880 x30 = x56B3803F & a6;
881 x31 = x30 ^ xC729695A;
884 xF700A600 = xF719A695 & ~a4;
885 x61008000 = x693CD926 & xF700A600;
886 x03B7856B = x00515001 ^ x03E6D56A;
887 x62B7056B = x61008000 ^ x03B7856B;
888 x00 = x62B7056B | a6;
889 x01 = x00 ^ xC729695A;
896 #if defined IS_AMD || IS_UNKNOWN
899 * Bitslice DES S-boxes making use of a vector conditional select operation
900 * (e.g., vsel on PowerPC with AltiVec).
902 * Gate counts: 36 33 33 26 35 34 34 32
905 * Several same-gate-count expressions for each S-box are included (for use on
906 * different CPUs/GPUs).
908 * These Boolean expressions corresponding to DES S-boxes have been generated
909 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
910 * John the Ripper password cracker: http://www.openwall.com/john/
911 * Being mathematical formulas, they are not copyrighted and are free for reuse
914 * This file (a specific representation of the S-box expressions, surrounding
915 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
916 * Redistribution and use in source and binary forms, with or without
917 * modification, are permitted. (This is a heavily cut-down "BSD license".)
919 * The effort has been sponsored by Rapid7: http://www.rapid7.com
922 #define vnot(dst, a) (dst) = ~(a)
923 #define vand(dst, a, b) (dst) = (a) & (b)
924 #define vor(dst, a, b) (dst) = (a) | (b)
925 #define vandn(dst, a, b) (dst) = (a) & ~(b)
926 #define vxor(dst, a, b) (dst) = (a) ^ (b)
927 #define vsel(dst, a, b, c) (dst) = bitselect((a),(b),(c))
930 s1(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
931 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
933 u32 x0F0F3333, x3C3C3C3C, x55FF55FF, x69C369C3, x0903B73F, x09FCB7C0,
935 u32 x55AFD1B7, x3C3C69C3, x6993B874;
936 u32 x5CEDE59F, x09FCE295, x5D91A51E, x529E962D;
937 u32 x29EEADC0, x4B8771A3, x428679F3, x6B68D433;
938 u32 x5BA7E193, x026F12F3, x6B27C493, x94D83B6C;
939 u32 x965E0B0F, x3327A113, x847F0A1F, xD6E19C32;
940 u32 x0DBCE883, x3A25A215, x37994A96;
941 u32 x8A487EA7, x8B480F07, xB96C2D16;
944 vsel(x0F0F3333, a3, a2, a5);
945 vxor(x3C3C3C3C, a2, a3);
946 vor(x55FF55FF, a1, a4);
947 vxor(x69C369C3, x3C3C3C3C, x55FF55FF);
948 vsel(x0903B73F, a5, x0F0F3333, x69C369C3);
949 vxor(x09FCB7C0, a4, x0903B73F);
950 vxor(x5CA9E295, a1, x09FCB7C0);
952 vsel(x55AFD1B7, x5CA9E295, x55FF55FF, x0F0F3333);
953 vsel(x3C3C69C3, x3C3C3C3C, x69C369C3, a5);
954 vxor(x6993B874, x55AFD1B7, x3C3C69C3);
956 vsel(x5CEDE59F, x55FF55FF, x5CA9E295, x6993B874);
957 vsel(x09FCE295, x09FCB7C0, x5CA9E295, a5);
958 vsel(x5D91A51E, x5CEDE59F, x6993B874, x09FCE295);
959 vxor(x529E962D, x0F0F3333, x5D91A51E);
961 vsel(x29EEADC0, x69C369C3, x09FCB7C0, x5CEDE59F);
962 vsel(x4B8771A3, x0F0F3333, x69C369C3, x5CA9E295);
963 vsel(x428679F3, a5, x4B8771A3, x529E962D);
964 vxor(x6B68D433, x29EEADC0, x428679F3);
966 vsel(x5BA7E193, x5CA9E295, x4B8771A3, a3);
967 vsel(x026F12F3, a4, x0F0F3333, x529E962D);
968 vsel(x6B27C493, x6B68D433, x5BA7E193, x026F12F3);
969 vnot(x94D83B6C, x6B27C493);
970 vsel(x0, x94D83B6C, x6B68D433, a6);
971 vxor(*out1, *out1, x0);
973 vsel(x965E0B0F, x94D83B6C, a3, x428679F3);
974 vsel(x3327A113, x5BA7E193, a2, x69C369C3);
975 vsel(x847F0A1F, x965E0B0F, a4, x3327A113);
976 vxor(xD6E19C32, x529E962D, x847F0A1F);
977 vsel(x1, xD6E19C32, x5CA9E295, a6);
978 vxor(*out2, *out2, x1);
980 vsel(x0DBCE883, x09FCE295, x3C3C69C3, x847F0A1F);
981 vsel(x3A25A215, x3327A113, x5CA9E295, x0903B73F);
982 vxor(x37994A96, x0DBCE883, x3A25A215);
983 vsel(x3, x37994A96, x529E962D, a6);
984 vxor(*out4, *out4, x3);
986 vxor(x8A487EA7, x5CA9E295, xD6E19C32);
987 vsel(x8B480F07, a3, x8A487EA7, x847F0A1F);
988 vsel(xB96C2D16, x8B480F07, x3C3C3C3C, x3A25A215);
989 vsel(x2, xB96C2D16, x6993B874, a6);
990 vxor(*out3, *out3, x2);
994 s2(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
995 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
997 u32 x55553333, x0055FF33, x33270F03, x66725A56, x00FFFF00, x668DA556;
998 u32 x0F0F5A56, xF0F0A5A9, xA5A5969A, xA55A699A;
999 u32 x0F5AF03C, x6600FF56, x87A5F09C;
1000 u32 xA55A963C, x3C69C30F, xB44BC32D;
1001 u32 x66D7CC56, x0F4B0F2D, x699CC37B, x996C66D2;
1002 u32 xB46C662D, x278DB412, xB66CB43B;
1003 u32 xD2DC4E52, x27993333, xD2994E33;
1004 u32 x278D0F2D, x2E0E547B, x09976748;
1007 vsel(x55553333, a1, a3, a6);
1008 vsel(x0055FF33, a6, x55553333, a5);
1009 vsel(x33270F03, a3, a4, x0055FF33);
1010 vxor(x66725A56, a1, x33270F03);
1011 vxor(x00FFFF00, a5, a6);
1012 vxor(x668DA556, x66725A56, x00FFFF00);
1014 vsel(x0F0F5A56, a4, x66725A56, a6);
1015 vnot(xF0F0A5A9, x0F0F5A56);
1016 vxor(xA5A5969A, x55553333, xF0F0A5A9);
1017 vxor(xA55A699A, x00FFFF00, xA5A5969A);
1018 vsel(x1, xA55A699A, x668DA556, a2);
1019 vxor(*out2, *out2, x1);
1021 vxor(x0F5AF03C, a4, x0055FF33);
1022 vsel(x6600FF56, x66725A56, a6, x00FFFF00);
1023 vsel(x87A5F09C, xA5A5969A, x0F5AF03C, x6600FF56);
1025 vsel(xA55A963C, xA5A5969A, x0F5AF03C, a5);
1026 vxor(x3C69C30F, a3, x0F5AF03C);
1027 vsel(xB44BC32D, xA55A963C, x3C69C30F, a1);
1029 vsel(x66D7CC56, x66725A56, x668DA556, xA5A5969A);
1030 vsel(x0F4B0F2D, a4, xB44BC32D, a5);
1031 vxor(x699CC37B, x66D7CC56, x0F4B0F2D);
1032 vxor(x996C66D2, xF0F0A5A9, x699CC37B);
1033 vsel(x0, x996C66D2, xB44BC32D, a2);
1034 vxor(*out1, *out1, x0);
1036 vsel(xB46C662D, xB44BC32D, x996C66D2, x00FFFF00);
1037 vsel(x278DB412, x668DA556, xA5A5969A, a1);
1038 vsel(xB66CB43B, xB46C662D, x278DB412, x6600FF56);
1040 vsel(xD2DC4E52, x66D7CC56, x996C66D2, xB44BC32D);
1041 vsel(x27993333, x278DB412, a3, x0055FF33);
1042 vsel(xD2994E33, xD2DC4E52, x27993333, a5);
1043 vsel(x3, x87A5F09C, xD2994E33, a2);
1044 vxor(*out4, *out4, x3);
1046 vsel(x278D0F2D, x278DB412, x0F4B0F2D, a6);
1047 vsel(x2E0E547B, x0F0F5A56, xB66CB43B, x278D0F2D);
1048 vxor(x09976748, x27993333, x2E0E547B);
1049 vsel(x2, xB66CB43B, x09976748, a2);
1050 vxor(*out3, *out3, x2);
1054 s3(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1055 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1057 u32 x0F330F33, x0F33F0CC, x5A66A599;
1058 u32 x2111B7BB, x03FF3033, x05BB50EE, x074F201F, x265E97A4;
1059 u32 x556BA09E, x665A93AC, x99A56C53;
1060 u32 x25A1A797, x5713754C, x66559355, x47B135C6;
1061 u32 x9A5A5C60, xD07AF8F8, x87698DB4, xE13C1EE1;
1062 u32 x9E48CDE4, x655B905E, x00A55CFF, x9E49915E;
1063 u32 xD6599874, x05330022, xD2699876;
1064 u32 x665F9364, xD573F0F2, xB32C6396;
1067 vsel(x0F330F33, a4, a3, a5);
1068 vxor(x0F33F0CC, a6, x0F330F33);
1069 vxor(x5A66A599, a2, x0F33F0CC);
1071 vsel(x2111B7BB, a3, a6, x5A66A599);
1072 vsel(x03FF3033, a5, a3, x0F33F0CC);
1073 vsel(x05BB50EE, a5, x0F33F0CC, a2);
1074 vsel(x074F201F, x03FF3033, a4, x05BB50EE);
1075 vxor(x265E97A4, x2111B7BB, x074F201F);
1077 vsel(x556BA09E, x5A66A599, x05BB50EE, a4);
1078 vsel(x665A93AC, x556BA09E, x265E97A4, a3);
1079 vnot(x99A56C53, x665A93AC);
1080 vsel(x1, x265E97A4, x99A56C53, a1);
1081 vxor(*out2, *out2, x1);
1083 vxor(x25A1A797, x03FF3033, x265E97A4);
1084 vsel(x5713754C, a2, x0F33F0CC, x074F201F);
1085 vsel(x66559355, x665A93AC, a2, a5);
1086 vsel(x47B135C6, x25A1A797, x5713754C, x66559355);
1088 vxor(x9A5A5C60, x03FF3033, x99A56C53);
1089 vsel(xD07AF8F8, x9A5A5C60, x556BA09E, x5A66A599);
1090 vxor(x87698DB4, x5713754C, xD07AF8F8);
1091 vxor(xE13C1EE1, x66559355, x87698DB4);
1093 vsel(x9E48CDE4, x9A5A5C60, x87698DB4, x265E97A4);
1094 vsel(x655B905E, x66559355, x05BB50EE, a4);
1095 vsel(x00A55CFF, a5, a6, x9A5A5C60);
1096 vsel(x9E49915E, x9E48CDE4, x655B905E, x00A55CFF);
1097 vsel(x0, x9E49915E, xE13C1EE1, a1);
1098 vxor(*out1, *out1, x0);
1100 vsel(xD6599874, xD07AF8F8, x66559355, x0F33F0CC);
1101 vand(x05330022, x0F330F33, x05BB50EE);
1102 vsel(xD2699876, xD6599874, x00A55CFF, x05330022);
1103 vsel(x3, x5A66A599, xD2699876, a1);
1104 vxor(*out4, *out4, x3);
1106 vsel(x665F9364, x265E97A4, x66559355, x47B135C6);
1107 vsel(xD573F0F2, xD07AF8F8, x05330022, a4);
1108 vxor(xB32C6396, x665F9364, xD573F0F2);
1109 vsel(x2, xB32C6396, x47B135C6, a1);
1110 vxor(*out3, *out3, x2);
1114 s4(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1115 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1117 u32 x0505AFAF, x0555AF55, x0A5AA05A, x46566456, x0A0A5F5F, x0AF55FA0,
1118 x0AF50F0F, x4CA36B59;
1120 u32 x01BB23BB, x5050FAFA, xA31C26BE, xA91679E1;
1122 u32 x50E9FA1E, x0AF55F00, x827D9784, xD2946D9A;
1123 u32 x31F720B3, x11FB21B3, x4712A7AD, x9586CA37;
1126 vsel(x0505AFAF, a5, a3, a1);
1127 vsel(x0555AF55, x0505AFAF, a1, a4);
1128 vxor(x0A5AA05A, a3, x0555AF55);
1129 vsel(x46566456, a1, x0A5AA05A, a2);
1130 vsel(x0A0A5F5F, a3, a5, a1);
1131 vxor(x0AF55FA0, a4, x0A0A5F5F);
1132 vsel(x0AF50F0F, x0AF55FA0, a3, a5);
1133 vxor(x4CA36B59, x46566456, x0AF50F0F);
1135 vnot(xB35C94A6, x4CA36B59);
1137 vsel(x01BB23BB, a4, a2, x0555AF55);
1138 vxor(x5050FAFA, a1, x0505AFAF);
1139 vsel(xA31C26BE, xB35C94A6, x01BB23BB, x5050FAFA);
1140 vxor(xA91679E1, x0A0A5F5F, xA31C26BE);
1142 vnot(x56E9861E, xA91679E1);
1144 vsel(x50E9FA1E, x5050FAFA, x56E9861E, a4);
1145 vsel(x0AF55F00, x0AF50F0F, x0AF55FA0, x0A0A5F5F);
1146 vsel(x827D9784, xB35C94A6, x0AF55F00, a2);
1147 vxor(xD2946D9A, x50E9FA1E, x827D9784);
1148 vsel(x2, xD2946D9A, x4CA36B59, a6);
1149 vxor(*out3, *out3, x2);
1150 vsel(x3, xB35C94A6, xD2946D9A, a6);
1151 vxor(*out4, *out4, x3);
1153 vsel(x31F720B3, a2, a4, x0AF55FA0);
1154 vsel(x11FB21B3, x01BB23BB, x31F720B3, x5050FAFA);
1155 vxor(x4712A7AD, x56E9861E, x11FB21B3);
1156 vxor(x9586CA37, xD2946D9A, x4712A7AD);
1157 vsel(x0, x56E9861E, x9586CA37, a6);
1158 vxor(*out1, *out1, x0);
1159 vsel(x1, x9586CA37, xA91679E1, a6);
1160 vxor(*out2, *out2, x1);
1164 s5(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1165 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1167 u32 x550F550F, xAAF0AAF0, xA5F5A5F5, x96C696C6, x00FFFF00, x963969C6;
1168 u32 x2E3C2E3C, xB73121F7, x1501DF0F, x00558A5F, x2E69A463;
1169 u32 x0679ED42, x045157FD, xB32077FF, x9D49D39C;
1170 u32 xAC81CFB2, xF72577AF, x5BA4B81D;
1171 u32 x5BA477AF, x4895469F, x3A35273A, x1A35669A;
1172 u32 x12E6283D, x9E47D3D4, x1A676AB4;
1173 u32 x891556DF, xE5E77F82, x6CF2295D;
1174 u32 x2E3CA5F5, x9697C1C6, x369CC1D6;
1177 vsel(x550F550F, a1, a3, a5);
1178 vnot(xAAF0AAF0, x550F550F);
1179 vsel(xA5F5A5F5, xAAF0AAF0, a1, a3);
1180 vxor(x96C696C6, a2, xA5F5A5F5);
1181 vxor(x00FFFF00, a5, a6);
1182 vxor(x963969C6, x96C696C6, x00FFFF00);
1184 vsel(x2E3C2E3C, a3, xAAF0AAF0, a2);
1185 vsel(xB73121F7, a2, x963969C6, x96C696C6);
1186 vsel(x1501DF0F, a6, x550F550F, xB73121F7);
1187 vsel(x00558A5F, x1501DF0F, a5, a1);
1188 vxor(x2E69A463, x2E3C2E3C, x00558A5F);
1190 vsel(x0679ED42, x00FFFF00, x2E69A463, x96C696C6);
1191 vsel(x045157FD, a6, a1, x0679ED42);
1192 vsel(xB32077FF, xB73121F7, a6, x045157FD);
1193 vxor(x9D49D39C, x2E69A463, xB32077FF);
1194 vsel(x2, x9D49D39C, x2E69A463, a4);
1195 vxor(*out3, *out3, x2);
1197 vsel(xAC81CFB2, xAAF0AAF0, x1501DF0F, x0679ED42);
1198 vsel(xF72577AF, xB32077FF, x550F550F, a1);
1199 vxor(x5BA4B81D, xAC81CFB2, xF72577AF);
1200 vsel(x1, x5BA4B81D, x963969C6, a4);
1201 vxor(*out2, *out2, x1);
1203 vsel(x5BA477AF, x5BA4B81D, xF72577AF, a6);
1204 vsel(x4895469F, x5BA477AF, x00558A5F, a2);
1205 vsel(x3A35273A, x2E3C2E3C, a2, x963969C6);
1206 vsel(x1A35669A, x4895469F, x3A35273A, x5BA4B81D);
1208 vsel(x12E6283D, a5, x5BA4B81D, x963969C6);
1209 vsel(x9E47D3D4, x96C696C6, x9D49D39C, xAC81CFB2);
1210 vsel(x1A676AB4, x12E6283D, x9E47D3D4, x4895469F);
1212 vsel(x891556DF, xB32077FF, x4895469F, x3A35273A);
1213 vsel(xE5E77F82, xF72577AF, x00FFFF00, x12E6283D);
1214 vxor(x6CF2295D, x891556DF, xE5E77F82);
1215 vsel(x3, x1A35669A, x6CF2295D, a4);
1216 vxor(*out4, *out4, x3);
1218 vsel(x2E3CA5F5, x2E3C2E3C, xA5F5A5F5, a6);
1219 vsel(x9697C1C6, x96C696C6, x963969C6, x045157FD);
1220 vsel(x369CC1D6, x2E3CA5F5, x9697C1C6, x5BA477AF);
1221 vsel(x0, x369CC1D6, x1A676AB4, a4);
1222 vxor(*out1, *out1, x0);
1226 s6(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1227 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1229 u32 x555500FF, x666633CC, x606F30CF, x353A659A, x353A9A65, xCAC5659A;
1230 u32 x353A6565, x0A3F0A6F, x6C5939A3, x5963A3C6;
1231 u32 x35FF659A, x3AF06A95, x05CF0A9F, x16E94A97;
1232 u32 x86CD4C9B, x12E0FFFD, x942D9A67;
1233 u32 x142956AB, x455D45DF, x1C3EE619;
1234 u32 x2AEA70D5, x20CF7A9F, x3CF19C86, x69A49C79;
1235 u32 x840DBB67, x6DA19C1E, x925E63E1;
1236 u32 x9C3CA761, x257A75D5, xB946D2B4;
1239 vsel(x555500FF, a1, a4, a5);
1240 vxor(x666633CC, a2, x555500FF);
1241 vsel(x606F30CF, x666633CC, a4, a3);
1242 vxor(x353A659A, a1, x606F30CF);
1243 vxor(x353A9A65, a5, x353A659A);
1244 vnot(xCAC5659A, x353A9A65);
1246 vsel(x353A6565, x353A659A, x353A9A65, a4);
1247 vsel(x0A3F0A6F, a3, a4, x353A6565);
1248 vxor(x6C5939A3, x666633CC, x0A3F0A6F);
1249 vxor(x5963A3C6, x353A9A65, x6C5939A3);
1251 vsel(x35FF659A, a4, x353A659A, x353A6565);
1252 vxor(x3AF06A95, a3, x35FF659A);
1253 vsel(x05CF0A9F, a4, a3, x353A9A65);
1254 vsel(x16E94A97, x3AF06A95, x05CF0A9F, x6C5939A3);
1256 vsel(x86CD4C9B, xCAC5659A, x05CF0A9F, x6C5939A3);
1257 vsel(x12E0FFFD, a5, x3AF06A95, x16E94A97);
1258 vsel(x942D9A67, x86CD4C9B, x353A9A65, x12E0FFFD);
1259 vsel(x0, xCAC5659A, x942D9A67, a6);
1260 vxor(*out1, *out1, x0);
1262 vsel(x142956AB, x353A659A, x942D9A67, a2);
1263 vsel(x455D45DF, a1, x86CD4C9B, x142956AB);
1264 vxor(x1C3EE619, x5963A3C6, x455D45DF);
1265 vsel(x3, x5963A3C6, x1C3EE619, a6);
1266 vxor(*out4, *out4, x3);
1268 vsel(x2AEA70D5, x3AF06A95, x606F30CF, x353A9A65);
1269 vsel(x20CF7A9F, x2AEA70D5, x05CF0A9F, x0A3F0A6F);
1270 vxor(x3CF19C86, x1C3EE619, x20CF7A9F);
1271 vxor(x69A49C79, x555500FF, x3CF19C86);
1273 vsel(x840DBB67, a5, x942D9A67, x86CD4C9B);
1274 vsel(x6DA19C1E, x69A49C79, x3CF19C86, x840DBB67);
1275 vnot(x925E63E1, x6DA19C1E);
1276 vsel(x1, x925E63E1, x69A49C79, a6);
1277 vxor(*out2, *out2, x1);
1279 vsel(x9C3CA761, x840DBB67, x1C3EE619, x3CF19C86);
1280 vsel(x257A75D5, x455D45DF, x2AEA70D5, x606F30CF);
1281 vxor(xB946D2B4, x9C3CA761, x257A75D5);
1282 vsel(x2, x16E94A97, xB946D2B4, a6);
1283 vxor(*out3, *out3, x2);
1287 s7(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1288 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1290 u32 x44447777, x4B4B7878, x22772277, x0505F5F5, x220522F5, x694E5A8D;
1291 u32 x00FFFF00, x66666666, x32353235, x26253636, x26DAC936;
1292 u32 x738F9C63, x11EF9867, x26DA9867;
1293 u32 x4B4B9C63, x4B666663, x4E639396;
1294 u32 x4E4B393C, xFF00FF00, xFF05DD21, xB14EE41D;
1295 u32 xD728827B, x6698807B, x699C585B;
1296 u32 x738C847B, xA4A71E18, x74878E78;
1297 u32 x333D9639, x74879639, x8B7869C6;
1300 vsel(x44447777, a2, a6, a3);
1301 vxor(x4B4B7878, a4, x44447777);
1302 vsel(x22772277, a3, a5, a2);
1303 vsel(x0505F5F5, a6, a2, a4);
1304 vsel(x220522F5, x22772277, x0505F5F5, a5);
1305 vxor(x694E5A8D, x4B4B7878, x220522F5);
1307 vxor(x00FFFF00, a5, a6);
1308 vxor(x66666666, a2, a3);
1309 vsel(x32353235, a3, x220522F5, a4);
1310 vsel(x26253636, x66666666, x32353235, x4B4B7878);
1311 vxor(x26DAC936, x00FFFF00, x26253636);
1312 vsel(x0, x26DAC936, x694E5A8D, a1);
1313 vxor(*out1, *out1, x0);
1315 vxor(x738F9C63, a2, x26DAC936);
1316 vsel(x11EF9867, x738F9C63, a5, x66666666);
1317 vsel(x26DA9867, x26DAC936, x11EF9867, a6);
1319 vsel(x4B4B9C63, x4B4B7878, x738F9C63, a6);
1320 vsel(x4B666663, x4B4B9C63, x66666666, x00FFFF00);
1321 vxor(x4E639396, x0505F5F5, x4B666663);
1323 vsel(x4E4B393C, x4B4B7878, x4E639396, a2);
1324 vnot(xFF00FF00, a5);
1325 vsel(xFF05DD21, xFF00FF00, x738F9C63, x32353235);
1326 vxor(xB14EE41D, x4E4B393C, xFF05DD21);
1327 vsel(x1, xB14EE41D, x26DA9867, a1);
1328 vxor(*out2, *out2, x1);
1330 vxor(xD728827B, x66666666, xB14EE41D);
1331 vsel(x6698807B, x26DA9867, xD728827B, x4E4B393C);
1332 vsel(x699C585B, x6698807B, x694E5A8D, xFF05DD21);
1333 vsel(x2, x699C585B, x4E639396, a1);
1334 vxor(*out3, *out3, x2);
1336 vsel(x738C847B, x738F9C63, xD728827B, x4B4B7878);
1337 vxor(xA4A71E18, x738F9C63, xD728827B);
1338 vsel(x74878E78, x738C847B, xA4A71E18, a4);
1340 vsel(x333D9639, x32353235, x738C847B, xB14EE41D);
1341 vsel(x74879639, x74878E78, x333D9639, a6);
1342 vnot(x8B7869C6, x74879639);
1343 vsel(x3, x74878E78, x8B7869C6, a1);
1344 vxor(*out4, *out4, x3);
1348 s8(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1349 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1351 u32 x0505F5F5, x05FAF50A, x0F0F00FF, x22227777, x07DA807F, x34E9B34C;
1352 u32 x00FFF00F, x0033FCCF, x5565B15C, x0C0C3F3F, x59698E63;
1353 u32 x3001F74E, x30555745, x693CD926;
1354 u32 x0C0CD926, x0C3F25E9, x38D696A5;
1356 u32 x03D2117B, xC778395B, xCB471CB2;
1357 u32 x5425B13F, x56B3803F, x919AE965;
1358 u32 x17B3023F, x75555755, x62E6556A, xA59E6C31;
1361 vsel(x0505F5F5, a5, a1, a3);
1362 vxor(x05FAF50A, a4, x0505F5F5);
1363 vsel(x0F0F00FF, a3, a4, a5);
1364 vsel(x22227777, a2, a5, a1);
1365 vsel(x07DA807F, x05FAF50A, x0F0F00FF, x22227777);
1366 vxor(x34E9B34C, a2, x07DA807F);
1368 vsel(x00FFF00F, x05FAF50A, a4, a3);
1369 vsel(x0033FCCF, a5, x00FFF00F, a2);
1370 vsel(x5565B15C, a1, x34E9B34C, x0033FCCF);
1371 vsel(x0C0C3F3F, a3, a5, a2);
1372 vxor(x59698E63, x5565B15C, x0C0C3F3F);
1374 vsel(x3001F74E, x34E9B34C, a5, x05FAF50A);
1375 vsel(x30555745, x3001F74E, a1, x00FFF00F);
1376 vxor(x693CD926, x59698E63, x30555745);
1377 vsel(x2, x693CD926, x59698E63, a6);
1378 vxor(*out3, *out3, x2);
1380 vsel(x0C0CD926, x0C0C3F3F, x693CD926, a5);
1381 vxor(x0C3F25E9, x0033FCCF, x0C0CD926);
1382 vxor(x38D696A5, x34E9B34C, x0C3F25E9);
1384 vnot(xC729695A, x38D696A5);
1386 vsel(x03D2117B, x07DA807F, a2, x0C0CD926);
1387 vsel(xC778395B, xC729695A, x03D2117B, x30555745);
1388 vxor(xCB471CB2, x0C3F25E9, xC778395B);
1389 vsel(x1, xCB471CB2, x34E9B34C, a6);
1390 vxor(*out2, *out2, x1);
1392 vsel(x5425B13F, x5565B15C, x0C0C3F3F, x03D2117B);
1393 vsel(x56B3803F, x07DA807F, x5425B13F, x59698E63);
1394 vxor(x919AE965, xC729695A, x56B3803F);
1395 vsel(x3, xC729695A, x919AE965, a6);
1396 vxor(*out4, *out4, x3);
1398 vsel(x17B3023F, x07DA807F, a2, x59698E63);
1399 vor(x75555755, a1, x30555745);
1400 vxor(x62E6556A, x17B3023F, x75555755);
1401 vxor(xA59E6C31, xC778395B, x62E6556A);
1402 vsel(x0, xA59E6C31, x38D696A5, a6);
1403 vxor(*out1, *out1, x0);
1407 #define SWAP(a, b) { u32 tmp=*a;*a=*b;*b=tmp; }
1443 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
1444 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
1445 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
1446 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
1447 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
1448 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
1449 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
1450 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
1451 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
1452 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
1453 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
1454 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
1455 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
1456 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
1457 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
1458 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
1466 #define KXX_DECL volatile
1467 #define sXXX_DECL volatile
1475 #ifdef DESCRYPT_SALT
1477 static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
1479 sXXX_DECL u32 s001 = (0x001 & DESCRYPT_SALT) ? 0xffffffff : 0;
1480 sXXX_DECL u32 s002 = (0x002 & DESCRYPT_SALT) ? 0xffffffff : 0;
1481 sXXX_DECL u32 s004 = (0x004 & DESCRYPT_SALT) ? 0xffffffff : 0;
1482 sXXX_DECL u32 s008 = (0x008 & DESCRYPT_SALT) ? 0xffffffff : 0;
1483 sXXX_DECL u32 s010 = (0x010 & DESCRYPT_SALT) ? 0xffffffff : 0;
1484 sXXX_DECL u32 s020 = (0x020 & DESCRYPT_SALT) ? 0xffffffff : 0;
1485 sXXX_DECL u32 s040 = (0x040 & DESCRYPT_SALT) ? 0xffffffff : 0;
1486 sXXX_DECL u32 s080 = (0x080 & DESCRYPT_SALT) ? 0xffffffff : 0;
1487 sXXX_DECL u32 s100 = (0x100 & DESCRYPT_SALT) ? 0xffffffff : 0;
1488 sXXX_DECL u32 s200 = (0x200 & DESCRYPT_SALT) ? 0xffffffff : 0;
1489 sXXX_DECL u32 s400 = (0x400 & DESCRYPT_SALT) ? 0xffffffff : 0;
1490 sXXX_DECL u32 s800 = (0x800 & DESCRYPT_SALT) ? 0xffffffff : 0;
1492 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
1493 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
1494 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
1495 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
1496 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
1497 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
1498 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
1499 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
1501 for (u32 ii = 0; ii < 25; ii++)
1504 #if CUDA_ARCH >= 500
1514 for (u32 i = 0; i < 2; i++)
1516 if (i) KEYSET10 else KEYSET00
1518 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1519 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1520 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1521 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1522 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1523 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1524 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1525 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1527 if (i) KEYSET11 else KEYSET01
1529 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1530 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1531 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1532 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1533 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1534 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1535 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1536 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1538 if (i) KEYSET12 else KEYSET02
1540 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1541 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1542 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1543 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1544 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1545 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1546 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1547 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1549 if (i) KEYSET13 else KEYSET03
1551 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1552 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1553 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1554 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1555 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1556 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1557 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1558 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1560 if (i) KEYSET14 else KEYSET04
1562 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1563 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1564 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1565 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1566 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1567 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1568 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1569 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1571 if (i) KEYSET15 else KEYSET05
1573 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1574 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1575 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1576 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1577 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1578 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1579 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1580 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1582 if (i) KEYSET16 else KEYSET06
1584 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1585 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1586 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1587 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1588 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1589 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1590 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1591 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1593 if (i) KEYSET17 else KEYSET07
1595 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1596 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1597 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1598 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1599 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1600 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1601 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1602 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1613 static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
1615 sXXX_DECL u32 s001 = (0x001 & SALT) ? 0xffffffff : 0;
1616 sXXX_DECL u32 s002 = (0x002 & SALT) ? 0xffffffff : 0;
1617 sXXX_DECL u32 s004 = (0x004 & SALT) ? 0xffffffff : 0;
1618 sXXX_DECL u32 s008 = (0x008 & SALT) ? 0xffffffff : 0;
1619 sXXX_DECL u32 s010 = (0x010 & SALT) ? 0xffffffff : 0;
1620 sXXX_DECL u32 s020 = (0x020 & SALT) ? 0xffffffff : 0;
1621 sXXX_DECL u32 s040 = (0x040 & SALT) ? 0xffffffff : 0;
1622 sXXX_DECL u32 s080 = (0x080 & SALT) ? 0xffffffff : 0;
1623 sXXX_DECL u32 s100 = (0x100 & SALT) ? 0xffffffff : 0;
1624 sXXX_DECL u32 s200 = (0x200 & SALT) ? 0xffffffff : 0;
1625 sXXX_DECL u32 s400 = (0x400 & SALT) ? 0xffffffff : 0;
1626 sXXX_DECL u32 s800 = (0x800 & SALT) ? 0xffffffff : 0;
1628 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
1629 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
1630 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
1631 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
1632 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
1633 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
1634 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
1635 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
1637 for (u32 ii = 0; ii < 25; ii++)
1640 #if CUDA_ARCH >= 500
1650 for (u32 i = 0; i < 2; i++)
1652 if (i) KEYSET10 else KEYSET00
1654 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1655 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1656 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1657 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1658 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1659 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1660 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1661 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1663 if (i) KEYSET11 else KEYSET01
1665 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1666 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1667 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1668 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1669 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1670 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1671 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1672 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1674 if (i) KEYSET12 else KEYSET02
1676 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1677 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1678 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1679 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1680 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1681 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1682 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1683 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1685 if (i) KEYSET13 else KEYSET03
1687 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1688 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1689 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1690 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1691 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1692 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1693 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1694 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1696 if (i) KEYSET14 else KEYSET04
1698 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1699 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1700 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1701 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1702 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1703 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1704 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1705 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1707 if (i) KEYSET15 else KEYSET05
1709 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1710 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1711 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1712 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1713 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1714 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1715 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1716 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1718 if (i) KEYSET16 else KEYSET06
1720 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1721 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1722 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1723 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1724 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1725 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1726 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1727 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1729 if (i) KEYSET17 else KEYSET07
1731 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1732 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1733 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1734 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1735 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1736 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1737 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1738 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1749 static void transpose32c (u32 data[32])
1751 #define swap(x,y,j,m) \
1752 t = ((x) ^ ((y) >> (j))) & (m); \
1754 (y) = (y) ^ (t << (j));
1758 swap (data[ 0], data[16], 16, 0x0000ffff);
1759 swap (data[ 1], data[17], 16, 0x0000ffff);
1760 swap (data[ 2], data[18], 16, 0x0000ffff);
1761 swap (data[ 3], data[19], 16, 0x0000ffff);
1762 swap (data[ 4], data[20], 16, 0x0000ffff);
1763 swap (data[ 5], data[21], 16, 0x0000ffff);
1764 swap (data[ 6], data[22], 16, 0x0000ffff);
1765 swap (data[ 7], data[23], 16, 0x0000ffff);
1766 swap (data[ 8], data[24], 16, 0x0000ffff);
1767 swap (data[ 9], data[25], 16, 0x0000ffff);
1768 swap (data[10], data[26], 16, 0x0000ffff);
1769 swap (data[11], data[27], 16, 0x0000ffff);
1770 swap (data[12], data[28], 16, 0x0000ffff);
1771 swap (data[13], data[29], 16, 0x0000ffff);
1772 swap (data[14], data[30], 16, 0x0000ffff);
1773 swap (data[15], data[31], 16, 0x0000ffff);
1774 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
1775 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
1776 swap (data[ 2], data[10], 8, 0x00ff00ff);
1777 swap (data[ 3], data[11], 8, 0x00ff00ff);
1778 swap (data[ 4], data[12], 8, 0x00ff00ff);
1779 swap (data[ 5], data[13], 8, 0x00ff00ff);
1780 swap (data[ 6], data[14], 8, 0x00ff00ff);
1781 swap (data[ 7], data[15], 8, 0x00ff00ff);
1782 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
1783 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
1784 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
1785 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
1786 swap (data[ 0], data[ 2], 2, 0x33333333);
1787 swap (data[ 1], data[ 3], 2, 0x33333333);
1788 swap (data[ 0], data[ 1], 1, 0x55555555);
1789 swap (data[ 2], data[ 3], 1, 0x55555555);
1790 swap (data[ 4], data[ 6], 2, 0x33333333);
1791 swap (data[ 5], data[ 7], 2, 0x33333333);
1792 swap (data[ 4], data[ 5], 1, 0x55555555);
1793 swap (data[ 6], data[ 7], 1, 0x55555555);
1794 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
1795 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
1796 swap (data[10], data[14], 4, 0x0f0f0f0f);
1797 swap (data[11], data[15], 4, 0x0f0f0f0f);
1798 swap (data[ 8], data[10], 2, 0x33333333);
1799 swap (data[ 9], data[11], 2, 0x33333333);
1800 swap (data[ 8], data[ 9], 1, 0x55555555);
1801 swap (data[10], data[11], 1, 0x55555555);
1802 swap (data[12], data[14], 2, 0x33333333);
1803 swap (data[13], data[15], 2, 0x33333333);
1804 swap (data[12], data[13], 1, 0x55555555);
1805 swap (data[14], data[15], 1, 0x55555555);
1806 swap (data[16], data[24], 8, 0x00ff00ff);
1807 swap (data[17], data[25], 8, 0x00ff00ff);
1808 swap (data[18], data[26], 8, 0x00ff00ff);
1809 swap (data[19], data[27], 8, 0x00ff00ff);
1810 swap (data[20], data[28], 8, 0x00ff00ff);
1811 swap (data[21], data[29], 8, 0x00ff00ff);
1812 swap (data[22], data[30], 8, 0x00ff00ff);
1813 swap (data[23], data[31], 8, 0x00ff00ff);
1814 swap (data[16], data[20], 4, 0x0f0f0f0f);
1815 swap (data[17], data[21], 4, 0x0f0f0f0f);
1816 swap (data[18], data[22], 4, 0x0f0f0f0f);
1817 swap (data[19], data[23], 4, 0x0f0f0f0f);
1818 swap (data[16], data[18], 2, 0x33333333);
1819 swap (data[17], data[19], 2, 0x33333333);
1820 swap (data[16], data[17], 1, 0x55555555);
1821 swap (data[18], data[19], 1, 0x55555555);
1822 swap (data[20], data[22], 2, 0x33333333);
1823 swap (data[21], data[23], 2, 0x33333333);
1824 swap (data[20], data[21], 1, 0x55555555);
1825 swap (data[22], data[23], 1, 0x55555555);
1826 swap (data[24], data[28], 4, 0x0f0f0f0f);
1827 swap (data[25], data[29], 4, 0x0f0f0f0f);
1828 swap (data[26], data[30], 4, 0x0f0f0f0f);
1829 swap (data[27], data[31], 4, 0x0f0f0f0f);
1830 swap (data[24], data[26], 2, 0x33333333);
1831 swap (data[25], data[27], 2, 0x33333333);
1832 swap (data[24], data[25], 1, 0x55555555);
1833 swap (data[26], data[27], 1, 0x55555555);
1834 swap (data[28], data[30], 2, 0x33333333);
1835 swap (data[29], data[31], 2, 0x33333333);
1836 swap (data[28], data[29], 1, 0x55555555);
1837 swap (data[30], data[31], 1, 0x55555555);
1840 static void m01500m (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1846 const u32 gid = get_global_id (0);
1847 const u32 lid = get_local_id (0);
1853 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
1859 const u32 K00 = pws[gid].i[ 0];
1860 const u32 K01 = pws[gid].i[ 1];
1861 const u32 K02 = pws[gid].i[ 2];
1862 const u32 K03 = pws[gid].i[ 3];
1863 const u32 K04 = pws[gid].i[ 4];
1864 const u32 K05 = pws[gid].i[ 5];
1865 const u32 K06 = pws[gid].i[ 6];
1866 const u32 K07 = pws[gid].i[ 7];
1867 const u32 K08 = pws[gid].i[ 8];
1868 const u32 K09 = pws[gid].i[ 9];
1869 const u32 K10 = pws[gid].i[10];
1870 const u32 K11 = pws[gid].i[11];
1871 const u32 K12 = pws[gid].i[12];
1872 const u32 K13 = pws[gid].i[13];
1873 const u32 K14 = pws[gid].i[14];
1874 const u32 K15 = pws[gid].i[15];
1875 const u32 K16 = pws[gid].i[16];
1876 const u32 K17 = pws[gid].i[17];
1877 const u32 K18 = pws[gid].i[18];
1878 const u32 K19 = pws[gid].i[19];
1879 const u32 K20 = pws[gid].i[20];
1880 const u32 K21 = pws[gid].i[21];
1881 const u32 K22 = pws[gid].i[22];
1882 const u32 K23 = pws[gid].i[23];
1883 const u32 K24 = pws[gid].i[24];
1884 const u32 K25 = pws[gid].i[25];
1885 const u32 K26 = pws[gid].i[26];
1886 const u32 K27 = pws[gid].i[27];
1887 const u32 K28 = pws[gid].i[28];
1888 const u32 K29 = pws[gid].i[29];
1889 const u32 K30 = pws[gid].i[30];
1890 const u32 K31 = pws[gid].i[31];
1891 const u32 K32 = pws[gid].i[32];
1892 const u32 K33 = pws[gid].i[33];
1893 const u32 K34 = pws[gid].i[34];
1894 const u32 K35 = pws[gid].i[35];
1895 const u32 K36 = pws[gid].i[36];
1896 const u32 K37 = pws[gid].i[37];
1897 const u32 K38 = pws[gid].i[38];
1898 const u32 K39 = pws[gid].i[39];
1899 const u32 K40 = pws[gid].i[40];
1900 const u32 K41 = pws[gid].i[41];
1901 const u32 K42 = pws[gid].i[42];
1902 const u32 K43 = pws[gid].i[43];
1903 const u32 K44 = pws[gid].i[44];
1904 const u32 K45 = pws[gid].i[45];
1905 const u32 K46 = pws[gid].i[46];
1906 const u32 K47 = pws[gid].i[47];
1907 const u32 K48 = pws[gid].i[48];
1908 const u32 K49 = pws[gid].i[49];
1909 const u32 K50 = pws[gid].i[50];
1910 const u32 K51 = pws[gid].i[51];
1911 const u32 K52 = pws[gid].i[52];
1912 const u32 K53 = pws[gid].i[53];
1913 const u32 K54 = pws[gid].i[54];
1914 const u32 K55 = pws[gid].i[55];
1920 const u32 pc_pos = get_local_id (1);
1922 const u32 il_pos = pc_pos * 32;
1953 k00 |= words_buf_r[pc_pos].b[ 0];
1954 k01 |= words_buf_r[pc_pos].b[ 1];
1955 k02 |= words_buf_r[pc_pos].b[ 2];
1956 k03 |= words_buf_r[pc_pos].b[ 3];
1957 k04 |= words_buf_r[pc_pos].b[ 4];
1958 k05 |= words_buf_r[pc_pos].b[ 5];
1959 k06 |= words_buf_r[pc_pos].b[ 6];
1960 k07 |= words_buf_r[pc_pos].b[ 7];
1961 k08 |= words_buf_r[pc_pos].b[ 8];
1962 k09 |= words_buf_r[pc_pos].b[ 9];
1963 k10 |= words_buf_r[pc_pos].b[10];
1964 k11 |= words_buf_r[pc_pos].b[11];
1965 k12 |= words_buf_r[pc_pos].b[12];
1966 k13 |= words_buf_r[pc_pos].b[13];
1967 k14 |= words_buf_r[pc_pos].b[14];
1968 k15 |= words_buf_r[pc_pos].b[15];
1969 k16 |= words_buf_r[pc_pos].b[16];
1970 k17 |= words_buf_r[pc_pos].b[17];
1971 k18 |= words_buf_r[pc_pos].b[18];
1972 k19 |= words_buf_r[pc_pos].b[19];
1973 k20 |= words_buf_r[pc_pos].b[20];
1974 k21 |= words_buf_r[pc_pos].b[21];
1975 k22 |= words_buf_r[pc_pos].b[22];
1976 k23 |= words_buf_r[pc_pos].b[23];
1977 k24 |= words_buf_r[pc_pos].b[24];
1978 k25 |= words_buf_r[pc_pos].b[25];
1979 k26 |= words_buf_r[pc_pos].b[26];
1980 k27 |= words_buf_r[pc_pos].b[27];
2050 k00, k01, k02, k03, k04, k05, k06,
2051 k07, k08, k09, k10, k11, k12, k13,
2052 k14, k15, k16, k17, k18, k19, k20,
2053 k21, k22, k23, k24, k25, k26, k27,
2054 K28, K29, K30, K31, K32, K33, K34,
2055 K35, K36, K37, K38, K39, K40, K41,
2056 K42, K43, K44, K45, K46, K47, K48,
2057 K49, K50, K51, K52, K53, K54, K55,
2058 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
2059 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
2060 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
2061 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
2062 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
2063 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
2064 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
2065 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
2135 if (digests_cnt < 16)
2137 for (u32 d = 0; d < digests_cnt; d++)
2139 const u32 final_hash_pos = digests_offset + d;
2141 if (hashes_shown[final_hash_pos]) continue;
2145 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
2146 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
2151 for (int i = 0; i < 32; i++)
2153 const u32 b0 = -((search[0] >> i) & 1);
2154 const u32 b1 = -((search[1] >> i) & 1);
2156 tmpResult |= out[ 0 + i] ^ b0;
2157 tmpResult |= out[32 + i] ^ b1;
2160 if (tmpResult == 0xffffffff) continue;
2162 const u32 slice = 31 - clz (~tmpResult);
2164 const u32 r0 = search[0];
2165 const u32 r1 = search[1];
2178 for (int i = 0; i < 32; i++)
2180 out0[i] = out[ 0 + 31 - i];
2181 out1[i] = out[32 + 31 - i];
2184 transpose32c (out0);
2185 transpose32c (out1);
2188 for (int slice = 0; slice < 32; slice++)
2190 const u32 r0 = out0[31 - slice];
2191 const u32 r1 = out1[31 - slice];
2200 static void m01500s (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
2206 const u32 gid = get_global_id (0);
2207 const u32 lid = get_local_id (0);
2213 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
2288 const u32 K00 = pws[gid].i[ 0];
2289 const u32 K01 = pws[gid].i[ 1];
2290 const u32 K02 = pws[gid].i[ 2];
2291 const u32 K03 = pws[gid].i[ 3];
2292 const u32 K04 = pws[gid].i[ 4];
2293 const u32 K05 = pws[gid].i[ 5];
2294 const u32 K06 = pws[gid].i[ 6];
2295 const u32 K07 = pws[gid].i[ 7];
2296 const u32 K08 = pws[gid].i[ 8];
2297 const u32 K09 = pws[gid].i[ 9];
2298 const u32 K10 = pws[gid].i[10];
2299 const u32 K11 = pws[gid].i[11];
2300 const u32 K12 = pws[gid].i[12];
2301 const u32 K13 = pws[gid].i[13];
2302 const u32 K14 = pws[gid].i[14];
2303 const u32 K15 = pws[gid].i[15];
2304 const u32 K16 = pws[gid].i[16];
2305 const u32 K17 = pws[gid].i[17];
2306 const u32 K18 = pws[gid].i[18];
2307 const u32 K19 = pws[gid].i[19];
2308 const u32 K20 = pws[gid].i[20];
2309 const u32 K21 = pws[gid].i[21];
2310 const u32 K22 = pws[gid].i[22];
2311 const u32 K23 = pws[gid].i[23];
2312 const u32 K24 = pws[gid].i[24];
2313 const u32 K25 = pws[gid].i[25];
2314 const u32 K26 = pws[gid].i[26];
2315 const u32 K27 = pws[gid].i[27];
2316 const u32 K28 = pws[gid].i[28];
2317 const u32 K29 = pws[gid].i[29];
2318 const u32 K30 = pws[gid].i[30];
2319 const u32 K31 = pws[gid].i[31];
2320 const u32 K32 = pws[gid].i[32];
2321 const u32 K33 = pws[gid].i[33];
2322 const u32 K34 = pws[gid].i[34];
2323 const u32 K35 = pws[gid].i[35];
2324 const u32 K36 = pws[gid].i[36];
2325 const u32 K37 = pws[gid].i[37];
2326 const u32 K38 = pws[gid].i[38];
2327 const u32 K39 = pws[gid].i[39];
2328 const u32 K40 = pws[gid].i[40];
2329 const u32 K41 = pws[gid].i[41];
2330 const u32 K42 = pws[gid].i[42];
2331 const u32 K43 = pws[gid].i[43];
2332 const u32 K44 = pws[gid].i[44];
2333 const u32 K45 = pws[gid].i[45];
2334 const u32 K46 = pws[gid].i[46];
2335 const u32 K47 = pws[gid].i[47];
2336 const u32 K48 = pws[gid].i[48];
2337 const u32 K49 = pws[gid].i[49];
2338 const u32 K50 = pws[gid].i[50];
2339 const u32 K51 = pws[gid].i[51];
2340 const u32 K52 = pws[gid].i[52];
2341 const u32 K53 = pws[gid].i[53];
2342 const u32 K54 = pws[gid].i[54];
2343 const u32 K55 = pws[gid].i[55];
2349 const u32 pc_pos = get_local_id (1);
2351 const u32 il_pos = pc_pos * 32;
2382 k00 |= words_buf_r[pc_pos].b[ 0];
2383 k01 |= words_buf_r[pc_pos].b[ 1];
2384 k02 |= words_buf_r[pc_pos].b[ 2];
2385 k03 |= words_buf_r[pc_pos].b[ 3];
2386 k04 |= words_buf_r[pc_pos].b[ 4];
2387 k05 |= words_buf_r[pc_pos].b[ 5];
2388 k06 |= words_buf_r[pc_pos].b[ 6];
2389 k07 |= words_buf_r[pc_pos].b[ 7];
2390 k08 |= words_buf_r[pc_pos].b[ 8];
2391 k09 |= words_buf_r[pc_pos].b[ 9];
2392 k10 |= words_buf_r[pc_pos].b[10];
2393 k11 |= words_buf_r[pc_pos].b[11];
2394 k12 |= words_buf_r[pc_pos].b[12];
2395 k13 |= words_buf_r[pc_pos].b[13];
2396 k14 |= words_buf_r[pc_pos].b[14];
2397 k15 |= words_buf_r[pc_pos].b[15];
2398 k16 |= words_buf_r[pc_pos].b[16];
2399 k17 |= words_buf_r[pc_pos].b[17];
2400 k18 |= words_buf_r[pc_pos].b[18];
2401 k19 |= words_buf_r[pc_pos].b[19];
2402 k20 |= words_buf_r[pc_pos].b[20];
2403 k21 |= words_buf_r[pc_pos].b[21];
2404 k22 |= words_buf_r[pc_pos].b[22];
2405 k23 |= words_buf_r[pc_pos].b[23];
2406 k24 |= words_buf_r[pc_pos].b[24];
2407 k25 |= words_buf_r[pc_pos].b[25];
2408 k26 |= words_buf_r[pc_pos].b[26];
2409 k27 |= words_buf_r[pc_pos].b[27];
2479 k00, k01, k02, k03, k04, k05, k06,
2480 k07, k08, k09, k10, k11, k12, k13,
2481 k14, k15, k16, k17, k18, k19, k20,
2482 k21, k22, k23, k24, k25, k26, k27,
2483 K28, K29, K30, K31, K32, K33, K34,
2484 K35, K36, K37, K38, K39, K40, K41,
2485 K42, K43, K44, K45, K46, K47, K48,
2486 K49, K50, K51, K52, K53, K54, K55,
2487 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
2488 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
2489 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
2490 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
2491 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
2492 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
2493 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
2494 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
2499 tmpResult |= D00 ^ S00;
2500 tmpResult |= D01 ^ S01;
2501 tmpResult |= D02 ^ S02;
2502 tmpResult |= D03 ^ S03;
2503 tmpResult |= D04 ^ S04;
2504 tmpResult |= D05 ^ S05;
2505 tmpResult |= D06 ^ S06;
2506 tmpResult |= D07 ^ S07;
2507 tmpResult |= D08 ^ S08;
2508 tmpResult |= D09 ^ S09;
2509 tmpResult |= D10 ^ S10;
2510 tmpResult |= D11 ^ S11;
2511 tmpResult |= D12 ^ S12;
2512 tmpResult |= D13 ^ S13;
2513 tmpResult |= D14 ^ S14;
2514 tmpResult |= D15 ^ S15;
2515 tmpResult |= D16 ^ S16;
2516 tmpResult |= D17 ^ S17;
2517 tmpResult |= D18 ^ S18;
2518 tmpResult |= D19 ^ S19;
2519 tmpResult |= D20 ^ S20;
2520 tmpResult |= D21 ^ S21;
2521 tmpResult |= D22 ^ S22;
2522 tmpResult |= D23 ^ S23;
2523 tmpResult |= D24 ^ S24;
2524 tmpResult |= D25 ^ S25;
2525 tmpResult |= D26 ^ S26;
2526 tmpResult |= D27 ^ S27;
2527 tmpResult |= D28 ^ S28;
2528 tmpResult |= D29 ^ S29;
2529 tmpResult |= D30 ^ S30;
2530 tmpResult |= D31 ^ S31;
2531 tmpResult |= D32 ^ S32;
2532 tmpResult |= D33 ^ S33;
2533 tmpResult |= D34 ^ S34;
2534 tmpResult |= D35 ^ S35;
2535 tmpResult |= D36 ^ S36;
2536 tmpResult |= D37 ^ S37;
2537 tmpResult |= D38 ^ S38;
2538 tmpResult |= D39 ^ S39;
2539 tmpResult |= D40 ^ S40;
2540 tmpResult |= D41 ^ S41;
2541 tmpResult |= D42 ^ S42;
2542 tmpResult |= D43 ^ S43;
2543 tmpResult |= D44 ^ S44;
2544 tmpResult |= D45 ^ S45;
2545 tmpResult |= D46 ^ S46;
2546 tmpResult |= D47 ^ S47;
2548 if (tmpResult == 0xffffffff) return;
2550 tmpResult |= D48 ^ S48;
2551 tmpResult |= D49 ^ S49;
2552 tmpResult |= D50 ^ S50;
2553 tmpResult |= D51 ^ S51;
2554 tmpResult |= D52 ^ S52;
2555 tmpResult |= D53 ^ S53;
2556 tmpResult |= D54 ^ S54;
2557 tmpResult |= D55 ^ S55;
2558 tmpResult |= D56 ^ S56;
2559 tmpResult |= D57 ^ S57;
2560 tmpResult |= D58 ^ S58;
2561 tmpResult |= D59 ^ S59;
2562 tmpResult |= D60 ^ S60;
2563 tmpResult |= D61 ^ S61;
2564 tmpResult |= D62 ^ S62;
2565 tmpResult |= D63 ^ S63;
2567 if (tmpResult == 0xffffffff) return;
2569 const u32 slice = 31 - clz (~tmpResult);
2575 // transpose bitslice base : easy because no overlapping buffers
2576 // mod : attention race conditions, need different buffers for *in and *out
2579 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01500_tb (__global pw_t *pws)
2581 const u32 gid = get_global_id (0);
2583 const u32 w0 = pws[gid].i[0];
2584 const u32 w1 = pws[gid].i[1];
2586 const u32 w0s = (w0 << 1) & 0xfefefefe;
2587 const u32 w1s = (w1 << 1) & 0xfefefefe;
2590 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
2592 pws[gid].i[j + 0 + 0] = -((w0s >> (i + 7)) & 1);
2593 pws[gid].i[j + 0 + 1] = -((w0s >> (i + 6)) & 1);
2594 pws[gid].i[j + 0 + 2] = -((w0s >> (i + 5)) & 1);
2595 pws[gid].i[j + 0 + 3] = -((w0s >> (i + 4)) & 1);
2596 pws[gid].i[j + 0 + 4] = -((w0s >> (i + 3)) & 1);
2597 pws[gid].i[j + 0 + 5] = -((w0s >> (i + 2)) & 1);
2598 pws[gid].i[j + 0 + 6] = -((w0s >> (i + 1)) & 1);
2602 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
2604 pws[gid].i[j + 28 + 0] = -((w1s >> (i + 7)) & 1);
2605 pws[gid].i[j + 28 + 1] = -((w1s >> (i + 6)) & 1);
2606 pws[gid].i[j + 28 + 2] = -((w1s >> (i + 5)) & 1);
2607 pws[gid].i[j + 28 + 3] = -((w1s >> (i + 4)) & 1);
2608 pws[gid].i[j + 28 + 4] = -((w1s >> (i + 3)) & 1);
2609 pws[gid].i[j + 28 + 5] = -((w1s >> (i + 2)) & 1);
2610 pws[gid].i[j + 28 + 6] = -((w1s >> (i + 1)) & 1);
2614 __kernel void __attribute__((reqd_work_group_size (32, 1, 1))) m01500_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
2616 const u32 gid = get_global_id (0);
2617 const u32 lid = get_local_id (0);
2619 const u32 block = gid / 32;
2620 const u32 slice = gid % 32;
2622 const u32 w0 = mod[gid];
2624 const u32 w0s = (w0 << 1) & 0xfefefefe;
2627 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
2629 atomic_or (&words_buf_r[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
2630 atomic_or (&words_buf_r[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice));
2631 atomic_or (&words_buf_r[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice));
2632 atomic_or (&words_buf_r[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice));
2633 atomic_or (&words_buf_r[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice));
2634 atomic_or (&words_buf_r[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice));
2635 atomic_or (&words_buf_r[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice));
2639 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2645 const u32 gid = get_global_id (0);
2646 const u32 lid = get_local_id (0);
2647 const u32 vid = get_local_id (1);
2649 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
2650 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
2652 __local u32 s_S[64];
2656 s_S[ 0 + vid] = -((s0 >> vid) & 1);
2660 s_S[32 + vid] = -((s1 >> vid) & 1);
2663 barrier (CLK_LOCAL_MEM_FENCE);
2665 if (gid >= gid_max) return;
2671 m01500m (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2674 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2678 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2682 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2688 const u32 gid = get_global_id (0);
2689 const u32 lid = get_local_id (0);
2690 const u32 vid = get_local_id (1);
2692 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
2693 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
2695 __local u32 s_S[64];
2699 s_S[ 0 + vid] = -((s0 >> vid) & 1);
2703 s_S[32 + vid] = -((s1 >> vid) & 1);
2706 barrier (CLK_LOCAL_MEM_FENCE);
2708 if (gid >= gid_max) return;
2714 m01500s (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2717 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2721 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)