2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes for maxwell were taken from DeepLearningJohnDoe, license below
5 * : sboxes for others were takes fron JtR, license below
10 #include "include/constants.h"
11 #include "include/kernel_vendor.h"
18 #include "include/kernel_functions.c"
19 #include "types_ocl.c"
22 #define COMPARE_S "check_single_comp4_bs.c"
23 #define COMPARE_M "check_multi_comp4_bs.c"
25 #define myselx(a,b,c) ((c) ? (b) : (a))
32 // Bitslice DES S-boxes with LOP3.LUT instructions
33 // For NVIDIA Maxwell architecture and CUDA 7.5 RC
34 // by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
36 // Gate counts: 25 24 25 18 25 24 24 23
38 // Depth: 8 7 7 6 8 10 10 8
41 // Note that same S-box function with a lower gate count isn't necessarily faster.
43 // These Boolean expressions corresponding to DES S-boxes were
44 // discovered by <deeplearningjohndoe at gmail.com>
46 // This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
47 // Redistribution and use in source and binary forms, with or without
48 // modification, are permitted.
50 // The underlying mathematical formulas are NOT copyrighted.
53 #define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));
55 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
57 LUT(xAA55AA5500550055, a1, a4, a6, 0xC1)
58 LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
59 LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
60 LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
61 LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
62 LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
63 LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
64 LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
65 LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
66 LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
67 LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
68 LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
69 LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
70 LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
71 LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
72 LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
73 LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
74 LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
75 LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
76 LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
77 LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
78 LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
79 LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
80 LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
81 LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)
89 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
91 LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97)
92 LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
93 LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76)
94 LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
95 LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
96 LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65)
97 LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
98 LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
99 LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
100 LUT(x3333CCCC00000000, a2, a5, a6, 0x14)
101 LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
102 LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
103 LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
104 LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
105 LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
106 LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
107 LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
108 LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
109 LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
110 LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
111 LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
112 LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
113 LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
114 LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)
122 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
124 LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
125 LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
126 LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
127 LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
128 LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
129 LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
130 LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
131 LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49)
132 LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
133 LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
134 LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
135 LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
136 LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
137 LUT(x500F500F500F500F, a1, a3, a4, 0x98)
138 LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
139 LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
140 LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
141 LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
142 LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
143 LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
144 LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
145 LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
146 LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
147 LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
148 LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)
156 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
158 LUT(x55F055F055F055F0, a1, a3, a4, 0x72)
159 LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
160 LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59)
161 LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
162 LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
163 LUT(x9999666699996666, a1, a2, a5, 0x69)
164 LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
165 LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
166 LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
167 LUT(x4848484848484848, a1, a2, a3, 0x12)
168 LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
169 LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
170 LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
171 LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
172 LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
173 LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
174 LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
175 LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)
183 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
185 LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
186 LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9)
187 LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
188 LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
189 LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
190 LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
191 LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
192 LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
193 LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
194 LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
195 LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
196 LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
197 LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
198 LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
199 LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
200 LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
201 LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
202 LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
203 LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
204 LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
205 LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
206 LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
207 LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
208 LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
209 LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)
217 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
219 LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2)
220 LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
221 LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
222 LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
223 LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
224 LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
225 LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
226 LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
227 LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
228 LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
229 LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
230 LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
231 LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
232 LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
233 LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
234 LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
235 LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
236 LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
237 LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
238 LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
239 LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
240 LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
241 LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
242 LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)
250 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
252 LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
253 LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
254 LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
255 LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
256 LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
257 LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
258 LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
259 LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
260 LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
261 LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
262 LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
263 LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
264 LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
265 LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
266 LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
267 LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23)
268 LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
269 LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
270 LUT(xA050A050A050A050, a1, a3, a4, 0x21)
271 LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
272 LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
273 LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
274 LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
275 LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)
283 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
285 LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
286 LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
287 LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
288 LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
289 LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
290 LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
291 LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
292 LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
293 LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
294 LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
295 LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
296 LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
297 LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
298 LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
299 LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
300 LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
301 LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
302 LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
303 LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
304 LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
305 LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
306 LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
307 LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)
318 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
319 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
321 * Gate counts: 49 44 46 33 48 46 46 41
324 * Several same-gate-count expressions for each S-box are included (for use on
325 * different CPUs/GPUs).
327 * These Boolean expressions corresponding to DES S-boxes have been generated
328 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
329 * John the Ripper password cracker: http://www.openwall.com/john/
330 * Being mathematical formulas, they are not copyrighted and are free for reuse
333 * This file (a specific representation of the S-box expressions, surrounding
334 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
335 * Redistribution and use in source and binary forms, with or without
336 * modification, are permitted. (This is a heavily cut-down "BSD license".)
338 * The effort has been sponsored by Rapid7: http://www.rapid7.com
341 static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
343 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
345 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
346 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
347 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
348 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
349 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
350 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
351 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
352 u32 x00, x01, x10, x11, x20, x21, x30, x31;
354 x55005500 = a1 & ~a5;
355 x5A0F5A0F = a4 ^ x55005500;
358 x22226666 = x3333FFFF & x66666666;
359 x2D2D6969 = a4 ^ x22226666;
360 x25202160 = x2D2D6969 & ~x5A0F5A0F;
363 x33CCCC33 = a3 ^ x00FFFF00;
364 x4803120C = x5A0F5A0F & ~x33CCCC33;
365 x2222FFFF = a6 | x22226666;
366 x6A21EDF3 = x4803120C ^ x2222FFFF;
367 x4A01CC93 = x6A21EDF3 & ~x25202160;
370 x7F75FFFF = x6A21EDF3 | x5555FFFF;
371 x00D20096 = a5 & ~x2D2D6969;
372 x7FA7FF69 = x7F75FFFF ^ x00D20096;
374 x0A0A0000 = a4 & ~x5555FFFF;
375 x0AD80096 = x00D20096 ^ x0A0A0000;
376 x00999900 = x00FFFF00 & ~x66666666;
377 x0AD99996 = x0AD80096 | x00999900;
379 x22332233 = a3 & ~x55005500;
380 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
381 x054885C0 = x257AA5F0 & ~x22332233;
382 xFAB77A3F = ~x054885C0;
383 x2221EDF3 = x3333FFFF & x6A21EDF3;
384 xD89697CC = xFAB77A3F ^ x2221EDF3;
385 x20 = x7FA7FF69 & ~a2;
386 x21 = x20 ^ xD89697CC;
389 x05B77AC0 = x00FFFF00 ^ x054885C0;
390 x05F77AD6 = x00D20096 | x05B77AC0;
391 x36C48529 = x3333FFFF ^ x05F77AD6;
392 x6391D07C = a1 ^ x36C48529;
393 xBB0747B0 = xD89697CC ^ x6391D07C;
394 x00 = x25202160 | a2;
395 x01 = x00 ^ xBB0747B0;
398 x4C460000 = x3333FFFF ^ x7F75FFFF;
399 x4EDF9996 = x0AD99996 | x4C460000;
400 x2D4E49EA = x6391D07C ^ x4EDF9996;
401 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
402 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
403 x10 = x4A01CC93 | a2;
404 x11 = x10 ^ x96B1B65A;
407 x5AFF5AFF = a5 | x5A0F5A0F;
408 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
409 x4201C010 = x4A01CC93 & x6391D07C;
410 x10B0D205 = x52B11215 ^ x4201C010;
411 x30 = x10B0D205 | a2;
412 x31 = x30 ^ x0AD99996;
416 static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
419 u32 x55550000, x00AA00FF, x33BB33FF;
420 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
421 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
422 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
423 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
424 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
425 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
426 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
427 u32 x00, x01, x10, x11, x20, x21, x30, x31;
431 x55550000 = a1 & ~a6;
432 x00AA00FF = a5 & ~x55550000;
433 x33BB33FF = a2 | x00AA00FF;
435 x33CC0000 = x33CC33CC & ~a6;
436 x11441144 = a1 & x33CC33CC;
437 x11BB11BB = a5 ^ x11441144;
438 x003311BB = x11BB11BB & ~x33CC0000;
441 x336600FF = x00AA00FF ^ x33CC0000;
442 x332200FF = x33BB33FF & x336600FF;
443 x332200F0 = x332200FF & ~x00000F0F;
445 x0302000F = a3 & x332200FF;
447 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
448 x33CCCC33 = a6 ^ x33CC33CC;
449 x33CCC030 = x33CCCC33 & ~x00000F0F;
450 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
451 x10 = a4 & ~x332200F0;
452 x11 = x10 ^ x9A646A95;
455 x00333303 = a2 & ~x33CCC030;
456 x118822B8 = x11BB11BB ^ x00333303;
457 xA8208805 = xA9A8AAA5 & ~x118822B8;
458 x3CC3C33C = a3 ^ x33CCCC33;
459 x94E34B39 = xA8208805 ^ x3CC3C33C;
460 x00 = x33BB33FF & ~a4;
461 x01 = x00 ^ x94E34B39;
464 x0331330C = x0302000F ^ x00333303;
465 x3FF3F33C = x3CC3C33C | x0331330C;
466 xA9DF596A = x33BB33FF ^ x9A646A95;
467 xA9DF5F6F = x00000F0F | xA9DF596A;
468 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
470 xA9466A6A = x332200FF ^ x9A646A95;
471 x3DA52153 = x94E34B39 ^ xA9466A6A;
472 x29850143 = xA9DF5F6F & x3DA52153;
473 x33C0330C = x33CC33CC & x3FF3F33C;
474 x1A45324F = x29850143 ^ x33C0330C;
475 x20 = x1A45324F | a4;
476 x21 = x20 ^ x962CAC53;
479 x0A451047 = x1A45324F & ~x118822B8;
480 xBBDFDD7B = x33CCCC33 | xA9DF596A;
481 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
482 x30 = x003311BB | a4;
483 x31 = x30 ^ xB19ACD3C;
487 static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
489 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
490 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
491 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
492 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
493 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
494 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
495 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
496 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
497 u32 x00, x01, x10, x11, x20, x21, x30, x31;
499 x44444444 = a1 & ~a2;
501 x4F4FF4F4 = x44444444 | x0F0FF0F0;
503 x00AAAA00 = x00FFFF00 & ~a1;
504 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
506 x3C3CC3C3 = a2 ^ x0F0FF0F0;
507 x3C3C0000 = x3C3CC3C3 & ~a6;
508 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
509 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
511 x00005EF4 = a6 & x4FE55EF4;
512 x00FF5EFF = a4 | x00005EF4;
513 x00555455 = a1 & x00FF5EFF;
514 x3C699796 = x3C3CC3C3 ^ x00555455;
515 x30 = x4FE55EF4 & ~a5;
516 x31 = x30 ^ x3C699796;
519 x000FF000 = x0F0FF0F0 & x00FFFF00;
521 x26D9A15E = x7373F4F4 ^ x55AA55AA;
522 x2FDFAF5F = a3 | x26D9A15E;
523 x2FD00F5F = x2FDFAF5F & ~x000FF000;
525 x55AAFFAA = x00AAAA00 | x55AA55AA;
526 x28410014 = x3C699796 & ~x55AAFFAA;
528 x000000CC = x000000FF & ~a2;
529 x284100D8 = x28410014 ^ x000000CC;
531 x204100D0 = x7373F4F4 & x284100D8;
532 x3C3CC3FF = x3C3CC3C3 | x000000FF;
533 x1C3CC32F = x3C3CC3FF & ~x204100D0;
534 x4969967A = a1 ^ x1C3CC32F;
535 x10 = x2FD00F5F & a5;
536 x11 = x10 ^ x4969967A;
539 x4CC44CC4 = x4FE55EF4 & ~a2;
540 x40C040C0 = x4CC44CC4 & ~a3;
541 xC3C33C3C = ~x3C3CC3C3;
542 x9669C396 = x55AAFFAA ^ xC3C33C3C;
543 xD6A98356 = x40C040C0 ^ x9669C396;
544 x00 = a5 & ~x0C840A00;
545 x01 = x00 ^ xD6A98356;
548 xD6E9C3D6 = x40C040C0 | x9669C396;
549 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
550 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
551 x001A000B = a4 & ~x4FE55EF4;
552 x9A1F2D1B = x9A072D12 | x001A000B;
553 x20 = a5 & ~x284100D8;
554 x21 = x20 ^ x9A1F2D1B;
558 static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
560 u32 x5A5A5A5A, x0F0FF0F0;
561 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
562 x52FBCA0F, x61C8F93C;
563 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
564 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
565 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
566 u32 x00, x01, x10, x11, x20, x21, x30, x31;
571 x33FFCC00 = a5 ^ x33FF33FF;
572 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
573 x0C0CC0C0 = x0F0FF0F0 & ~a2;
574 x0CF3C03F = a4 ^ x0C0CC0C0;
575 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
576 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
577 x61C8F93C = a2 ^ x52FBCA0F;
579 x00C0C03C = x0CF3C03F & x61C8F93C;
580 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
581 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
582 x30908326 = x3B92A366 & ~x0F0F30C0;
583 x3C90B3D6 = x0C0030F0 ^ x30908326;
586 x0C0CFFFF = a5 | x0C0CC0C0;
587 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
588 x04124C11 = x379E5C99 & ~x33CC33CC;
589 x56E9861E = x52FBCA0F ^ x04124C11;
590 x00 = a6 & ~x3C90B3D6;
591 x01 = x00 ^ x56E9861E;
594 xA91679E1 = ~x56E9861E;
595 x10 = x3C90B3D6 & ~a6;
596 x11 = x10 ^ xA91679E1;
599 x9586CA37 = x3C90B3D6 ^ xA91679E1;
600 x8402C833 = x9586CA37 & ~x33CC33CC;
601 x84C2C83F = x00C0C03C | x8402C833;
602 xB35C94A6 = x379E5C99 ^ x84C2C83F;
603 x20 = x61C8F93C | a6;
604 x21 = x20 ^ xB35C94A6;
607 x30 = a6 & x61C8F93C;
608 x31 = x30 ^ xB35C94A6;
612 static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
614 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
615 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
616 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
617 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
618 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
619 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
620 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
621 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
622 u32 x00, x01, x10, x11, x20, x21, x30, x31;
625 x77770000 = x77777777 & ~a6;
626 x22225555 = a1 ^ x77770000;
627 x11116666 = a3 ^ x22225555;
628 x1F1F6F6F = a4 | x11116666;
630 x70700000 = x77770000 & ~a4;
631 x43433333 = a3 ^ x70700000;
632 x00430033 = a5 & x43433333;
633 x55557777 = a1 | x11116666;
634 x55167744 = x00430033 ^ x55557777;
635 x5A19784B = a4 ^ x55167744;
637 x5A1987B4 = a6 ^ x5A19784B;
638 x7A3BD7F5 = x22225555 | x5A1987B4;
639 x003B00F5 = a5 & x7A3BD7F5;
640 x221955A0 = x22225555 ^ x003B00F5;
641 x05050707 = a4 & x55557777;
642 x271C52A7 = x221955A0 ^ x05050707;
644 x2A2A82A0 = x7A3BD7F5 & ~a1;
645 x6969B193 = x43433333 ^ x2A2A82A0;
646 x1FE06F90 = a5 ^ x1F1F6F6F;
647 x16804E00 = x1FE06F90 & ~x6969B193;
648 xE97FB1FF = ~x16804E00;
649 x20 = xE97FB1FF & ~a2;
650 x21 = x20 ^ x5A19784B;
653 x43403302 = x43433333 & ~x003B00F5;
654 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
655 x37DEFFB7 = x271C52A7 | x35CAED30;
656 x349ECCB5 = x37DEFFB7 & ~x43403302;
657 x0B01234A = x1F1F6F6F & ~x349ECCB5;
659 x101884B4 = x5A1987B4 & x349ECCB5;
660 x0FF8EB24 = x1FE06F90 ^ x101884B4;
661 x41413333 = x43433333 & x55557777;
662 x4FF9FB37 = x0FF8EB24 | x41413333;
663 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
664 x30 = x4FC2FBC2 & a2;
665 x31 = x30 ^ x271C52A7;
668 x22222222 = a1 ^ x77777777;
669 x16BCEE97 = x349ECCB5 ^ x22222222;
670 x0F080B04 = a4 & x0FF8EB24;
671 x19B4E593 = x16BCEE97 ^ x0F080B04;
672 x00 = x0B01234A | a2;
673 x01 = x00 ^ x19B4E593;
676 x5C5C5C5C = x1F1F6F6F ^ x43433333;
677 x4448184C = x5C5C5C5C & ~x19B4E593;
678 x2DDABE71 = x22225555 ^ x0FF8EB24;
679 x6992A63D = x4448184C ^ x2DDABE71;
680 x10 = x1F1F6F6F & a2;
681 x11 = x10 ^ x6992A63D;
685 static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
688 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
689 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
690 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
691 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
692 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
693 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
694 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
695 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
696 u32 x00, x01, x10, x11, x20, x21, x30, x31;
701 x11115555 = a1 & x3333FFFF;
702 x22DD6699 = x33CC33CC ^ x11115555;
703 x22DD9966 = a6 ^ x22DD6699;
704 x00220099 = a5 & ~x22DD9966;
706 x00551144 = a1 & x22DD9966;
707 x33662277 = a2 ^ x00551144;
709 x7B7E7A7F = x33662277 | x5A5A5A5A;
710 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
712 x09030C06 = a3 & x59A31CE6;
713 x09030000 = x09030C06 & ~a6;
714 x336622FF = x00220099 | x33662277;
715 x3A6522FF = x09030000 ^ x336622FF;
716 x30 = x3A6522FF & a4;
717 x31 = x30 ^ x59A31CE6;
720 x484D494C = a2 ^ x7B7E7A7F;
721 x0000B6B3 = a6 & ~x484D494C;
722 x0F0FB9BC = a3 ^ x0000B6B3;
723 x00FC00F9 = a5 & ~x09030C06;
724 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
726 x5DF75DF7 = a1 | x59A31CE6;
727 x116600F7 = x336622FF & x5DF75DF7;
728 x1E69B94B = x0F0FB9BC ^ x116600F7;
729 x1668B94B = x1E69B94B & ~x09030000;
730 x20 = x00220099 | a4;
731 x21 = x20 ^ x1668B94B;
734 x7B7B7B7B = a2 | x5A5A5A5A;
735 x411E5984 = x3A6522FF ^ x7B7B7B7B;
736 x1FFFFDFD = x11115555 | x0FFFB9FD;
737 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
739 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
740 x004B002D = a5 & ~x3CB4DFD2;
741 xB7B2B6B3 = ~x484D494C;
742 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
743 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
744 x10 = xCC82CDE5 & ~a4;
745 x11 = x10 ^ x5EE1A479;
748 x0055EEBB = a6 ^ x00551144;
749 x5A5AECE9 = a1 ^ x0F0FB9BC;
750 x0050ECA9 = x0055EEBB & x5A5AECE9;
751 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
752 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
753 x00 = x0FFFB9FD & ~a4;
754 x01 = x00 ^ xC59A2D67;
758 static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
760 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
761 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
762 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
763 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
764 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
765 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
766 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
767 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
768 u32 x00, x01, x10, x11, x20, x21, x30, x31;
771 x3CC33CC3 = a3 ^ x0FF00FF0;
772 x00003CC3 = a6 & x3CC33CC3;
773 x0F000F00 = a4 & x0FF00FF0;
774 x5A555A55 = a2 ^ x0F000F00;
775 x00001841 = x00003CC3 & x5A555A55;
777 x00000F00 = a6 & x0F000F00;
778 x33333C33 = a3 ^ x00000F00;
779 x7B777E77 = x5A555A55 | x33333C33;
780 x0FF0F00F = a6 ^ x0FF00FF0;
781 x74878E78 = x7B777E77 ^ x0FF0F00F;
782 x30 = a1 & ~x00001841;
783 x31 = x30 ^ x74878E78;
786 x003C003C = a5 & ~x3CC33CC3;
787 x5A7D5A7D = x5A555A55 | x003C003C;
788 x333300F0 = x00003CC3 ^ x33333C33;
789 x694E5A8D = x5A7D5A7D ^ x333300F0;
791 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
792 x000F0303 = a4 & ~x0FF0CCCC;
793 x5A505854 = x5A555A55 & ~x000F0303;
794 x33CC000F = a5 ^ x333300F0;
795 x699C585B = x5A505854 ^ x33CC000F;
797 x7F878F78 = x0F000F00 | x74878E78;
798 x21101013 = a3 & x699C585B;
799 x7F979F7B = x7F878F78 | x21101013;
800 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
801 x4F9493BB = x7F979F7B ^ x30030CC0;
802 x00 = x4F9493BB & ~a1;
803 x01 = x00 ^ x694E5A8D;
806 x6F9CDBFB = x699C585B | x4F9493BB;
807 x0000DBFB = a6 & x6F9CDBFB;
808 x00005151 = a2 & x0000DBFB;
809 x26DAC936 = x694E5A8D ^ x4F9493BB;
810 x26DA9867 = x00005151 ^ x26DAC936;
812 x27DA9877 = x21101013 | x26DA9867;
813 x27DA438C = x0000DBFB ^ x27DA9877;
814 x2625C9C9 = a5 ^ x26DAC936;
815 x27FFCBCD = x27DA438C | x2625C9C9;
816 x20 = x27FFCBCD & a1;
817 x21 = x20 ^ x699C585B;
820 x27FF1036 = x0000DBFB ^ x27FFCBCD;
821 x27FF103E = x003C003C | x27FF1036;
822 xB06B6C44 = ~x4F9493BB;
823 x97947C7A = x27FF103E ^ xB06B6C44;
824 x10 = x97947C7A & ~a1;
825 x11 = x10 ^ x26DA9867;
829 static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
831 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
832 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
833 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
834 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
835 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
836 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
837 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
838 u32 x00, x01, x10, x11, x20, x21, x30, x31;
840 x0C0C0C0C = a3 & ~a2;
841 x0000F0F0 = a5 & ~a3;
842 x00FFF00F = a4 ^ x0000F0F0;
843 x00555005 = a1 & x00FFF00F;
844 x00515001 = x00555005 & ~x0C0C0C0C;
846 x33000330 = a2 & ~x00FFF00F;
847 x77555775 = a1 | x33000330;
848 x30303030 = a2 & ~a3;
849 x3030CFCF = a5 ^ x30303030;
850 x30104745 = x77555775 & x3030CFCF;
851 x30555745 = x00555005 | x30104745;
853 xFF000FF0 = ~x00FFF00F;
854 xCF1048B5 = x30104745 ^ xFF000FF0;
855 x080A080A = a3 & ~x77555775;
856 xC71A40BF = xCF1048B5 ^ x080A080A;
857 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
858 x10 = x00515001 | a6;
859 x11 = x10 ^ xCB164CB3;
862 x9E4319E6 = a1 ^ xCB164CB3;
863 x000019E6 = a5 & x9E4319E6;
864 xF429738C = a2 ^ xC71A40BF;
865 xF4296A6A = x000019E6 ^ xF429738C;
866 xC729695A = x33000330 ^ xF4296A6A;
868 xC47C3D2F = x30555745 ^ xF4296A6A;
869 xF77F3F3F = a2 | xC47C3D2F;
870 x9E43E619 = a5 ^ x9E4319E6;
871 x693CD926 = xF77F3F3F ^ x9E43E619;
872 x20 = x30555745 & a6;
873 x21 = x20 ^ x693CD926;
876 xF719A695 = x3030CFCF ^ xC729695A;
877 xF4FF73FF = a4 | xF429738C;
878 x03E6D56A = xF719A695 ^ xF4FF73FF;
879 x56B3803F = a1 ^ x03E6D56A;
880 x30 = x56B3803F & a6;
881 x31 = x30 ^ xC729695A;
884 xF700A600 = xF719A695 & ~a4;
885 x61008000 = x693CD926 & xF700A600;
886 x03B7856B = x00515001 ^ x03E6D56A;
887 x62B7056B = x61008000 ^ x03B7856B;
888 x00 = x62B7056B | a6;
889 x01 = x00 ^ xC729695A;
899 * Bitslice DES S-boxes making use of a vector conditional select operation
900 * (e.g., vsel on PowerPC with AltiVec).
902 * Gate counts: 36 33 33 26 35 34 34 32
905 * Several same-gate-count expressions for each S-box are included (for use on
906 * different CPUs/GPUs).
908 * These Boolean expressions corresponding to DES S-boxes have been generated
909 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
910 * John the Ripper password cracker: http://www.openwall.com/john/
911 * Being mathematical formulas, they are not copyrighted and are free for reuse
914 * This file (a specific representation of the S-box expressions, surrounding
915 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
916 * Redistribution and use in source and binary forms, with or without
917 * modification, are permitted. (This is a heavily cut-down "BSD license".)
919 * The effort has been sponsored by Rapid7: http://www.rapid7.com
922 #define vnot(dst, a) (dst) = ~(a)
923 #define vand(dst, a, b) (dst) = (a) & (b)
924 #define vor(dst, a, b) (dst) = (a) | (b)
925 #define vandn(dst, a, b) (dst) = (a) & ~(b)
926 #define vxor(dst, a, b) (dst) = (a) ^ (b)
927 #define vsel(dst, a, b, c) (dst) = bitselect((a),(b),(c))
930 s1(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
931 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
933 u32 x0F0F3333, x3C3C3C3C, x55FF55FF, x69C369C3, x0903B73F, x09FCB7C0,
935 u32 x55AFD1B7, x3C3C69C3, x6993B874;
936 u32 x5CEDE59F, x09FCE295, x5D91A51E, x529E962D;
937 u32 x29EEADC0, x4B8771A3, x428679F3, x6B68D433;
938 u32 x5BA7E193, x026F12F3, x6B27C493, x94D83B6C;
939 u32 x965E0B0F, x3327A113, x847F0A1F, xD6E19C32;
940 u32 x0DBCE883, x3A25A215, x37994A96;
941 u32 x8A487EA7, x8B480F07, xB96C2D16;
944 vsel(x0F0F3333, a3, a2, a5);
945 vxor(x3C3C3C3C, a2, a3);
946 vor(x55FF55FF, a1, a4);
947 vxor(x69C369C3, x3C3C3C3C, x55FF55FF);
948 vsel(x0903B73F, a5, x0F0F3333, x69C369C3);
949 vxor(x09FCB7C0, a4, x0903B73F);
950 vxor(x5CA9E295, a1, x09FCB7C0);
952 vsel(x55AFD1B7, x5CA9E295, x55FF55FF, x0F0F3333);
953 vsel(x3C3C69C3, x3C3C3C3C, x69C369C3, a5);
954 vxor(x6993B874, x55AFD1B7, x3C3C69C3);
956 vsel(x5CEDE59F, x55FF55FF, x5CA9E295, x6993B874);
957 vsel(x09FCE295, x09FCB7C0, x5CA9E295, a5);
958 vsel(x5D91A51E, x5CEDE59F, x6993B874, x09FCE295);
959 vxor(x529E962D, x0F0F3333, x5D91A51E);
961 vsel(x29EEADC0, x69C369C3, x09FCB7C0, x5CEDE59F);
962 vsel(x4B8771A3, x0F0F3333, x69C369C3, x5CA9E295);
963 vsel(x428679F3, a5, x4B8771A3, x529E962D);
964 vxor(x6B68D433, x29EEADC0, x428679F3);
966 vsel(x5BA7E193, x5CA9E295, x4B8771A3, a3);
967 vsel(x026F12F3, a4, x0F0F3333, x529E962D);
968 vsel(x6B27C493, x6B68D433, x5BA7E193, x026F12F3);
969 vnot(x94D83B6C, x6B27C493);
970 vsel(x0, x94D83B6C, x6B68D433, a6);
971 vxor(*out1, *out1, x0);
973 vsel(x965E0B0F, x94D83B6C, a3, x428679F3);
974 vsel(x3327A113, x5BA7E193, a2, x69C369C3);
975 vsel(x847F0A1F, x965E0B0F, a4, x3327A113);
976 vxor(xD6E19C32, x529E962D, x847F0A1F);
977 vsel(x1, xD6E19C32, x5CA9E295, a6);
978 vxor(*out2, *out2, x1);
980 vsel(x0DBCE883, x09FCE295, x3C3C69C3, x847F0A1F);
981 vsel(x3A25A215, x3327A113, x5CA9E295, x0903B73F);
982 vxor(x37994A96, x0DBCE883, x3A25A215);
983 vsel(x3, x37994A96, x529E962D, a6);
984 vxor(*out4, *out4, x3);
986 vxor(x8A487EA7, x5CA9E295, xD6E19C32);
987 vsel(x8B480F07, a3, x8A487EA7, x847F0A1F);
988 vsel(xB96C2D16, x8B480F07, x3C3C3C3C, x3A25A215);
989 vsel(x2, xB96C2D16, x6993B874, a6);
990 vxor(*out3, *out3, x2);
994 s2(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
995 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
997 u32 x55553333, x0055FF33, x33270F03, x66725A56, x00FFFF00, x668DA556;
998 u32 x0F0F5A56, xF0F0A5A9, xA5A5969A, xA55A699A;
999 u32 x0F5AF03C, x6600FF56, x87A5F09C;
1000 u32 xA55A963C, x3C69C30F, xB44BC32D;
1001 u32 x66D7CC56, x0F4B0F2D, x699CC37B, x996C66D2;
1002 u32 xB46C662D, x278DB412, xB66CB43B;
1003 u32 xD2DC4E52, x27993333, xD2994E33;
1004 u32 x278D0F2D, x2E0E547B, x09976748;
1007 vsel(x55553333, a1, a3, a6);
1008 vsel(x0055FF33, a6, x55553333, a5);
1009 vsel(x33270F03, a3, a4, x0055FF33);
1010 vxor(x66725A56, a1, x33270F03);
1011 vxor(x00FFFF00, a5, a6);
1012 vxor(x668DA556, x66725A56, x00FFFF00);
1014 vsel(x0F0F5A56, a4, x66725A56, a6);
1015 vnot(xF0F0A5A9, x0F0F5A56);
1016 vxor(xA5A5969A, x55553333, xF0F0A5A9);
1017 vxor(xA55A699A, x00FFFF00, xA5A5969A);
1018 vsel(x1, xA55A699A, x668DA556, a2);
1019 vxor(*out2, *out2, x1);
1021 vxor(x0F5AF03C, a4, x0055FF33);
1022 vsel(x6600FF56, x66725A56, a6, x00FFFF00);
1023 vsel(x87A5F09C, xA5A5969A, x0F5AF03C, x6600FF56);
1025 vsel(xA55A963C, xA5A5969A, x0F5AF03C, a5);
1026 vxor(x3C69C30F, a3, x0F5AF03C);
1027 vsel(xB44BC32D, xA55A963C, x3C69C30F, a1);
1029 vsel(x66D7CC56, x66725A56, x668DA556, xA5A5969A);
1030 vsel(x0F4B0F2D, a4, xB44BC32D, a5);
1031 vxor(x699CC37B, x66D7CC56, x0F4B0F2D);
1032 vxor(x996C66D2, xF0F0A5A9, x699CC37B);
1033 vsel(x0, x996C66D2, xB44BC32D, a2);
1034 vxor(*out1, *out1, x0);
1036 vsel(xB46C662D, xB44BC32D, x996C66D2, x00FFFF00);
1037 vsel(x278DB412, x668DA556, xA5A5969A, a1);
1038 vsel(xB66CB43B, xB46C662D, x278DB412, x6600FF56);
1040 vsel(xD2DC4E52, x66D7CC56, x996C66D2, xB44BC32D);
1041 vsel(x27993333, x278DB412, a3, x0055FF33);
1042 vsel(xD2994E33, xD2DC4E52, x27993333, a5);
1043 vsel(x3, x87A5F09C, xD2994E33, a2);
1044 vxor(*out4, *out4, x3);
1046 vsel(x278D0F2D, x278DB412, x0F4B0F2D, a6);
1047 vsel(x2E0E547B, x0F0F5A56, xB66CB43B, x278D0F2D);
1048 vxor(x09976748, x27993333, x2E0E547B);
1049 vsel(x2, xB66CB43B, x09976748, a2);
1050 vxor(*out3, *out3, x2);
1054 s3(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1055 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1057 u32 x0F330F33, x0F33F0CC, x5A66A599;
1058 u32 x2111B7BB, x03FF3033, x05BB50EE, x074F201F, x265E97A4;
1059 u32 x556BA09E, x665A93AC, x99A56C53;
1060 u32 x25A1A797, x5713754C, x66559355, x47B135C6;
1061 u32 x9A5A5C60, xD07AF8F8, x87698DB4, xE13C1EE1;
1062 u32 x9E48CDE4, x655B905E, x00A55CFF, x9E49915E;
1063 u32 xD6599874, x05330022, xD2699876;
1064 u32 x665F9364, xD573F0F2, xB32C6396;
1067 vsel(x0F330F33, a4, a3, a5);
1068 vxor(x0F33F0CC, a6, x0F330F33);
1069 vxor(x5A66A599, a2, x0F33F0CC);
1071 vsel(x2111B7BB, a3, a6, x5A66A599);
1072 vsel(x03FF3033, a5, a3, x0F33F0CC);
1073 vsel(x05BB50EE, a5, x0F33F0CC, a2);
1074 vsel(x074F201F, x03FF3033, a4, x05BB50EE);
1075 vxor(x265E97A4, x2111B7BB, x074F201F);
1077 vsel(x556BA09E, x5A66A599, x05BB50EE, a4);
1078 vsel(x665A93AC, x556BA09E, x265E97A4, a3);
1079 vnot(x99A56C53, x665A93AC);
1080 vsel(x1, x265E97A4, x99A56C53, a1);
1081 vxor(*out2, *out2, x1);
1083 vxor(x25A1A797, x03FF3033, x265E97A4);
1084 vsel(x5713754C, a2, x0F33F0CC, x074F201F);
1085 vsel(x66559355, x665A93AC, a2, a5);
1086 vsel(x47B135C6, x25A1A797, x5713754C, x66559355);
1088 vxor(x9A5A5C60, x03FF3033, x99A56C53);
1089 vsel(xD07AF8F8, x9A5A5C60, x556BA09E, x5A66A599);
1090 vxor(x87698DB4, x5713754C, xD07AF8F8);
1091 vxor(xE13C1EE1, x66559355, x87698DB4);
1093 vsel(x9E48CDE4, x9A5A5C60, x87698DB4, x265E97A4);
1094 vsel(x655B905E, x66559355, x05BB50EE, a4);
1095 vsel(x00A55CFF, a5, a6, x9A5A5C60);
1096 vsel(x9E49915E, x9E48CDE4, x655B905E, x00A55CFF);
1097 vsel(x0, x9E49915E, xE13C1EE1, a1);
1098 vxor(*out1, *out1, x0);
1100 vsel(xD6599874, xD07AF8F8, x66559355, x0F33F0CC);
1101 vand(x05330022, x0F330F33, x05BB50EE);
1102 vsel(xD2699876, xD6599874, x00A55CFF, x05330022);
1103 vsel(x3, x5A66A599, xD2699876, a1);
1104 vxor(*out4, *out4, x3);
1106 vsel(x665F9364, x265E97A4, x66559355, x47B135C6);
1107 vsel(xD573F0F2, xD07AF8F8, x05330022, a4);
1108 vxor(xB32C6396, x665F9364, xD573F0F2);
1109 vsel(x2, xB32C6396, x47B135C6, a1);
1110 vxor(*out3, *out3, x2);
1114 s4(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1115 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1117 u32 x0505AFAF, x0555AF55, x0A5AA05A, x46566456, x0A0A5F5F, x0AF55FA0,
1118 x0AF50F0F, x4CA36B59;
1120 u32 x01BB23BB, x5050FAFA, xA31C26BE, xA91679E1;
1122 u32 x50E9FA1E, x0AF55F00, x827D9784, xD2946D9A;
1123 u32 x31F720B3, x11FB21B3, x4712A7AD, x9586CA37;
1126 vsel(x0505AFAF, a5, a3, a1);
1127 vsel(x0555AF55, x0505AFAF, a1, a4);
1128 vxor(x0A5AA05A, a3, x0555AF55);
1129 vsel(x46566456, a1, x0A5AA05A, a2);
1130 vsel(x0A0A5F5F, a3, a5, a1);
1131 vxor(x0AF55FA0, a4, x0A0A5F5F);
1132 vsel(x0AF50F0F, x0AF55FA0, a3, a5);
1133 vxor(x4CA36B59, x46566456, x0AF50F0F);
1135 vnot(xB35C94A6, x4CA36B59);
1137 vsel(x01BB23BB, a4, a2, x0555AF55);
1138 vxor(x5050FAFA, a1, x0505AFAF);
1139 vsel(xA31C26BE, xB35C94A6, x01BB23BB, x5050FAFA);
1140 vxor(xA91679E1, x0A0A5F5F, xA31C26BE);
1142 vnot(x56E9861E, xA91679E1);
1144 vsel(x50E9FA1E, x5050FAFA, x56E9861E, a4);
1145 vsel(x0AF55F00, x0AF50F0F, x0AF55FA0, x0A0A5F5F);
1146 vsel(x827D9784, xB35C94A6, x0AF55F00, a2);
1147 vxor(xD2946D9A, x50E9FA1E, x827D9784);
1148 vsel(x2, xD2946D9A, x4CA36B59, a6);
1149 vxor(*out3, *out3, x2);
1150 vsel(x3, xB35C94A6, xD2946D9A, a6);
1151 vxor(*out4, *out4, x3);
1153 vsel(x31F720B3, a2, a4, x0AF55FA0);
1154 vsel(x11FB21B3, x01BB23BB, x31F720B3, x5050FAFA);
1155 vxor(x4712A7AD, x56E9861E, x11FB21B3);
1156 vxor(x9586CA37, xD2946D9A, x4712A7AD);
1157 vsel(x0, x56E9861E, x9586CA37, a6);
1158 vxor(*out1, *out1, x0);
1159 vsel(x1, x9586CA37, xA91679E1, a6);
1160 vxor(*out2, *out2, x1);
1164 s5(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1165 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1167 u32 x550F550F, xAAF0AAF0, xA5F5A5F5, x96C696C6, x00FFFF00, x963969C6;
1168 u32 x2E3C2E3C, xB73121F7, x1501DF0F, x00558A5F, x2E69A463;
1169 u32 x0679ED42, x045157FD, xB32077FF, x9D49D39C;
1170 u32 xAC81CFB2, xF72577AF, x5BA4B81D;
1171 u32 x5BA477AF, x4895469F, x3A35273A, x1A35669A;
1172 u32 x12E6283D, x9E47D3D4, x1A676AB4;
1173 u32 x891556DF, xE5E77F82, x6CF2295D;
1174 u32 x2E3CA5F5, x9697C1C6, x369CC1D6;
1177 vsel(x550F550F, a1, a3, a5);
1178 vnot(xAAF0AAF0, x550F550F);
1179 vsel(xA5F5A5F5, xAAF0AAF0, a1, a3);
1180 vxor(x96C696C6, a2, xA5F5A5F5);
1181 vxor(x00FFFF00, a5, a6);
1182 vxor(x963969C6, x96C696C6, x00FFFF00);
1184 vsel(x2E3C2E3C, a3, xAAF0AAF0, a2);
1185 vsel(xB73121F7, a2, x963969C6, x96C696C6);
1186 vsel(x1501DF0F, a6, x550F550F, xB73121F7);
1187 vsel(x00558A5F, x1501DF0F, a5, a1);
1188 vxor(x2E69A463, x2E3C2E3C, x00558A5F);
1190 vsel(x0679ED42, x00FFFF00, x2E69A463, x96C696C6);
1191 vsel(x045157FD, a6, a1, x0679ED42);
1192 vsel(xB32077FF, xB73121F7, a6, x045157FD);
1193 vxor(x9D49D39C, x2E69A463, xB32077FF);
1194 vsel(x2, x9D49D39C, x2E69A463, a4);
1195 vxor(*out3, *out3, x2);
1197 vsel(xAC81CFB2, xAAF0AAF0, x1501DF0F, x0679ED42);
1198 vsel(xF72577AF, xB32077FF, x550F550F, a1);
1199 vxor(x5BA4B81D, xAC81CFB2, xF72577AF);
1200 vsel(x1, x5BA4B81D, x963969C6, a4);
1201 vxor(*out2, *out2, x1);
1203 vsel(x5BA477AF, x5BA4B81D, xF72577AF, a6);
1204 vsel(x4895469F, x5BA477AF, x00558A5F, a2);
1205 vsel(x3A35273A, x2E3C2E3C, a2, x963969C6);
1206 vsel(x1A35669A, x4895469F, x3A35273A, x5BA4B81D);
1208 vsel(x12E6283D, a5, x5BA4B81D, x963969C6);
1209 vsel(x9E47D3D4, x96C696C6, x9D49D39C, xAC81CFB2);
1210 vsel(x1A676AB4, x12E6283D, x9E47D3D4, x4895469F);
1212 vsel(x891556DF, xB32077FF, x4895469F, x3A35273A);
1213 vsel(xE5E77F82, xF72577AF, x00FFFF00, x12E6283D);
1214 vxor(x6CF2295D, x891556DF, xE5E77F82);
1215 vsel(x3, x1A35669A, x6CF2295D, a4);
1216 vxor(*out4, *out4, x3);
1218 vsel(x2E3CA5F5, x2E3C2E3C, xA5F5A5F5, a6);
1219 vsel(x9697C1C6, x96C696C6, x963969C6, x045157FD);
1220 vsel(x369CC1D6, x2E3CA5F5, x9697C1C6, x5BA477AF);
1221 vsel(x0, x369CC1D6, x1A676AB4, a4);
1222 vxor(*out1, *out1, x0);
1226 s6(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1227 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1229 u32 x555500FF, x666633CC, x606F30CF, x353A659A, x353A9A65, xCAC5659A;
1230 u32 x353A6565, x0A3F0A6F, x6C5939A3, x5963A3C6;
1231 u32 x35FF659A, x3AF06A95, x05CF0A9F, x16E94A97;
1232 u32 x86CD4C9B, x12E0FFFD, x942D9A67;
1233 u32 x142956AB, x455D45DF, x1C3EE619;
1234 u32 x2AEA70D5, x20CF7A9F, x3CF19C86, x69A49C79;
1235 u32 x840DBB67, x6DA19C1E, x925E63E1;
1236 u32 x9C3CA761, x257A75D5, xB946D2B4;
1239 vsel(x555500FF, a1, a4, a5);
1240 vxor(x666633CC, a2, x555500FF);
1241 vsel(x606F30CF, x666633CC, a4, a3);
1242 vxor(x353A659A, a1, x606F30CF);
1243 vxor(x353A9A65, a5, x353A659A);
1244 vnot(xCAC5659A, x353A9A65);
1246 vsel(x353A6565, x353A659A, x353A9A65, a4);
1247 vsel(x0A3F0A6F, a3, a4, x353A6565);
1248 vxor(x6C5939A3, x666633CC, x0A3F0A6F);
1249 vxor(x5963A3C6, x353A9A65, x6C5939A3);
1251 vsel(x35FF659A, a4, x353A659A, x353A6565);
1252 vxor(x3AF06A95, a3, x35FF659A);
1253 vsel(x05CF0A9F, a4, a3, x353A9A65);
1254 vsel(x16E94A97, x3AF06A95, x05CF0A9F, x6C5939A3);
1256 vsel(x86CD4C9B, xCAC5659A, x05CF0A9F, x6C5939A3);
1257 vsel(x12E0FFFD, a5, x3AF06A95, x16E94A97);
1258 vsel(x942D9A67, x86CD4C9B, x353A9A65, x12E0FFFD);
1259 vsel(x0, xCAC5659A, x942D9A67, a6);
1260 vxor(*out1, *out1, x0);
1262 vsel(x142956AB, x353A659A, x942D9A67, a2);
1263 vsel(x455D45DF, a1, x86CD4C9B, x142956AB);
1264 vxor(x1C3EE619, x5963A3C6, x455D45DF);
1265 vsel(x3, x5963A3C6, x1C3EE619, a6);
1266 vxor(*out4, *out4, x3);
1268 vsel(x2AEA70D5, x3AF06A95, x606F30CF, x353A9A65);
1269 vsel(x20CF7A9F, x2AEA70D5, x05CF0A9F, x0A3F0A6F);
1270 vxor(x3CF19C86, x1C3EE619, x20CF7A9F);
1271 vxor(x69A49C79, x555500FF, x3CF19C86);
1273 vsel(x840DBB67, a5, x942D9A67, x86CD4C9B);
1274 vsel(x6DA19C1E, x69A49C79, x3CF19C86, x840DBB67);
1275 vnot(x925E63E1, x6DA19C1E);
1276 vsel(x1, x925E63E1, x69A49C79, a6);
1277 vxor(*out2, *out2, x1);
1279 vsel(x9C3CA761, x840DBB67, x1C3EE619, x3CF19C86);
1280 vsel(x257A75D5, x455D45DF, x2AEA70D5, x606F30CF);
1281 vxor(xB946D2B4, x9C3CA761, x257A75D5);
1282 vsel(x2, x16E94A97, xB946D2B4, a6);
1283 vxor(*out3, *out3, x2);
1287 s7(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1288 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1290 u32 x44447777, x4B4B7878, x22772277, x0505F5F5, x220522F5, x694E5A8D;
1291 u32 x00FFFF00, x66666666, x32353235, x26253636, x26DAC936;
1292 u32 x738F9C63, x11EF9867, x26DA9867;
1293 u32 x4B4B9C63, x4B666663, x4E639396;
1294 u32 x4E4B393C, xFF00FF00, xFF05DD21, xB14EE41D;
1295 u32 xD728827B, x6698807B, x699C585B;
1296 u32 x738C847B, xA4A71E18, x74878E78;
1297 u32 x333D9639, x74879639, x8B7869C6;
1300 vsel(x44447777, a2, a6, a3);
1301 vxor(x4B4B7878, a4, x44447777);
1302 vsel(x22772277, a3, a5, a2);
1303 vsel(x0505F5F5, a6, a2, a4);
1304 vsel(x220522F5, x22772277, x0505F5F5, a5);
1305 vxor(x694E5A8D, x4B4B7878, x220522F5);
1307 vxor(x00FFFF00, a5, a6);
1308 vxor(x66666666, a2, a3);
1309 vsel(x32353235, a3, x220522F5, a4);
1310 vsel(x26253636, x66666666, x32353235, x4B4B7878);
1311 vxor(x26DAC936, x00FFFF00, x26253636);
1312 vsel(x0, x26DAC936, x694E5A8D, a1);
1313 vxor(*out1, *out1, x0);
1315 vxor(x738F9C63, a2, x26DAC936);
1316 vsel(x11EF9867, x738F9C63, a5, x66666666);
1317 vsel(x26DA9867, x26DAC936, x11EF9867, a6);
1319 vsel(x4B4B9C63, x4B4B7878, x738F9C63, a6);
1320 vsel(x4B666663, x4B4B9C63, x66666666, x00FFFF00);
1321 vxor(x4E639396, x0505F5F5, x4B666663);
1323 vsel(x4E4B393C, x4B4B7878, x4E639396, a2);
1324 vnot(xFF00FF00, a5);
1325 vsel(xFF05DD21, xFF00FF00, x738F9C63, x32353235);
1326 vxor(xB14EE41D, x4E4B393C, xFF05DD21);
1327 vsel(x1, xB14EE41D, x26DA9867, a1);
1328 vxor(*out2, *out2, x1);
1330 vxor(xD728827B, x66666666, xB14EE41D);
1331 vsel(x6698807B, x26DA9867, xD728827B, x4E4B393C);
1332 vsel(x699C585B, x6698807B, x694E5A8D, xFF05DD21);
1333 vsel(x2, x699C585B, x4E639396, a1);
1334 vxor(*out3, *out3, x2);
1336 vsel(x738C847B, x738F9C63, xD728827B, x4B4B7878);
1337 vxor(xA4A71E18, x738F9C63, xD728827B);
1338 vsel(x74878E78, x738C847B, xA4A71E18, a4);
1340 vsel(x333D9639, x32353235, x738C847B, xB14EE41D);
1341 vsel(x74879639, x74878E78, x333D9639, a6);
1342 vnot(x8B7869C6, x74879639);
1343 vsel(x3, x74878E78, x8B7869C6, a1);
1344 vxor(*out4, *out4, x3);
1348 s8(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6,
1349 u32 * out1, u32 * out2, u32 * out3, u32 * out4)
1351 u32 x0505F5F5, x05FAF50A, x0F0F00FF, x22227777, x07DA807F, x34E9B34C;
1352 u32 x00FFF00F, x0033FCCF, x5565B15C, x0C0C3F3F, x59698E63;
1353 u32 x3001F74E, x30555745, x693CD926;
1354 u32 x0C0CD926, x0C3F25E9, x38D696A5;
1356 u32 x03D2117B, xC778395B, xCB471CB2;
1357 u32 x5425B13F, x56B3803F, x919AE965;
1358 u32 x17B3023F, x75555755, x62E6556A, xA59E6C31;
1361 vsel(x0505F5F5, a5, a1, a3);
1362 vxor(x05FAF50A, a4, x0505F5F5);
1363 vsel(x0F0F00FF, a3, a4, a5);
1364 vsel(x22227777, a2, a5, a1);
1365 vsel(x07DA807F, x05FAF50A, x0F0F00FF, x22227777);
1366 vxor(x34E9B34C, a2, x07DA807F);
1368 vsel(x00FFF00F, x05FAF50A, a4, a3);
1369 vsel(x0033FCCF, a5, x00FFF00F, a2);
1370 vsel(x5565B15C, a1, x34E9B34C, x0033FCCF);
1371 vsel(x0C0C3F3F, a3, a5, a2);
1372 vxor(x59698E63, x5565B15C, x0C0C3F3F);
1374 vsel(x3001F74E, x34E9B34C, a5, x05FAF50A);
1375 vsel(x30555745, x3001F74E, a1, x00FFF00F);
1376 vxor(x693CD926, x59698E63, x30555745);
1377 vsel(x2, x693CD926, x59698E63, a6);
1378 vxor(*out3, *out3, x2);
1380 vsel(x0C0CD926, x0C0C3F3F, x693CD926, a5);
1381 vxor(x0C3F25E9, x0033FCCF, x0C0CD926);
1382 vxor(x38D696A5, x34E9B34C, x0C3F25E9);
1384 vnot(xC729695A, x38D696A5);
1386 vsel(x03D2117B, x07DA807F, a2, x0C0CD926);
1387 vsel(xC778395B, xC729695A, x03D2117B, x30555745);
1388 vxor(xCB471CB2, x0C3F25E9, xC778395B);
1389 vsel(x1, xCB471CB2, x34E9B34C, a6);
1390 vxor(*out2, *out2, x1);
1392 vsel(x5425B13F, x5565B15C, x0C0C3F3F, x03D2117B);
1393 vsel(x56B3803F, x07DA807F, x5425B13F, x59698E63);
1394 vxor(x919AE965, xC729695A, x56B3803F);
1395 vsel(x3, xC729695A, x919AE965, a6);
1396 vxor(*out4, *out4, x3);
1398 vsel(x17B3023F, x07DA807F, a2, x59698E63);
1399 vor(x75555755, a1, x30555745);
1400 vxor(x62E6556A, x17B3023F, x75555755);
1401 vxor(xA59E6C31, xC778395B, x62E6556A);
1402 vsel(x0, xA59E6C31, x38D696A5, a6);
1403 vxor(*out1, *out1, x0);
1407 #define SWAP(a, b) { u32 tmp=*a;*a=*b;*b=tmp; }
1443 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
1444 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
1445 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
1446 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
1447 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
1448 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
1449 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
1450 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
1451 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
1452 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
1453 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
1454 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
1455 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
1456 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
1457 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
1458 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
1466 #define KXX_DECL volatile
1467 #define sXXX_DECL volatile
1470 #ifdef DESCRYPT_SALT
1472 static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
1474 sXXX_DECL u32 s001 = (0x001 & DESCRYPT_SALT) ? 0xffffffff : 0;
1475 sXXX_DECL u32 s002 = (0x002 & DESCRYPT_SALT) ? 0xffffffff : 0;
1476 sXXX_DECL u32 s004 = (0x004 & DESCRYPT_SALT) ? 0xffffffff : 0;
1477 sXXX_DECL u32 s008 = (0x008 & DESCRYPT_SALT) ? 0xffffffff : 0;
1478 sXXX_DECL u32 s010 = (0x010 & DESCRYPT_SALT) ? 0xffffffff : 0;
1479 sXXX_DECL u32 s020 = (0x020 & DESCRYPT_SALT) ? 0xffffffff : 0;
1480 sXXX_DECL u32 s040 = (0x040 & DESCRYPT_SALT) ? 0xffffffff : 0;
1481 sXXX_DECL u32 s080 = (0x080 & DESCRYPT_SALT) ? 0xffffffff : 0;
1482 sXXX_DECL u32 s100 = (0x100 & DESCRYPT_SALT) ? 0xffffffff : 0;
1483 sXXX_DECL u32 s200 = (0x200 & DESCRYPT_SALT) ? 0xffffffff : 0;
1484 sXXX_DECL u32 s400 = (0x400 & DESCRYPT_SALT) ? 0xffffffff : 0;
1485 sXXX_DECL u32 s800 = (0x800 & DESCRYPT_SALT) ? 0xffffffff : 0;
1487 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
1488 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
1489 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
1490 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
1491 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
1492 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
1493 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
1494 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
1496 for (u32 ii = 0; ii < 25; ii++)
1499 #if CUDA_ARCH >= 500
1509 for (u32 i = 0; i < 2; i++)
1511 if (i) KEYSET10 else KEYSET00
1513 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1514 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1515 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1516 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1517 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1518 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1519 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1520 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1522 if (i) KEYSET11 else KEYSET01
1524 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1525 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1526 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1527 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1528 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1529 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1530 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1531 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1533 if (i) KEYSET12 else KEYSET02
1535 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1536 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1537 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1538 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1539 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1540 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1541 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1542 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1544 if (i) KEYSET13 else KEYSET03
1546 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1547 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1548 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1549 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1550 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1551 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1552 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1553 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1555 if (i) KEYSET14 else KEYSET04
1557 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1558 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1559 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1560 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1561 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1562 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1563 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1564 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1566 if (i) KEYSET15 else KEYSET05
1568 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1569 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1570 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1571 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1572 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1573 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1574 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1575 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1577 if (i) KEYSET16 else KEYSET06
1579 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1580 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1581 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1582 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1583 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1584 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1585 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1586 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1588 if (i) KEYSET17 else KEYSET07
1590 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1591 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1592 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1593 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1594 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1595 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1596 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1597 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1608 static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
1610 sXXX_DECL u32 s001 = (0x001 & SALT) ? 0xffffffff : 0;
1611 sXXX_DECL u32 s002 = (0x002 & SALT) ? 0xffffffff : 0;
1612 sXXX_DECL u32 s004 = (0x004 & SALT) ? 0xffffffff : 0;
1613 sXXX_DECL u32 s008 = (0x008 & SALT) ? 0xffffffff : 0;
1614 sXXX_DECL u32 s010 = (0x010 & SALT) ? 0xffffffff : 0;
1615 sXXX_DECL u32 s020 = (0x020 & SALT) ? 0xffffffff : 0;
1616 sXXX_DECL u32 s040 = (0x040 & SALT) ? 0xffffffff : 0;
1617 sXXX_DECL u32 s080 = (0x080 & SALT) ? 0xffffffff : 0;
1618 sXXX_DECL u32 s100 = (0x100 & SALT) ? 0xffffffff : 0;
1619 sXXX_DECL u32 s200 = (0x200 & SALT) ? 0xffffffff : 0;
1620 sXXX_DECL u32 s400 = (0x400 & SALT) ? 0xffffffff : 0;
1621 sXXX_DECL u32 s800 = (0x800 & SALT) ? 0xffffffff : 0;
1623 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
1624 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
1625 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
1626 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
1627 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
1628 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
1629 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
1630 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
1632 for (u32 ii = 0; ii < 25; ii++)
1635 #if CUDA_ARCH >= 500
1645 for (u32 i = 0; i < 2; i++)
1647 if (i) KEYSET10 else KEYSET00
1649 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1650 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1651 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1652 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1653 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1654 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1655 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1656 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1658 if (i) KEYSET11 else KEYSET01
1660 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1661 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1662 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1663 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1664 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1665 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1666 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1667 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1669 if (i) KEYSET12 else KEYSET02
1671 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1672 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1673 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1674 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1675 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1676 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1677 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1678 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1680 if (i) KEYSET13 else KEYSET03
1682 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1683 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1684 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1685 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1686 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1687 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1688 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1689 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1691 if (i) KEYSET14 else KEYSET04
1693 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1694 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1695 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1696 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1697 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1698 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1699 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1700 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1702 if (i) KEYSET15 else KEYSET05
1704 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1705 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1706 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1707 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1708 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1709 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1710 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1711 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1713 if (i) KEYSET16 else KEYSET06
1715 s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
1716 s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
1717 s3( *D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05);
1718 s4( *D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00);
1719 s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
1720 s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
1721 s7( *D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06);
1722 s8( *D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20);
1724 if (i) KEYSET17 else KEYSET07
1726 s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
1727 s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
1728 s3( *D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37);
1729 s4( *D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32);
1730 s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
1731 s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
1732 s7( *D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38);
1733 s8( *D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52);
1744 static void transpose32c (u32 data[32])
1746 #define swap(x,y,j,m) \
1747 t = ((x) ^ ((y) >> (j))) & (m); \
1749 (y) = (y) ^ (t << (j));
1753 swap (data[ 0], data[16], 16, 0x0000ffff);
1754 swap (data[ 1], data[17], 16, 0x0000ffff);
1755 swap (data[ 2], data[18], 16, 0x0000ffff);
1756 swap (data[ 3], data[19], 16, 0x0000ffff);
1757 swap (data[ 4], data[20], 16, 0x0000ffff);
1758 swap (data[ 5], data[21], 16, 0x0000ffff);
1759 swap (data[ 6], data[22], 16, 0x0000ffff);
1760 swap (data[ 7], data[23], 16, 0x0000ffff);
1761 swap (data[ 8], data[24], 16, 0x0000ffff);
1762 swap (data[ 9], data[25], 16, 0x0000ffff);
1763 swap (data[10], data[26], 16, 0x0000ffff);
1764 swap (data[11], data[27], 16, 0x0000ffff);
1765 swap (data[12], data[28], 16, 0x0000ffff);
1766 swap (data[13], data[29], 16, 0x0000ffff);
1767 swap (data[14], data[30], 16, 0x0000ffff);
1768 swap (data[15], data[31], 16, 0x0000ffff);
1769 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
1770 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
1771 swap (data[ 2], data[10], 8, 0x00ff00ff);
1772 swap (data[ 3], data[11], 8, 0x00ff00ff);
1773 swap (data[ 4], data[12], 8, 0x00ff00ff);
1774 swap (data[ 5], data[13], 8, 0x00ff00ff);
1775 swap (data[ 6], data[14], 8, 0x00ff00ff);
1776 swap (data[ 7], data[15], 8, 0x00ff00ff);
1777 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
1778 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
1779 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
1780 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
1781 swap (data[ 0], data[ 2], 2, 0x33333333);
1782 swap (data[ 1], data[ 3], 2, 0x33333333);
1783 swap (data[ 0], data[ 1], 1, 0x55555555);
1784 swap (data[ 2], data[ 3], 1, 0x55555555);
1785 swap (data[ 4], data[ 6], 2, 0x33333333);
1786 swap (data[ 5], data[ 7], 2, 0x33333333);
1787 swap (data[ 4], data[ 5], 1, 0x55555555);
1788 swap (data[ 6], data[ 7], 1, 0x55555555);
1789 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
1790 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
1791 swap (data[10], data[14], 4, 0x0f0f0f0f);
1792 swap (data[11], data[15], 4, 0x0f0f0f0f);
1793 swap (data[ 8], data[10], 2, 0x33333333);
1794 swap (data[ 9], data[11], 2, 0x33333333);
1795 swap (data[ 8], data[ 9], 1, 0x55555555);
1796 swap (data[10], data[11], 1, 0x55555555);
1797 swap (data[12], data[14], 2, 0x33333333);
1798 swap (data[13], data[15], 2, 0x33333333);
1799 swap (data[12], data[13], 1, 0x55555555);
1800 swap (data[14], data[15], 1, 0x55555555);
1801 swap (data[16], data[24], 8, 0x00ff00ff);
1802 swap (data[17], data[25], 8, 0x00ff00ff);
1803 swap (data[18], data[26], 8, 0x00ff00ff);
1804 swap (data[19], data[27], 8, 0x00ff00ff);
1805 swap (data[20], data[28], 8, 0x00ff00ff);
1806 swap (data[21], data[29], 8, 0x00ff00ff);
1807 swap (data[22], data[30], 8, 0x00ff00ff);
1808 swap (data[23], data[31], 8, 0x00ff00ff);
1809 swap (data[16], data[20], 4, 0x0f0f0f0f);
1810 swap (data[17], data[21], 4, 0x0f0f0f0f);
1811 swap (data[18], data[22], 4, 0x0f0f0f0f);
1812 swap (data[19], data[23], 4, 0x0f0f0f0f);
1813 swap (data[16], data[18], 2, 0x33333333);
1814 swap (data[17], data[19], 2, 0x33333333);
1815 swap (data[16], data[17], 1, 0x55555555);
1816 swap (data[18], data[19], 1, 0x55555555);
1817 swap (data[20], data[22], 2, 0x33333333);
1818 swap (data[21], data[23], 2, 0x33333333);
1819 swap (data[20], data[21], 1, 0x55555555);
1820 swap (data[22], data[23], 1, 0x55555555);
1821 swap (data[24], data[28], 4, 0x0f0f0f0f);
1822 swap (data[25], data[29], 4, 0x0f0f0f0f);
1823 swap (data[26], data[30], 4, 0x0f0f0f0f);
1824 swap (data[27], data[31], 4, 0x0f0f0f0f);
1825 swap (data[24], data[26], 2, 0x33333333);
1826 swap (data[25], data[27], 2, 0x33333333);
1827 swap (data[24], data[25], 1, 0x55555555);
1828 swap (data[26], data[27], 1, 0x55555555);
1829 swap (data[28], data[30], 2, 0x33333333);
1830 swap (data[29], data[31], 2, 0x33333333);
1831 swap (data[28], data[29], 1, 0x55555555);
1832 swap (data[30], data[31], 1, 0x55555555);
1835 static void m01500m (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1841 const u32 gid = get_global_id (0);
1842 const u32 lid = get_local_id (0);
1848 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
1854 const u32 K00 = pws[gid].i[ 0];
1855 const u32 K01 = pws[gid].i[ 1];
1856 const u32 K02 = pws[gid].i[ 2];
1857 const u32 K03 = pws[gid].i[ 3];
1858 const u32 K04 = pws[gid].i[ 4];
1859 const u32 K05 = pws[gid].i[ 5];
1860 const u32 K06 = pws[gid].i[ 6];
1861 const u32 K07 = pws[gid].i[ 7];
1862 const u32 K08 = pws[gid].i[ 8];
1863 const u32 K09 = pws[gid].i[ 9];
1864 const u32 K10 = pws[gid].i[10];
1865 const u32 K11 = pws[gid].i[11];
1866 const u32 K12 = pws[gid].i[12];
1867 const u32 K13 = pws[gid].i[13];
1868 const u32 K14 = pws[gid].i[14];
1869 const u32 K15 = pws[gid].i[15];
1870 const u32 K16 = pws[gid].i[16];
1871 const u32 K17 = pws[gid].i[17];
1872 const u32 K18 = pws[gid].i[18];
1873 const u32 K19 = pws[gid].i[19];
1874 const u32 K20 = pws[gid].i[20];
1875 const u32 K21 = pws[gid].i[21];
1876 const u32 K22 = pws[gid].i[22];
1877 const u32 K23 = pws[gid].i[23];
1878 const u32 K24 = pws[gid].i[24];
1879 const u32 K25 = pws[gid].i[25];
1880 const u32 K26 = pws[gid].i[26];
1881 const u32 K27 = pws[gid].i[27];
1882 const u32 K28 = pws[gid].i[28];
1883 const u32 K29 = pws[gid].i[29];
1884 const u32 K30 = pws[gid].i[30];
1885 const u32 K31 = pws[gid].i[31];
1886 const u32 K32 = pws[gid].i[32];
1887 const u32 K33 = pws[gid].i[33];
1888 const u32 K34 = pws[gid].i[34];
1889 const u32 K35 = pws[gid].i[35];
1890 const u32 K36 = pws[gid].i[36];
1891 const u32 K37 = pws[gid].i[37];
1892 const u32 K38 = pws[gid].i[38];
1893 const u32 K39 = pws[gid].i[39];
1894 const u32 K40 = pws[gid].i[40];
1895 const u32 K41 = pws[gid].i[41];
1896 const u32 K42 = pws[gid].i[42];
1897 const u32 K43 = pws[gid].i[43];
1898 const u32 K44 = pws[gid].i[44];
1899 const u32 K45 = pws[gid].i[45];
1900 const u32 K46 = pws[gid].i[46];
1901 const u32 K47 = pws[gid].i[47];
1902 const u32 K48 = pws[gid].i[48];
1903 const u32 K49 = pws[gid].i[49];
1904 const u32 K50 = pws[gid].i[50];
1905 const u32 K51 = pws[gid].i[51];
1906 const u32 K52 = pws[gid].i[52];
1907 const u32 K53 = pws[gid].i[53];
1908 const u32 K54 = pws[gid].i[54];
1909 const u32 K55 = pws[gid].i[55];
1915 const u32 pc_pos = get_local_id (1);
1917 const u32 il_pos = pc_pos * 32;
1948 k00 |= words_buf_r[pc_pos].b[ 0];
1949 k01 |= words_buf_r[pc_pos].b[ 1];
1950 k02 |= words_buf_r[pc_pos].b[ 2];
1951 k03 |= words_buf_r[pc_pos].b[ 3];
1952 k04 |= words_buf_r[pc_pos].b[ 4];
1953 k05 |= words_buf_r[pc_pos].b[ 5];
1954 k06 |= words_buf_r[pc_pos].b[ 6];
1955 k07 |= words_buf_r[pc_pos].b[ 7];
1956 k08 |= words_buf_r[pc_pos].b[ 8];
1957 k09 |= words_buf_r[pc_pos].b[ 9];
1958 k10 |= words_buf_r[pc_pos].b[10];
1959 k11 |= words_buf_r[pc_pos].b[11];
1960 k12 |= words_buf_r[pc_pos].b[12];
1961 k13 |= words_buf_r[pc_pos].b[13];
1962 k14 |= words_buf_r[pc_pos].b[14];
1963 k15 |= words_buf_r[pc_pos].b[15];
1964 k16 |= words_buf_r[pc_pos].b[16];
1965 k17 |= words_buf_r[pc_pos].b[17];
1966 k18 |= words_buf_r[pc_pos].b[18];
1967 k19 |= words_buf_r[pc_pos].b[19];
1968 k20 |= words_buf_r[pc_pos].b[20];
1969 k21 |= words_buf_r[pc_pos].b[21];
1970 k22 |= words_buf_r[pc_pos].b[22];
1971 k23 |= words_buf_r[pc_pos].b[23];
1972 k24 |= words_buf_r[pc_pos].b[24];
1973 k25 |= words_buf_r[pc_pos].b[25];
1974 k26 |= words_buf_r[pc_pos].b[26];
1975 k27 |= words_buf_r[pc_pos].b[27];
2045 k00, k01, k02, k03, k04, k05, k06,
2046 k07, k08, k09, k10, k11, k12, k13,
2047 k14, k15, k16, k17, k18, k19, k20,
2048 k21, k22, k23, k24, k25, k26, k27,
2049 K28, K29, K30, K31, K32, K33, K34,
2050 K35, K36, K37, K38, K39, K40, K41,
2051 K42, K43, K44, K45, K46, K47, K48,
2052 K49, K50, K51, K52, K53, K54, K55,
2053 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
2054 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
2055 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
2056 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
2057 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
2058 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
2059 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
2060 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
2130 if (digests_cnt < 16)
2132 for (u32 d = 0; d < digests_cnt; d++)
2134 const u32 final_hash_pos = digests_offset + d;
2136 if (hashes_shown[final_hash_pos]) continue;
2140 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
2141 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
2146 for (int i = 0; i < 32; i++)
2148 const u32 b0 = -((search[0] >> i) & 1);
2149 const u32 b1 = -((search[1] >> i) & 1);
2151 tmpResult |= out[ 0 + i] ^ b0;
2152 tmpResult |= out[32 + i] ^ b1;
2155 if (tmpResult == 0xffffffff) continue;
2157 const u32 slice = 31 - clz (~tmpResult);
2159 const u32 r0 = search[0];
2160 const u32 r1 = search[1];
2173 for (int i = 0; i < 32; i++)
2175 out0[i] = out[ 0 + 31 - i];
2176 out1[i] = out[32 + 31 - i];
2179 transpose32c (out0);
2180 transpose32c (out1);
2183 for (int slice = 0; slice < 32; slice++)
2185 const u32 r0 = out0[31 - slice];
2186 const u32 r1 = out1[31 - slice];
2195 static void m01500s (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
2201 const u32 gid = get_global_id (0);
2202 const u32 lid = get_local_id (0);
2208 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
2283 const u32 K00 = pws[gid].i[ 0];
2284 const u32 K01 = pws[gid].i[ 1];
2285 const u32 K02 = pws[gid].i[ 2];
2286 const u32 K03 = pws[gid].i[ 3];
2287 const u32 K04 = pws[gid].i[ 4];
2288 const u32 K05 = pws[gid].i[ 5];
2289 const u32 K06 = pws[gid].i[ 6];
2290 const u32 K07 = pws[gid].i[ 7];
2291 const u32 K08 = pws[gid].i[ 8];
2292 const u32 K09 = pws[gid].i[ 9];
2293 const u32 K10 = pws[gid].i[10];
2294 const u32 K11 = pws[gid].i[11];
2295 const u32 K12 = pws[gid].i[12];
2296 const u32 K13 = pws[gid].i[13];
2297 const u32 K14 = pws[gid].i[14];
2298 const u32 K15 = pws[gid].i[15];
2299 const u32 K16 = pws[gid].i[16];
2300 const u32 K17 = pws[gid].i[17];
2301 const u32 K18 = pws[gid].i[18];
2302 const u32 K19 = pws[gid].i[19];
2303 const u32 K20 = pws[gid].i[20];
2304 const u32 K21 = pws[gid].i[21];
2305 const u32 K22 = pws[gid].i[22];
2306 const u32 K23 = pws[gid].i[23];
2307 const u32 K24 = pws[gid].i[24];
2308 const u32 K25 = pws[gid].i[25];
2309 const u32 K26 = pws[gid].i[26];
2310 const u32 K27 = pws[gid].i[27];
2311 const u32 K28 = pws[gid].i[28];
2312 const u32 K29 = pws[gid].i[29];
2313 const u32 K30 = pws[gid].i[30];
2314 const u32 K31 = pws[gid].i[31];
2315 const u32 K32 = pws[gid].i[32];
2316 const u32 K33 = pws[gid].i[33];
2317 const u32 K34 = pws[gid].i[34];
2318 const u32 K35 = pws[gid].i[35];
2319 const u32 K36 = pws[gid].i[36];
2320 const u32 K37 = pws[gid].i[37];
2321 const u32 K38 = pws[gid].i[38];
2322 const u32 K39 = pws[gid].i[39];
2323 const u32 K40 = pws[gid].i[40];
2324 const u32 K41 = pws[gid].i[41];
2325 const u32 K42 = pws[gid].i[42];
2326 const u32 K43 = pws[gid].i[43];
2327 const u32 K44 = pws[gid].i[44];
2328 const u32 K45 = pws[gid].i[45];
2329 const u32 K46 = pws[gid].i[46];
2330 const u32 K47 = pws[gid].i[47];
2331 const u32 K48 = pws[gid].i[48];
2332 const u32 K49 = pws[gid].i[49];
2333 const u32 K50 = pws[gid].i[50];
2334 const u32 K51 = pws[gid].i[51];
2335 const u32 K52 = pws[gid].i[52];
2336 const u32 K53 = pws[gid].i[53];
2337 const u32 K54 = pws[gid].i[54];
2338 const u32 K55 = pws[gid].i[55];
2344 const u32 pc_pos = get_local_id (1);
2346 const u32 il_pos = pc_pos * 32;
2377 k00 |= words_buf_r[pc_pos].b[ 0];
2378 k01 |= words_buf_r[pc_pos].b[ 1];
2379 k02 |= words_buf_r[pc_pos].b[ 2];
2380 k03 |= words_buf_r[pc_pos].b[ 3];
2381 k04 |= words_buf_r[pc_pos].b[ 4];
2382 k05 |= words_buf_r[pc_pos].b[ 5];
2383 k06 |= words_buf_r[pc_pos].b[ 6];
2384 k07 |= words_buf_r[pc_pos].b[ 7];
2385 k08 |= words_buf_r[pc_pos].b[ 8];
2386 k09 |= words_buf_r[pc_pos].b[ 9];
2387 k10 |= words_buf_r[pc_pos].b[10];
2388 k11 |= words_buf_r[pc_pos].b[11];
2389 k12 |= words_buf_r[pc_pos].b[12];
2390 k13 |= words_buf_r[pc_pos].b[13];
2391 k14 |= words_buf_r[pc_pos].b[14];
2392 k15 |= words_buf_r[pc_pos].b[15];
2393 k16 |= words_buf_r[pc_pos].b[16];
2394 k17 |= words_buf_r[pc_pos].b[17];
2395 k18 |= words_buf_r[pc_pos].b[18];
2396 k19 |= words_buf_r[pc_pos].b[19];
2397 k20 |= words_buf_r[pc_pos].b[20];
2398 k21 |= words_buf_r[pc_pos].b[21];
2399 k22 |= words_buf_r[pc_pos].b[22];
2400 k23 |= words_buf_r[pc_pos].b[23];
2401 k24 |= words_buf_r[pc_pos].b[24];
2402 k25 |= words_buf_r[pc_pos].b[25];
2403 k26 |= words_buf_r[pc_pos].b[26];
2404 k27 |= words_buf_r[pc_pos].b[27];
2474 k00, k01, k02, k03, k04, k05, k06,
2475 k07, k08, k09, k10, k11, k12, k13,
2476 k14, k15, k16, k17, k18, k19, k20,
2477 k21, k22, k23, k24, k25, k26, k27,
2478 K28, K29, K30, K31, K32, K33, K34,
2479 K35, K36, K37, K38, K39, K40, K41,
2480 K42, K43, K44, K45, K46, K47, K48,
2481 K49, K50, K51, K52, K53, K54, K55,
2482 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
2483 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
2484 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
2485 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
2486 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
2487 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
2488 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
2489 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
2494 tmpResult |= D00 ^ S00;
2495 tmpResult |= D01 ^ S01;
2496 tmpResult |= D02 ^ S02;
2497 tmpResult |= D03 ^ S03;
2498 tmpResult |= D04 ^ S04;
2499 tmpResult |= D05 ^ S05;
2500 tmpResult |= D06 ^ S06;
2501 tmpResult |= D07 ^ S07;
2502 tmpResult |= D08 ^ S08;
2503 tmpResult |= D09 ^ S09;
2504 tmpResult |= D10 ^ S10;
2505 tmpResult |= D11 ^ S11;
2506 tmpResult |= D12 ^ S12;
2507 tmpResult |= D13 ^ S13;
2508 tmpResult |= D14 ^ S14;
2509 tmpResult |= D15 ^ S15;
2510 tmpResult |= D16 ^ S16;
2511 tmpResult |= D17 ^ S17;
2512 tmpResult |= D18 ^ S18;
2513 tmpResult |= D19 ^ S19;
2514 tmpResult |= D20 ^ S20;
2515 tmpResult |= D21 ^ S21;
2516 tmpResult |= D22 ^ S22;
2517 tmpResult |= D23 ^ S23;
2518 tmpResult |= D24 ^ S24;
2519 tmpResult |= D25 ^ S25;
2520 tmpResult |= D26 ^ S26;
2521 tmpResult |= D27 ^ S27;
2522 tmpResult |= D28 ^ S28;
2523 tmpResult |= D29 ^ S29;
2524 tmpResult |= D30 ^ S30;
2525 tmpResult |= D31 ^ S31;
2526 tmpResult |= D32 ^ S32;
2527 tmpResult |= D33 ^ S33;
2528 tmpResult |= D34 ^ S34;
2529 tmpResult |= D35 ^ S35;
2530 tmpResult |= D36 ^ S36;
2531 tmpResult |= D37 ^ S37;
2532 tmpResult |= D38 ^ S38;
2533 tmpResult |= D39 ^ S39;
2534 tmpResult |= D40 ^ S40;
2535 tmpResult |= D41 ^ S41;
2536 tmpResult |= D42 ^ S42;
2537 tmpResult |= D43 ^ S43;
2538 tmpResult |= D44 ^ S44;
2539 tmpResult |= D45 ^ S45;
2540 tmpResult |= D46 ^ S46;
2541 tmpResult |= D47 ^ S47;
2543 if (tmpResult == 0xffffffff) return;
2545 tmpResult |= D48 ^ S48;
2546 tmpResult |= D49 ^ S49;
2547 tmpResult |= D50 ^ S50;
2548 tmpResult |= D51 ^ S51;
2549 tmpResult |= D52 ^ S52;
2550 tmpResult |= D53 ^ S53;
2551 tmpResult |= D54 ^ S54;
2552 tmpResult |= D55 ^ S55;
2553 tmpResult |= D56 ^ S56;
2554 tmpResult |= D57 ^ S57;
2555 tmpResult |= D58 ^ S58;
2556 tmpResult |= D59 ^ S59;
2557 tmpResult |= D60 ^ S60;
2558 tmpResult |= D61 ^ S61;
2559 tmpResult |= D62 ^ S62;
2560 tmpResult |= D63 ^ S63;
2562 if (tmpResult == 0xffffffff) return;
2564 const u32 slice = 31 - clz (~tmpResult);
2570 // transpose bitslice base : easy because no overlapping buffers
2571 // mod : attention race conditions, need different buffers for *in and *out
2574 __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01500_tb (__global pw_t *pws)
2576 const u32 gid = get_global_id (0);
2578 const u32 w0 = pws[gid].i[0];
2579 const u32 w1 = pws[gid].i[1];
2581 const u32 w0s = (w0 << 1) & 0xfefefefe;
2582 const u32 w1s = (w1 << 1) & 0xfefefefe;
2585 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
2587 pws[gid].i[j + 0 + 0] = -((w0s >> (i + 7)) & 1);
2588 pws[gid].i[j + 0 + 1] = -((w0s >> (i + 6)) & 1);
2589 pws[gid].i[j + 0 + 2] = -((w0s >> (i + 5)) & 1);
2590 pws[gid].i[j + 0 + 3] = -((w0s >> (i + 4)) & 1);
2591 pws[gid].i[j + 0 + 4] = -((w0s >> (i + 3)) & 1);
2592 pws[gid].i[j + 0 + 5] = -((w0s >> (i + 2)) & 1);
2593 pws[gid].i[j + 0 + 6] = -((w0s >> (i + 1)) & 1);
2597 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
2599 pws[gid].i[j + 28 + 0] = -((w1s >> (i + 7)) & 1);
2600 pws[gid].i[j + 28 + 1] = -((w1s >> (i + 6)) & 1);
2601 pws[gid].i[j + 28 + 2] = -((w1s >> (i + 5)) & 1);
2602 pws[gid].i[j + 28 + 3] = -((w1s >> (i + 4)) & 1);
2603 pws[gid].i[j + 28 + 4] = -((w1s >> (i + 3)) & 1);
2604 pws[gid].i[j + 28 + 5] = -((w1s >> (i + 2)) & 1);
2605 pws[gid].i[j + 28 + 6] = -((w1s >> (i + 1)) & 1);
2609 __kernel void __attribute__((reqd_work_group_size (32, 1, 1))) m01500_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
2611 const u32 gid = get_global_id (0);
2612 const u32 lid = get_local_id (0);
2614 const u32 block = gid / 32;
2615 const u32 slice = gid % 32;
2617 const u32 w0 = mod[gid];
2619 const u32 w0s = (w0 << 1) & 0xfefefefe;
2622 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
2624 atomic_or (&words_buf_r[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
2625 atomic_or (&words_buf_r[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice));
2626 atomic_or (&words_buf_r[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice));
2627 atomic_or (&words_buf_r[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice));
2628 atomic_or (&words_buf_r[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice));
2629 atomic_or (&words_buf_r[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice));
2630 atomic_or (&words_buf_r[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice));
2634 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2640 const u32 gid = get_global_id (0);
2641 const u32 lid = get_local_id (0);
2642 const u32 vid = get_local_id (1);
2644 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
2645 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
2647 __local u32 s_S[64];
2651 s_S[ 0 + vid] = -((s0 >> vid) & 1);
2655 s_S[32 + vid] = -((s1 >> vid) & 1);
2658 barrier (CLK_LOCAL_MEM_FENCE);
2660 if (gid >= gid_max) return;
2666 m01500m (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2669 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2673 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2677 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2683 const u32 gid = get_global_id (0);
2684 const u32 lid = get_local_id (0);
2685 const u32 vid = get_local_id (1);
2687 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
2688 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
2690 __local u32 s_S[64];
2694 s_S[ 0 + vid] = -((s0 >> vid) & 1);
2698 s_S[32 + vid] = -((s1 >> vid) & 1);
2701 barrier (CLK_LOCAL_MEM_FENCE);
2703 if (gid >= gid_max) return;
2709 m01500s (s_S, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2712 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2716 __kernel void __attribute__((reqd_work_group_size (2, 32, 1))) m01500_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bs_word_t * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)