2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes for maxwell were taken from DeepLearningJohnDoe, license below
5 * : sboxes for others were takes fron JtR, license below
11 #include "include/constants.h"
12 #include "include/kernel_vendor.h"
27 #include "include/kernel_functions.c"
29 #include "common_nv.c"
32 #define VECT_COMPARE_S "check_single_vect1_comp4_warp_bs.c"
33 #define VECT_COMPARE_M "check_multi_vect1_comp4_warp_bs.c"
37 #define VECT_COMPARE_S "check_single_vect2_comp4_warp_bs.c"
38 #define VECT_COMPARE_M "check_multi_vect2_comp4_warp_bs.c"
42 #define VECT_COMPARE_S "check_single_vect4_comp4_warp_bs.c"
43 #define VECT_COMPARE_M "check_multi_vect4_comp4_warp_bs.c"
48 __device__ __constant__ u32x c_bfs[1024];
49 __device__ __constant__ bs_word_t c_tm[32];
50 __device__ __shared__ u32 s_S[64];
52 #if __CUDA_ARCH__ >= 500
55 // Bitslice DES S-boxes with LOP3.LUT instructions
56 // For NVIDIA Maxwell architecture and CUDA 7.5 RC
57 // by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
59 // Gate counts: 25 24 25 18 25 24 24 23
61 // Depth: 8 7 7 6 8 10 10 8
64 // Note that same S-box function with a lower gate count isn't necessarily faster.
66 // These Boolean expressions corresponding to DES S-boxes were
67 // discovered by <deeplearningjohndoe at gmail.com>
69 // This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
70 // Redistribution and use in source and binary forms, with or without
71 // modification, are permitted.
73 // The underlying mathematical formulas are NOT copyrighted.
76 #define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));
78 __device__ static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
80 LUT(xAA55AA5500550055, a1, a4, a6, 0xC1)
81 LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
82 LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
83 LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
84 LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
85 LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
86 LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
87 LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
88 LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
89 LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
90 LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
91 LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
92 LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
93 LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
94 LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
95 LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
96 LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
97 LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
98 LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
99 LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
100 LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
101 LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
102 LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
103 LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
104 LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)
112 __device__ static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
114 LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97)
115 LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
116 LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76)
117 LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
118 LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
119 LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65)
120 LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
121 LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
122 LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
123 LUT(x3333CCCC00000000, a2, a5, a6, 0x14)
124 LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
125 LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
126 LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
127 LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
128 LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
129 LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
130 LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
131 LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
132 LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
133 LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
134 LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
135 LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
136 LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
137 LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)
145 __device__ static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
147 LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
148 LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
149 LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
150 LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
151 LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
152 LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
153 LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
154 LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49)
155 LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
156 LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
157 LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
158 LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
159 LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
160 LUT(x500F500F500F500F, a1, a3, a4, 0x98)
161 LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
162 LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
163 LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
164 LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
165 LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
166 LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
167 LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
168 LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
169 LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
170 LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
171 LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)
179 __device__ static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
181 LUT(x55F055F055F055F0, a1, a3, a4, 0x72)
182 LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
183 LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59)
184 LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
185 LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
186 LUT(x9999666699996666, a1, a2, a5, 0x69)
187 LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
188 LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
189 LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
190 LUT(x4848484848484848, a1, a2, a3, 0x12)
191 LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
192 LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
193 LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
194 LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
195 LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
196 LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
197 LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
198 LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)
206 __device__ static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
208 LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
209 LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9)
210 LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
211 LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
212 LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
213 LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
214 LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
215 LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
216 LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
217 LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
218 LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
219 LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
220 LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
221 LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
222 LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
223 LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
224 LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
225 LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
226 LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
227 LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
228 LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
229 LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
230 LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
231 LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
232 LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)
240 __device__ static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
242 LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2)
243 LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
244 LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
245 LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
246 LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
247 LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
248 LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
249 LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
250 LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
251 LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
252 LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
253 LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
254 LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
255 LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
256 LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
257 LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
258 LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
259 LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
260 LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
261 LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
262 LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
263 LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
264 LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
265 LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)
273 __device__ static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
275 LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
276 LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
277 LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
278 LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
279 LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
280 LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
281 LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
282 LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
283 LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
284 LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
285 LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
286 LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
287 LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
288 LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
289 LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
290 LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23)
291 LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
292 LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
293 LUT(xA050A050A050A050, a1, a3, a4, 0x21)
294 LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
295 LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
296 LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
297 LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
298 LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)
306 __device__ static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
308 LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
309 LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
310 LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
311 LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
312 LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
313 LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
314 LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
315 LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
316 LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
317 LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
318 LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
319 LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
320 LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
321 LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
322 LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
323 LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
324 LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
325 LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
326 LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
327 LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
328 LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
329 LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
330 LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)
341 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
342 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
344 * Gate counts: 49 44 46 33 48 46 46 41
347 * Several same-gate-count expressions for each S-box are included (for use on
348 * different CPUs/GPUs).
350 * These Boolean expressions corresponding to DES S-boxes have been generated
351 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
352 * John the Ripper password cracker: http://www.openwall.com/john/
353 * Being mathematical formulas, they are not copyrighted and are free for reuse
356 * This file (a specific representation of the S-box expressions, surrounding
357 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
358 * Redistribution and use in source and binary forms, with or without
359 * modification, are permitted. (This is a heavily cut-down "BSD license".)
361 * The effort has been sponsored by Rapid7: http://www.rapid7.com
364 __device__ static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
366 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
368 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
369 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
370 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
371 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
372 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
373 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
374 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
375 u32 x00, x01, x10, x11, x20, x21, x30, x31;
377 x55005500 = a1 & ~a5;
378 x5A0F5A0F = a4 ^ x55005500;
381 x22226666 = x3333FFFF & x66666666;
382 x2D2D6969 = a4 ^ x22226666;
383 x25202160 = x2D2D6969 & ~x5A0F5A0F;
386 x33CCCC33 = a3 ^ x00FFFF00;
387 x4803120C = x5A0F5A0F & ~x33CCCC33;
388 x2222FFFF = a6 | x22226666;
389 x6A21EDF3 = x4803120C ^ x2222FFFF;
390 x4A01CC93 = x6A21EDF3 & ~x25202160;
393 x7F75FFFF = x6A21EDF3 | x5555FFFF;
394 x00D20096 = a5 & ~x2D2D6969;
395 x7FA7FF69 = x7F75FFFF ^ x00D20096;
397 x0A0A0000 = a4 & ~x5555FFFF;
398 x0AD80096 = x00D20096 ^ x0A0A0000;
399 x00999900 = x00FFFF00 & ~x66666666;
400 x0AD99996 = x0AD80096 | x00999900;
402 x22332233 = a3 & ~x55005500;
403 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
404 x054885C0 = x257AA5F0 & ~x22332233;
405 xFAB77A3F = ~x054885C0;
406 x2221EDF3 = x3333FFFF & x6A21EDF3;
407 xD89697CC = xFAB77A3F ^ x2221EDF3;
408 x20 = x7FA7FF69 & ~a2;
409 x21 = x20 ^ xD89697CC;
412 x05B77AC0 = x00FFFF00 ^ x054885C0;
413 x05F77AD6 = x00D20096 | x05B77AC0;
414 x36C48529 = x3333FFFF ^ x05F77AD6;
415 x6391D07C = a1 ^ x36C48529;
416 xBB0747B0 = xD89697CC ^ x6391D07C;
417 x00 = x25202160 | a2;
418 x01 = x00 ^ xBB0747B0;
421 x4C460000 = x3333FFFF ^ x7F75FFFF;
422 x4EDF9996 = x0AD99996 | x4C460000;
423 x2D4E49EA = x6391D07C ^ x4EDF9996;
424 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
425 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
426 x10 = x4A01CC93 | a2;
427 x11 = x10 ^ x96B1B65A;
430 x5AFF5AFF = a5 | x5A0F5A0F;
431 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
432 x4201C010 = x4A01CC93 & x6391D07C;
433 x10B0D205 = x52B11215 ^ x4201C010;
434 x30 = x10B0D205 | a2;
435 x31 = x30 ^ x0AD99996;
439 __device__ static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
442 u32 x55550000, x00AA00FF, x33BB33FF;
443 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
444 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
445 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
446 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
447 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
448 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
449 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
450 u32 x00, x01, x10, x11, x20, x21, x30, x31;
454 x55550000 = a1 & ~a6;
455 x00AA00FF = a5 & ~x55550000;
456 x33BB33FF = a2 | x00AA00FF;
458 x33CC0000 = x33CC33CC & ~a6;
459 x11441144 = a1 & x33CC33CC;
460 x11BB11BB = a5 ^ x11441144;
461 x003311BB = x11BB11BB & ~x33CC0000;
464 x336600FF = x00AA00FF ^ x33CC0000;
465 x332200FF = x33BB33FF & x336600FF;
466 x332200F0 = x332200FF & ~x00000F0F;
468 x0302000F = a3 & x332200FF;
470 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
471 x33CCCC33 = a6 ^ x33CC33CC;
472 x33CCC030 = x33CCCC33 & ~x00000F0F;
473 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
474 x10 = a4 & ~x332200F0;
475 x11 = x10 ^ x9A646A95;
478 x00333303 = a2 & ~x33CCC030;
479 x118822B8 = x11BB11BB ^ x00333303;
480 xA8208805 = xA9A8AAA5 & ~x118822B8;
481 x3CC3C33C = a3 ^ x33CCCC33;
482 x94E34B39 = xA8208805 ^ x3CC3C33C;
483 x00 = x33BB33FF & ~a4;
484 x01 = x00 ^ x94E34B39;
487 x0331330C = x0302000F ^ x00333303;
488 x3FF3F33C = x3CC3C33C | x0331330C;
489 xA9DF596A = x33BB33FF ^ x9A646A95;
490 xA9DF5F6F = x00000F0F | xA9DF596A;
491 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
493 xA9466A6A = x332200FF ^ x9A646A95;
494 x3DA52153 = x94E34B39 ^ xA9466A6A;
495 x29850143 = xA9DF5F6F & x3DA52153;
496 x33C0330C = x33CC33CC & x3FF3F33C;
497 x1A45324F = x29850143 ^ x33C0330C;
498 x20 = x1A45324F | a4;
499 x21 = x20 ^ x962CAC53;
502 x0A451047 = x1A45324F & ~x118822B8;
503 xBBDFDD7B = x33CCCC33 | xA9DF596A;
504 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
505 x30 = x003311BB | a4;
506 x31 = x30 ^ xB19ACD3C;
510 __device__ static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
512 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
513 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
514 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
515 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
516 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
517 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
518 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
519 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
520 u32 x00, x01, x10, x11, x20, x21, x30, x31;
522 x44444444 = a1 & ~a2;
524 x4F4FF4F4 = x44444444 | x0F0FF0F0;
526 x00AAAA00 = x00FFFF00 & ~a1;
527 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
529 x3C3CC3C3 = a2 ^ x0F0FF0F0;
530 x3C3C0000 = x3C3CC3C3 & ~a6;
531 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
532 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
534 x00005EF4 = a6 & x4FE55EF4;
535 x00FF5EFF = a4 | x00005EF4;
536 x00555455 = a1 & x00FF5EFF;
537 x3C699796 = x3C3CC3C3 ^ x00555455;
538 x30 = x4FE55EF4 & ~a5;
539 x31 = x30 ^ x3C699796;
542 x000FF000 = x0F0FF0F0 & x00FFFF00;
544 x26D9A15E = x7373F4F4 ^ x55AA55AA;
545 x2FDFAF5F = a3 | x26D9A15E;
546 x2FD00F5F = x2FDFAF5F & ~x000FF000;
548 x55AAFFAA = x00AAAA00 | x55AA55AA;
549 x28410014 = x3C699796 & ~x55AAFFAA;
551 x000000CC = x000000FF & ~a2;
552 x284100D8 = x28410014 ^ x000000CC;
554 x204100D0 = x7373F4F4 & x284100D8;
555 x3C3CC3FF = x3C3CC3C3 | x000000FF;
556 x1C3CC32F = x3C3CC3FF & ~x204100D0;
557 x4969967A = a1 ^ x1C3CC32F;
558 x10 = x2FD00F5F & a5;
559 x11 = x10 ^ x4969967A;
562 x4CC44CC4 = x4FE55EF4 & ~a2;
563 x40C040C0 = x4CC44CC4 & ~a3;
564 xC3C33C3C = ~x3C3CC3C3;
565 x9669C396 = x55AAFFAA ^ xC3C33C3C;
566 xD6A98356 = x40C040C0 ^ x9669C396;
567 x00 = a5 & ~x0C840A00;
568 x01 = x00 ^ xD6A98356;
571 xD6E9C3D6 = x40C040C0 | x9669C396;
572 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
573 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
574 x001A000B = a4 & ~x4FE55EF4;
575 x9A1F2D1B = x9A072D12 | x001A000B;
576 x20 = a5 & ~x284100D8;
577 x21 = x20 ^ x9A1F2D1B;
581 __device__ static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
583 u32 x5A5A5A5A, x0F0FF0F0;
584 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
585 x52FBCA0F, x61C8F93C;
586 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
587 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
588 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
589 u32 x00, x01, x10, x11, x20, x21, x30, x31;
594 x33FFCC00 = a5 ^ x33FF33FF;
595 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
596 x0C0CC0C0 = x0F0FF0F0 & ~a2;
597 x0CF3C03F = a4 ^ x0C0CC0C0;
598 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
599 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
600 x61C8F93C = a2 ^ x52FBCA0F;
602 x00C0C03C = x0CF3C03F & x61C8F93C;
603 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
604 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
605 x30908326 = x3B92A366 & ~x0F0F30C0;
606 x3C90B3D6 = x0C0030F0 ^ x30908326;
609 x0C0CFFFF = a5 | x0C0CC0C0;
610 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
611 x04124C11 = x379E5C99 & ~x33CC33CC;
612 x56E9861E = x52FBCA0F ^ x04124C11;
613 x00 = a6 & ~x3C90B3D6;
614 x01 = x00 ^ x56E9861E;
617 xA91679E1 = ~x56E9861E;
618 x10 = x3C90B3D6 & ~a6;
619 x11 = x10 ^ xA91679E1;
622 x9586CA37 = x3C90B3D6 ^ xA91679E1;
623 x8402C833 = x9586CA37 & ~x33CC33CC;
624 x84C2C83F = x00C0C03C | x8402C833;
625 xB35C94A6 = x379E5C99 ^ x84C2C83F;
626 x20 = x61C8F93C | a6;
627 x21 = x20 ^ xB35C94A6;
630 x30 = a6 & x61C8F93C;
631 x31 = x30 ^ xB35C94A6;
635 __device__ static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
637 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
638 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
639 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
640 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
641 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
642 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
643 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
644 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
645 u32 x00, x01, x10, x11, x20, x21, x30, x31;
648 x77770000 = x77777777 & ~a6;
649 x22225555 = a1 ^ x77770000;
650 x11116666 = a3 ^ x22225555;
651 x1F1F6F6F = a4 | x11116666;
653 x70700000 = x77770000 & ~a4;
654 x43433333 = a3 ^ x70700000;
655 x00430033 = a5 & x43433333;
656 x55557777 = a1 | x11116666;
657 x55167744 = x00430033 ^ x55557777;
658 x5A19784B = a4 ^ x55167744;
660 x5A1987B4 = a6 ^ x5A19784B;
661 x7A3BD7F5 = x22225555 | x5A1987B4;
662 x003B00F5 = a5 & x7A3BD7F5;
663 x221955A0 = x22225555 ^ x003B00F5;
664 x05050707 = a4 & x55557777;
665 x271C52A7 = x221955A0 ^ x05050707;
667 x2A2A82A0 = x7A3BD7F5 & ~a1;
668 x6969B193 = x43433333 ^ x2A2A82A0;
669 x1FE06F90 = a5 ^ x1F1F6F6F;
670 x16804E00 = x1FE06F90 & ~x6969B193;
671 xE97FB1FF = ~x16804E00;
672 x20 = xE97FB1FF & ~a2;
673 x21 = x20 ^ x5A19784B;
676 x43403302 = x43433333 & ~x003B00F5;
677 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
678 x37DEFFB7 = x271C52A7 | x35CAED30;
679 x349ECCB5 = x37DEFFB7 & ~x43403302;
680 x0B01234A = x1F1F6F6F & ~x349ECCB5;
682 x101884B4 = x5A1987B4 & x349ECCB5;
683 x0FF8EB24 = x1FE06F90 ^ x101884B4;
684 x41413333 = x43433333 & x55557777;
685 x4FF9FB37 = x0FF8EB24 | x41413333;
686 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
687 x30 = x4FC2FBC2 & a2;
688 x31 = x30 ^ x271C52A7;
691 x22222222 = a1 ^ x77777777;
692 x16BCEE97 = x349ECCB5 ^ x22222222;
693 x0F080B04 = a4 & x0FF8EB24;
694 x19B4E593 = x16BCEE97 ^ x0F080B04;
695 x00 = x0B01234A | a2;
696 x01 = x00 ^ x19B4E593;
699 x5C5C5C5C = x1F1F6F6F ^ x43433333;
700 x4448184C = x5C5C5C5C & ~x19B4E593;
701 x2DDABE71 = x22225555 ^ x0FF8EB24;
702 x6992A63D = x4448184C ^ x2DDABE71;
703 x10 = x1F1F6F6F & a2;
704 x11 = x10 ^ x6992A63D;
708 __device__ static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
711 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
712 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
713 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
714 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
715 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
716 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
717 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
718 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
719 u32 x00, x01, x10, x11, x20, x21, x30, x31;
724 x11115555 = a1 & x3333FFFF;
725 x22DD6699 = x33CC33CC ^ x11115555;
726 x22DD9966 = a6 ^ x22DD6699;
727 x00220099 = a5 & ~x22DD9966;
729 x00551144 = a1 & x22DD9966;
730 x33662277 = a2 ^ x00551144;
732 x7B7E7A7F = x33662277 | x5A5A5A5A;
733 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
735 x09030C06 = a3 & x59A31CE6;
736 x09030000 = x09030C06 & ~a6;
737 x336622FF = x00220099 | x33662277;
738 x3A6522FF = x09030000 ^ x336622FF;
739 x30 = x3A6522FF & a4;
740 x31 = x30 ^ x59A31CE6;
743 x484D494C = a2 ^ x7B7E7A7F;
744 x0000B6B3 = a6 & ~x484D494C;
745 x0F0FB9BC = a3 ^ x0000B6B3;
746 x00FC00F9 = a5 & ~x09030C06;
747 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
749 x5DF75DF7 = a1 | x59A31CE6;
750 x116600F7 = x336622FF & x5DF75DF7;
751 x1E69B94B = x0F0FB9BC ^ x116600F7;
752 x1668B94B = x1E69B94B & ~x09030000;
753 x20 = x00220099 | a4;
754 x21 = x20 ^ x1668B94B;
757 x7B7B7B7B = a2 | x5A5A5A5A;
758 x411E5984 = x3A6522FF ^ x7B7B7B7B;
759 x1FFFFDFD = x11115555 | x0FFFB9FD;
760 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
762 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
763 x004B002D = a5 & ~x3CB4DFD2;
764 xB7B2B6B3 = ~x484D494C;
765 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
766 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
767 x10 = xCC82CDE5 & ~a4;
768 x11 = x10 ^ x5EE1A479;
771 x0055EEBB = a6 ^ x00551144;
772 x5A5AECE9 = a1 ^ x0F0FB9BC;
773 x0050ECA9 = x0055EEBB & x5A5AECE9;
774 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
775 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
776 x00 = x0FFFB9FD & ~a4;
777 x01 = x00 ^ xC59A2D67;
781 __device__ static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
783 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
784 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
785 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
786 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
787 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
788 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
789 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
790 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
791 u32 x00, x01, x10, x11, x20, x21, x30, x31;
794 x3CC33CC3 = a3 ^ x0FF00FF0;
795 x00003CC3 = a6 & x3CC33CC3;
796 x0F000F00 = a4 & x0FF00FF0;
797 x5A555A55 = a2 ^ x0F000F00;
798 x00001841 = x00003CC3 & x5A555A55;
800 x00000F00 = a6 & x0F000F00;
801 x33333C33 = a3 ^ x00000F00;
802 x7B777E77 = x5A555A55 | x33333C33;
803 x0FF0F00F = a6 ^ x0FF00FF0;
804 x74878E78 = x7B777E77 ^ x0FF0F00F;
805 x30 = a1 & ~x00001841;
806 x31 = x30 ^ x74878E78;
809 x003C003C = a5 & ~x3CC33CC3;
810 x5A7D5A7D = x5A555A55 | x003C003C;
811 x333300F0 = x00003CC3 ^ x33333C33;
812 x694E5A8D = x5A7D5A7D ^ x333300F0;
814 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
815 x000F0303 = a4 & ~x0FF0CCCC;
816 x5A505854 = x5A555A55 & ~x000F0303;
817 x33CC000F = a5 ^ x333300F0;
818 x699C585B = x5A505854 ^ x33CC000F;
820 x7F878F78 = x0F000F00 | x74878E78;
821 x21101013 = a3 & x699C585B;
822 x7F979F7B = x7F878F78 | x21101013;
823 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
824 x4F9493BB = x7F979F7B ^ x30030CC0;
825 x00 = x4F9493BB & ~a1;
826 x01 = x00 ^ x694E5A8D;
829 x6F9CDBFB = x699C585B | x4F9493BB;
830 x0000DBFB = a6 & x6F9CDBFB;
831 x00005151 = a2 & x0000DBFB;
832 x26DAC936 = x694E5A8D ^ x4F9493BB;
833 x26DA9867 = x00005151 ^ x26DAC936;
835 x27DA9877 = x21101013 | x26DA9867;
836 x27DA438C = x0000DBFB ^ x27DA9877;
837 x2625C9C9 = a5 ^ x26DAC936;
838 x27FFCBCD = x27DA438C | x2625C9C9;
839 x20 = x27FFCBCD & a1;
840 x21 = x20 ^ x699C585B;
843 x27FF1036 = x0000DBFB ^ x27FFCBCD;
844 x27FF103E = x003C003C | x27FF1036;
845 xB06B6C44 = ~x4F9493BB;
846 x97947C7A = x27FF103E ^ xB06B6C44;
847 x10 = x97947C7A & ~a1;
848 x11 = x10 ^ x26DA9867;
852 __device__ static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
854 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
855 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
856 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
857 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
858 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
859 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
860 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
861 u32 x00, x01, x10, x11, x20, x21, x30, x31;
863 x0C0C0C0C = a3 & ~a2;
864 x0000F0F0 = a5 & ~a3;
865 x00FFF00F = a4 ^ x0000F0F0;
866 x00555005 = a1 & x00FFF00F;
867 x00515001 = x00555005 & ~x0C0C0C0C;
869 x33000330 = a2 & ~x00FFF00F;
870 x77555775 = a1 | x33000330;
871 x30303030 = a2 & ~a3;
872 x3030CFCF = a5 ^ x30303030;
873 x30104745 = x77555775 & x3030CFCF;
874 x30555745 = x00555005 | x30104745;
876 xFF000FF0 = ~x00FFF00F;
877 xCF1048B5 = x30104745 ^ xFF000FF0;
878 x080A080A = a3 & ~x77555775;
879 xC71A40BF = xCF1048B5 ^ x080A080A;
880 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
881 x10 = x00515001 | a6;
882 x11 = x10 ^ xCB164CB3;
885 x9E4319E6 = a1 ^ xCB164CB3;
886 x000019E6 = a5 & x9E4319E6;
887 xF429738C = a2 ^ xC71A40BF;
888 xF4296A6A = x000019E6 ^ xF429738C;
889 xC729695A = x33000330 ^ xF4296A6A;
891 xC47C3D2F = x30555745 ^ xF4296A6A;
892 xF77F3F3F = a2 | xC47C3D2F;
893 x9E43E619 = a5 ^ x9E4319E6;
894 x693CD926 = xF77F3F3F ^ x9E43E619;
895 x20 = x30555745 & a6;
896 x21 = x20 ^ x693CD926;
899 xF719A695 = x3030CFCF ^ xC729695A;
900 xF4FF73FF = a4 | xF429738C;
901 x03E6D56A = xF719A695 ^ xF4FF73FF;
902 x56B3803F = a1 ^ x03E6D56A;
903 x30 = x56B3803F & a6;
904 x31 = x30 ^ xC729695A;
907 xF700A600 = xF719A695 & ~a4;
908 x61008000 = x693CD926 & xF700A600;
909 x03B7856B = x00515001 ^ x03E6D56A;
910 x62B7056B = x61008000 ^ x03B7856B;
911 x00 = x62B7056B | a6;
912 x01 = x00 ^ xC729695A;
918 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
919 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
920 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
921 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
922 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
923 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
924 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
925 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
926 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
927 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
928 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
929 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
930 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
931 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
932 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
933 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
935 __device__ static void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 &D00, u32 &D01, u32 &D02, u32 &D03, u32 &D04, u32 &D05, u32 &D06, u32 &D07, u32 &D08, u32 &D09, u32 &D10, u32 &D11, u32 &D12, u32 &D13, u32 &D14, u32 &D15, u32 &D16, u32 &D17, u32 &D18, u32 &D19, u32 &D20, u32 &D21, u32 &D22, u32 &D23, u32 &D24, u32 &D25, u32 &D26, u32 &D27, u32 &D28, u32 &D29, u32 &D30, u32 &D31, u32 &D32, u32 &D33, u32 &D34, u32 &D35, u32 &D36, u32 &D37, u32 &D38, u32 &D39, u32 &D40, u32 &D41, u32 &D42, u32 &D43, u32 &D44, u32 &D45, u32 &D46, u32 &D47, u32 &D48, u32 &D49, u32 &D50, u32 &D51, u32 &D52, u32 &D53, u32 &D54, u32 &D55, u32 &D56, u32 &D57, u32 &D58, u32 &D59, u32 &D60, u32 &D61, u32 &D62, u32 &D63)
937 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
938 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
939 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
940 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
941 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
942 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
943 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
944 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
948 #if __CUDA_ARCH__ >= 500
954 for (u32 i = 0; i < 2; i++)
956 if (i) KEYSET10 else KEYSET00
958 s1(D63 ^ k00, D32 ^ k01, D33 ^ k02, D34 ^ k03, D35 ^ k04, D36 ^ k05, &D08, &D16, &D22, &D30);
959 s2(D35 ^ k06, D36 ^ k07, D37 ^ k08, D38 ^ k09, D39 ^ k10, D40 ^ k11, &D12, &D27, &D01, &D17);
960 s3(D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
961 s4(D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
962 s5(D47 ^ k24, D48 ^ k25, D49 ^ k26, D50 ^ k27, D51 ^ k28, D52 ^ k29, &D07, &D13, &D24, &D02);
963 s6(D51 ^ k30, D52 ^ k31, D53 ^ k32, D54 ^ k33, D55 ^ k34, D56 ^ k35, &D03, &D28, &D10, &D18);
964 s7(D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
965 s8(D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
967 if (i) KEYSET11 else KEYSET01
969 s1(D31 ^ k00, D00 ^ k01, D01 ^ k02, D02 ^ k03, D03 ^ k04, D04 ^ k05, &D40, &D48, &D54, &D62);
970 s2(D03 ^ k06, D04 ^ k07, D05 ^ k08, D06 ^ k09, D07 ^ k10, D08 ^ k11, &D44, &D59, &D33, &D49);
971 s3(D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37);
972 s4(D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32);
973 s5(D15 ^ k24, D16 ^ k25, D17 ^ k26, D18 ^ k27, D19 ^ k28, D20 ^ k29, &D39, &D45, &D56, &D34);
974 s6(D19 ^ k30, D20 ^ k31, D21 ^ k32, D22 ^ k33, D23 ^ k34, D24 ^ k35, &D35, &D60, &D42, &D50);
975 s7(D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38);
976 s8(D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52);
978 if (i) KEYSET12 else KEYSET02
980 s1(D63 ^ k00, D32 ^ k01, D33 ^ k02, D34 ^ k03, D35 ^ k04, D36 ^ k05, &D08, &D16, &D22, &D30);
981 s2(D35 ^ k06, D36 ^ k07, D37 ^ k08, D38 ^ k09, D39 ^ k10, D40 ^ k11, &D12, &D27, &D01, &D17);
982 s3(D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
983 s4(D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
984 s5(D47 ^ k24, D48 ^ k25, D49 ^ k26, D50 ^ k27, D51 ^ k28, D52 ^ k29, &D07, &D13, &D24, &D02);
985 s6(D51 ^ k30, D52 ^ k31, D53 ^ k32, D54 ^ k33, D55 ^ k34, D56 ^ k35, &D03, &D28, &D10, &D18);
986 s7(D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
987 s8(D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
989 if (i) KEYSET13 else KEYSET03
991 s1(D31 ^ k00, D00 ^ k01, D01 ^ k02, D02 ^ k03, D03 ^ k04, D04 ^ k05, &D40, &D48, &D54, &D62);
992 s2(D03 ^ k06, D04 ^ k07, D05 ^ k08, D06 ^ k09, D07 ^ k10, D08 ^ k11, &D44, &D59, &D33, &D49);
993 s3(D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37);
994 s4(D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32);
995 s5(D15 ^ k24, D16 ^ k25, D17 ^ k26, D18 ^ k27, D19 ^ k28, D20 ^ k29, &D39, &D45, &D56, &D34);
996 s6(D19 ^ k30, D20 ^ k31, D21 ^ k32, D22 ^ k33, D23 ^ k34, D24 ^ k35, &D35, &D60, &D42, &D50);
997 s7(D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38);
998 s8(D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52);
1000 if (i) KEYSET14 else KEYSET04
1002 s1(D63 ^ k00, D32 ^ k01, D33 ^ k02, D34 ^ k03, D35 ^ k04, D36 ^ k05, &D08, &D16, &D22, &D30);
1003 s2(D35 ^ k06, D36 ^ k07, D37 ^ k08, D38 ^ k09, D39 ^ k10, D40 ^ k11, &D12, &D27, &D01, &D17);
1004 s3(D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
1005 s4(D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
1006 s5(D47 ^ k24, D48 ^ k25, D49 ^ k26, D50 ^ k27, D51 ^ k28, D52 ^ k29, &D07, &D13, &D24, &D02);
1007 s6(D51 ^ k30, D52 ^ k31, D53 ^ k32, D54 ^ k33, D55 ^ k34, D56 ^ k35, &D03, &D28, &D10, &D18);
1008 s7(D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
1009 s8(D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
1011 if (i) KEYSET15 else KEYSET05
1013 s1(D31 ^ k00, D00 ^ k01, D01 ^ k02, D02 ^ k03, D03 ^ k04, D04 ^ k05, &D40, &D48, &D54, &D62);
1014 s2(D03 ^ k06, D04 ^ k07, D05 ^ k08, D06 ^ k09, D07 ^ k10, D08 ^ k11, &D44, &D59, &D33, &D49);
1015 s3(D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37);
1016 s4(D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32);
1017 s5(D15 ^ k24, D16 ^ k25, D17 ^ k26, D18 ^ k27, D19 ^ k28, D20 ^ k29, &D39, &D45, &D56, &D34);
1018 s6(D19 ^ k30, D20 ^ k31, D21 ^ k32, D22 ^ k33, D23 ^ k34, D24 ^ k35, &D35, &D60, &D42, &D50);
1019 s7(D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38);
1020 s8(D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52);
1022 if (i) KEYSET16 else KEYSET06
1024 s1(D63 ^ k00, D32 ^ k01, D33 ^ k02, D34 ^ k03, D35 ^ k04, D36 ^ k05, &D08, &D16, &D22, &D30);
1025 s2(D35 ^ k06, D36 ^ k07, D37 ^ k08, D38 ^ k09, D39 ^ k10, D40 ^ k11, &D12, &D27, &D01, &D17);
1026 s3(D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
1027 s4(D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
1028 s5(D47 ^ k24, D48 ^ k25, D49 ^ k26, D50 ^ k27, D51 ^ k28, D52 ^ k29, &D07, &D13, &D24, &D02);
1029 s6(D51 ^ k30, D52 ^ k31, D53 ^ k32, D54 ^ k33, D55 ^ k34, D56 ^ k35, &D03, &D28, &D10, &D18);
1030 s7(D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
1031 s8(D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
1033 if (i) KEYSET17 else KEYSET07
1035 s1(D31 ^ k00, D00 ^ k01, D01 ^ k02, D02 ^ k03, D03 ^ k04, D04 ^ k05, &D40, &D48, &D54, &D62);
1036 s2(D03 ^ k06, D04 ^ k07, D05 ^ k08, D06 ^ k09, D07 ^ k10, D08 ^ k11, &D44, &D59, &D33, &D49);
1037 s3(D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37);
1038 s4(D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32);
1039 s5(D15 ^ k24, D16 ^ k25, D17 ^ k26, D18 ^ k27, D19 ^ k28, D20 ^ k29, &D39, &D45, &D56, &D34);
1040 s6(D19 ^ k30, D20 ^ k31, D21 ^ k32, D22 ^ k33, D23 ^ k34, D24 ^ k35, &D35, &D60, &D42, &D50);
1041 s7(D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38);
1042 s8(D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52);
1046 __device__ static void transpose32c (u32 data[32])
1048 #define swap(x,y,j,m) \
1049 t = ((x) ^ ((y) >> (j))) & (m); \
1051 (y) = (y) ^ (t << (j));
1055 swap (data[ 0], data[16], 16, 0x0000ffff);
1056 swap (data[ 1], data[17], 16, 0x0000ffff);
1057 swap (data[ 2], data[18], 16, 0x0000ffff);
1058 swap (data[ 3], data[19], 16, 0x0000ffff);
1059 swap (data[ 4], data[20], 16, 0x0000ffff);
1060 swap (data[ 5], data[21], 16, 0x0000ffff);
1061 swap (data[ 6], data[22], 16, 0x0000ffff);
1062 swap (data[ 7], data[23], 16, 0x0000ffff);
1063 swap (data[ 8], data[24], 16, 0x0000ffff);
1064 swap (data[ 9], data[25], 16, 0x0000ffff);
1065 swap (data[10], data[26], 16, 0x0000ffff);
1066 swap (data[11], data[27], 16, 0x0000ffff);
1067 swap (data[12], data[28], 16, 0x0000ffff);
1068 swap (data[13], data[29], 16, 0x0000ffff);
1069 swap (data[14], data[30], 16, 0x0000ffff);
1070 swap (data[15], data[31], 16, 0x0000ffff);
1071 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
1072 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
1073 swap (data[ 2], data[10], 8, 0x00ff00ff);
1074 swap (data[ 3], data[11], 8, 0x00ff00ff);
1075 swap (data[ 4], data[12], 8, 0x00ff00ff);
1076 swap (data[ 5], data[13], 8, 0x00ff00ff);
1077 swap (data[ 6], data[14], 8, 0x00ff00ff);
1078 swap (data[ 7], data[15], 8, 0x00ff00ff);
1079 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
1080 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
1081 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
1082 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
1083 swap (data[ 0], data[ 2], 2, 0x33333333);
1084 swap (data[ 1], data[ 3], 2, 0x33333333);
1085 swap (data[ 0], data[ 1], 1, 0x55555555);
1086 swap (data[ 2], data[ 3], 1, 0x55555555);
1087 swap (data[ 4], data[ 6], 2, 0x33333333);
1088 swap (data[ 5], data[ 7], 2, 0x33333333);
1089 swap (data[ 4], data[ 5], 1, 0x55555555);
1090 swap (data[ 6], data[ 7], 1, 0x55555555);
1091 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
1092 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
1093 swap (data[10], data[14], 4, 0x0f0f0f0f);
1094 swap (data[11], data[15], 4, 0x0f0f0f0f);
1095 swap (data[ 8], data[10], 2, 0x33333333);
1096 swap (data[ 9], data[11], 2, 0x33333333);
1097 swap (data[ 8], data[ 9], 1, 0x55555555);
1098 swap (data[10], data[11], 1, 0x55555555);
1099 swap (data[12], data[14], 2, 0x33333333);
1100 swap (data[13], data[15], 2, 0x33333333);
1101 swap (data[12], data[13], 1, 0x55555555);
1102 swap (data[14], data[15], 1, 0x55555555);
1103 swap (data[16], data[24], 8, 0x00ff00ff);
1104 swap (data[17], data[25], 8, 0x00ff00ff);
1105 swap (data[18], data[26], 8, 0x00ff00ff);
1106 swap (data[19], data[27], 8, 0x00ff00ff);
1107 swap (data[20], data[28], 8, 0x00ff00ff);
1108 swap (data[21], data[29], 8, 0x00ff00ff);
1109 swap (data[22], data[30], 8, 0x00ff00ff);
1110 swap (data[23], data[31], 8, 0x00ff00ff);
1111 swap (data[16], data[20], 4, 0x0f0f0f0f);
1112 swap (data[17], data[21], 4, 0x0f0f0f0f);
1113 swap (data[18], data[22], 4, 0x0f0f0f0f);
1114 swap (data[19], data[23], 4, 0x0f0f0f0f);
1115 swap (data[16], data[18], 2, 0x33333333);
1116 swap (data[17], data[19], 2, 0x33333333);
1117 swap (data[16], data[17], 1, 0x55555555);
1118 swap (data[18], data[19], 1, 0x55555555);
1119 swap (data[20], data[22], 2, 0x33333333);
1120 swap (data[21], data[23], 2, 0x33333333);
1121 swap (data[20], data[21], 1, 0x55555555);
1122 swap (data[22], data[23], 1, 0x55555555);
1123 swap (data[24], data[28], 4, 0x0f0f0f0f);
1124 swap (data[25], data[29], 4, 0x0f0f0f0f);
1125 swap (data[26], data[30], 4, 0x0f0f0f0f);
1126 swap (data[27], data[31], 4, 0x0f0f0f0f);
1127 swap (data[24], data[26], 2, 0x33333333);
1128 swap (data[25], data[27], 2, 0x33333333);
1129 swap (data[24], data[25], 1, 0x55555555);
1130 swap (data[26], data[27], 1, 0x55555555);
1131 swap (data[28], data[30], 2, 0x33333333);
1132 swap (data[29], data[31], 2, 0x33333333);
1133 swap (data[28], data[29], 1, 0x55555555);
1134 swap (data[30], data[31], 1, 0x55555555);
1137 __device__ static void m03000m (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1144 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1145 const u32 lid = threadIdx.x;
1151 const u32 w0s = pws[gid].i[0];
1152 const u32 w1s = pws[gid].i[1];
1154 const u32 K00 = -((w0s >> ( 0 + 7)) & 1);
1155 const u32 K01 = -((w0s >> ( 0 + 6)) & 1);
1156 const u32 K02 = -((w0s >> ( 0 + 5)) & 1);
1157 const u32 K03 = -((w0s >> ( 0 + 4)) & 1);
1158 const u32 K04 = -((w0s >> ( 0 + 3)) & 1);
1159 const u32 K05 = -((w0s >> ( 0 + 2)) & 1);
1160 const u32 K06 = -((w0s >> ( 0 + 1)) & 1);
1161 const u32 K07 = -((w0s >> ( 0 + 0)) & 1);
1162 const u32 K08 = -((w0s >> ( 8 + 7)) & 1);
1163 const u32 K09 = -((w0s >> ( 8 + 6)) & 1);
1164 const u32 K10 = -((w0s >> ( 8 + 5)) & 1);
1165 const u32 K11 = -((w0s >> ( 8 + 4)) & 1);
1166 const u32 K12 = -((w0s >> ( 8 + 3)) & 1);
1167 const u32 K13 = -((w0s >> ( 8 + 2)) & 1);
1168 const u32 K14 = -((w0s >> ( 8 + 1)) & 1);
1169 const u32 K15 = -((w0s >> ( 8 + 0)) & 1);
1170 const u32 K16 = -((w0s >> (16 + 7)) & 1);
1171 const u32 K17 = -((w0s >> (16 + 6)) & 1);
1172 const u32 K18 = -((w0s >> (16 + 5)) & 1);
1173 const u32 K19 = -((w0s >> (16 + 4)) & 1);
1174 const u32 K20 = -((w0s >> (16 + 3)) & 1);
1175 const u32 K21 = -((w0s >> (16 + 2)) & 1);
1176 const u32 K22 = -((w0s >> (16 + 1)) & 1);
1177 const u32 K23 = -((w0s >> (16 + 0)) & 1);
1178 const u32 K24 = -((w0s >> (24 + 7)) & 1);
1179 const u32 K25 = -((w0s >> (24 + 6)) & 1);
1180 const u32 K26 = -((w0s >> (24 + 5)) & 1);
1181 const u32 K27 = -((w0s >> (24 + 4)) & 1);
1182 const u32 K28 = -((w0s >> (24 + 3)) & 1);
1183 const u32 K29 = -((w0s >> (24 + 2)) & 1);
1184 const u32 K30 = -((w0s >> (24 + 1)) & 1);
1185 const u32 K31 = -((w0s >> (24 + 0)) & 1);
1186 const u32 K32 = -((w1s >> ( 0 + 7)) & 1);
1187 const u32 K33 = -((w1s >> ( 0 + 6)) & 1);
1188 const u32 K34 = -((w1s >> ( 0 + 5)) & 1);
1189 const u32 K35 = -((w1s >> ( 0 + 4)) & 1);
1190 const u32 K36 = -((w1s >> ( 0 + 3)) & 1);
1191 const u32 K37 = -((w1s >> ( 0 + 2)) & 1);
1192 const u32 K38 = -((w1s >> ( 0 + 1)) & 1);
1193 const u32 K39 = -((w1s >> ( 0 + 0)) & 1);
1194 const u32 K40 = -((w1s >> ( 8 + 7)) & 1);
1195 const u32 K41 = -((w1s >> ( 8 + 6)) & 1);
1196 const u32 K42 = -((w1s >> ( 8 + 5)) & 1);
1197 const u32 K43 = -((w1s >> ( 8 + 4)) & 1);
1198 const u32 K44 = -((w1s >> ( 8 + 3)) & 1);
1199 const u32 K45 = -((w1s >> ( 8 + 2)) & 1);
1200 const u32 K46 = -((w1s >> ( 8 + 1)) & 1);
1201 const u32 K47 = -((w1s >> ( 8 + 0)) & 1);
1202 const u32 K48 = -((w1s >> (16 + 7)) & 1);
1203 const u32 K49 = -((w1s >> (16 + 6)) & 1);
1204 const u32 K50 = -((w1s >> (16 + 5)) & 1);
1205 const u32 K51 = -((w1s >> (16 + 4)) & 1);
1206 const u32 K52 = -((w1s >> (16 + 3)) & 1);
1207 const u32 K53 = -((w1s >> (16 + 2)) & 1);
1208 const u32 K54 = -((w1s >> (16 + 1)) & 1);
1209 const u32 K55 = -((w1s >> (16 + 0)) & 1);
1215 const u32 bf_loops = bfs_cnt;
1217 for (u32 il_pos = 0, pc_pos = 0; il_pos < bf_loops; il_pos += 32, pc_pos++)
1252 k00 |= c_tm[pc_pos].b[ 0];
1253 k01 |= c_tm[pc_pos].b[ 1];
1254 k02 |= c_tm[pc_pos].b[ 2];
1255 k03 |= c_tm[pc_pos].b[ 3];
1256 k04 |= c_tm[pc_pos].b[ 4];
1257 k05 |= c_tm[pc_pos].b[ 5];
1258 k06 |= c_tm[pc_pos].b[ 6];
1259 k07 |= c_tm[pc_pos].b[ 7];
1260 k08 |= c_tm[pc_pos].b[ 8];
1261 k09 |= c_tm[pc_pos].b[ 9];
1262 k10 |= c_tm[pc_pos].b[10];
1263 k11 |= c_tm[pc_pos].b[11];
1264 k12 |= c_tm[pc_pos].b[12];
1265 k13 |= c_tm[pc_pos].b[13];
1266 k14 |= c_tm[pc_pos].b[14];
1267 k15 |= c_tm[pc_pos].b[15];
1268 k16 |= c_tm[pc_pos].b[16];
1269 k17 |= c_tm[pc_pos].b[17];
1270 k18 |= c_tm[pc_pos].b[18];
1271 k19 |= c_tm[pc_pos].b[19];
1272 k20 |= c_tm[pc_pos].b[20];
1273 k21 |= c_tm[pc_pos].b[21];
1274 k22 |= c_tm[pc_pos].b[22];
1275 k23 |= c_tm[pc_pos].b[23];
1276 k24 |= c_tm[pc_pos].b[24];
1277 k25 |= c_tm[pc_pos].b[25];
1278 k26 |= c_tm[pc_pos].b[26];
1279 k27 |= c_tm[pc_pos].b[27];
1280 k28 |= c_tm[pc_pos].b[28];
1281 k29 |= c_tm[pc_pos].b[29];
1282 k30 |= c_tm[pc_pos].b[30];
1283 k31 |= c_tm[pc_pos].b[31];
1288 u32 D03 = 0xffffffff;
1290 u32 D05 = 0xffffffff;
1291 u32 D06 = 0xffffffff;
1292 u32 D07 = 0xffffffff;
1298 u32 D13 = 0xffffffff;
1301 u32 D16 = 0xffffffff;
1302 u32 D17 = 0xffffffff;
1307 u32 D22 = 0xffffffff;
1309 u32 D24 = 0xffffffff;
1311 u32 D26 = 0xffffffff;
1313 u32 D28 = 0xffffffff;
1314 u32 D29 = 0xffffffff;
1315 u32 D30 = 0xffffffff;
1316 u32 D31 = 0xffffffff;
1325 u32 D40 = 0xffffffff;
1326 u32 D41 = 0xffffffff;
1327 u32 D42 = 0xffffffff;
1329 u32 D44 = 0xffffffff;
1340 u32 D55 = 0xffffffff;
1343 u32 D58 = 0xffffffff;
1346 u32 D61 = 0xffffffff;
1347 u32 D62 = 0xffffffff;
1348 u32 D63 = 0xffffffff;
1352 k00, k01, k02, k03, k04, k05, k06,
1353 k07, k08, k09, k10, k11, k12, k13,
1354 k14, k15, k16, k17, k18, k19, k20,
1355 k21, k22, k23, k24, k25, k26, k27,
1356 k28, k29, k30, k31, K32, K33, K34,
1357 K35, K36, K37, K38, K39, K40, K41,
1358 K42, K43, K44, K45, K46, K47, K48,
1359 K49, K50, K51, K52, K53, K54, K55,
1360 D00, D01, D02, D03, D04, D05, D06, D07,
1361 D08, D09, D10, D11, D12, D13, D14, D15,
1362 D16, D17, D18, D19, D20, D21, D22, D23,
1363 D24, D25, D26, D27, D28, D29, D30, D31,
1364 D32, D33, D34, D35, D36, D37, D38, D39,
1365 D40, D41, D42, D43, D44, D45, D46, D47,
1366 D48, D49, D50, D51, D52, D53, D54, D55,
1367 D56, D57, D58, D59, D60, D61, D62, D63
1437 if (digests_cnt < 16)
1439 for (u32 d = 0; d < digests_cnt; d++)
1441 const u32 final_hash_pos = digests_offset + d;
1443 if (hashes_shown[final_hash_pos]) continue;
1447 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
1448 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
1453 for (int i = 0; i < 32; i++)
1455 const u32 b0 = -((search[0] >> i) & 1);
1456 const u32 b1 = -((search[1] >> i) & 1);
1458 tmpResult |= out[ 0 + i] ^ b0;
1459 tmpResult |= out[32 + i] ^ b1;
1462 if (tmpResult == 0xffffffff) continue;
1464 const u32 slice = 31 - __clz (~tmpResult);
1466 const u32x r0 = search[0];
1467 const u32x r1 = search[1];
1471 #include VECT_COMPARE_M
1480 for (int i = 0; i < 32; i++)
1482 out0[i] = out[ 0 + 31 - i];
1483 out1[i] = out[32 + 31 - i];
1486 transpose32c (out0);
1487 transpose32c (out1);
1490 for (int slice = 0; slice < 32; slice++)
1492 const u32x r0 = out0[31 - slice];
1493 const u32x r1 = out1[31 - slice];
1497 #include VECT_COMPARE_M
1503 __device__ static void m03000s (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1509 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1510 const u32 lid = threadIdx.x;
1585 const u32 w0s = pws[gid].i[0];
1586 const u32 w1s = pws[gid].i[1];
1588 const u32 K00 = -((w0s >> ( 0 + 7)) & 1);
1589 const u32 K01 = -((w0s >> ( 0 + 6)) & 1);
1590 const u32 K02 = -((w0s >> ( 0 + 5)) & 1);
1591 const u32 K03 = -((w0s >> ( 0 + 4)) & 1);
1592 const u32 K04 = -((w0s >> ( 0 + 3)) & 1);
1593 const u32 K05 = -((w0s >> ( 0 + 2)) & 1);
1594 const u32 K06 = -((w0s >> ( 0 + 1)) & 1);
1595 const u32 K07 = -((w0s >> ( 0 + 0)) & 1);
1596 const u32 K08 = -((w0s >> ( 8 + 7)) & 1);
1597 const u32 K09 = -((w0s >> ( 8 + 6)) & 1);
1598 const u32 K10 = -((w0s >> ( 8 + 5)) & 1);
1599 const u32 K11 = -((w0s >> ( 8 + 4)) & 1);
1600 const u32 K12 = -((w0s >> ( 8 + 3)) & 1);
1601 const u32 K13 = -((w0s >> ( 8 + 2)) & 1);
1602 const u32 K14 = -((w0s >> ( 8 + 1)) & 1);
1603 const u32 K15 = -((w0s >> ( 8 + 0)) & 1);
1604 const u32 K16 = -((w0s >> (16 + 7)) & 1);
1605 const u32 K17 = -((w0s >> (16 + 6)) & 1);
1606 const u32 K18 = -((w0s >> (16 + 5)) & 1);
1607 const u32 K19 = -((w0s >> (16 + 4)) & 1);
1608 const u32 K20 = -((w0s >> (16 + 3)) & 1);
1609 const u32 K21 = -((w0s >> (16 + 2)) & 1);
1610 const u32 K22 = -((w0s >> (16 + 1)) & 1);
1611 const u32 K23 = -((w0s >> (16 + 0)) & 1);
1612 const u32 K24 = -((w0s >> (24 + 7)) & 1);
1613 const u32 K25 = -((w0s >> (24 + 6)) & 1);
1614 const u32 K26 = -((w0s >> (24 + 5)) & 1);
1615 const u32 K27 = -((w0s >> (24 + 4)) & 1);
1616 const u32 K28 = -((w0s >> (24 + 3)) & 1);
1617 const u32 K29 = -((w0s >> (24 + 2)) & 1);
1618 const u32 K30 = -((w0s >> (24 + 1)) & 1);
1619 const u32 K31 = -((w0s >> (24 + 0)) & 1);
1620 const u32 K32 = -((w1s >> ( 0 + 7)) & 1);
1621 const u32 K33 = -((w1s >> ( 0 + 6)) & 1);
1622 const u32 K34 = -((w1s >> ( 0 + 5)) & 1);
1623 const u32 K35 = -((w1s >> ( 0 + 4)) & 1);
1624 const u32 K36 = -((w1s >> ( 0 + 3)) & 1);
1625 const u32 K37 = -((w1s >> ( 0 + 2)) & 1);
1626 const u32 K38 = -((w1s >> ( 0 + 1)) & 1);
1627 const u32 K39 = -((w1s >> ( 0 + 0)) & 1);
1628 const u32 K40 = -((w1s >> ( 8 + 7)) & 1);
1629 const u32 K41 = -((w1s >> ( 8 + 6)) & 1);
1630 const u32 K42 = -((w1s >> ( 8 + 5)) & 1);
1631 const u32 K43 = -((w1s >> ( 8 + 4)) & 1);
1632 const u32 K44 = -((w1s >> ( 8 + 3)) & 1);
1633 const u32 K45 = -((w1s >> ( 8 + 2)) & 1);
1634 const u32 K46 = -((w1s >> ( 8 + 1)) & 1);
1635 const u32 K47 = -((w1s >> ( 8 + 0)) & 1);
1636 const u32 K48 = -((w1s >> (16 + 7)) & 1);
1637 const u32 K49 = -((w1s >> (16 + 6)) & 1);
1638 const u32 K50 = -((w1s >> (16 + 5)) & 1);
1639 const u32 K51 = -((w1s >> (16 + 4)) & 1);
1640 const u32 K52 = -((w1s >> (16 + 3)) & 1);
1641 const u32 K53 = -((w1s >> (16 + 2)) & 1);
1642 const u32 K54 = -((w1s >> (16 + 1)) & 1);
1643 const u32 K55 = -((w1s >> (16 + 0)) & 1);
1649 const u32 bf_loops = bfs_cnt;
1651 for (u32 il_pos = 0, pc_pos = 0; il_pos < bf_loops; il_pos += 32, pc_pos++)
1686 k00 |= c_tm[pc_pos].b[ 0];
1687 k01 |= c_tm[pc_pos].b[ 1];
1688 k02 |= c_tm[pc_pos].b[ 2];
1689 k03 |= c_tm[pc_pos].b[ 3];
1690 k04 |= c_tm[pc_pos].b[ 4];
1691 k05 |= c_tm[pc_pos].b[ 5];
1692 k06 |= c_tm[pc_pos].b[ 6];
1693 k07 |= c_tm[pc_pos].b[ 7];
1694 k08 |= c_tm[pc_pos].b[ 8];
1695 k09 |= c_tm[pc_pos].b[ 9];
1696 k10 |= c_tm[pc_pos].b[10];
1697 k11 |= c_tm[pc_pos].b[11];
1698 k12 |= c_tm[pc_pos].b[12];
1699 k13 |= c_tm[pc_pos].b[13];
1700 k14 |= c_tm[pc_pos].b[14];
1701 k15 |= c_tm[pc_pos].b[15];
1702 k16 |= c_tm[pc_pos].b[16];
1703 k17 |= c_tm[pc_pos].b[17];
1704 k18 |= c_tm[pc_pos].b[18];
1705 k19 |= c_tm[pc_pos].b[19];
1706 k20 |= c_tm[pc_pos].b[20];
1707 k21 |= c_tm[pc_pos].b[21];
1708 k22 |= c_tm[pc_pos].b[22];
1709 k23 |= c_tm[pc_pos].b[23];
1710 k24 |= c_tm[pc_pos].b[24];
1711 k25 |= c_tm[pc_pos].b[25];
1712 k26 |= c_tm[pc_pos].b[26];
1713 k27 |= c_tm[pc_pos].b[27];
1714 k28 |= c_tm[pc_pos].b[28];
1715 k29 |= c_tm[pc_pos].b[29];
1716 k30 |= c_tm[pc_pos].b[30];
1717 k31 |= c_tm[pc_pos].b[31];
1722 u32 D03 = 0xffffffff;
1724 u32 D05 = 0xffffffff;
1725 u32 D06 = 0xffffffff;
1726 u32 D07 = 0xffffffff;
1732 u32 D13 = 0xffffffff;
1735 u32 D16 = 0xffffffff;
1736 u32 D17 = 0xffffffff;
1741 u32 D22 = 0xffffffff;
1743 u32 D24 = 0xffffffff;
1745 u32 D26 = 0xffffffff;
1747 u32 D28 = 0xffffffff;
1748 u32 D29 = 0xffffffff;
1749 u32 D30 = 0xffffffff;
1750 u32 D31 = 0xffffffff;
1759 u32 D40 = 0xffffffff;
1760 u32 D41 = 0xffffffff;
1761 u32 D42 = 0xffffffff;
1763 u32 D44 = 0xffffffff;
1774 u32 D55 = 0xffffffff;
1777 u32 D58 = 0xffffffff;
1780 u32 D61 = 0xffffffff;
1781 u32 D62 = 0xffffffff;
1782 u32 D63 = 0xffffffff;
1786 k00, k01, k02, k03, k04, k05, k06,
1787 k07, k08, k09, k10, k11, k12, k13,
1788 k14, k15, k16, k17, k18, k19, k20,
1789 k21, k22, k23, k24, k25, k26, k27,
1790 k28, k29, k30, k31, K32, K33, K34,
1791 K35, K36, K37, K38, K39, K40, K41,
1792 K42, K43, K44, K45, K46, K47, K48,
1793 K49, K50, K51, K52, K53, K54, K55,
1794 D00, D01, D02, D03, D04, D05, D06, D07,
1795 D08, D09, D10, D11, D12, D13, D14, D15,
1796 D16, D17, D18, D19, D20, D21, D22, D23,
1797 D24, D25, D26, D27, D28, D29, D30, D31,
1798 D32, D33, D34, D35, D36, D37, D38, D39,
1799 D40, D41, D42, D43, D44, D45, D46, D47,
1800 D48, D49, D50, D51, D52, D53, D54, D55,
1801 D56, D57, D58, D59, D60, D61, D62, D63
1806 tmpResult |= D00 ^ S00;
1807 tmpResult |= D01 ^ S01;
1808 tmpResult |= D02 ^ S02;
1809 tmpResult |= D03 ^ S03;
1810 tmpResult |= D04 ^ S04;
1811 tmpResult |= D05 ^ S05;
1812 tmpResult |= D06 ^ S06;
1813 tmpResult |= D07 ^ S07;
1814 tmpResult |= D08 ^ S08;
1815 tmpResult |= D09 ^ S09;
1816 tmpResult |= D10 ^ S10;
1817 tmpResult |= D11 ^ S11;
1818 tmpResult |= D12 ^ S12;
1819 tmpResult |= D13 ^ S13;
1820 tmpResult |= D14 ^ S14;
1821 tmpResult |= D15 ^ S15;
1823 if (tmpResult == 0xffffffff) continue;
1825 tmpResult |= D16 ^ S16;
1826 tmpResult |= D17 ^ S17;
1827 tmpResult |= D18 ^ S18;
1828 tmpResult |= D19 ^ S19;
1829 tmpResult |= D20 ^ S20;
1830 tmpResult |= D21 ^ S21;
1831 tmpResult |= D22 ^ S22;
1832 tmpResult |= D23 ^ S23;
1833 tmpResult |= D24 ^ S24;
1834 tmpResult |= D25 ^ S25;
1835 tmpResult |= D26 ^ S26;
1836 tmpResult |= D27 ^ S27;
1837 tmpResult |= D28 ^ S28;
1838 tmpResult |= D29 ^ S29;
1839 tmpResult |= D30 ^ S30;
1840 tmpResult |= D31 ^ S31;
1842 if (tmpResult == 0xffffffff) continue;
1844 tmpResult |= D32 ^ S32;
1845 tmpResult |= D33 ^ S33;
1846 tmpResult |= D34 ^ S34;
1847 tmpResult |= D35 ^ S35;
1848 tmpResult |= D36 ^ S36;
1849 tmpResult |= D37 ^ S37;
1850 tmpResult |= D38 ^ S38;
1851 tmpResult |= D39 ^ S39;
1852 tmpResult |= D40 ^ S40;
1853 tmpResult |= D41 ^ S41;
1854 tmpResult |= D42 ^ S42;
1855 tmpResult |= D43 ^ S43;
1856 tmpResult |= D44 ^ S44;
1857 tmpResult |= D45 ^ S45;
1858 tmpResult |= D46 ^ S46;
1859 tmpResult |= D47 ^ S47;
1861 if (tmpResult == 0xffffffff) continue;
1863 tmpResult |= D48 ^ S48;
1864 tmpResult |= D49 ^ S49;
1865 tmpResult |= D50 ^ S50;
1866 tmpResult |= D51 ^ S51;
1867 tmpResult |= D52 ^ S52;
1868 tmpResult |= D53 ^ S53;
1869 tmpResult |= D54 ^ S54;
1870 tmpResult |= D55 ^ S55;
1871 tmpResult |= D56 ^ S56;
1872 tmpResult |= D57 ^ S57;
1873 tmpResult |= D58 ^ S58;
1874 tmpResult |= D59 ^ S59;
1875 tmpResult |= D60 ^ S60;
1876 tmpResult |= D61 ^ S61;
1877 tmpResult |= D62 ^ S62;
1878 tmpResult |= D63 ^ S63;
1880 if (tmpResult == 0xffffffff) continue;
1882 const u32 slice = 31 - __clz (~tmpResult);
1884 #include VECT_COMPARE_S
1888 extern "C" __global__ void __launch_bounds__ (64, 1) m03000_tb (pw_t *pws)
1890 // not used here, inlined code
1893 extern "C" __global__ void __launch_bounds__ (32, 1) m03000_tm (const u32 *d_bfs, bs_word_t *d_tbs)
1895 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1897 const u32 block = gid / 32;
1898 const u32 slice = gid % 32;
1900 const u32 w0 = c_bfs[gid];
1903 for (int i = 0; i < 32; i += 8)
1905 atomicOr (&d_tbs[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice));
1906 atomicOr (&d_tbs[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice));
1907 atomicOr (&d_tbs[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice));
1908 atomicOr (&d_tbs[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice));
1909 atomicOr (&d_tbs[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice));
1910 atomicOr (&d_tbs[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice));
1911 atomicOr (&d_tbs[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice));
1912 atomicOr (&d_tbs[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice));
1916 extern "C" __global__ void __launch_bounds__ (64, 1) m03000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1922 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1923 const u32 lid = threadIdx.x;
1925 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
1926 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
1930 s_S[lid] = -((s0 >> lid - 0) & 1);
1934 s_S[lid] = -((s1 >> lid - 32) & 1);
1939 if (gid >= gid_max) return;
1945 m03000m (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1948 extern "C" __global__ void __launch_bounds__ (64, 1) m03000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1952 extern "C" __global__ void __launch_bounds__ (64, 1) m03000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1956 extern "C" __global__ void __launch_bounds__ (64, 1) m03000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1962 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1963 const u32 lid = threadIdx.x;
1965 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
1966 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
1970 s_S[lid] = -((s0 >> lid - 0) & 1);
1974 s_S[lid] = -((s1 >> lid - 32) & 1);
1979 if (gid >= gid_max) return;
1985 m03000s (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1988 extern "C" __global__ void __launch_bounds__ (64, 1) m03000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1992 extern "C" __global__ void __launch_bounds__ (64, 1) m03000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)