2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes for maxwell were taken from DeepLearningJohnDoe, license below
5 * : sboxes for others were takes fron JtR, license below
11 #include "include/constants.h"
12 #include "include/kernel_vendor.h"
27 #include "include/kernel_functions.c"
29 #include "common_nv.c"
32 #define VECT_COMPARE_S "check_single_vect1_comp4_warp_bs.c"
33 #define VECT_COMPARE_M "check_multi_vect1_comp4_warp_bs.c"
37 #define VECT_COMPARE_S "check_single_vect2_comp4_warp_bs.c"
38 #define VECT_COMPARE_M "check_multi_vect2_comp4_warp_bs.c"
42 #define VECT_COMPARE_S "check_single_vect4_comp4_warp_bs.c"
43 #define VECT_COMPARE_M "check_multi_vect4_comp4_warp_bs.c"
48 __device__ __constant__ u32x c_bfs[1024];
49 __device__ __constant__ bs_word_t c_tm[32];
50 __device__ __shared__ u32 s_S[64];
52 #if __CUDA_ARCH__ >= 500
55 // Bitslice DES S-boxes with LOP3.LUT instructions
56 // For NVIDIA Maxwell architecture and CUDA 7.5 RC
57 // by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
59 // Gate counts: 25 24 25 18 25 24 24 23
61 // Depth: 8 7 7 6 8 10 10 8
64 // Note that same S-box function with a lower gate count isn't necessarily faster.
66 // These Boolean expressions corresponding to DES S-boxes were
67 // discovered by <deeplearningjohndoe at gmail.com>
69 // This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
70 // Redistribution and use in source and binary forms, with or without
71 // modification, are permitted.
73 // The underlying mathematical formulas are NOT copyrighted.
76 #define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));
78 __device__ static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
80 LUT(xAA55AA5500550055, a1, a4, a6, 0xC1)
81 LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
82 LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
83 LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
84 LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
85 LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
86 LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
87 LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
88 LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
89 LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
90 LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
91 LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
92 LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
93 LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
94 LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
95 LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
96 LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
97 LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
98 LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
99 LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
100 LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
101 LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
102 LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
103 LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
104 LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)
112 __device__ static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
114 LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97)
115 LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
116 LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76)
117 LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
118 LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
119 LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65)
120 LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
121 LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
122 LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
123 LUT(x3333CCCC00000000, a2, a5, a6, 0x14)
124 LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
125 LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
126 LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
127 LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
128 LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
129 LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
130 LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
131 LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
132 LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
133 LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
134 LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
135 LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
136 LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
137 LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)
145 __device__ static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
147 LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
148 LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
149 LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
150 LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
151 LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
152 LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
153 LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
154 LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49)
155 LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
156 LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
157 LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
158 LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
159 LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
160 LUT(x500F500F500F500F, a1, a3, a4, 0x98)
161 LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
162 LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
163 LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
164 LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
165 LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
166 LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
167 LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
168 LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
169 LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
170 LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
171 LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)
179 __device__ static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
181 LUT(x55F055F055F055F0, a1, a3, a4, 0x72)
182 LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
183 LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59)
184 LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
185 LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
186 LUT(x9999666699996666, a1, a2, a5, 0x69)
187 LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
188 LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
189 LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
190 LUT(x4848484848484848, a1, a2, a3, 0x12)
191 LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
192 LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
193 LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
194 LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
195 LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
196 LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
197 LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
198 LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)
206 __device__ static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
208 LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
209 LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9)
210 LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
211 LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
212 LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
213 LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
214 LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
215 LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
216 LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
217 LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
218 LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
219 LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
220 LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
221 LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
222 LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
223 LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
224 LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
225 LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
226 LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
227 LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
228 LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
229 LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
230 LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
231 LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
232 LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)
240 __device__ static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
242 LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2)
243 LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
244 LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
245 LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
246 LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
247 LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
248 LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
249 LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
250 LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
251 LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
252 LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
253 LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
254 LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
255 LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
256 LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
257 LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
258 LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
259 LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
260 LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
261 LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
262 LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
263 LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
264 LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
265 LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)
274 __device__ static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
276 LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
277 LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
278 LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
279 LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
280 LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
281 LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
282 LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
283 LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
284 LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
285 LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
286 LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
287 LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
288 LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
289 LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
290 LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
291 LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23)
292 LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
293 LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
294 LUT(xA050A050A050A050, a1, a3, a4, 0x21)
295 LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
296 LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
297 LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
298 LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
299 LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)
308 __device__ static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
310 LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
311 LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
312 LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
313 LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
314 LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
315 LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
316 LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
317 LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
318 LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
319 LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
320 LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
321 LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
322 LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
323 LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
324 LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
325 LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
326 LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
327 LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
328 LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
329 LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
330 LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
331 LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
332 LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)
344 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
345 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
347 * Gate counts: 49 44 46 33 48 46 46 41
350 * Several same-gate-count expressions for each S-box are included (for use on
351 * different CPUs/GPUs).
353 * These Boolean expressions corresponding to DES S-boxes have been generated
354 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
355 * John the Ripper password cracker: http://www.openwall.com/john/
356 * Being mathematical formulas, they are not copyrighted and are free for reuse
359 * This file (a specific representation of the S-box expressions, surrounding
360 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
361 * Redistribution and use in source and binary forms, with or without
362 * modification, are permitted. (This is a heavily cut-down "BSD license".)
364 * The effort has been sponsored by Rapid7: http://www.rapid7.com
367 __device__ static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
369 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
371 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
372 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
373 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
374 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
375 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
376 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
377 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
378 u32 x00, x01, x10, x11, x20, x21, x30, x31;
380 x55005500 = a1 & ~a5;
381 x5A0F5A0F = a4 ^ x55005500;
384 x22226666 = x3333FFFF & x66666666;
385 x2D2D6969 = a4 ^ x22226666;
386 x25202160 = x2D2D6969 & ~x5A0F5A0F;
389 x33CCCC33 = a3 ^ x00FFFF00;
390 x4803120C = x5A0F5A0F & ~x33CCCC33;
391 x2222FFFF = a6 | x22226666;
392 x6A21EDF3 = x4803120C ^ x2222FFFF;
393 x4A01CC93 = x6A21EDF3 & ~x25202160;
396 x7F75FFFF = x6A21EDF3 | x5555FFFF;
397 x00D20096 = a5 & ~x2D2D6969;
398 x7FA7FF69 = x7F75FFFF ^ x00D20096;
400 x0A0A0000 = a4 & ~x5555FFFF;
401 x0AD80096 = x00D20096 ^ x0A0A0000;
402 x00999900 = x00FFFF00 & ~x66666666;
403 x0AD99996 = x0AD80096 | x00999900;
405 x22332233 = a3 & ~x55005500;
406 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
407 x054885C0 = x257AA5F0 & ~x22332233;
408 xFAB77A3F = ~x054885C0;
409 x2221EDF3 = x3333FFFF & x6A21EDF3;
410 xD89697CC = xFAB77A3F ^ x2221EDF3;
411 x20 = x7FA7FF69 & ~a2;
412 x21 = x20 ^ xD89697CC;
415 x05B77AC0 = x00FFFF00 ^ x054885C0;
416 x05F77AD6 = x00D20096 | x05B77AC0;
417 x36C48529 = x3333FFFF ^ x05F77AD6;
418 x6391D07C = a1 ^ x36C48529;
419 xBB0747B0 = xD89697CC ^ x6391D07C;
420 x00 = x25202160 | a2;
421 x01 = x00 ^ xBB0747B0;
424 x4C460000 = x3333FFFF ^ x7F75FFFF;
425 x4EDF9996 = x0AD99996 | x4C460000;
426 x2D4E49EA = x6391D07C ^ x4EDF9996;
427 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
428 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
429 x10 = x4A01CC93 | a2;
430 x11 = x10 ^ x96B1B65A;
433 x5AFF5AFF = a5 | x5A0F5A0F;
434 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
435 x4201C010 = x4A01CC93 & x6391D07C;
436 x10B0D205 = x52B11215 ^ x4201C010;
437 x30 = x10B0D205 | a2;
438 x31 = x30 ^ x0AD99996;
442 __device__ static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
445 u32 x55550000, x00AA00FF, x33BB33FF;
446 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
447 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
448 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
449 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
450 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
451 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
452 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
453 u32 x00, x01, x10, x11, x20, x21, x30, x31;
457 x55550000 = a1 & ~a6;
458 x00AA00FF = a5 & ~x55550000;
459 x33BB33FF = a2 | x00AA00FF;
461 x33CC0000 = x33CC33CC & ~a6;
462 x11441144 = a1 & x33CC33CC;
463 x11BB11BB = a5 ^ x11441144;
464 x003311BB = x11BB11BB & ~x33CC0000;
467 x336600FF = x00AA00FF ^ x33CC0000;
468 x332200FF = x33BB33FF & x336600FF;
469 x332200F0 = x332200FF & ~x00000F0F;
471 x0302000F = a3 & x332200FF;
473 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
474 x33CCCC33 = a6 ^ x33CC33CC;
475 x33CCC030 = x33CCCC33 & ~x00000F0F;
476 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
477 x10 = a4 & ~x332200F0;
478 x11 = x10 ^ x9A646A95;
481 x00333303 = a2 & ~x33CCC030;
482 x118822B8 = x11BB11BB ^ x00333303;
483 xA8208805 = xA9A8AAA5 & ~x118822B8;
484 x3CC3C33C = a3 ^ x33CCCC33;
485 x94E34B39 = xA8208805 ^ x3CC3C33C;
486 x00 = x33BB33FF & ~a4;
487 x01 = x00 ^ x94E34B39;
490 x0331330C = x0302000F ^ x00333303;
491 x3FF3F33C = x3CC3C33C | x0331330C;
492 xA9DF596A = x33BB33FF ^ x9A646A95;
493 xA9DF5F6F = x00000F0F | xA9DF596A;
494 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
496 xA9466A6A = x332200FF ^ x9A646A95;
497 x3DA52153 = x94E34B39 ^ xA9466A6A;
498 x29850143 = xA9DF5F6F & x3DA52153;
499 x33C0330C = x33CC33CC & x3FF3F33C;
500 x1A45324F = x29850143 ^ x33C0330C;
501 x20 = x1A45324F | a4;
502 x21 = x20 ^ x962CAC53;
505 x0A451047 = x1A45324F & ~x118822B8;
506 xBBDFDD7B = x33CCCC33 | xA9DF596A;
507 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
508 x30 = x003311BB | a4;
509 x31 = x30 ^ xB19ACD3C;
513 __device__ static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
515 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
516 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
517 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
518 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
519 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
520 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
521 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
522 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
523 u32 x00, x01, x10, x11, x20, x21, x30, x31;
525 x44444444 = a1 & ~a2;
527 x4F4FF4F4 = x44444444 | x0F0FF0F0;
529 x00AAAA00 = x00FFFF00 & ~a1;
530 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
532 x3C3CC3C3 = a2 ^ x0F0FF0F0;
533 x3C3C0000 = x3C3CC3C3 & ~a6;
534 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
535 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
537 x00005EF4 = a6 & x4FE55EF4;
538 x00FF5EFF = a4 | x00005EF4;
539 x00555455 = a1 & x00FF5EFF;
540 x3C699796 = x3C3CC3C3 ^ x00555455;
541 x30 = x4FE55EF4 & ~a5;
542 x31 = x30 ^ x3C699796;
545 x000FF000 = x0F0FF0F0 & x00FFFF00;
547 x26D9A15E = x7373F4F4 ^ x55AA55AA;
548 x2FDFAF5F = a3 | x26D9A15E;
549 x2FD00F5F = x2FDFAF5F & ~x000FF000;
551 x55AAFFAA = x00AAAA00 | x55AA55AA;
552 x28410014 = x3C699796 & ~x55AAFFAA;
554 x000000CC = x000000FF & ~a2;
555 x284100D8 = x28410014 ^ x000000CC;
557 x204100D0 = x7373F4F4 & x284100D8;
558 x3C3CC3FF = x3C3CC3C3 | x000000FF;
559 x1C3CC32F = x3C3CC3FF & ~x204100D0;
560 x4969967A = a1 ^ x1C3CC32F;
561 x10 = x2FD00F5F & a5;
562 x11 = x10 ^ x4969967A;
565 x4CC44CC4 = x4FE55EF4 & ~a2;
566 x40C040C0 = x4CC44CC4 & ~a3;
567 xC3C33C3C = ~x3C3CC3C3;
568 x9669C396 = x55AAFFAA ^ xC3C33C3C;
569 xD6A98356 = x40C040C0 ^ x9669C396;
570 x00 = a5 & ~x0C840A00;
571 x01 = x00 ^ xD6A98356;
574 xD6E9C3D6 = x40C040C0 | x9669C396;
575 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
576 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
577 x001A000B = a4 & ~x4FE55EF4;
578 x9A1F2D1B = x9A072D12 | x001A000B;
579 x20 = a5 & ~x284100D8;
580 x21 = x20 ^ x9A1F2D1B;
584 __device__ static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
586 u32 x5A5A5A5A, x0F0FF0F0;
587 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
588 x52FBCA0F, x61C8F93C;
589 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
590 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
591 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
592 u32 x00, x01, x10, x11, x20, x21, x30, x31;
597 x33FFCC00 = a5 ^ x33FF33FF;
598 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
599 x0C0CC0C0 = x0F0FF0F0 & ~a2;
600 x0CF3C03F = a4 ^ x0C0CC0C0;
601 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
602 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
603 x61C8F93C = a2 ^ x52FBCA0F;
605 x00C0C03C = x0CF3C03F & x61C8F93C;
606 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
607 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
608 x30908326 = x3B92A366 & ~x0F0F30C0;
609 x3C90B3D6 = x0C0030F0 ^ x30908326;
612 x0C0CFFFF = a5 | x0C0CC0C0;
613 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
614 x04124C11 = x379E5C99 & ~x33CC33CC;
615 x56E9861E = x52FBCA0F ^ x04124C11;
616 x00 = a6 & ~x3C90B3D6;
617 x01 = x00 ^ x56E9861E;
620 xA91679E1 = ~x56E9861E;
621 x10 = x3C90B3D6 & ~a6;
622 x11 = x10 ^ xA91679E1;
625 x9586CA37 = x3C90B3D6 ^ xA91679E1;
626 x8402C833 = x9586CA37 & ~x33CC33CC;
627 x84C2C83F = x00C0C03C | x8402C833;
628 xB35C94A6 = x379E5C99 ^ x84C2C83F;
629 x20 = x61C8F93C | a6;
630 x21 = x20 ^ xB35C94A6;
633 x30 = a6 & x61C8F93C;
634 x31 = x30 ^ xB35C94A6;
638 __device__ static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
640 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
641 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
642 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
643 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
644 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
645 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
646 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
647 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
648 u32 x00, x01, x10, x11, x20, x21, x30, x31;
651 x77770000 = x77777777 & ~a6;
652 x22225555 = a1 ^ x77770000;
653 x11116666 = a3 ^ x22225555;
654 x1F1F6F6F = a4 | x11116666;
656 x70700000 = x77770000 & ~a4;
657 x43433333 = a3 ^ x70700000;
658 x00430033 = a5 & x43433333;
659 x55557777 = a1 | x11116666;
660 x55167744 = x00430033 ^ x55557777;
661 x5A19784B = a4 ^ x55167744;
663 x5A1987B4 = a6 ^ x5A19784B;
664 x7A3BD7F5 = x22225555 | x5A1987B4;
665 x003B00F5 = a5 & x7A3BD7F5;
666 x221955A0 = x22225555 ^ x003B00F5;
667 x05050707 = a4 & x55557777;
668 x271C52A7 = x221955A0 ^ x05050707;
670 x2A2A82A0 = x7A3BD7F5 & ~a1;
671 x6969B193 = x43433333 ^ x2A2A82A0;
672 x1FE06F90 = a5 ^ x1F1F6F6F;
673 x16804E00 = x1FE06F90 & ~x6969B193;
674 xE97FB1FF = ~x16804E00;
675 x20 = xE97FB1FF & ~a2;
676 x21 = x20 ^ x5A19784B;
679 x43403302 = x43433333 & ~x003B00F5;
680 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
681 x37DEFFB7 = x271C52A7 | x35CAED30;
682 x349ECCB5 = x37DEFFB7 & ~x43403302;
683 x0B01234A = x1F1F6F6F & ~x349ECCB5;
685 x101884B4 = x5A1987B4 & x349ECCB5;
686 x0FF8EB24 = x1FE06F90 ^ x101884B4;
687 x41413333 = x43433333 & x55557777;
688 x4FF9FB37 = x0FF8EB24 | x41413333;
689 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
690 x30 = x4FC2FBC2 & a2;
691 x31 = x30 ^ x271C52A7;
694 x22222222 = a1 ^ x77777777;
695 x16BCEE97 = x349ECCB5 ^ x22222222;
696 x0F080B04 = a4 & x0FF8EB24;
697 x19B4E593 = x16BCEE97 ^ x0F080B04;
698 x00 = x0B01234A | a2;
699 x01 = x00 ^ x19B4E593;
702 x5C5C5C5C = x1F1F6F6F ^ x43433333;
703 x4448184C = x5C5C5C5C & ~x19B4E593;
704 x2DDABE71 = x22225555 ^ x0FF8EB24;
705 x6992A63D = x4448184C ^ x2DDABE71;
706 x10 = x1F1F6F6F & a2;
707 x11 = x10 ^ x6992A63D;
711 __device__ static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
714 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
715 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
716 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
717 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
718 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
719 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
720 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
721 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
722 u32 x00, x01, x10, x11, x20, x21, x30, x31;
727 x11115555 = a1 & x3333FFFF;
728 x22DD6699 = x33CC33CC ^ x11115555;
729 x22DD9966 = a6 ^ x22DD6699;
730 x00220099 = a5 & ~x22DD9966;
732 x00551144 = a1 & x22DD9966;
733 x33662277 = a2 ^ x00551144;
735 x7B7E7A7F = x33662277 | x5A5A5A5A;
736 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
738 x09030C06 = a3 & x59A31CE6;
739 x09030000 = x09030C06 & ~a6;
740 x336622FF = x00220099 | x33662277;
741 x3A6522FF = x09030000 ^ x336622FF;
742 x30 = x3A6522FF & a4;
743 x31 = x30 ^ x59A31CE6;
746 x484D494C = a2 ^ x7B7E7A7F;
747 x0000B6B3 = a6 & ~x484D494C;
748 x0F0FB9BC = a3 ^ x0000B6B3;
749 x00FC00F9 = a5 & ~x09030C06;
750 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
752 x5DF75DF7 = a1 | x59A31CE6;
753 x116600F7 = x336622FF & x5DF75DF7;
754 x1E69B94B = x0F0FB9BC ^ x116600F7;
755 x1668B94B = x1E69B94B & ~x09030000;
756 x20 = x00220099 | a4;
757 x21 = x20 ^ x1668B94B;
760 x7B7B7B7B = a2 | x5A5A5A5A;
761 x411E5984 = x3A6522FF ^ x7B7B7B7B;
762 x1FFFFDFD = x11115555 | x0FFFB9FD;
763 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
765 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
766 x004B002D = a5 & ~x3CB4DFD2;
767 xB7B2B6B3 = ~x484D494C;
768 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
769 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
770 x10 = xCC82CDE5 & ~a4;
771 x11 = x10 ^ x5EE1A479;
774 x0055EEBB = a6 ^ x00551144;
775 x5A5AECE9 = a1 ^ x0F0FB9BC;
776 x0050ECA9 = x0055EEBB & x5A5AECE9;
777 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
778 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
779 x00 = x0FFFB9FD & ~a4;
780 x01 = x00 ^ xC59A2D67;
784 __device__ static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
786 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
787 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
788 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
789 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
790 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
791 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
792 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
793 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
794 u32 x00, x01, x10, x11, x20, x21, x30, x31;
797 x3CC33CC3 = a3 ^ x0FF00FF0;
798 x00003CC3 = a6 & x3CC33CC3;
799 x0F000F00 = a4 & x0FF00FF0;
800 x5A555A55 = a2 ^ x0F000F00;
801 x00001841 = x00003CC3 & x5A555A55;
803 x00000F00 = a6 & x0F000F00;
804 x33333C33 = a3 ^ x00000F00;
805 x7B777E77 = x5A555A55 | x33333C33;
806 x0FF0F00F = a6 ^ x0FF00FF0;
807 x74878E78 = x7B777E77 ^ x0FF0F00F;
808 x30 = a1 & ~x00001841;
809 x31 = x30 ^ x74878E78;
812 x003C003C = a5 & ~x3CC33CC3;
813 x5A7D5A7D = x5A555A55 | x003C003C;
814 x333300F0 = x00003CC3 ^ x33333C33;
815 x694E5A8D = x5A7D5A7D ^ x333300F0;
817 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
818 x000F0303 = a4 & ~x0FF0CCCC;
819 x5A505854 = x5A555A55 & ~x000F0303;
820 x33CC000F = a5 ^ x333300F0;
821 x699C585B = x5A505854 ^ x33CC000F;
823 x7F878F78 = x0F000F00 | x74878E78;
824 x21101013 = a3 & x699C585B;
825 x7F979F7B = x7F878F78 | x21101013;
826 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
827 x4F9493BB = x7F979F7B ^ x30030CC0;
828 x00 = x4F9493BB & ~a1;
829 x01 = x00 ^ x694E5A8D;
832 x6F9CDBFB = x699C585B | x4F9493BB;
833 x0000DBFB = a6 & x6F9CDBFB;
834 x00005151 = a2 & x0000DBFB;
835 x26DAC936 = x694E5A8D ^ x4F9493BB;
836 x26DA9867 = x00005151 ^ x26DAC936;
838 x27DA9877 = x21101013 | x26DA9867;
839 x27DA438C = x0000DBFB ^ x27DA9877;
840 x2625C9C9 = a5 ^ x26DAC936;
841 x27FFCBCD = x27DA438C | x2625C9C9;
842 x20 = x27FFCBCD & a1;
843 x21 = x20 ^ x699C585B;
846 x27FF1036 = x0000DBFB ^ x27FFCBCD;
847 x27FF103E = x003C003C | x27FF1036;
848 xB06B6C44 = ~x4F9493BB;
849 x97947C7A = x27FF103E ^ xB06B6C44;
850 x10 = x97947C7A & ~a1;
851 x11 = x10 ^ x26DA9867;
855 __device__ static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
857 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
858 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
859 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
860 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
861 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
862 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
863 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
864 u32 x00, x01, x10, x11, x20, x21, x30, x31;
866 x0C0C0C0C = a3 & ~a2;
867 x0000F0F0 = a5 & ~a3;
868 x00FFF00F = a4 ^ x0000F0F0;
869 x00555005 = a1 & x00FFF00F;
870 x00515001 = x00555005 & ~x0C0C0C0C;
872 x33000330 = a2 & ~x00FFF00F;
873 x77555775 = a1 | x33000330;
874 x30303030 = a2 & ~a3;
875 x3030CFCF = a5 ^ x30303030;
876 x30104745 = x77555775 & x3030CFCF;
877 x30555745 = x00555005 | x30104745;
879 xFF000FF0 = ~x00FFF00F;
880 xCF1048B5 = x30104745 ^ xFF000FF0;
881 x080A080A = a3 & ~x77555775;
882 xC71A40BF = xCF1048B5 ^ x080A080A;
883 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
884 x10 = x00515001 | a6;
885 x11 = x10 ^ xCB164CB3;
888 x9E4319E6 = a1 ^ xCB164CB3;
889 x000019E6 = a5 & x9E4319E6;
890 xF429738C = a2 ^ xC71A40BF;
891 xF4296A6A = x000019E6 ^ xF429738C;
892 xC729695A = x33000330 ^ xF4296A6A;
894 xC47C3D2F = x30555745 ^ xF4296A6A;
895 xF77F3F3F = a2 | xC47C3D2F;
896 x9E43E619 = a5 ^ x9E4319E6;
897 x693CD926 = xF77F3F3F ^ x9E43E619;
898 x20 = x30555745 & a6;
899 x21 = x20 ^ x693CD926;
902 xF719A695 = x3030CFCF ^ xC729695A;
903 xF4FF73FF = a4 | xF429738C;
904 x03E6D56A = xF719A695 ^ xF4FF73FF;
905 x56B3803F = a1 ^ x03E6D56A;
906 x30 = x56B3803F & a6;
907 x31 = x30 ^ xC729695A;
910 xF700A600 = xF719A695 & ~a4;
911 x61008000 = x693CD926 & xF700A600;
912 x03B7856B = x00515001 ^ x03E6D56A;
913 x62B7056B = x61008000 ^ x03B7856B;
914 x00 = x62B7056B | a6;
915 x01 = x00 ^ xC729695A;
921 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
922 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
923 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
924 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
925 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
926 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
927 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
928 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
929 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
930 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
931 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
932 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
933 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
934 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
935 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
936 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
938 __device__ static void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 &D00, u32 &D01, u32 &D02, u32 &D03, u32 &D04, u32 &D05, u32 &D06, u32 &D07, u32 &D08, u32 &D09, u32 &D10, u32 &D11, u32 &D12, u32 &D13, u32 &D14, u32 &D15, u32 &D16, u32 &D17, u32 &D18, u32 &D19, u32 &D20, u32 &D21, u32 &D22, u32 &D23, u32 &D24, u32 &D25, u32 &D26, u32 &D27, u32 &D28, u32 &D29, u32 &D30, u32 &D31, u32 &D32, u32 &D33, u32 &D34, u32 &D35, u32 &D36, u32 &D37, u32 &D38, u32 &D39, u32 &D40, u32 &D41, u32 &D42, u32 &D43, u32 &D44, u32 &D45, u32 &D46, u32 &D47, u32 &D48, u32 &D49, u32 &D50, u32 &D51, u32 &D52, u32 &D53, u32 &D54, u32 &D55, u32 &D56, u32 &D57, u32 &D58, u32 &D59, u32 &D60, u32 &D61, u32 &D62, u32 &D63)
940 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
941 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
942 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
943 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
944 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
945 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
946 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
947 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
951 #if __CUDA_ARCH__ >= 500
957 for (u32 i = 0; i < 2; i++)
959 if (i) KEYSET10 else KEYSET00
961 s1(D63 ^ k00, D32 ^ k01, D33 ^ k02, D34 ^ k03, D35 ^ k04, D36 ^ k05, &D08, &D16, &D22, &D30);
962 s2(D35 ^ k06, D36 ^ k07, D37 ^ k08, D38 ^ k09, D39 ^ k10, D40 ^ k11, &D12, &D27, &D01, &D17);
963 s3(D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
964 s4(D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
965 s5(D47 ^ k24, D48 ^ k25, D49 ^ k26, D50 ^ k27, D51 ^ k28, D52 ^ k29, &D07, &D13, &D24, &D02);
966 s6(D51 ^ k30, D52 ^ k31, D53 ^ k32, D54 ^ k33, D55 ^ k34, D56 ^ k35, &D03, &D28, &D10, &D18);
967 s7(D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
968 s8(D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
970 if (i) KEYSET11 else KEYSET01
972 s1(D31 ^ k00, D00 ^ k01, D01 ^ k02, D02 ^ k03, D03 ^ k04, D04 ^ k05, &D40, &D48, &D54, &D62);
973 s2(D03 ^ k06, D04 ^ k07, D05 ^ k08, D06 ^ k09, D07 ^ k10, D08 ^ k11, &D44, &D59, &D33, &D49);
974 s3(D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37);
975 s4(D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32);
976 s5(D15 ^ k24, D16 ^ k25, D17 ^ k26, D18 ^ k27, D19 ^ k28, D20 ^ k29, &D39, &D45, &D56, &D34);
977 s6(D19 ^ k30, D20 ^ k31, D21 ^ k32, D22 ^ k33, D23 ^ k34, D24 ^ k35, &D35, &D60, &D42, &D50);
978 s7(D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38);
979 s8(D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52);
981 if (i) KEYSET12 else KEYSET02
983 s1(D63 ^ k00, D32 ^ k01, D33 ^ k02, D34 ^ k03, D35 ^ k04, D36 ^ k05, &D08, &D16, &D22, &D30);
984 s2(D35 ^ k06, D36 ^ k07, D37 ^ k08, D38 ^ k09, D39 ^ k10, D40 ^ k11, &D12, &D27, &D01, &D17);
985 s3(D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
986 s4(D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
987 s5(D47 ^ k24, D48 ^ k25, D49 ^ k26, D50 ^ k27, D51 ^ k28, D52 ^ k29, &D07, &D13, &D24, &D02);
988 s6(D51 ^ k30, D52 ^ k31, D53 ^ k32, D54 ^ k33, D55 ^ k34, D56 ^ k35, &D03, &D28, &D10, &D18);
989 s7(D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
990 s8(D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
992 if (i) KEYSET13 else KEYSET03
994 s1(D31 ^ k00, D00 ^ k01, D01 ^ k02, D02 ^ k03, D03 ^ k04, D04 ^ k05, &D40, &D48, &D54, &D62);
995 s2(D03 ^ k06, D04 ^ k07, D05 ^ k08, D06 ^ k09, D07 ^ k10, D08 ^ k11, &D44, &D59, &D33, &D49);
996 s3(D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37);
997 s4(D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32);
998 s5(D15 ^ k24, D16 ^ k25, D17 ^ k26, D18 ^ k27, D19 ^ k28, D20 ^ k29, &D39, &D45, &D56, &D34);
999 s6(D19 ^ k30, D20 ^ k31, D21 ^ k32, D22 ^ k33, D23 ^ k34, D24 ^ k35, &D35, &D60, &D42, &D50);
1000 s7(D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38);
1001 s8(D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52);
1003 if (i) KEYSET14 else KEYSET04
1005 s1(D63 ^ k00, D32 ^ k01, D33 ^ k02, D34 ^ k03, D35 ^ k04, D36 ^ k05, &D08, &D16, &D22, &D30);
1006 s2(D35 ^ k06, D36 ^ k07, D37 ^ k08, D38 ^ k09, D39 ^ k10, D40 ^ k11, &D12, &D27, &D01, &D17);
1007 s3(D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
1008 s4(D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
1009 s5(D47 ^ k24, D48 ^ k25, D49 ^ k26, D50 ^ k27, D51 ^ k28, D52 ^ k29, &D07, &D13, &D24, &D02);
1010 s6(D51 ^ k30, D52 ^ k31, D53 ^ k32, D54 ^ k33, D55 ^ k34, D56 ^ k35, &D03, &D28, &D10, &D18);
1011 s7(D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
1012 s8(D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
1014 if (i) KEYSET15 else KEYSET05
1016 s1(D31 ^ k00, D00 ^ k01, D01 ^ k02, D02 ^ k03, D03 ^ k04, D04 ^ k05, &D40, &D48, &D54, &D62);
1017 s2(D03 ^ k06, D04 ^ k07, D05 ^ k08, D06 ^ k09, D07 ^ k10, D08 ^ k11, &D44, &D59, &D33, &D49);
1018 s3(D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37);
1019 s4(D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32);
1020 s5(D15 ^ k24, D16 ^ k25, D17 ^ k26, D18 ^ k27, D19 ^ k28, D20 ^ k29, &D39, &D45, &D56, &D34);
1021 s6(D19 ^ k30, D20 ^ k31, D21 ^ k32, D22 ^ k33, D23 ^ k34, D24 ^ k35, &D35, &D60, &D42, &D50);
1022 s7(D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38);
1023 s8(D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52);
1025 if (i) KEYSET16 else KEYSET06
1027 s1(D63 ^ k00, D32 ^ k01, D33 ^ k02, D34 ^ k03, D35 ^ k04, D36 ^ k05, &D08, &D16, &D22, &D30);
1028 s2(D35 ^ k06, D36 ^ k07, D37 ^ k08, D38 ^ k09, D39 ^ k10, D40 ^ k11, &D12, &D27, &D01, &D17);
1029 s3(D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
1030 s4(D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
1031 s5(D47 ^ k24, D48 ^ k25, D49 ^ k26, D50 ^ k27, D51 ^ k28, D52 ^ k29, &D07, &D13, &D24, &D02);
1032 s6(D51 ^ k30, D52 ^ k31, D53 ^ k32, D54 ^ k33, D55 ^ k34, D56 ^ k35, &D03, &D28, &D10, &D18);
1033 s7(D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
1034 s8(D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
1036 if (i) KEYSET17 else KEYSET07
1038 s1(D31 ^ k00, D00 ^ k01, D01 ^ k02, D02 ^ k03, D03 ^ k04, D04 ^ k05, &D40, &D48, &D54, &D62);
1039 s2(D03 ^ k06, D04 ^ k07, D05 ^ k08, D06 ^ k09, D07 ^ k10, D08 ^ k11, &D44, &D59, &D33, &D49);
1040 s3(D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37);
1041 s4(D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32);
1042 s5(D15 ^ k24, D16 ^ k25, D17 ^ k26, D18 ^ k27, D19 ^ k28, D20 ^ k29, &D39, &D45, &D56, &D34);
1043 s6(D19 ^ k30, D20 ^ k31, D21 ^ k32, D22 ^ k33, D23 ^ k34, D24 ^ k35, &D35, &D60, &D42, &D50);
1044 s7(D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38);
1045 s8(D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52);
1049 __device__ static void transpose32c (u32 data[32])
1051 #define swap(x,y,j,m) \
1052 t = ((x) ^ ((y) >> (j))) & (m); \
1054 (y) = (y) ^ (t << (j));
1058 swap (data[ 0], data[16], 16, 0x0000ffff);
1059 swap (data[ 1], data[17], 16, 0x0000ffff);
1060 swap (data[ 2], data[18], 16, 0x0000ffff);
1061 swap (data[ 3], data[19], 16, 0x0000ffff);
1062 swap (data[ 4], data[20], 16, 0x0000ffff);
1063 swap (data[ 5], data[21], 16, 0x0000ffff);
1064 swap (data[ 6], data[22], 16, 0x0000ffff);
1065 swap (data[ 7], data[23], 16, 0x0000ffff);
1066 swap (data[ 8], data[24], 16, 0x0000ffff);
1067 swap (data[ 9], data[25], 16, 0x0000ffff);
1068 swap (data[10], data[26], 16, 0x0000ffff);
1069 swap (data[11], data[27], 16, 0x0000ffff);
1070 swap (data[12], data[28], 16, 0x0000ffff);
1071 swap (data[13], data[29], 16, 0x0000ffff);
1072 swap (data[14], data[30], 16, 0x0000ffff);
1073 swap (data[15], data[31], 16, 0x0000ffff);
1074 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
1075 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
1076 swap (data[ 2], data[10], 8, 0x00ff00ff);
1077 swap (data[ 3], data[11], 8, 0x00ff00ff);
1078 swap (data[ 4], data[12], 8, 0x00ff00ff);
1079 swap (data[ 5], data[13], 8, 0x00ff00ff);
1080 swap (data[ 6], data[14], 8, 0x00ff00ff);
1081 swap (data[ 7], data[15], 8, 0x00ff00ff);
1082 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
1083 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
1084 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
1085 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
1086 swap (data[ 0], data[ 2], 2, 0x33333333);
1087 swap (data[ 1], data[ 3], 2, 0x33333333);
1088 swap (data[ 0], data[ 1], 1, 0x55555555);
1089 swap (data[ 2], data[ 3], 1, 0x55555555);
1090 swap (data[ 4], data[ 6], 2, 0x33333333);
1091 swap (data[ 5], data[ 7], 2, 0x33333333);
1092 swap (data[ 4], data[ 5], 1, 0x55555555);
1093 swap (data[ 6], data[ 7], 1, 0x55555555);
1094 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
1095 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
1096 swap (data[10], data[14], 4, 0x0f0f0f0f);
1097 swap (data[11], data[15], 4, 0x0f0f0f0f);
1098 swap (data[ 8], data[10], 2, 0x33333333);
1099 swap (data[ 9], data[11], 2, 0x33333333);
1100 swap (data[ 8], data[ 9], 1, 0x55555555);
1101 swap (data[10], data[11], 1, 0x55555555);
1102 swap (data[12], data[14], 2, 0x33333333);
1103 swap (data[13], data[15], 2, 0x33333333);
1104 swap (data[12], data[13], 1, 0x55555555);
1105 swap (data[14], data[15], 1, 0x55555555);
1106 swap (data[16], data[24], 8, 0x00ff00ff);
1107 swap (data[17], data[25], 8, 0x00ff00ff);
1108 swap (data[18], data[26], 8, 0x00ff00ff);
1109 swap (data[19], data[27], 8, 0x00ff00ff);
1110 swap (data[20], data[28], 8, 0x00ff00ff);
1111 swap (data[21], data[29], 8, 0x00ff00ff);
1112 swap (data[22], data[30], 8, 0x00ff00ff);
1113 swap (data[23], data[31], 8, 0x00ff00ff);
1114 swap (data[16], data[20], 4, 0x0f0f0f0f);
1115 swap (data[17], data[21], 4, 0x0f0f0f0f);
1116 swap (data[18], data[22], 4, 0x0f0f0f0f);
1117 swap (data[19], data[23], 4, 0x0f0f0f0f);
1118 swap (data[16], data[18], 2, 0x33333333);
1119 swap (data[17], data[19], 2, 0x33333333);
1120 swap (data[16], data[17], 1, 0x55555555);
1121 swap (data[18], data[19], 1, 0x55555555);
1122 swap (data[20], data[22], 2, 0x33333333);
1123 swap (data[21], data[23], 2, 0x33333333);
1124 swap (data[20], data[21], 1, 0x55555555);
1125 swap (data[22], data[23], 1, 0x55555555);
1126 swap (data[24], data[28], 4, 0x0f0f0f0f);
1127 swap (data[25], data[29], 4, 0x0f0f0f0f);
1128 swap (data[26], data[30], 4, 0x0f0f0f0f);
1129 swap (data[27], data[31], 4, 0x0f0f0f0f);
1130 swap (data[24], data[26], 2, 0x33333333);
1131 swap (data[25], data[27], 2, 0x33333333);
1132 swap (data[24], data[25], 1, 0x55555555);
1133 swap (data[26], data[27], 1, 0x55555555);
1134 swap (data[28], data[30], 2, 0x33333333);
1135 swap (data[29], data[31], 2, 0x33333333);
1136 swap (data[28], data[29], 1, 0x55555555);
1137 swap (data[30], data[31], 1, 0x55555555);
1140 __device__ static void m03000m (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1147 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1148 const u32 lid = threadIdx.x;
1154 const u32 w0s = pws[gid].i[0];
1155 const u32 w1s = pws[gid].i[1];
1157 const u32 K00 = -((w0s >> ( 0 + 7)) & 1);
1158 const u32 K01 = -((w0s >> ( 0 + 6)) & 1);
1159 const u32 K02 = -((w0s >> ( 0 + 5)) & 1);
1160 const u32 K03 = -((w0s >> ( 0 + 4)) & 1);
1161 const u32 K04 = -((w0s >> ( 0 + 3)) & 1);
1162 const u32 K05 = -((w0s >> ( 0 + 2)) & 1);
1163 const u32 K06 = -((w0s >> ( 0 + 1)) & 1);
1164 const u32 K07 = -((w0s >> ( 0 + 0)) & 1);
1165 const u32 K08 = -((w0s >> ( 8 + 7)) & 1);
1166 const u32 K09 = -((w0s >> ( 8 + 6)) & 1);
1167 const u32 K10 = -((w0s >> ( 8 + 5)) & 1);
1168 const u32 K11 = -((w0s >> ( 8 + 4)) & 1);
1169 const u32 K12 = -((w0s >> ( 8 + 3)) & 1);
1170 const u32 K13 = -((w0s >> ( 8 + 2)) & 1);
1171 const u32 K14 = -((w0s >> ( 8 + 1)) & 1);
1172 const u32 K15 = -((w0s >> ( 8 + 0)) & 1);
1173 const u32 K16 = -((w0s >> (16 + 7)) & 1);
1174 const u32 K17 = -((w0s >> (16 + 6)) & 1);
1175 const u32 K18 = -((w0s >> (16 + 5)) & 1);
1176 const u32 K19 = -((w0s >> (16 + 4)) & 1);
1177 const u32 K20 = -((w0s >> (16 + 3)) & 1);
1178 const u32 K21 = -((w0s >> (16 + 2)) & 1);
1179 const u32 K22 = -((w0s >> (16 + 1)) & 1);
1180 const u32 K23 = -((w0s >> (16 + 0)) & 1);
1181 const u32 K24 = -((w0s >> (24 + 7)) & 1);
1182 const u32 K25 = -((w0s >> (24 + 6)) & 1);
1183 const u32 K26 = -((w0s >> (24 + 5)) & 1);
1184 const u32 K27 = -((w0s >> (24 + 4)) & 1);
1185 const u32 K28 = -((w0s >> (24 + 3)) & 1);
1186 const u32 K29 = -((w0s >> (24 + 2)) & 1);
1187 const u32 K30 = -((w0s >> (24 + 1)) & 1);
1188 const u32 K31 = -((w0s >> (24 + 0)) & 1);
1189 const u32 K32 = -((w1s >> ( 0 + 7)) & 1);
1190 const u32 K33 = -((w1s >> ( 0 + 6)) & 1);
1191 const u32 K34 = -((w1s >> ( 0 + 5)) & 1);
1192 const u32 K35 = -((w1s >> ( 0 + 4)) & 1);
1193 const u32 K36 = -((w1s >> ( 0 + 3)) & 1);
1194 const u32 K37 = -((w1s >> ( 0 + 2)) & 1);
1195 const u32 K38 = -((w1s >> ( 0 + 1)) & 1);
1196 const u32 K39 = -((w1s >> ( 0 + 0)) & 1);
1197 const u32 K40 = -((w1s >> ( 8 + 7)) & 1);
1198 const u32 K41 = -((w1s >> ( 8 + 6)) & 1);
1199 const u32 K42 = -((w1s >> ( 8 + 5)) & 1);
1200 const u32 K43 = -((w1s >> ( 8 + 4)) & 1);
1201 const u32 K44 = -((w1s >> ( 8 + 3)) & 1);
1202 const u32 K45 = -((w1s >> ( 8 + 2)) & 1);
1203 const u32 K46 = -((w1s >> ( 8 + 1)) & 1);
1204 const u32 K47 = -((w1s >> ( 8 + 0)) & 1);
1205 const u32 K48 = -((w1s >> (16 + 7)) & 1);
1206 const u32 K49 = -((w1s >> (16 + 6)) & 1);
1207 const u32 K50 = -((w1s >> (16 + 5)) & 1);
1208 const u32 K51 = -((w1s >> (16 + 4)) & 1);
1209 const u32 K52 = -((w1s >> (16 + 3)) & 1);
1210 const u32 K53 = -((w1s >> (16 + 2)) & 1);
1211 const u32 K54 = -((w1s >> (16 + 1)) & 1);
1212 const u32 K55 = -((w1s >> (16 + 0)) & 1);
1218 const u32 bf_loops = bfs_cnt;
1220 for (u32 il_pos = 0, pc_pos = 0; il_pos < bf_loops; il_pos += 32, pc_pos++)
1255 k00 |= c_tm[pc_pos].b[ 0];
1256 k01 |= c_tm[pc_pos].b[ 1];
1257 k02 |= c_tm[pc_pos].b[ 2];
1258 k03 |= c_tm[pc_pos].b[ 3];
1259 k04 |= c_tm[pc_pos].b[ 4];
1260 k05 |= c_tm[pc_pos].b[ 5];
1261 k06 |= c_tm[pc_pos].b[ 6];
1262 k07 |= c_tm[pc_pos].b[ 7];
1263 k08 |= c_tm[pc_pos].b[ 8];
1264 k09 |= c_tm[pc_pos].b[ 9];
1265 k10 |= c_tm[pc_pos].b[10];
1266 k11 |= c_tm[pc_pos].b[11];
1267 k12 |= c_tm[pc_pos].b[12];
1268 k13 |= c_tm[pc_pos].b[13];
1269 k14 |= c_tm[pc_pos].b[14];
1270 k15 |= c_tm[pc_pos].b[15];
1271 k16 |= c_tm[pc_pos].b[16];
1272 k17 |= c_tm[pc_pos].b[17];
1273 k18 |= c_tm[pc_pos].b[18];
1274 k19 |= c_tm[pc_pos].b[19];
1275 k20 |= c_tm[pc_pos].b[20];
1276 k21 |= c_tm[pc_pos].b[21];
1277 k22 |= c_tm[pc_pos].b[22];
1278 k23 |= c_tm[pc_pos].b[23];
1279 k24 |= c_tm[pc_pos].b[24];
1280 k25 |= c_tm[pc_pos].b[25];
1281 k26 |= c_tm[pc_pos].b[26];
1282 k27 |= c_tm[pc_pos].b[27];
1283 k28 |= c_tm[pc_pos].b[28];
1284 k29 |= c_tm[pc_pos].b[29];
1285 k30 |= c_tm[pc_pos].b[30];
1286 k31 |= c_tm[pc_pos].b[31];
1291 u32 D03 = 0xffffffff;
1293 u32 D05 = 0xffffffff;
1294 u32 D06 = 0xffffffff;
1295 u32 D07 = 0xffffffff;
1301 u32 D13 = 0xffffffff;
1304 u32 D16 = 0xffffffff;
1305 u32 D17 = 0xffffffff;
1310 u32 D22 = 0xffffffff;
1312 u32 D24 = 0xffffffff;
1314 u32 D26 = 0xffffffff;
1316 u32 D28 = 0xffffffff;
1317 u32 D29 = 0xffffffff;
1318 u32 D30 = 0xffffffff;
1319 u32 D31 = 0xffffffff;
1328 u32 D40 = 0xffffffff;
1329 u32 D41 = 0xffffffff;
1330 u32 D42 = 0xffffffff;
1332 u32 D44 = 0xffffffff;
1343 u32 D55 = 0xffffffff;
1346 u32 D58 = 0xffffffff;
1349 u32 D61 = 0xffffffff;
1350 u32 D62 = 0xffffffff;
1351 u32 D63 = 0xffffffff;
1355 k00, k01, k02, k03, k04, k05, k06,
1356 k07, k08, k09, k10, k11, k12, k13,
1357 k14, k15, k16, k17, k18, k19, k20,
1358 k21, k22, k23, k24, k25, k26, k27,
1359 k28, k29, k30, k31, K32, K33, K34,
1360 K35, K36, K37, K38, K39, K40, K41,
1361 K42, K43, K44, K45, K46, K47, K48,
1362 K49, K50, K51, K52, K53, K54, K55,
1363 D00, D01, D02, D03, D04, D05, D06, D07,
1364 D08, D09, D10, D11, D12, D13, D14, D15,
1365 D16, D17, D18, D19, D20, D21, D22, D23,
1366 D24, D25, D26, D27, D28, D29, D30, D31,
1367 D32, D33, D34, D35, D36, D37, D38, D39,
1368 D40, D41, D42, D43, D44, D45, D46, D47,
1369 D48, D49, D50, D51, D52, D53, D54, D55,
1370 D56, D57, D58, D59, D60, D61, D62, D63
1440 if (digests_cnt < 16)
1442 for (u32 d = 0; d < digests_cnt; d++)
1444 const u32 final_hash_pos = digests_offset + d;
1446 if (hashes_shown[final_hash_pos]) continue;
1450 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
1451 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
1456 for (int i = 0; i < 32; i++)
1458 const u32 b0 = -((search[0] >> i) & 1);
1459 const u32 b1 = -((search[1] >> i) & 1);
1461 tmpResult |= out[ 0 + i] ^ b0;
1462 tmpResult |= out[32 + i] ^ b1;
1465 if (tmpResult == 0xffffffff) continue;
1467 const u32 slice = 31 - __clz (~tmpResult);
1469 const u32x r0 = search[0];
1470 const u32x r1 = search[1];
1474 #include VECT_COMPARE_M
1483 for (int i = 0; i < 32; i++)
1485 out0[i] = out[ 0 + 31 - i];
1486 out1[i] = out[32 + 31 - i];
1489 transpose32c (out0);
1490 transpose32c (out1);
1493 for (int slice = 0; slice < 32; slice++)
1495 const u32x r0 = out0[31 - slice];
1496 const u32x r1 = out1[31 - slice];
1500 #include VECT_COMPARE_M
1506 __device__ static void m03000s (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1512 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1513 const u32 lid = threadIdx.x;
1588 const u32 w0s = pws[gid].i[0];
1589 const u32 w1s = pws[gid].i[1];
1591 const u32 K00 = -((w0s >> ( 0 + 7)) & 1);
1592 const u32 K01 = -((w0s >> ( 0 + 6)) & 1);
1593 const u32 K02 = -((w0s >> ( 0 + 5)) & 1);
1594 const u32 K03 = -((w0s >> ( 0 + 4)) & 1);
1595 const u32 K04 = -((w0s >> ( 0 + 3)) & 1);
1596 const u32 K05 = -((w0s >> ( 0 + 2)) & 1);
1597 const u32 K06 = -((w0s >> ( 0 + 1)) & 1);
1598 const u32 K07 = -((w0s >> ( 0 + 0)) & 1);
1599 const u32 K08 = -((w0s >> ( 8 + 7)) & 1);
1600 const u32 K09 = -((w0s >> ( 8 + 6)) & 1);
1601 const u32 K10 = -((w0s >> ( 8 + 5)) & 1);
1602 const u32 K11 = -((w0s >> ( 8 + 4)) & 1);
1603 const u32 K12 = -((w0s >> ( 8 + 3)) & 1);
1604 const u32 K13 = -((w0s >> ( 8 + 2)) & 1);
1605 const u32 K14 = -((w0s >> ( 8 + 1)) & 1);
1606 const u32 K15 = -((w0s >> ( 8 + 0)) & 1);
1607 const u32 K16 = -((w0s >> (16 + 7)) & 1);
1608 const u32 K17 = -((w0s >> (16 + 6)) & 1);
1609 const u32 K18 = -((w0s >> (16 + 5)) & 1);
1610 const u32 K19 = -((w0s >> (16 + 4)) & 1);
1611 const u32 K20 = -((w0s >> (16 + 3)) & 1);
1612 const u32 K21 = -((w0s >> (16 + 2)) & 1);
1613 const u32 K22 = -((w0s >> (16 + 1)) & 1);
1614 const u32 K23 = -((w0s >> (16 + 0)) & 1);
1615 const u32 K24 = -((w0s >> (24 + 7)) & 1);
1616 const u32 K25 = -((w0s >> (24 + 6)) & 1);
1617 const u32 K26 = -((w0s >> (24 + 5)) & 1);
1618 const u32 K27 = -((w0s >> (24 + 4)) & 1);
1619 const u32 K28 = -((w0s >> (24 + 3)) & 1);
1620 const u32 K29 = -((w0s >> (24 + 2)) & 1);
1621 const u32 K30 = -((w0s >> (24 + 1)) & 1);
1622 const u32 K31 = -((w0s >> (24 + 0)) & 1);
1623 const u32 K32 = -((w1s >> ( 0 + 7)) & 1);
1624 const u32 K33 = -((w1s >> ( 0 + 6)) & 1);
1625 const u32 K34 = -((w1s >> ( 0 + 5)) & 1);
1626 const u32 K35 = -((w1s >> ( 0 + 4)) & 1);
1627 const u32 K36 = -((w1s >> ( 0 + 3)) & 1);
1628 const u32 K37 = -((w1s >> ( 0 + 2)) & 1);
1629 const u32 K38 = -((w1s >> ( 0 + 1)) & 1);
1630 const u32 K39 = -((w1s >> ( 0 + 0)) & 1);
1631 const u32 K40 = -((w1s >> ( 8 + 7)) & 1);
1632 const u32 K41 = -((w1s >> ( 8 + 6)) & 1);
1633 const u32 K42 = -((w1s >> ( 8 + 5)) & 1);
1634 const u32 K43 = -((w1s >> ( 8 + 4)) & 1);
1635 const u32 K44 = -((w1s >> ( 8 + 3)) & 1);
1636 const u32 K45 = -((w1s >> ( 8 + 2)) & 1);
1637 const u32 K46 = -((w1s >> ( 8 + 1)) & 1);
1638 const u32 K47 = -((w1s >> ( 8 + 0)) & 1);
1639 const u32 K48 = -((w1s >> (16 + 7)) & 1);
1640 const u32 K49 = -((w1s >> (16 + 6)) & 1);
1641 const u32 K50 = -((w1s >> (16 + 5)) & 1);
1642 const u32 K51 = -((w1s >> (16 + 4)) & 1);
1643 const u32 K52 = -((w1s >> (16 + 3)) & 1);
1644 const u32 K53 = -((w1s >> (16 + 2)) & 1);
1645 const u32 K54 = -((w1s >> (16 + 1)) & 1);
1646 const u32 K55 = -((w1s >> (16 + 0)) & 1);
1652 const u32 bf_loops = bfs_cnt;
1654 for (u32 il_pos = 0, pc_pos = 0; il_pos < bf_loops; il_pos += 32, pc_pos++)
1689 k00 |= c_tm[pc_pos].b[ 0];
1690 k01 |= c_tm[pc_pos].b[ 1];
1691 k02 |= c_tm[pc_pos].b[ 2];
1692 k03 |= c_tm[pc_pos].b[ 3];
1693 k04 |= c_tm[pc_pos].b[ 4];
1694 k05 |= c_tm[pc_pos].b[ 5];
1695 k06 |= c_tm[pc_pos].b[ 6];
1696 k07 |= c_tm[pc_pos].b[ 7];
1697 k08 |= c_tm[pc_pos].b[ 8];
1698 k09 |= c_tm[pc_pos].b[ 9];
1699 k10 |= c_tm[pc_pos].b[10];
1700 k11 |= c_tm[pc_pos].b[11];
1701 k12 |= c_tm[pc_pos].b[12];
1702 k13 |= c_tm[pc_pos].b[13];
1703 k14 |= c_tm[pc_pos].b[14];
1704 k15 |= c_tm[pc_pos].b[15];
1705 k16 |= c_tm[pc_pos].b[16];
1706 k17 |= c_tm[pc_pos].b[17];
1707 k18 |= c_tm[pc_pos].b[18];
1708 k19 |= c_tm[pc_pos].b[19];
1709 k20 |= c_tm[pc_pos].b[20];
1710 k21 |= c_tm[pc_pos].b[21];
1711 k22 |= c_tm[pc_pos].b[22];
1712 k23 |= c_tm[pc_pos].b[23];
1713 k24 |= c_tm[pc_pos].b[24];
1714 k25 |= c_tm[pc_pos].b[25];
1715 k26 |= c_tm[pc_pos].b[26];
1716 k27 |= c_tm[pc_pos].b[27];
1717 k28 |= c_tm[pc_pos].b[28];
1718 k29 |= c_tm[pc_pos].b[29];
1719 k30 |= c_tm[pc_pos].b[30];
1720 k31 |= c_tm[pc_pos].b[31];
1725 u32 D03 = 0xffffffff;
1727 u32 D05 = 0xffffffff;
1728 u32 D06 = 0xffffffff;
1729 u32 D07 = 0xffffffff;
1735 u32 D13 = 0xffffffff;
1738 u32 D16 = 0xffffffff;
1739 u32 D17 = 0xffffffff;
1744 u32 D22 = 0xffffffff;
1746 u32 D24 = 0xffffffff;
1748 u32 D26 = 0xffffffff;
1750 u32 D28 = 0xffffffff;
1751 u32 D29 = 0xffffffff;
1752 u32 D30 = 0xffffffff;
1753 u32 D31 = 0xffffffff;
1762 u32 D40 = 0xffffffff;
1763 u32 D41 = 0xffffffff;
1764 u32 D42 = 0xffffffff;
1766 u32 D44 = 0xffffffff;
1777 u32 D55 = 0xffffffff;
1780 u32 D58 = 0xffffffff;
1783 u32 D61 = 0xffffffff;
1784 u32 D62 = 0xffffffff;
1785 u32 D63 = 0xffffffff;
1789 k00, k01, k02, k03, k04, k05, k06,
1790 k07, k08, k09, k10, k11, k12, k13,
1791 k14, k15, k16, k17, k18, k19, k20,
1792 k21, k22, k23, k24, k25, k26, k27,
1793 k28, k29, k30, k31, K32, K33, K34,
1794 K35, K36, K37, K38, K39, K40, K41,
1795 K42, K43, K44, K45, K46, K47, K48,
1796 K49, K50, K51, K52, K53, K54, K55,
1797 D00, D01, D02, D03, D04, D05, D06, D07,
1798 D08, D09, D10, D11, D12, D13, D14, D15,
1799 D16, D17, D18, D19, D20, D21, D22, D23,
1800 D24, D25, D26, D27, D28, D29, D30, D31,
1801 D32, D33, D34, D35, D36, D37, D38, D39,
1802 D40, D41, D42, D43, D44, D45, D46, D47,
1803 D48, D49, D50, D51, D52, D53, D54, D55,
1804 D56, D57, D58, D59, D60, D61, D62, D63
1809 tmpResult |= D00 ^ S00;
1810 tmpResult |= D01 ^ S01;
1811 tmpResult |= D02 ^ S02;
1812 tmpResult |= D03 ^ S03;
1813 tmpResult |= D04 ^ S04;
1814 tmpResult |= D05 ^ S05;
1815 tmpResult |= D06 ^ S06;
1816 tmpResult |= D07 ^ S07;
1817 tmpResult |= D08 ^ S08;
1818 tmpResult |= D09 ^ S09;
1819 tmpResult |= D10 ^ S10;
1820 tmpResult |= D11 ^ S11;
1821 tmpResult |= D12 ^ S12;
1822 tmpResult |= D13 ^ S13;
1823 tmpResult |= D14 ^ S14;
1824 tmpResult |= D15 ^ S15;
1826 if (tmpResult == 0xffffffff) continue;
1828 tmpResult |= D16 ^ S16;
1829 tmpResult |= D17 ^ S17;
1830 tmpResult |= D18 ^ S18;
1831 tmpResult |= D19 ^ S19;
1832 tmpResult |= D20 ^ S20;
1833 tmpResult |= D21 ^ S21;
1834 tmpResult |= D22 ^ S22;
1835 tmpResult |= D23 ^ S23;
1836 tmpResult |= D24 ^ S24;
1837 tmpResult |= D25 ^ S25;
1838 tmpResult |= D26 ^ S26;
1839 tmpResult |= D27 ^ S27;
1840 tmpResult |= D28 ^ S28;
1841 tmpResult |= D29 ^ S29;
1842 tmpResult |= D30 ^ S30;
1843 tmpResult |= D31 ^ S31;
1845 if (tmpResult == 0xffffffff) continue;
1847 tmpResult |= D32 ^ S32;
1848 tmpResult |= D33 ^ S33;
1849 tmpResult |= D34 ^ S34;
1850 tmpResult |= D35 ^ S35;
1851 tmpResult |= D36 ^ S36;
1852 tmpResult |= D37 ^ S37;
1853 tmpResult |= D38 ^ S38;
1854 tmpResult |= D39 ^ S39;
1855 tmpResult |= D40 ^ S40;
1856 tmpResult |= D41 ^ S41;
1857 tmpResult |= D42 ^ S42;
1858 tmpResult |= D43 ^ S43;
1859 tmpResult |= D44 ^ S44;
1860 tmpResult |= D45 ^ S45;
1861 tmpResult |= D46 ^ S46;
1862 tmpResult |= D47 ^ S47;
1864 if (tmpResult == 0xffffffff) continue;
1866 tmpResult |= D48 ^ S48;
1867 tmpResult |= D49 ^ S49;
1868 tmpResult |= D50 ^ S50;
1869 tmpResult |= D51 ^ S51;
1870 tmpResult |= D52 ^ S52;
1871 tmpResult |= D53 ^ S53;
1872 tmpResult |= D54 ^ S54;
1873 tmpResult |= D55 ^ S55;
1874 tmpResult |= D56 ^ S56;
1875 tmpResult |= D57 ^ S57;
1876 tmpResult |= D58 ^ S58;
1877 tmpResult |= D59 ^ S59;
1878 tmpResult |= D60 ^ S60;
1879 tmpResult |= D61 ^ S61;
1880 tmpResult |= D62 ^ S62;
1881 tmpResult |= D63 ^ S63;
1883 if (tmpResult == 0xffffffff) continue;
1885 const u32 slice = 31 - __clz (~tmpResult);
1887 #include VECT_COMPARE_S
1891 extern "C" __global__ void __launch_bounds__ (64, 1) m03000_tb (pw_t *pws)
1893 // not used here, inlined code
1896 extern "C" __global__ void __launch_bounds__ (32, 1) m03000_tm (const u32 *d_bfs, bs_word_t *d_tbs)
1898 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1900 const u32 block = gid / 32;
1901 const u32 slice = gid % 32;
1903 const u32 w0 = c_bfs[gid];
1906 for (int i = 0; i < 32; i += 8)
1908 atomicOr (&d_tbs[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice));
1909 atomicOr (&d_tbs[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice));
1910 atomicOr (&d_tbs[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice));
1911 atomicOr (&d_tbs[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice));
1912 atomicOr (&d_tbs[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice));
1913 atomicOr (&d_tbs[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice));
1914 atomicOr (&d_tbs[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice));
1915 atomicOr (&d_tbs[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice));
1919 extern "C" __global__ void __launch_bounds__ (64, 1) m03000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1925 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1926 const u32 lid = threadIdx.x;
1928 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
1929 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
1933 s_S[lid] = -((s0 >> lid - 0) & 1);
1937 s_S[lid] = -((s1 >> lid - 32) & 1);
1942 if (gid >= gid_max) return;
1948 m03000m (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1951 extern "C" __global__ void __launch_bounds__ (64, 1) m03000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1955 extern "C" __global__ void __launch_bounds__ (64, 1) m03000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1959 extern "C" __global__ void __launch_bounds__ (64, 1) m03000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1965 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1966 const u32 lid = threadIdx.x;
1968 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
1969 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
1973 s_S[lid] = -((s0 >> lid - 0) & 1);
1977 s_S[lid] = -((s1 >> lid - 32) & 1);
1982 if (gid >= gid_max) return;
1988 m03000s (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1991 extern "C" __global__ void __launch_bounds__ (64, 1) m03000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1995 extern "C" __global__ void __launch_bounds__ (64, 1) m03000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)