2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes for maxwell were taken from DeepLearningJohnDoe, license below
5 * : sboxes for others were takes fron JtR, license below
11 #include "include/constants.h"
12 #include "include/kernel_vendor.h"
27 #include "include/kernel_functions.c"
29 #include "common_nv.c"
32 #define VECT_COMPARE_S "check_single_vect1_comp4_warp_bs.c"
33 #define VECT_COMPARE_M "check_multi_vect1_comp4_warp_bs.c"
37 #define VECT_COMPARE_S "check_single_vect2_comp4_warp_bs.c"
38 #define VECT_COMPARE_M "check_multi_vect2_comp4_warp_bs.c"
42 #define VECT_COMPARE_S "check_single_vect4_comp4_warp_bs.c"
43 #define VECT_COMPARE_M "check_multi_vect4_comp4_warp_bs.c"
49 #define myselx(a,b,c) ((c) ? (b) : (a))
51 __device__ __constant__ u32x c_bfs[1024];
52 __device__ __constant__ bs_word_t c_tm[32];
53 __device__ __shared__ u32 s_S[64];
55 #if __CUDA_ARCH__ >= 500
58 // Bitslice DES S-boxes with LOP3.LUT instructions
59 // For NVIDIA Maxwell architecture and CUDA 7.5 RC
60 // by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
62 // Gate counts: 25 24 25 18 25 24 24 23
64 // Depth: 8 7 7 6 8 10 10 8
67 // Note that same S-box function with a lower gate count isn't necessarily faster.
69 // These Boolean expressions corresponding to DES S-boxes were
70 // discovered by <deeplearningjohndoe at gmail.com>
72 // This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
73 // Redistribution and use in source and binary forms, with or without
74 // modification, are permitted.
76 // The underlying mathematical formulas are NOT copyrighted.
79 #define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));
81 __device__ static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
83 LUT(xAA55AA5500550055, a1, a4, a6, 0xC1)
84 LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
85 LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
86 LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
87 LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
88 LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
89 LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
90 LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
91 LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
92 LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
93 LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
94 LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
95 LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
96 LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
97 LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
98 LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
99 LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
100 LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
101 LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
102 LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
103 LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
104 LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
105 LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
106 LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
107 LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)
115 __device__ static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
117 LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97)
118 LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
119 LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76)
120 LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
121 LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
122 LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65)
123 LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
124 LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
125 LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
126 LUT(x3333CCCC00000000, a2, a5, a6, 0x14)
127 LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
128 LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
129 LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
130 LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
131 LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
132 LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
133 LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
134 LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
135 LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
136 LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
137 LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
138 LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
139 LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
140 LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)
148 __device__ static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
150 LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
151 LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
152 LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
153 LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
154 LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
155 LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
156 LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
157 LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49)
158 LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
159 LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
160 LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
161 LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
162 LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
163 LUT(x500F500F500F500F, a1, a3, a4, 0x98)
164 LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
165 LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
166 LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
167 LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
168 LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
169 LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
170 LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
171 LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
172 LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
173 LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
174 LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)
182 __device__ static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
184 LUT(x55F055F055F055F0, a1, a3, a4, 0x72)
185 LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
186 LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59)
187 LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
188 LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
189 LUT(x9999666699996666, a1, a2, a5, 0x69)
190 LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
191 LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
192 LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
193 LUT(x4848484848484848, a1, a2, a3, 0x12)
194 LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
195 LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
196 LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
197 LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
198 LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
199 LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
200 LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
201 LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)
209 __device__ static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
211 LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
212 LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9)
213 LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
214 LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
215 LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
216 LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
217 LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
218 LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
219 LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
220 LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
221 LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
222 LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
223 LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
224 LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
225 LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
226 LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
227 LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
228 LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
229 LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
230 LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
231 LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
232 LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
233 LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
234 LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
235 LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)
243 __device__ static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
245 LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2)
246 LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
247 LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
248 LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
249 LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
250 LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
251 LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
252 LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
253 LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
254 LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
255 LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
256 LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
257 LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
258 LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
259 LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
260 LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
261 LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
262 LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
263 LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
264 LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
265 LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
266 LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
267 LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
268 LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)
276 __device__ static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
278 LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
279 LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
280 LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
281 LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
282 LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
283 LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
284 LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
285 LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
286 LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
287 LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
288 LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
289 LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
290 LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
291 LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
292 LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
293 LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23)
294 LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
295 LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
296 LUT(xA050A050A050A050, a1, a3, a4, 0x21)
297 LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
298 LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
299 LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
300 LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
301 LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)
309 __device__ static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
311 LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
312 LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
313 LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
314 LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
315 LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
316 LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
317 LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
318 LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
319 LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
320 LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
321 LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
322 LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
323 LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
324 LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
325 LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
326 LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
327 LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
328 LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
329 LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
330 LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
331 LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
332 LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
333 LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)
344 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
345 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
347 * Gate counts: 49 44 46 33 48 46 46 41
350 * Several same-gate-count expressions for each S-box are included (for use on
351 * different CPUs/GPUs).
353 * These Boolean expressions corresponding to DES S-boxes have been generated
354 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
355 * John the Ripper password cracker: http://www.openwall.com/john/
356 * Being mathematical formulas, they are not copyrighted and are free for reuse
359 * This file (a specific representation of the S-box expressions, surrounding
360 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
361 * Redistribution and use in source and binary forms, with or without
362 * modification, are permitted. (This is a heavily cut-down "BSD license".)
364 * The effort has been sponsored by Rapid7: http://www.rapid7.com
367 __device__ static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
369 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
371 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
372 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
373 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
374 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
375 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
376 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
377 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
378 u32 x00, x01, x10, x11, x20, x21, x30, x31;
380 x55005500 = a1 & ~a5;
381 x5A0F5A0F = a4 ^ x55005500;
384 x22226666 = x3333FFFF & x66666666;
385 x2D2D6969 = a4 ^ x22226666;
386 x25202160 = x2D2D6969 & ~x5A0F5A0F;
389 x33CCCC33 = a3 ^ x00FFFF00;
390 x4803120C = x5A0F5A0F & ~x33CCCC33;
391 x2222FFFF = a6 | x22226666;
392 x6A21EDF3 = x4803120C ^ x2222FFFF;
393 x4A01CC93 = x6A21EDF3 & ~x25202160;
396 x7F75FFFF = x6A21EDF3 | x5555FFFF;
397 x00D20096 = a5 & ~x2D2D6969;
398 x7FA7FF69 = x7F75FFFF ^ x00D20096;
400 x0A0A0000 = a4 & ~x5555FFFF;
401 x0AD80096 = x00D20096 ^ x0A0A0000;
402 x00999900 = x00FFFF00 & ~x66666666;
403 x0AD99996 = x0AD80096 | x00999900;
405 x22332233 = a3 & ~x55005500;
406 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
407 x054885C0 = x257AA5F0 & ~x22332233;
408 xFAB77A3F = ~x054885C0;
409 x2221EDF3 = x3333FFFF & x6A21EDF3;
410 xD89697CC = xFAB77A3F ^ x2221EDF3;
411 x20 = x7FA7FF69 & ~a2;
412 x21 = x20 ^ xD89697CC;
415 x05B77AC0 = x00FFFF00 ^ x054885C0;
416 x05F77AD6 = x00D20096 | x05B77AC0;
417 x36C48529 = x3333FFFF ^ x05F77AD6;
418 x6391D07C = a1 ^ x36C48529;
419 xBB0747B0 = xD89697CC ^ x6391D07C;
420 x00 = x25202160 | a2;
421 x01 = x00 ^ xBB0747B0;
424 x4C460000 = x3333FFFF ^ x7F75FFFF;
425 x4EDF9996 = x0AD99996 | x4C460000;
426 x2D4E49EA = x6391D07C ^ x4EDF9996;
427 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
428 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
429 x10 = x4A01CC93 | a2;
430 x11 = x10 ^ x96B1B65A;
433 x5AFF5AFF = a5 | x5A0F5A0F;
434 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
435 x4201C010 = x4A01CC93 & x6391D07C;
436 x10B0D205 = x52B11215 ^ x4201C010;
437 x30 = x10B0D205 | a2;
438 x31 = x30 ^ x0AD99996;
442 __device__ static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
445 u32 x55550000, x00AA00FF, x33BB33FF;
446 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
447 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
448 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
449 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
450 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
451 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
452 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
453 u32 x00, x01, x10, x11, x20, x21, x30, x31;
457 x55550000 = a1 & ~a6;
458 x00AA00FF = a5 & ~x55550000;
459 x33BB33FF = a2 | x00AA00FF;
461 x33CC0000 = x33CC33CC & ~a6;
462 x11441144 = a1 & x33CC33CC;
463 x11BB11BB = a5 ^ x11441144;
464 x003311BB = x11BB11BB & ~x33CC0000;
467 x336600FF = x00AA00FF ^ x33CC0000;
468 x332200FF = x33BB33FF & x336600FF;
469 x332200F0 = x332200FF & ~x00000F0F;
471 x0302000F = a3 & x332200FF;
473 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
474 x33CCCC33 = a6 ^ x33CC33CC;
475 x33CCC030 = x33CCCC33 & ~x00000F0F;
476 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
477 x10 = a4 & ~x332200F0;
478 x11 = x10 ^ x9A646A95;
481 x00333303 = a2 & ~x33CCC030;
482 x118822B8 = x11BB11BB ^ x00333303;
483 xA8208805 = xA9A8AAA5 & ~x118822B8;
484 x3CC3C33C = a3 ^ x33CCCC33;
485 x94E34B39 = xA8208805 ^ x3CC3C33C;
486 x00 = x33BB33FF & ~a4;
487 x01 = x00 ^ x94E34B39;
490 x0331330C = x0302000F ^ x00333303;
491 x3FF3F33C = x3CC3C33C | x0331330C;
492 xA9DF596A = x33BB33FF ^ x9A646A95;
493 xA9DF5F6F = x00000F0F | xA9DF596A;
494 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
496 xA9466A6A = x332200FF ^ x9A646A95;
497 x3DA52153 = x94E34B39 ^ xA9466A6A;
498 x29850143 = xA9DF5F6F & x3DA52153;
499 x33C0330C = x33CC33CC & x3FF3F33C;
500 x1A45324F = x29850143 ^ x33C0330C;
501 x20 = x1A45324F | a4;
502 x21 = x20 ^ x962CAC53;
505 x0A451047 = x1A45324F & ~x118822B8;
506 xBBDFDD7B = x33CCCC33 | xA9DF596A;
507 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
508 x30 = x003311BB | a4;
509 x31 = x30 ^ xB19ACD3C;
513 __device__ static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
515 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
516 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
517 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
518 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
519 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
520 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
521 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
522 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
523 u32 x00, x01, x10, x11, x20, x21, x30, x31;
525 x44444444 = a1 & ~a2;
527 x4F4FF4F4 = x44444444 | x0F0FF0F0;
529 x00AAAA00 = x00FFFF00 & ~a1;
530 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
532 x3C3CC3C3 = a2 ^ x0F0FF0F0;
533 x3C3C0000 = x3C3CC3C3 & ~a6;
534 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
535 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
537 x00005EF4 = a6 & x4FE55EF4;
538 x00FF5EFF = a4 | x00005EF4;
539 x00555455 = a1 & x00FF5EFF;
540 x3C699796 = x3C3CC3C3 ^ x00555455;
541 x30 = x4FE55EF4 & ~a5;
542 x31 = x30 ^ x3C699796;
545 x000FF000 = x0F0FF0F0 & x00FFFF00;
547 x26D9A15E = x7373F4F4 ^ x55AA55AA;
548 x2FDFAF5F = a3 | x26D9A15E;
549 x2FD00F5F = x2FDFAF5F & ~x000FF000;
551 x55AAFFAA = x00AAAA00 | x55AA55AA;
552 x28410014 = x3C699796 & ~x55AAFFAA;
554 x000000CC = x000000FF & ~a2;
555 x284100D8 = x28410014 ^ x000000CC;
557 x204100D0 = x7373F4F4 & x284100D8;
558 x3C3CC3FF = x3C3CC3C3 | x000000FF;
559 x1C3CC32F = x3C3CC3FF & ~x204100D0;
560 x4969967A = a1 ^ x1C3CC32F;
561 x10 = x2FD00F5F & a5;
562 x11 = x10 ^ x4969967A;
565 x4CC44CC4 = x4FE55EF4 & ~a2;
566 x40C040C0 = x4CC44CC4 & ~a3;
567 xC3C33C3C = ~x3C3CC3C3;
568 x9669C396 = x55AAFFAA ^ xC3C33C3C;
569 xD6A98356 = x40C040C0 ^ x9669C396;
570 x00 = a5 & ~x0C840A00;
571 x01 = x00 ^ xD6A98356;
574 xD6E9C3D6 = x40C040C0 | x9669C396;
575 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
576 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
577 x001A000B = a4 & ~x4FE55EF4;
578 x9A1F2D1B = x9A072D12 | x001A000B;
579 x20 = a5 & ~x284100D8;
580 x21 = x20 ^ x9A1F2D1B;
584 __device__ static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
586 u32 x5A5A5A5A, x0F0FF0F0;
587 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
588 x52FBCA0F, x61C8F93C;
589 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
590 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
591 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
592 u32 x00, x01, x10, x11, x20, x21, x30, x31;
597 x33FFCC00 = a5 ^ x33FF33FF;
598 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
599 x0C0CC0C0 = x0F0FF0F0 & ~a2;
600 x0CF3C03F = a4 ^ x0C0CC0C0;
601 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
602 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
603 x61C8F93C = a2 ^ x52FBCA0F;
605 x00C0C03C = x0CF3C03F & x61C8F93C;
606 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
607 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
608 x30908326 = x3B92A366 & ~x0F0F30C0;
609 x3C90B3D6 = x0C0030F0 ^ x30908326;
612 x0C0CFFFF = a5 | x0C0CC0C0;
613 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
614 x04124C11 = x379E5C99 & ~x33CC33CC;
615 x56E9861E = x52FBCA0F ^ x04124C11;
616 x00 = a6 & ~x3C90B3D6;
617 x01 = x00 ^ x56E9861E;
620 xA91679E1 = ~x56E9861E;
621 x10 = x3C90B3D6 & ~a6;
622 x11 = x10 ^ xA91679E1;
625 x9586CA37 = x3C90B3D6 ^ xA91679E1;
626 x8402C833 = x9586CA37 & ~x33CC33CC;
627 x84C2C83F = x00C0C03C | x8402C833;
628 xB35C94A6 = x379E5C99 ^ x84C2C83F;
629 x20 = x61C8F93C | a6;
630 x21 = x20 ^ xB35C94A6;
633 x30 = a6 & x61C8F93C;
634 x31 = x30 ^ xB35C94A6;
638 __device__ static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
640 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
641 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
642 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
643 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
644 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
645 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
646 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
647 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
648 u32 x00, x01, x10, x11, x20, x21, x30, x31;
651 x77770000 = x77777777 & ~a6;
652 x22225555 = a1 ^ x77770000;
653 x11116666 = a3 ^ x22225555;
654 x1F1F6F6F = a4 | x11116666;
656 x70700000 = x77770000 & ~a4;
657 x43433333 = a3 ^ x70700000;
658 x00430033 = a5 & x43433333;
659 x55557777 = a1 | x11116666;
660 x55167744 = x00430033 ^ x55557777;
661 x5A19784B = a4 ^ x55167744;
663 x5A1987B4 = a6 ^ x5A19784B;
664 x7A3BD7F5 = x22225555 | x5A1987B4;
665 x003B00F5 = a5 & x7A3BD7F5;
666 x221955A0 = x22225555 ^ x003B00F5;
667 x05050707 = a4 & x55557777;
668 x271C52A7 = x221955A0 ^ x05050707;
670 x2A2A82A0 = x7A3BD7F5 & ~a1;
671 x6969B193 = x43433333 ^ x2A2A82A0;
672 x1FE06F90 = a5 ^ x1F1F6F6F;
673 x16804E00 = x1FE06F90 & ~x6969B193;
674 xE97FB1FF = ~x16804E00;
675 x20 = xE97FB1FF & ~a2;
676 x21 = x20 ^ x5A19784B;
679 x43403302 = x43433333 & ~x003B00F5;
680 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
681 x37DEFFB7 = x271C52A7 | x35CAED30;
682 x349ECCB5 = x37DEFFB7 & ~x43403302;
683 x0B01234A = x1F1F6F6F & ~x349ECCB5;
685 x101884B4 = x5A1987B4 & x349ECCB5;
686 x0FF8EB24 = x1FE06F90 ^ x101884B4;
687 x41413333 = x43433333 & x55557777;
688 x4FF9FB37 = x0FF8EB24 | x41413333;
689 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
690 x30 = x4FC2FBC2 & a2;
691 x31 = x30 ^ x271C52A7;
694 x22222222 = a1 ^ x77777777;
695 x16BCEE97 = x349ECCB5 ^ x22222222;
696 x0F080B04 = a4 & x0FF8EB24;
697 x19B4E593 = x16BCEE97 ^ x0F080B04;
698 x00 = x0B01234A | a2;
699 x01 = x00 ^ x19B4E593;
702 x5C5C5C5C = x1F1F6F6F ^ x43433333;
703 x4448184C = x5C5C5C5C & ~x19B4E593;
704 x2DDABE71 = x22225555 ^ x0FF8EB24;
705 x6992A63D = x4448184C ^ x2DDABE71;
706 x10 = x1F1F6F6F & a2;
707 x11 = x10 ^ x6992A63D;
711 __device__ static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
714 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
715 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
716 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
717 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
718 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
719 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
720 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
721 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
722 u32 x00, x01, x10, x11, x20, x21, x30, x31;
727 x11115555 = a1 & x3333FFFF;
728 x22DD6699 = x33CC33CC ^ x11115555;
729 x22DD9966 = a6 ^ x22DD6699;
730 x00220099 = a5 & ~x22DD9966;
732 x00551144 = a1 & x22DD9966;
733 x33662277 = a2 ^ x00551144;
735 x7B7E7A7F = x33662277 | x5A5A5A5A;
736 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
738 x09030C06 = a3 & x59A31CE6;
739 x09030000 = x09030C06 & ~a6;
740 x336622FF = x00220099 | x33662277;
741 x3A6522FF = x09030000 ^ x336622FF;
742 x30 = x3A6522FF & a4;
743 x31 = x30 ^ x59A31CE6;
746 x484D494C = a2 ^ x7B7E7A7F;
747 x0000B6B3 = a6 & ~x484D494C;
748 x0F0FB9BC = a3 ^ x0000B6B3;
749 x00FC00F9 = a5 & ~x09030C06;
750 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
752 x5DF75DF7 = a1 | x59A31CE6;
753 x116600F7 = x336622FF & x5DF75DF7;
754 x1E69B94B = x0F0FB9BC ^ x116600F7;
755 x1668B94B = x1E69B94B & ~x09030000;
756 x20 = x00220099 | a4;
757 x21 = x20 ^ x1668B94B;
760 x7B7B7B7B = a2 | x5A5A5A5A;
761 x411E5984 = x3A6522FF ^ x7B7B7B7B;
762 x1FFFFDFD = x11115555 | x0FFFB9FD;
763 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
765 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
766 x004B002D = a5 & ~x3CB4DFD2;
767 xB7B2B6B3 = ~x484D494C;
768 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
769 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
770 x10 = xCC82CDE5 & ~a4;
771 x11 = x10 ^ x5EE1A479;
774 x0055EEBB = a6 ^ x00551144;
775 x5A5AECE9 = a1 ^ x0F0FB9BC;
776 x0050ECA9 = x0055EEBB & x5A5AECE9;
777 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
778 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
779 x00 = x0FFFB9FD & ~a4;
780 x01 = x00 ^ xC59A2D67;
784 __device__ static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
786 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
787 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
788 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
789 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
790 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
791 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
792 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
793 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
794 u32 x00, x01, x10, x11, x20, x21, x30, x31;
797 x3CC33CC3 = a3 ^ x0FF00FF0;
798 x00003CC3 = a6 & x3CC33CC3;
799 x0F000F00 = a4 & x0FF00FF0;
800 x5A555A55 = a2 ^ x0F000F00;
801 x00001841 = x00003CC3 & x5A555A55;
803 x00000F00 = a6 & x0F000F00;
804 x33333C33 = a3 ^ x00000F00;
805 x7B777E77 = x5A555A55 | x33333C33;
806 x0FF0F00F = a6 ^ x0FF00FF0;
807 x74878E78 = x7B777E77 ^ x0FF0F00F;
808 x30 = a1 & ~x00001841;
809 x31 = x30 ^ x74878E78;
812 x003C003C = a5 & ~x3CC33CC3;
813 x5A7D5A7D = x5A555A55 | x003C003C;
814 x333300F0 = x00003CC3 ^ x33333C33;
815 x694E5A8D = x5A7D5A7D ^ x333300F0;
817 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
818 x000F0303 = a4 & ~x0FF0CCCC;
819 x5A505854 = x5A555A55 & ~x000F0303;
820 x33CC000F = a5 ^ x333300F0;
821 x699C585B = x5A505854 ^ x33CC000F;
823 x7F878F78 = x0F000F00 | x74878E78;
824 x21101013 = a3 & x699C585B;
825 x7F979F7B = x7F878F78 | x21101013;
826 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
827 x4F9493BB = x7F979F7B ^ x30030CC0;
828 x00 = x4F9493BB & ~a1;
829 x01 = x00 ^ x694E5A8D;
832 x6F9CDBFB = x699C585B | x4F9493BB;
833 x0000DBFB = a6 & x6F9CDBFB;
834 x00005151 = a2 & x0000DBFB;
835 x26DAC936 = x694E5A8D ^ x4F9493BB;
836 x26DA9867 = x00005151 ^ x26DAC936;
838 x27DA9877 = x21101013 | x26DA9867;
839 x27DA438C = x0000DBFB ^ x27DA9877;
840 x2625C9C9 = a5 ^ x26DAC936;
841 x27FFCBCD = x27DA438C | x2625C9C9;
842 x20 = x27FFCBCD & a1;
843 x21 = x20 ^ x699C585B;
846 x27FF1036 = x0000DBFB ^ x27FFCBCD;
847 x27FF103E = x003C003C | x27FF1036;
848 xB06B6C44 = ~x4F9493BB;
849 x97947C7A = x27FF103E ^ xB06B6C44;
850 x10 = x97947C7A & ~a1;
851 x11 = x10 ^ x26DA9867;
855 __device__ static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
857 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
858 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
859 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
860 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
861 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
862 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
863 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
864 u32 x00, x01, x10, x11, x20, x21, x30, x31;
866 x0C0C0C0C = a3 & ~a2;
867 x0000F0F0 = a5 & ~a3;
868 x00FFF00F = a4 ^ x0000F0F0;
869 x00555005 = a1 & x00FFF00F;
870 x00515001 = x00555005 & ~x0C0C0C0C;
872 x33000330 = a2 & ~x00FFF00F;
873 x77555775 = a1 | x33000330;
874 x30303030 = a2 & ~a3;
875 x3030CFCF = a5 ^ x30303030;
876 x30104745 = x77555775 & x3030CFCF;
877 x30555745 = x00555005 | x30104745;
879 xFF000FF0 = ~x00FFF00F;
880 xCF1048B5 = x30104745 ^ xFF000FF0;
881 x080A080A = a3 & ~x77555775;
882 xC71A40BF = xCF1048B5 ^ x080A080A;
883 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
884 x10 = x00515001 | a6;
885 x11 = x10 ^ xCB164CB3;
888 x9E4319E6 = a1 ^ xCB164CB3;
889 x000019E6 = a5 & x9E4319E6;
890 xF429738C = a2 ^ xC71A40BF;
891 xF4296A6A = x000019E6 ^ xF429738C;
892 xC729695A = x33000330 ^ xF4296A6A;
894 xC47C3D2F = x30555745 ^ xF4296A6A;
895 xF77F3F3F = a2 | xC47C3D2F;
896 x9E43E619 = a5 ^ x9E4319E6;
897 x693CD926 = xF77F3F3F ^ x9E43E619;
898 x20 = x30555745 & a6;
899 x21 = x20 ^ x693CD926;
902 xF719A695 = x3030CFCF ^ xC729695A;
903 xF4FF73FF = a4 | xF429738C;
904 x03E6D56A = xF719A695 ^ xF4FF73FF;
905 x56B3803F = a1 ^ x03E6D56A;
906 x30 = x56B3803F & a6;
907 x31 = x30 ^ xC729695A;
910 xF700A600 = xF719A695 & ~a4;
911 x61008000 = x693CD926 & xF700A600;
912 x03B7856B = x00515001 ^ x03E6D56A;
913 x62B7056B = x61008000 ^ x03B7856B;
914 x00 = x62B7056B | a6;
915 x01 = x00 ^ xC729695A;
921 #define SWAP(a, b) { u32 tmp=a;a=b;b=tmp; }
957 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
958 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
959 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
960 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
961 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
962 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
963 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
964 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
965 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
966 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
967 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
968 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
969 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
970 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
971 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
972 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
974 __device__ static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 &D00, u32 &D01, u32 &D02, u32 &D03, u32 &D04, u32 &D05, u32 &D06, u32 &D07, u32 &D08, u32 &D09, u32 &D10, u32 &D11, u32 &D12, u32 &D13, u32 &D14, u32 &D15, u32 &D16, u32 &D17, u32 &D18, u32 &D19, u32 &D20, u32 &D21, u32 &D22, u32 &D23, u32 &D24, u32 &D25, u32 &D26, u32 &D27, u32 &D28, u32 &D29, u32 &D30, u32 &D31, u32 &D32, u32 &D33, u32 &D34, u32 &D35, u32 &D36, u32 &D37, u32 &D38, u32 &D39, u32 &D40, u32 &D41, u32 &D42, u32 &D43, u32 &D44, u32 &D45, u32 &D46, u32 &D47, u32 &D48, u32 &D49, u32 &D50, u32 &D51, u32 &D52, u32 &D53, u32 &D54, u32 &D55, u32 &D56, u32 &D57, u32 &D58, u32 &D59, u32 &D60, u32 &D61, u32 &D62, u32 &D63)
976 sXXX_DECL u32 s001 = (0x001 & SALT) ? 0xffffffff : 0;
977 sXXX_DECL u32 s002 = (0x002 & SALT) ? 0xffffffff : 0;
978 sXXX_DECL u32 s004 = (0x004 & SALT) ? 0xffffffff : 0;
979 sXXX_DECL u32 s008 = (0x008 & SALT) ? 0xffffffff : 0;
980 sXXX_DECL u32 s010 = (0x010 & SALT) ? 0xffffffff : 0;
981 sXXX_DECL u32 s020 = (0x020 & SALT) ? 0xffffffff : 0;
982 sXXX_DECL u32 s040 = (0x040 & SALT) ? 0xffffffff : 0;
983 sXXX_DECL u32 s080 = (0x080 & SALT) ? 0xffffffff : 0;
984 sXXX_DECL u32 s100 = (0x100 & SALT) ? 0xffffffff : 0;
985 sXXX_DECL u32 s200 = (0x200 & SALT) ? 0xffffffff : 0;
986 sXXX_DECL u32 s400 = (0x400 & SALT) ? 0xffffffff : 0;
987 sXXX_DECL u32 s800 = (0x800 & SALT) ? 0xffffffff : 0;
989 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
990 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
991 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
992 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
993 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
994 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
995 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
996 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
998 for (u32 ii = 0; ii < 25; ii++)
1000 #if __CUDA_ARCH__ >= 500
1006 for (u32 i = 0; i < 2; i++)
1008 if (i) KEYSET10 else KEYSET00
1010 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
1011 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
1012 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
1013 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
1014 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
1015 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
1016 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
1017 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
1019 if (i) KEYSET11 else KEYSET01
1021 s1(myselx (D31, D15, s001) ^ k00, myselx (D00, D16, s002) ^ k01, myselx (D01, D17, s004) ^ k02, myselx (D02, D18, s008) ^ k03, myselx (D03, D19, s010) ^ k04, myselx (D04, D20, s020) ^ k05, &D40, &D48, &D54, &D62);
1022 s2(myselx (D03, D19, s040) ^ k06, myselx (D04, D20, s080) ^ k07, myselx (D05, D21, s100) ^ k08, myselx (D06, D22, s200) ^ k09, myselx (D07, D23, s400) ^ k10, myselx (D08, D24, s800) ^ k11, &D44, &D59, &D33, &D49);
1023 s3( D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37);
1024 s4( D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32);
1025 s5(myselx (D15, D31, s001) ^ k24, myselx (D16, D00, s002) ^ k25, myselx (D17, D01, s004) ^ k26, myselx (D18, D02, s008) ^ k27, myselx (D19, D03, s010) ^ k28, myselx (D20, D04, s020) ^ k29, &D39, &D45, &D56, &D34);
1026 s6(myselx (D19, D03, s040) ^ k30, myselx (D20, D04, s080) ^ k31, myselx (D21, D05, s100) ^ k32, myselx (D22, D06, s200) ^ k33, myselx (D23, D07, s400) ^ k34, myselx (D24, D08, s800) ^ k35, &D35, &D60, &D42, &D50);
1027 s7( D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38);
1028 s8( D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52);
1030 if (i) KEYSET12 else KEYSET02
1032 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
1033 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
1034 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
1035 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
1036 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
1037 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
1038 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
1039 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
1041 if (i) KEYSET13 else KEYSET03
1043 s1(myselx (D31, D15, s001) ^ k00, myselx (D00, D16, s002) ^ k01, myselx (D01, D17, s004) ^ k02, myselx (D02, D18, s008) ^ k03, myselx (D03, D19, s010) ^ k04, myselx (D04, D20, s020) ^ k05, &D40, &D48, &D54, &D62);
1044 s2(myselx (D03, D19, s040) ^ k06, myselx (D04, D20, s080) ^ k07, myselx (D05, D21, s100) ^ k08, myselx (D06, D22, s200) ^ k09, myselx (D07, D23, s400) ^ k10, myselx (D08, D24, s800) ^ k11, &D44, &D59, &D33, &D49);
1045 s3( D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37);
1046 s4( D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32);
1047 s5(myselx (D15, D31, s001) ^ k24, myselx (D16, D00, s002) ^ k25, myselx (D17, D01, s004) ^ k26, myselx (D18, D02, s008) ^ k27, myselx (D19, D03, s010) ^ k28, myselx (D20, D04, s020) ^ k29, &D39, &D45, &D56, &D34);
1048 s6(myselx (D19, D03, s040) ^ k30, myselx (D20, D04, s080) ^ k31, myselx (D21, D05, s100) ^ k32, myselx (D22, D06, s200) ^ k33, myselx (D23, D07, s400) ^ k34, myselx (D24, D08, s800) ^ k35, &D35, &D60, &D42, &D50);
1049 s7( D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38);
1050 s8( D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52);
1052 if (i) KEYSET14 else KEYSET04
1054 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
1055 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
1056 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
1057 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
1058 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
1059 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
1060 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
1061 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
1063 if (i) KEYSET15 else KEYSET05
1065 s1(myselx (D31, D15, s001) ^ k00, myselx (D00, D16, s002) ^ k01, myselx (D01, D17, s004) ^ k02, myselx (D02, D18, s008) ^ k03, myselx (D03, D19, s010) ^ k04, myselx (D04, D20, s020) ^ k05, &D40, &D48, &D54, &D62);
1066 s2(myselx (D03, D19, s040) ^ k06, myselx (D04, D20, s080) ^ k07, myselx (D05, D21, s100) ^ k08, myselx (D06, D22, s200) ^ k09, myselx (D07, D23, s400) ^ k10, myselx (D08, D24, s800) ^ k11, &D44, &D59, &D33, &D49);
1067 s3( D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37);
1068 s4( D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32);
1069 s5(myselx (D15, D31, s001) ^ k24, myselx (D16, D00, s002) ^ k25, myselx (D17, D01, s004) ^ k26, myselx (D18, D02, s008) ^ k27, myselx (D19, D03, s010) ^ k28, myselx (D20, D04, s020) ^ k29, &D39, &D45, &D56, &D34);
1070 s6(myselx (D19, D03, s040) ^ k30, myselx (D20, D04, s080) ^ k31, myselx (D21, D05, s100) ^ k32, myselx (D22, D06, s200) ^ k33, myselx (D23, D07, s400) ^ k34, myselx (D24, D08, s800) ^ k35, &D35, &D60, &D42, &D50);
1071 s7( D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38);
1072 s8( D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52);
1074 if (i) KEYSET16 else KEYSET06
1076 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
1077 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
1078 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
1079 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
1080 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
1081 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
1082 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
1083 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
1085 if (i) KEYSET17 else KEYSET07
1087 s1(myselx (D31, D15, s001) ^ k00, myselx (D00, D16, s002) ^ k01, myselx (D01, D17, s004) ^ k02, myselx (D02, D18, s008) ^ k03, myselx (D03, D19, s010) ^ k04, myselx (D04, D20, s020) ^ k05, &D40, &D48, &D54, &D62);
1088 s2(myselx (D03, D19, s040) ^ k06, myselx (D04, D20, s080) ^ k07, myselx (D05, D21, s100) ^ k08, myselx (D06, D22, s200) ^ k09, myselx (D07, D23, s400) ^ k10, myselx (D08, D24, s800) ^ k11, &D44, &D59, &D33, &D49);
1089 s3( D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37);
1090 s4( D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32);
1091 s5(myselx (D15, D31, s001) ^ k24, myselx (D16, D00, s002) ^ k25, myselx (D17, D01, s004) ^ k26, myselx (D18, D02, s008) ^ k27, myselx (D19, D03, s010) ^ k28, myselx (D20, D04, s020) ^ k29, &D39, &D45, &D56, &D34);
1092 s6(myselx (D19, D03, s040) ^ k30, myselx (D20, D04, s080) ^ k31, myselx (D21, D05, s100) ^ k32, myselx (D22, D06, s200) ^ k33, myselx (D23, D07, s400) ^ k34, myselx (D24, D08, s800) ^ k35, &D35, &D60, &D42, &D50);
1093 s7( D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38);
1094 s8( D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52);
1103 __device__ static void transpose32c (u32 data[32])
1105 #define swap(x,y,j,m) \
1106 t = ((x) ^ ((y) >> (j))) & (m); \
1108 (y) = (y) ^ (t << (j));
1112 swap (data[ 0], data[16], 16, 0x0000ffff);
1113 swap (data[ 1], data[17], 16, 0x0000ffff);
1114 swap (data[ 2], data[18], 16, 0x0000ffff);
1115 swap (data[ 3], data[19], 16, 0x0000ffff);
1116 swap (data[ 4], data[20], 16, 0x0000ffff);
1117 swap (data[ 5], data[21], 16, 0x0000ffff);
1118 swap (data[ 6], data[22], 16, 0x0000ffff);
1119 swap (data[ 7], data[23], 16, 0x0000ffff);
1120 swap (data[ 8], data[24], 16, 0x0000ffff);
1121 swap (data[ 9], data[25], 16, 0x0000ffff);
1122 swap (data[10], data[26], 16, 0x0000ffff);
1123 swap (data[11], data[27], 16, 0x0000ffff);
1124 swap (data[12], data[28], 16, 0x0000ffff);
1125 swap (data[13], data[29], 16, 0x0000ffff);
1126 swap (data[14], data[30], 16, 0x0000ffff);
1127 swap (data[15], data[31], 16, 0x0000ffff);
1128 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
1129 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
1130 swap (data[ 2], data[10], 8, 0x00ff00ff);
1131 swap (data[ 3], data[11], 8, 0x00ff00ff);
1132 swap (data[ 4], data[12], 8, 0x00ff00ff);
1133 swap (data[ 5], data[13], 8, 0x00ff00ff);
1134 swap (data[ 6], data[14], 8, 0x00ff00ff);
1135 swap (data[ 7], data[15], 8, 0x00ff00ff);
1136 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
1137 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
1138 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
1139 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
1140 swap (data[ 0], data[ 2], 2, 0x33333333);
1141 swap (data[ 1], data[ 3], 2, 0x33333333);
1142 swap (data[ 0], data[ 1], 1, 0x55555555);
1143 swap (data[ 2], data[ 3], 1, 0x55555555);
1144 swap (data[ 4], data[ 6], 2, 0x33333333);
1145 swap (data[ 5], data[ 7], 2, 0x33333333);
1146 swap (data[ 4], data[ 5], 1, 0x55555555);
1147 swap (data[ 6], data[ 7], 1, 0x55555555);
1148 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
1149 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
1150 swap (data[10], data[14], 4, 0x0f0f0f0f);
1151 swap (data[11], data[15], 4, 0x0f0f0f0f);
1152 swap (data[ 8], data[10], 2, 0x33333333);
1153 swap (data[ 9], data[11], 2, 0x33333333);
1154 swap (data[ 8], data[ 9], 1, 0x55555555);
1155 swap (data[10], data[11], 1, 0x55555555);
1156 swap (data[12], data[14], 2, 0x33333333);
1157 swap (data[13], data[15], 2, 0x33333333);
1158 swap (data[12], data[13], 1, 0x55555555);
1159 swap (data[14], data[15], 1, 0x55555555);
1160 swap (data[16], data[24], 8, 0x00ff00ff);
1161 swap (data[17], data[25], 8, 0x00ff00ff);
1162 swap (data[18], data[26], 8, 0x00ff00ff);
1163 swap (data[19], data[27], 8, 0x00ff00ff);
1164 swap (data[20], data[28], 8, 0x00ff00ff);
1165 swap (data[21], data[29], 8, 0x00ff00ff);
1166 swap (data[22], data[30], 8, 0x00ff00ff);
1167 swap (data[23], data[31], 8, 0x00ff00ff);
1168 swap (data[16], data[20], 4, 0x0f0f0f0f);
1169 swap (data[17], data[21], 4, 0x0f0f0f0f);
1170 swap (data[18], data[22], 4, 0x0f0f0f0f);
1171 swap (data[19], data[23], 4, 0x0f0f0f0f);
1172 swap (data[16], data[18], 2, 0x33333333);
1173 swap (data[17], data[19], 2, 0x33333333);
1174 swap (data[16], data[17], 1, 0x55555555);
1175 swap (data[18], data[19], 1, 0x55555555);
1176 swap (data[20], data[22], 2, 0x33333333);
1177 swap (data[21], data[23], 2, 0x33333333);
1178 swap (data[20], data[21], 1, 0x55555555);
1179 swap (data[22], data[23], 1, 0x55555555);
1180 swap (data[24], data[28], 4, 0x0f0f0f0f);
1181 swap (data[25], data[29], 4, 0x0f0f0f0f);
1182 swap (data[26], data[30], 4, 0x0f0f0f0f);
1183 swap (data[27], data[31], 4, 0x0f0f0f0f);
1184 swap (data[24], data[26], 2, 0x33333333);
1185 swap (data[25], data[27], 2, 0x33333333);
1186 swap (data[24], data[25], 1, 0x55555555);
1187 swap (data[26], data[27], 1, 0x55555555);
1188 swap (data[28], data[30], 2, 0x33333333);
1189 swap (data[29], data[31], 2, 0x33333333);
1190 swap (data[28], data[29], 1, 0x55555555);
1191 swap (data[30], data[31], 1, 0x55555555);
1194 __device__ static void m01500m (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1200 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1201 const u32 lid = threadIdx.x;
1207 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
1213 const u32 w0s = (pws[gid].i[0] << 1) & 0xfefefefe;
1214 const u32 w1s = (pws[gid].i[1] << 1) & 0xfefefefe;
1216 const u32 K00 = -((w0s >> ( 0 + 7)) & 1);
1217 const u32 K01 = -((w0s >> ( 0 + 6)) & 1);
1218 const u32 K02 = -((w0s >> ( 0 + 5)) & 1);
1219 const u32 K03 = -((w0s >> ( 0 + 4)) & 1);
1220 const u32 K04 = -((w0s >> ( 0 + 3)) & 1);
1221 const u32 K05 = -((w0s >> ( 0 + 2)) & 1);
1222 const u32 K06 = -((w0s >> ( 0 + 1)) & 1);
1223 const u32 K07 = -((w0s >> ( 8 + 7)) & 1);
1224 const u32 K08 = -((w0s >> ( 8 + 6)) & 1);
1225 const u32 K09 = -((w0s >> ( 8 + 5)) & 1);
1226 const u32 K10 = -((w0s >> ( 8 + 4)) & 1);
1227 const u32 K11 = -((w0s >> ( 8 + 3)) & 1);
1228 const u32 K12 = -((w0s >> ( 8 + 2)) & 1);
1229 const u32 K13 = -((w0s >> ( 8 + 1)) & 1);
1230 const u32 K14 = -((w0s >> (16 + 7)) & 1);
1231 const u32 K15 = -((w0s >> (16 + 6)) & 1);
1232 const u32 K16 = -((w0s >> (16 + 5)) & 1);
1233 const u32 K17 = -((w0s >> (16 + 4)) & 1);
1234 const u32 K18 = -((w0s >> (16 + 3)) & 1);
1235 const u32 K19 = -((w0s >> (16 + 2)) & 1);
1236 const u32 K20 = -((w0s >> (16 + 1)) & 1);
1237 const u32 K21 = -((w0s >> (24 + 7)) & 1);
1238 const u32 K22 = -((w0s >> (24 + 6)) & 1);
1239 const u32 K23 = -((w0s >> (24 + 5)) & 1);
1240 const u32 K24 = -((w0s >> (24 + 4)) & 1);
1241 const u32 K25 = -((w0s >> (24 + 3)) & 1);
1242 const u32 K26 = -((w0s >> (24 + 2)) & 1);
1243 const u32 K27 = -((w0s >> (24 + 1)) & 1);
1244 const u32 K28 = -((w1s >> ( 0 + 7)) & 1);
1245 const u32 K29 = -((w1s >> ( 0 + 6)) & 1);
1246 const u32 K30 = -((w1s >> ( 0 + 5)) & 1);
1247 const u32 K31 = -((w1s >> ( 0 + 4)) & 1);
1248 const u32 K32 = -((w1s >> ( 0 + 3)) & 1);
1249 const u32 K33 = -((w1s >> ( 0 + 2)) & 1);
1250 const u32 K34 = -((w1s >> ( 0 + 1)) & 1);
1251 const u32 K35 = -((w1s >> ( 8 + 7)) & 1);
1252 const u32 K36 = -((w1s >> ( 8 + 6)) & 1);
1253 const u32 K37 = -((w1s >> ( 8 + 5)) & 1);
1254 const u32 K38 = -((w1s >> ( 8 + 4)) & 1);
1255 const u32 K39 = -((w1s >> ( 8 + 3)) & 1);
1256 const u32 K40 = -((w1s >> ( 8 + 2)) & 1);
1257 const u32 K41 = -((w1s >> ( 8 + 1)) & 1);
1258 const u32 K42 = -((w1s >> (16 + 7)) & 1);
1259 const u32 K43 = -((w1s >> (16 + 6)) & 1);
1260 const u32 K44 = -((w1s >> (16 + 5)) & 1);
1261 const u32 K45 = -((w1s >> (16 + 4)) & 1);
1262 const u32 K46 = -((w1s >> (16 + 3)) & 1);
1263 const u32 K47 = -((w1s >> (16 + 2)) & 1);
1264 const u32 K48 = -((w1s >> (16 + 1)) & 1);
1265 const u32 K49 = -((w1s >> (24 + 7)) & 1);
1266 const u32 K50 = -((w1s >> (24 + 6)) & 1);
1267 const u32 K51 = -((w1s >> (24 + 5)) & 1);
1268 const u32 K52 = -((w1s >> (24 + 4)) & 1);
1269 const u32 K53 = -((w1s >> (24 + 3)) & 1);
1270 const u32 K54 = -((w1s >> (24 + 2)) & 1);
1271 const u32 K55 = -((w1s >> (24 + 1)) & 1);
1277 const u32 bf_loops = bfs_cnt;
1279 for (u32 il_pos = 0, pc_pos = 0; il_pos < bf_loops; il_pos += 32, pc_pos++)
1310 k00 |= c_tm[pc_pos].b[ 0];
1311 k01 |= c_tm[pc_pos].b[ 1];
1312 k02 |= c_tm[pc_pos].b[ 2];
1313 k03 |= c_tm[pc_pos].b[ 3];
1314 k04 |= c_tm[pc_pos].b[ 4];
1315 k05 |= c_tm[pc_pos].b[ 5];
1316 k06 |= c_tm[pc_pos].b[ 6];
1317 k07 |= c_tm[pc_pos].b[ 7];
1318 k08 |= c_tm[pc_pos].b[ 8];
1319 k09 |= c_tm[pc_pos].b[ 9];
1320 k10 |= c_tm[pc_pos].b[10];
1321 k11 |= c_tm[pc_pos].b[11];
1322 k12 |= c_tm[pc_pos].b[12];
1323 k13 |= c_tm[pc_pos].b[13];
1324 k14 |= c_tm[pc_pos].b[14];
1325 k15 |= c_tm[pc_pos].b[15];
1326 k16 |= c_tm[pc_pos].b[16];
1327 k17 |= c_tm[pc_pos].b[17];
1328 k18 |= c_tm[pc_pos].b[18];
1329 k19 |= c_tm[pc_pos].b[19];
1330 k20 |= c_tm[pc_pos].b[20];
1331 k21 |= c_tm[pc_pos].b[21];
1332 k22 |= c_tm[pc_pos].b[22];
1333 k23 |= c_tm[pc_pos].b[23];
1334 k24 |= c_tm[pc_pos].b[24];
1335 k25 |= c_tm[pc_pos].b[25];
1336 k26 |= c_tm[pc_pos].b[26];
1337 k27 |= c_tm[pc_pos].b[27];
1407 k00, k01, k02, k03, k04, k05, k06,
1408 k07, k08, k09, k10, k11, k12, k13,
1409 k14, k15, k16, k17, k18, k19, k20,
1410 k21, k22, k23, k24, k25, k26, k27,
1411 K28, K29, K30, K31, K32, K33, K34,
1412 K35, K36, K37, K38, K39, K40, K41,
1413 K42, K43, K44, K45, K46, K47, K48,
1414 K49, K50, K51, K52, K53, K54, K55,
1415 D00, D01, D02, D03, D04, D05, D06, D07,
1416 D08, D09, D10, D11, D12, D13, D14, D15,
1417 D16, D17, D18, D19, D20, D21, D22, D23,
1418 D24, D25, D26, D27, D28, D29, D30, D31,
1419 D32, D33, D34, D35, D36, D37, D38, D39,
1420 D40, D41, D42, D43, D44, D45, D46, D47,
1421 D48, D49, D50, D51, D52, D53, D54, D55,
1422 D56, D57, D58, D59, D60, D61, D62, D63
1492 if (digests_cnt < 16)
1494 for (u32 d = 0; d < digests_cnt; d++)
1496 const u32 final_hash_pos = digests_offset + d;
1498 if (hashes_shown[final_hash_pos]) continue;
1502 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
1503 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
1508 for (int i = 0; i < 32; i++)
1510 const u32 b0 = -((search[0] >> i) & 1);
1511 const u32 b1 = -((search[1] >> i) & 1);
1513 tmpResult |= out[ 0 + i] ^ b0;
1514 tmpResult |= out[32 + i] ^ b1;
1517 if (tmpResult == 0xffffffff) continue;
1519 const u32 slice = 31 - __clz (~tmpResult);
1521 const u32x r0 = search[0];
1522 const u32x r1 = search[1];
1526 #include VECT_COMPARE_M
1535 for (int i = 0; i < 32; i++)
1537 out0[i] = out[ 0 + 31 - i];
1538 out1[i] = out[32 + 31 - i];
1541 transpose32c (out0);
1542 transpose32c (out1);
1545 for (int slice = 0; slice < 32; slice++)
1547 const u32x r0 = out0[31 - slice];
1548 const u32x r1 = out1[31 - slice];
1552 #include VECT_COMPARE_M
1558 __device__ static void m01500s (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1564 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1565 const u32 lid = threadIdx.x;
1571 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
1646 const u32 w0s = (pws[gid].i[0] << 1) & 0xfefefefe;
1647 const u32 w1s = (pws[gid].i[1] << 1) & 0xfefefefe;
1649 const u32 K00 = -((w0s >> ( 0 + 7)) & 1);
1650 const u32 K01 = -((w0s >> ( 0 + 6)) & 1);
1651 const u32 K02 = -((w0s >> ( 0 + 5)) & 1);
1652 const u32 K03 = -((w0s >> ( 0 + 4)) & 1);
1653 const u32 K04 = -((w0s >> ( 0 + 3)) & 1);
1654 const u32 K05 = -((w0s >> ( 0 + 2)) & 1);
1655 const u32 K06 = -((w0s >> ( 0 + 1)) & 1);
1656 const u32 K07 = -((w0s >> ( 8 + 7)) & 1);
1657 const u32 K08 = -((w0s >> ( 8 + 6)) & 1);
1658 const u32 K09 = -((w0s >> ( 8 + 5)) & 1);
1659 const u32 K10 = -((w0s >> ( 8 + 4)) & 1);
1660 const u32 K11 = -((w0s >> ( 8 + 3)) & 1);
1661 const u32 K12 = -((w0s >> ( 8 + 2)) & 1);
1662 const u32 K13 = -((w0s >> ( 8 + 1)) & 1);
1663 const u32 K14 = -((w0s >> (16 + 7)) & 1);
1664 const u32 K15 = -((w0s >> (16 + 6)) & 1);
1665 const u32 K16 = -((w0s >> (16 + 5)) & 1);
1666 const u32 K17 = -((w0s >> (16 + 4)) & 1);
1667 const u32 K18 = -((w0s >> (16 + 3)) & 1);
1668 const u32 K19 = -((w0s >> (16 + 2)) & 1);
1669 const u32 K20 = -((w0s >> (16 + 1)) & 1);
1670 const u32 K21 = -((w0s >> (24 + 7)) & 1);
1671 const u32 K22 = -((w0s >> (24 + 6)) & 1);
1672 const u32 K23 = -((w0s >> (24 + 5)) & 1);
1673 const u32 K24 = -((w0s >> (24 + 4)) & 1);
1674 const u32 K25 = -((w0s >> (24 + 3)) & 1);
1675 const u32 K26 = -((w0s >> (24 + 2)) & 1);
1676 const u32 K27 = -((w0s >> (24 + 1)) & 1);
1677 const u32 K28 = -((w1s >> ( 0 + 7)) & 1);
1678 const u32 K29 = -((w1s >> ( 0 + 6)) & 1);
1679 const u32 K30 = -((w1s >> ( 0 + 5)) & 1);
1680 const u32 K31 = -((w1s >> ( 0 + 4)) & 1);
1681 const u32 K32 = -((w1s >> ( 0 + 3)) & 1);
1682 const u32 K33 = -((w1s >> ( 0 + 2)) & 1);
1683 const u32 K34 = -((w1s >> ( 0 + 1)) & 1);
1684 const u32 K35 = -((w1s >> ( 8 + 7)) & 1);
1685 const u32 K36 = -((w1s >> ( 8 + 6)) & 1);
1686 const u32 K37 = -((w1s >> ( 8 + 5)) & 1);
1687 const u32 K38 = -((w1s >> ( 8 + 4)) & 1);
1688 const u32 K39 = -((w1s >> ( 8 + 3)) & 1);
1689 const u32 K40 = -((w1s >> ( 8 + 2)) & 1);
1690 const u32 K41 = -((w1s >> ( 8 + 1)) & 1);
1691 const u32 K42 = -((w1s >> (16 + 7)) & 1);
1692 const u32 K43 = -((w1s >> (16 + 6)) & 1);
1693 const u32 K44 = -((w1s >> (16 + 5)) & 1);
1694 const u32 K45 = -((w1s >> (16 + 4)) & 1);
1695 const u32 K46 = -((w1s >> (16 + 3)) & 1);
1696 const u32 K47 = -((w1s >> (16 + 2)) & 1);
1697 const u32 K48 = -((w1s >> (16 + 1)) & 1);
1698 const u32 K49 = -((w1s >> (24 + 7)) & 1);
1699 const u32 K50 = -((w1s >> (24 + 6)) & 1);
1700 const u32 K51 = -((w1s >> (24 + 5)) & 1);
1701 const u32 K52 = -((w1s >> (24 + 4)) & 1);
1702 const u32 K53 = -((w1s >> (24 + 3)) & 1);
1703 const u32 K54 = -((w1s >> (24 + 2)) & 1);
1704 const u32 K55 = -((w1s >> (24 + 1)) & 1);
1710 const u32 bf_loops = bfs_cnt;
1712 for (u32 il_pos = 0, pc_pos = 0; il_pos < bf_loops; il_pos += 32, pc_pos++)
1743 k00 |= c_tm[pc_pos].b[ 0];
1744 k01 |= c_tm[pc_pos].b[ 1];
1745 k02 |= c_tm[pc_pos].b[ 2];
1746 k03 |= c_tm[pc_pos].b[ 3];
1747 k04 |= c_tm[pc_pos].b[ 4];
1748 k05 |= c_tm[pc_pos].b[ 5];
1749 k06 |= c_tm[pc_pos].b[ 6];
1750 k07 |= c_tm[pc_pos].b[ 7];
1751 k08 |= c_tm[pc_pos].b[ 8];
1752 k09 |= c_tm[pc_pos].b[ 9];
1753 k10 |= c_tm[pc_pos].b[10];
1754 k11 |= c_tm[pc_pos].b[11];
1755 k12 |= c_tm[pc_pos].b[12];
1756 k13 |= c_tm[pc_pos].b[13];
1757 k14 |= c_tm[pc_pos].b[14];
1758 k15 |= c_tm[pc_pos].b[15];
1759 k16 |= c_tm[pc_pos].b[16];
1760 k17 |= c_tm[pc_pos].b[17];
1761 k18 |= c_tm[pc_pos].b[18];
1762 k19 |= c_tm[pc_pos].b[19];
1763 k20 |= c_tm[pc_pos].b[20];
1764 k21 |= c_tm[pc_pos].b[21];
1765 k22 |= c_tm[pc_pos].b[22];
1766 k23 |= c_tm[pc_pos].b[23];
1767 k24 |= c_tm[pc_pos].b[24];
1768 k25 |= c_tm[pc_pos].b[25];
1769 k26 |= c_tm[pc_pos].b[26];
1770 k27 |= c_tm[pc_pos].b[27];
1840 k00, k01, k02, k03, k04, k05, k06,
1841 k07, k08, k09, k10, k11, k12, k13,
1842 k14, k15, k16, k17, k18, k19, k20,
1843 k21, k22, k23, k24, k25, k26, k27,
1844 K28, K29, K30, K31, K32, K33, K34,
1845 K35, K36, K37, K38, K39, K40, K41,
1846 K42, K43, K44, K45, K46, K47, K48,
1847 K49, K50, K51, K52, K53, K54, K55,
1848 D00, D01, D02, D03, D04, D05, D06, D07,
1849 D08, D09, D10, D11, D12, D13, D14, D15,
1850 D16, D17, D18, D19, D20, D21, D22, D23,
1851 D24, D25, D26, D27, D28, D29, D30, D31,
1852 D32, D33, D34, D35, D36, D37, D38, D39,
1853 D40, D41, D42, D43, D44, D45, D46, D47,
1854 D48, D49, D50, D51, D52, D53, D54, D55,
1855 D56, D57, D58, D59, D60, D61, D62, D63
1860 tmpResult |= D00 ^ S00;
1861 tmpResult |= D01 ^ S01;
1862 tmpResult |= D02 ^ S02;
1863 tmpResult |= D03 ^ S03;
1864 tmpResult |= D04 ^ S04;
1865 tmpResult |= D05 ^ S05;
1866 tmpResult |= D06 ^ S06;
1867 tmpResult |= D07 ^ S07;
1868 tmpResult |= D08 ^ S08;
1869 tmpResult |= D09 ^ S09;
1870 tmpResult |= D10 ^ S10;
1871 tmpResult |= D11 ^ S11;
1872 tmpResult |= D12 ^ S12;
1873 tmpResult |= D13 ^ S13;
1874 tmpResult |= D14 ^ S14;
1875 tmpResult |= D15 ^ S15;
1876 tmpResult |= D16 ^ S16;
1877 tmpResult |= D17 ^ S17;
1878 tmpResult |= D18 ^ S18;
1879 tmpResult |= D19 ^ S19;
1880 tmpResult |= D20 ^ S20;
1881 tmpResult |= D21 ^ S21;
1882 tmpResult |= D22 ^ S22;
1883 tmpResult |= D23 ^ S23;
1884 tmpResult |= D24 ^ S24;
1885 tmpResult |= D25 ^ S25;
1886 tmpResult |= D26 ^ S26;
1887 tmpResult |= D27 ^ S27;
1888 tmpResult |= D28 ^ S28;
1889 tmpResult |= D29 ^ S29;
1890 tmpResult |= D30 ^ S30;
1891 tmpResult |= D31 ^ S31;
1892 tmpResult |= D32 ^ S32;
1893 tmpResult |= D33 ^ S33;
1894 tmpResult |= D34 ^ S34;
1895 tmpResult |= D35 ^ S35;
1896 tmpResult |= D36 ^ S36;
1897 tmpResult |= D37 ^ S37;
1898 tmpResult |= D38 ^ S38;
1899 tmpResult |= D39 ^ S39;
1900 tmpResult |= D40 ^ S40;
1901 tmpResult |= D41 ^ S41;
1902 tmpResult |= D42 ^ S42;
1903 tmpResult |= D43 ^ S43;
1904 tmpResult |= D44 ^ S44;
1905 tmpResult |= D45 ^ S45;
1906 tmpResult |= D46 ^ S46;
1907 tmpResult |= D47 ^ S47;
1909 if (tmpResult == 0xffffffff) continue;
1911 tmpResult |= D48 ^ S48;
1912 tmpResult |= D49 ^ S49;
1913 tmpResult |= D50 ^ S50;
1914 tmpResult |= D51 ^ S51;
1915 tmpResult |= D52 ^ S52;
1916 tmpResult |= D53 ^ S53;
1917 tmpResult |= D54 ^ S54;
1918 tmpResult |= D55 ^ S55;
1919 tmpResult |= D56 ^ S56;
1920 tmpResult |= D57 ^ S57;
1921 tmpResult |= D58 ^ S58;
1922 tmpResult |= D59 ^ S59;
1923 tmpResult |= D60 ^ S60;
1924 tmpResult |= D61 ^ S61;
1925 tmpResult |= D62 ^ S62;
1926 tmpResult |= D63 ^ S63;
1928 if (tmpResult == 0xffffffff) continue;
1930 const u32 slice = 31 - __clz (~tmpResult);
1932 #include VECT_COMPARE_S
1936 extern "C" __global__ void __launch_bounds__ (64, 1) m01500_tb (pw_t *pws)
1938 // not used here, inlined code
1941 extern "C" __global__ void __launch_bounds__ (32, 1) m01500_tm (const u32 *d_bfs, bs_word_t *d_tm)
1943 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1945 const u32 block = gid / 32;
1946 const u32 slice = gid % 32;
1948 const u32 w0 = c_bfs[gid];
1950 const u32 w0s = (w0 << 1) & 0xfefefefe;
1953 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
1955 atomicOr (&d_tm[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
1956 atomicOr (&d_tm[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice));
1957 atomicOr (&d_tm[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice));
1958 atomicOr (&d_tm[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice));
1959 atomicOr (&d_tm[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice));
1960 atomicOr (&d_tm[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice));
1961 atomicOr (&d_tm[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice));
1965 extern "C" __global__ void __launch_bounds__ (64, 1) m01500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1971 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1972 const u32 lid = threadIdx.x;
1974 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
1975 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
1979 s_S[lid] = -((s0 >> lid - 0) & 1);
1983 s_S[lid] = -((s1 >> lid - 32) & 1);
1988 if (gid >= gid_max) return;
1994 m01500m (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1997 extern "C" __global__ void __launch_bounds__ (64, 1) m01500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2001 extern "C" __global__ void __launch_bounds__ (64, 1) m01500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2005 extern "C" __global__ void __launch_bounds__ (64, 1) m01500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2011 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
2012 const u32 lid = threadIdx.x;
2014 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
2015 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
2019 s_S[lid] = -((s0 >> lid - 0) & 1);
2023 s_S[lid] = -((s1 >> lid - 32) & 1);
2028 if (gid >= gid_max) return;
2034 m01500s (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2037 extern "C" __global__ void __launch_bounds__ (64, 1) m01500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2041 extern "C" __global__ void __launch_bounds__ (64, 1) m01500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)