2 * Author......: Jens Steube <jens.steube@gmail.com>
4 * NOTE........: sboxes for maxwell were taken from DeepLearningJohnDoe, license below
5 * : sboxes for others were takes fron JtR, license below
11 #include "include/constants.h"
12 #include "include/kernel_vendor.h"
27 #include "include/kernel_functions.c"
29 #include "common_nv.c"
32 #define VECT_COMPARE_S "check_single_vect1_comp4_warp_bs.c"
33 #define VECT_COMPARE_M "check_multi_vect1_comp4_warp_bs.c"
37 #define VECT_COMPARE_S "check_single_vect2_comp4_warp_bs.c"
38 #define VECT_COMPARE_M "check_multi_vect2_comp4_warp_bs.c"
42 #define VECT_COMPARE_S "check_single_vect4_comp4_warp_bs.c"
43 #define VECT_COMPARE_M "check_multi_vect4_comp4_warp_bs.c"
49 #define myselx(a,b,c) ((c) ? (b) : (a))
51 __device__ __constant__ u32x c_bfs[1024];
52 __device__ __constant__ bs_word_t c_tm[32];
53 __device__ __shared__ u32 s_S[64];
55 #if __CUDA_ARCH__ >= 500
58 // Bitslice DES S-boxes with LOP3.LUT instructions
59 // For NVIDIA Maxwell architecture and CUDA 7.5 RC
60 // by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
62 // Gate counts: 25 24 25 18 25 24 24 23
64 // Depth: 8 7 7 6 8 10 10 8
67 // Note that same S-box function with a lower gate count isn't necessarily faster.
69 // These Boolean expressions corresponding to DES S-boxes were
70 // discovered by <deeplearningjohndoe at gmail.com>
72 // This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
73 // Redistribution and use in source and binary forms, with or without
74 // modification, are permitted.
76 // The underlying mathematical formulas are NOT copyrighted.
79 #define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));
81 __device__ static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
83 LUT(xAA55AA5500550055, a1, a4, a6, 0xC1)
84 LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
85 LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
86 LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
87 LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
88 LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
89 LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
90 LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
91 LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
92 LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
93 LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
94 LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
95 LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
96 LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
97 LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
98 LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
99 LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
100 LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
101 LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
102 LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
103 LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
104 LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
105 LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
106 LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
107 LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)
115 __device__ static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
117 LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97)
118 LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
119 LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76)
120 LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
121 LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
122 LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65)
123 LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
124 LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
125 LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
126 LUT(x3333CCCC00000000, a2, a5, a6, 0x14)
127 LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
128 LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
129 LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
130 LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
131 LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
132 LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
133 LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
134 LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
135 LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
136 LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
137 LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
138 LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
139 LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
140 LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)
148 __device__ static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
150 LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
151 LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
152 LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
153 LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
154 LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
155 LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
156 LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
157 LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49)
158 LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
159 LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
160 LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
161 LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
162 LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
163 LUT(x500F500F500F500F, a1, a3, a4, 0x98)
164 LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
165 LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
166 LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
167 LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
168 LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
169 LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
170 LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
171 LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
172 LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
173 LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
174 LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)
182 __device__ static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
184 LUT(x55F055F055F055F0, a1, a3, a4, 0x72)
185 LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
186 LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59)
187 LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
188 LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
189 LUT(x9999666699996666, a1, a2, a5, 0x69)
190 LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
191 LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
192 LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
193 LUT(x4848484848484848, a1, a2, a3, 0x12)
194 LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
195 LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
196 LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
197 LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
198 LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
199 LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
200 LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
201 LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)
209 __device__ static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
211 LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
212 LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9)
213 LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
214 LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
215 LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
216 LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
217 LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
218 LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
219 LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
220 LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
221 LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
222 LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
223 LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
224 LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
225 LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
226 LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
227 LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
228 LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
229 LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
230 LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
231 LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
232 LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
233 LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
234 LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
235 LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)
243 __device__ static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
245 LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2)
246 LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
247 LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
248 LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
249 LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
250 LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
251 LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
252 LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
253 LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
254 LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
255 LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
256 LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
257 LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
258 LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
259 LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
260 LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
261 LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
262 LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
263 LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
264 LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
265 LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
266 LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
267 LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
268 LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)
277 __device__ static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
279 LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
280 LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
281 LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
282 LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
283 LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
284 LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
285 LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
286 LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
287 LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
288 LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
289 LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
290 LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
291 LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
292 LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
293 LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
294 LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23)
295 LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
296 LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
297 LUT(xA050A050A050A050, a1, a3, a4, 0x21)
298 LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
299 LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
300 LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
301 LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
302 LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)
311 __device__ static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
313 LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
314 LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
315 LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
316 LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
317 LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
318 LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
319 LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
320 LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
321 LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
322 LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
323 LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
324 LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
325 LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
326 LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
327 LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
328 LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
329 LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
330 LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
331 LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
332 LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
333 LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
334 LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
335 LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)
347 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
348 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
350 * Gate counts: 49 44 46 33 48 46 46 41
353 * Several same-gate-count expressions for each S-box are included (for use on
354 * different CPUs/GPUs).
356 * These Boolean expressions corresponding to DES S-boxes have been generated
357 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
358 * John the Ripper password cracker: http://www.openwall.com/john/
359 * Being mathematical formulas, they are not copyrighted and are free for reuse
362 * This file (a specific representation of the S-box expressions, surrounding
363 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
364 * Redistribution and use in source and binary forms, with or without
365 * modification, are permitted. (This is a heavily cut-down "BSD license".)
367 * The effort has been sponsored by Rapid7: http://www.rapid7.com
370 __device__ static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
372 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
374 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
375 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
376 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
377 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
378 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
379 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
380 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
381 u32 x00, x01, x10, x11, x20, x21, x30, x31;
383 x55005500 = a1 & ~a5;
384 x5A0F5A0F = a4 ^ x55005500;
387 x22226666 = x3333FFFF & x66666666;
388 x2D2D6969 = a4 ^ x22226666;
389 x25202160 = x2D2D6969 & ~x5A0F5A0F;
392 x33CCCC33 = a3 ^ x00FFFF00;
393 x4803120C = x5A0F5A0F & ~x33CCCC33;
394 x2222FFFF = a6 | x22226666;
395 x6A21EDF3 = x4803120C ^ x2222FFFF;
396 x4A01CC93 = x6A21EDF3 & ~x25202160;
399 x7F75FFFF = x6A21EDF3 | x5555FFFF;
400 x00D20096 = a5 & ~x2D2D6969;
401 x7FA7FF69 = x7F75FFFF ^ x00D20096;
403 x0A0A0000 = a4 & ~x5555FFFF;
404 x0AD80096 = x00D20096 ^ x0A0A0000;
405 x00999900 = x00FFFF00 & ~x66666666;
406 x0AD99996 = x0AD80096 | x00999900;
408 x22332233 = a3 & ~x55005500;
409 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
410 x054885C0 = x257AA5F0 & ~x22332233;
411 xFAB77A3F = ~x054885C0;
412 x2221EDF3 = x3333FFFF & x6A21EDF3;
413 xD89697CC = xFAB77A3F ^ x2221EDF3;
414 x20 = x7FA7FF69 & ~a2;
415 x21 = x20 ^ xD89697CC;
418 x05B77AC0 = x00FFFF00 ^ x054885C0;
419 x05F77AD6 = x00D20096 | x05B77AC0;
420 x36C48529 = x3333FFFF ^ x05F77AD6;
421 x6391D07C = a1 ^ x36C48529;
422 xBB0747B0 = xD89697CC ^ x6391D07C;
423 x00 = x25202160 | a2;
424 x01 = x00 ^ xBB0747B0;
427 x4C460000 = x3333FFFF ^ x7F75FFFF;
428 x4EDF9996 = x0AD99996 | x4C460000;
429 x2D4E49EA = x6391D07C ^ x4EDF9996;
430 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
431 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
432 x10 = x4A01CC93 | a2;
433 x11 = x10 ^ x96B1B65A;
436 x5AFF5AFF = a5 | x5A0F5A0F;
437 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
438 x4201C010 = x4A01CC93 & x6391D07C;
439 x10B0D205 = x52B11215 ^ x4201C010;
440 x30 = x10B0D205 | a2;
441 x31 = x30 ^ x0AD99996;
445 __device__ static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
448 u32 x55550000, x00AA00FF, x33BB33FF;
449 u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
450 u32 x00000F0F, x336600FF, x332200FF, x332200F0;
451 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
452 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
453 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
454 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
455 u32 x0A451047, xBBDFDD7B, xB19ACD3C;
456 u32 x00, x01, x10, x11, x20, x21, x30, x31;
460 x55550000 = a1 & ~a6;
461 x00AA00FF = a5 & ~x55550000;
462 x33BB33FF = a2 | x00AA00FF;
464 x33CC0000 = x33CC33CC & ~a6;
465 x11441144 = a1 & x33CC33CC;
466 x11BB11BB = a5 ^ x11441144;
467 x003311BB = x11BB11BB & ~x33CC0000;
470 x336600FF = x00AA00FF ^ x33CC0000;
471 x332200FF = x33BB33FF & x336600FF;
472 x332200F0 = x332200FF & ~x00000F0F;
474 x0302000F = a3 & x332200FF;
476 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
477 x33CCCC33 = a6 ^ x33CC33CC;
478 x33CCC030 = x33CCCC33 & ~x00000F0F;
479 x9A646A95 = xA9A8AAA5 ^ x33CCC030;
480 x10 = a4 & ~x332200F0;
481 x11 = x10 ^ x9A646A95;
484 x00333303 = a2 & ~x33CCC030;
485 x118822B8 = x11BB11BB ^ x00333303;
486 xA8208805 = xA9A8AAA5 & ~x118822B8;
487 x3CC3C33C = a3 ^ x33CCCC33;
488 x94E34B39 = xA8208805 ^ x3CC3C33C;
489 x00 = x33BB33FF & ~a4;
490 x01 = x00 ^ x94E34B39;
493 x0331330C = x0302000F ^ x00333303;
494 x3FF3F33C = x3CC3C33C | x0331330C;
495 xA9DF596A = x33BB33FF ^ x9A646A95;
496 xA9DF5F6F = x00000F0F | xA9DF596A;
497 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
499 xA9466A6A = x332200FF ^ x9A646A95;
500 x3DA52153 = x94E34B39 ^ xA9466A6A;
501 x29850143 = xA9DF5F6F & x3DA52153;
502 x33C0330C = x33CC33CC & x3FF3F33C;
503 x1A45324F = x29850143 ^ x33C0330C;
504 x20 = x1A45324F | a4;
505 x21 = x20 ^ x962CAC53;
508 x0A451047 = x1A45324F & ~x118822B8;
509 xBBDFDD7B = x33CCCC33 | xA9DF596A;
510 xB19ACD3C = x0A451047 ^ xBBDFDD7B;
511 x30 = x003311BB | a4;
512 x31 = x30 ^ xB19ACD3C;
516 __device__ static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
518 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
519 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
520 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
521 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
522 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
523 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
524 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
525 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
526 u32 x00, x01, x10, x11, x20, x21, x30, x31;
528 x44444444 = a1 & ~a2;
530 x4F4FF4F4 = x44444444 | x0F0FF0F0;
532 x00AAAA00 = x00FFFF00 & ~a1;
533 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
535 x3C3CC3C3 = a2 ^ x0F0FF0F0;
536 x3C3C0000 = x3C3CC3C3 & ~a6;
537 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
538 x0C840A00 = x4FE55EF4 & ~x7373F4F4;
540 x00005EF4 = a6 & x4FE55EF4;
541 x00FF5EFF = a4 | x00005EF4;
542 x00555455 = a1 & x00FF5EFF;
543 x3C699796 = x3C3CC3C3 ^ x00555455;
544 x30 = x4FE55EF4 & ~a5;
545 x31 = x30 ^ x3C699796;
548 x000FF000 = x0F0FF0F0 & x00FFFF00;
550 x26D9A15E = x7373F4F4 ^ x55AA55AA;
551 x2FDFAF5F = a3 | x26D9A15E;
552 x2FD00F5F = x2FDFAF5F & ~x000FF000;
554 x55AAFFAA = x00AAAA00 | x55AA55AA;
555 x28410014 = x3C699796 & ~x55AAFFAA;
557 x000000CC = x000000FF & ~a2;
558 x284100D8 = x28410014 ^ x000000CC;
560 x204100D0 = x7373F4F4 & x284100D8;
561 x3C3CC3FF = x3C3CC3C3 | x000000FF;
562 x1C3CC32F = x3C3CC3FF & ~x204100D0;
563 x4969967A = a1 ^ x1C3CC32F;
564 x10 = x2FD00F5F & a5;
565 x11 = x10 ^ x4969967A;
568 x4CC44CC4 = x4FE55EF4 & ~a2;
569 x40C040C0 = x4CC44CC4 & ~a3;
570 xC3C33C3C = ~x3C3CC3C3;
571 x9669C396 = x55AAFFAA ^ xC3C33C3C;
572 xD6A98356 = x40C040C0 ^ x9669C396;
573 x00 = a5 & ~x0C840A00;
574 x01 = x00 ^ xD6A98356;
577 xD6E9C3D6 = x40C040C0 | x9669C396;
578 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
579 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
580 x001A000B = a4 & ~x4FE55EF4;
581 x9A1F2D1B = x9A072D12 | x001A000B;
582 x20 = a5 & ~x284100D8;
583 x21 = x20 ^ x9A1F2D1B;
587 __device__ static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
589 u32 x5A5A5A5A, x0F0FF0F0;
590 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
591 x52FBCA0F, x61C8F93C;
592 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
593 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
594 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
595 u32 x00, x01, x10, x11, x20, x21, x30, x31;
600 x33FFCC00 = a5 ^ x33FF33FF;
601 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
602 x0C0CC0C0 = x0F0FF0F0 & ~a2;
603 x0CF3C03F = a4 ^ x0C0CC0C0;
604 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
605 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
606 x61C8F93C = a2 ^ x52FBCA0F;
608 x00C0C03C = x0CF3C03F & x61C8F93C;
609 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
610 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
611 x30908326 = x3B92A366 & ~x0F0F30C0;
612 x3C90B3D6 = x0C0030F0 ^ x30908326;
615 x0C0CFFFF = a5 | x0C0CC0C0;
616 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
617 x04124C11 = x379E5C99 & ~x33CC33CC;
618 x56E9861E = x52FBCA0F ^ x04124C11;
619 x00 = a6 & ~x3C90B3D6;
620 x01 = x00 ^ x56E9861E;
623 xA91679E1 = ~x56E9861E;
624 x10 = x3C90B3D6 & ~a6;
625 x11 = x10 ^ xA91679E1;
628 x9586CA37 = x3C90B3D6 ^ xA91679E1;
629 x8402C833 = x9586CA37 & ~x33CC33CC;
630 x84C2C83F = x00C0C03C | x8402C833;
631 xB35C94A6 = x379E5C99 ^ x84C2C83F;
632 x20 = x61C8F93C | a6;
633 x21 = x20 ^ xB35C94A6;
636 x30 = a6 & x61C8F93C;
637 x31 = x30 ^ xB35C94A6;
641 __device__ static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
643 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
644 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
645 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
646 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
647 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
648 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
649 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
650 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
651 u32 x00, x01, x10, x11, x20, x21, x30, x31;
654 x77770000 = x77777777 & ~a6;
655 x22225555 = a1 ^ x77770000;
656 x11116666 = a3 ^ x22225555;
657 x1F1F6F6F = a4 | x11116666;
659 x70700000 = x77770000 & ~a4;
660 x43433333 = a3 ^ x70700000;
661 x00430033 = a5 & x43433333;
662 x55557777 = a1 | x11116666;
663 x55167744 = x00430033 ^ x55557777;
664 x5A19784B = a4 ^ x55167744;
666 x5A1987B4 = a6 ^ x5A19784B;
667 x7A3BD7F5 = x22225555 | x5A1987B4;
668 x003B00F5 = a5 & x7A3BD7F5;
669 x221955A0 = x22225555 ^ x003B00F5;
670 x05050707 = a4 & x55557777;
671 x271C52A7 = x221955A0 ^ x05050707;
673 x2A2A82A0 = x7A3BD7F5 & ~a1;
674 x6969B193 = x43433333 ^ x2A2A82A0;
675 x1FE06F90 = a5 ^ x1F1F6F6F;
676 x16804E00 = x1FE06F90 & ~x6969B193;
677 xE97FB1FF = ~x16804E00;
678 x20 = xE97FB1FF & ~a2;
679 x21 = x20 ^ x5A19784B;
682 x43403302 = x43433333 & ~x003B00F5;
683 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
684 x37DEFFB7 = x271C52A7 | x35CAED30;
685 x349ECCB5 = x37DEFFB7 & ~x43403302;
686 x0B01234A = x1F1F6F6F & ~x349ECCB5;
688 x101884B4 = x5A1987B4 & x349ECCB5;
689 x0FF8EB24 = x1FE06F90 ^ x101884B4;
690 x41413333 = x43433333 & x55557777;
691 x4FF9FB37 = x0FF8EB24 | x41413333;
692 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
693 x30 = x4FC2FBC2 & a2;
694 x31 = x30 ^ x271C52A7;
697 x22222222 = a1 ^ x77777777;
698 x16BCEE97 = x349ECCB5 ^ x22222222;
699 x0F080B04 = a4 & x0FF8EB24;
700 x19B4E593 = x16BCEE97 ^ x0F080B04;
701 x00 = x0B01234A | a2;
702 x01 = x00 ^ x19B4E593;
705 x5C5C5C5C = x1F1F6F6F ^ x43433333;
706 x4448184C = x5C5C5C5C & ~x19B4E593;
707 x2DDABE71 = x22225555 ^ x0FF8EB24;
708 x6992A63D = x4448184C ^ x2DDABE71;
709 x10 = x1F1F6F6F & a2;
710 x11 = x10 ^ x6992A63D;
714 __device__ static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
717 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
718 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
719 u32 x09030C06, x09030000, x336622FF, x3A6522FF;
720 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
721 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
722 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
723 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
724 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
725 u32 x00, x01, x10, x11, x20, x21, x30, x31;
730 x11115555 = a1 & x3333FFFF;
731 x22DD6699 = x33CC33CC ^ x11115555;
732 x22DD9966 = a6 ^ x22DD6699;
733 x00220099 = a5 & ~x22DD9966;
735 x00551144 = a1 & x22DD9966;
736 x33662277 = a2 ^ x00551144;
738 x7B7E7A7F = x33662277 | x5A5A5A5A;
739 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
741 x09030C06 = a3 & x59A31CE6;
742 x09030000 = x09030C06 & ~a6;
743 x336622FF = x00220099 | x33662277;
744 x3A6522FF = x09030000 ^ x336622FF;
745 x30 = x3A6522FF & a4;
746 x31 = x30 ^ x59A31CE6;
749 x484D494C = a2 ^ x7B7E7A7F;
750 x0000B6B3 = a6 & ~x484D494C;
751 x0F0FB9BC = a3 ^ x0000B6B3;
752 x00FC00F9 = a5 & ~x09030C06;
753 x0FFFB9FD = x0F0FB9BC | x00FC00F9;
755 x5DF75DF7 = a1 | x59A31CE6;
756 x116600F7 = x336622FF & x5DF75DF7;
757 x1E69B94B = x0F0FB9BC ^ x116600F7;
758 x1668B94B = x1E69B94B & ~x09030000;
759 x20 = x00220099 | a4;
760 x21 = x20 ^ x1668B94B;
763 x7B7B7B7B = a2 | x5A5A5A5A;
764 x411E5984 = x3A6522FF ^ x7B7B7B7B;
765 x1FFFFDFD = x11115555 | x0FFFB9FD;
766 x5EE1A479 = x411E5984 ^ x1FFFFDFD;
768 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
769 x004B002D = a5 & ~x3CB4DFD2;
770 xB7B2B6B3 = ~x484D494C;
771 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
772 xCC82CDE5 = x004B002D ^ xCCC9CDC8;
773 x10 = xCC82CDE5 & ~a4;
774 x11 = x10 ^ x5EE1A479;
777 x0055EEBB = a6 ^ x00551144;
778 x5A5AECE9 = a1 ^ x0F0FB9BC;
779 x0050ECA9 = x0055EEBB & x5A5AECE9;
780 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
781 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
782 x00 = x0FFFB9FD & ~a4;
783 x01 = x00 ^ xC59A2D67;
787 __device__ static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
789 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
790 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
791 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
792 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
793 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
794 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
795 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
796 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
797 u32 x00, x01, x10, x11, x20, x21, x30, x31;
800 x3CC33CC3 = a3 ^ x0FF00FF0;
801 x00003CC3 = a6 & x3CC33CC3;
802 x0F000F00 = a4 & x0FF00FF0;
803 x5A555A55 = a2 ^ x0F000F00;
804 x00001841 = x00003CC3 & x5A555A55;
806 x00000F00 = a6 & x0F000F00;
807 x33333C33 = a3 ^ x00000F00;
808 x7B777E77 = x5A555A55 | x33333C33;
809 x0FF0F00F = a6 ^ x0FF00FF0;
810 x74878E78 = x7B777E77 ^ x0FF0F00F;
811 x30 = a1 & ~x00001841;
812 x31 = x30 ^ x74878E78;
815 x003C003C = a5 & ~x3CC33CC3;
816 x5A7D5A7D = x5A555A55 | x003C003C;
817 x333300F0 = x00003CC3 ^ x33333C33;
818 x694E5A8D = x5A7D5A7D ^ x333300F0;
820 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
821 x000F0303 = a4 & ~x0FF0CCCC;
822 x5A505854 = x5A555A55 & ~x000F0303;
823 x33CC000F = a5 ^ x333300F0;
824 x699C585B = x5A505854 ^ x33CC000F;
826 x7F878F78 = x0F000F00 | x74878E78;
827 x21101013 = a3 & x699C585B;
828 x7F979F7B = x7F878F78 | x21101013;
829 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
830 x4F9493BB = x7F979F7B ^ x30030CC0;
831 x00 = x4F9493BB & ~a1;
832 x01 = x00 ^ x694E5A8D;
835 x6F9CDBFB = x699C585B | x4F9493BB;
836 x0000DBFB = a6 & x6F9CDBFB;
837 x00005151 = a2 & x0000DBFB;
838 x26DAC936 = x694E5A8D ^ x4F9493BB;
839 x26DA9867 = x00005151 ^ x26DAC936;
841 x27DA9877 = x21101013 | x26DA9867;
842 x27DA438C = x0000DBFB ^ x27DA9877;
843 x2625C9C9 = a5 ^ x26DAC936;
844 x27FFCBCD = x27DA438C | x2625C9C9;
845 x20 = x27FFCBCD & a1;
846 x21 = x20 ^ x699C585B;
849 x27FF1036 = x0000DBFB ^ x27FFCBCD;
850 x27FF103E = x003C003C | x27FF1036;
851 xB06B6C44 = ~x4F9493BB;
852 x97947C7A = x27FF103E ^ xB06B6C44;
853 x10 = x97947C7A & ~a1;
854 x11 = x10 ^ x26DA9867;
858 __device__ static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
860 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
861 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
862 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
863 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
864 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
865 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
866 u32 xF700A600, x61008000, x03B7856B, x62B7056B;
867 u32 x00, x01, x10, x11, x20, x21, x30, x31;
869 x0C0C0C0C = a3 & ~a2;
870 x0000F0F0 = a5 & ~a3;
871 x00FFF00F = a4 ^ x0000F0F0;
872 x00555005 = a1 & x00FFF00F;
873 x00515001 = x00555005 & ~x0C0C0C0C;
875 x33000330 = a2 & ~x00FFF00F;
876 x77555775 = a1 | x33000330;
877 x30303030 = a2 & ~a3;
878 x3030CFCF = a5 ^ x30303030;
879 x30104745 = x77555775 & x3030CFCF;
880 x30555745 = x00555005 | x30104745;
882 xFF000FF0 = ~x00FFF00F;
883 xCF1048B5 = x30104745 ^ xFF000FF0;
884 x080A080A = a3 & ~x77555775;
885 xC71A40BF = xCF1048B5 ^ x080A080A;
886 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
887 x10 = x00515001 | a6;
888 x11 = x10 ^ xCB164CB3;
891 x9E4319E6 = a1 ^ xCB164CB3;
892 x000019E6 = a5 & x9E4319E6;
893 xF429738C = a2 ^ xC71A40BF;
894 xF4296A6A = x000019E6 ^ xF429738C;
895 xC729695A = x33000330 ^ xF4296A6A;
897 xC47C3D2F = x30555745 ^ xF4296A6A;
898 xF77F3F3F = a2 | xC47C3D2F;
899 x9E43E619 = a5 ^ x9E4319E6;
900 x693CD926 = xF77F3F3F ^ x9E43E619;
901 x20 = x30555745 & a6;
902 x21 = x20 ^ x693CD926;
905 xF719A695 = x3030CFCF ^ xC729695A;
906 xF4FF73FF = a4 | xF429738C;
907 x03E6D56A = xF719A695 ^ xF4FF73FF;
908 x56B3803F = a1 ^ x03E6D56A;
909 x30 = x56B3803F & a6;
910 x31 = x30 ^ xC729695A;
913 xF700A600 = xF719A695 & ~a4;
914 x61008000 = x693CD926 & xF700A600;
915 x03B7856B = x00515001 ^ x03E6D56A;
916 x62B7056B = x61008000 ^ x03B7856B;
917 x00 = x62B7056B | a6;
918 x01 = x00 ^ xC729695A;
924 #define SWAP(a, b) { u32 tmp=a;a=b;b=tmp; }
960 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
961 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
962 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
963 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
964 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
965 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
966 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
967 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
968 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
969 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
970 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
971 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
972 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
973 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
974 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
975 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
977 __device__ static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 &D00, u32 &D01, u32 &D02, u32 &D03, u32 &D04, u32 &D05, u32 &D06, u32 &D07, u32 &D08, u32 &D09, u32 &D10, u32 &D11, u32 &D12, u32 &D13, u32 &D14, u32 &D15, u32 &D16, u32 &D17, u32 &D18, u32 &D19, u32 &D20, u32 &D21, u32 &D22, u32 &D23, u32 &D24, u32 &D25, u32 &D26, u32 &D27, u32 &D28, u32 &D29, u32 &D30, u32 &D31, u32 &D32, u32 &D33, u32 &D34, u32 &D35, u32 &D36, u32 &D37, u32 &D38, u32 &D39, u32 &D40, u32 &D41, u32 &D42, u32 &D43, u32 &D44, u32 &D45, u32 &D46, u32 &D47, u32 &D48, u32 &D49, u32 &D50, u32 &D51, u32 &D52, u32 &D53, u32 &D54, u32 &D55, u32 &D56, u32 &D57, u32 &D58, u32 &D59, u32 &D60, u32 &D61, u32 &D62, u32 &D63)
979 sXXX_DECL u32 s001 = (0x001 & SALT) ? 0xffffffff : 0;
980 sXXX_DECL u32 s002 = (0x002 & SALT) ? 0xffffffff : 0;
981 sXXX_DECL u32 s004 = (0x004 & SALT) ? 0xffffffff : 0;
982 sXXX_DECL u32 s008 = (0x008 & SALT) ? 0xffffffff : 0;
983 sXXX_DECL u32 s010 = (0x010 & SALT) ? 0xffffffff : 0;
984 sXXX_DECL u32 s020 = (0x020 & SALT) ? 0xffffffff : 0;
985 sXXX_DECL u32 s040 = (0x040 & SALT) ? 0xffffffff : 0;
986 sXXX_DECL u32 s080 = (0x080 & SALT) ? 0xffffffff : 0;
987 sXXX_DECL u32 s100 = (0x100 & SALT) ? 0xffffffff : 0;
988 sXXX_DECL u32 s200 = (0x200 & SALT) ? 0xffffffff : 0;
989 sXXX_DECL u32 s400 = (0x400 & SALT) ? 0xffffffff : 0;
990 sXXX_DECL u32 s800 = (0x800 & SALT) ? 0xffffffff : 0;
992 KXX_DECL u32 k00, k01, k02, k03, k04, k05;
993 KXX_DECL u32 k06, k07, k08, k09, k10, k11;
994 KXX_DECL u32 k12, k13, k14, k15, k16, k17;
995 KXX_DECL u32 k18, k19, k20, k21, k22, k23;
996 KXX_DECL u32 k24, k25, k26, k27, k28, k29;
997 KXX_DECL u32 k30, k31, k32, k33, k34, k35;
998 KXX_DECL u32 k36, k37, k38, k39, k40, k41;
999 KXX_DECL u32 k42, k43, k44, k45, k46, k47;
1001 for (u32 ii = 0; ii < 25; ii++)
1003 #if __CUDA_ARCH__ >= 500
1009 for (u32 i = 0; i < 2; i++)
1011 if (i) KEYSET10 else KEYSET00
1013 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
1014 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
1015 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
1016 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
1017 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
1018 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
1019 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
1020 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
1022 if (i) KEYSET11 else KEYSET01
1024 s1(myselx (D31, D15, s001) ^ k00, myselx (D00, D16, s002) ^ k01, myselx (D01, D17, s004) ^ k02, myselx (D02, D18, s008) ^ k03, myselx (D03, D19, s010) ^ k04, myselx (D04, D20, s020) ^ k05, &D40, &D48, &D54, &D62);
1025 s2(myselx (D03, D19, s040) ^ k06, myselx (D04, D20, s080) ^ k07, myselx (D05, D21, s100) ^ k08, myselx (D06, D22, s200) ^ k09, myselx (D07, D23, s400) ^ k10, myselx (D08, D24, s800) ^ k11, &D44, &D59, &D33, &D49);
1026 s3( D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37);
1027 s4( D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32);
1028 s5(myselx (D15, D31, s001) ^ k24, myselx (D16, D00, s002) ^ k25, myselx (D17, D01, s004) ^ k26, myselx (D18, D02, s008) ^ k27, myselx (D19, D03, s010) ^ k28, myselx (D20, D04, s020) ^ k29, &D39, &D45, &D56, &D34);
1029 s6(myselx (D19, D03, s040) ^ k30, myselx (D20, D04, s080) ^ k31, myselx (D21, D05, s100) ^ k32, myselx (D22, D06, s200) ^ k33, myselx (D23, D07, s400) ^ k34, myselx (D24, D08, s800) ^ k35, &D35, &D60, &D42, &D50);
1030 s7( D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38);
1031 s8( D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52);
1033 if (i) KEYSET12 else KEYSET02
1035 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
1036 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
1037 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
1038 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
1039 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
1040 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
1041 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
1042 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
1044 if (i) KEYSET13 else KEYSET03
1046 s1(myselx (D31, D15, s001) ^ k00, myselx (D00, D16, s002) ^ k01, myselx (D01, D17, s004) ^ k02, myselx (D02, D18, s008) ^ k03, myselx (D03, D19, s010) ^ k04, myselx (D04, D20, s020) ^ k05, &D40, &D48, &D54, &D62);
1047 s2(myselx (D03, D19, s040) ^ k06, myselx (D04, D20, s080) ^ k07, myselx (D05, D21, s100) ^ k08, myselx (D06, D22, s200) ^ k09, myselx (D07, D23, s400) ^ k10, myselx (D08, D24, s800) ^ k11, &D44, &D59, &D33, &D49);
1048 s3( D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37);
1049 s4( D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32);
1050 s5(myselx (D15, D31, s001) ^ k24, myselx (D16, D00, s002) ^ k25, myselx (D17, D01, s004) ^ k26, myselx (D18, D02, s008) ^ k27, myselx (D19, D03, s010) ^ k28, myselx (D20, D04, s020) ^ k29, &D39, &D45, &D56, &D34);
1051 s6(myselx (D19, D03, s040) ^ k30, myselx (D20, D04, s080) ^ k31, myselx (D21, D05, s100) ^ k32, myselx (D22, D06, s200) ^ k33, myselx (D23, D07, s400) ^ k34, myselx (D24, D08, s800) ^ k35, &D35, &D60, &D42, &D50);
1052 s7( D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38);
1053 s8( D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52);
1055 if (i) KEYSET14 else KEYSET04
1057 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
1058 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
1059 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
1060 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
1061 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
1062 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
1063 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
1064 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
1066 if (i) KEYSET15 else KEYSET05
1068 s1(myselx (D31, D15, s001) ^ k00, myselx (D00, D16, s002) ^ k01, myselx (D01, D17, s004) ^ k02, myselx (D02, D18, s008) ^ k03, myselx (D03, D19, s010) ^ k04, myselx (D04, D20, s020) ^ k05, &D40, &D48, &D54, &D62);
1069 s2(myselx (D03, D19, s040) ^ k06, myselx (D04, D20, s080) ^ k07, myselx (D05, D21, s100) ^ k08, myselx (D06, D22, s200) ^ k09, myselx (D07, D23, s400) ^ k10, myselx (D08, D24, s800) ^ k11, &D44, &D59, &D33, &D49);
1070 s3( D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37);
1071 s4( D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32);
1072 s5(myselx (D15, D31, s001) ^ k24, myselx (D16, D00, s002) ^ k25, myselx (D17, D01, s004) ^ k26, myselx (D18, D02, s008) ^ k27, myselx (D19, D03, s010) ^ k28, myselx (D20, D04, s020) ^ k29, &D39, &D45, &D56, &D34);
1073 s6(myselx (D19, D03, s040) ^ k30, myselx (D20, D04, s080) ^ k31, myselx (D21, D05, s100) ^ k32, myselx (D22, D06, s200) ^ k33, myselx (D23, D07, s400) ^ k34, myselx (D24, D08, s800) ^ k35, &D35, &D60, &D42, &D50);
1074 s7( D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38);
1075 s8( D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52);
1077 if (i) KEYSET16 else KEYSET06
1079 s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30);
1080 s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17);
1081 s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05);
1082 s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00);
1083 s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02);
1084 s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18);
1085 s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06);
1086 s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20);
1088 if (i) KEYSET17 else KEYSET07
1090 s1(myselx (D31, D15, s001) ^ k00, myselx (D00, D16, s002) ^ k01, myselx (D01, D17, s004) ^ k02, myselx (D02, D18, s008) ^ k03, myselx (D03, D19, s010) ^ k04, myselx (D04, D20, s020) ^ k05, &D40, &D48, &D54, &D62);
1091 s2(myselx (D03, D19, s040) ^ k06, myselx (D04, D20, s080) ^ k07, myselx (D05, D21, s100) ^ k08, myselx (D06, D22, s200) ^ k09, myselx (D07, D23, s400) ^ k10, myselx (D08, D24, s800) ^ k11, &D44, &D59, &D33, &D49);
1092 s3( D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37);
1093 s4( D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32);
1094 s5(myselx (D15, D31, s001) ^ k24, myselx (D16, D00, s002) ^ k25, myselx (D17, D01, s004) ^ k26, myselx (D18, D02, s008) ^ k27, myselx (D19, D03, s010) ^ k28, myselx (D20, D04, s020) ^ k29, &D39, &D45, &D56, &D34);
1095 s6(myselx (D19, D03, s040) ^ k30, myselx (D20, D04, s080) ^ k31, myselx (D21, D05, s100) ^ k32, myselx (D22, D06, s200) ^ k33, myselx (D23, D07, s400) ^ k34, myselx (D24, D08, s800) ^ k35, &D35, &D60, &D42, &D50);
1096 s7( D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38);
1097 s8( D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52);
1106 __device__ static void transpose32c (u32 data[32])
1108 #define swap(x,y,j,m) \
1109 t = ((x) ^ ((y) >> (j))) & (m); \
1111 (y) = (y) ^ (t << (j));
1115 swap (data[ 0], data[16], 16, 0x0000ffff);
1116 swap (data[ 1], data[17], 16, 0x0000ffff);
1117 swap (data[ 2], data[18], 16, 0x0000ffff);
1118 swap (data[ 3], data[19], 16, 0x0000ffff);
1119 swap (data[ 4], data[20], 16, 0x0000ffff);
1120 swap (data[ 5], data[21], 16, 0x0000ffff);
1121 swap (data[ 6], data[22], 16, 0x0000ffff);
1122 swap (data[ 7], data[23], 16, 0x0000ffff);
1123 swap (data[ 8], data[24], 16, 0x0000ffff);
1124 swap (data[ 9], data[25], 16, 0x0000ffff);
1125 swap (data[10], data[26], 16, 0x0000ffff);
1126 swap (data[11], data[27], 16, 0x0000ffff);
1127 swap (data[12], data[28], 16, 0x0000ffff);
1128 swap (data[13], data[29], 16, 0x0000ffff);
1129 swap (data[14], data[30], 16, 0x0000ffff);
1130 swap (data[15], data[31], 16, 0x0000ffff);
1131 swap (data[ 0], data[ 8], 8, 0x00ff00ff);
1132 swap (data[ 1], data[ 9], 8, 0x00ff00ff);
1133 swap (data[ 2], data[10], 8, 0x00ff00ff);
1134 swap (data[ 3], data[11], 8, 0x00ff00ff);
1135 swap (data[ 4], data[12], 8, 0x00ff00ff);
1136 swap (data[ 5], data[13], 8, 0x00ff00ff);
1137 swap (data[ 6], data[14], 8, 0x00ff00ff);
1138 swap (data[ 7], data[15], 8, 0x00ff00ff);
1139 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f);
1140 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f);
1141 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f);
1142 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f);
1143 swap (data[ 0], data[ 2], 2, 0x33333333);
1144 swap (data[ 1], data[ 3], 2, 0x33333333);
1145 swap (data[ 0], data[ 1], 1, 0x55555555);
1146 swap (data[ 2], data[ 3], 1, 0x55555555);
1147 swap (data[ 4], data[ 6], 2, 0x33333333);
1148 swap (data[ 5], data[ 7], 2, 0x33333333);
1149 swap (data[ 4], data[ 5], 1, 0x55555555);
1150 swap (data[ 6], data[ 7], 1, 0x55555555);
1151 swap (data[ 8], data[12], 4, 0x0f0f0f0f);
1152 swap (data[ 9], data[13], 4, 0x0f0f0f0f);
1153 swap (data[10], data[14], 4, 0x0f0f0f0f);
1154 swap (data[11], data[15], 4, 0x0f0f0f0f);
1155 swap (data[ 8], data[10], 2, 0x33333333);
1156 swap (data[ 9], data[11], 2, 0x33333333);
1157 swap (data[ 8], data[ 9], 1, 0x55555555);
1158 swap (data[10], data[11], 1, 0x55555555);
1159 swap (data[12], data[14], 2, 0x33333333);
1160 swap (data[13], data[15], 2, 0x33333333);
1161 swap (data[12], data[13], 1, 0x55555555);
1162 swap (data[14], data[15], 1, 0x55555555);
1163 swap (data[16], data[24], 8, 0x00ff00ff);
1164 swap (data[17], data[25], 8, 0x00ff00ff);
1165 swap (data[18], data[26], 8, 0x00ff00ff);
1166 swap (data[19], data[27], 8, 0x00ff00ff);
1167 swap (data[20], data[28], 8, 0x00ff00ff);
1168 swap (data[21], data[29], 8, 0x00ff00ff);
1169 swap (data[22], data[30], 8, 0x00ff00ff);
1170 swap (data[23], data[31], 8, 0x00ff00ff);
1171 swap (data[16], data[20], 4, 0x0f0f0f0f);
1172 swap (data[17], data[21], 4, 0x0f0f0f0f);
1173 swap (data[18], data[22], 4, 0x0f0f0f0f);
1174 swap (data[19], data[23], 4, 0x0f0f0f0f);
1175 swap (data[16], data[18], 2, 0x33333333);
1176 swap (data[17], data[19], 2, 0x33333333);
1177 swap (data[16], data[17], 1, 0x55555555);
1178 swap (data[18], data[19], 1, 0x55555555);
1179 swap (data[20], data[22], 2, 0x33333333);
1180 swap (data[21], data[23], 2, 0x33333333);
1181 swap (data[20], data[21], 1, 0x55555555);
1182 swap (data[22], data[23], 1, 0x55555555);
1183 swap (data[24], data[28], 4, 0x0f0f0f0f);
1184 swap (data[25], data[29], 4, 0x0f0f0f0f);
1185 swap (data[26], data[30], 4, 0x0f0f0f0f);
1186 swap (data[27], data[31], 4, 0x0f0f0f0f);
1187 swap (data[24], data[26], 2, 0x33333333);
1188 swap (data[25], data[27], 2, 0x33333333);
1189 swap (data[24], data[25], 1, 0x55555555);
1190 swap (data[26], data[27], 1, 0x55555555);
1191 swap (data[28], data[30], 2, 0x33333333);
1192 swap (data[29], data[31], 2, 0x33333333);
1193 swap (data[28], data[29], 1, 0x55555555);
1194 swap (data[30], data[31], 1, 0x55555555);
1197 __device__ static void m01500m (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1203 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1204 const u32 lid = threadIdx.x;
1210 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
1216 const u32 w0s = (pws[gid].i[0] << 1) & 0xfefefefe;
1217 const u32 w1s = (pws[gid].i[1] << 1) & 0xfefefefe;
1219 const u32 K00 = -((w0s >> ( 0 + 7)) & 1);
1220 const u32 K01 = -((w0s >> ( 0 + 6)) & 1);
1221 const u32 K02 = -((w0s >> ( 0 + 5)) & 1);
1222 const u32 K03 = -((w0s >> ( 0 + 4)) & 1);
1223 const u32 K04 = -((w0s >> ( 0 + 3)) & 1);
1224 const u32 K05 = -((w0s >> ( 0 + 2)) & 1);
1225 const u32 K06 = -((w0s >> ( 0 + 1)) & 1);
1226 const u32 K07 = -((w0s >> ( 8 + 7)) & 1);
1227 const u32 K08 = -((w0s >> ( 8 + 6)) & 1);
1228 const u32 K09 = -((w0s >> ( 8 + 5)) & 1);
1229 const u32 K10 = -((w0s >> ( 8 + 4)) & 1);
1230 const u32 K11 = -((w0s >> ( 8 + 3)) & 1);
1231 const u32 K12 = -((w0s >> ( 8 + 2)) & 1);
1232 const u32 K13 = -((w0s >> ( 8 + 1)) & 1);
1233 const u32 K14 = -((w0s >> (16 + 7)) & 1);
1234 const u32 K15 = -((w0s >> (16 + 6)) & 1);
1235 const u32 K16 = -((w0s >> (16 + 5)) & 1);
1236 const u32 K17 = -((w0s >> (16 + 4)) & 1);
1237 const u32 K18 = -((w0s >> (16 + 3)) & 1);
1238 const u32 K19 = -((w0s >> (16 + 2)) & 1);
1239 const u32 K20 = -((w0s >> (16 + 1)) & 1);
1240 const u32 K21 = -((w0s >> (24 + 7)) & 1);
1241 const u32 K22 = -((w0s >> (24 + 6)) & 1);
1242 const u32 K23 = -((w0s >> (24 + 5)) & 1);
1243 const u32 K24 = -((w0s >> (24 + 4)) & 1);
1244 const u32 K25 = -((w0s >> (24 + 3)) & 1);
1245 const u32 K26 = -((w0s >> (24 + 2)) & 1);
1246 const u32 K27 = -((w0s >> (24 + 1)) & 1);
1247 const u32 K28 = -((w1s >> ( 0 + 7)) & 1);
1248 const u32 K29 = -((w1s >> ( 0 + 6)) & 1);
1249 const u32 K30 = -((w1s >> ( 0 + 5)) & 1);
1250 const u32 K31 = -((w1s >> ( 0 + 4)) & 1);
1251 const u32 K32 = -((w1s >> ( 0 + 3)) & 1);
1252 const u32 K33 = -((w1s >> ( 0 + 2)) & 1);
1253 const u32 K34 = -((w1s >> ( 0 + 1)) & 1);
1254 const u32 K35 = -((w1s >> ( 8 + 7)) & 1);
1255 const u32 K36 = -((w1s >> ( 8 + 6)) & 1);
1256 const u32 K37 = -((w1s >> ( 8 + 5)) & 1);
1257 const u32 K38 = -((w1s >> ( 8 + 4)) & 1);
1258 const u32 K39 = -((w1s >> ( 8 + 3)) & 1);
1259 const u32 K40 = -((w1s >> ( 8 + 2)) & 1);
1260 const u32 K41 = -((w1s >> ( 8 + 1)) & 1);
1261 const u32 K42 = -((w1s >> (16 + 7)) & 1);
1262 const u32 K43 = -((w1s >> (16 + 6)) & 1);
1263 const u32 K44 = -((w1s >> (16 + 5)) & 1);
1264 const u32 K45 = -((w1s >> (16 + 4)) & 1);
1265 const u32 K46 = -((w1s >> (16 + 3)) & 1);
1266 const u32 K47 = -((w1s >> (16 + 2)) & 1);
1267 const u32 K48 = -((w1s >> (16 + 1)) & 1);
1268 const u32 K49 = -((w1s >> (24 + 7)) & 1);
1269 const u32 K50 = -((w1s >> (24 + 6)) & 1);
1270 const u32 K51 = -((w1s >> (24 + 5)) & 1);
1271 const u32 K52 = -((w1s >> (24 + 4)) & 1);
1272 const u32 K53 = -((w1s >> (24 + 3)) & 1);
1273 const u32 K54 = -((w1s >> (24 + 2)) & 1);
1274 const u32 K55 = -((w1s >> (24 + 1)) & 1);
1280 const u32 bf_loops = bfs_cnt;
1282 for (u32 il_pos = 0, pc_pos = 0; il_pos < bf_loops; il_pos += 32, pc_pos++)
1313 k00 |= c_tm[pc_pos].b[ 0];
1314 k01 |= c_tm[pc_pos].b[ 1];
1315 k02 |= c_tm[pc_pos].b[ 2];
1316 k03 |= c_tm[pc_pos].b[ 3];
1317 k04 |= c_tm[pc_pos].b[ 4];
1318 k05 |= c_tm[pc_pos].b[ 5];
1319 k06 |= c_tm[pc_pos].b[ 6];
1320 k07 |= c_tm[pc_pos].b[ 7];
1321 k08 |= c_tm[pc_pos].b[ 8];
1322 k09 |= c_tm[pc_pos].b[ 9];
1323 k10 |= c_tm[pc_pos].b[10];
1324 k11 |= c_tm[pc_pos].b[11];
1325 k12 |= c_tm[pc_pos].b[12];
1326 k13 |= c_tm[pc_pos].b[13];
1327 k14 |= c_tm[pc_pos].b[14];
1328 k15 |= c_tm[pc_pos].b[15];
1329 k16 |= c_tm[pc_pos].b[16];
1330 k17 |= c_tm[pc_pos].b[17];
1331 k18 |= c_tm[pc_pos].b[18];
1332 k19 |= c_tm[pc_pos].b[19];
1333 k20 |= c_tm[pc_pos].b[20];
1334 k21 |= c_tm[pc_pos].b[21];
1335 k22 |= c_tm[pc_pos].b[22];
1336 k23 |= c_tm[pc_pos].b[23];
1337 k24 |= c_tm[pc_pos].b[24];
1338 k25 |= c_tm[pc_pos].b[25];
1339 k26 |= c_tm[pc_pos].b[26];
1340 k27 |= c_tm[pc_pos].b[27];
1410 k00, k01, k02, k03, k04, k05, k06,
1411 k07, k08, k09, k10, k11, k12, k13,
1412 k14, k15, k16, k17, k18, k19, k20,
1413 k21, k22, k23, k24, k25, k26, k27,
1414 K28, K29, K30, K31, K32, K33, K34,
1415 K35, K36, K37, K38, K39, K40, K41,
1416 K42, K43, K44, K45, K46, K47, K48,
1417 K49, K50, K51, K52, K53, K54, K55,
1418 D00, D01, D02, D03, D04, D05, D06, D07,
1419 D08, D09, D10, D11, D12, D13, D14, D15,
1420 D16, D17, D18, D19, D20, D21, D22, D23,
1421 D24, D25, D26, D27, D28, D29, D30, D31,
1422 D32, D33, D34, D35, D36, D37, D38, D39,
1423 D40, D41, D42, D43, D44, D45, D46, D47,
1424 D48, D49, D50, D51, D52, D53, D54, D55,
1425 D56, D57, D58, D59, D60, D61, D62, D63
1495 if (digests_cnt < 16)
1497 for (u32 d = 0; d < digests_cnt; d++)
1499 const u32 final_hash_pos = digests_offset + d;
1501 if (hashes_shown[final_hash_pos]) continue;
1505 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
1506 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
1511 for (int i = 0; i < 32; i++)
1513 const u32 b0 = -((search[0] >> i) & 1);
1514 const u32 b1 = -((search[1] >> i) & 1);
1516 tmpResult |= out[ 0 + i] ^ b0;
1517 tmpResult |= out[32 + i] ^ b1;
1520 if (tmpResult == 0xffffffff) continue;
1522 const u32 slice = 31 - __clz (~tmpResult);
1524 const u32x r0 = search[0];
1525 const u32x r1 = search[1];
1529 #include VECT_COMPARE_M
1538 for (int i = 0; i < 32; i++)
1540 out0[i] = out[ 0 + 31 - i];
1541 out1[i] = out[32 + 31 - i];
1544 transpose32c (out0);
1545 transpose32c (out1);
1548 for (int slice = 0; slice < 32; slice++)
1550 const u32x r0 = out0[31 - slice];
1551 const u32x r1 = out1[31 - slice];
1555 #include VECT_COMPARE_M
1561 __device__ static void m01500s (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
1567 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1568 const u32 lid = threadIdx.x;
1574 const u32 salt = salt_bufs[salt_pos].salt_buf[0];
1649 const u32 w0s = (pws[gid].i[0] << 1) & 0xfefefefe;
1650 const u32 w1s = (pws[gid].i[1] << 1) & 0xfefefefe;
1652 const u32 K00 = -((w0s >> ( 0 + 7)) & 1);
1653 const u32 K01 = -((w0s >> ( 0 + 6)) & 1);
1654 const u32 K02 = -((w0s >> ( 0 + 5)) & 1);
1655 const u32 K03 = -((w0s >> ( 0 + 4)) & 1);
1656 const u32 K04 = -((w0s >> ( 0 + 3)) & 1);
1657 const u32 K05 = -((w0s >> ( 0 + 2)) & 1);
1658 const u32 K06 = -((w0s >> ( 0 + 1)) & 1);
1659 const u32 K07 = -((w0s >> ( 8 + 7)) & 1);
1660 const u32 K08 = -((w0s >> ( 8 + 6)) & 1);
1661 const u32 K09 = -((w0s >> ( 8 + 5)) & 1);
1662 const u32 K10 = -((w0s >> ( 8 + 4)) & 1);
1663 const u32 K11 = -((w0s >> ( 8 + 3)) & 1);
1664 const u32 K12 = -((w0s >> ( 8 + 2)) & 1);
1665 const u32 K13 = -((w0s >> ( 8 + 1)) & 1);
1666 const u32 K14 = -((w0s >> (16 + 7)) & 1);
1667 const u32 K15 = -((w0s >> (16 + 6)) & 1);
1668 const u32 K16 = -((w0s >> (16 + 5)) & 1);
1669 const u32 K17 = -((w0s >> (16 + 4)) & 1);
1670 const u32 K18 = -((w0s >> (16 + 3)) & 1);
1671 const u32 K19 = -((w0s >> (16 + 2)) & 1);
1672 const u32 K20 = -((w0s >> (16 + 1)) & 1);
1673 const u32 K21 = -((w0s >> (24 + 7)) & 1);
1674 const u32 K22 = -((w0s >> (24 + 6)) & 1);
1675 const u32 K23 = -((w0s >> (24 + 5)) & 1);
1676 const u32 K24 = -((w0s >> (24 + 4)) & 1);
1677 const u32 K25 = -((w0s >> (24 + 3)) & 1);
1678 const u32 K26 = -((w0s >> (24 + 2)) & 1);
1679 const u32 K27 = -((w0s >> (24 + 1)) & 1);
1680 const u32 K28 = -((w1s >> ( 0 + 7)) & 1);
1681 const u32 K29 = -((w1s >> ( 0 + 6)) & 1);
1682 const u32 K30 = -((w1s >> ( 0 + 5)) & 1);
1683 const u32 K31 = -((w1s >> ( 0 + 4)) & 1);
1684 const u32 K32 = -((w1s >> ( 0 + 3)) & 1);
1685 const u32 K33 = -((w1s >> ( 0 + 2)) & 1);
1686 const u32 K34 = -((w1s >> ( 0 + 1)) & 1);
1687 const u32 K35 = -((w1s >> ( 8 + 7)) & 1);
1688 const u32 K36 = -((w1s >> ( 8 + 6)) & 1);
1689 const u32 K37 = -((w1s >> ( 8 + 5)) & 1);
1690 const u32 K38 = -((w1s >> ( 8 + 4)) & 1);
1691 const u32 K39 = -((w1s >> ( 8 + 3)) & 1);
1692 const u32 K40 = -((w1s >> ( 8 + 2)) & 1);
1693 const u32 K41 = -((w1s >> ( 8 + 1)) & 1);
1694 const u32 K42 = -((w1s >> (16 + 7)) & 1);
1695 const u32 K43 = -((w1s >> (16 + 6)) & 1);
1696 const u32 K44 = -((w1s >> (16 + 5)) & 1);
1697 const u32 K45 = -((w1s >> (16 + 4)) & 1);
1698 const u32 K46 = -((w1s >> (16 + 3)) & 1);
1699 const u32 K47 = -((w1s >> (16 + 2)) & 1);
1700 const u32 K48 = -((w1s >> (16 + 1)) & 1);
1701 const u32 K49 = -((w1s >> (24 + 7)) & 1);
1702 const u32 K50 = -((w1s >> (24 + 6)) & 1);
1703 const u32 K51 = -((w1s >> (24 + 5)) & 1);
1704 const u32 K52 = -((w1s >> (24 + 4)) & 1);
1705 const u32 K53 = -((w1s >> (24 + 3)) & 1);
1706 const u32 K54 = -((w1s >> (24 + 2)) & 1);
1707 const u32 K55 = -((w1s >> (24 + 1)) & 1);
1713 const u32 bf_loops = bfs_cnt;
1715 for (u32 il_pos = 0, pc_pos = 0; il_pos < bf_loops; il_pos += 32, pc_pos++)
1746 k00 |= c_tm[pc_pos].b[ 0];
1747 k01 |= c_tm[pc_pos].b[ 1];
1748 k02 |= c_tm[pc_pos].b[ 2];
1749 k03 |= c_tm[pc_pos].b[ 3];
1750 k04 |= c_tm[pc_pos].b[ 4];
1751 k05 |= c_tm[pc_pos].b[ 5];
1752 k06 |= c_tm[pc_pos].b[ 6];
1753 k07 |= c_tm[pc_pos].b[ 7];
1754 k08 |= c_tm[pc_pos].b[ 8];
1755 k09 |= c_tm[pc_pos].b[ 9];
1756 k10 |= c_tm[pc_pos].b[10];
1757 k11 |= c_tm[pc_pos].b[11];
1758 k12 |= c_tm[pc_pos].b[12];
1759 k13 |= c_tm[pc_pos].b[13];
1760 k14 |= c_tm[pc_pos].b[14];
1761 k15 |= c_tm[pc_pos].b[15];
1762 k16 |= c_tm[pc_pos].b[16];
1763 k17 |= c_tm[pc_pos].b[17];
1764 k18 |= c_tm[pc_pos].b[18];
1765 k19 |= c_tm[pc_pos].b[19];
1766 k20 |= c_tm[pc_pos].b[20];
1767 k21 |= c_tm[pc_pos].b[21];
1768 k22 |= c_tm[pc_pos].b[22];
1769 k23 |= c_tm[pc_pos].b[23];
1770 k24 |= c_tm[pc_pos].b[24];
1771 k25 |= c_tm[pc_pos].b[25];
1772 k26 |= c_tm[pc_pos].b[26];
1773 k27 |= c_tm[pc_pos].b[27];
1843 k00, k01, k02, k03, k04, k05, k06,
1844 k07, k08, k09, k10, k11, k12, k13,
1845 k14, k15, k16, k17, k18, k19, k20,
1846 k21, k22, k23, k24, k25, k26, k27,
1847 K28, K29, K30, K31, K32, K33, K34,
1848 K35, K36, K37, K38, K39, K40, K41,
1849 K42, K43, K44, K45, K46, K47, K48,
1850 K49, K50, K51, K52, K53, K54, K55,
1851 D00, D01, D02, D03, D04, D05, D06, D07,
1852 D08, D09, D10, D11, D12, D13, D14, D15,
1853 D16, D17, D18, D19, D20, D21, D22, D23,
1854 D24, D25, D26, D27, D28, D29, D30, D31,
1855 D32, D33, D34, D35, D36, D37, D38, D39,
1856 D40, D41, D42, D43, D44, D45, D46, D47,
1857 D48, D49, D50, D51, D52, D53, D54, D55,
1858 D56, D57, D58, D59, D60, D61, D62, D63
1863 tmpResult |= D00 ^ S00;
1864 tmpResult |= D01 ^ S01;
1865 tmpResult |= D02 ^ S02;
1866 tmpResult |= D03 ^ S03;
1867 tmpResult |= D04 ^ S04;
1868 tmpResult |= D05 ^ S05;
1869 tmpResult |= D06 ^ S06;
1870 tmpResult |= D07 ^ S07;
1871 tmpResult |= D08 ^ S08;
1872 tmpResult |= D09 ^ S09;
1873 tmpResult |= D10 ^ S10;
1874 tmpResult |= D11 ^ S11;
1875 tmpResult |= D12 ^ S12;
1876 tmpResult |= D13 ^ S13;
1877 tmpResult |= D14 ^ S14;
1878 tmpResult |= D15 ^ S15;
1879 tmpResult |= D16 ^ S16;
1880 tmpResult |= D17 ^ S17;
1881 tmpResult |= D18 ^ S18;
1882 tmpResult |= D19 ^ S19;
1883 tmpResult |= D20 ^ S20;
1884 tmpResult |= D21 ^ S21;
1885 tmpResult |= D22 ^ S22;
1886 tmpResult |= D23 ^ S23;
1887 tmpResult |= D24 ^ S24;
1888 tmpResult |= D25 ^ S25;
1889 tmpResult |= D26 ^ S26;
1890 tmpResult |= D27 ^ S27;
1891 tmpResult |= D28 ^ S28;
1892 tmpResult |= D29 ^ S29;
1893 tmpResult |= D30 ^ S30;
1894 tmpResult |= D31 ^ S31;
1895 tmpResult |= D32 ^ S32;
1896 tmpResult |= D33 ^ S33;
1897 tmpResult |= D34 ^ S34;
1898 tmpResult |= D35 ^ S35;
1899 tmpResult |= D36 ^ S36;
1900 tmpResult |= D37 ^ S37;
1901 tmpResult |= D38 ^ S38;
1902 tmpResult |= D39 ^ S39;
1903 tmpResult |= D40 ^ S40;
1904 tmpResult |= D41 ^ S41;
1905 tmpResult |= D42 ^ S42;
1906 tmpResult |= D43 ^ S43;
1907 tmpResult |= D44 ^ S44;
1908 tmpResult |= D45 ^ S45;
1909 tmpResult |= D46 ^ S46;
1910 tmpResult |= D47 ^ S47;
1912 if (tmpResult == 0xffffffff) continue;
1914 tmpResult |= D48 ^ S48;
1915 tmpResult |= D49 ^ S49;
1916 tmpResult |= D50 ^ S50;
1917 tmpResult |= D51 ^ S51;
1918 tmpResult |= D52 ^ S52;
1919 tmpResult |= D53 ^ S53;
1920 tmpResult |= D54 ^ S54;
1921 tmpResult |= D55 ^ S55;
1922 tmpResult |= D56 ^ S56;
1923 tmpResult |= D57 ^ S57;
1924 tmpResult |= D58 ^ S58;
1925 tmpResult |= D59 ^ S59;
1926 tmpResult |= D60 ^ S60;
1927 tmpResult |= D61 ^ S61;
1928 tmpResult |= D62 ^ S62;
1929 tmpResult |= D63 ^ S63;
1931 if (tmpResult == 0xffffffff) continue;
1933 const u32 slice = 31 - __clz (~tmpResult);
1935 #include VECT_COMPARE_S
1939 extern "C" __global__ void __launch_bounds__ (64, 1) m01500_tb (pw_t *pws)
1941 // not used here, inlined code
1944 extern "C" __global__ void __launch_bounds__ (32, 1) m01500_tm (const u32 *d_bfs, bs_word_t *d_tm)
1946 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1948 const u32 block = gid / 32;
1949 const u32 slice = gid % 32;
1951 const u32 w0 = c_bfs[gid];
1953 const u32 w0s = (w0 << 1) & 0xfefefefe;
1956 for (int i = 0, j = 0; i < 32; i += 8, j += 7)
1958 atomicOr (&d_tm[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
1959 atomicOr (&d_tm[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice));
1960 atomicOr (&d_tm[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice));
1961 atomicOr (&d_tm[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice));
1962 atomicOr (&d_tm[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice));
1963 atomicOr (&d_tm[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice));
1964 atomicOr (&d_tm[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice));
1968 extern "C" __global__ void __launch_bounds__ (64, 1) m01500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1974 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1975 const u32 lid = threadIdx.x;
1977 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
1978 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
1982 s_S[lid] = -((s0 >> lid - 0) & 1);
1986 s_S[lid] = -((s1 >> lid - 32) & 1);
1991 if (gid >= gid_max) return;
1997 m01500m (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2000 extern "C" __global__ void __launch_bounds__ (64, 1) m01500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2004 extern "C" __global__ void __launch_bounds__ (64, 1) m01500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2008 extern "C" __global__ void __launch_bounds__ (64, 1) m01500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2014 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
2015 const u32 lid = threadIdx.x;
2017 const u32 s0 = digests_buf[digests_offset].digest_buf[0];
2018 const u32 s1 = digests_buf[digests_offset].digest_buf[1];
2022 s_S[lid] = -((s0 >> lid - 0) & 1);
2026 s_S[lid] = -((s1 >> lid - 32) & 1);
2031 if (gid >= gid_max) return;
2037 m01500s (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
2040 extern "C" __global__ void __launch_bounds__ (64, 1) m01500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
2044 extern "C" __global__ void __launch_bounds__ (64, 1) m01500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)