2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
29 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
30 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
33 #define PERM_OP(a,b,tt,n,m) \
43 #define HPERM_OP(a,tt,n,m) \
49 tt = tt >> (16 + n); \
55 PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \
56 PERM_OP (l, r, tt, 16, 0x0000ffff); \
57 PERM_OP (r, l, tt, 2, 0x33333333); \
58 PERM_OP (l, r, tt, 8, 0x00ff00ff); \
59 PERM_OP (r, l, tt, 1, 0x55555555); \
64 PERM_OP (l, r, tt, 1, 0x55555555); \
65 PERM_OP (r, l, tt, 8, 0x00ff00ff); \
66 PERM_OP (l, r, tt, 2, 0x33333333); \
67 PERM_OP (r, l, tt, 16, 0x0000ffff); \
68 PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \
71 __device__ __constant__ u32 c_SPtrans[8][64] =
74 0x00820200, 0x00020000, 0x80800000, 0x80820200,
75 0x00800000, 0x80020200, 0x80020000, 0x80800000,
76 0x80020200, 0x00820200, 0x00820000, 0x80000200,
77 0x80800200, 0x00800000, 0x00000000, 0x80020000,
78 0x00020000, 0x80000000, 0x00800200, 0x00020200,
79 0x80820200, 0x00820000, 0x80000200, 0x00800200,
80 0x80000000, 0x00000200, 0x00020200, 0x80820000,
81 0x00000200, 0x80800200, 0x80820000, 0x00000000,
82 0x00000000, 0x80820200, 0x00800200, 0x80020000,
83 0x00820200, 0x00020000, 0x80000200, 0x00800200,
84 0x80820000, 0x00000200, 0x00020200, 0x80800000,
85 0x80020200, 0x80000000, 0x80800000, 0x00820000,
86 0x80820200, 0x00020200, 0x00820000, 0x80800200,
87 0x00800000, 0x80000200, 0x80020000, 0x00000000,
88 0x00020000, 0x00800000, 0x80800200, 0x00820200,
89 0x80000000, 0x80820000, 0x00000200, 0x80020200,
91 0x10042004, 0x00000000, 0x00042000, 0x10040000,
92 0x10000004, 0x00002004, 0x10002000, 0x00042000,
93 0x00002000, 0x10040004, 0x00000004, 0x10002000,
94 0x00040004, 0x10042000, 0x10040000, 0x00000004,
95 0x00040000, 0x10002004, 0x10040004, 0x00002000,
96 0x00042004, 0x10000000, 0x00000000, 0x00040004,
97 0x10002004, 0x00042004, 0x10042000, 0x10000004,
98 0x10000000, 0x00040000, 0x00002004, 0x10042004,
99 0x00040004, 0x10042000, 0x10002000, 0x00042004,
100 0x10042004, 0x00040004, 0x10000004, 0x00000000,
101 0x10000000, 0x00002004, 0x00040000, 0x10040004,
102 0x00002000, 0x10000000, 0x00042004, 0x10002004,
103 0x10042000, 0x00002000, 0x00000000, 0x10000004,
104 0x00000004, 0x10042004, 0x00042000, 0x10040000,
105 0x10040004, 0x00040000, 0x00002004, 0x10002000,
106 0x10002004, 0x00000004, 0x10040000, 0x00042000,
108 0x41000000, 0x01010040, 0x00000040, 0x41000040,
109 0x40010000, 0x01000000, 0x41000040, 0x00010040,
110 0x01000040, 0x00010000, 0x01010000, 0x40000000,
111 0x41010040, 0x40000040, 0x40000000, 0x41010000,
112 0x00000000, 0x40010000, 0x01010040, 0x00000040,
113 0x40000040, 0x41010040, 0x00010000, 0x41000000,
114 0x41010000, 0x01000040, 0x40010040, 0x01010000,
115 0x00010040, 0x00000000, 0x01000000, 0x40010040,
116 0x01010040, 0x00000040, 0x40000000, 0x00010000,
117 0x40000040, 0x40010000, 0x01010000, 0x41000040,
118 0x00000000, 0x01010040, 0x00010040, 0x41010000,
119 0x40010000, 0x01000000, 0x41010040, 0x40000000,
120 0x40010040, 0x41000000, 0x01000000, 0x41010040,
121 0x00010000, 0x01000040, 0x41000040, 0x00010040,
122 0x01000040, 0x00000000, 0x41010000, 0x40000040,
123 0x41000000, 0x40010040, 0x00000040, 0x01010000,
125 0x00100402, 0x04000400, 0x00000002, 0x04100402,
126 0x00000000, 0x04100000, 0x04000402, 0x00100002,
127 0x04100400, 0x04000002, 0x04000000, 0x00000402,
128 0x04000002, 0x00100402, 0x00100000, 0x04000000,
129 0x04100002, 0x00100400, 0x00000400, 0x00000002,
130 0x00100400, 0x04000402, 0x04100000, 0x00000400,
131 0x00000402, 0x00000000, 0x00100002, 0x04100400,
132 0x04000400, 0x04100002, 0x04100402, 0x00100000,
133 0x04100002, 0x00000402, 0x00100000, 0x04000002,
134 0x00100400, 0x04000400, 0x00000002, 0x04100000,
135 0x04000402, 0x00000000, 0x00000400, 0x00100002,
136 0x00000000, 0x04100002, 0x04100400, 0x00000400,
137 0x04000000, 0x04100402, 0x00100402, 0x00100000,
138 0x04100402, 0x00000002, 0x04000400, 0x00100402,
139 0x00100002, 0x00100400, 0x04100000, 0x04000402,
140 0x00000402, 0x04000000, 0x04000002, 0x04100400,
142 0x02000000, 0x00004000, 0x00000100, 0x02004108,
143 0x02004008, 0x02000100, 0x00004108, 0x02004000,
144 0x00004000, 0x00000008, 0x02000008, 0x00004100,
145 0x02000108, 0x02004008, 0x02004100, 0x00000000,
146 0x00004100, 0x02000000, 0x00004008, 0x00000108,
147 0x02000100, 0x00004108, 0x00000000, 0x02000008,
148 0x00000008, 0x02000108, 0x02004108, 0x00004008,
149 0x02004000, 0x00000100, 0x00000108, 0x02004100,
150 0x02004100, 0x02000108, 0x00004008, 0x02004000,
151 0x00004000, 0x00000008, 0x02000008, 0x02000100,
152 0x02000000, 0x00004100, 0x02004108, 0x00000000,
153 0x00004108, 0x02000000, 0x00000100, 0x00004008,
154 0x02000108, 0x00000100, 0x00000000, 0x02004108,
155 0x02004008, 0x02004100, 0x00000108, 0x00004000,
156 0x00004100, 0x02004008, 0x02000100, 0x00000108,
157 0x00000008, 0x00004108, 0x02004000, 0x02000008,
159 0x20000010, 0x00080010, 0x00000000, 0x20080800,
160 0x00080010, 0x00000800, 0x20000810, 0x00080000,
161 0x00000810, 0x20080810, 0x00080800, 0x20000000,
162 0x20000800, 0x20000010, 0x20080000, 0x00080810,
163 0x00080000, 0x20000810, 0x20080010, 0x00000000,
164 0x00000800, 0x00000010, 0x20080800, 0x20080010,
165 0x20080810, 0x20080000, 0x20000000, 0x00000810,
166 0x00000010, 0x00080800, 0x00080810, 0x20000800,
167 0x00000810, 0x20000000, 0x20000800, 0x00080810,
168 0x20080800, 0x00080010, 0x00000000, 0x20000800,
169 0x20000000, 0x00000800, 0x20080010, 0x00080000,
170 0x00080010, 0x20080810, 0x00080800, 0x00000010,
171 0x20080810, 0x00080800, 0x00080000, 0x20000810,
172 0x20000010, 0x20080000, 0x00080810, 0x00000000,
173 0x00000800, 0x20000010, 0x20000810, 0x20080800,
174 0x20080000, 0x00000810, 0x00000010, 0x20080010,
176 0x00001000, 0x00000080, 0x00400080, 0x00400001,
177 0x00401081, 0x00001001, 0x00001080, 0x00000000,
178 0x00400000, 0x00400081, 0x00000081, 0x00401000,
179 0x00000001, 0x00401080, 0x00401000, 0x00000081,
180 0x00400081, 0x00001000, 0x00001001, 0x00401081,
181 0x00000000, 0x00400080, 0x00400001, 0x00001080,
182 0x00401001, 0x00001081, 0x00401080, 0x00000001,
183 0x00001081, 0x00401001, 0x00000080, 0x00400000,
184 0x00001081, 0x00401000, 0x00401001, 0x00000081,
185 0x00001000, 0x00000080, 0x00400000, 0x00401001,
186 0x00400081, 0x00001081, 0x00001080, 0x00000000,
187 0x00000080, 0x00400001, 0x00000001, 0x00400080,
188 0x00000000, 0x00400081, 0x00400080, 0x00001080,
189 0x00000081, 0x00001000, 0x00401081, 0x00400000,
190 0x00401080, 0x00000001, 0x00001001, 0x00401081,
191 0x00400001, 0x00401080, 0x00401000, 0x00001001,
193 0x08200020, 0x08208000, 0x00008020, 0x00000000,
194 0x08008000, 0x00200020, 0x08200000, 0x08208020,
195 0x00000020, 0x08000000, 0x00208000, 0x00008020,
196 0x00208020, 0x08008020, 0x08000020, 0x08200000,
197 0x00008000, 0x00208020, 0x00200020, 0x08008000,
198 0x08208020, 0x08000020, 0x00000000, 0x00208000,
199 0x08000000, 0x00200000, 0x08008020, 0x08200020,
200 0x00200000, 0x00008000, 0x08208000, 0x00000020,
201 0x00200000, 0x00008000, 0x08000020, 0x08208020,
202 0x00008020, 0x08000000, 0x00000000, 0x00208000,
203 0x08200020, 0x08008020, 0x08008000, 0x00200020,
204 0x08208000, 0x00000020, 0x00200020, 0x08008000,
205 0x08208020, 0x00200000, 0x08200000, 0x08000020,
206 0x00208000, 0x00008020, 0x08008020, 0x08200000,
207 0x00000020, 0x08208000, 0x00208020, 0x00000000,
208 0x08000000, 0x08200020, 0x00008000, 0x00208020
211 __device__ __constant__ u32 c_skb[8][64] =
213 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
214 0x00000000, 0x00000010, 0x20000000, 0x20000010,
215 0x00010000, 0x00010010, 0x20010000, 0x20010010,
216 0x00000800, 0x00000810, 0x20000800, 0x20000810,
217 0x00010800, 0x00010810, 0x20010800, 0x20010810,
218 0x00000020, 0x00000030, 0x20000020, 0x20000030,
219 0x00010020, 0x00010030, 0x20010020, 0x20010030,
220 0x00000820, 0x00000830, 0x20000820, 0x20000830,
221 0x00010820, 0x00010830, 0x20010820, 0x20010830,
222 0x00080000, 0x00080010, 0x20080000, 0x20080010,
223 0x00090000, 0x00090010, 0x20090000, 0x20090010,
224 0x00080800, 0x00080810, 0x20080800, 0x20080810,
225 0x00090800, 0x00090810, 0x20090800, 0x20090810,
226 0x00080020, 0x00080030, 0x20080020, 0x20080030,
227 0x00090020, 0x00090030, 0x20090020, 0x20090030,
228 0x00080820, 0x00080830, 0x20080820, 0x20080830,
229 0x00090820, 0x00090830, 0x20090820, 0x20090830,
230 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
231 0x00000000, 0x02000000, 0x00002000, 0x02002000,
232 0x00200000, 0x02200000, 0x00202000, 0x02202000,
233 0x00000004, 0x02000004, 0x00002004, 0x02002004,
234 0x00200004, 0x02200004, 0x00202004, 0x02202004,
235 0x00000400, 0x02000400, 0x00002400, 0x02002400,
236 0x00200400, 0x02200400, 0x00202400, 0x02202400,
237 0x00000404, 0x02000404, 0x00002404, 0x02002404,
238 0x00200404, 0x02200404, 0x00202404, 0x02202404,
239 0x10000000, 0x12000000, 0x10002000, 0x12002000,
240 0x10200000, 0x12200000, 0x10202000, 0x12202000,
241 0x10000004, 0x12000004, 0x10002004, 0x12002004,
242 0x10200004, 0x12200004, 0x10202004, 0x12202004,
243 0x10000400, 0x12000400, 0x10002400, 0x12002400,
244 0x10200400, 0x12200400, 0x10202400, 0x12202400,
245 0x10000404, 0x12000404, 0x10002404, 0x12002404,
246 0x10200404, 0x12200404, 0x10202404, 0x12202404,
247 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
248 0x00000000, 0x00000001, 0x00040000, 0x00040001,
249 0x01000000, 0x01000001, 0x01040000, 0x01040001,
250 0x00000002, 0x00000003, 0x00040002, 0x00040003,
251 0x01000002, 0x01000003, 0x01040002, 0x01040003,
252 0x00000200, 0x00000201, 0x00040200, 0x00040201,
253 0x01000200, 0x01000201, 0x01040200, 0x01040201,
254 0x00000202, 0x00000203, 0x00040202, 0x00040203,
255 0x01000202, 0x01000203, 0x01040202, 0x01040203,
256 0x08000000, 0x08000001, 0x08040000, 0x08040001,
257 0x09000000, 0x09000001, 0x09040000, 0x09040001,
258 0x08000002, 0x08000003, 0x08040002, 0x08040003,
259 0x09000002, 0x09000003, 0x09040002, 0x09040003,
260 0x08000200, 0x08000201, 0x08040200, 0x08040201,
261 0x09000200, 0x09000201, 0x09040200, 0x09040201,
262 0x08000202, 0x08000203, 0x08040202, 0x08040203,
263 0x09000202, 0x09000203, 0x09040202, 0x09040203,
264 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
265 0x00000000, 0x00100000, 0x00000100, 0x00100100,
266 0x00000008, 0x00100008, 0x00000108, 0x00100108,
267 0x00001000, 0x00101000, 0x00001100, 0x00101100,
268 0x00001008, 0x00101008, 0x00001108, 0x00101108,
269 0x04000000, 0x04100000, 0x04000100, 0x04100100,
270 0x04000008, 0x04100008, 0x04000108, 0x04100108,
271 0x04001000, 0x04101000, 0x04001100, 0x04101100,
272 0x04001008, 0x04101008, 0x04001108, 0x04101108,
273 0x00020000, 0x00120000, 0x00020100, 0x00120100,
274 0x00020008, 0x00120008, 0x00020108, 0x00120108,
275 0x00021000, 0x00121000, 0x00021100, 0x00121100,
276 0x00021008, 0x00121008, 0x00021108, 0x00121108,
277 0x04020000, 0x04120000, 0x04020100, 0x04120100,
278 0x04020008, 0x04120008, 0x04020108, 0x04120108,
279 0x04021000, 0x04121000, 0x04021100, 0x04121100,
280 0x04021008, 0x04121008, 0x04021108, 0x04121108,
281 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
282 0x00000000, 0x10000000, 0x00010000, 0x10010000,
283 0x00000004, 0x10000004, 0x00010004, 0x10010004,
284 0x20000000, 0x30000000, 0x20010000, 0x30010000,
285 0x20000004, 0x30000004, 0x20010004, 0x30010004,
286 0x00100000, 0x10100000, 0x00110000, 0x10110000,
287 0x00100004, 0x10100004, 0x00110004, 0x10110004,
288 0x20100000, 0x30100000, 0x20110000, 0x30110000,
289 0x20100004, 0x30100004, 0x20110004, 0x30110004,
290 0x00001000, 0x10001000, 0x00011000, 0x10011000,
291 0x00001004, 0x10001004, 0x00011004, 0x10011004,
292 0x20001000, 0x30001000, 0x20011000, 0x30011000,
293 0x20001004, 0x30001004, 0x20011004, 0x30011004,
294 0x00101000, 0x10101000, 0x00111000, 0x10111000,
295 0x00101004, 0x10101004, 0x00111004, 0x10111004,
296 0x20101000, 0x30101000, 0x20111000, 0x30111000,
297 0x20101004, 0x30101004, 0x20111004, 0x30111004,
298 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
299 0x00000000, 0x08000000, 0x00000008, 0x08000008,
300 0x00000400, 0x08000400, 0x00000408, 0x08000408,
301 0x00020000, 0x08020000, 0x00020008, 0x08020008,
302 0x00020400, 0x08020400, 0x00020408, 0x08020408,
303 0x00000001, 0x08000001, 0x00000009, 0x08000009,
304 0x00000401, 0x08000401, 0x00000409, 0x08000409,
305 0x00020001, 0x08020001, 0x00020009, 0x08020009,
306 0x00020401, 0x08020401, 0x00020409, 0x08020409,
307 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
308 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
309 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
310 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
311 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
312 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
313 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
314 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
315 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
316 0x00000000, 0x00000100, 0x00080000, 0x00080100,
317 0x01000000, 0x01000100, 0x01080000, 0x01080100,
318 0x00000010, 0x00000110, 0x00080010, 0x00080110,
319 0x01000010, 0x01000110, 0x01080010, 0x01080110,
320 0x00200000, 0x00200100, 0x00280000, 0x00280100,
321 0x01200000, 0x01200100, 0x01280000, 0x01280100,
322 0x00200010, 0x00200110, 0x00280010, 0x00280110,
323 0x01200010, 0x01200110, 0x01280010, 0x01280110,
324 0x00000200, 0x00000300, 0x00080200, 0x00080300,
325 0x01000200, 0x01000300, 0x01080200, 0x01080300,
326 0x00000210, 0x00000310, 0x00080210, 0x00080310,
327 0x01000210, 0x01000310, 0x01080210, 0x01080310,
328 0x00200200, 0x00200300, 0x00280200, 0x00280300,
329 0x01200200, 0x01200300, 0x01280200, 0x01280300,
330 0x00200210, 0x00200310, 0x00280210, 0x00280310,
331 0x01200210, 0x01200310, 0x01280210, 0x01280310,
332 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
333 0x00000000, 0x04000000, 0x00040000, 0x04040000,
334 0x00000002, 0x04000002, 0x00040002, 0x04040002,
335 0x00002000, 0x04002000, 0x00042000, 0x04042000,
336 0x00002002, 0x04002002, 0x00042002, 0x04042002,
337 0x00000020, 0x04000020, 0x00040020, 0x04040020,
338 0x00000022, 0x04000022, 0x00040022, 0x04040022,
339 0x00002020, 0x04002020, 0x00042020, 0x04042020,
340 0x00002022, 0x04002022, 0x00042022, 0x04042022,
341 0x00000800, 0x04000800, 0x00040800, 0x04040800,
342 0x00000802, 0x04000802, 0x00040802, 0x04040802,
343 0x00002800, 0x04002800, 0x00042800, 0x04042800,
344 0x00002802, 0x04002802, 0x00042802, 0x04042802,
345 0x00000820, 0x04000820, 0x00040820, 0x04040820,
346 0x00000822, 0x04000822, 0x00040822, 0x04040822,
347 0x00002820, 0x04002820, 0x00042820, 0x04042820,
348 0x00002822, 0x04002822, 0x00042822, 0x04042822
352 #define BOX(i,n,S) (u32x) ((S)[(n)][(i)])
356 #define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
360 #define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
363 __device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64])
367 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
368 HPERM_OP (c, tt, 2, 0xcccc0000);
369 HPERM_OP (d, tt, 2, 0xcccc0000);
370 PERM_OP (d, c, tt, 1, 0x55555555);
371 PERM_OP (c, d, tt, 8, 0x00ff00ff);
372 PERM_OP (d, c, tt, 1, 0x55555555);
374 d = ((d & 0x000000ff) << 16)
375 | ((d & 0x0000ff00) << 0)
376 | ((d & 0x00ff0000) >> 16)
377 | ((c & 0xf0000000) >> 4);
382 for (u32 i = 0; i < 16; i++)
384 const u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
385 const u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 };
387 c = c >> shifts3s0[i] | c << shifts3s1[i];
388 d = d >> shifts3s0[i] | d << shifts3s1[i];
393 const u32x c00 = (c >> 0) & 0x0000003f;
394 const u32x c06 = (c >> 6) & 0x00383003;
395 const u32x c07 = (c >> 7) & 0x0000003c;
396 const u32x c13 = (c >> 13) & 0x0000060f;
397 const u32x c20 = (c >> 20) & 0x00000001;
399 u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb)
400 | BOX (((c06 >> 0) & 0xff)
401 |((c07 >> 0) & 0xff), 1, s_skb)
402 | BOX (((c13 >> 0) & 0xff)
403 |((c06 >> 8) & 0xff), 2, s_skb)
404 | BOX (((c20 >> 0) & 0xff)
406 |((c06 >> 16) & 0xff), 3, s_skb);
408 const u32x d00 = (d >> 0) & 0x00003c3f;
409 const u32x d07 = (d >> 7) & 0x00003f03;
410 const u32x d21 = (d >> 21) & 0x0000000f;
411 const u32x d22 = (d >> 22) & 0x00000030;
413 u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb)
414 | BOX (((d07 >> 0) & 0xff)
415 |((d00 >> 8) & 0xff), 5, s_skb)
416 | BOX (((d07 >> 8) & 0xff), 6, s_skb)
417 | BOX (((d21 >> 0) & 0xff)
418 |((d22 >> 0) & 0xff), 7, s_skb);
420 Kc[i] = ((t << 16) | (s & 0x0000ffff));
421 Kd[i] = ((s >> 16) | (t & 0xffff0000));
425 __device__ static void _des_crypt_encrypt (u32x iv[2], u32 mask, u32 rounds, u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64])
429 const u32 E0 = ((mask >> 0) & 0x003f)
430 | ((mask >> 4) & 0x3f00);
431 const u32 E1 = ((mask >> 2) & 0x03f0)
432 | ((mask >> 6) & 0xf000)
433 | ((mask >> 22) & 0x0003);
438 for (u32 i = 0; i < rounds; i++)
441 for (u32 j = 0; j < 16; j++)
444 u32x t = r ^ (r >> 16);
468 const u32x um = u & 0x3f3f3f3f;
469 const u32x tm = t & 0x3f3f3f3f;
471 l ^= BOX (((um >> 0) & 0xff), 0, s_SPtrans)
472 | BOX (((um >> 8) & 0xff), 2, s_SPtrans)
473 | BOX (((um >> 16) & 0xff), 4, s_SPtrans)
474 | BOX (((um >> 24) & 0xff), 6, s_SPtrans)
475 | BOX (((tm >> 0) & 0xff), 1, s_SPtrans)
476 | BOX (((tm >> 8) & 0xff), 3, s_SPtrans)
477 | BOX (((tm >> 16) & 0xff), 5, s_SPtrans)
478 | BOX (((tm >> 24) & 0xff), 7, s_SPtrans);
494 extern "C" __global__ void __launch_bounds__ (256, 1) m12400_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, bsdicrypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
500 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
501 const u32 lid = threadIdx.x;
507 __shared__ u32 s_skb[8][64];
508 __shared__ u32 s_SPtrans[8][64];
512 s_skb[0][lid] = c_skb[0][lid];
513 s_skb[1][lid] = c_skb[1][lid];
514 s_skb[2][lid] = c_skb[2][lid];
515 s_skb[3][lid] = c_skb[3][lid];
516 s_skb[4][lid] = c_skb[4][lid];
517 s_skb[5][lid] = c_skb[5][lid];
518 s_skb[6][lid] = c_skb[6][lid];
519 s_skb[7][lid] = c_skb[7][lid];
521 s_SPtrans[0][lid] = c_SPtrans[0][lid];
522 s_SPtrans[1][lid] = c_SPtrans[1][lid];
523 s_SPtrans[2][lid] = c_SPtrans[2][lid];
524 s_SPtrans[3][lid] = c_SPtrans[3][lid];
525 s_SPtrans[4][lid] = c_SPtrans[4][lid];
526 s_SPtrans[5][lid] = c_SPtrans[5][lid];
527 s_SPtrans[6][lid] = c_SPtrans[6][lid];
528 s_SPtrans[7][lid] = c_SPtrans[7][lid];
533 if (gid >= gid_max) return;
541 w[ 0] = pws[gid].i[ 0];
542 w[ 1] = pws[gid].i[ 1];
543 w[ 2] = pws[gid].i[ 2];
544 w[ 3] = pws[gid].i[ 3];
545 w[ 4] = pws[gid].i[ 4];
546 w[ 5] = pws[gid].i[ 5];
547 w[ 6] = pws[gid].i[ 6];
548 w[ 7] = pws[gid].i[ 7];
549 w[ 8] = pws[gid].i[ 8];
550 w[ 9] = pws[gid].i[ 9];
551 w[10] = pws[gid].i[10];
552 w[11] = pws[gid].i[11];
553 w[12] = pws[gid].i[12];
554 w[13] = pws[gid].i[13];
555 w[14] = pws[gid].i[14];
556 w[15] = pws[gid].i[15];
558 u32 pw_len = pws[gid].pw_len;
568 out[0] = (w[0] << 1) & 0xfefefefe;
569 out[1] = (w[1] << 1) & 0xfefefefe;
571 for (u32 i = 8, j = 2; i < pw_len; i += 8, j += 2)
573 _des_crypt_keysetup (out[0], out[1], Kc, Kd, s_skb);
575 IP (out[0], out[1], tt);
577 out[0] = rotr32 (out[0], 31);
578 out[1] = rotr32 (out[1], 31);
580 _des_crypt_encrypt (out, 0, 1, Kc, Kd, s_SPtrans);
582 out[0] = rotl32 (out[0], 31);
583 out[1] = rotl32 (out[1], 31);
585 FP (out[1], out[0], tt);
587 const u32 R = (w[j + 0] << 1) & 0xfefefefe;
588 const u32 L = (w[j + 1] << 1) & 0xfefefefe;
595 out[0] = (out[0] & 0xfefefefe) >> 1;
596 out[1] = (out[1] & 0xfefefefe) >> 1;
598 out[0] = (out[0] << 1) & 0xfefefefe;
599 out[1] = (out[1] << 1) & 0xfefefefe;
602 _des_crypt_keysetup (out[0], out[1], Kc, Kd, s_skb);
604 tmps[gid].Kc[ 0] = Kc[ 0];
605 tmps[gid].Kc[ 1] = Kc[ 1];
606 tmps[gid].Kc[ 2] = Kc[ 2];
607 tmps[gid].Kc[ 3] = Kc[ 3];
608 tmps[gid].Kc[ 4] = Kc[ 4];
609 tmps[gid].Kc[ 5] = Kc[ 5];
610 tmps[gid].Kc[ 6] = Kc[ 6];
611 tmps[gid].Kc[ 7] = Kc[ 7];
612 tmps[gid].Kc[ 8] = Kc[ 8];
613 tmps[gid].Kc[ 9] = Kc[ 9];
614 tmps[gid].Kc[10] = Kc[10];
615 tmps[gid].Kc[11] = Kc[11];
616 tmps[gid].Kc[12] = Kc[12];
617 tmps[gid].Kc[13] = Kc[13];
618 tmps[gid].Kc[14] = Kc[14];
619 tmps[gid].Kc[15] = Kc[15];
621 tmps[gid].Kd[ 0] = Kd[ 0];
622 tmps[gid].Kd[ 1] = Kd[ 1];
623 tmps[gid].Kd[ 2] = Kd[ 2];
624 tmps[gid].Kd[ 3] = Kd[ 3];
625 tmps[gid].Kd[ 4] = Kd[ 4];
626 tmps[gid].Kd[ 5] = Kd[ 5];
627 tmps[gid].Kd[ 6] = Kd[ 6];
628 tmps[gid].Kd[ 7] = Kd[ 7];
629 tmps[gid].Kd[ 8] = Kd[ 8];
630 tmps[gid].Kd[ 9] = Kd[ 9];
631 tmps[gid].Kd[10] = Kd[10];
632 tmps[gid].Kd[11] = Kd[11];
633 tmps[gid].Kd[12] = Kd[12];
634 tmps[gid].Kd[13] = Kd[13];
635 tmps[gid].Kd[14] = Kd[14];
636 tmps[gid].Kd[15] = Kd[15];
642 extern "C" __global__ void __launch_bounds__ (256, 1) m12400_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, bsdicrypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
648 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
649 const u32 lid = threadIdx.x;
655 __shared__ u32 s_SPtrans[8][64];
659 s_SPtrans[0][lid] = c_SPtrans[0][lid];
660 s_SPtrans[1][lid] = c_SPtrans[1][lid];
661 s_SPtrans[2][lid] = c_SPtrans[2][lid];
662 s_SPtrans[3][lid] = c_SPtrans[3][lid];
663 s_SPtrans[4][lid] = c_SPtrans[4][lid];
664 s_SPtrans[5][lid] = c_SPtrans[5][lid];
665 s_SPtrans[6][lid] = c_SPtrans[6][lid];
666 s_SPtrans[7][lid] = c_SPtrans[7][lid];
671 if (gid >= gid_max) return;
675 Kc[ 0] = tmps[gid].Kc[ 0];
676 Kc[ 1] = tmps[gid].Kc[ 1];
677 Kc[ 2] = tmps[gid].Kc[ 2];
678 Kc[ 3] = tmps[gid].Kc[ 3];
679 Kc[ 4] = tmps[gid].Kc[ 4];
680 Kc[ 5] = tmps[gid].Kc[ 5];
681 Kc[ 6] = tmps[gid].Kc[ 6];
682 Kc[ 7] = tmps[gid].Kc[ 7];
683 Kc[ 8] = tmps[gid].Kc[ 8];
684 Kc[ 9] = tmps[gid].Kc[ 9];
685 Kc[10] = tmps[gid].Kc[10];
686 Kc[11] = tmps[gid].Kc[11];
687 Kc[12] = tmps[gid].Kc[12];
688 Kc[13] = tmps[gid].Kc[13];
689 Kc[14] = tmps[gid].Kc[14];
690 Kc[15] = tmps[gid].Kc[15];
694 Kd[ 0] = tmps[gid].Kd[ 0];
695 Kd[ 1] = tmps[gid].Kd[ 1];
696 Kd[ 2] = tmps[gid].Kd[ 2];
697 Kd[ 3] = tmps[gid].Kd[ 3];
698 Kd[ 4] = tmps[gid].Kd[ 4];
699 Kd[ 5] = tmps[gid].Kd[ 5];
700 Kd[ 6] = tmps[gid].Kd[ 6];
701 Kd[ 7] = tmps[gid].Kd[ 7];
702 Kd[ 8] = tmps[gid].Kd[ 8];
703 Kd[ 9] = tmps[gid].Kd[ 9];
704 Kd[10] = tmps[gid].Kd[10];
705 Kd[11] = tmps[gid].Kd[11];
706 Kd[12] = tmps[gid].Kd[12];
707 Kd[13] = tmps[gid].Kd[13];
708 Kd[14] = tmps[gid].Kd[14];
709 Kd[15] = tmps[gid].Kd[15];
713 iv[0] = tmps[gid].iv[0];
714 iv[1] = tmps[gid].iv[1];
716 const u32 mask = salt_bufs[salt_pos].salt_buf[0];
718 _des_crypt_encrypt (iv, mask, loop_cnt, Kc, Kd, s_SPtrans);
720 tmps[gid].Kc[ 0] = Kc[ 0];
721 tmps[gid].Kc[ 1] = Kc[ 1];
722 tmps[gid].Kc[ 2] = Kc[ 2];
723 tmps[gid].Kc[ 3] = Kc[ 3];
724 tmps[gid].Kc[ 4] = Kc[ 4];
725 tmps[gid].Kc[ 5] = Kc[ 5];
726 tmps[gid].Kc[ 6] = Kc[ 6];
727 tmps[gid].Kc[ 7] = Kc[ 7];
728 tmps[gid].Kc[ 8] = Kc[ 8];
729 tmps[gid].Kc[ 9] = Kc[ 9];
730 tmps[gid].Kc[10] = Kc[10];
731 tmps[gid].Kc[11] = Kc[11];
732 tmps[gid].Kc[12] = Kc[12];
733 tmps[gid].Kc[13] = Kc[13];
734 tmps[gid].Kc[14] = Kc[14];
735 tmps[gid].Kc[15] = Kc[15];
737 tmps[gid].Kd[ 0] = Kd[ 0];
738 tmps[gid].Kd[ 1] = Kd[ 1];
739 tmps[gid].Kd[ 2] = Kd[ 2];
740 tmps[gid].Kd[ 3] = Kd[ 3];
741 tmps[gid].Kd[ 4] = Kd[ 4];
742 tmps[gid].Kd[ 5] = Kd[ 5];
743 tmps[gid].Kd[ 6] = Kd[ 6];
744 tmps[gid].Kd[ 7] = Kd[ 7];
745 tmps[gid].Kd[ 8] = Kd[ 8];
746 tmps[gid].Kd[ 9] = Kd[ 9];
747 tmps[gid].Kd[10] = Kd[10];
748 tmps[gid].Kd[11] = Kd[11];
749 tmps[gid].Kd[12] = Kd[12];
750 tmps[gid].Kd[13] = Kd[13];
751 tmps[gid].Kd[14] = Kd[14];
752 tmps[gid].Kd[15] = Kd[15];
754 tmps[gid].iv[0] = iv[0];
755 tmps[gid].iv[1] = iv[1];
758 extern "C" __global__ void __launch_bounds__ (256, 1) m12400_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, bsdicrypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
764 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
766 if (gid >= gid_max) return;
768 const u32 lid = threadIdx.x;
770 const u32x r0 = tmps[gid].iv[0];
771 const u32x r1 = tmps[gid].iv[1];
777 #include VECT_COMPARE_M