2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
29 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
30 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
34 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
35 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
39 #define VECT_COMPARE_S "check_single_vect4_comp4.c"
40 #define VECT_COMPARE_M "check_multi_vect4_comp4.c"
43 __device__ __constant__ comb_t c_combs[1024];
45 #define PERM_OP(a,b,tt,n,m) \
55 #define HPERM_OP(a,tt,n,m) \
61 tt = tt >> (16 + n); \
65 __device__ __constant__ u32 c_SPtrans[8][64] =
68 0x00820200, 0x00020000, 0x80800000, 0x80820200,
69 0x00800000, 0x80020200, 0x80020000, 0x80800000,
70 0x80020200, 0x00820200, 0x00820000, 0x80000200,
71 0x80800200, 0x00800000, 0x00000000, 0x80020000,
72 0x00020000, 0x80000000, 0x00800200, 0x00020200,
73 0x80820200, 0x00820000, 0x80000200, 0x00800200,
74 0x80000000, 0x00000200, 0x00020200, 0x80820000,
75 0x00000200, 0x80800200, 0x80820000, 0x00000000,
76 0x00000000, 0x80820200, 0x00800200, 0x80020000,
77 0x00820200, 0x00020000, 0x80000200, 0x00800200,
78 0x80820000, 0x00000200, 0x00020200, 0x80800000,
79 0x80020200, 0x80000000, 0x80800000, 0x00820000,
80 0x80820200, 0x00020200, 0x00820000, 0x80800200,
81 0x00800000, 0x80000200, 0x80020000, 0x00000000,
82 0x00020000, 0x00800000, 0x80800200, 0x00820200,
83 0x80000000, 0x80820000, 0x00000200, 0x80020200,
85 0x10042004, 0x00000000, 0x00042000, 0x10040000,
86 0x10000004, 0x00002004, 0x10002000, 0x00042000,
87 0x00002000, 0x10040004, 0x00000004, 0x10002000,
88 0x00040004, 0x10042000, 0x10040000, 0x00000004,
89 0x00040000, 0x10002004, 0x10040004, 0x00002000,
90 0x00042004, 0x10000000, 0x00000000, 0x00040004,
91 0x10002004, 0x00042004, 0x10042000, 0x10000004,
92 0x10000000, 0x00040000, 0x00002004, 0x10042004,
93 0x00040004, 0x10042000, 0x10002000, 0x00042004,
94 0x10042004, 0x00040004, 0x10000004, 0x00000000,
95 0x10000000, 0x00002004, 0x00040000, 0x10040004,
96 0x00002000, 0x10000000, 0x00042004, 0x10002004,
97 0x10042000, 0x00002000, 0x00000000, 0x10000004,
98 0x00000004, 0x10042004, 0x00042000, 0x10040000,
99 0x10040004, 0x00040000, 0x00002004, 0x10002000,
100 0x10002004, 0x00000004, 0x10040000, 0x00042000,
102 0x41000000, 0x01010040, 0x00000040, 0x41000040,
103 0x40010000, 0x01000000, 0x41000040, 0x00010040,
104 0x01000040, 0x00010000, 0x01010000, 0x40000000,
105 0x41010040, 0x40000040, 0x40000000, 0x41010000,
106 0x00000000, 0x40010000, 0x01010040, 0x00000040,
107 0x40000040, 0x41010040, 0x00010000, 0x41000000,
108 0x41010000, 0x01000040, 0x40010040, 0x01010000,
109 0x00010040, 0x00000000, 0x01000000, 0x40010040,
110 0x01010040, 0x00000040, 0x40000000, 0x00010000,
111 0x40000040, 0x40010000, 0x01010000, 0x41000040,
112 0x00000000, 0x01010040, 0x00010040, 0x41010000,
113 0x40010000, 0x01000000, 0x41010040, 0x40000000,
114 0x40010040, 0x41000000, 0x01000000, 0x41010040,
115 0x00010000, 0x01000040, 0x41000040, 0x00010040,
116 0x01000040, 0x00000000, 0x41010000, 0x40000040,
117 0x41000000, 0x40010040, 0x00000040, 0x01010000,
119 0x00100402, 0x04000400, 0x00000002, 0x04100402,
120 0x00000000, 0x04100000, 0x04000402, 0x00100002,
121 0x04100400, 0x04000002, 0x04000000, 0x00000402,
122 0x04000002, 0x00100402, 0x00100000, 0x04000000,
123 0x04100002, 0x00100400, 0x00000400, 0x00000002,
124 0x00100400, 0x04000402, 0x04100000, 0x00000400,
125 0x00000402, 0x00000000, 0x00100002, 0x04100400,
126 0x04000400, 0x04100002, 0x04100402, 0x00100000,
127 0x04100002, 0x00000402, 0x00100000, 0x04000002,
128 0x00100400, 0x04000400, 0x00000002, 0x04100000,
129 0x04000402, 0x00000000, 0x00000400, 0x00100002,
130 0x00000000, 0x04100002, 0x04100400, 0x00000400,
131 0x04000000, 0x04100402, 0x00100402, 0x00100000,
132 0x04100402, 0x00000002, 0x04000400, 0x00100402,
133 0x00100002, 0x00100400, 0x04100000, 0x04000402,
134 0x00000402, 0x04000000, 0x04000002, 0x04100400,
136 0x02000000, 0x00004000, 0x00000100, 0x02004108,
137 0x02004008, 0x02000100, 0x00004108, 0x02004000,
138 0x00004000, 0x00000008, 0x02000008, 0x00004100,
139 0x02000108, 0x02004008, 0x02004100, 0x00000000,
140 0x00004100, 0x02000000, 0x00004008, 0x00000108,
141 0x02000100, 0x00004108, 0x00000000, 0x02000008,
142 0x00000008, 0x02000108, 0x02004108, 0x00004008,
143 0x02004000, 0x00000100, 0x00000108, 0x02004100,
144 0x02004100, 0x02000108, 0x00004008, 0x02004000,
145 0x00004000, 0x00000008, 0x02000008, 0x02000100,
146 0x02000000, 0x00004100, 0x02004108, 0x00000000,
147 0x00004108, 0x02000000, 0x00000100, 0x00004008,
148 0x02000108, 0x00000100, 0x00000000, 0x02004108,
149 0x02004008, 0x02004100, 0x00000108, 0x00004000,
150 0x00004100, 0x02004008, 0x02000100, 0x00000108,
151 0x00000008, 0x00004108, 0x02004000, 0x02000008,
153 0x20000010, 0x00080010, 0x00000000, 0x20080800,
154 0x00080010, 0x00000800, 0x20000810, 0x00080000,
155 0x00000810, 0x20080810, 0x00080800, 0x20000000,
156 0x20000800, 0x20000010, 0x20080000, 0x00080810,
157 0x00080000, 0x20000810, 0x20080010, 0x00000000,
158 0x00000800, 0x00000010, 0x20080800, 0x20080010,
159 0x20080810, 0x20080000, 0x20000000, 0x00000810,
160 0x00000010, 0x00080800, 0x00080810, 0x20000800,
161 0x00000810, 0x20000000, 0x20000800, 0x00080810,
162 0x20080800, 0x00080010, 0x00000000, 0x20000800,
163 0x20000000, 0x00000800, 0x20080010, 0x00080000,
164 0x00080010, 0x20080810, 0x00080800, 0x00000010,
165 0x20080810, 0x00080800, 0x00080000, 0x20000810,
166 0x20000010, 0x20080000, 0x00080810, 0x00000000,
167 0x00000800, 0x20000010, 0x20000810, 0x20080800,
168 0x20080000, 0x00000810, 0x00000010, 0x20080010,
170 0x00001000, 0x00000080, 0x00400080, 0x00400001,
171 0x00401081, 0x00001001, 0x00001080, 0x00000000,
172 0x00400000, 0x00400081, 0x00000081, 0x00401000,
173 0x00000001, 0x00401080, 0x00401000, 0x00000081,
174 0x00400081, 0x00001000, 0x00001001, 0x00401081,
175 0x00000000, 0x00400080, 0x00400001, 0x00001080,
176 0x00401001, 0x00001081, 0x00401080, 0x00000001,
177 0x00001081, 0x00401001, 0x00000080, 0x00400000,
178 0x00001081, 0x00401000, 0x00401001, 0x00000081,
179 0x00001000, 0x00000080, 0x00400000, 0x00401001,
180 0x00400081, 0x00001081, 0x00001080, 0x00000000,
181 0x00000080, 0x00400001, 0x00000001, 0x00400080,
182 0x00000000, 0x00400081, 0x00400080, 0x00001080,
183 0x00000081, 0x00001000, 0x00401081, 0x00400000,
184 0x00401080, 0x00000001, 0x00001001, 0x00401081,
185 0x00400001, 0x00401080, 0x00401000, 0x00001001,
187 0x08200020, 0x08208000, 0x00008020, 0x00000000,
188 0x08008000, 0x00200020, 0x08200000, 0x08208020,
189 0x00000020, 0x08000000, 0x00208000, 0x00008020,
190 0x00208020, 0x08008020, 0x08000020, 0x08200000,
191 0x00008000, 0x00208020, 0x00200020, 0x08008000,
192 0x08208020, 0x08000020, 0x00000000, 0x00208000,
193 0x08000000, 0x00200000, 0x08008020, 0x08200020,
194 0x00200000, 0x00008000, 0x08208000, 0x00000020,
195 0x00200000, 0x00008000, 0x08000020, 0x08208020,
196 0x00008020, 0x08000000, 0x00000000, 0x00208000,
197 0x08200020, 0x08008020, 0x08008000, 0x00200020,
198 0x08208000, 0x00000020, 0x00200020, 0x08008000,
199 0x08208020, 0x00200000, 0x08200000, 0x08000020,
200 0x00208000, 0x00008020, 0x08008020, 0x08200000,
201 0x00000020, 0x08208000, 0x00208020, 0x00000000,
202 0x08000000, 0x08200020, 0x00008000, 0x00208020
205 __device__ __constant__ u32 c_skb[8][64] =
207 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
208 0x00000000, 0x00000010, 0x20000000, 0x20000010,
209 0x00010000, 0x00010010, 0x20010000, 0x20010010,
210 0x00000800, 0x00000810, 0x20000800, 0x20000810,
211 0x00010800, 0x00010810, 0x20010800, 0x20010810,
212 0x00000020, 0x00000030, 0x20000020, 0x20000030,
213 0x00010020, 0x00010030, 0x20010020, 0x20010030,
214 0x00000820, 0x00000830, 0x20000820, 0x20000830,
215 0x00010820, 0x00010830, 0x20010820, 0x20010830,
216 0x00080000, 0x00080010, 0x20080000, 0x20080010,
217 0x00090000, 0x00090010, 0x20090000, 0x20090010,
218 0x00080800, 0x00080810, 0x20080800, 0x20080810,
219 0x00090800, 0x00090810, 0x20090800, 0x20090810,
220 0x00080020, 0x00080030, 0x20080020, 0x20080030,
221 0x00090020, 0x00090030, 0x20090020, 0x20090030,
222 0x00080820, 0x00080830, 0x20080820, 0x20080830,
223 0x00090820, 0x00090830, 0x20090820, 0x20090830,
224 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
225 0x00000000, 0x02000000, 0x00002000, 0x02002000,
226 0x00200000, 0x02200000, 0x00202000, 0x02202000,
227 0x00000004, 0x02000004, 0x00002004, 0x02002004,
228 0x00200004, 0x02200004, 0x00202004, 0x02202004,
229 0x00000400, 0x02000400, 0x00002400, 0x02002400,
230 0x00200400, 0x02200400, 0x00202400, 0x02202400,
231 0x00000404, 0x02000404, 0x00002404, 0x02002404,
232 0x00200404, 0x02200404, 0x00202404, 0x02202404,
233 0x10000000, 0x12000000, 0x10002000, 0x12002000,
234 0x10200000, 0x12200000, 0x10202000, 0x12202000,
235 0x10000004, 0x12000004, 0x10002004, 0x12002004,
236 0x10200004, 0x12200004, 0x10202004, 0x12202004,
237 0x10000400, 0x12000400, 0x10002400, 0x12002400,
238 0x10200400, 0x12200400, 0x10202400, 0x12202400,
239 0x10000404, 0x12000404, 0x10002404, 0x12002404,
240 0x10200404, 0x12200404, 0x10202404, 0x12202404,
241 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
242 0x00000000, 0x00000001, 0x00040000, 0x00040001,
243 0x01000000, 0x01000001, 0x01040000, 0x01040001,
244 0x00000002, 0x00000003, 0x00040002, 0x00040003,
245 0x01000002, 0x01000003, 0x01040002, 0x01040003,
246 0x00000200, 0x00000201, 0x00040200, 0x00040201,
247 0x01000200, 0x01000201, 0x01040200, 0x01040201,
248 0x00000202, 0x00000203, 0x00040202, 0x00040203,
249 0x01000202, 0x01000203, 0x01040202, 0x01040203,
250 0x08000000, 0x08000001, 0x08040000, 0x08040001,
251 0x09000000, 0x09000001, 0x09040000, 0x09040001,
252 0x08000002, 0x08000003, 0x08040002, 0x08040003,
253 0x09000002, 0x09000003, 0x09040002, 0x09040003,
254 0x08000200, 0x08000201, 0x08040200, 0x08040201,
255 0x09000200, 0x09000201, 0x09040200, 0x09040201,
256 0x08000202, 0x08000203, 0x08040202, 0x08040203,
257 0x09000202, 0x09000203, 0x09040202, 0x09040203,
258 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
259 0x00000000, 0x00100000, 0x00000100, 0x00100100,
260 0x00000008, 0x00100008, 0x00000108, 0x00100108,
261 0x00001000, 0x00101000, 0x00001100, 0x00101100,
262 0x00001008, 0x00101008, 0x00001108, 0x00101108,
263 0x04000000, 0x04100000, 0x04000100, 0x04100100,
264 0x04000008, 0x04100008, 0x04000108, 0x04100108,
265 0x04001000, 0x04101000, 0x04001100, 0x04101100,
266 0x04001008, 0x04101008, 0x04001108, 0x04101108,
267 0x00020000, 0x00120000, 0x00020100, 0x00120100,
268 0x00020008, 0x00120008, 0x00020108, 0x00120108,
269 0x00021000, 0x00121000, 0x00021100, 0x00121100,
270 0x00021008, 0x00121008, 0x00021108, 0x00121108,
271 0x04020000, 0x04120000, 0x04020100, 0x04120100,
272 0x04020008, 0x04120008, 0x04020108, 0x04120108,
273 0x04021000, 0x04121000, 0x04021100, 0x04121100,
274 0x04021008, 0x04121008, 0x04021108, 0x04121108,
275 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
276 0x00000000, 0x10000000, 0x00010000, 0x10010000,
277 0x00000004, 0x10000004, 0x00010004, 0x10010004,
278 0x20000000, 0x30000000, 0x20010000, 0x30010000,
279 0x20000004, 0x30000004, 0x20010004, 0x30010004,
280 0x00100000, 0x10100000, 0x00110000, 0x10110000,
281 0x00100004, 0x10100004, 0x00110004, 0x10110004,
282 0x20100000, 0x30100000, 0x20110000, 0x30110000,
283 0x20100004, 0x30100004, 0x20110004, 0x30110004,
284 0x00001000, 0x10001000, 0x00011000, 0x10011000,
285 0x00001004, 0x10001004, 0x00011004, 0x10011004,
286 0x20001000, 0x30001000, 0x20011000, 0x30011000,
287 0x20001004, 0x30001004, 0x20011004, 0x30011004,
288 0x00101000, 0x10101000, 0x00111000, 0x10111000,
289 0x00101004, 0x10101004, 0x00111004, 0x10111004,
290 0x20101000, 0x30101000, 0x20111000, 0x30111000,
291 0x20101004, 0x30101004, 0x20111004, 0x30111004,
292 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
293 0x00000000, 0x08000000, 0x00000008, 0x08000008,
294 0x00000400, 0x08000400, 0x00000408, 0x08000408,
295 0x00020000, 0x08020000, 0x00020008, 0x08020008,
296 0x00020400, 0x08020400, 0x00020408, 0x08020408,
297 0x00000001, 0x08000001, 0x00000009, 0x08000009,
298 0x00000401, 0x08000401, 0x00000409, 0x08000409,
299 0x00020001, 0x08020001, 0x00020009, 0x08020009,
300 0x00020401, 0x08020401, 0x00020409, 0x08020409,
301 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
302 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
303 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
304 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
305 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
306 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
307 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
308 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
309 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
310 0x00000000, 0x00000100, 0x00080000, 0x00080100,
311 0x01000000, 0x01000100, 0x01080000, 0x01080100,
312 0x00000010, 0x00000110, 0x00080010, 0x00080110,
313 0x01000010, 0x01000110, 0x01080010, 0x01080110,
314 0x00200000, 0x00200100, 0x00280000, 0x00280100,
315 0x01200000, 0x01200100, 0x01280000, 0x01280100,
316 0x00200010, 0x00200110, 0x00280010, 0x00280110,
317 0x01200010, 0x01200110, 0x01280010, 0x01280110,
318 0x00000200, 0x00000300, 0x00080200, 0x00080300,
319 0x01000200, 0x01000300, 0x01080200, 0x01080300,
320 0x00000210, 0x00000310, 0x00080210, 0x00080310,
321 0x01000210, 0x01000310, 0x01080210, 0x01080310,
322 0x00200200, 0x00200300, 0x00280200, 0x00280300,
323 0x01200200, 0x01200300, 0x01280200, 0x01280300,
324 0x00200210, 0x00200310, 0x00280210, 0x00280310,
325 0x01200210, 0x01200310, 0x01280210, 0x01280310,
326 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
327 0x00000000, 0x04000000, 0x00040000, 0x04040000,
328 0x00000002, 0x04000002, 0x00040002, 0x04040002,
329 0x00002000, 0x04002000, 0x00042000, 0x04042000,
330 0x00002002, 0x04002002, 0x00042002, 0x04042002,
331 0x00000020, 0x04000020, 0x00040020, 0x04040020,
332 0x00000022, 0x04000022, 0x00040022, 0x04040022,
333 0x00002020, 0x04002020, 0x00042020, 0x04042020,
334 0x00002022, 0x04002022, 0x00042022, 0x04042022,
335 0x00000800, 0x04000800, 0x00040800, 0x04040800,
336 0x00000802, 0x04000802, 0x00040802, 0x04040802,
337 0x00002800, 0x04002800, 0x00042800, 0x04042800,
338 0x00002802, 0x04002802, 0x00042802, 0x04042802,
339 0x00000820, 0x04000820, 0x00040820, 0x04040820,
340 0x00000822, 0x04000822, 0x00040822, 0x04040822,
341 0x00002820, 0x04002820, 0x00042820, 0x04042820,
342 0x00002822, 0x04002822, 0x00042822, 0x04042822
346 #define BOX(i,n,S) u32x ((S)[(n)][(i)])
350 #define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y])
353 __device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64])
357 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
358 HPERM_OP (c, tt, 2, 0xcccc0000);
359 HPERM_OP (d, tt, 2, 0xcccc0000);
360 PERM_OP (d, c, tt, 1, 0x55555555);
361 PERM_OP (c, d, tt, 8, 0x00ff00ff);
362 PERM_OP (d, c, tt, 1, 0x55555555);
364 d = ((d & 0x000000ff) << 16)
365 | ((d & 0x0000ff00) << 0)
366 | ((d & 0x00ff0000) >> 16)
367 | ((c & 0xf0000000) >> 4);
372 for (u32 i = 0; i < 16; i++)
374 if ((i < 2) || (i == 8) || (i == 15))
376 c = ((c >> 1) | (c << 27));
377 d = ((d >> 1) | (d << 27));
381 c = ((c >> 2) | (c << 26));
382 d = ((d >> 2) | (d << 26));
388 const u32x c00 = (c >> 0) & 0x0000003f;
389 const u32x c06 = (c >> 6) & 0x00383003;
390 const u32x c07 = (c >> 7) & 0x0000003c;
391 const u32x c13 = (c >> 13) & 0x0000060f;
392 const u32x c20 = (c >> 20) & 0x00000001;
394 u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb)
395 | BOX (((c06 >> 0) & 0xff)
396 |((c07 >> 0) & 0xff), 1, s_skb)
397 | BOX (((c13 >> 0) & 0xff)
398 |((c06 >> 8) & 0xff), 2, s_skb)
399 | BOX (((c20 >> 0) & 0xff)
401 |((c06 >> 16) & 0xff), 3, s_skb);
403 const u32x d00 = (d >> 0) & 0x00003c3f;
404 const u32x d07 = (d >> 7) & 0x00003f03;
405 const u32x d21 = (d >> 21) & 0x0000000f;
406 const u32x d22 = (d >> 22) & 0x00000030;
408 u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb)
409 | BOX (((d07 >> 0) & 0xff)
410 |((d00 >> 8) & 0xff), 5, s_skb)
411 | BOX (((d07 >> 8) & 0xff), 6, s_skb)
412 | BOX (((d21 >> 0) & 0xff)
413 |((d22 >> 0) & 0xff), 7, s_skb);
415 #if __CUDA_ARCH__ >= 200
416 Kc[i] = __byte_perm (s, t, 0x5410);
417 Kd[i] = __byte_perm (s, t, 0x7632);
419 Kc[i] = ((t << 16) | (s & 0x0000ffff));
420 Kd[i] = ((s >> 16) | (t & 0xffff0000));
425 __device__ static void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64])
427 const u32 E1 = (mask >> 2) & 0x3f0;
429 const u32 E0 = mask & 0x3f;
434 for (u32 i = 0; i < 25; i++)
437 for (u32 j = 0; j < 16; j += 2)
453 l ^= BOX (((u >> 0) & 0x3f), 0, s_SPtrans)
454 | BOX (((u >> 8) & 0x3f), 2, s_SPtrans)
455 | BOX (((u >> 16) & 0x3f), 4, s_SPtrans)
456 | BOX (((u >> 24) & 0x3f), 6, s_SPtrans)
457 | BOX (((t >> 0) & 0x3f), 1, s_SPtrans)
458 | BOX (((t >> 8) & 0x3f), 3, s_SPtrans)
459 | BOX (((t >> 16) & 0x3f), 5, s_SPtrans)
460 | BOX (((t >> 24) & 0x3f), 7, s_SPtrans);
473 r ^= BOX (((u >> 0) & 0x3f), 0, s_SPtrans)
474 | BOX (((u >> 8) & 0x3f), 2, s_SPtrans)
475 | BOX (((u >> 16) & 0x3f), 4, s_SPtrans)
476 | BOX (((u >> 24) & 0x3f), 6, s_SPtrans)
477 | BOX (((t >> 0) & 0x3f), 1, s_SPtrans)
478 | BOX (((t >> 8) & 0x3f), 3, s_SPtrans)
479 | BOX (((t >> 16) & 0x3f), 5, s_SPtrans)
480 | BOX (((t >> 24) & 0x3f), 7, s_SPtrans);
490 iv[0] = rotl32 (r, 31);
491 iv[1] = rotl32 (l, 31);
494 extern "C" __global__ void __launch_bounds__ (256, 1) m01500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
500 const u32 lid = threadIdx.x;
506 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
510 wordl0[0] = pws[gid].i[ 0];
511 wordl0[1] = pws[gid].i[ 1];
536 const u32 pw_l_len = pws[gid].pw_len;
538 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
540 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len);
547 __shared__ u32 s_skb[8][64];
548 __shared__ u32 s_SPtrans[8][64];
552 s_skb[0][lid] = c_skb[0][lid];
553 s_skb[1][lid] = c_skb[1][lid];
554 s_skb[2][lid] = c_skb[2][lid];
555 s_skb[3][lid] = c_skb[3][lid];
556 s_skb[4][lid] = c_skb[4][lid];
557 s_skb[5][lid] = c_skb[5][lid];
558 s_skb[6][lid] = c_skb[6][lid];
559 s_skb[7][lid] = c_skb[7][lid];
561 s_SPtrans[0][lid] = c_SPtrans[0][lid];
562 s_SPtrans[1][lid] = c_SPtrans[1][lid];
563 s_SPtrans[2][lid] = c_SPtrans[2][lid];
564 s_SPtrans[3][lid] = c_SPtrans[3][lid];
565 s_SPtrans[4][lid] = c_SPtrans[4][lid];
566 s_SPtrans[5][lid] = c_SPtrans[5][lid];
567 s_SPtrans[6][lid] = c_SPtrans[6][lid];
568 s_SPtrans[7][lid] = c_SPtrans[7][lid];
573 if (gid >= gid_max) return;
579 const u32 mask = salt_bufs[salt_pos].salt_buf[0];
585 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
587 const u32 pw_r_len = c_combs[il_pos].pw_len;
589 u32 pw_len = pw_l_len + pw_r_len;
591 pw_len = (pw_len >= 8) ? 8 : pw_len;
595 wordr0[0] = c_combs[il_pos].i[0];
596 wordr0[1] = c_combs[il_pos].i[1];
621 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
623 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
628 w0[0] = wordl0[0] | wordr0[0];
629 w0[1] = wordl0[1] | wordr0[1];
656 data[0] = (w0[0] << 1) & 0xfefefefe;
657 data[1] = (w0[1] << 1) & 0xfefefefe;
662 _des_crypt_keysetup (data[0], data[1], Kc, Kd, s_skb);
666 _des_crypt_encrypt (iv, mask, Kc, Kd, s_SPtrans);
668 const u32x r0 = iv[0];
669 const u32x r1 = iv[1];
673 #include VECT_COMPARE_M
677 extern "C" __global__ void __launch_bounds__ (256, 1) m01500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
681 extern "C" __global__ void __launch_bounds__ (256, 1) m01500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
685 extern "C" __global__ void __launch_bounds__ (256, 1) m01500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
691 const u32 lid = threadIdx.x;
697 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
701 wordl0[0] = pws[gid].i[ 0];
702 wordl0[1] = pws[gid].i[ 1];
727 const u32 pw_l_len = pws[gid].pw_len;
729 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
731 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len);
738 __shared__ u32 s_skb[8][64];
739 __shared__ u32 s_SPtrans[8][64];
743 s_skb[0][lid] = c_skb[0][lid];
744 s_skb[1][lid] = c_skb[1][lid];
745 s_skb[2][lid] = c_skb[2][lid];
746 s_skb[3][lid] = c_skb[3][lid];
747 s_skb[4][lid] = c_skb[4][lid];
748 s_skb[5][lid] = c_skb[5][lid];
749 s_skb[6][lid] = c_skb[6][lid];
750 s_skb[7][lid] = c_skb[7][lid];
752 s_SPtrans[0][lid] = c_SPtrans[0][lid];
753 s_SPtrans[1][lid] = c_SPtrans[1][lid];
754 s_SPtrans[2][lid] = c_SPtrans[2][lid];
755 s_SPtrans[3][lid] = c_SPtrans[3][lid];
756 s_SPtrans[4][lid] = c_SPtrans[4][lid];
757 s_SPtrans[5][lid] = c_SPtrans[5][lid];
758 s_SPtrans[6][lid] = c_SPtrans[6][lid];
759 s_SPtrans[7][lid] = c_SPtrans[7][lid];
764 if (gid >= gid_max) return;
770 const u32 mask = salt_bufs[salt_pos].salt_buf[0];
776 const u32 search[4] =
778 digests_buf[digests_offset].digest_buf[DGST_R0],
779 digests_buf[digests_offset].digest_buf[DGST_R1],
780 digests_buf[digests_offset].digest_buf[DGST_R2],
781 digests_buf[digests_offset].digest_buf[DGST_R3]
788 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
790 const u32 pw_r_len = c_combs[il_pos].pw_len;
792 u32 pw_len = pw_l_len + pw_r_len;
794 pw_len = (pw_len >= 8) ? 8 : pw_len;
798 wordr0[0] = c_combs[il_pos].i[0];
799 wordr0[1] = c_combs[il_pos].i[1];
824 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
826 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
831 w0[0] = wordl0[0] | wordr0[0];
832 w0[1] = wordl0[1] | wordr0[1];
859 data[0] = (w0[0] << 1) & 0xfefefefe;
860 data[1] = (w0[1] << 1) & 0xfefefefe;
865 _des_crypt_keysetup (data[0], data[1], Kc, Kd, s_skb);
869 _des_crypt_encrypt (iv, mask, Kc, Kd, s_SPtrans);
871 const u32x r0 = iv[0];
872 const u32x r1 = iv[1];
876 #include VECT_COMPARE_S
880 extern "C" __global__ void __launch_bounds__ (256, 1) m01500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
884 extern "C" __global__ void __launch_bounds__ (256, 1) m01500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)