2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
29 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
30 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
34 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
35 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
39 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
40 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
43 #define PERM_OP(a,b,tt,n,m) \
53 #define HPERM_OP(a,tt,n,m) \
59 tt = tt >> (16 + n); \
65 PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \
66 PERM_OP (l, r, tt, 16, 0x0000ffff); \
67 PERM_OP (r, l, tt, 2, 0x33333333); \
68 PERM_OP (l, r, tt, 8, 0x00ff00ff); \
69 PERM_OP (r, l, tt, 1, 0x55555555); \
74 PERM_OP (l, r, tt, 1, 0x55555555); \
75 PERM_OP (r, l, tt, 8, 0x00ff00ff); \
76 PERM_OP (l, r, tt, 2, 0x33333333); \
77 PERM_OP (r, l, tt, 16, 0x0000ffff); \
78 PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \
81 __device__ __constant__ u32 c_SPtrans[8][64] =
84 0x02080800, 0x00080000, 0x02000002, 0x02080802,
85 0x02000000, 0x00080802, 0x00080002, 0x02000002,
86 0x00080802, 0x02080800, 0x02080000, 0x00000802,
87 0x02000802, 0x02000000, 0x00000000, 0x00080002,
88 0x00080000, 0x00000002, 0x02000800, 0x00080800,
89 0x02080802, 0x02080000, 0x00000802, 0x02000800,
90 0x00000002, 0x00000800, 0x00080800, 0x02080002,
91 0x00000800, 0x02000802, 0x02080002, 0x00000000,
92 0x00000000, 0x02080802, 0x02000800, 0x00080002,
93 0x02080800, 0x00080000, 0x00000802, 0x02000800,
94 0x02080002, 0x00000800, 0x00080800, 0x02000002,
95 0x00080802, 0x00000002, 0x02000002, 0x02080000,
96 0x02080802, 0x00080800, 0x02080000, 0x02000802,
97 0x02000000, 0x00000802, 0x00080002, 0x00000000,
98 0x00080000, 0x02000000, 0x02000802, 0x02080800,
99 0x00000002, 0x02080002, 0x00000800, 0x00080802,
101 0x40108010, 0x00000000, 0x00108000, 0x40100000,
102 0x40000010, 0x00008010, 0x40008000, 0x00108000,
103 0x00008000, 0x40100010, 0x00000010, 0x40008000,
104 0x00100010, 0x40108000, 0x40100000, 0x00000010,
105 0x00100000, 0x40008010, 0x40100010, 0x00008000,
106 0x00108010, 0x40000000, 0x00000000, 0x00100010,
107 0x40008010, 0x00108010, 0x40108000, 0x40000010,
108 0x40000000, 0x00100000, 0x00008010, 0x40108010,
109 0x00100010, 0x40108000, 0x40008000, 0x00108010,
110 0x40108010, 0x00100010, 0x40000010, 0x00000000,
111 0x40000000, 0x00008010, 0x00100000, 0x40100010,
112 0x00008000, 0x40000000, 0x00108010, 0x40008010,
113 0x40108000, 0x00008000, 0x00000000, 0x40000010,
114 0x00000010, 0x40108010, 0x00108000, 0x40100000,
115 0x40100010, 0x00100000, 0x00008010, 0x40008000,
116 0x40008010, 0x00000010, 0x40100000, 0x00108000,
118 0x04000001, 0x04040100, 0x00000100, 0x04000101,
119 0x00040001, 0x04000000, 0x04000101, 0x00040100,
120 0x04000100, 0x00040000, 0x04040000, 0x00000001,
121 0x04040101, 0x00000101, 0x00000001, 0x04040001,
122 0x00000000, 0x00040001, 0x04040100, 0x00000100,
123 0x00000101, 0x04040101, 0x00040000, 0x04000001,
124 0x04040001, 0x04000100, 0x00040101, 0x04040000,
125 0x00040100, 0x00000000, 0x04000000, 0x00040101,
126 0x04040100, 0x00000100, 0x00000001, 0x00040000,
127 0x00000101, 0x00040001, 0x04040000, 0x04000101,
128 0x00000000, 0x04040100, 0x00040100, 0x04040001,
129 0x00040001, 0x04000000, 0x04040101, 0x00000001,
130 0x00040101, 0x04000001, 0x04000000, 0x04040101,
131 0x00040000, 0x04000100, 0x04000101, 0x00040100,
132 0x04000100, 0x00000000, 0x04040001, 0x00000101,
133 0x04000001, 0x00040101, 0x00000100, 0x04040000,
135 0x00401008, 0x10001000, 0x00000008, 0x10401008,
136 0x00000000, 0x10400000, 0x10001008, 0x00400008,
137 0x10401000, 0x10000008, 0x10000000, 0x00001008,
138 0x10000008, 0x00401008, 0x00400000, 0x10000000,
139 0x10400008, 0x00401000, 0x00001000, 0x00000008,
140 0x00401000, 0x10001008, 0x10400000, 0x00001000,
141 0x00001008, 0x00000000, 0x00400008, 0x10401000,
142 0x10001000, 0x10400008, 0x10401008, 0x00400000,
143 0x10400008, 0x00001008, 0x00400000, 0x10000008,
144 0x00401000, 0x10001000, 0x00000008, 0x10400000,
145 0x10001008, 0x00000000, 0x00001000, 0x00400008,
146 0x00000000, 0x10400008, 0x10401000, 0x00001000,
147 0x10000000, 0x10401008, 0x00401008, 0x00400000,
148 0x10401008, 0x00000008, 0x10001000, 0x00401008,
149 0x00400008, 0x00401000, 0x10400000, 0x10001008,
150 0x00001008, 0x10000000, 0x10000008, 0x10401000,
152 0x08000000, 0x00010000, 0x00000400, 0x08010420,
153 0x08010020, 0x08000400, 0x00010420, 0x08010000,
154 0x00010000, 0x00000020, 0x08000020, 0x00010400,
155 0x08000420, 0x08010020, 0x08010400, 0x00000000,
156 0x00010400, 0x08000000, 0x00010020, 0x00000420,
157 0x08000400, 0x00010420, 0x00000000, 0x08000020,
158 0x00000020, 0x08000420, 0x08010420, 0x00010020,
159 0x08010000, 0x00000400, 0x00000420, 0x08010400,
160 0x08010400, 0x08000420, 0x00010020, 0x08010000,
161 0x00010000, 0x00000020, 0x08000020, 0x08000400,
162 0x08000000, 0x00010400, 0x08010420, 0x00000000,
163 0x00010420, 0x08000000, 0x00000400, 0x00010020,
164 0x08000420, 0x00000400, 0x00000000, 0x08010420,
165 0x08010020, 0x08010400, 0x00000420, 0x00010000,
166 0x00010400, 0x08010020, 0x08000400, 0x00000420,
167 0x00000020, 0x00010420, 0x08010000, 0x08000020,
169 0x80000040, 0x00200040, 0x00000000, 0x80202000,
170 0x00200040, 0x00002000, 0x80002040, 0x00200000,
171 0x00002040, 0x80202040, 0x00202000, 0x80000000,
172 0x80002000, 0x80000040, 0x80200000, 0x00202040,
173 0x00200000, 0x80002040, 0x80200040, 0x00000000,
174 0x00002000, 0x00000040, 0x80202000, 0x80200040,
175 0x80202040, 0x80200000, 0x80000000, 0x00002040,
176 0x00000040, 0x00202000, 0x00202040, 0x80002000,
177 0x00002040, 0x80000000, 0x80002000, 0x00202040,
178 0x80202000, 0x00200040, 0x00000000, 0x80002000,
179 0x80000000, 0x00002000, 0x80200040, 0x00200000,
180 0x00200040, 0x80202040, 0x00202000, 0x00000040,
181 0x80202040, 0x00202000, 0x00200000, 0x80002040,
182 0x80000040, 0x80200000, 0x00202040, 0x00000000,
183 0x00002000, 0x80000040, 0x80002040, 0x80202000,
184 0x80200000, 0x00002040, 0x00000040, 0x80200040,
186 0x00004000, 0x00000200, 0x01000200, 0x01000004,
187 0x01004204, 0x00004004, 0x00004200, 0x00000000,
188 0x01000000, 0x01000204, 0x00000204, 0x01004000,
189 0x00000004, 0x01004200, 0x01004000, 0x00000204,
190 0x01000204, 0x00004000, 0x00004004, 0x01004204,
191 0x00000000, 0x01000200, 0x01000004, 0x00004200,
192 0x01004004, 0x00004204, 0x01004200, 0x00000004,
193 0x00004204, 0x01004004, 0x00000200, 0x01000000,
194 0x00004204, 0x01004000, 0x01004004, 0x00000204,
195 0x00004000, 0x00000200, 0x01000000, 0x01004004,
196 0x01000204, 0x00004204, 0x00004200, 0x00000000,
197 0x00000200, 0x01000004, 0x00000004, 0x01000200,
198 0x00000000, 0x01000204, 0x01000200, 0x00004200,
199 0x00000204, 0x00004000, 0x01004204, 0x01000000,
200 0x01004200, 0x00000004, 0x00004004, 0x01004204,
201 0x01000004, 0x01004200, 0x01004000, 0x00004004,
203 0x20800080, 0x20820000, 0x00020080, 0x00000000,
204 0x20020000, 0x00800080, 0x20800000, 0x20820080,
205 0x00000080, 0x20000000, 0x00820000, 0x00020080,
206 0x00820080, 0x20020080, 0x20000080, 0x20800000,
207 0x00020000, 0x00820080, 0x00800080, 0x20020000,
208 0x20820080, 0x20000080, 0x00000000, 0x00820000,
209 0x20000000, 0x00800000, 0x20020080, 0x20800080,
210 0x00800000, 0x00020000, 0x20820000, 0x00000080,
211 0x00800000, 0x00020000, 0x20000080, 0x20820080,
212 0x00020080, 0x20000000, 0x00000000, 0x00820000,
213 0x20800080, 0x20020080, 0x20020000, 0x00800080,
214 0x20820000, 0x00000080, 0x00800080, 0x20020000,
215 0x20820080, 0x00800000, 0x20800000, 0x20000080,
216 0x00820000, 0x00020080, 0x20020080, 0x20800000,
217 0x00000080, 0x20820000, 0x00820080, 0x00000000,
218 0x20000000, 0x20800080, 0x00020000, 0x00820080,
221 __device__ __constant__ u32 c_skb[8][64] =
223 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
224 0x00000000, 0x00000010, 0x20000000, 0x20000010,
225 0x00010000, 0x00010010, 0x20010000, 0x20010010,
226 0x00000800, 0x00000810, 0x20000800, 0x20000810,
227 0x00010800, 0x00010810, 0x20010800, 0x20010810,
228 0x00000020, 0x00000030, 0x20000020, 0x20000030,
229 0x00010020, 0x00010030, 0x20010020, 0x20010030,
230 0x00000820, 0x00000830, 0x20000820, 0x20000830,
231 0x00010820, 0x00010830, 0x20010820, 0x20010830,
232 0x00080000, 0x00080010, 0x20080000, 0x20080010,
233 0x00090000, 0x00090010, 0x20090000, 0x20090010,
234 0x00080800, 0x00080810, 0x20080800, 0x20080810,
235 0x00090800, 0x00090810, 0x20090800, 0x20090810,
236 0x00080020, 0x00080030, 0x20080020, 0x20080030,
237 0x00090020, 0x00090030, 0x20090020, 0x20090030,
238 0x00080820, 0x00080830, 0x20080820, 0x20080830,
239 0x00090820, 0x00090830, 0x20090820, 0x20090830,
240 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
241 0x00000000, 0x02000000, 0x00002000, 0x02002000,
242 0x00200000, 0x02200000, 0x00202000, 0x02202000,
243 0x00000004, 0x02000004, 0x00002004, 0x02002004,
244 0x00200004, 0x02200004, 0x00202004, 0x02202004,
245 0x00000400, 0x02000400, 0x00002400, 0x02002400,
246 0x00200400, 0x02200400, 0x00202400, 0x02202400,
247 0x00000404, 0x02000404, 0x00002404, 0x02002404,
248 0x00200404, 0x02200404, 0x00202404, 0x02202404,
249 0x10000000, 0x12000000, 0x10002000, 0x12002000,
250 0x10200000, 0x12200000, 0x10202000, 0x12202000,
251 0x10000004, 0x12000004, 0x10002004, 0x12002004,
252 0x10200004, 0x12200004, 0x10202004, 0x12202004,
253 0x10000400, 0x12000400, 0x10002400, 0x12002400,
254 0x10200400, 0x12200400, 0x10202400, 0x12202400,
255 0x10000404, 0x12000404, 0x10002404, 0x12002404,
256 0x10200404, 0x12200404, 0x10202404, 0x12202404,
257 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
258 0x00000000, 0x00000001, 0x00040000, 0x00040001,
259 0x01000000, 0x01000001, 0x01040000, 0x01040001,
260 0x00000002, 0x00000003, 0x00040002, 0x00040003,
261 0x01000002, 0x01000003, 0x01040002, 0x01040003,
262 0x00000200, 0x00000201, 0x00040200, 0x00040201,
263 0x01000200, 0x01000201, 0x01040200, 0x01040201,
264 0x00000202, 0x00000203, 0x00040202, 0x00040203,
265 0x01000202, 0x01000203, 0x01040202, 0x01040203,
266 0x08000000, 0x08000001, 0x08040000, 0x08040001,
267 0x09000000, 0x09000001, 0x09040000, 0x09040001,
268 0x08000002, 0x08000003, 0x08040002, 0x08040003,
269 0x09000002, 0x09000003, 0x09040002, 0x09040003,
270 0x08000200, 0x08000201, 0x08040200, 0x08040201,
271 0x09000200, 0x09000201, 0x09040200, 0x09040201,
272 0x08000202, 0x08000203, 0x08040202, 0x08040203,
273 0x09000202, 0x09000203, 0x09040202, 0x09040203,
274 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
275 0x00000000, 0x00100000, 0x00000100, 0x00100100,
276 0x00000008, 0x00100008, 0x00000108, 0x00100108,
277 0x00001000, 0x00101000, 0x00001100, 0x00101100,
278 0x00001008, 0x00101008, 0x00001108, 0x00101108,
279 0x04000000, 0x04100000, 0x04000100, 0x04100100,
280 0x04000008, 0x04100008, 0x04000108, 0x04100108,
281 0x04001000, 0x04101000, 0x04001100, 0x04101100,
282 0x04001008, 0x04101008, 0x04001108, 0x04101108,
283 0x00020000, 0x00120000, 0x00020100, 0x00120100,
284 0x00020008, 0x00120008, 0x00020108, 0x00120108,
285 0x00021000, 0x00121000, 0x00021100, 0x00121100,
286 0x00021008, 0x00121008, 0x00021108, 0x00121108,
287 0x04020000, 0x04120000, 0x04020100, 0x04120100,
288 0x04020008, 0x04120008, 0x04020108, 0x04120108,
289 0x04021000, 0x04121000, 0x04021100, 0x04121100,
290 0x04021008, 0x04121008, 0x04021108, 0x04121108,
291 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
292 0x00000000, 0x10000000, 0x00010000, 0x10010000,
293 0x00000004, 0x10000004, 0x00010004, 0x10010004,
294 0x20000000, 0x30000000, 0x20010000, 0x30010000,
295 0x20000004, 0x30000004, 0x20010004, 0x30010004,
296 0x00100000, 0x10100000, 0x00110000, 0x10110000,
297 0x00100004, 0x10100004, 0x00110004, 0x10110004,
298 0x20100000, 0x30100000, 0x20110000, 0x30110000,
299 0x20100004, 0x30100004, 0x20110004, 0x30110004,
300 0x00001000, 0x10001000, 0x00011000, 0x10011000,
301 0x00001004, 0x10001004, 0x00011004, 0x10011004,
302 0x20001000, 0x30001000, 0x20011000, 0x30011000,
303 0x20001004, 0x30001004, 0x20011004, 0x30011004,
304 0x00101000, 0x10101000, 0x00111000, 0x10111000,
305 0x00101004, 0x10101004, 0x00111004, 0x10111004,
306 0x20101000, 0x30101000, 0x20111000, 0x30111000,
307 0x20101004, 0x30101004, 0x20111004, 0x30111004,
308 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
309 0x00000000, 0x08000000, 0x00000008, 0x08000008,
310 0x00000400, 0x08000400, 0x00000408, 0x08000408,
311 0x00020000, 0x08020000, 0x00020008, 0x08020008,
312 0x00020400, 0x08020400, 0x00020408, 0x08020408,
313 0x00000001, 0x08000001, 0x00000009, 0x08000009,
314 0x00000401, 0x08000401, 0x00000409, 0x08000409,
315 0x00020001, 0x08020001, 0x00020009, 0x08020009,
316 0x00020401, 0x08020401, 0x00020409, 0x08020409,
317 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
318 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
319 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
320 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
321 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
322 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
323 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
324 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
325 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
326 0x00000000, 0x00000100, 0x00080000, 0x00080100,
327 0x01000000, 0x01000100, 0x01080000, 0x01080100,
328 0x00000010, 0x00000110, 0x00080010, 0x00080110,
329 0x01000010, 0x01000110, 0x01080010, 0x01080110,
330 0x00200000, 0x00200100, 0x00280000, 0x00280100,
331 0x01200000, 0x01200100, 0x01280000, 0x01280100,
332 0x00200010, 0x00200110, 0x00280010, 0x00280110,
333 0x01200010, 0x01200110, 0x01280010, 0x01280110,
334 0x00000200, 0x00000300, 0x00080200, 0x00080300,
335 0x01000200, 0x01000300, 0x01080200, 0x01080300,
336 0x00000210, 0x00000310, 0x00080210, 0x00080310,
337 0x01000210, 0x01000310, 0x01080210, 0x01080310,
338 0x00200200, 0x00200300, 0x00280200, 0x00280300,
339 0x01200200, 0x01200300, 0x01280200, 0x01280300,
340 0x00200210, 0x00200310, 0x00280210, 0x00280310,
341 0x01200210, 0x01200310, 0x01280210, 0x01280310,
342 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
343 0x00000000, 0x04000000, 0x00040000, 0x04040000,
344 0x00000002, 0x04000002, 0x00040002, 0x04040002,
345 0x00002000, 0x04002000, 0x00042000, 0x04042000,
346 0x00002002, 0x04002002, 0x00042002, 0x04042002,
347 0x00000020, 0x04000020, 0x00040020, 0x04040020,
348 0x00000022, 0x04000022, 0x00040022, 0x04040022,
349 0x00002020, 0x04002020, 0x00042020, 0x04042020,
350 0x00002022, 0x04002022, 0x00042022, 0x04042022,
351 0x00000800, 0x04000800, 0x00040800, 0x04040800,
352 0x00000802, 0x04000802, 0x00040802, 0x04040802,
353 0x00002800, 0x04002800, 0x00042800, 0x04042800,
354 0x00002802, 0x04002802, 0x00042802, 0x04042802,
355 0x00000820, 0x04000820, 0x00040820, 0x04040820,
356 0x00000822, 0x04000822, 0x00040822, 0x04040822,
357 0x00002820, 0x04002820, 0x00042820, 0x04042820,
358 0x00002822, 0x04002822, 0x00042822, 0x04042822
362 #define BOX(i,n,S) u32x ((S)[(n)][(i)])
366 #define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y])
369 __device__ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64])
382 for (int i = 0; i < 16; i++)
385 u32x t = Kd[i] ^ rotl32 (r, 28u);
387 l ^= BOX (((u >> 2) & 0x3f), 0, s_SPtrans)
388 | BOX (((u >> 10) & 0x3f), 2, s_SPtrans)
389 | BOX (((u >> 18) & 0x3f), 4, s_SPtrans)
390 | BOX (((u >> 26) & 0x3f), 6, s_SPtrans)
391 | BOX (((t >> 2) & 0x3f), 1, s_SPtrans)
392 | BOX (((t >> 10) & 0x3f), 3, s_SPtrans)
393 | BOX (((t >> 18) & 0x3f), 5, s_SPtrans)
394 | BOX (((t >> 26) & 0x3f), 7, s_SPtrans);
410 __device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64])
414 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
415 HPERM_OP (c, tt, 2, 0xcccc0000);
416 HPERM_OP (d, tt, 2, 0xcccc0000);
417 PERM_OP (d, c, tt, 1, 0x55555555);
418 PERM_OP (c, d, tt, 8, 0x00ff00ff);
419 PERM_OP (d, c, tt, 1, 0x55555555);
421 d = ((d & 0x000000ff) << 16)
422 | ((d & 0x0000ff00) << 0)
423 | ((d & 0x00ff0000) >> 16)
424 | ((c & 0xf0000000) >> 4);
429 for (int i = 0; i < 16; i++)
431 const u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
432 const u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 };
434 c = c >> shifts3s0[i] | c << shifts3s1[i];
435 d = d >> shifts3s0[i] | d << shifts3s1[i];
440 u32x s = BOX ((( c >> 0) & 0x3f), 0, s_skb)
441 | BOX ((((c >> 6) & 0x03)
442 | ((c >> 7) & 0x3c)), 1, s_skb)
443 | BOX ((((c >> 13) & 0x0f)
444 | ((c >> 14) & 0x30)), 2, s_skb)
445 | BOX ((((c >> 20) & 0x01)
447 | ((c >> 22) & 0x38)), 3, s_skb);
449 u32x t = BOX ((( d >> 0) & 0x3f), 4, s_skb)
450 | BOX ((((d >> 7) & 0x03)
451 | ((d >> 8) & 0x3c)), 5, s_skb)
452 | BOX ((((d >> 15) & 0x3f)), 6, s_skb)
453 | BOX ((((d >> 21) & 0x0f)
454 | ((d >> 22) & 0x30)), 7, s_skb);
456 #if __CUDA_ARCH__ >= 200
457 Kc[i] = __byte_perm (s, t, 0x5410);
458 Kd[i] = __byte_perm (s, t, 0x7632);
460 Kc[i] = ((t << 16) | (s & 0x0000ffff));
461 Kd[i] = ((s >> 16) | (t & 0xffff0000));
464 Kc[i] = rotl32 (Kc[i], 2u);
465 Kd[i] = rotl32 (Kd[i], 2u);
469 __device__ __constant__ comb_t c_combs[1024];
471 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
477 const u32 lid = threadIdx.x;
483 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
487 wordl0[0] = pws[gid].i[ 0];
488 wordl0[1] = pws[gid].i[ 1];
489 wordl0[2] = pws[gid].i[ 2];
490 wordl0[3] = pws[gid].i[ 3];
494 wordl1[0] = pws[gid].i[ 4];
495 wordl1[1] = pws[gid].i[ 5];
496 wordl1[2] = pws[gid].i[ 6];
497 wordl1[3] = pws[gid].i[ 7];
513 const u32 pw_l_len = pws[gid].pw_len;
515 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
517 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len);
524 __shared__ u32 s_SPtrans[8][64];
526 __shared__ u32 s_skb[8][64];
530 s_SPtrans[0][lid] = c_SPtrans[0][lid];
531 s_SPtrans[1][lid] = c_SPtrans[1][lid];
532 s_SPtrans[2][lid] = c_SPtrans[2][lid];
533 s_SPtrans[3][lid] = c_SPtrans[3][lid];
534 s_SPtrans[4][lid] = c_SPtrans[4][lid];
535 s_SPtrans[5][lid] = c_SPtrans[5][lid];
536 s_SPtrans[6][lid] = c_SPtrans[6][lid];
537 s_SPtrans[7][lid] = c_SPtrans[7][lid];
539 s_skb[0][lid] = c_skb[0][lid];
540 s_skb[1][lid] = c_skb[1][lid];
541 s_skb[2][lid] = c_skb[2][lid];
542 s_skb[3][lid] = c_skb[3][lid];
543 s_skb[4][lid] = c_skb[4][lid];
544 s_skb[5][lid] = c_skb[5][lid];
545 s_skb[6][lid] = c_skb[6][lid];
546 s_skb[7][lid] = c_skb[7][lid];
551 if (gid >= gid_max) return;
559 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
560 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
561 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
562 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
566 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
567 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
568 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
569 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
571 const u32 salt_len = salt_bufs[salt_pos].salt_len;
577 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
579 const u32 pw_r_len = c_combs[il_pos].pw_len;
581 const u32 pw_len = pw_l_len + pw_r_len;
583 const u32 salt_word_len = (salt_len + pw_len) * 2;
587 wordr0[0] = c_combs[il_pos].i[0];
588 wordr0[1] = c_combs[il_pos].i[1];
589 wordr0[2] = c_combs[il_pos].i[2];
590 wordr0[3] = c_combs[il_pos].i[3];
594 wordr1[0] = c_combs[il_pos].i[4];
595 wordr1[1] = c_combs[il_pos].i[5];
596 wordr1[2] = c_combs[il_pos].i[6];
597 wordr1[3] = c_combs[il_pos].i[7];
613 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
615 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
620 w0[0] = wordl0[0] | wordr0[0];
621 w0[1] = wordl0[1] | wordr0[1];
622 w0[2] = wordl0[2] | wordr0[2];
623 w0[3] = wordl0[3] | wordr0[3];
627 w1[0] = wordl1[0] | wordr1[0];
628 w1[1] = wordl1[1] | wordr1[1];
629 w1[2] = wordl1[2] | wordr1[2];
630 w1[3] = wordl1[3] | wordr1[3];
634 w2[0] = wordl2[0] | wordr2[0];
635 w2[1] = wordl2[1] | wordr2[1];
636 w2[2] = wordl2[2] | wordr2[2];
637 w2[3] = wordl2[3] | wordr2[3];
641 w3[0] = wordl3[0] | wordr3[0];
642 w3[1] = wordl3[1] | wordr3[1];
643 w3[2] = wordl3[2] | wordr3[2];
644 w3[3] = wordl3[3] | wordr3[3];
672 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
674 w0_t[0] |= salt_buf0[0];
675 w0_t[1] |= salt_buf0[1];
676 w0_t[2] |= salt_buf0[2];
677 w0_t[3] |= salt_buf0[3];
678 w1_t[0] |= salt_buf1[0];
679 w1_t[1] |= salt_buf1[1];
680 w1_t[2] |= salt_buf1[2];
681 w1_t[3] |= salt_buf1[3];
703 * precompute key1 since key is static: 0x0123456789abcdef
704 * plus LEFT_ROTATE by 2
746 * key1 (generate key)
754 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
758 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
759 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
764 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
768 * key2 (generate hash)
771 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
776 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
780 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
781 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
786 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
793 const u32x r0 = iv[0];
794 const u32x r1 = iv[1];
798 #include VECT_COMPARE_M
802 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
806 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
810 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
816 const u32 lid = threadIdx.x;
822 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
826 wordl0[0] = pws[gid].i[ 0];
827 wordl0[1] = pws[gid].i[ 1];
828 wordl0[2] = pws[gid].i[ 2];
829 wordl0[3] = pws[gid].i[ 3];
833 wordl1[0] = pws[gid].i[ 4];
834 wordl1[1] = pws[gid].i[ 5];
835 wordl1[2] = pws[gid].i[ 6];
836 wordl1[3] = pws[gid].i[ 7];
852 const u32 pw_l_len = pws[gid].pw_len;
854 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
856 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len);
863 __shared__ u32 s_SPtrans[8][64];
865 __shared__ u32 s_skb[8][64];
869 s_SPtrans[0][lid] = c_SPtrans[0][lid];
870 s_SPtrans[1][lid] = c_SPtrans[1][lid];
871 s_SPtrans[2][lid] = c_SPtrans[2][lid];
872 s_SPtrans[3][lid] = c_SPtrans[3][lid];
873 s_SPtrans[4][lid] = c_SPtrans[4][lid];
874 s_SPtrans[5][lid] = c_SPtrans[5][lid];
875 s_SPtrans[6][lid] = c_SPtrans[6][lid];
876 s_SPtrans[7][lid] = c_SPtrans[7][lid];
878 s_skb[0][lid] = c_skb[0][lid];
879 s_skb[1][lid] = c_skb[1][lid];
880 s_skb[2][lid] = c_skb[2][lid];
881 s_skb[3][lid] = c_skb[3][lid];
882 s_skb[4][lid] = c_skb[4][lid];
883 s_skb[5][lid] = c_skb[5][lid];
884 s_skb[6][lid] = c_skb[6][lid];
885 s_skb[7][lid] = c_skb[7][lid];
890 if (gid >= gid_max) return;
898 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
899 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
900 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
901 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
905 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
906 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
907 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
908 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
910 const u32 salt_len = salt_bufs[salt_pos].salt_len;
916 const u32 search[4] =
918 digests_buf[digests_offset].digest_buf[DGST_R0],
919 digests_buf[digests_offset].digest_buf[DGST_R1],
920 digests_buf[digests_offset].digest_buf[DGST_R2],
921 digests_buf[digests_offset].digest_buf[DGST_R3]
928 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
930 const u32 pw_r_len = c_combs[il_pos].pw_len;
932 const u32 pw_len = pw_l_len + pw_r_len;
934 const u32 salt_word_len = (salt_len + pw_len) * 2;
938 wordr0[0] = c_combs[il_pos].i[0];
939 wordr0[1] = c_combs[il_pos].i[1];
940 wordr0[2] = c_combs[il_pos].i[2];
941 wordr0[3] = c_combs[il_pos].i[3];
945 wordr1[0] = c_combs[il_pos].i[4];
946 wordr1[1] = c_combs[il_pos].i[5];
947 wordr1[2] = c_combs[il_pos].i[6];
948 wordr1[3] = c_combs[il_pos].i[7];
964 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
966 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
971 w0[0] = wordl0[0] | wordr0[0];
972 w0[1] = wordl0[1] | wordr0[1];
973 w0[2] = wordl0[2] | wordr0[2];
974 w0[3] = wordl0[3] | wordr0[3];
978 w1[0] = wordl1[0] | wordr1[0];
979 w1[1] = wordl1[1] | wordr1[1];
980 w1[2] = wordl1[2] | wordr1[2];
981 w1[3] = wordl1[3] | wordr1[3];
985 w2[0] = wordl2[0] | wordr2[0];
986 w2[1] = wordl2[1] | wordr2[1];
987 w2[2] = wordl2[2] | wordr2[2];
988 w2[3] = wordl2[3] | wordr2[3];
992 w3[0] = wordl3[0] | wordr3[0];
993 w3[1] = wordl3[1] | wordr3[1];
994 w3[2] = wordl3[2] | wordr3[2];
995 w3[3] = wordl3[3] | wordr3[3];
1023 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
1025 w0_t[0] |= salt_buf0[0];
1026 w0_t[1] |= salt_buf0[1];
1027 w0_t[2] |= salt_buf0[2];
1028 w0_t[3] |= salt_buf0[3];
1029 w1_t[0] |= salt_buf1[0];
1030 w1_t[1] |= salt_buf1[1];
1031 w1_t[2] |= salt_buf1[2];
1032 w1_t[3] |= salt_buf1[3];
1054 * precompute key1 since key is static: 0x0123456789abcdef
1055 * plus LEFT_ROTATE by 2
1060 Kc[ 0] = 0x64649040;
1061 Kc[ 1] = 0x14909858;
1062 Kc[ 2] = 0xc4b44888;
1063 Kc[ 3] = 0x9094e438;
1064 Kc[ 4] = 0xd8a004f0;
1065 Kc[ 5] = 0xa8f02810;
1066 Kc[ 6] = 0xc84048d8;
1067 Kc[ 7] = 0x68d804a8;
1068 Kc[ 8] = 0x0490e40c;
1069 Kc[ 9] = 0xac183024;
1070 Kc[10] = 0x24c07c10;
1071 Kc[11] = 0x8c88c038;
1072 Kc[12] = 0xc048c824;
1073 Kc[13] = 0x4c0470a8;
1074 Kc[14] = 0x584020b4;
1075 Kc[15] = 0x00742c4c;
1079 Kd[ 0] = 0xa42ce40c;
1080 Kd[ 1] = 0x64689858;
1081 Kd[ 2] = 0x484050b8;
1082 Kd[ 3] = 0xe8184814;
1083 Kd[ 4] = 0x405cc070;
1084 Kd[ 5] = 0xa010784c;
1085 Kd[ 6] = 0x6074a800;
1086 Kd[ 7] = 0x80701c1c;
1087 Kd[ 8] = 0x9cd49430;
1088 Kd[ 9] = 0x4c8ce078;
1089 Kd[10] = 0x5c18c088;
1090 Kd[11] = 0x28a8a4c8;
1091 Kd[12] = 0x3c180838;
1092 Kd[13] = 0xb0b86c20;
1093 Kd[14] = 0xac84a094;
1094 Kd[15] = 0x4ce0c0c4;
1097 * key1 (generate key)
1105 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1109 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1110 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1115 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1119 * key2 (generate hash)
1122 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
1127 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1131 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1132 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1137 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1144 const u32x r0 = iv[0];
1145 const u32x r1 = iv[1];
1149 #include VECT_COMPARE_S
1153 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1157 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)