2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
27 #include "include/rp_gpu.h"
31 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
32 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
36 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
40 #define PERM_OP(a,b,tt,n,m) \
50 #define HPERM_OP(a,tt,n,m) \
56 tt = tt >> (16 + n); \
62 PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \
63 PERM_OP (l, r, tt, 16, 0x0000ffff); \
64 PERM_OP (r, l, tt, 2, 0x33333333); \
65 PERM_OP (l, r, tt, 8, 0x00ff00ff); \
66 PERM_OP (r, l, tt, 1, 0x55555555); \
71 PERM_OP (l, r, tt, 1, 0x55555555); \
72 PERM_OP (r, l, tt, 8, 0x00ff00ff); \
73 PERM_OP (l, r, tt, 2, 0x33333333); \
74 PERM_OP (r, l, tt, 16, 0x0000ffff); \
75 PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \
78 __device__ __constant__ u32 c_SPtrans[8][64] =
81 0x02080800, 0x00080000, 0x02000002, 0x02080802,
82 0x02000000, 0x00080802, 0x00080002, 0x02000002,
83 0x00080802, 0x02080800, 0x02080000, 0x00000802,
84 0x02000802, 0x02000000, 0x00000000, 0x00080002,
85 0x00080000, 0x00000002, 0x02000800, 0x00080800,
86 0x02080802, 0x02080000, 0x00000802, 0x02000800,
87 0x00000002, 0x00000800, 0x00080800, 0x02080002,
88 0x00000800, 0x02000802, 0x02080002, 0x00000000,
89 0x00000000, 0x02080802, 0x02000800, 0x00080002,
90 0x02080800, 0x00080000, 0x00000802, 0x02000800,
91 0x02080002, 0x00000800, 0x00080800, 0x02000002,
92 0x00080802, 0x00000002, 0x02000002, 0x02080000,
93 0x02080802, 0x00080800, 0x02080000, 0x02000802,
94 0x02000000, 0x00000802, 0x00080002, 0x00000000,
95 0x00080000, 0x02000000, 0x02000802, 0x02080800,
96 0x00000002, 0x02080002, 0x00000800, 0x00080802,
98 0x40108010, 0x00000000, 0x00108000, 0x40100000,
99 0x40000010, 0x00008010, 0x40008000, 0x00108000,
100 0x00008000, 0x40100010, 0x00000010, 0x40008000,
101 0x00100010, 0x40108000, 0x40100000, 0x00000010,
102 0x00100000, 0x40008010, 0x40100010, 0x00008000,
103 0x00108010, 0x40000000, 0x00000000, 0x00100010,
104 0x40008010, 0x00108010, 0x40108000, 0x40000010,
105 0x40000000, 0x00100000, 0x00008010, 0x40108010,
106 0x00100010, 0x40108000, 0x40008000, 0x00108010,
107 0x40108010, 0x00100010, 0x40000010, 0x00000000,
108 0x40000000, 0x00008010, 0x00100000, 0x40100010,
109 0x00008000, 0x40000000, 0x00108010, 0x40008010,
110 0x40108000, 0x00008000, 0x00000000, 0x40000010,
111 0x00000010, 0x40108010, 0x00108000, 0x40100000,
112 0x40100010, 0x00100000, 0x00008010, 0x40008000,
113 0x40008010, 0x00000010, 0x40100000, 0x00108000,
115 0x04000001, 0x04040100, 0x00000100, 0x04000101,
116 0x00040001, 0x04000000, 0x04000101, 0x00040100,
117 0x04000100, 0x00040000, 0x04040000, 0x00000001,
118 0x04040101, 0x00000101, 0x00000001, 0x04040001,
119 0x00000000, 0x00040001, 0x04040100, 0x00000100,
120 0x00000101, 0x04040101, 0x00040000, 0x04000001,
121 0x04040001, 0x04000100, 0x00040101, 0x04040000,
122 0x00040100, 0x00000000, 0x04000000, 0x00040101,
123 0x04040100, 0x00000100, 0x00000001, 0x00040000,
124 0x00000101, 0x00040001, 0x04040000, 0x04000101,
125 0x00000000, 0x04040100, 0x00040100, 0x04040001,
126 0x00040001, 0x04000000, 0x04040101, 0x00000001,
127 0x00040101, 0x04000001, 0x04000000, 0x04040101,
128 0x00040000, 0x04000100, 0x04000101, 0x00040100,
129 0x04000100, 0x00000000, 0x04040001, 0x00000101,
130 0x04000001, 0x00040101, 0x00000100, 0x04040000,
132 0x00401008, 0x10001000, 0x00000008, 0x10401008,
133 0x00000000, 0x10400000, 0x10001008, 0x00400008,
134 0x10401000, 0x10000008, 0x10000000, 0x00001008,
135 0x10000008, 0x00401008, 0x00400000, 0x10000000,
136 0x10400008, 0x00401000, 0x00001000, 0x00000008,
137 0x00401000, 0x10001008, 0x10400000, 0x00001000,
138 0x00001008, 0x00000000, 0x00400008, 0x10401000,
139 0x10001000, 0x10400008, 0x10401008, 0x00400000,
140 0x10400008, 0x00001008, 0x00400000, 0x10000008,
141 0x00401000, 0x10001000, 0x00000008, 0x10400000,
142 0x10001008, 0x00000000, 0x00001000, 0x00400008,
143 0x00000000, 0x10400008, 0x10401000, 0x00001000,
144 0x10000000, 0x10401008, 0x00401008, 0x00400000,
145 0x10401008, 0x00000008, 0x10001000, 0x00401008,
146 0x00400008, 0x00401000, 0x10400000, 0x10001008,
147 0x00001008, 0x10000000, 0x10000008, 0x10401000,
149 0x08000000, 0x00010000, 0x00000400, 0x08010420,
150 0x08010020, 0x08000400, 0x00010420, 0x08010000,
151 0x00010000, 0x00000020, 0x08000020, 0x00010400,
152 0x08000420, 0x08010020, 0x08010400, 0x00000000,
153 0x00010400, 0x08000000, 0x00010020, 0x00000420,
154 0x08000400, 0x00010420, 0x00000000, 0x08000020,
155 0x00000020, 0x08000420, 0x08010420, 0x00010020,
156 0x08010000, 0x00000400, 0x00000420, 0x08010400,
157 0x08010400, 0x08000420, 0x00010020, 0x08010000,
158 0x00010000, 0x00000020, 0x08000020, 0x08000400,
159 0x08000000, 0x00010400, 0x08010420, 0x00000000,
160 0x00010420, 0x08000000, 0x00000400, 0x00010020,
161 0x08000420, 0x00000400, 0x00000000, 0x08010420,
162 0x08010020, 0x08010400, 0x00000420, 0x00010000,
163 0x00010400, 0x08010020, 0x08000400, 0x00000420,
164 0x00000020, 0x00010420, 0x08010000, 0x08000020,
166 0x80000040, 0x00200040, 0x00000000, 0x80202000,
167 0x00200040, 0x00002000, 0x80002040, 0x00200000,
168 0x00002040, 0x80202040, 0x00202000, 0x80000000,
169 0x80002000, 0x80000040, 0x80200000, 0x00202040,
170 0x00200000, 0x80002040, 0x80200040, 0x00000000,
171 0x00002000, 0x00000040, 0x80202000, 0x80200040,
172 0x80202040, 0x80200000, 0x80000000, 0x00002040,
173 0x00000040, 0x00202000, 0x00202040, 0x80002000,
174 0x00002040, 0x80000000, 0x80002000, 0x00202040,
175 0x80202000, 0x00200040, 0x00000000, 0x80002000,
176 0x80000000, 0x00002000, 0x80200040, 0x00200000,
177 0x00200040, 0x80202040, 0x00202000, 0x00000040,
178 0x80202040, 0x00202000, 0x00200000, 0x80002040,
179 0x80000040, 0x80200000, 0x00202040, 0x00000000,
180 0x00002000, 0x80000040, 0x80002040, 0x80202000,
181 0x80200000, 0x00002040, 0x00000040, 0x80200040,
183 0x00004000, 0x00000200, 0x01000200, 0x01000004,
184 0x01004204, 0x00004004, 0x00004200, 0x00000000,
185 0x01000000, 0x01000204, 0x00000204, 0x01004000,
186 0x00000004, 0x01004200, 0x01004000, 0x00000204,
187 0x01000204, 0x00004000, 0x00004004, 0x01004204,
188 0x00000000, 0x01000200, 0x01000004, 0x00004200,
189 0x01004004, 0x00004204, 0x01004200, 0x00000004,
190 0x00004204, 0x01004004, 0x00000200, 0x01000000,
191 0x00004204, 0x01004000, 0x01004004, 0x00000204,
192 0x00004000, 0x00000200, 0x01000000, 0x01004004,
193 0x01000204, 0x00004204, 0x00004200, 0x00000000,
194 0x00000200, 0x01000004, 0x00000004, 0x01000200,
195 0x00000000, 0x01000204, 0x01000200, 0x00004200,
196 0x00000204, 0x00004000, 0x01004204, 0x01000000,
197 0x01004200, 0x00000004, 0x00004004, 0x01004204,
198 0x01000004, 0x01004200, 0x01004000, 0x00004004,
200 0x20800080, 0x20820000, 0x00020080, 0x00000000,
201 0x20020000, 0x00800080, 0x20800000, 0x20820080,
202 0x00000080, 0x20000000, 0x00820000, 0x00020080,
203 0x00820080, 0x20020080, 0x20000080, 0x20800000,
204 0x00020000, 0x00820080, 0x00800080, 0x20020000,
205 0x20820080, 0x20000080, 0x00000000, 0x00820000,
206 0x20000000, 0x00800000, 0x20020080, 0x20800080,
207 0x00800000, 0x00020000, 0x20820000, 0x00000080,
208 0x00800000, 0x00020000, 0x20000080, 0x20820080,
209 0x00020080, 0x20000000, 0x00000000, 0x00820000,
210 0x20800080, 0x20020080, 0x20020000, 0x00800080,
211 0x20820000, 0x00000080, 0x00800080, 0x20020000,
212 0x20820080, 0x00800000, 0x20800000, 0x20000080,
213 0x00820000, 0x00020080, 0x20020080, 0x20800000,
214 0x00000080, 0x20820000, 0x00820080, 0x00000000,
215 0x20000000, 0x20800080, 0x00020000, 0x00820080,
218 __device__ __constant__ u32 c_skb[8][64] =
220 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
221 0x00000000, 0x00000010, 0x20000000, 0x20000010,
222 0x00010000, 0x00010010, 0x20010000, 0x20010010,
223 0x00000800, 0x00000810, 0x20000800, 0x20000810,
224 0x00010800, 0x00010810, 0x20010800, 0x20010810,
225 0x00000020, 0x00000030, 0x20000020, 0x20000030,
226 0x00010020, 0x00010030, 0x20010020, 0x20010030,
227 0x00000820, 0x00000830, 0x20000820, 0x20000830,
228 0x00010820, 0x00010830, 0x20010820, 0x20010830,
229 0x00080000, 0x00080010, 0x20080000, 0x20080010,
230 0x00090000, 0x00090010, 0x20090000, 0x20090010,
231 0x00080800, 0x00080810, 0x20080800, 0x20080810,
232 0x00090800, 0x00090810, 0x20090800, 0x20090810,
233 0x00080020, 0x00080030, 0x20080020, 0x20080030,
234 0x00090020, 0x00090030, 0x20090020, 0x20090030,
235 0x00080820, 0x00080830, 0x20080820, 0x20080830,
236 0x00090820, 0x00090830, 0x20090820, 0x20090830,
237 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
238 0x00000000, 0x02000000, 0x00002000, 0x02002000,
239 0x00200000, 0x02200000, 0x00202000, 0x02202000,
240 0x00000004, 0x02000004, 0x00002004, 0x02002004,
241 0x00200004, 0x02200004, 0x00202004, 0x02202004,
242 0x00000400, 0x02000400, 0x00002400, 0x02002400,
243 0x00200400, 0x02200400, 0x00202400, 0x02202400,
244 0x00000404, 0x02000404, 0x00002404, 0x02002404,
245 0x00200404, 0x02200404, 0x00202404, 0x02202404,
246 0x10000000, 0x12000000, 0x10002000, 0x12002000,
247 0x10200000, 0x12200000, 0x10202000, 0x12202000,
248 0x10000004, 0x12000004, 0x10002004, 0x12002004,
249 0x10200004, 0x12200004, 0x10202004, 0x12202004,
250 0x10000400, 0x12000400, 0x10002400, 0x12002400,
251 0x10200400, 0x12200400, 0x10202400, 0x12202400,
252 0x10000404, 0x12000404, 0x10002404, 0x12002404,
253 0x10200404, 0x12200404, 0x10202404, 0x12202404,
254 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
255 0x00000000, 0x00000001, 0x00040000, 0x00040001,
256 0x01000000, 0x01000001, 0x01040000, 0x01040001,
257 0x00000002, 0x00000003, 0x00040002, 0x00040003,
258 0x01000002, 0x01000003, 0x01040002, 0x01040003,
259 0x00000200, 0x00000201, 0x00040200, 0x00040201,
260 0x01000200, 0x01000201, 0x01040200, 0x01040201,
261 0x00000202, 0x00000203, 0x00040202, 0x00040203,
262 0x01000202, 0x01000203, 0x01040202, 0x01040203,
263 0x08000000, 0x08000001, 0x08040000, 0x08040001,
264 0x09000000, 0x09000001, 0x09040000, 0x09040001,
265 0x08000002, 0x08000003, 0x08040002, 0x08040003,
266 0x09000002, 0x09000003, 0x09040002, 0x09040003,
267 0x08000200, 0x08000201, 0x08040200, 0x08040201,
268 0x09000200, 0x09000201, 0x09040200, 0x09040201,
269 0x08000202, 0x08000203, 0x08040202, 0x08040203,
270 0x09000202, 0x09000203, 0x09040202, 0x09040203,
271 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
272 0x00000000, 0x00100000, 0x00000100, 0x00100100,
273 0x00000008, 0x00100008, 0x00000108, 0x00100108,
274 0x00001000, 0x00101000, 0x00001100, 0x00101100,
275 0x00001008, 0x00101008, 0x00001108, 0x00101108,
276 0x04000000, 0x04100000, 0x04000100, 0x04100100,
277 0x04000008, 0x04100008, 0x04000108, 0x04100108,
278 0x04001000, 0x04101000, 0x04001100, 0x04101100,
279 0x04001008, 0x04101008, 0x04001108, 0x04101108,
280 0x00020000, 0x00120000, 0x00020100, 0x00120100,
281 0x00020008, 0x00120008, 0x00020108, 0x00120108,
282 0x00021000, 0x00121000, 0x00021100, 0x00121100,
283 0x00021008, 0x00121008, 0x00021108, 0x00121108,
284 0x04020000, 0x04120000, 0x04020100, 0x04120100,
285 0x04020008, 0x04120008, 0x04020108, 0x04120108,
286 0x04021000, 0x04121000, 0x04021100, 0x04121100,
287 0x04021008, 0x04121008, 0x04021108, 0x04121108,
288 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
289 0x00000000, 0x10000000, 0x00010000, 0x10010000,
290 0x00000004, 0x10000004, 0x00010004, 0x10010004,
291 0x20000000, 0x30000000, 0x20010000, 0x30010000,
292 0x20000004, 0x30000004, 0x20010004, 0x30010004,
293 0x00100000, 0x10100000, 0x00110000, 0x10110000,
294 0x00100004, 0x10100004, 0x00110004, 0x10110004,
295 0x20100000, 0x30100000, 0x20110000, 0x30110000,
296 0x20100004, 0x30100004, 0x20110004, 0x30110004,
297 0x00001000, 0x10001000, 0x00011000, 0x10011000,
298 0x00001004, 0x10001004, 0x00011004, 0x10011004,
299 0x20001000, 0x30001000, 0x20011000, 0x30011000,
300 0x20001004, 0x30001004, 0x20011004, 0x30011004,
301 0x00101000, 0x10101000, 0x00111000, 0x10111000,
302 0x00101004, 0x10101004, 0x00111004, 0x10111004,
303 0x20101000, 0x30101000, 0x20111000, 0x30111000,
304 0x20101004, 0x30101004, 0x20111004, 0x30111004,
305 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
306 0x00000000, 0x08000000, 0x00000008, 0x08000008,
307 0x00000400, 0x08000400, 0x00000408, 0x08000408,
308 0x00020000, 0x08020000, 0x00020008, 0x08020008,
309 0x00020400, 0x08020400, 0x00020408, 0x08020408,
310 0x00000001, 0x08000001, 0x00000009, 0x08000009,
311 0x00000401, 0x08000401, 0x00000409, 0x08000409,
312 0x00020001, 0x08020001, 0x00020009, 0x08020009,
313 0x00020401, 0x08020401, 0x00020409, 0x08020409,
314 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
315 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
316 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
317 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
318 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
319 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
320 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
321 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
322 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
323 0x00000000, 0x00000100, 0x00080000, 0x00080100,
324 0x01000000, 0x01000100, 0x01080000, 0x01080100,
325 0x00000010, 0x00000110, 0x00080010, 0x00080110,
326 0x01000010, 0x01000110, 0x01080010, 0x01080110,
327 0x00200000, 0x00200100, 0x00280000, 0x00280100,
328 0x01200000, 0x01200100, 0x01280000, 0x01280100,
329 0x00200010, 0x00200110, 0x00280010, 0x00280110,
330 0x01200010, 0x01200110, 0x01280010, 0x01280110,
331 0x00000200, 0x00000300, 0x00080200, 0x00080300,
332 0x01000200, 0x01000300, 0x01080200, 0x01080300,
333 0x00000210, 0x00000310, 0x00080210, 0x00080310,
334 0x01000210, 0x01000310, 0x01080210, 0x01080310,
335 0x00200200, 0x00200300, 0x00280200, 0x00280300,
336 0x01200200, 0x01200300, 0x01280200, 0x01280300,
337 0x00200210, 0x00200310, 0x00280210, 0x00280310,
338 0x01200210, 0x01200310, 0x01280210, 0x01280310,
339 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
340 0x00000000, 0x04000000, 0x00040000, 0x04040000,
341 0x00000002, 0x04000002, 0x00040002, 0x04040002,
342 0x00002000, 0x04002000, 0x00042000, 0x04042000,
343 0x00002002, 0x04002002, 0x00042002, 0x04042002,
344 0x00000020, 0x04000020, 0x00040020, 0x04040020,
345 0x00000022, 0x04000022, 0x00040022, 0x04040022,
346 0x00002020, 0x04002020, 0x00042020, 0x04042020,
347 0x00002022, 0x04002022, 0x00042022, 0x04042022,
348 0x00000800, 0x04000800, 0x00040800, 0x04040800,
349 0x00000802, 0x04000802, 0x00040802, 0x04040802,
350 0x00002800, 0x04002800, 0x00042800, 0x04042800,
351 0x00002802, 0x04002802, 0x00042802, 0x04042802,
352 0x00000820, 0x04000820, 0x00040820, 0x04040820,
353 0x00000822, 0x04000822, 0x00040822, 0x04040822,
354 0x00002820, 0x04002820, 0x00042820, 0x04042820,
355 0x00002822, 0x04002822, 0x00042822, 0x04042822,
358 #define NBOX(i,n,S) (S)[(n)][(i)]
360 __device__ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64])
368 for (u32 i = 0; i < 16; i++)
371 u32x t = Kd[i] ^ rotl32 (r, 28u);
374 l ^= NBOX (((u >> 2) & 0x3f), 0, s_SPtrans)
375 | NBOX (((u >> 10) & 0x3f), 2, s_SPtrans)
376 | NBOX (((u >> 18) & 0x3f), 4, s_SPtrans)
377 | NBOX (((u >> 26) & 0x3f), 6, s_SPtrans)
378 | NBOX (((t >> 2) & 0x3f), 1, s_SPtrans)
379 | NBOX (((t >> 10) & 0x3f), 3, s_SPtrans)
380 | NBOX (((t >> 18) & 0x3f), 5, s_SPtrans)
381 | NBOX (((t >> 26) & 0x3f), 7, s_SPtrans);
385 l.s0 ^= NBOX (((u.s0 >> 2) & 0x3f), 0, s_SPtrans)
386 | NBOX (((u.s0 >> 10) & 0x3f), 2, s_SPtrans)
387 | NBOX (((u.s0 >> 18) & 0x3f), 4, s_SPtrans)
388 | NBOX (((u.s0 >> 26) & 0x3f), 6, s_SPtrans)
389 | NBOX (((t.s0 >> 2) & 0x3f), 1, s_SPtrans)
390 | NBOX (((t.s0 >> 10) & 0x3f), 3, s_SPtrans)
391 | NBOX (((t.s0 >> 18) & 0x3f), 5, s_SPtrans)
392 | NBOX (((t.s0 >> 26) & 0x3f), 7, s_SPtrans);
394 l.s1 ^= NBOX (((u.s1 >> 2) & 0x3f), 0, s_SPtrans)
395 | NBOX (((u.s1 >> 10) & 0x3f), 2, s_SPtrans)
396 | NBOX (((u.s1 >> 18) & 0x3f), 4, s_SPtrans)
397 | NBOX (((u.s1 >> 26) & 0x3f), 6, s_SPtrans)
398 | NBOX (((t.s1 >> 2) & 0x3f), 1, s_SPtrans)
399 | NBOX (((t.s1 >> 10) & 0x3f), 3, s_SPtrans)
400 | NBOX (((t.s1 >> 18) & 0x3f), 5, s_SPtrans)
401 | NBOX (((t.s1 >> 26) & 0x3f), 7, s_SPtrans);
413 __device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64])
417 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
418 HPERM_OP (c, tt, 2, 0xcccc0000);
419 HPERM_OP (d, tt, 2, 0xcccc0000);
420 PERM_OP (d, c, tt, 1, 0x55555555);
421 PERM_OP (c, d, tt, 8, 0x00ff00ff);
422 PERM_OP (d, c, tt, 1, 0x55555555);
424 d = ((d & 0x000000ff) << 16)
425 | ((d & 0x0000ff00) << 0)
426 | ((d & 0x00ff0000) >> 16)
427 | ((c & 0xf0000000) >> 4);
432 for (u32 i = 0; i < 16; i++)
434 const u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
435 const u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 };
437 c = c >> shifts3s0[i] | c << shifts3s1[i];
438 d = d >> shifts3s0[i] | d << shifts3s1[i];
447 s = NBOX ((( c >> 0) & 0x3f), 0, s_skb)
448 | NBOX ((((c >> 6) & 0x03)
449 | ((c >> 7) & 0x3c)), 1, s_skb)
450 | NBOX ((((c >> 13) & 0x0f)
451 | ((c >> 14) & 0x30)), 2, s_skb)
452 | NBOX ((((c >> 20) & 0x01)
454 | ((c >> 22) & 0x38)), 3, s_skb);
456 t = NBOX ((( d >> 0) & 0x3f), 4, s_skb)
457 | NBOX ((((d >> 7) & 0x03)
458 | ((d >> 8) & 0x3c)), 5, s_skb)
459 | NBOX ((((d >> 15) & 0x3f)), 6, s_skb)
460 | NBOX ((((d >> 21) & 0x0f)
461 | ((d >> 22) & 0x30)), 7, s_skb);
465 s.s0 = NBOX ((( c.s0 >> 0) & 0x3f), 0, s_skb)
466 | NBOX ((((c.s0 >> 6) & 0x03)
467 | ((c.s0 >> 7) & 0x3c)), 1, s_skb)
468 | NBOX ((((c.s0 >> 13) & 0x0f)
469 | ((c.s0 >> 14) & 0x30)), 2, s_skb)
470 | NBOX ((((c.s0 >> 20) & 0x01)
471 | ((c.s0 >> 21) & 0x06)
472 | ((c.s0 >> 22) & 0x38)), 3, s_skb);
474 t.s0 = NBOX ((( d.s0 >> 0) & 0x3f), 4, s_skb)
475 | NBOX ((((d.s0 >> 7) & 0x03)
476 | ((d.s0 >> 8) & 0x3c)), 5, s_skb)
477 | NBOX ((((d.s0 >> 15) & 0x3f)), 6, s_skb)
478 | NBOX ((((d.s0 >> 21) & 0x0f)
479 | ((d.s0 >> 22) & 0x30)), 7, s_skb);
481 s.s1 = NBOX ((( c.s1 >> 0) & 0x3f), 0, s_skb)
482 | NBOX ((((c.s1 >> 6) & 0x03)
483 | ((c.s1 >> 7) & 0x3c)), 1, s_skb)
484 | NBOX ((((c.s1 >> 13) & 0x0f)
485 | ((c.s1 >> 14) & 0x30)), 2, s_skb)
486 | NBOX ((((c.s1 >> 20) & 0x01)
487 | ((c.s1 >> 21) & 0x06)
488 | ((c.s1 >> 22) & 0x38)), 3, s_skb);
490 t.s1 = NBOX ((( d.s1 >> 0) & 0x3f), 4, s_skb)
491 | NBOX ((((d.s1 >> 7) & 0x03)
492 | ((d.s1 >> 8) & 0x3c)), 5, s_skb)
493 | NBOX ((((d.s1 >> 15) & 0x3f)), 6, s_skb)
494 | NBOX ((((d.s1 >> 21) & 0x0f)
495 | ((d.s1 >> 22) & 0x30)), 7, s_skb);
498 #if __CUDA_ARCH__ >= 200
499 Kc[i] = __byte_perm (s, t, 0x5410);
500 Kd[i] = __byte_perm (s, t, 0x7632);
502 Kc[i] = ((t << 16) | (s & 0x0000ffff));
503 Kd[i] = ((s >> 16) | (t & 0xffff0000));
506 Kc[i] = rotl32 (Kc[i], 2u);
507 Kd[i] = rotl32 (Kd[i], 2u);
511 __device__ static void transform_racf_key (const u32x w0, const u32x w1, u32x key[2])
514 const u8 ascii_to_ebcdic_pc[256] =
516 // little hack, can't crack 0-bytes in password, but who cares
517 // 0xab, 0xa8, 0xae, 0xad, 0xc4, 0xf1, 0xf7, 0xf4, 0x86, 0xa1, 0xe0, 0xbc, 0xb3, 0xb0, 0xb6, 0xb5,
518 0x2a, 0xa8, 0xae, 0xad, 0xc4, 0xf1, 0xf7, 0xf4, 0x86, 0xa1, 0xe0, 0xbc, 0xb3, 0xb0, 0xb6, 0xb5,
519 0x8a, 0x89, 0x8f, 0x8c, 0xd3, 0xd0, 0xce, 0xe6, 0x9b, 0x98, 0xd5, 0xe5, 0x92, 0x91, 0x97, 0x94,
520 0x2a, 0x34, 0x54, 0x5d, 0x1c, 0x73, 0x0b, 0x51, 0x31, 0x10, 0x13, 0x37, 0x7c, 0x6b, 0x3d, 0x68,
521 0x4a, 0x49, 0x4f, 0x4c, 0x43, 0x40, 0x46, 0x45, 0x5b, 0x58, 0x5e, 0x16, 0x32, 0x57, 0x76, 0x75,
522 0x52, 0x29, 0x2f, 0x2c, 0x23, 0x20, 0x26, 0x25, 0x3b, 0x38, 0x08, 0x0e, 0x0d, 0x02, 0x01, 0x07,
523 0x04, 0x1a, 0x19, 0x6e, 0x6d, 0x62, 0x61, 0x67, 0x64, 0x7a, 0x79, 0x3e, 0x6b, 0x1f, 0x15, 0x70,
524 0x58, 0xa8, 0xae, 0xad, 0xa2, 0xa1, 0xa7, 0xa4, 0xba, 0xb9, 0x89, 0x8f, 0x8c, 0x83, 0x80, 0x86,
525 0x85, 0x9b, 0x98, 0xef, 0xec, 0xe3, 0xe0, 0xe6, 0xe5, 0xfb, 0xf8, 0x2a, 0x7f, 0x0b, 0xe9, 0xa4,
526 0xea, 0xe9, 0xef, 0xec, 0xe3, 0x80, 0xa7, 0x85, 0xfb, 0xf8, 0xfe, 0xfd, 0xf2, 0xb9, 0xbf, 0x9d,
527 0xcb, 0xc8, 0x9e, 0xcd, 0xc2, 0xc1, 0xc7, 0xba, 0xda, 0xd9, 0xdf, 0xdc, 0xa2, 0x83, 0xd6, 0x68,
528 0x29, 0x2f, 0x2c, 0x23, 0x20, 0x26, 0x25, 0x3b, 0x38, 0x08, 0x0e, 0x0d, 0x02, 0x01, 0x07, 0x04,
529 0x1a, 0x19, 0x6e, 0x6d, 0x62, 0x61, 0x67, 0x64, 0x7a, 0x79, 0x4a, 0x49, 0x4f, 0x4c, 0x43, 0x40,
530 0x46, 0x45, 0x5b, 0xab, 0xbf, 0xbc, 0xb3, 0xb0, 0xb6, 0xb5, 0x8a, 0x9e, 0x9d, 0x92, 0x91, 0x97,
531 0x94, 0xea, 0xfe, 0xfd, 0xf2, 0xf1, 0xf7, 0xf4, 0xcb, 0xc8, 0xce, 0xcd, 0xc2, 0xc1, 0xc7, 0xc4,
532 0xda, 0xd9, 0xdf, 0xdc, 0xd3, 0xd0, 0xd6, 0xd5, 0x3e, 0x3d, 0x32, 0x31, 0x37, 0x34, 0x1f, 0x1c,
533 0x13, 0x10, 0x16, 0x15, 0x7f, 0x7c, 0x73, 0x70, 0x76, 0x75, 0x5e, 0x5d, 0x52, 0x51, 0x57, 0x54,
538 key[0] = (ascii_to_ebcdic_pc[(w0 >> 0) & 0xff]) << 0
539 | (ascii_to_ebcdic_pc[(w0 >> 8) & 0xff]) << 8
540 | (ascii_to_ebcdic_pc[(w0 >> 16) & 0xff]) << 16
541 | (ascii_to_ebcdic_pc[(w0 >> 24) & 0xff]) << 24;
543 key[1] = (ascii_to_ebcdic_pc[(w1 >> 0) & 0xff]) << 0
544 | (ascii_to_ebcdic_pc[(w1 >> 8) & 0xff]) << 8
545 | (ascii_to_ebcdic_pc[(w1 >> 16) & 0xff]) << 16
546 | (ascii_to_ebcdic_pc[(w1 >> 24) & 0xff]) << 24;
551 key[0].s0 = (ascii_to_ebcdic_pc[(w0.s0 >> 0) & 0xff]) << 0
552 | (ascii_to_ebcdic_pc[(w0.s0 >> 8) & 0xff]) << 8
553 | (ascii_to_ebcdic_pc[(w0.s0 >> 16) & 0xff]) << 16
554 | (ascii_to_ebcdic_pc[(w0.s0 >> 24) & 0xff]) << 24;
556 key[0].s1 = (ascii_to_ebcdic_pc[(w0.s1 >> 0) & 0xff]) << 0
557 | (ascii_to_ebcdic_pc[(w0.s1 >> 8) & 0xff]) << 8
558 | (ascii_to_ebcdic_pc[(w0.s1 >> 16) & 0xff]) << 16
559 | (ascii_to_ebcdic_pc[(w0.s1 >> 24) & 0xff]) << 24;
561 key[1].s0 = (ascii_to_ebcdic_pc[(w1.s0 >> 0) & 0xff]) << 0
562 | (ascii_to_ebcdic_pc[(w1.s0 >> 8) & 0xff]) << 8
563 | (ascii_to_ebcdic_pc[(w1.s0 >> 16) & 0xff]) << 16
564 | (ascii_to_ebcdic_pc[(w1.s0 >> 24) & 0xff]) << 24;
566 key[1].s1 = (ascii_to_ebcdic_pc[(w1.s1 >> 0) & 0xff]) << 0
567 | (ascii_to_ebcdic_pc[(w1.s1 >> 8) & 0xff]) << 8
568 | (ascii_to_ebcdic_pc[(w1.s1 >> 16) & 0xff]) << 16
569 | (ascii_to_ebcdic_pc[(w1.s1 >> 24) & 0xff]) << 24;
573 __device__ __constant__ gpu_rule_t c_rules[1024];
575 extern "C" __global__ void __launch_bounds__ (256, 1) m08500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
581 const u32 lid = threadIdx.x;
587 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
591 pw_buf[0] = pws[gid].i[ 0];
592 pw_buf[1] = pws[gid].i[ 1];
596 const u32 pw_len = pws[gid].pw_len;
604 salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
605 salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
611 __shared__ u32 s_SPtrans[8][64];
612 __shared__ u32 s_skb[8][64];
616 s_SPtrans[0][lid] = c_SPtrans[0][lid];
617 s_SPtrans[1][lid] = c_SPtrans[1][lid];
618 s_SPtrans[2][lid] = c_SPtrans[2][lid];
619 s_SPtrans[3][lid] = c_SPtrans[3][lid];
620 s_SPtrans[4][lid] = c_SPtrans[4][lid];
621 s_SPtrans[5][lid] = c_SPtrans[5][lid];
622 s_SPtrans[6][lid] = c_SPtrans[6][lid];
623 s_SPtrans[7][lid] = c_SPtrans[7][lid];
625 s_skb[0][lid] = c_skb[0][lid];
626 s_skb[1][lid] = c_skb[1][lid];
627 s_skb[2][lid] = c_skb[2][lid];
628 s_skb[3][lid] = c_skb[3][lid];
629 s_skb[4][lid] = c_skb[4][lid];
630 s_skb[5][lid] = c_skb[5][lid];
631 s_skb[6][lid] = c_skb[6][lid];
632 s_skb[7][lid] = c_skb[7][lid];
637 if (gid >= gid_max) return;
643 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
659 u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
661 out_len = (out_len >= 8) ? 8 : out_len;
665 transform_racf_key (w0[0], w0[1], key);
667 const u32x c = key[0];
668 const u32x d = key[1];
673 _des_crypt_keysetup (c, d, Kc, Kd, s_skb);
677 data[0] = salt_buf0[0];
678 data[1] = salt_buf0[1];
682 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
684 const u32x r0 = iv[0];
685 const u32x r1 = iv[1];
689 #include VECT_COMPARE_M
693 extern "C" __global__ void __launch_bounds__ (256, 1) m08500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
697 extern "C" __global__ void __launch_bounds__ (256, 1) m08500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
701 extern "C" __global__ void __launch_bounds__ (256, 1) m08500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
707 const u32 lid = threadIdx.x;
713 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
717 pw_buf[0] = pws[gid].i[ 0];
718 pw_buf[1] = pws[gid].i[ 1];
722 const u32 pw_len = pws[gid].pw_len;
730 salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
731 salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
737 __shared__ u32 s_SPtrans[8][64];
738 __shared__ u32 s_skb[8][64];
742 s_SPtrans[0][lid] = c_SPtrans[0][lid];
743 s_SPtrans[1][lid] = c_SPtrans[1][lid];
744 s_SPtrans[2][lid] = c_SPtrans[2][lid];
745 s_SPtrans[3][lid] = c_SPtrans[3][lid];
746 s_SPtrans[4][lid] = c_SPtrans[4][lid];
747 s_SPtrans[5][lid] = c_SPtrans[5][lid];
748 s_SPtrans[6][lid] = c_SPtrans[6][lid];
749 s_SPtrans[7][lid] = c_SPtrans[7][lid];
751 s_skb[0][lid] = c_skb[0][lid];
752 s_skb[1][lid] = c_skb[1][lid];
753 s_skb[2][lid] = c_skb[2][lid];
754 s_skb[3][lid] = c_skb[3][lid];
755 s_skb[4][lid] = c_skb[4][lid];
756 s_skb[5][lid] = c_skb[5][lid];
757 s_skb[6][lid] = c_skb[6][lid];
758 s_skb[7][lid] = c_skb[7][lid];
763 if (gid >= gid_max) return;
769 const u32 search[4] =
771 digests_buf[digests_offset].digest_buf[DGST_R0],
772 digests_buf[digests_offset].digest_buf[DGST_R1],
773 digests_buf[digests_offset].digest_buf[DGST_R2],
774 digests_buf[digests_offset].digest_buf[DGST_R3]
781 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
797 u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
799 out_len = (out_len >= 8) ? 8 : out_len;
803 transform_racf_key (w0[0], w0[1], key);
805 const u32x c = key[0];
806 const u32x d = key[1];
811 _des_crypt_keysetup (c, d, Kc, Kd, s_skb);
815 data[0] = salt_buf0[0];
816 data[1] = salt_buf0[1];
820 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
822 const u32x r0 = iv[0];
823 const u32x r1 = iv[1];
827 #include VECT_COMPARE_S
831 extern "C" __global__ void __launch_bounds__ (256, 1) m08500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
835 extern "C" __global__ void __launch_bounds__ (256, 1) m08500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)