2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
27 #include "include/rp_gpu.h"
31 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
32 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
36 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
41 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
42 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
45 #define PERM_OP(a,b,tt,n,m) \
55 #define HPERM_OP(a,tt,n,m) \
61 tt = tt >> (16 + n); \
67 PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \
68 PERM_OP (l, r, tt, 16, 0x0000ffff); \
69 PERM_OP (r, l, tt, 2, 0x33333333); \
70 PERM_OP (l, r, tt, 8, 0x00ff00ff); \
71 PERM_OP (r, l, tt, 1, 0x55555555); \
76 PERM_OP (l, r, tt, 1, 0x55555555); \
77 PERM_OP (r, l, tt, 8, 0x00ff00ff); \
78 PERM_OP (l, r, tt, 2, 0x33333333); \
79 PERM_OP (r, l, tt, 16, 0x0000ffff); \
80 PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \
83 __device__ __constant__ u32 c_SPtrans[8][64] =
86 0x02080800, 0x00080000, 0x02000002, 0x02080802,
87 0x02000000, 0x00080802, 0x00080002, 0x02000002,
88 0x00080802, 0x02080800, 0x02080000, 0x00000802,
89 0x02000802, 0x02000000, 0x00000000, 0x00080002,
90 0x00080000, 0x00000002, 0x02000800, 0x00080800,
91 0x02080802, 0x02080000, 0x00000802, 0x02000800,
92 0x00000002, 0x00000800, 0x00080800, 0x02080002,
93 0x00000800, 0x02000802, 0x02080002, 0x00000000,
94 0x00000000, 0x02080802, 0x02000800, 0x00080002,
95 0x02080800, 0x00080000, 0x00000802, 0x02000800,
96 0x02080002, 0x00000800, 0x00080800, 0x02000002,
97 0x00080802, 0x00000002, 0x02000002, 0x02080000,
98 0x02080802, 0x00080800, 0x02080000, 0x02000802,
99 0x02000000, 0x00000802, 0x00080002, 0x00000000,
100 0x00080000, 0x02000000, 0x02000802, 0x02080800,
101 0x00000002, 0x02080002, 0x00000800, 0x00080802,
103 0x40108010, 0x00000000, 0x00108000, 0x40100000,
104 0x40000010, 0x00008010, 0x40008000, 0x00108000,
105 0x00008000, 0x40100010, 0x00000010, 0x40008000,
106 0x00100010, 0x40108000, 0x40100000, 0x00000010,
107 0x00100000, 0x40008010, 0x40100010, 0x00008000,
108 0x00108010, 0x40000000, 0x00000000, 0x00100010,
109 0x40008010, 0x00108010, 0x40108000, 0x40000010,
110 0x40000000, 0x00100000, 0x00008010, 0x40108010,
111 0x00100010, 0x40108000, 0x40008000, 0x00108010,
112 0x40108010, 0x00100010, 0x40000010, 0x00000000,
113 0x40000000, 0x00008010, 0x00100000, 0x40100010,
114 0x00008000, 0x40000000, 0x00108010, 0x40008010,
115 0x40108000, 0x00008000, 0x00000000, 0x40000010,
116 0x00000010, 0x40108010, 0x00108000, 0x40100000,
117 0x40100010, 0x00100000, 0x00008010, 0x40008000,
118 0x40008010, 0x00000010, 0x40100000, 0x00108000,
120 0x04000001, 0x04040100, 0x00000100, 0x04000101,
121 0x00040001, 0x04000000, 0x04000101, 0x00040100,
122 0x04000100, 0x00040000, 0x04040000, 0x00000001,
123 0x04040101, 0x00000101, 0x00000001, 0x04040001,
124 0x00000000, 0x00040001, 0x04040100, 0x00000100,
125 0x00000101, 0x04040101, 0x00040000, 0x04000001,
126 0x04040001, 0x04000100, 0x00040101, 0x04040000,
127 0x00040100, 0x00000000, 0x04000000, 0x00040101,
128 0x04040100, 0x00000100, 0x00000001, 0x00040000,
129 0x00000101, 0x00040001, 0x04040000, 0x04000101,
130 0x00000000, 0x04040100, 0x00040100, 0x04040001,
131 0x00040001, 0x04000000, 0x04040101, 0x00000001,
132 0x00040101, 0x04000001, 0x04000000, 0x04040101,
133 0x00040000, 0x04000100, 0x04000101, 0x00040100,
134 0x04000100, 0x00000000, 0x04040001, 0x00000101,
135 0x04000001, 0x00040101, 0x00000100, 0x04040000,
137 0x00401008, 0x10001000, 0x00000008, 0x10401008,
138 0x00000000, 0x10400000, 0x10001008, 0x00400008,
139 0x10401000, 0x10000008, 0x10000000, 0x00001008,
140 0x10000008, 0x00401008, 0x00400000, 0x10000000,
141 0x10400008, 0x00401000, 0x00001000, 0x00000008,
142 0x00401000, 0x10001008, 0x10400000, 0x00001000,
143 0x00001008, 0x00000000, 0x00400008, 0x10401000,
144 0x10001000, 0x10400008, 0x10401008, 0x00400000,
145 0x10400008, 0x00001008, 0x00400000, 0x10000008,
146 0x00401000, 0x10001000, 0x00000008, 0x10400000,
147 0x10001008, 0x00000000, 0x00001000, 0x00400008,
148 0x00000000, 0x10400008, 0x10401000, 0x00001000,
149 0x10000000, 0x10401008, 0x00401008, 0x00400000,
150 0x10401008, 0x00000008, 0x10001000, 0x00401008,
151 0x00400008, 0x00401000, 0x10400000, 0x10001008,
152 0x00001008, 0x10000000, 0x10000008, 0x10401000,
154 0x08000000, 0x00010000, 0x00000400, 0x08010420,
155 0x08010020, 0x08000400, 0x00010420, 0x08010000,
156 0x00010000, 0x00000020, 0x08000020, 0x00010400,
157 0x08000420, 0x08010020, 0x08010400, 0x00000000,
158 0x00010400, 0x08000000, 0x00010020, 0x00000420,
159 0x08000400, 0x00010420, 0x00000000, 0x08000020,
160 0x00000020, 0x08000420, 0x08010420, 0x00010020,
161 0x08010000, 0x00000400, 0x00000420, 0x08010400,
162 0x08010400, 0x08000420, 0x00010020, 0x08010000,
163 0x00010000, 0x00000020, 0x08000020, 0x08000400,
164 0x08000000, 0x00010400, 0x08010420, 0x00000000,
165 0x00010420, 0x08000000, 0x00000400, 0x00010020,
166 0x08000420, 0x00000400, 0x00000000, 0x08010420,
167 0x08010020, 0x08010400, 0x00000420, 0x00010000,
168 0x00010400, 0x08010020, 0x08000400, 0x00000420,
169 0x00000020, 0x00010420, 0x08010000, 0x08000020,
171 0x80000040, 0x00200040, 0x00000000, 0x80202000,
172 0x00200040, 0x00002000, 0x80002040, 0x00200000,
173 0x00002040, 0x80202040, 0x00202000, 0x80000000,
174 0x80002000, 0x80000040, 0x80200000, 0x00202040,
175 0x00200000, 0x80002040, 0x80200040, 0x00000000,
176 0x00002000, 0x00000040, 0x80202000, 0x80200040,
177 0x80202040, 0x80200000, 0x80000000, 0x00002040,
178 0x00000040, 0x00202000, 0x00202040, 0x80002000,
179 0x00002040, 0x80000000, 0x80002000, 0x00202040,
180 0x80202000, 0x00200040, 0x00000000, 0x80002000,
181 0x80000000, 0x00002000, 0x80200040, 0x00200000,
182 0x00200040, 0x80202040, 0x00202000, 0x00000040,
183 0x80202040, 0x00202000, 0x00200000, 0x80002040,
184 0x80000040, 0x80200000, 0x00202040, 0x00000000,
185 0x00002000, 0x80000040, 0x80002040, 0x80202000,
186 0x80200000, 0x00002040, 0x00000040, 0x80200040,
188 0x00004000, 0x00000200, 0x01000200, 0x01000004,
189 0x01004204, 0x00004004, 0x00004200, 0x00000000,
190 0x01000000, 0x01000204, 0x00000204, 0x01004000,
191 0x00000004, 0x01004200, 0x01004000, 0x00000204,
192 0x01000204, 0x00004000, 0x00004004, 0x01004204,
193 0x00000000, 0x01000200, 0x01000004, 0x00004200,
194 0x01004004, 0x00004204, 0x01004200, 0x00000004,
195 0x00004204, 0x01004004, 0x00000200, 0x01000000,
196 0x00004204, 0x01004000, 0x01004004, 0x00000204,
197 0x00004000, 0x00000200, 0x01000000, 0x01004004,
198 0x01000204, 0x00004204, 0x00004200, 0x00000000,
199 0x00000200, 0x01000004, 0x00000004, 0x01000200,
200 0x00000000, 0x01000204, 0x01000200, 0x00004200,
201 0x00000204, 0x00004000, 0x01004204, 0x01000000,
202 0x01004200, 0x00000004, 0x00004004, 0x01004204,
203 0x01000004, 0x01004200, 0x01004000, 0x00004004,
205 0x20800080, 0x20820000, 0x00020080, 0x00000000,
206 0x20020000, 0x00800080, 0x20800000, 0x20820080,
207 0x00000080, 0x20000000, 0x00820000, 0x00020080,
208 0x00820080, 0x20020080, 0x20000080, 0x20800000,
209 0x00020000, 0x00820080, 0x00800080, 0x20020000,
210 0x20820080, 0x20000080, 0x00000000, 0x00820000,
211 0x20000000, 0x00800000, 0x20020080, 0x20800080,
212 0x00800000, 0x00020000, 0x20820000, 0x00000080,
213 0x00800000, 0x00020000, 0x20000080, 0x20820080,
214 0x00020080, 0x20000000, 0x00000000, 0x00820000,
215 0x20800080, 0x20020080, 0x20020000, 0x00800080,
216 0x20820000, 0x00000080, 0x00800080, 0x20020000,
217 0x20820080, 0x00800000, 0x20800000, 0x20000080,
218 0x00820000, 0x00020080, 0x20020080, 0x20800000,
219 0x00000080, 0x20820000, 0x00820080, 0x00000000,
220 0x20000000, 0x20800080, 0x00020000, 0x00820080,
223 __device__ __constant__ u32 c_skb[8][64] =
225 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
226 0x00000000, 0x00000010, 0x20000000, 0x20000010,
227 0x00010000, 0x00010010, 0x20010000, 0x20010010,
228 0x00000800, 0x00000810, 0x20000800, 0x20000810,
229 0x00010800, 0x00010810, 0x20010800, 0x20010810,
230 0x00000020, 0x00000030, 0x20000020, 0x20000030,
231 0x00010020, 0x00010030, 0x20010020, 0x20010030,
232 0x00000820, 0x00000830, 0x20000820, 0x20000830,
233 0x00010820, 0x00010830, 0x20010820, 0x20010830,
234 0x00080000, 0x00080010, 0x20080000, 0x20080010,
235 0x00090000, 0x00090010, 0x20090000, 0x20090010,
236 0x00080800, 0x00080810, 0x20080800, 0x20080810,
237 0x00090800, 0x00090810, 0x20090800, 0x20090810,
238 0x00080020, 0x00080030, 0x20080020, 0x20080030,
239 0x00090020, 0x00090030, 0x20090020, 0x20090030,
240 0x00080820, 0x00080830, 0x20080820, 0x20080830,
241 0x00090820, 0x00090830, 0x20090820, 0x20090830,
242 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
243 0x00000000, 0x02000000, 0x00002000, 0x02002000,
244 0x00200000, 0x02200000, 0x00202000, 0x02202000,
245 0x00000004, 0x02000004, 0x00002004, 0x02002004,
246 0x00200004, 0x02200004, 0x00202004, 0x02202004,
247 0x00000400, 0x02000400, 0x00002400, 0x02002400,
248 0x00200400, 0x02200400, 0x00202400, 0x02202400,
249 0x00000404, 0x02000404, 0x00002404, 0x02002404,
250 0x00200404, 0x02200404, 0x00202404, 0x02202404,
251 0x10000000, 0x12000000, 0x10002000, 0x12002000,
252 0x10200000, 0x12200000, 0x10202000, 0x12202000,
253 0x10000004, 0x12000004, 0x10002004, 0x12002004,
254 0x10200004, 0x12200004, 0x10202004, 0x12202004,
255 0x10000400, 0x12000400, 0x10002400, 0x12002400,
256 0x10200400, 0x12200400, 0x10202400, 0x12202400,
257 0x10000404, 0x12000404, 0x10002404, 0x12002404,
258 0x10200404, 0x12200404, 0x10202404, 0x12202404,
259 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
260 0x00000000, 0x00000001, 0x00040000, 0x00040001,
261 0x01000000, 0x01000001, 0x01040000, 0x01040001,
262 0x00000002, 0x00000003, 0x00040002, 0x00040003,
263 0x01000002, 0x01000003, 0x01040002, 0x01040003,
264 0x00000200, 0x00000201, 0x00040200, 0x00040201,
265 0x01000200, 0x01000201, 0x01040200, 0x01040201,
266 0x00000202, 0x00000203, 0x00040202, 0x00040203,
267 0x01000202, 0x01000203, 0x01040202, 0x01040203,
268 0x08000000, 0x08000001, 0x08040000, 0x08040001,
269 0x09000000, 0x09000001, 0x09040000, 0x09040001,
270 0x08000002, 0x08000003, 0x08040002, 0x08040003,
271 0x09000002, 0x09000003, 0x09040002, 0x09040003,
272 0x08000200, 0x08000201, 0x08040200, 0x08040201,
273 0x09000200, 0x09000201, 0x09040200, 0x09040201,
274 0x08000202, 0x08000203, 0x08040202, 0x08040203,
275 0x09000202, 0x09000203, 0x09040202, 0x09040203,
276 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
277 0x00000000, 0x00100000, 0x00000100, 0x00100100,
278 0x00000008, 0x00100008, 0x00000108, 0x00100108,
279 0x00001000, 0x00101000, 0x00001100, 0x00101100,
280 0x00001008, 0x00101008, 0x00001108, 0x00101108,
281 0x04000000, 0x04100000, 0x04000100, 0x04100100,
282 0x04000008, 0x04100008, 0x04000108, 0x04100108,
283 0x04001000, 0x04101000, 0x04001100, 0x04101100,
284 0x04001008, 0x04101008, 0x04001108, 0x04101108,
285 0x00020000, 0x00120000, 0x00020100, 0x00120100,
286 0x00020008, 0x00120008, 0x00020108, 0x00120108,
287 0x00021000, 0x00121000, 0x00021100, 0x00121100,
288 0x00021008, 0x00121008, 0x00021108, 0x00121108,
289 0x04020000, 0x04120000, 0x04020100, 0x04120100,
290 0x04020008, 0x04120008, 0x04020108, 0x04120108,
291 0x04021000, 0x04121000, 0x04021100, 0x04121100,
292 0x04021008, 0x04121008, 0x04021108, 0x04121108,
293 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
294 0x00000000, 0x10000000, 0x00010000, 0x10010000,
295 0x00000004, 0x10000004, 0x00010004, 0x10010004,
296 0x20000000, 0x30000000, 0x20010000, 0x30010000,
297 0x20000004, 0x30000004, 0x20010004, 0x30010004,
298 0x00100000, 0x10100000, 0x00110000, 0x10110000,
299 0x00100004, 0x10100004, 0x00110004, 0x10110004,
300 0x20100000, 0x30100000, 0x20110000, 0x30110000,
301 0x20100004, 0x30100004, 0x20110004, 0x30110004,
302 0x00001000, 0x10001000, 0x00011000, 0x10011000,
303 0x00001004, 0x10001004, 0x00011004, 0x10011004,
304 0x20001000, 0x30001000, 0x20011000, 0x30011000,
305 0x20001004, 0x30001004, 0x20011004, 0x30011004,
306 0x00101000, 0x10101000, 0x00111000, 0x10111000,
307 0x00101004, 0x10101004, 0x00111004, 0x10111004,
308 0x20101000, 0x30101000, 0x20111000, 0x30111000,
309 0x20101004, 0x30101004, 0x20111004, 0x30111004,
310 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
311 0x00000000, 0x08000000, 0x00000008, 0x08000008,
312 0x00000400, 0x08000400, 0x00000408, 0x08000408,
313 0x00020000, 0x08020000, 0x00020008, 0x08020008,
314 0x00020400, 0x08020400, 0x00020408, 0x08020408,
315 0x00000001, 0x08000001, 0x00000009, 0x08000009,
316 0x00000401, 0x08000401, 0x00000409, 0x08000409,
317 0x00020001, 0x08020001, 0x00020009, 0x08020009,
318 0x00020401, 0x08020401, 0x00020409, 0x08020409,
319 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
320 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
321 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
322 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
323 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
324 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
325 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
326 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
327 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
328 0x00000000, 0x00000100, 0x00080000, 0x00080100,
329 0x01000000, 0x01000100, 0x01080000, 0x01080100,
330 0x00000010, 0x00000110, 0x00080010, 0x00080110,
331 0x01000010, 0x01000110, 0x01080010, 0x01080110,
332 0x00200000, 0x00200100, 0x00280000, 0x00280100,
333 0x01200000, 0x01200100, 0x01280000, 0x01280100,
334 0x00200010, 0x00200110, 0x00280010, 0x00280110,
335 0x01200010, 0x01200110, 0x01280010, 0x01280110,
336 0x00000200, 0x00000300, 0x00080200, 0x00080300,
337 0x01000200, 0x01000300, 0x01080200, 0x01080300,
338 0x00000210, 0x00000310, 0x00080210, 0x00080310,
339 0x01000210, 0x01000310, 0x01080210, 0x01080310,
340 0x00200200, 0x00200300, 0x00280200, 0x00280300,
341 0x01200200, 0x01200300, 0x01280200, 0x01280300,
342 0x00200210, 0x00200310, 0x00280210, 0x00280310,
343 0x01200210, 0x01200310, 0x01280210, 0x01280310,
344 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
345 0x00000000, 0x04000000, 0x00040000, 0x04040000,
346 0x00000002, 0x04000002, 0x00040002, 0x04040002,
347 0x00002000, 0x04002000, 0x00042000, 0x04042000,
348 0x00002002, 0x04002002, 0x00042002, 0x04042002,
349 0x00000020, 0x04000020, 0x00040020, 0x04040020,
350 0x00000022, 0x04000022, 0x00040022, 0x04040022,
351 0x00002020, 0x04002020, 0x00042020, 0x04042020,
352 0x00002022, 0x04002022, 0x00042022, 0x04042022,
353 0x00000800, 0x04000800, 0x00040800, 0x04040800,
354 0x00000802, 0x04000802, 0x00040802, 0x04040802,
355 0x00002800, 0x04002800, 0x00042800, 0x04042800,
356 0x00002802, 0x04002802, 0x00042802, 0x04042802,
357 0x00000820, 0x04000820, 0x00040820, 0x04040820,
358 0x00000822, 0x04000822, 0x00040822, 0x04040822,
359 0x00002820, 0x04002820, 0x00042820, 0x04042820,
360 0x00002822, 0x04002822, 0x00042822, 0x04042822
364 #define BOX(i,n,S) u32x ((S)[(n)][(i)])
368 #define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y])
371 __device__ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64])
384 for (int i = 0; i < 16; i++)
387 u32x t = Kd[i] ^ rotl32 (r, 28u);
389 l ^= BOX (((u >> 2) & 0x3f), 0, s_SPtrans)
390 | BOX (((u >> 10) & 0x3f), 2, s_SPtrans)
391 | BOX (((u >> 18) & 0x3f), 4, s_SPtrans)
392 | BOX (((u >> 26) & 0x3f), 6, s_SPtrans)
393 | BOX (((t >> 2) & 0x3f), 1, s_SPtrans)
394 | BOX (((t >> 10) & 0x3f), 3, s_SPtrans)
395 | BOX (((t >> 18) & 0x3f), 5, s_SPtrans)
396 | BOX (((t >> 26) & 0x3f), 7, s_SPtrans);
412 __device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64])
416 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
417 HPERM_OP (c, tt, 2, 0xcccc0000);
418 HPERM_OP (d, tt, 2, 0xcccc0000);
419 PERM_OP (d, c, tt, 1, 0x55555555);
420 PERM_OP (c, d, tt, 8, 0x00ff00ff);
421 PERM_OP (d, c, tt, 1, 0x55555555);
423 d = ((d & 0x000000ff) << 16)
424 | ((d & 0x0000ff00) << 0)
425 | ((d & 0x00ff0000) >> 16)
426 | ((c & 0xf0000000) >> 4);
431 for (int i = 0; i < 16; i++)
433 const u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
434 const u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 };
436 c = c >> shifts3s0[i] | c << shifts3s1[i];
437 d = d >> shifts3s0[i] | d << shifts3s1[i];
442 u32x s = BOX ((( c >> 0) & 0x3f), 0, s_skb)
443 | BOX ((((c >> 6) & 0x03)
444 | ((c >> 7) & 0x3c)), 1, s_skb)
445 | BOX ((((c >> 13) & 0x0f)
446 | ((c >> 14) & 0x30)), 2, s_skb)
447 | BOX ((((c >> 20) & 0x01)
449 | ((c >> 22) & 0x38)), 3, s_skb);
451 u32x t = BOX ((( d >> 0) & 0x3f), 4, s_skb)
452 | BOX ((((d >> 7) & 0x03)
453 | ((d >> 8) & 0x3c)), 5, s_skb)
454 | BOX ((((d >> 15) & 0x3f)), 6, s_skb)
455 | BOX ((((d >> 21) & 0x0f)
456 | ((d >> 22) & 0x30)), 7, s_skb);
458 #if __CUDA_ARCH__ >= 200
459 Kc[i] = __byte_perm (s, t, 0x5410);
460 Kd[i] = __byte_perm (s, t, 0x7632);
462 Kc[i] = ((t << 16) | (s & 0x0000ffff));
463 Kd[i] = ((s >> 16) | (t & 0xffff0000));
466 Kc[i] = rotl32 (Kc[i], 2u);
467 Kd[i] = rotl32 (Kd[i], 2u);
471 __device__ __constant__ gpu_rule_t c_rules[1024];
473 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
479 const u32 lid = threadIdx.x;
485 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
489 pw_buf0[0] = pws[gid].i[ 0];
490 pw_buf0[1] = pws[gid].i[ 1];
491 pw_buf0[2] = pws[gid].i[ 2];
492 pw_buf0[3] = pws[gid].i[ 3];
496 pw_buf1[0] = pws[gid].i[ 4];
497 pw_buf1[1] = pws[gid].i[ 5];
498 pw_buf1[2] = pws[gid].i[ 6];
499 pw_buf1[3] = pws[gid].i[ 7];
501 const u32 pw_len = pws[gid].pw_len;
507 __shared__ u32 s_SPtrans[8][64];
508 __shared__ u32 s_skb[8][64];
512 s_SPtrans[0][lid] = c_SPtrans[0][lid];
513 s_SPtrans[1][lid] = c_SPtrans[1][lid];
514 s_SPtrans[2][lid] = c_SPtrans[2][lid];
515 s_SPtrans[3][lid] = c_SPtrans[3][lid];
516 s_SPtrans[4][lid] = c_SPtrans[4][lid];
517 s_SPtrans[5][lid] = c_SPtrans[5][lid];
518 s_SPtrans[6][lid] = c_SPtrans[6][lid];
519 s_SPtrans[7][lid] = c_SPtrans[7][lid];
521 s_skb[0][lid] = c_skb[0][lid];
522 s_skb[1][lid] = c_skb[1][lid];
523 s_skb[2][lid] = c_skb[2][lid];
524 s_skb[3][lid] = c_skb[3][lid];
525 s_skb[4][lid] = c_skb[4][lid];
526 s_skb[5][lid] = c_skb[5][lid];
527 s_skb[6][lid] = c_skb[6][lid];
528 s_skb[7][lid] = c_skb[7][lid];
533 if (gid >= gid_max) return;
541 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
542 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
543 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
544 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
548 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
549 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
550 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
551 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
553 const u32 salt_len = salt_bufs[salt_pos].salt_len;
559 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
589 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
591 const u32 salt_word_len = (salt_len + out_len) * 2;
625 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
627 w0_t[0] |= salt_buf0[0];
628 w0_t[1] |= salt_buf0[1];
629 w0_t[2] |= salt_buf0[2];
630 w0_t[3] |= salt_buf0[3];
631 w1_t[0] |= salt_buf1[0];
632 w1_t[1] |= salt_buf1[1];
633 w1_t[2] |= salt_buf1[2];
634 w1_t[3] |= salt_buf1[3];
656 * precompute key1 since key is static: 0x0123456789abcdef
657 * plus LEFT_ROTATE by 2
699 * key1 (generate key)
707 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
711 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
712 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
717 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
721 * key2 (generate hash)
724 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
729 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
733 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
734 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
739 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
746 const u32x r0 = iv[0];
747 const u32x r1 = iv[1];
751 #include VECT_COMPARE_M
755 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
759 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
763 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
769 const u32 lid = threadIdx.x;
775 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
779 pw_buf0[0] = pws[gid].i[ 0];
780 pw_buf0[1] = pws[gid].i[ 1];
781 pw_buf0[2] = pws[gid].i[ 2];
782 pw_buf0[3] = pws[gid].i[ 3];
786 pw_buf1[0] = pws[gid].i[ 4];
787 pw_buf1[1] = pws[gid].i[ 5];
788 pw_buf1[2] = pws[gid].i[ 6];
789 pw_buf1[3] = pws[gid].i[ 7];
791 const u32 pw_len = pws[gid].pw_len;
797 __shared__ u32 s_SPtrans[8][64];
798 __shared__ u32 s_skb[8][64];
802 s_SPtrans[0][lid] = c_SPtrans[0][lid];
803 s_SPtrans[1][lid] = c_SPtrans[1][lid];
804 s_SPtrans[2][lid] = c_SPtrans[2][lid];
805 s_SPtrans[3][lid] = c_SPtrans[3][lid];
806 s_SPtrans[4][lid] = c_SPtrans[4][lid];
807 s_SPtrans[5][lid] = c_SPtrans[5][lid];
808 s_SPtrans[6][lid] = c_SPtrans[6][lid];
809 s_SPtrans[7][lid] = c_SPtrans[7][lid];
811 s_skb[0][lid] = c_skb[0][lid];
812 s_skb[1][lid] = c_skb[1][lid];
813 s_skb[2][lid] = c_skb[2][lid];
814 s_skb[3][lid] = c_skb[3][lid];
815 s_skb[4][lid] = c_skb[4][lid];
816 s_skb[5][lid] = c_skb[5][lid];
817 s_skb[6][lid] = c_skb[6][lid];
818 s_skb[7][lid] = c_skb[7][lid];
823 if (gid >= gid_max) return;
831 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
832 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
833 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
834 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
838 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
839 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
840 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
841 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
843 const u32 salt_len = salt_bufs[salt_pos].salt_len;
849 const u32 search[4] =
851 digests_buf[digests_offset].digest_buf[DGST_R0],
852 digests_buf[digests_offset].digest_buf[DGST_R1],
853 digests_buf[digests_offset].digest_buf[DGST_R2],
854 digests_buf[digests_offset].digest_buf[DGST_R3]
861 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
891 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
893 const u32 salt_word_len = (salt_len + out_len) * 2;
927 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
929 w0_t[0] |= salt_buf0[0];
930 w0_t[1] |= salt_buf0[1];
931 w0_t[2] |= salt_buf0[2];
932 w0_t[3] |= salt_buf0[3];
933 w1_t[0] |= salt_buf1[0];
934 w1_t[1] |= salt_buf1[1];
935 w1_t[2] |= salt_buf1[2];
936 w1_t[3] |= salt_buf1[3];
958 * precompute key1 since key is static: 0x0123456789abcdef
959 * plus LEFT_ROTATE by 2
1001 * key1 (generate key)
1009 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1013 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1014 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1019 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1023 * key2 (generate hash)
1026 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
1031 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
1035 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
1036 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
1041 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
1048 const u32x r0 = iv[0];
1049 const u32x r1 = iv[1];
1053 #include VECT_COMPARE_S
1057 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1061 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)