2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
29 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
30 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
34 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
35 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
39 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
40 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
43 #define PERM_OP(a,b,tt,n,m) \
53 #define HPERM_OP(a,tt,n,m) \
59 tt = tt >> (16 + n); \
65 PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \
66 PERM_OP (l, r, tt, 16, 0x0000ffff); \
67 PERM_OP (r, l, tt, 2, 0x33333333); \
68 PERM_OP (l, r, tt, 8, 0x00ff00ff); \
69 PERM_OP (r, l, tt, 1, 0x55555555); \
74 PERM_OP (l, r, tt, 1, 0x55555555); \
75 PERM_OP (r, l, tt, 8, 0x00ff00ff); \
76 PERM_OP (l, r, tt, 2, 0x33333333); \
77 PERM_OP (r, l, tt, 16, 0x0000ffff); \
78 PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \
81 __device__ __constant__ u32 c_SPtrans[8][64] =
84 0x02080800, 0x00080000, 0x02000002, 0x02080802,
85 0x02000000, 0x00080802, 0x00080002, 0x02000002,
86 0x00080802, 0x02080800, 0x02080000, 0x00000802,
87 0x02000802, 0x02000000, 0x00000000, 0x00080002,
88 0x00080000, 0x00000002, 0x02000800, 0x00080800,
89 0x02080802, 0x02080000, 0x00000802, 0x02000800,
90 0x00000002, 0x00000800, 0x00080800, 0x02080002,
91 0x00000800, 0x02000802, 0x02080002, 0x00000000,
92 0x00000000, 0x02080802, 0x02000800, 0x00080002,
93 0x02080800, 0x00080000, 0x00000802, 0x02000800,
94 0x02080002, 0x00000800, 0x00080800, 0x02000002,
95 0x00080802, 0x00000002, 0x02000002, 0x02080000,
96 0x02080802, 0x00080800, 0x02080000, 0x02000802,
97 0x02000000, 0x00000802, 0x00080002, 0x00000000,
98 0x00080000, 0x02000000, 0x02000802, 0x02080800,
99 0x00000002, 0x02080002, 0x00000800, 0x00080802,
101 0x40108010, 0x00000000, 0x00108000, 0x40100000,
102 0x40000010, 0x00008010, 0x40008000, 0x00108000,
103 0x00008000, 0x40100010, 0x00000010, 0x40008000,
104 0x00100010, 0x40108000, 0x40100000, 0x00000010,
105 0x00100000, 0x40008010, 0x40100010, 0x00008000,
106 0x00108010, 0x40000000, 0x00000000, 0x00100010,
107 0x40008010, 0x00108010, 0x40108000, 0x40000010,
108 0x40000000, 0x00100000, 0x00008010, 0x40108010,
109 0x00100010, 0x40108000, 0x40008000, 0x00108010,
110 0x40108010, 0x00100010, 0x40000010, 0x00000000,
111 0x40000000, 0x00008010, 0x00100000, 0x40100010,
112 0x00008000, 0x40000000, 0x00108010, 0x40008010,
113 0x40108000, 0x00008000, 0x00000000, 0x40000010,
114 0x00000010, 0x40108010, 0x00108000, 0x40100000,
115 0x40100010, 0x00100000, 0x00008010, 0x40008000,
116 0x40008010, 0x00000010, 0x40100000, 0x00108000,
118 0x04000001, 0x04040100, 0x00000100, 0x04000101,
119 0x00040001, 0x04000000, 0x04000101, 0x00040100,
120 0x04000100, 0x00040000, 0x04040000, 0x00000001,
121 0x04040101, 0x00000101, 0x00000001, 0x04040001,
122 0x00000000, 0x00040001, 0x04040100, 0x00000100,
123 0x00000101, 0x04040101, 0x00040000, 0x04000001,
124 0x04040001, 0x04000100, 0x00040101, 0x04040000,
125 0x00040100, 0x00000000, 0x04000000, 0x00040101,
126 0x04040100, 0x00000100, 0x00000001, 0x00040000,
127 0x00000101, 0x00040001, 0x04040000, 0x04000101,
128 0x00000000, 0x04040100, 0x00040100, 0x04040001,
129 0x00040001, 0x04000000, 0x04040101, 0x00000001,
130 0x00040101, 0x04000001, 0x04000000, 0x04040101,
131 0x00040000, 0x04000100, 0x04000101, 0x00040100,
132 0x04000100, 0x00000000, 0x04040001, 0x00000101,
133 0x04000001, 0x00040101, 0x00000100, 0x04040000,
135 0x00401008, 0x10001000, 0x00000008, 0x10401008,
136 0x00000000, 0x10400000, 0x10001008, 0x00400008,
137 0x10401000, 0x10000008, 0x10000000, 0x00001008,
138 0x10000008, 0x00401008, 0x00400000, 0x10000000,
139 0x10400008, 0x00401000, 0x00001000, 0x00000008,
140 0x00401000, 0x10001008, 0x10400000, 0x00001000,
141 0x00001008, 0x00000000, 0x00400008, 0x10401000,
142 0x10001000, 0x10400008, 0x10401008, 0x00400000,
143 0x10400008, 0x00001008, 0x00400000, 0x10000008,
144 0x00401000, 0x10001000, 0x00000008, 0x10400000,
145 0x10001008, 0x00000000, 0x00001000, 0x00400008,
146 0x00000000, 0x10400008, 0x10401000, 0x00001000,
147 0x10000000, 0x10401008, 0x00401008, 0x00400000,
148 0x10401008, 0x00000008, 0x10001000, 0x00401008,
149 0x00400008, 0x00401000, 0x10400000, 0x10001008,
150 0x00001008, 0x10000000, 0x10000008, 0x10401000,
152 0x08000000, 0x00010000, 0x00000400, 0x08010420,
153 0x08010020, 0x08000400, 0x00010420, 0x08010000,
154 0x00010000, 0x00000020, 0x08000020, 0x00010400,
155 0x08000420, 0x08010020, 0x08010400, 0x00000000,
156 0x00010400, 0x08000000, 0x00010020, 0x00000420,
157 0x08000400, 0x00010420, 0x00000000, 0x08000020,
158 0x00000020, 0x08000420, 0x08010420, 0x00010020,
159 0x08010000, 0x00000400, 0x00000420, 0x08010400,
160 0x08010400, 0x08000420, 0x00010020, 0x08010000,
161 0x00010000, 0x00000020, 0x08000020, 0x08000400,
162 0x08000000, 0x00010400, 0x08010420, 0x00000000,
163 0x00010420, 0x08000000, 0x00000400, 0x00010020,
164 0x08000420, 0x00000400, 0x00000000, 0x08010420,
165 0x08010020, 0x08010400, 0x00000420, 0x00010000,
166 0x00010400, 0x08010020, 0x08000400, 0x00000420,
167 0x00000020, 0x00010420, 0x08010000, 0x08000020,
169 0x80000040, 0x00200040, 0x00000000, 0x80202000,
170 0x00200040, 0x00002000, 0x80002040, 0x00200000,
171 0x00002040, 0x80202040, 0x00202000, 0x80000000,
172 0x80002000, 0x80000040, 0x80200000, 0x00202040,
173 0x00200000, 0x80002040, 0x80200040, 0x00000000,
174 0x00002000, 0x00000040, 0x80202000, 0x80200040,
175 0x80202040, 0x80200000, 0x80000000, 0x00002040,
176 0x00000040, 0x00202000, 0x00202040, 0x80002000,
177 0x00002040, 0x80000000, 0x80002000, 0x00202040,
178 0x80202000, 0x00200040, 0x00000000, 0x80002000,
179 0x80000000, 0x00002000, 0x80200040, 0x00200000,
180 0x00200040, 0x80202040, 0x00202000, 0x00000040,
181 0x80202040, 0x00202000, 0x00200000, 0x80002040,
182 0x80000040, 0x80200000, 0x00202040, 0x00000000,
183 0x00002000, 0x80000040, 0x80002040, 0x80202000,
184 0x80200000, 0x00002040, 0x00000040, 0x80200040,
186 0x00004000, 0x00000200, 0x01000200, 0x01000004,
187 0x01004204, 0x00004004, 0x00004200, 0x00000000,
188 0x01000000, 0x01000204, 0x00000204, 0x01004000,
189 0x00000004, 0x01004200, 0x01004000, 0x00000204,
190 0x01000204, 0x00004000, 0x00004004, 0x01004204,
191 0x00000000, 0x01000200, 0x01000004, 0x00004200,
192 0x01004004, 0x00004204, 0x01004200, 0x00000004,
193 0x00004204, 0x01004004, 0x00000200, 0x01000000,
194 0x00004204, 0x01004000, 0x01004004, 0x00000204,
195 0x00004000, 0x00000200, 0x01000000, 0x01004004,
196 0x01000204, 0x00004204, 0x00004200, 0x00000000,
197 0x00000200, 0x01000004, 0x00000004, 0x01000200,
198 0x00000000, 0x01000204, 0x01000200, 0x00004200,
199 0x00000204, 0x00004000, 0x01004204, 0x01000000,
200 0x01004200, 0x00000004, 0x00004004, 0x01004204,
201 0x01000004, 0x01004200, 0x01004000, 0x00004004,
203 0x20800080, 0x20820000, 0x00020080, 0x00000000,
204 0x20020000, 0x00800080, 0x20800000, 0x20820080,
205 0x00000080, 0x20000000, 0x00820000, 0x00020080,
206 0x00820080, 0x20020080, 0x20000080, 0x20800000,
207 0x00020000, 0x00820080, 0x00800080, 0x20020000,
208 0x20820080, 0x20000080, 0x00000000, 0x00820000,
209 0x20000000, 0x00800000, 0x20020080, 0x20800080,
210 0x00800000, 0x00020000, 0x20820000, 0x00000080,
211 0x00800000, 0x00020000, 0x20000080, 0x20820080,
212 0x00020080, 0x20000000, 0x00000000, 0x00820000,
213 0x20800080, 0x20020080, 0x20020000, 0x00800080,
214 0x20820000, 0x00000080, 0x00800080, 0x20020000,
215 0x20820080, 0x00800000, 0x20800000, 0x20000080,
216 0x00820000, 0x00020080, 0x20020080, 0x20800000,
217 0x00000080, 0x20820000, 0x00820080, 0x00000000,
218 0x20000000, 0x20800080, 0x00020000, 0x00820080,
221 __device__ __constant__ u32 c_skb[8][64] =
223 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
224 0x00000000, 0x00000010, 0x20000000, 0x20000010,
225 0x00010000, 0x00010010, 0x20010000, 0x20010010,
226 0x00000800, 0x00000810, 0x20000800, 0x20000810,
227 0x00010800, 0x00010810, 0x20010800, 0x20010810,
228 0x00000020, 0x00000030, 0x20000020, 0x20000030,
229 0x00010020, 0x00010030, 0x20010020, 0x20010030,
230 0x00000820, 0x00000830, 0x20000820, 0x20000830,
231 0x00010820, 0x00010830, 0x20010820, 0x20010830,
232 0x00080000, 0x00080010, 0x20080000, 0x20080010,
233 0x00090000, 0x00090010, 0x20090000, 0x20090010,
234 0x00080800, 0x00080810, 0x20080800, 0x20080810,
235 0x00090800, 0x00090810, 0x20090800, 0x20090810,
236 0x00080020, 0x00080030, 0x20080020, 0x20080030,
237 0x00090020, 0x00090030, 0x20090020, 0x20090030,
238 0x00080820, 0x00080830, 0x20080820, 0x20080830,
239 0x00090820, 0x00090830, 0x20090820, 0x20090830,
240 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
241 0x00000000, 0x02000000, 0x00002000, 0x02002000,
242 0x00200000, 0x02200000, 0x00202000, 0x02202000,
243 0x00000004, 0x02000004, 0x00002004, 0x02002004,
244 0x00200004, 0x02200004, 0x00202004, 0x02202004,
245 0x00000400, 0x02000400, 0x00002400, 0x02002400,
246 0x00200400, 0x02200400, 0x00202400, 0x02202400,
247 0x00000404, 0x02000404, 0x00002404, 0x02002404,
248 0x00200404, 0x02200404, 0x00202404, 0x02202404,
249 0x10000000, 0x12000000, 0x10002000, 0x12002000,
250 0x10200000, 0x12200000, 0x10202000, 0x12202000,
251 0x10000004, 0x12000004, 0x10002004, 0x12002004,
252 0x10200004, 0x12200004, 0x10202004, 0x12202004,
253 0x10000400, 0x12000400, 0x10002400, 0x12002400,
254 0x10200400, 0x12200400, 0x10202400, 0x12202400,
255 0x10000404, 0x12000404, 0x10002404, 0x12002404,
256 0x10200404, 0x12200404, 0x10202404, 0x12202404,
257 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
258 0x00000000, 0x00000001, 0x00040000, 0x00040001,
259 0x01000000, 0x01000001, 0x01040000, 0x01040001,
260 0x00000002, 0x00000003, 0x00040002, 0x00040003,
261 0x01000002, 0x01000003, 0x01040002, 0x01040003,
262 0x00000200, 0x00000201, 0x00040200, 0x00040201,
263 0x01000200, 0x01000201, 0x01040200, 0x01040201,
264 0x00000202, 0x00000203, 0x00040202, 0x00040203,
265 0x01000202, 0x01000203, 0x01040202, 0x01040203,
266 0x08000000, 0x08000001, 0x08040000, 0x08040001,
267 0x09000000, 0x09000001, 0x09040000, 0x09040001,
268 0x08000002, 0x08000003, 0x08040002, 0x08040003,
269 0x09000002, 0x09000003, 0x09040002, 0x09040003,
270 0x08000200, 0x08000201, 0x08040200, 0x08040201,
271 0x09000200, 0x09000201, 0x09040200, 0x09040201,
272 0x08000202, 0x08000203, 0x08040202, 0x08040203,
273 0x09000202, 0x09000203, 0x09040202, 0x09040203,
274 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
275 0x00000000, 0x00100000, 0x00000100, 0x00100100,
276 0x00000008, 0x00100008, 0x00000108, 0x00100108,
277 0x00001000, 0x00101000, 0x00001100, 0x00101100,
278 0x00001008, 0x00101008, 0x00001108, 0x00101108,
279 0x04000000, 0x04100000, 0x04000100, 0x04100100,
280 0x04000008, 0x04100008, 0x04000108, 0x04100108,
281 0x04001000, 0x04101000, 0x04001100, 0x04101100,
282 0x04001008, 0x04101008, 0x04001108, 0x04101108,
283 0x00020000, 0x00120000, 0x00020100, 0x00120100,
284 0x00020008, 0x00120008, 0x00020108, 0x00120108,
285 0x00021000, 0x00121000, 0x00021100, 0x00121100,
286 0x00021008, 0x00121008, 0x00021108, 0x00121108,
287 0x04020000, 0x04120000, 0x04020100, 0x04120100,
288 0x04020008, 0x04120008, 0x04020108, 0x04120108,
289 0x04021000, 0x04121000, 0x04021100, 0x04121100,
290 0x04021008, 0x04121008, 0x04021108, 0x04121108,
291 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
292 0x00000000, 0x10000000, 0x00010000, 0x10010000,
293 0x00000004, 0x10000004, 0x00010004, 0x10010004,
294 0x20000000, 0x30000000, 0x20010000, 0x30010000,
295 0x20000004, 0x30000004, 0x20010004, 0x30010004,
296 0x00100000, 0x10100000, 0x00110000, 0x10110000,
297 0x00100004, 0x10100004, 0x00110004, 0x10110004,
298 0x20100000, 0x30100000, 0x20110000, 0x30110000,
299 0x20100004, 0x30100004, 0x20110004, 0x30110004,
300 0x00001000, 0x10001000, 0x00011000, 0x10011000,
301 0x00001004, 0x10001004, 0x00011004, 0x10011004,
302 0x20001000, 0x30001000, 0x20011000, 0x30011000,
303 0x20001004, 0x30001004, 0x20011004, 0x30011004,
304 0x00101000, 0x10101000, 0x00111000, 0x10111000,
305 0x00101004, 0x10101004, 0x00111004, 0x10111004,
306 0x20101000, 0x30101000, 0x20111000, 0x30111000,
307 0x20101004, 0x30101004, 0x20111004, 0x30111004,
308 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
309 0x00000000, 0x08000000, 0x00000008, 0x08000008,
310 0x00000400, 0x08000400, 0x00000408, 0x08000408,
311 0x00020000, 0x08020000, 0x00020008, 0x08020008,
312 0x00020400, 0x08020400, 0x00020408, 0x08020408,
313 0x00000001, 0x08000001, 0x00000009, 0x08000009,
314 0x00000401, 0x08000401, 0x00000409, 0x08000409,
315 0x00020001, 0x08020001, 0x00020009, 0x08020009,
316 0x00020401, 0x08020401, 0x00020409, 0x08020409,
317 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
318 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
319 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
320 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
321 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
322 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
323 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
324 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
325 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
326 0x00000000, 0x00000100, 0x00080000, 0x00080100,
327 0x01000000, 0x01000100, 0x01080000, 0x01080100,
328 0x00000010, 0x00000110, 0x00080010, 0x00080110,
329 0x01000010, 0x01000110, 0x01080010, 0x01080110,
330 0x00200000, 0x00200100, 0x00280000, 0x00280100,
331 0x01200000, 0x01200100, 0x01280000, 0x01280100,
332 0x00200010, 0x00200110, 0x00280010, 0x00280110,
333 0x01200010, 0x01200110, 0x01280010, 0x01280110,
334 0x00000200, 0x00000300, 0x00080200, 0x00080300,
335 0x01000200, 0x01000300, 0x01080200, 0x01080300,
336 0x00000210, 0x00000310, 0x00080210, 0x00080310,
337 0x01000210, 0x01000310, 0x01080210, 0x01080310,
338 0x00200200, 0x00200300, 0x00280200, 0x00280300,
339 0x01200200, 0x01200300, 0x01280200, 0x01280300,
340 0x00200210, 0x00200310, 0x00280210, 0x00280310,
341 0x01200210, 0x01200310, 0x01280210, 0x01280310,
342 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
343 0x00000000, 0x04000000, 0x00040000, 0x04040000,
344 0x00000002, 0x04000002, 0x00040002, 0x04040002,
345 0x00002000, 0x04002000, 0x00042000, 0x04042000,
346 0x00002002, 0x04002002, 0x00042002, 0x04042002,
347 0x00000020, 0x04000020, 0x00040020, 0x04040020,
348 0x00000022, 0x04000022, 0x00040022, 0x04040022,
349 0x00002020, 0x04002020, 0x00042020, 0x04042020,
350 0x00002022, 0x04002022, 0x00042022, 0x04042022,
351 0x00000800, 0x04000800, 0x00040800, 0x04040800,
352 0x00000802, 0x04000802, 0x00040802, 0x04040802,
353 0x00002800, 0x04002800, 0x00042800, 0x04042800,
354 0x00002802, 0x04002802, 0x00042802, 0x04042802,
355 0x00000820, 0x04000820, 0x00040820, 0x04040820,
356 0x00000822, 0x04000822, 0x00040822, 0x04040822,
357 0x00002820, 0x04002820, 0x00042820, 0x04042820,
358 0x00002822, 0x04002822, 0x00042822, 0x04042822
362 #define BOX(i,n,S) u32x ((S)[(n)][(i)])
366 #define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y])
369 __device__ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64])
382 for (int i = 0; i < 16; i++)
385 u32x t = Kd[i] ^ rotl32 (r, 28u);
387 l ^= BOX (((u >> 2) & 0x3f), 0, s_SPtrans)
388 | BOX (((u >> 10) & 0x3f), 2, s_SPtrans)
389 | BOX (((u >> 18) & 0x3f), 4, s_SPtrans)
390 | BOX (((u >> 26) & 0x3f), 6, s_SPtrans)
391 | BOX (((t >> 2) & 0x3f), 1, s_SPtrans)
392 | BOX (((t >> 10) & 0x3f), 3, s_SPtrans)
393 | BOX (((t >> 18) & 0x3f), 5, s_SPtrans)
394 | BOX (((t >> 26) & 0x3f), 7, s_SPtrans);
410 __device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64])
414 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
415 HPERM_OP (c, tt, 2, 0xcccc0000);
416 HPERM_OP (d, tt, 2, 0xcccc0000);
417 PERM_OP (d, c, tt, 1, 0x55555555);
418 PERM_OP (c, d, tt, 8, 0x00ff00ff);
419 PERM_OP (d, c, tt, 1, 0x55555555);
421 d = ((d & 0x000000ff) << 16)
422 | ((d & 0x0000ff00) << 0)
423 | ((d & 0x00ff0000) >> 16)
424 | ((c & 0xf0000000) >> 4);
429 for (u32 i = 0; i < 16; i++)
431 const u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
432 const u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 };
434 c = c >> shifts3s0[i] | c << shifts3s1[i];
435 d = d >> shifts3s0[i] | d << shifts3s1[i];
440 u32x s = BOX ((( c >> 0) & 0x3f), 0, s_skb)
441 | BOX ((((c >> 6) & 0x03)
442 | ((c >> 7) & 0x3c)), 1, s_skb)
443 | BOX ((((c >> 13) & 0x0f)
444 | ((c >> 14) & 0x30)), 2, s_skb)
445 | BOX ((((c >> 20) & 0x01)
447 | ((c >> 22) & 0x38)), 3, s_skb);
449 u32x t = BOX ((( d >> 0) & 0x3f), 4, s_skb)
450 | BOX ((((d >> 7) & 0x03)
451 | ((d >> 8) & 0x3c)), 5, s_skb)
452 | BOX ((((d >> 15) & 0x3f)), 6, s_skb)
453 | BOX ((((d >> 21) & 0x0f)
454 | ((d >> 22) & 0x30)), 7, s_skb);
456 #if __CUDA_ARCH__ >= 200
457 Kc[i] = __byte_perm (s, t, 0x5410);
458 Kd[i] = __byte_perm (s, t, 0x7632);
460 Kc[i] = ((t << 16) | (s & 0x0000ffff));
461 Kd[i] = ((s >> 16) | (t & 0xffff0000));
464 Kc[i] = rotl32 (Kc[i], 2u);
465 Kd[i] = rotl32 (Kd[i], 2u);
469 __shared__ u32 s_SPtrans[8][64];
471 __shared__ u32 s_skb[8][64];
473 __device__ __constant__ bf_t c_bfs[1024];
475 __device__ static void m03100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
481 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
482 const u32 lid = threadIdx.x;
490 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
491 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
492 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
493 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
497 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
498 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
499 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
500 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
509 const u32 salt_len = salt_bufs[salt_pos].salt_len;
517 const u32 salt_word_len = (salt_len + pw_len) * 2;
519 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
521 const u32 w0r = c_bfs[il_pos].i;
551 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
553 w0_t[0] |= salt_buf0[0];
554 w0_t[1] |= salt_buf0[1];
555 w0_t[2] |= salt_buf0[2];
556 w0_t[3] |= salt_buf0[3];
557 w1_t[0] |= salt_buf1[0];
558 w1_t[1] |= salt_buf1[1];
559 w1_t[2] |= salt_buf1[2];
560 w1_t[3] |= salt_buf1[3];
561 w2_t[0] |= salt_buf2[0];
562 w2_t[1] |= salt_buf2[1];
563 w2_t[2] |= salt_buf2[2];
564 w2_t[3] |= salt_buf2[3];
590 * precompute key1 since key is static: 0x0123456789abcdef
591 * plus LEFT_ROTATE by 2
633 * key1 (generate key)
641 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
645 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
646 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
651 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
655 * key2 (generate hash)
658 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
663 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
667 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
668 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
673 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
680 const u32x r0 = iv[0];
681 const u32x r1 = iv[1];
685 #include VECT_COMPARE_M
689 __device__ static void m03100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
695 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
696 const u32 lid = threadIdx.x;
704 salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
705 salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
706 salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
707 salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
711 salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
712 salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
713 salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
714 salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
723 const u32 salt_len = salt_bufs[salt_pos].salt_len;
729 const u32 search[4] =
731 digests_buf[digests_offset].digest_buf[DGST_R0],
732 digests_buf[digests_offset].digest_buf[DGST_R1],
733 digests_buf[digests_offset].digest_buf[DGST_R2],
734 digests_buf[digests_offset].digest_buf[DGST_R3]
743 const u32 salt_word_len = (salt_len + pw_len) * 2;
745 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
747 const u32 w0r = c_bfs[il_pos].i;
777 switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
779 w0_t[0] |= salt_buf0[0];
780 w0_t[1] |= salt_buf0[1];
781 w0_t[2] |= salt_buf0[2];
782 w0_t[3] |= salt_buf0[3];
783 w1_t[0] |= salt_buf1[0];
784 w1_t[1] |= salt_buf1[1];
785 w1_t[2] |= salt_buf1[2];
786 w1_t[3] |= salt_buf1[3];
787 w2_t[0] |= salt_buf2[0];
788 w2_t[1] |= salt_buf2[1];
789 w2_t[2] |= salt_buf2[2];
790 w2_t[3] |= salt_buf2[3];
816 * precompute key1 since key is static: 0x0123456789abcdef
817 * plus LEFT_ROTATE by 2
859 * key1 (generate key)
867 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
871 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
872 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
877 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
881 * key2 (generate hash)
884 _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb);
889 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++)
893 data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00);
894 data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00);
899 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
906 const u32x r0 = iv[0];
907 const u32x r1 = iv[1];
911 #include VECT_COMPARE_S
915 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
921 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
922 const u32 lid = threadIdx.x;
926 w0[0] = pws[gid].i[ 0];
927 w0[1] = pws[gid].i[ 1];
928 w0[2] = pws[gid].i[ 2];
929 w0[3] = pws[gid].i[ 3];
952 const u32 pw_len = pws[gid].pw_len;
960 s_SPtrans[0][lid] = c_SPtrans[0][lid];
961 s_SPtrans[1][lid] = c_SPtrans[1][lid];
962 s_SPtrans[2][lid] = c_SPtrans[2][lid];
963 s_SPtrans[3][lid] = c_SPtrans[3][lid];
964 s_SPtrans[4][lid] = c_SPtrans[4][lid];
965 s_SPtrans[5][lid] = c_SPtrans[5][lid];
966 s_SPtrans[6][lid] = c_SPtrans[6][lid];
967 s_SPtrans[7][lid] = c_SPtrans[7][lid];
969 s_skb[0][lid] = c_skb[0][lid];
970 s_skb[1][lid] = c_skb[1][lid];
971 s_skb[2][lid] = c_skb[2][lid];
972 s_skb[3][lid] = c_skb[3][lid];
973 s_skb[4][lid] = c_skb[4][lid];
974 s_skb[5][lid] = c_skb[5][lid];
975 s_skb[6][lid] = c_skb[6][lid];
976 s_skb[7][lid] = c_skb[7][lid];
981 if (gid >= gid_max) return;
987 m03100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
990 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
996 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
997 const u32 lid = threadIdx.x;
1001 w0[0] = pws[gid].i[ 0];
1002 w0[1] = pws[gid].i[ 1];
1003 w0[2] = pws[gid].i[ 2];
1004 w0[3] = pws[gid].i[ 3];
1008 w1[0] = pws[gid].i[ 4];
1009 w1[1] = pws[gid].i[ 5];
1010 w1[2] = pws[gid].i[ 6];
1011 w1[3] = pws[gid].i[ 7];
1027 const u32 pw_len = pws[gid].pw_len;
1035 s_SPtrans[0][lid] = c_SPtrans[0][lid];
1036 s_SPtrans[1][lid] = c_SPtrans[1][lid];
1037 s_SPtrans[2][lid] = c_SPtrans[2][lid];
1038 s_SPtrans[3][lid] = c_SPtrans[3][lid];
1039 s_SPtrans[4][lid] = c_SPtrans[4][lid];
1040 s_SPtrans[5][lid] = c_SPtrans[5][lid];
1041 s_SPtrans[6][lid] = c_SPtrans[6][lid];
1042 s_SPtrans[7][lid] = c_SPtrans[7][lid];
1044 s_skb[0][lid] = c_skb[0][lid];
1045 s_skb[1][lid] = c_skb[1][lid];
1046 s_skb[2][lid] = c_skb[2][lid];
1047 s_skb[3][lid] = c_skb[3][lid];
1048 s_skb[4][lid] = c_skb[4][lid];
1049 s_skb[5][lid] = c_skb[5][lid];
1050 s_skb[6][lid] = c_skb[6][lid];
1051 s_skb[7][lid] = c_skb[7][lid];
1056 if (gid >= gid_max) return;
1062 m03100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1065 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1071 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1072 const u32 lid = threadIdx.x;
1076 w0[0] = pws[gid].i[ 0];
1077 w0[1] = pws[gid].i[ 1];
1078 w0[2] = pws[gid].i[ 2];
1079 w0[3] = pws[gid].i[ 3];
1083 w1[0] = pws[gid].i[ 4];
1084 w1[1] = pws[gid].i[ 5];
1085 w1[2] = pws[gid].i[ 6];
1086 w1[3] = pws[gid].i[ 7];
1090 w2[0] = pws[gid].i[ 8];
1091 w2[1] = pws[gid].i[ 9];
1092 w2[2] = pws[gid].i[10];
1093 w2[3] = pws[gid].i[11];
1097 w3[0] = pws[gid].i[12];
1098 w3[1] = pws[gid].i[13];
1102 const u32 pw_len = pws[gid].pw_len;
1110 s_SPtrans[0][lid] = c_SPtrans[0][lid];
1111 s_SPtrans[1][lid] = c_SPtrans[1][lid];
1112 s_SPtrans[2][lid] = c_SPtrans[2][lid];
1113 s_SPtrans[3][lid] = c_SPtrans[3][lid];
1114 s_SPtrans[4][lid] = c_SPtrans[4][lid];
1115 s_SPtrans[5][lid] = c_SPtrans[5][lid];
1116 s_SPtrans[6][lid] = c_SPtrans[6][lid];
1117 s_SPtrans[7][lid] = c_SPtrans[7][lid];
1119 s_skb[0][lid] = c_skb[0][lid];
1120 s_skb[1][lid] = c_skb[1][lid];
1121 s_skb[2][lid] = c_skb[2][lid];
1122 s_skb[3][lid] = c_skb[3][lid];
1123 s_skb[4][lid] = c_skb[4][lid];
1124 s_skb[5][lid] = c_skb[5][lid];
1125 s_skb[6][lid] = c_skb[6][lid];
1126 s_skb[7][lid] = c_skb[7][lid];
1131 if (gid >= gid_max) return;
1137 m03100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1140 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1146 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1147 const u32 lid = threadIdx.x;
1151 w0[0] = pws[gid].i[ 0];
1152 w0[1] = pws[gid].i[ 1];
1153 w0[2] = pws[gid].i[ 2];
1154 w0[3] = pws[gid].i[ 3];
1177 const u32 pw_len = pws[gid].pw_len;
1185 s_SPtrans[0][lid] = c_SPtrans[0][lid];
1186 s_SPtrans[1][lid] = c_SPtrans[1][lid];
1187 s_SPtrans[2][lid] = c_SPtrans[2][lid];
1188 s_SPtrans[3][lid] = c_SPtrans[3][lid];
1189 s_SPtrans[4][lid] = c_SPtrans[4][lid];
1190 s_SPtrans[5][lid] = c_SPtrans[5][lid];
1191 s_SPtrans[6][lid] = c_SPtrans[6][lid];
1192 s_SPtrans[7][lid] = c_SPtrans[7][lid];
1194 s_skb[0][lid] = c_skb[0][lid];
1195 s_skb[1][lid] = c_skb[1][lid];
1196 s_skb[2][lid] = c_skb[2][lid];
1197 s_skb[3][lid] = c_skb[3][lid];
1198 s_skb[4][lid] = c_skb[4][lid];
1199 s_skb[5][lid] = c_skb[5][lid];
1200 s_skb[6][lid] = c_skb[6][lid];
1201 s_skb[7][lid] = c_skb[7][lid];
1206 if (gid >= gid_max) return;
1212 m03100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1215 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1221 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1222 const u32 lid = threadIdx.x;
1226 w0[0] = pws[gid].i[ 0];
1227 w0[1] = pws[gid].i[ 1];
1228 w0[2] = pws[gid].i[ 2];
1229 w0[3] = pws[gid].i[ 3];
1233 w1[0] = pws[gid].i[ 4];
1234 w1[1] = pws[gid].i[ 5];
1235 w1[2] = pws[gid].i[ 6];
1236 w1[3] = pws[gid].i[ 7];
1252 const u32 pw_len = pws[gid].pw_len;
1260 s_SPtrans[0][lid] = c_SPtrans[0][lid];
1261 s_SPtrans[1][lid] = c_SPtrans[1][lid];
1262 s_SPtrans[2][lid] = c_SPtrans[2][lid];
1263 s_SPtrans[3][lid] = c_SPtrans[3][lid];
1264 s_SPtrans[4][lid] = c_SPtrans[4][lid];
1265 s_SPtrans[5][lid] = c_SPtrans[5][lid];
1266 s_SPtrans[6][lid] = c_SPtrans[6][lid];
1267 s_SPtrans[7][lid] = c_SPtrans[7][lid];
1269 s_skb[0][lid] = c_skb[0][lid];
1270 s_skb[1][lid] = c_skb[1][lid];
1271 s_skb[2][lid] = c_skb[2][lid];
1272 s_skb[3][lid] = c_skb[3][lid];
1273 s_skb[4][lid] = c_skb[4][lid];
1274 s_skb[5][lid] = c_skb[5][lid];
1275 s_skb[6][lid] = c_skb[6][lid];
1276 s_skb[7][lid] = c_skb[7][lid];
1281 if (gid >= gid_max) return;
1287 m03100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
1290 extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
1296 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
1297 const u32 lid = threadIdx.x;
1301 w0[0] = pws[gid].i[ 0];
1302 w0[1] = pws[gid].i[ 1];
1303 w0[2] = pws[gid].i[ 2];
1304 w0[3] = pws[gid].i[ 3];
1308 w1[0] = pws[gid].i[ 4];
1309 w1[1] = pws[gid].i[ 5];
1310 w1[2] = pws[gid].i[ 6];
1311 w1[3] = pws[gid].i[ 7];
1315 w2[0] = pws[gid].i[ 8];
1316 w2[1] = pws[gid].i[ 9];
1317 w2[2] = pws[gid].i[10];
1318 w2[3] = pws[gid].i[11];
1322 w3[0] = pws[gid].i[12];
1323 w3[1] = pws[gid].i[13];
1327 const u32 pw_len = pws[gid].pw_len;
1335 s_SPtrans[0][lid] = c_SPtrans[0][lid];
1336 s_SPtrans[1][lid] = c_SPtrans[1][lid];
1337 s_SPtrans[2][lid] = c_SPtrans[2][lid];
1338 s_SPtrans[3][lid] = c_SPtrans[3][lid];
1339 s_SPtrans[4][lid] = c_SPtrans[4][lid];
1340 s_SPtrans[5][lid] = c_SPtrans[5][lid];
1341 s_SPtrans[6][lid] = c_SPtrans[6][lid];
1342 s_SPtrans[7][lid] = c_SPtrans[7][lid];
1344 s_skb[0][lid] = c_skb[0][lid];
1345 s_skb[1][lid] = c_skb[1][lid];
1346 s_skb[2][lid] = c_skb[2][lid];
1347 s_skb[3][lid] = c_skb[3][lid];
1348 s_skb[4][lid] = c_skb[4][lid];
1349 s_skb[5][lid] = c_skb[5][lid];
1350 s_skb[6][lid] = c_skb[6][lid];
1351 s_skb[7][lid] = c_skb[7][lid];
1356 if (gid >= gid_max) return;
1362 m03100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);