2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
27 #include "include/rp_gpu.h"
31 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
32 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
36 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
41 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
42 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
45 #define PERM_OP(a,b,tt,n,m) \
55 #define HPERM_OP(a,tt,n,m) \
61 tt = tt >> (16 + n); \
65 __device__ __constant__ u32 c_SPtrans[8][64] =
68 0x02080800, 0x00080000, 0x02000002, 0x02080802,
69 0x02000000, 0x00080802, 0x00080002, 0x02000002,
70 0x00080802, 0x02080800, 0x02080000, 0x00000802,
71 0x02000802, 0x02000000, 0x00000000, 0x00080002,
72 0x00080000, 0x00000002, 0x02000800, 0x00080800,
73 0x02080802, 0x02080000, 0x00000802, 0x02000800,
74 0x00000002, 0x00000800, 0x00080800, 0x02080002,
75 0x00000800, 0x02000802, 0x02080002, 0x00000000,
76 0x00000000, 0x02080802, 0x02000800, 0x00080002,
77 0x02080800, 0x00080000, 0x00000802, 0x02000800,
78 0x02080002, 0x00000800, 0x00080800, 0x02000002,
79 0x00080802, 0x00000002, 0x02000002, 0x02080000,
80 0x02080802, 0x00080800, 0x02080000, 0x02000802,
81 0x02000000, 0x00000802, 0x00080002, 0x00000000,
82 0x00080000, 0x02000000, 0x02000802, 0x02080800,
83 0x00000002, 0x02080002, 0x00000800, 0x00080802,
85 0x40108010, 0x00000000, 0x00108000, 0x40100000,
86 0x40000010, 0x00008010, 0x40008000, 0x00108000,
87 0x00008000, 0x40100010, 0x00000010, 0x40008000,
88 0x00100010, 0x40108000, 0x40100000, 0x00000010,
89 0x00100000, 0x40008010, 0x40100010, 0x00008000,
90 0x00108010, 0x40000000, 0x00000000, 0x00100010,
91 0x40008010, 0x00108010, 0x40108000, 0x40000010,
92 0x40000000, 0x00100000, 0x00008010, 0x40108010,
93 0x00100010, 0x40108000, 0x40008000, 0x00108010,
94 0x40108010, 0x00100010, 0x40000010, 0x00000000,
95 0x40000000, 0x00008010, 0x00100000, 0x40100010,
96 0x00008000, 0x40000000, 0x00108010, 0x40008010,
97 0x40108000, 0x00008000, 0x00000000, 0x40000010,
98 0x00000010, 0x40108010, 0x00108000, 0x40100000,
99 0x40100010, 0x00100000, 0x00008010, 0x40008000,
100 0x40008010, 0x00000010, 0x40100000, 0x00108000,
102 0x04000001, 0x04040100, 0x00000100, 0x04000101,
103 0x00040001, 0x04000000, 0x04000101, 0x00040100,
104 0x04000100, 0x00040000, 0x04040000, 0x00000001,
105 0x04040101, 0x00000101, 0x00000001, 0x04040001,
106 0x00000000, 0x00040001, 0x04040100, 0x00000100,
107 0x00000101, 0x04040101, 0x00040000, 0x04000001,
108 0x04040001, 0x04000100, 0x00040101, 0x04040000,
109 0x00040100, 0x00000000, 0x04000000, 0x00040101,
110 0x04040100, 0x00000100, 0x00000001, 0x00040000,
111 0x00000101, 0x00040001, 0x04040000, 0x04000101,
112 0x00000000, 0x04040100, 0x00040100, 0x04040001,
113 0x00040001, 0x04000000, 0x04040101, 0x00000001,
114 0x00040101, 0x04000001, 0x04000000, 0x04040101,
115 0x00040000, 0x04000100, 0x04000101, 0x00040100,
116 0x04000100, 0x00000000, 0x04040001, 0x00000101,
117 0x04000001, 0x00040101, 0x00000100, 0x04040000,
119 0x00401008, 0x10001000, 0x00000008, 0x10401008,
120 0x00000000, 0x10400000, 0x10001008, 0x00400008,
121 0x10401000, 0x10000008, 0x10000000, 0x00001008,
122 0x10000008, 0x00401008, 0x00400000, 0x10000000,
123 0x10400008, 0x00401000, 0x00001000, 0x00000008,
124 0x00401000, 0x10001008, 0x10400000, 0x00001000,
125 0x00001008, 0x00000000, 0x00400008, 0x10401000,
126 0x10001000, 0x10400008, 0x10401008, 0x00400000,
127 0x10400008, 0x00001008, 0x00400000, 0x10000008,
128 0x00401000, 0x10001000, 0x00000008, 0x10400000,
129 0x10001008, 0x00000000, 0x00001000, 0x00400008,
130 0x00000000, 0x10400008, 0x10401000, 0x00001000,
131 0x10000000, 0x10401008, 0x00401008, 0x00400000,
132 0x10401008, 0x00000008, 0x10001000, 0x00401008,
133 0x00400008, 0x00401000, 0x10400000, 0x10001008,
134 0x00001008, 0x10000000, 0x10000008, 0x10401000,
136 0x08000000, 0x00010000, 0x00000400, 0x08010420,
137 0x08010020, 0x08000400, 0x00010420, 0x08010000,
138 0x00010000, 0x00000020, 0x08000020, 0x00010400,
139 0x08000420, 0x08010020, 0x08010400, 0x00000000,
140 0x00010400, 0x08000000, 0x00010020, 0x00000420,
141 0x08000400, 0x00010420, 0x00000000, 0x08000020,
142 0x00000020, 0x08000420, 0x08010420, 0x00010020,
143 0x08010000, 0x00000400, 0x00000420, 0x08010400,
144 0x08010400, 0x08000420, 0x00010020, 0x08010000,
145 0x00010000, 0x00000020, 0x08000020, 0x08000400,
146 0x08000000, 0x00010400, 0x08010420, 0x00000000,
147 0x00010420, 0x08000000, 0x00000400, 0x00010020,
148 0x08000420, 0x00000400, 0x00000000, 0x08010420,
149 0x08010020, 0x08010400, 0x00000420, 0x00010000,
150 0x00010400, 0x08010020, 0x08000400, 0x00000420,
151 0x00000020, 0x00010420, 0x08010000, 0x08000020,
153 0x80000040, 0x00200040, 0x00000000, 0x80202000,
154 0x00200040, 0x00002000, 0x80002040, 0x00200000,
155 0x00002040, 0x80202040, 0x00202000, 0x80000000,
156 0x80002000, 0x80000040, 0x80200000, 0x00202040,
157 0x00200000, 0x80002040, 0x80200040, 0x00000000,
158 0x00002000, 0x00000040, 0x80202000, 0x80200040,
159 0x80202040, 0x80200000, 0x80000000, 0x00002040,
160 0x00000040, 0x00202000, 0x00202040, 0x80002000,
161 0x00002040, 0x80000000, 0x80002000, 0x00202040,
162 0x80202000, 0x00200040, 0x00000000, 0x80002000,
163 0x80000000, 0x00002000, 0x80200040, 0x00200000,
164 0x00200040, 0x80202040, 0x00202000, 0x00000040,
165 0x80202040, 0x00202000, 0x00200000, 0x80002040,
166 0x80000040, 0x80200000, 0x00202040, 0x00000000,
167 0x00002000, 0x80000040, 0x80002040, 0x80202000,
168 0x80200000, 0x00002040, 0x00000040, 0x80200040,
170 0x00004000, 0x00000200, 0x01000200, 0x01000004,
171 0x01004204, 0x00004004, 0x00004200, 0x00000000,
172 0x01000000, 0x01000204, 0x00000204, 0x01004000,
173 0x00000004, 0x01004200, 0x01004000, 0x00000204,
174 0x01000204, 0x00004000, 0x00004004, 0x01004204,
175 0x00000000, 0x01000200, 0x01000004, 0x00004200,
176 0x01004004, 0x00004204, 0x01004200, 0x00000004,
177 0x00004204, 0x01004004, 0x00000200, 0x01000000,
178 0x00004204, 0x01004000, 0x01004004, 0x00000204,
179 0x00004000, 0x00000200, 0x01000000, 0x01004004,
180 0x01000204, 0x00004204, 0x00004200, 0x00000000,
181 0x00000200, 0x01000004, 0x00000004, 0x01000200,
182 0x00000000, 0x01000204, 0x01000200, 0x00004200,
183 0x00000204, 0x00004000, 0x01004204, 0x01000000,
184 0x01004200, 0x00000004, 0x00004004, 0x01004204,
185 0x01000004, 0x01004200, 0x01004000, 0x00004004,
187 0x20800080, 0x20820000, 0x00020080, 0x00000000,
188 0x20020000, 0x00800080, 0x20800000, 0x20820080,
189 0x00000080, 0x20000000, 0x00820000, 0x00020080,
190 0x00820080, 0x20020080, 0x20000080, 0x20800000,
191 0x00020000, 0x00820080, 0x00800080, 0x20020000,
192 0x20820080, 0x20000080, 0x00000000, 0x00820000,
193 0x20000000, 0x00800000, 0x20020080, 0x20800080,
194 0x00800000, 0x00020000, 0x20820000, 0x00000080,
195 0x00800000, 0x00020000, 0x20000080, 0x20820080,
196 0x00020080, 0x20000000, 0x00000000, 0x00820000,
197 0x20800080, 0x20020080, 0x20020000, 0x00800080,
198 0x20820000, 0x00000080, 0x00800080, 0x20020000,
199 0x20820080, 0x00800000, 0x20800000, 0x20000080,
200 0x00820000, 0x00020080, 0x20020080, 0x20800000,
201 0x00000080, 0x20820000, 0x00820080, 0x00000000,
202 0x20000000, 0x20800080, 0x00020000, 0x00820080,
205 __device__ __constant__ u32 c_skb[8][64] =
207 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
208 0x00000000, 0x00000010, 0x20000000, 0x20000010,
209 0x00010000, 0x00010010, 0x20010000, 0x20010010,
210 0x00000800, 0x00000810, 0x20000800, 0x20000810,
211 0x00010800, 0x00010810, 0x20010800, 0x20010810,
212 0x00000020, 0x00000030, 0x20000020, 0x20000030,
213 0x00010020, 0x00010030, 0x20010020, 0x20010030,
214 0x00000820, 0x00000830, 0x20000820, 0x20000830,
215 0x00010820, 0x00010830, 0x20010820, 0x20010830,
216 0x00080000, 0x00080010, 0x20080000, 0x20080010,
217 0x00090000, 0x00090010, 0x20090000, 0x20090010,
218 0x00080800, 0x00080810, 0x20080800, 0x20080810,
219 0x00090800, 0x00090810, 0x20090800, 0x20090810,
220 0x00080020, 0x00080030, 0x20080020, 0x20080030,
221 0x00090020, 0x00090030, 0x20090020, 0x20090030,
222 0x00080820, 0x00080830, 0x20080820, 0x20080830,
223 0x00090820, 0x00090830, 0x20090820, 0x20090830,
224 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
225 0x00000000, 0x02000000, 0x00002000, 0x02002000,
226 0x00200000, 0x02200000, 0x00202000, 0x02202000,
227 0x00000004, 0x02000004, 0x00002004, 0x02002004,
228 0x00200004, 0x02200004, 0x00202004, 0x02202004,
229 0x00000400, 0x02000400, 0x00002400, 0x02002400,
230 0x00200400, 0x02200400, 0x00202400, 0x02202400,
231 0x00000404, 0x02000404, 0x00002404, 0x02002404,
232 0x00200404, 0x02200404, 0x00202404, 0x02202404,
233 0x10000000, 0x12000000, 0x10002000, 0x12002000,
234 0x10200000, 0x12200000, 0x10202000, 0x12202000,
235 0x10000004, 0x12000004, 0x10002004, 0x12002004,
236 0x10200004, 0x12200004, 0x10202004, 0x12202004,
237 0x10000400, 0x12000400, 0x10002400, 0x12002400,
238 0x10200400, 0x12200400, 0x10202400, 0x12202400,
239 0x10000404, 0x12000404, 0x10002404, 0x12002404,
240 0x10200404, 0x12200404, 0x10202404, 0x12202404,
241 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
242 0x00000000, 0x00000001, 0x00040000, 0x00040001,
243 0x01000000, 0x01000001, 0x01040000, 0x01040001,
244 0x00000002, 0x00000003, 0x00040002, 0x00040003,
245 0x01000002, 0x01000003, 0x01040002, 0x01040003,
246 0x00000200, 0x00000201, 0x00040200, 0x00040201,
247 0x01000200, 0x01000201, 0x01040200, 0x01040201,
248 0x00000202, 0x00000203, 0x00040202, 0x00040203,
249 0x01000202, 0x01000203, 0x01040202, 0x01040203,
250 0x08000000, 0x08000001, 0x08040000, 0x08040001,
251 0x09000000, 0x09000001, 0x09040000, 0x09040001,
252 0x08000002, 0x08000003, 0x08040002, 0x08040003,
253 0x09000002, 0x09000003, 0x09040002, 0x09040003,
254 0x08000200, 0x08000201, 0x08040200, 0x08040201,
255 0x09000200, 0x09000201, 0x09040200, 0x09040201,
256 0x08000202, 0x08000203, 0x08040202, 0x08040203,
257 0x09000202, 0x09000203, 0x09040202, 0x09040203,
258 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
259 0x00000000, 0x00100000, 0x00000100, 0x00100100,
260 0x00000008, 0x00100008, 0x00000108, 0x00100108,
261 0x00001000, 0x00101000, 0x00001100, 0x00101100,
262 0x00001008, 0x00101008, 0x00001108, 0x00101108,
263 0x04000000, 0x04100000, 0x04000100, 0x04100100,
264 0x04000008, 0x04100008, 0x04000108, 0x04100108,
265 0x04001000, 0x04101000, 0x04001100, 0x04101100,
266 0x04001008, 0x04101008, 0x04001108, 0x04101108,
267 0x00020000, 0x00120000, 0x00020100, 0x00120100,
268 0x00020008, 0x00120008, 0x00020108, 0x00120108,
269 0x00021000, 0x00121000, 0x00021100, 0x00121100,
270 0x00021008, 0x00121008, 0x00021108, 0x00121108,
271 0x04020000, 0x04120000, 0x04020100, 0x04120100,
272 0x04020008, 0x04120008, 0x04020108, 0x04120108,
273 0x04021000, 0x04121000, 0x04021100, 0x04121100,
274 0x04021008, 0x04121008, 0x04021108, 0x04121108,
275 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
276 0x00000000, 0x10000000, 0x00010000, 0x10010000,
277 0x00000004, 0x10000004, 0x00010004, 0x10010004,
278 0x20000000, 0x30000000, 0x20010000, 0x30010000,
279 0x20000004, 0x30000004, 0x20010004, 0x30010004,
280 0x00100000, 0x10100000, 0x00110000, 0x10110000,
281 0x00100004, 0x10100004, 0x00110004, 0x10110004,
282 0x20100000, 0x30100000, 0x20110000, 0x30110000,
283 0x20100004, 0x30100004, 0x20110004, 0x30110004,
284 0x00001000, 0x10001000, 0x00011000, 0x10011000,
285 0x00001004, 0x10001004, 0x00011004, 0x10011004,
286 0x20001000, 0x30001000, 0x20011000, 0x30011000,
287 0x20001004, 0x30001004, 0x20011004, 0x30011004,
288 0x00101000, 0x10101000, 0x00111000, 0x10111000,
289 0x00101004, 0x10101004, 0x00111004, 0x10111004,
290 0x20101000, 0x30101000, 0x20111000, 0x30111000,
291 0x20101004, 0x30101004, 0x20111004, 0x30111004,
292 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
293 0x00000000, 0x08000000, 0x00000008, 0x08000008,
294 0x00000400, 0x08000400, 0x00000408, 0x08000408,
295 0x00020000, 0x08020000, 0x00020008, 0x08020008,
296 0x00020400, 0x08020400, 0x00020408, 0x08020408,
297 0x00000001, 0x08000001, 0x00000009, 0x08000009,
298 0x00000401, 0x08000401, 0x00000409, 0x08000409,
299 0x00020001, 0x08020001, 0x00020009, 0x08020009,
300 0x00020401, 0x08020401, 0x00020409, 0x08020409,
301 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
302 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
303 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
304 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
305 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
306 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
307 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
308 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
309 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
310 0x00000000, 0x00000100, 0x00080000, 0x00080100,
311 0x01000000, 0x01000100, 0x01080000, 0x01080100,
312 0x00000010, 0x00000110, 0x00080010, 0x00080110,
313 0x01000010, 0x01000110, 0x01080010, 0x01080110,
314 0x00200000, 0x00200100, 0x00280000, 0x00280100,
315 0x01200000, 0x01200100, 0x01280000, 0x01280100,
316 0x00200010, 0x00200110, 0x00280010, 0x00280110,
317 0x01200010, 0x01200110, 0x01280010, 0x01280110,
318 0x00000200, 0x00000300, 0x00080200, 0x00080300,
319 0x01000200, 0x01000300, 0x01080200, 0x01080300,
320 0x00000210, 0x00000310, 0x00080210, 0x00080310,
321 0x01000210, 0x01000310, 0x01080210, 0x01080310,
322 0x00200200, 0x00200300, 0x00280200, 0x00280300,
323 0x01200200, 0x01200300, 0x01280200, 0x01280300,
324 0x00200210, 0x00200310, 0x00280210, 0x00280310,
325 0x01200210, 0x01200310, 0x01280210, 0x01280310,
326 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
327 0x00000000, 0x04000000, 0x00040000, 0x04040000,
328 0x00000002, 0x04000002, 0x00040002, 0x04040002,
329 0x00002000, 0x04002000, 0x00042000, 0x04042000,
330 0x00002002, 0x04002002, 0x00042002, 0x04042002,
331 0x00000020, 0x04000020, 0x00040020, 0x04040020,
332 0x00000022, 0x04000022, 0x00040022, 0x04040022,
333 0x00002020, 0x04002020, 0x00042020, 0x04042020,
334 0x00002022, 0x04002022, 0x00042022, 0x04042022,
335 0x00000800, 0x04000800, 0x00040800, 0x04040800,
336 0x00000802, 0x04000802, 0x00040802, 0x04040802,
337 0x00002800, 0x04002800, 0x00042800, 0x04042800,
338 0x00002802, 0x04002802, 0x00042802, 0x04042802,
339 0x00000820, 0x04000820, 0x00040820, 0x04040820,
340 0x00000822, 0x04000822, 0x00040822, 0x04040822,
341 0x00002820, 0x04002820, 0x00042820, 0x04042820,
342 0x00002822, 0x04002822, 0x00042822, 0x04042822
345 #define LM_IV_0_IP_RR3 0x2400b807
346 #define LM_IV_1_IP_RR3 0xaa190747
349 #define BOX(i,n,S) u32x ((S)[(n)][(i)])
353 #define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y])
356 __device__ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64])
358 asm (".reg .u8 c0, c1, c2, c3, c4, c5, c6, c7;");
365 for (int i = 0; i < 16; i++)
368 u32x t = Kd[i] ^ rotl32 (r, 28u);
370 u = (u >> 2) & 0x3f3f3f3f;
371 t = (t >> 2) & 0x3f3f3f3f;
378 "mov.b32 {c0, c1, c2, c3}, %8;"
379 "mov.b32 {c4, c5, c6, c7}, %9;"
389 : "=r"(u0), "=r"(u1), "=r"(u2), "=r"(u3),
390 "=r"(t0), "=r"(t1), "=r"(t2), "=r"(t3)
395 l ^= BOX (u0, 0, s_SPtrans)
396 | BOX (u1, 2, s_SPtrans)
397 | BOX (u2, 4, s_SPtrans)
398 | BOX (u3, 6, s_SPtrans)
399 | BOX (t0, 1, s_SPtrans)
400 | BOX (t1, 3, s_SPtrans)
401 | BOX (t2, 5, s_SPtrans)
402 | BOX (t3, 7, s_SPtrans);
409 iv[0] = rotl32 (l, 29);
410 iv[1] = rotl32 (r, 29);
413 __device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64])
417 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
418 HPERM_OP (c, tt, 2, 0xcccc0000);
419 HPERM_OP (d, tt, 2, 0xcccc0000);
420 PERM_OP (d, c, tt, 1, 0x55555555);
421 PERM_OP (c, d, tt, 8, 0x00ff00ff);
422 PERM_OP (d, c, tt, 1, 0x55555555);
424 d = ((d & 0x000000ff) << 16)
425 | ((d & 0x0000ff00) << 0)
426 | ((d & 0x00ff0000) >> 16)
427 | ((c & 0xf0000000) >> 4);
431 for (u32 i = 0; i < 16; i++)
433 if ((i < 2) || (i == 8) || (i == 15))
435 c = ((c >> 1) | (c << 27));
436 d = ((d >> 1) | (d << 27));
440 c = ((c >> 2) | (c << 26));
441 d = ((d >> 2) | (d << 26));
447 const u32x c00 = (c >> 0) & 0x0000003f;
448 const u32x c06 = (c >> 6) & 0x00383003;
449 const u32x c07 = (c >> 7) & 0x0000003c;
450 const u32x c13 = (c >> 13) & 0x0000060f;
451 const u32x c20 = (c >> 20) & 0x00000001;
453 u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb)
454 | BOX (((c06 >> 0) & 0xff)
455 |((c07 >> 0) & 0xff), 1, s_skb)
456 | BOX (((c13 >> 0) & 0xff)
457 |((c06 >> 8) & 0xff), 2, s_skb)
458 | BOX (((c20 >> 0) & 0xff)
460 |((c06 >> 16) & 0xff), 3, s_skb);
462 const u32x d00 = (d >> 0) & 0x00003c3f;
463 const u32x d07 = (d >> 7) & 0x00003f03;
464 const u32x d21 = (d >> 21) & 0x0000000f;
465 const u32x d22 = (d >> 22) & 0x00000030;
467 u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb)
468 | BOX (((d07 >> 0) & 0xff)
469 |((d00 >> 8) & 0xff), 5, s_skb)
470 | BOX (((d07 >> 8) & 0xff), 6, s_skb)
471 | BOX (((d21 >> 0) & 0xff)
472 |((d22 >> 0) & 0xff), 7, s_skb);
474 #if __CUDA_ARCH__ >= 200
475 Kc[i] = __byte_perm (s, t, 0x5410);
476 Kd[i] = __byte_perm (s, t, 0x7632);
478 Kc[i] = ((t << 16) | (s & 0x0000ffff));
479 Kd[i] = ((s >> 16) | (t & 0xffff0000));
482 Kc[i] = rotl32 (Kc[i], 2u);
483 Kd[i] = rotl32 (Kd[i], 2u);
487 __device__ static void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2])
491 t[0] = (w0 >> 0) & 0xff;
492 t[1] = (w0 >> 8) & 0xff;
493 t[2] = (w0 >> 16) & 0xff;
494 t[3] = (w0 >> 24) & 0xff;
495 t[4] = (w1 >> 0) & 0xff;
496 t[5] = (w1 >> 8) & 0xff;
497 t[6] = (w1 >> 16) & 0xff;
498 t[7] = (w1 >> 24) & 0xff;
503 k[1] = (t[0] << 7) | (t[1] >> 1);
504 k[2] = (t[1] << 6) | (t[2] >> 2);
505 k[3] = (t[2] << 5) | (t[3] >> 3);
506 k[4] = (t[3] << 4) | (t[4] >> 4);
507 k[5] = (t[4] << 3) | (t[5] >> 5);
508 k[6] = (t[5] << 2) | (t[6] >> 6);
511 out[0] = ((k[0] & 0xff) << 0)
512 | ((k[1] & 0xff) << 8)
513 | ((k[2] & 0xff) << 16)
514 | ((k[3] & 0xff) << 24);
516 out[1] = ((k[4] & 0xff) << 0)
517 | ((k[5] & 0xff) << 8)
518 | ((k[6] & 0xff) << 16)
519 | ((k[7] & 0xff) << 24);
522 __device__ __constant__ gpu_rule_t c_rules[1024];
524 extern "C" __global__ void __launch_bounds__ (256, 1) m03000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
530 const u32 lid = threadIdx.x;
536 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
540 pw_buf[0] = pws[gid].i[ 0];
541 pw_buf[1] = pws[gid].i[ 1];
545 const u32 pw_len = pws[gid].pw_len;
551 __shared__ u32 s_SPtrans[8][64];
552 __shared__ u32 s_skb[8][64];
556 s_SPtrans[0][lid] = c_SPtrans[0][lid];
557 s_SPtrans[1][lid] = c_SPtrans[1][lid];
558 s_SPtrans[2][lid] = c_SPtrans[2][lid];
559 s_SPtrans[3][lid] = c_SPtrans[3][lid];
560 s_SPtrans[4][lid] = c_SPtrans[4][lid];
561 s_SPtrans[5][lid] = c_SPtrans[5][lid];
562 s_SPtrans[6][lid] = c_SPtrans[6][lid];
563 s_SPtrans[7][lid] = c_SPtrans[7][lid];
565 s_skb[0][lid] = c_skb[0][lid];
566 s_skb[1][lid] = c_skb[1][lid];
567 s_skb[2][lid] = c_skb[2][lid];
568 s_skb[3][lid] = c_skb[3][lid];
569 s_skb[4][lid] = c_skb[4][lid];
570 s_skb[5][lid] = c_skb[5][lid];
571 s_skb[6][lid] = c_skb[6][lid];
572 s_skb[7][lid] = c_skb[7][lid];
577 if (gid >= gid_max) return;
583 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
613 u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
615 out_len = (out_len >= 7) ? 7 : out_len;
619 transform_netntlmv1_key (w0[0], w0[1], key);
621 const u32x c = key[0];
622 const u32x d = key[1];
627 _des_crypt_keysetup (c, d, Kc, Kd, s_skb);
631 data[0] = LM_IV_0_IP_RR3;
632 data[1] = LM_IV_1_IP_RR3;
636 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
638 const u32x r0 = iv[0];
639 const u32x r1 = iv[1];
643 #include VECT_COMPARE_M
647 extern "C" __global__ void __launch_bounds__ (256, 1) m03000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
651 extern "C" __global__ void __launch_bounds__ (256, 1) m03000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
655 extern "C" __global__ void __launch_bounds__ (256, 1) m03000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
661 const u32 lid = threadIdx.x;
667 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
671 pw_buf[0] = pws[gid].i[ 0];
672 pw_buf[1] = pws[gid].i[ 1];
676 const u32 pw_len = pws[gid].pw_len;
682 __shared__ u32 s_SPtrans[8][64];
683 __shared__ u32 s_skb[8][64];
687 s_SPtrans[0][lid] = c_SPtrans[0][lid];
688 s_SPtrans[1][lid] = c_SPtrans[1][lid];
689 s_SPtrans[2][lid] = c_SPtrans[2][lid];
690 s_SPtrans[3][lid] = c_SPtrans[3][lid];
691 s_SPtrans[4][lid] = c_SPtrans[4][lid];
692 s_SPtrans[5][lid] = c_SPtrans[5][lid];
693 s_SPtrans[6][lid] = c_SPtrans[6][lid];
694 s_SPtrans[7][lid] = c_SPtrans[7][lid];
696 s_skb[0][lid] = c_skb[0][lid];
697 s_skb[1][lid] = c_skb[1][lid];
698 s_skb[2][lid] = c_skb[2][lid];
699 s_skb[3][lid] = c_skb[3][lid];
700 s_skb[4][lid] = c_skb[4][lid];
701 s_skb[5][lid] = c_skb[5][lid];
702 s_skb[6][lid] = c_skb[6][lid];
703 s_skb[7][lid] = c_skb[7][lid];
708 if (gid >= gid_max) return;
714 const u32 search[4] =
716 digests_buf[digests_offset].digest_buf[DGST_R0],
717 digests_buf[digests_offset].digest_buf[DGST_R1],
718 digests_buf[digests_offset].digest_buf[DGST_R2],
719 digests_buf[digests_offset].digest_buf[DGST_R3]
726 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
756 u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
758 out_len = (out_len >= 7) ? 7 : out_len;
762 transform_netntlmv1_key (w0[0], w0[1], key);
764 const u32x c = key[0];
765 const u32x d = key[1];
770 _des_crypt_keysetup (c, d, Kc, Kd, s_skb);
774 data[0] = LM_IV_0_IP_RR3;
775 data[1] = LM_IV_1_IP_RR3;
779 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
781 const u32x r0 = iv[0];
782 const u32x r1 = iv[1];
786 #include VECT_COMPARE_S
790 extern "C" __global__ void __launch_bounds__ (256, 1) m03000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
794 extern "C" __global__ void __launch_bounds__ (256, 1) m03000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)