2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
27 #include "include/rp_gpu.h"
31 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
32 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
36 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
37 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
40 #define PERM_OP(a,b,tt,n,m) \
50 #define HPERM_OP(a,tt,n,m) \
56 tt = tt >> (16 + n); \
60 __device__ __constant__ u32 c_SPtrans[8][64] =
63 0x02080800, 0x00080000, 0x02000002, 0x02080802,
64 0x02000000, 0x00080802, 0x00080002, 0x02000002,
65 0x00080802, 0x02080800, 0x02080000, 0x00000802,
66 0x02000802, 0x02000000, 0x00000000, 0x00080002,
67 0x00080000, 0x00000002, 0x02000800, 0x00080800,
68 0x02080802, 0x02080000, 0x00000802, 0x02000800,
69 0x00000002, 0x00000800, 0x00080800, 0x02080002,
70 0x00000800, 0x02000802, 0x02080002, 0x00000000,
71 0x00000000, 0x02080802, 0x02000800, 0x00080002,
72 0x02080800, 0x00080000, 0x00000802, 0x02000800,
73 0x02080002, 0x00000800, 0x00080800, 0x02000002,
74 0x00080802, 0x00000002, 0x02000002, 0x02080000,
75 0x02080802, 0x00080800, 0x02080000, 0x02000802,
76 0x02000000, 0x00000802, 0x00080002, 0x00000000,
77 0x00080000, 0x02000000, 0x02000802, 0x02080800,
78 0x00000002, 0x02080002, 0x00000800, 0x00080802,
80 0x40108010, 0x00000000, 0x00108000, 0x40100000,
81 0x40000010, 0x00008010, 0x40008000, 0x00108000,
82 0x00008000, 0x40100010, 0x00000010, 0x40008000,
83 0x00100010, 0x40108000, 0x40100000, 0x00000010,
84 0x00100000, 0x40008010, 0x40100010, 0x00008000,
85 0x00108010, 0x40000000, 0x00000000, 0x00100010,
86 0x40008010, 0x00108010, 0x40108000, 0x40000010,
87 0x40000000, 0x00100000, 0x00008010, 0x40108010,
88 0x00100010, 0x40108000, 0x40008000, 0x00108010,
89 0x40108010, 0x00100010, 0x40000010, 0x00000000,
90 0x40000000, 0x00008010, 0x00100000, 0x40100010,
91 0x00008000, 0x40000000, 0x00108010, 0x40008010,
92 0x40108000, 0x00008000, 0x00000000, 0x40000010,
93 0x00000010, 0x40108010, 0x00108000, 0x40100000,
94 0x40100010, 0x00100000, 0x00008010, 0x40008000,
95 0x40008010, 0x00000010, 0x40100000, 0x00108000,
97 0x04000001, 0x04040100, 0x00000100, 0x04000101,
98 0x00040001, 0x04000000, 0x04000101, 0x00040100,
99 0x04000100, 0x00040000, 0x04040000, 0x00000001,
100 0x04040101, 0x00000101, 0x00000001, 0x04040001,
101 0x00000000, 0x00040001, 0x04040100, 0x00000100,
102 0x00000101, 0x04040101, 0x00040000, 0x04000001,
103 0x04040001, 0x04000100, 0x00040101, 0x04040000,
104 0x00040100, 0x00000000, 0x04000000, 0x00040101,
105 0x04040100, 0x00000100, 0x00000001, 0x00040000,
106 0x00000101, 0x00040001, 0x04040000, 0x04000101,
107 0x00000000, 0x04040100, 0x00040100, 0x04040001,
108 0x00040001, 0x04000000, 0x04040101, 0x00000001,
109 0x00040101, 0x04000001, 0x04000000, 0x04040101,
110 0x00040000, 0x04000100, 0x04000101, 0x00040100,
111 0x04000100, 0x00000000, 0x04040001, 0x00000101,
112 0x04000001, 0x00040101, 0x00000100, 0x04040000,
114 0x00401008, 0x10001000, 0x00000008, 0x10401008,
115 0x00000000, 0x10400000, 0x10001008, 0x00400008,
116 0x10401000, 0x10000008, 0x10000000, 0x00001008,
117 0x10000008, 0x00401008, 0x00400000, 0x10000000,
118 0x10400008, 0x00401000, 0x00001000, 0x00000008,
119 0x00401000, 0x10001008, 0x10400000, 0x00001000,
120 0x00001008, 0x00000000, 0x00400008, 0x10401000,
121 0x10001000, 0x10400008, 0x10401008, 0x00400000,
122 0x10400008, 0x00001008, 0x00400000, 0x10000008,
123 0x00401000, 0x10001000, 0x00000008, 0x10400000,
124 0x10001008, 0x00000000, 0x00001000, 0x00400008,
125 0x00000000, 0x10400008, 0x10401000, 0x00001000,
126 0x10000000, 0x10401008, 0x00401008, 0x00400000,
127 0x10401008, 0x00000008, 0x10001000, 0x00401008,
128 0x00400008, 0x00401000, 0x10400000, 0x10001008,
129 0x00001008, 0x10000000, 0x10000008, 0x10401000,
131 0x08000000, 0x00010000, 0x00000400, 0x08010420,
132 0x08010020, 0x08000400, 0x00010420, 0x08010000,
133 0x00010000, 0x00000020, 0x08000020, 0x00010400,
134 0x08000420, 0x08010020, 0x08010400, 0x00000000,
135 0x00010400, 0x08000000, 0x00010020, 0x00000420,
136 0x08000400, 0x00010420, 0x00000000, 0x08000020,
137 0x00000020, 0x08000420, 0x08010420, 0x00010020,
138 0x08010000, 0x00000400, 0x00000420, 0x08010400,
139 0x08010400, 0x08000420, 0x00010020, 0x08010000,
140 0x00010000, 0x00000020, 0x08000020, 0x08000400,
141 0x08000000, 0x00010400, 0x08010420, 0x00000000,
142 0x00010420, 0x08000000, 0x00000400, 0x00010020,
143 0x08000420, 0x00000400, 0x00000000, 0x08010420,
144 0x08010020, 0x08010400, 0x00000420, 0x00010000,
145 0x00010400, 0x08010020, 0x08000400, 0x00000420,
146 0x00000020, 0x00010420, 0x08010000, 0x08000020,
148 0x80000040, 0x00200040, 0x00000000, 0x80202000,
149 0x00200040, 0x00002000, 0x80002040, 0x00200000,
150 0x00002040, 0x80202040, 0x00202000, 0x80000000,
151 0x80002000, 0x80000040, 0x80200000, 0x00202040,
152 0x00200000, 0x80002040, 0x80200040, 0x00000000,
153 0x00002000, 0x00000040, 0x80202000, 0x80200040,
154 0x80202040, 0x80200000, 0x80000000, 0x00002040,
155 0x00000040, 0x00202000, 0x00202040, 0x80002000,
156 0x00002040, 0x80000000, 0x80002000, 0x00202040,
157 0x80202000, 0x00200040, 0x00000000, 0x80002000,
158 0x80000000, 0x00002000, 0x80200040, 0x00200000,
159 0x00200040, 0x80202040, 0x00202000, 0x00000040,
160 0x80202040, 0x00202000, 0x00200000, 0x80002040,
161 0x80000040, 0x80200000, 0x00202040, 0x00000000,
162 0x00002000, 0x80000040, 0x80002040, 0x80202000,
163 0x80200000, 0x00002040, 0x00000040, 0x80200040,
165 0x00004000, 0x00000200, 0x01000200, 0x01000004,
166 0x01004204, 0x00004004, 0x00004200, 0x00000000,
167 0x01000000, 0x01000204, 0x00000204, 0x01004000,
168 0x00000004, 0x01004200, 0x01004000, 0x00000204,
169 0x01000204, 0x00004000, 0x00004004, 0x01004204,
170 0x00000000, 0x01000200, 0x01000004, 0x00004200,
171 0x01004004, 0x00004204, 0x01004200, 0x00000004,
172 0x00004204, 0x01004004, 0x00000200, 0x01000000,
173 0x00004204, 0x01004000, 0x01004004, 0x00000204,
174 0x00004000, 0x00000200, 0x01000000, 0x01004004,
175 0x01000204, 0x00004204, 0x00004200, 0x00000000,
176 0x00000200, 0x01000004, 0x00000004, 0x01000200,
177 0x00000000, 0x01000204, 0x01000200, 0x00004200,
178 0x00000204, 0x00004000, 0x01004204, 0x01000000,
179 0x01004200, 0x00000004, 0x00004004, 0x01004204,
180 0x01000004, 0x01004200, 0x01004000, 0x00004004,
182 0x20800080, 0x20820000, 0x00020080, 0x00000000,
183 0x20020000, 0x00800080, 0x20800000, 0x20820080,
184 0x00000080, 0x20000000, 0x00820000, 0x00020080,
185 0x00820080, 0x20020080, 0x20000080, 0x20800000,
186 0x00020000, 0x00820080, 0x00800080, 0x20020000,
187 0x20820080, 0x20000080, 0x00000000, 0x00820000,
188 0x20000000, 0x00800000, 0x20020080, 0x20800080,
189 0x00800000, 0x00020000, 0x20820000, 0x00000080,
190 0x00800000, 0x00020000, 0x20000080, 0x20820080,
191 0x00020080, 0x20000000, 0x00000000, 0x00820000,
192 0x20800080, 0x20020080, 0x20020000, 0x00800080,
193 0x20820000, 0x00000080, 0x00800080, 0x20020000,
194 0x20820080, 0x00800000, 0x20800000, 0x20000080,
195 0x00820000, 0x00020080, 0x20020080, 0x20800000,
196 0x00000080, 0x20820000, 0x00820080, 0x00000000,
197 0x20000000, 0x20800080, 0x00020000, 0x00820080,
200 __device__ __constant__ u32 c_skb[8][64] =
202 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
203 0x00000000, 0x00000010, 0x20000000, 0x20000010,
204 0x00010000, 0x00010010, 0x20010000, 0x20010010,
205 0x00000800, 0x00000810, 0x20000800, 0x20000810,
206 0x00010800, 0x00010810, 0x20010800, 0x20010810,
207 0x00000020, 0x00000030, 0x20000020, 0x20000030,
208 0x00010020, 0x00010030, 0x20010020, 0x20010030,
209 0x00000820, 0x00000830, 0x20000820, 0x20000830,
210 0x00010820, 0x00010830, 0x20010820, 0x20010830,
211 0x00080000, 0x00080010, 0x20080000, 0x20080010,
212 0x00090000, 0x00090010, 0x20090000, 0x20090010,
213 0x00080800, 0x00080810, 0x20080800, 0x20080810,
214 0x00090800, 0x00090810, 0x20090800, 0x20090810,
215 0x00080020, 0x00080030, 0x20080020, 0x20080030,
216 0x00090020, 0x00090030, 0x20090020, 0x20090030,
217 0x00080820, 0x00080830, 0x20080820, 0x20080830,
218 0x00090820, 0x00090830, 0x20090820, 0x20090830,
219 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
220 0x00000000, 0x02000000, 0x00002000, 0x02002000,
221 0x00200000, 0x02200000, 0x00202000, 0x02202000,
222 0x00000004, 0x02000004, 0x00002004, 0x02002004,
223 0x00200004, 0x02200004, 0x00202004, 0x02202004,
224 0x00000400, 0x02000400, 0x00002400, 0x02002400,
225 0x00200400, 0x02200400, 0x00202400, 0x02202400,
226 0x00000404, 0x02000404, 0x00002404, 0x02002404,
227 0x00200404, 0x02200404, 0x00202404, 0x02202404,
228 0x10000000, 0x12000000, 0x10002000, 0x12002000,
229 0x10200000, 0x12200000, 0x10202000, 0x12202000,
230 0x10000004, 0x12000004, 0x10002004, 0x12002004,
231 0x10200004, 0x12200004, 0x10202004, 0x12202004,
232 0x10000400, 0x12000400, 0x10002400, 0x12002400,
233 0x10200400, 0x12200400, 0x10202400, 0x12202400,
234 0x10000404, 0x12000404, 0x10002404, 0x12002404,
235 0x10200404, 0x12200404, 0x10202404, 0x12202404,
236 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
237 0x00000000, 0x00000001, 0x00040000, 0x00040001,
238 0x01000000, 0x01000001, 0x01040000, 0x01040001,
239 0x00000002, 0x00000003, 0x00040002, 0x00040003,
240 0x01000002, 0x01000003, 0x01040002, 0x01040003,
241 0x00000200, 0x00000201, 0x00040200, 0x00040201,
242 0x01000200, 0x01000201, 0x01040200, 0x01040201,
243 0x00000202, 0x00000203, 0x00040202, 0x00040203,
244 0x01000202, 0x01000203, 0x01040202, 0x01040203,
245 0x08000000, 0x08000001, 0x08040000, 0x08040001,
246 0x09000000, 0x09000001, 0x09040000, 0x09040001,
247 0x08000002, 0x08000003, 0x08040002, 0x08040003,
248 0x09000002, 0x09000003, 0x09040002, 0x09040003,
249 0x08000200, 0x08000201, 0x08040200, 0x08040201,
250 0x09000200, 0x09000201, 0x09040200, 0x09040201,
251 0x08000202, 0x08000203, 0x08040202, 0x08040203,
252 0x09000202, 0x09000203, 0x09040202, 0x09040203,
253 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
254 0x00000000, 0x00100000, 0x00000100, 0x00100100,
255 0x00000008, 0x00100008, 0x00000108, 0x00100108,
256 0x00001000, 0x00101000, 0x00001100, 0x00101100,
257 0x00001008, 0x00101008, 0x00001108, 0x00101108,
258 0x04000000, 0x04100000, 0x04000100, 0x04100100,
259 0x04000008, 0x04100008, 0x04000108, 0x04100108,
260 0x04001000, 0x04101000, 0x04001100, 0x04101100,
261 0x04001008, 0x04101008, 0x04001108, 0x04101108,
262 0x00020000, 0x00120000, 0x00020100, 0x00120100,
263 0x00020008, 0x00120008, 0x00020108, 0x00120108,
264 0x00021000, 0x00121000, 0x00021100, 0x00121100,
265 0x00021008, 0x00121008, 0x00021108, 0x00121108,
266 0x04020000, 0x04120000, 0x04020100, 0x04120100,
267 0x04020008, 0x04120008, 0x04020108, 0x04120108,
268 0x04021000, 0x04121000, 0x04021100, 0x04121100,
269 0x04021008, 0x04121008, 0x04021108, 0x04121108,
270 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
271 0x00000000, 0x10000000, 0x00010000, 0x10010000,
272 0x00000004, 0x10000004, 0x00010004, 0x10010004,
273 0x20000000, 0x30000000, 0x20010000, 0x30010000,
274 0x20000004, 0x30000004, 0x20010004, 0x30010004,
275 0x00100000, 0x10100000, 0x00110000, 0x10110000,
276 0x00100004, 0x10100004, 0x00110004, 0x10110004,
277 0x20100000, 0x30100000, 0x20110000, 0x30110000,
278 0x20100004, 0x30100004, 0x20110004, 0x30110004,
279 0x00001000, 0x10001000, 0x00011000, 0x10011000,
280 0x00001004, 0x10001004, 0x00011004, 0x10011004,
281 0x20001000, 0x30001000, 0x20011000, 0x30011000,
282 0x20001004, 0x30001004, 0x20011004, 0x30011004,
283 0x00101000, 0x10101000, 0x00111000, 0x10111000,
284 0x00101004, 0x10101004, 0x00111004, 0x10111004,
285 0x20101000, 0x30101000, 0x20111000, 0x30111000,
286 0x20101004, 0x30101004, 0x20111004, 0x30111004,
287 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
288 0x00000000, 0x08000000, 0x00000008, 0x08000008,
289 0x00000400, 0x08000400, 0x00000408, 0x08000408,
290 0x00020000, 0x08020000, 0x00020008, 0x08020008,
291 0x00020400, 0x08020400, 0x00020408, 0x08020408,
292 0x00000001, 0x08000001, 0x00000009, 0x08000009,
293 0x00000401, 0x08000401, 0x00000409, 0x08000409,
294 0x00020001, 0x08020001, 0x00020009, 0x08020009,
295 0x00020401, 0x08020401, 0x00020409, 0x08020409,
296 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
297 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
298 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
299 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
300 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
301 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
302 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
303 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
304 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
305 0x00000000, 0x00000100, 0x00080000, 0x00080100,
306 0x01000000, 0x01000100, 0x01080000, 0x01080100,
307 0x00000010, 0x00000110, 0x00080010, 0x00080110,
308 0x01000010, 0x01000110, 0x01080010, 0x01080110,
309 0x00200000, 0x00200100, 0x00280000, 0x00280100,
310 0x01200000, 0x01200100, 0x01280000, 0x01280100,
311 0x00200010, 0x00200110, 0x00280010, 0x00280110,
312 0x01200010, 0x01200110, 0x01280010, 0x01280110,
313 0x00000200, 0x00000300, 0x00080200, 0x00080300,
314 0x01000200, 0x01000300, 0x01080200, 0x01080300,
315 0x00000210, 0x00000310, 0x00080210, 0x00080310,
316 0x01000210, 0x01000310, 0x01080210, 0x01080310,
317 0x00200200, 0x00200300, 0x00280200, 0x00280300,
318 0x01200200, 0x01200300, 0x01280200, 0x01280300,
319 0x00200210, 0x00200310, 0x00280210, 0x00280310,
320 0x01200210, 0x01200310, 0x01280210, 0x01280310,
321 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
322 0x00000000, 0x04000000, 0x00040000, 0x04040000,
323 0x00000002, 0x04000002, 0x00040002, 0x04040002,
324 0x00002000, 0x04002000, 0x00042000, 0x04042000,
325 0x00002002, 0x04002002, 0x00042002, 0x04042002,
326 0x00000020, 0x04000020, 0x00040020, 0x04040020,
327 0x00000022, 0x04000022, 0x00040022, 0x04040022,
328 0x00002020, 0x04002020, 0x00042020, 0x04042020,
329 0x00002022, 0x04002022, 0x00042022, 0x04042022,
330 0x00000800, 0x04000800, 0x00040800, 0x04040800,
331 0x00000802, 0x04000802, 0x00040802, 0x04040802,
332 0x00002800, 0x04002800, 0x00042800, 0x04042800,
333 0x00002802, 0x04002802, 0x00042802, 0x04042802,
334 0x00000820, 0x04000820, 0x00040820, 0x04040820,
335 0x00000822, 0x04000822, 0x00040822, 0x04040822,
336 0x00002820, 0x04002820, 0x00042820, 0x04042820,
337 0x00002822, 0x04002822, 0x00042822, 0x04042822
341 #define BOX(i,n,S) u32x ((S)[(n)][(i)])
345 #define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y])
348 __device__ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64])
356 for (u32 i = 0; i < 16; i++)
359 u32x t = Kd[i] ^ rotl32 (r, 28u);
361 l ^= BOX (((u >> 2) & 0x3f), 0, s_SPtrans)
362 | BOX (((u >> 10) & 0x3f), 2, s_SPtrans)
363 | BOX (((u >> 18) & 0x3f), 4, s_SPtrans)
364 | BOX (((u >> 26) & 0x3f), 6, s_SPtrans)
365 | BOX (((t >> 2) & 0x3f), 1, s_SPtrans)
366 | BOX (((t >> 10) & 0x3f), 3, s_SPtrans)
367 | BOX (((t >> 18) & 0x3f), 5, s_SPtrans)
368 | BOX (((t >> 26) & 0x3f), 7, s_SPtrans);
379 __device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64])
383 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
384 HPERM_OP (c, tt, 2, 0xcccc0000);
385 HPERM_OP (d, tt, 2, 0xcccc0000);
386 PERM_OP (d, c, tt, 1, 0x55555555);
387 PERM_OP (c, d, tt, 8, 0x00ff00ff);
388 PERM_OP (d, c, tt, 1, 0x55555555);
390 d = ((d & 0x000000ff) << 16)
391 | ((d & 0x0000ff00) << 0)
392 | ((d & 0x00ff0000) >> 16)
393 | ((c & 0xf0000000) >> 4);
398 for (u32 i = 0; i < 16; i++)
400 const u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
401 const u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 };
403 c = c >> shifts3s0[i] | c << shifts3s1[i];
404 d = d >> shifts3s0[i] | d << shifts3s1[i];
409 u32x s = BOX ((( c >> 0) & 0x3f), 0, s_skb)
410 | BOX ((((c >> 6) & 0x03)
411 | ((c >> 7) & 0x3c)), 1, s_skb)
412 | BOX ((((c >> 13) & 0x0f)
413 | ((c >> 14) & 0x30)), 2, s_skb)
414 | BOX ((((c >> 20) & 0x01)
416 | ((c >> 22) & 0x38)), 3, s_skb);
418 u32x t = BOX ((( d >> 0) & 0x3f), 4, s_skb)
419 | BOX ((((d >> 7) & 0x03)
420 | ((d >> 8) & 0x3c)), 5, s_skb)
421 | BOX ((((d >> 15) & 0x3f)), 6, s_skb)
422 | BOX ((((d >> 21) & 0x0f)
423 | ((d >> 22) & 0x30)), 7, s_skb);
425 #if __CUDA_ARCH__ >= 200
426 Kc[i] = __byte_perm (s, t, 0x5410);
427 Kd[i] = __byte_perm (s, t, 0x7632);
429 Kc[i] = ((t << 16) | (s & 0x0000ffff));
430 Kd[i] = ((s >> 16) | (t & 0xffff0000));
433 Kc[i] = rotl32 (Kc[i], 2u);
434 Kd[i] = rotl32 (Kd[i], 2u);
438 __device__ static void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2])
442 t[0] = (w0 >> 0) & 0xff;
443 t[1] = (w0 >> 8) & 0xff;
444 t[2] = (w0 >> 16) & 0xff;
445 t[3] = (w0 >> 24) & 0xff;
446 t[4] = (w1 >> 0) & 0xff;
447 t[5] = (w1 >> 8) & 0xff;
448 t[6] = (w1 >> 16) & 0xff;
449 t[7] = (w1 >> 24) & 0xff;
454 k[1] = (t[0] << 7) | (t[1] >> 1);
455 k[2] = (t[1] << 6) | (t[2] >> 2);
456 k[3] = (t[2] << 5) | (t[3] >> 3);
457 k[4] = (t[3] << 4) | (t[4] >> 4);
458 k[5] = (t[4] << 3) | (t[5] >> 5);
459 k[6] = (t[5] << 2) | (t[6] >> 6);
462 out[0] = ((k[0] & 0xff) << 0)
463 | ((k[1] & 0xff) << 8)
464 | ((k[2] & 0xff) << 16)
465 | ((k[3] & 0xff) << 24);
467 out[1] = ((k[4] & 0xff) << 0)
468 | ((k[5] & 0xff) << 8)
469 | ((k[6] & 0xff) << 16)
470 | ((k[7] & 0xff) << 24);
473 __device__ __constant__ gpu_rule_t c_rules[1024];
475 extern "C" __global__ void __launch_bounds__ (256, 1) m05500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
481 const u32 lid = threadIdx.x;
487 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
491 pw_buf0[0] = pws[gid].i[ 0];
492 pw_buf0[1] = pws[gid].i[ 1];
493 pw_buf0[2] = pws[gid].i[ 2];
494 pw_buf0[3] = pws[gid].i[ 3];
498 pw_buf1[0] = pws[gid].i[ 4];
499 pw_buf1[1] = pws[gid].i[ 5];
500 pw_buf1[2] = pws[gid].i[ 6];
501 pw_buf1[3] = pws[gid].i[ 7];
503 const u32 pw_len = pws[gid].pw_len;
509 __shared__ u32 s_SPtrans[8][64];
510 __shared__ u32 s_skb[8][64];
514 s_SPtrans[0][lid] = c_SPtrans[0][lid];
515 s_SPtrans[1][lid] = c_SPtrans[1][lid];
516 s_SPtrans[2][lid] = c_SPtrans[2][lid];
517 s_SPtrans[3][lid] = c_SPtrans[3][lid];
518 s_SPtrans[4][lid] = c_SPtrans[4][lid];
519 s_SPtrans[5][lid] = c_SPtrans[5][lid];
520 s_SPtrans[6][lid] = c_SPtrans[6][lid];
521 s_SPtrans[7][lid] = c_SPtrans[7][lid];
523 s_skb[0][lid] = c_skb[0][lid];
524 s_skb[1][lid] = c_skb[1][lid];
525 s_skb[2][lid] = c_skb[2][lid];
526 s_skb[3][lid] = c_skb[3][lid];
527 s_skb[4][lid] = c_skb[4][lid];
528 s_skb[5][lid] = c_skb[5][lid];
529 s_skb[6][lid] = c_skb[6][lid];
530 s_skb[7][lid] = c_skb[7][lid];
535 if (gid >= gid_max) return;
541 const u32 s0 = salt_bufs[salt_pos].salt_buf[0];
542 const u32 s1 = salt_bufs[salt_pos].salt_buf[1];
543 const u32 s2 = salt_bufs[salt_pos].salt_buf[2];
554 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
584 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
586 append_0x80_2 (w0, w1, out_len);
593 make_unicode (w0, w0_t, w1_t);
594 make_unicode (w1, w2_t, w3_t);
596 w3_t[2] = out_len * 8 * 2;
603 MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
604 MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
605 MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02);
606 MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03);
607 MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00);
608 MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01);
609 MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02);
610 MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03);
611 MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00);
612 MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01);
613 MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02);
614 MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03);
615 MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00);
616 MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01);
617 MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02);
618 MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03);
620 MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10);
621 MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11);
622 MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12);
623 MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13);
624 MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10);
625 MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11);
626 MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12);
627 MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13);
628 MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10);
629 MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11);
630 MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12);
631 MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13);
632 MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10);
633 MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11);
634 MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12);
635 MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13);
637 MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20);
638 MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21);
639 MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22);
640 MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23);
641 MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20);
642 MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21);
643 MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22);
644 MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23);
645 MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20);
646 MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21);
647 MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22);
648 MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23);
649 MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20);
650 MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21);
652 if (s2 != ((d + MD4M_D) >> 16)) continue;
654 MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22);
655 MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23);
668 transform_netntlmv1_key (a, b, key);
673 _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb);
677 _des_crypt_encrypt (iv1, data, Kc, Kd, s_SPtrans);
683 transform_netntlmv1_key (((b >> 24) | (c << 8)), ((c >> 24) | (d << 8)), key);
685 _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb);
689 _des_crypt_encrypt (iv2, data, Kc, Kd, s_SPtrans);
695 const u32x r0 = iv1[0];
696 const u32x r1 = iv1[1];
697 const u32x r2 = iv2[0];
698 const u32x r3 = iv2[1];
700 #include VECT_COMPARE_M
704 extern "C" __global__ void __launch_bounds__ (256, 1) m05500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
708 extern "C" __global__ void __launch_bounds__ (256, 1) m05500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
712 extern "C" __global__ void __launch_bounds__ (256, 1) m05500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
718 const u32 lid = threadIdx.x;
724 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
728 pw_buf0[0] = pws[gid].i[ 0];
729 pw_buf0[1] = pws[gid].i[ 1];
730 pw_buf0[2] = pws[gid].i[ 2];
731 pw_buf0[3] = pws[gid].i[ 3];
735 pw_buf1[0] = pws[gid].i[ 4];
736 pw_buf1[1] = pws[gid].i[ 5];
737 pw_buf1[2] = pws[gid].i[ 6];
738 pw_buf1[3] = pws[gid].i[ 7];
740 const u32 pw_len = pws[gid].pw_len;
746 __shared__ u32 s_SPtrans[8][64];
747 __shared__ u32 s_skb[8][64];
751 s_SPtrans[0][lid] = c_SPtrans[0][lid];
752 s_SPtrans[1][lid] = c_SPtrans[1][lid];
753 s_SPtrans[2][lid] = c_SPtrans[2][lid];
754 s_SPtrans[3][lid] = c_SPtrans[3][lid];
755 s_SPtrans[4][lid] = c_SPtrans[4][lid];
756 s_SPtrans[5][lid] = c_SPtrans[5][lid];
757 s_SPtrans[6][lid] = c_SPtrans[6][lid];
758 s_SPtrans[7][lid] = c_SPtrans[7][lid];
760 s_skb[0][lid] = c_skb[0][lid];
761 s_skb[1][lid] = c_skb[1][lid];
762 s_skb[2][lid] = c_skb[2][lid];
763 s_skb[3][lid] = c_skb[3][lid];
764 s_skb[4][lid] = c_skb[4][lid];
765 s_skb[5][lid] = c_skb[5][lid];
766 s_skb[6][lid] = c_skb[6][lid];
767 s_skb[7][lid] = c_skb[7][lid];
772 if (gid >= gid_max) return;
778 const u32 s0 = salt_bufs[salt_pos].salt_buf[0];
779 const u32 s1 = salt_bufs[salt_pos].salt_buf[1];
780 const u32 s2 = salt_bufs[salt_pos].salt_buf[2];
791 const u32 search[4] =
793 digests_buf[digests_offset].digest_buf[DGST_R0],
794 digests_buf[digests_offset].digest_buf[DGST_R1],
795 digests_buf[digests_offset].digest_buf[DGST_R2],
796 digests_buf[digests_offset].digest_buf[DGST_R3]
803 for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++)
833 const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len);
835 append_0x80_2 (w0, w1, out_len);
842 make_unicode (w0, w0_t, w1_t);
843 make_unicode (w1, w2_t, w3_t);
845 w3_t[2] = out_len * 8 * 2;
852 MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
853 MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
854 MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02);
855 MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03);
856 MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00);
857 MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01);
858 MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02);
859 MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03);
860 MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00);
861 MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01);
862 MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02);
863 MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03);
864 MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00);
865 MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01);
866 MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02);
867 MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03);
869 MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10);
870 MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11);
871 MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12);
872 MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13);
873 MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10);
874 MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11);
875 MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12);
876 MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13);
877 MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10);
878 MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11);
879 MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12);
880 MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13);
881 MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10);
882 MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11);
883 MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12);
884 MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13);
886 MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20);
887 MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21);
888 MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22);
889 MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23);
890 MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20);
891 MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21);
892 MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22);
893 MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23);
894 MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20);
895 MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21);
896 MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22);
897 MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23);
898 MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20);
899 MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21);
901 if (s2 != ((d + MD4M_D) >> 16)) continue;
903 MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22);
904 MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23);
917 transform_netntlmv1_key (a, b, key);
922 _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb);
926 _des_crypt_encrypt (iv1, data, Kc, Kd, s_SPtrans);
932 transform_netntlmv1_key (((b >> 24) | (c << 8)), ((c >> 24) | (d << 8)), key);
934 _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb);
938 _des_crypt_encrypt (iv2, data, Kc, Kd, s_SPtrans);
944 const u32x r0 = iv1[0];
945 const u32x r1 = iv1[1];
946 const u32x r2 = iv2[0];
947 const u32x r3 = iv2[1];
949 #include VECT_COMPARE_S
953 extern "C" __global__ void __launch_bounds__ (256, 1) m05500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
957 extern "C" __global__ void __launch_bounds__ (256, 1) m05500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)