2 * Author......: Jens Steube <jens.steube@gmail.com>
8 #include "include/constants.h"
9 #include "include/kernel_vendor.h"
24 #include "include/kernel_functions.c"
26 #include "common_nv.c"
29 #define VECT_COMPARE_S "check_single_vect1_comp4.c"
30 #define VECT_COMPARE_M "check_multi_vect1_comp4.c"
34 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
35 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
39 #define VECT_COMPARE_S "check_single_vect2_comp4.c"
40 #define VECT_COMPARE_M "check_multi_vect2_comp4.c"
43 #define PERM_OP(a,b,tt,n,m) \
53 #define HPERM_OP(a,tt,n,m) \
59 tt = tt >> (16 + n); \
63 __device__ __constant__ u32 c_SPtrans[8][64] =
66 0x02080800, 0x00080000, 0x02000002, 0x02080802,
67 0x02000000, 0x00080802, 0x00080002, 0x02000002,
68 0x00080802, 0x02080800, 0x02080000, 0x00000802,
69 0x02000802, 0x02000000, 0x00000000, 0x00080002,
70 0x00080000, 0x00000002, 0x02000800, 0x00080800,
71 0x02080802, 0x02080000, 0x00000802, 0x02000800,
72 0x00000002, 0x00000800, 0x00080800, 0x02080002,
73 0x00000800, 0x02000802, 0x02080002, 0x00000000,
74 0x00000000, 0x02080802, 0x02000800, 0x00080002,
75 0x02080800, 0x00080000, 0x00000802, 0x02000800,
76 0x02080002, 0x00000800, 0x00080800, 0x02000002,
77 0x00080802, 0x00000002, 0x02000002, 0x02080000,
78 0x02080802, 0x00080800, 0x02080000, 0x02000802,
79 0x02000000, 0x00000802, 0x00080002, 0x00000000,
80 0x00080000, 0x02000000, 0x02000802, 0x02080800,
81 0x00000002, 0x02080002, 0x00000800, 0x00080802,
83 0x40108010, 0x00000000, 0x00108000, 0x40100000,
84 0x40000010, 0x00008010, 0x40008000, 0x00108000,
85 0x00008000, 0x40100010, 0x00000010, 0x40008000,
86 0x00100010, 0x40108000, 0x40100000, 0x00000010,
87 0x00100000, 0x40008010, 0x40100010, 0x00008000,
88 0x00108010, 0x40000000, 0x00000000, 0x00100010,
89 0x40008010, 0x00108010, 0x40108000, 0x40000010,
90 0x40000000, 0x00100000, 0x00008010, 0x40108010,
91 0x00100010, 0x40108000, 0x40008000, 0x00108010,
92 0x40108010, 0x00100010, 0x40000010, 0x00000000,
93 0x40000000, 0x00008010, 0x00100000, 0x40100010,
94 0x00008000, 0x40000000, 0x00108010, 0x40008010,
95 0x40108000, 0x00008000, 0x00000000, 0x40000010,
96 0x00000010, 0x40108010, 0x00108000, 0x40100000,
97 0x40100010, 0x00100000, 0x00008010, 0x40008000,
98 0x40008010, 0x00000010, 0x40100000, 0x00108000,
100 0x04000001, 0x04040100, 0x00000100, 0x04000101,
101 0x00040001, 0x04000000, 0x04000101, 0x00040100,
102 0x04000100, 0x00040000, 0x04040000, 0x00000001,
103 0x04040101, 0x00000101, 0x00000001, 0x04040001,
104 0x00000000, 0x00040001, 0x04040100, 0x00000100,
105 0x00000101, 0x04040101, 0x00040000, 0x04000001,
106 0x04040001, 0x04000100, 0x00040101, 0x04040000,
107 0x00040100, 0x00000000, 0x04000000, 0x00040101,
108 0x04040100, 0x00000100, 0x00000001, 0x00040000,
109 0x00000101, 0x00040001, 0x04040000, 0x04000101,
110 0x00000000, 0x04040100, 0x00040100, 0x04040001,
111 0x00040001, 0x04000000, 0x04040101, 0x00000001,
112 0x00040101, 0x04000001, 0x04000000, 0x04040101,
113 0x00040000, 0x04000100, 0x04000101, 0x00040100,
114 0x04000100, 0x00000000, 0x04040001, 0x00000101,
115 0x04000001, 0x00040101, 0x00000100, 0x04040000,
117 0x00401008, 0x10001000, 0x00000008, 0x10401008,
118 0x00000000, 0x10400000, 0x10001008, 0x00400008,
119 0x10401000, 0x10000008, 0x10000000, 0x00001008,
120 0x10000008, 0x00401008, 0x00400000, 0x10000000,
121 0x10400008, 0x00401000, 0x00001000, 0x00000008,
122 0x00401000, 0x10001008, 0x10400000, 0x00001000,
123 0x00001008, 0x00000000, 0x00400008, 0x10401000,
124 0x10001000, 0x10400008, 0x10401008, 0x00400000,
125 0x10400008, 0x00001008, 0x00400000, 0x10000008,
126 0x00401000, 0x10001000, 0x00000008, 0x10400000,
127 0x10001008, 0x00000000, 0x00001000, 0x00400008,
128 0x00000000, 0x10400008, 0x10401000, 0x00001000,
129 0x10000000, 0x10401008, 0x00401008, 0x00400000,
130 0x10401008, 0x00000008, 0x10001000, 0x00401008,
131 0x00400008, 0x00401000, 0x10400000, 0x10001008,
132 0x00001008, 0x10000000, 0x10000008, 0x10401000,
134 0x08000000, 0x00010000, 0x00000400, 0x08010420,
135 0x08010020, 0x08000400, 0x00010420, 0x08010000,
136 0x00010000, 0x00000020, 0x08000020, 0x00010400,
137 0x08000420, 0x08010020, 0x08010400, 0x00000000,
138 0x00010400, 0x08000000, 0x00010020, 0x00000420,
139 0x08000400, 0x00010420, 0x00000000, 0x08000020,
140 0x00000020, 0x08000420, 0x08010420, 0x00010020,
141 0x08010000, 0x00000400, 0x00000420, 0x08010400,
142 0x08010400, 0x08000420, 0x00010020, 0x08010000,
143 0x00010000, 0x00000020, 0x08000020, 0x08000400,
144 0x08000000, 0x00010400, 0x08010420, 0x00000000,
145 0x00010420, 0x08000000, 0x00000400, 0x00010020,
146 0x08000420, 0x00000400, 0x00000000, 0x08010420,
147 0x08010020, 0x08010400, 0x00000420, 0x00010000,
148 0x00010400, 0x08010020, 0x08000400, 0x00000420,
149 0x00000020, 0x00010420, 0x08010000, 0x08000020,
151 0x80000040, 0x00200040, 0x00000000, 0x80202000,
152 0x00200040, 0x00002000, 0x80002040, 0x00200000,
153 0x00002040, 0x80202040, 0x00202000, 0x80000000,
154 0x80002000, 0x80000040, 0x80200000, 0x00202040,
155 0x00200000, 0x80002040, 0x80200040, 0x00000000,
156 0x00002000, 0x00000040, 0x80202000, 0x80200040,
157 0x80202040, 0x80200000, 0x80000000, 0x00002040,
158 0x00000040, 0x00202000, 0x00202040, 0x80002000,
159 0x00002040, 0x80000000, 0x80002000, 0x00202040,
160 0x80202000, 0x00200040, 0x00000000, 0x80002000,
161 0x80000000, 0x00002000, 0x80200040, 0x00200000,
162 0x00200040, 0x80202040, 0x00202000, 0x00000040,
163 0x80202040, 0x00202000, 0x00200000, 0x80002040,
164 0x80000040, 0x80200000, 0x00202040, 0x00000000,
165 0x00002000, 0x80000040, 0x80002040, 0x80202000,
166 0x80200000, 0x00002040, 0x00000040, 0x80200040,
168 0x00004000, 0x00000200, 0x01000200, 0x01000004,
169 0x01004204, 0x00004004, 0x00004200, 0x00000000,
170 0x01000000, 0x01000204, 0x00000204, 0x01004000,
171 0x00000004, 0x01004200, 0x01004000, 0x00000204,
172 0x01000204, 0x00004000, 0x00004004, 0x01004204,
173 0x00000000, 0x01000200, 0x01000004, 0x00004200,
174 0x01004004, 0x00004204, 0x01004200, 0x00000004,
175 0x00004204, 0x01004004, 0x00000200, 0x01000000,
176 0x00004204, 0x01004000, 0x01004004, 0x00000204,
177 0x00004000, 0x00000200, 0x01000000, 0x01004004,
178 0x01000204, 0x00004204, 0x00004200, 0x00000000,
179 0x00000200, 0x01000004, 0x00000004, 0x01000200,
180 0x00000000, 0x01000204, 0x01000200, 0x00004200,
181 0x00000204, 0x00004000, 0x01004204, 0x01000000,
182 0x01004200, 0x00000004, 0x00004004, 0x01004204,
183 0x01000004, 0x01004200, 0x01004000, 0x00004004,
185 0x20800080, 0x20820000, 0x00020080, 0x00000000,
186 0x20020000, 0x00800080, 0x20800000, 0x20820080,
187 0x00000080, 0x20000000, 0x00820000, 0x00020080,
188 0x00820080, 0x20020080, 0x20000080, 0x20800000,
189 0x00020000, 0x00820080, 0x00800080, 0x20020000,
190 0x20820080, 0x20000080, 0x00000000, 0x00820000,
191 0x20000000, 0x00800000, 0x20020080, 0x20800080,
192 0x00800000, 0x00020000, 0x20820000, 0x00000080,
193 0x00800000, 0x00020000, 0x20000080, 0x20820080,
194 0x00020080, 0x20000000, 0x00000000, 0x00820000,
195 0x20800080, 0x20020080, 0x20020000, 0x00800080,
196 0x20820000, 0x00000080, 0x00800080, 0x20020000,
197 0x20820080, 0x00800000, 0x20800000, 0x20000080,
198 0x00820000, 0x00020080, 0x20020080, 0x20800000,
199 0x00000080, 0x20820000, 0x00820080, 0x00000000,
200 0x20000000, 0x20800080, 0x00020000, 0x00820080,
203 __device__ __constant__ u32 c_skb[8][64] =
205 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
206 0x00000000, 0x00000010, 0x20000000, 0x20000010,
207 0x00010000, 0x00010010, 0x20010000, 0x20010010,
208 0x00000800, 0x00000810, 0x20000800, 0x20000810,
209 0x00010800, 0x00010810, 0x20010800, 0x20010810,
210 0x00000020, 0x00000030, 0x20000020, 0x20000030,
211 0x00010020, 0x00010030, 0x20010020, 0x20010030,
212 0x00000820, 0x00000830, 0x20000820, 0x20000830,
213 0x00010820, 0x00010830, 0x20010820, 0x20010830,
214 0x00080000, 0x00080010, 0x20080000, 0x20080010,
215 0x00090000, 0x00090010, 0x20090000, 0x20090010,
216 0x00080800, 0x00080810, 0x20080800, 0x20080810,
217 0x00090800, 0x00090810, 0x20090800, 0x20090810,
218 0x00080020, 0x00080030, 0x20080020, 0x20080030,
219 0x00090020, 0x00090030, 0x20090020, 0x20090030,
220 0x00080820, 0x00080830, 0x20080820, 0x20080830,
221 0x00090820, 0x00090830, 0x20090820, 0x20090830,
222 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
223 0x00000000, 0x02000000, 0x00002000, 0x02002000,
224 0x00200000, 0x02200000, 0x00202000, 0x02202000,
225 0x00000004, 0x02000004, 0x00002004, 0x02002004,
226 0x00200004, 0x02200004, 0x00202004, 0x02202004,
227 0x00000400, 0x02000400, 0x00002400, 0x02002400,
228 0x00200400, 0x02200400, 0x00202400, 0x02202400,
229 0x00000404, 0x02000404, 0x00002404, 0x02002404,
230 0x00200404, 0x02200404, 0x00202404, 0x02202404,
231 0x10000000, 0x12000000, 0x10002000, 0x12002000,
232 0x10200000, 0x12200000, 0x10202000, 0x12202000,
233 0x10000004, 0x12000004, 0x10002004, 0x12002004,
234 0x10200004, 0x12200004, 0x10202004, 0x12202004,
235 0x10000400, 0x12000400, 0x10002400, 0x12002400,
236 0x10200400, 0x12200400, 0x10202400, 0x12202400,
237 0x10000404, 0x12000404, 0x10002404, 0x12002404,
238 0x10200404, 0x12200404, 0x10202404, 0x12202404,
239 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
240 0x00000000, 0x00000001, 0x00040000, 0x00040001,
241 0x01000000, 0x01000001, 0x01040000, 0x01040001,
242 0x00000002, 0x00000003, 0x00040002, 0x00040003,
243 0x01000002, 0x01000003, 0x01040002, 0x01040003,
244 0x00000200, 0x00000201, 0x00040200, 0x00040201,
245 0x01000200, 0x01000201, 0x01040200, 0x01040201,
246 0x00000202, 0x00000203, 0x00040202, 0x00040203,
247 0x01000202, 0x01000203, 0x01040202, 0x01040203,
248 0x08000000, 0x08000001, 0x08040000, 0x08040001,
249 0x09000000, 0x09000001, 0x09040000, 0x09040001,
250 0x08000002, 0x08000003, 0x08040002, 0x08040003,
251 0x09000002, 0x09000003, 0x09040002, 0x09040003,
252 0x08000200, 0x08000201, 0x08040200, 0x08040201,
253 0x09000200, 0x09000201, 0x09040200, 0x09040201,
254 0x08000202, 0x08000203, 0x08040202, 0x08040203,
255 0x09000202, 0x09000203, 0x09040202, 0x09040203,
256 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
257 0x00000000, 0x00100000, 0x00000100, 0x00100100,
258 0x00000008, 0x00100008, 0x00000108, 0x00100108,
259 0x00001000, 0x00101000, 0x00001100, 0x00101100,
260 0x00001008, 0x00101008, 0x00001108, 0x00101108,
261 0x04000000, 0x04100000, 0x04000100, 0x04100100,
262 0x04000008, 0x04100008, 0x04000108, 0x04100108,
263 0x04001000, 0x04101000, 0x04001100, 0x04101100,
264 0x04001008, 0x04101008, 0x04001108, 0x04101108,
265 0x00020000, 0x00120000, 0x00020100, 0x00120100,
266 0x00020008, 0x00120008, 0x00020108, 0x00120108,
267 0x00021000, 0x00121000, 0x00021100, 0x00121100,
268 0x00021008, 0x00121008, 0x00021108, 0x00121108,
269 0x04020000, 0x04120000, 0x04020100, 0x04120100,
270 0x04020008, 0x04120008, 0x04020108, 0x04120108,
271 0x04021000, 0x04121000, 0x04021100, 0x04121100,
272 0x04021008, 0x04121008, 0x04021108, 0x04121108,
273 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
274 0x00000000, 0x10000000, 0x00010000, 0x10010000,
275 0x00000004, 0x10000004, 0x00010004, 0x10010004,
276 0x20000000, 0x30000000, 0x20010000, 0x30010000,
277 0x20000004, 0x30000004, 0x20010004, 0x30010004,
278 0x00100000, 0x10100000, 0x00110000, 0x10110000,
279 0x00100004, 0x10100004, 0x00110004, 0x10110004,
280 0x20100000, 0x30100000, 0x20110000, 0x30110000,
281 0x20100004, 0x30100004, 0x20110004, 0x30110004,
282 0x00001000, 0x10001000, 0x00011000, 0x10011000,
283 0x00001004, 0x10001004, 0x00011004, 0x10011004,
284 0x20001000, 0x30001000, 0x20011000, 0x30011000,
285 0x20001004, 0x30001004, 0x20011004, 0x30011004,
286 0x00101000, 0x10101000, 0x00111000, 0x10111000,
287 0x00101004, 0x10101004, 0x00111004, 0x10111004,
288 0x20101000, 0x30101000, 0x20111000, 0x30111000,
289 0x20101004, 0x30101004, 0x20111004, 0x30111004,
290 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
291 0x00000000, 0x08000000, 0x00000008, 0x08000008,
292 0x00000400, 0x08000400, 0x00000408, 0x08000408,
293 0x00020000, 0x08020000, 0x00020008, 0x08020008,
294 0x00020400, 0x08020400, 0x00020408, 0x08020408,
295 0x00000001, 0x08000001, 0x00000009, 0x08000009,
296 0x00000401, 0x08000401, 0x00000409, 0x08000409,
297 0x00020001, 0x08020001, 0x00020009, 0x08020009,
298 0x00020401, 0x08020401, 0x00020409, 0x08020409,
299 0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
300 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
301 0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
302 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
303 0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
304 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
305 0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
306 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
307 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
308 0x00000000, 0x00000100, 0x00080000, 0x00080100,
309 0x01000000, 0x01000100, 0x01080000, 0x01080100,
310 0x00000010, 0x00000110, 0x00080010, 0x00080110,
311 0x01000010, 0x01000110, 0x01080010, 0x01080110,
312 0x00200000, 0x00200100, 0x00280000, 0x00280100,
313 0x01200000, 0x01200100, 0x01280000, 0x01280100,
314 0x00200010, 0x00200110, 0x00280010, 0x00280110,
315 0x01200010, 0x01200110, 0x01280010, 0x01280110,
316 0x00000200, 0x00000300, 0x00080200, 0x00080300,
317 0x01000200, 0x01000300, 0x01080200, 0x01080300,
318 0x00000210, 0x00000310, 0x00080210, 0x00080310,
319 0x01000210, 0x01000310, 0x01080210, 0x01080310,
320 0x00200200, 0x00200300, 0x00280200, 0x00280300,
321 0x01200200, 0x01200300, 0x01280200, 0x01280300,
322 0x00200210, 0x00200310, 0x00280210, 0x00280310,
323 0x01200210, 0x01200310, 0x01280210, 0x01280310,
324 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
325 0x00000000, 0x04000000, 0x00040000, 0x04040000,
326 0x00000002, 0x04000002, 0x00040002, 0x04040002,
327 0x00002000, 0x04002000, 0x00042000, 0x04042000,
328 0x00002002, 0x04002002, 0x00042002, 0x04042002,
329 0x00000020, 0x04000020, 0x00040020, 0x04040020,
330 0x00000022, 0x04000022, 0x00040022, 0x04040022,
331 0x00002020, 0x04002020, 0x00042020, 0x04042020,
332 0x00002022, 0x04002022, 0x00042022, 0x04042022,
333 0x00000800, 0x04000800, 0x00040800, 0x04040800,
334 0x00000802, 0x04000802, 0x00040802, 0x04040802,
335 0x00002800, 0x04002800, 0x00042800, 0x04042800,
336 0x00002802, 0x04002802, 0x00042802, 0x04042802,
337 0x00000820, 0x04000820, 0x00040820, 0x04040820,
338 0x00000822, 0x04000822, 0x00040822, 0x04040822,
339 0x00002820, 0x04002820, 0x00042820, 0x04042820,
340 0x00002822, 0x04002822, 0x00042822, 0x04042822
343 #define LM_IV_0_IP_RR3 0x2400b807
344 #define LM_IV_1_IP_RR3 0xaa190747
347 #define BOX(i,n,S) u32x ((S)[(n)][(i)])
351 #define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y])
354 __device__ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64])
356 asm (".reg .u8 c0, c1, c2, c3, c4, c5, c6, c7;");
363 for (int i = 0; i < 16; i++)
366 u32x t = Kd[i] ^ rotl32 (r, 28u);
368 u = (u >> 2) & 0x3f3f3f3f;
369 t = (t >> 2) & 0x3f3f3f3f;
376 "mov.b32 {c0, c1, c2, c3}, %8;"
377 "mov.b32 {c4, c5, c6, c7}, %9;"
387 : "=r"(u0), "=r"(u1), "=r"(u2), "=r"(u3),
388 "=r"(t0), "=r"(t1), "=r"(t2), "=r"(t3)
393 l ^= BOX (u0, 0, s_SPtrans)
394 | BOX (u1, 2, s_SPtrans)
395 | BOX (u2, 4, s_SPtrans)
396 | BOX (u3, 6, s_SPtrans)
397 | BOX (t0, 1, s_SPtrans)
398 | BOX (t1, 3, s_SPtrans)
399 | BOX (t2, 5, s_SPtrans)
400 | BOX (t3, 7, s_SPtrans);
407 iv[0] = rotl32 (l, 29);
408 iv[1] = rotl32 (r, 29);
411 __device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64])
415 PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
416 HPERM_OP (c, tt, 2, 0xcccc0000);
417 HPERM_OP (d, tt, 2, 0xcccc0000);
418 PERM_OP (d, c, tt, 1, 0x55555555);
419 PERM_OP (c, d, tt, 8, 0x00ff00ff);
420 PERM_OP (d, c, tt, 1, 0x55555555);
422 d = ((d & 0x000000ff) << 16)
423 | ((d & 0x0000ff00) << 0)
424 | ((d & 0x00ff0000) >> 16)
425 | ((c & 0xf0000000) >> 4);
429 for (u32 i = 0; i < 16; i++)
431 if ((i < 2) || (i == 8) || (i == 15))
433 c = ((c >> 1) | (c << 27));
434 d = ((d >> 1) | (d << 27));
438 c = ((c >> 2) | (c << 26));
439 d = ((d >> 2) | (d << 26));
445 const u32x c00 = (c >> 0) & 0x0000003f;
446 const u32x c06 = (c >> 6) & 0x00383003;
447 const u32x c07 = (c >> 7) & 0x0000003c;
448 const u32x c13 = (c >> 13) & 0x0000060f;
449 const u32x c20 = (c >> 20) & 0x00000001;
451 u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb)
452 | BOX (((c06 >> 0) & 0xff)
453 |((c07 >> 0) & 0xff), 1, s_skb)
454 | BOX (((c13 >> 0) & 0xff)
455 |((c06 >> 8) & 0xff), 2, s_skb)
456 | BOX (((c20 >> 0) & 0xff)
458 |((c06 >> 16) & 0xff), 3, s_skb);
460 const u32x d00 = (d >> 0) & 0x00003c3f;
461 const u32x d07 = (d >> 7) & 0x00003f03;
462 const u32x d21 = (d >> 21) & 0x0000000f;
463 const u32x d22 = (d >> 22) & 0x00000030;
465 u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb)
466 | BOX (((d07 >> 0) & 0xff)
467 |((d00 >> 8) & 0xff), 5, s_skb)
468 | BOX (((d07 >> 8) & 0xff), 6, s_skb)
469 | BOX (((d21 >> 0) & 0xff)
470 |((d22 >> 0) & 0xff), 7, s_skb);
472 #if __CUDA_ARCH__ >= 200
473 Kc[i] = __byte_perm (s, t, 0x5410);
474 Kd[i] = __byte_perm (s, t, 0x7632);
476 Kc[i] = ((t << 16) | (s & 0x0000ffff));
477 Kd[i] = ((s >> 16) | (t & 0xffff0000));
480 Kc[i] = rotl32 (Kc[i], 2u);
481 Kd[i] = rotl32 (Kd[i], 2u);
485 __device__ static void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2])
489 t[0] = (w0 >> 0) & 0xff;
490 t[1] = (w0 >> 8) & 0xff;
491 t[2] = (w0 >> 16) & 0xff;
492 t[3] = (w0 >> 24) & 0xff;
493 t[4] = (w1 >> 0) & 0xff;
494 t[5] = (w1 >> 8) & 0xff;
495 t[6] = (w1 >> 16) & 0xff;
496 t[7] = (w1 >> 24) & 0xff;
501 k[1] = (t[0] << 7) | (t[1] >> 1);
502 k[2] = (t[1] << 6) | (t[2] >> 2);
503 k[3] = (t[2] << 5) | (t[3] >> 3);
504 k[4] = (t[3] << 4) | (t[4] >> 4);
505 k[5] = (t[4] << 3) | (t[5] >> 5);
506 k[6] = (t[5] << 2) | (t[6] >> 6);
509 out[0] = ((k[0] & 0xff) << 0)
510 | ((k[1] & 0xff) << 8)
511 | ((k[2] & 0xff) << 16)
512 | ((k[3] & 0xff) << 24);
514 out[1] = ((k[4] & 0xff) << 0)
515 | ((k[5] & 0xff) << 8)
516 | ((k[6] & 0xff) << 16)
517 | ((k[7] & 0xff) << 24);
520 __device__ __constant__ comb_t c_combs[1024];
522 extern "C" __global__ void __launch_bounds__ (256, 1) m03000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
528 const u32 lid = threadIdx.x;
534 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
538 wordl0[0] = pws[gid].i[ 0];
539 wordl0[1] = pws[gid].i[ 1];
564 const u32 pw_l_len = pws[gid].pw_len;
566 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
568 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len);
575 __shared__ u32 s_SPtrans[8][64];
577 __shared__ u32 s_skb[8][64];
581 s_SPtrans[0][lid] = c_SPtrans[0][lid];
582 s_SPtrans[1][lid] = c_SPtrans[1][lid];
583 s_SPtrans[2][lid] = c_SPtrans[2][lid];
584 s_SPtrans[3][lid] = c_SPtrans[3][lid];
585 s_SPtrans[4][lid] = c_SPtrans[4][lid];
586 s_SPtrans[5][lid] = c_SPtrans[5][lid];
587 s_SPtrans[6][lid] = c_SPtrans[6][lid];
588 s_SPtrans[7][lid] = c_SPtrans[7][lid];
590 s_skb[0][lid] = c_skb[0][lid];
591 s_skb[1][lid] = c_skb[1][lid];
592 s_skb[2][lid] = c_skb[2][lid];
593 s_skb[3][lid] = c_skb[3][lid];
594 s_skb[4][lid] = c_skb[4][lid];
595 s_skb[5][lid] = c_skb[5][lid];
596 s_skb[6][lid] = c_skb[6][lid];
597 s_skb[7][lid] = c_skb[7][lid];
602 if (gid >= gid_max) return;
608 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
610 const u32 pw_r_len = c_combs[il_pos].pw_len;
612 u32 pw_len = pw_l_len + pw_r_len;
614 pw_len = (pw_len >= 7) ? 7 : pw_len;
618 wordr0[0] = c_combs[il_pos].i[0];
619 wordr0[1] = c_combs[il_pos].i[1];
644 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
646 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
651 w0[0] = wordl0[0] | wordr0[0];
652 w0[1] = wordl0[1] | wordr0[1];
679 transform_netntlmv1_key (w0[0], w0[1], key);
681 const u32x c = key[0];
682 const u32x d = key[1];
687 _des_crypt_keysetup (c, d, Kc, Kd, s_skb);
691 data[0] = LM_IV_0_IP_RR3;
692 data[1] = LM_IV_1_IP_RR3;
696 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
698 const u32x r0 = iv[0];
699 const u32x r1 = iv[1];
703 #include VECT_COMPARE_M
707 extern "C" __global__ void __launch_bounds__ (256, 1) m03000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
711 extern "C" __global__ void __launch_bounds__ (256, 1) m03000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
715 extern "C" __global__ void __launch_bounds__ (256, 1) m03000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
721 const u32 lid = threadIdx.x;
727 const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;
731 wordl0[0] = pws[gid].i[ 0];
732 wordl0[1] = pws[gid].i[ 1];
757 const u32 pw_l_len = pws[gid].pw_len;
759 if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
761 switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len);
768 __shared__ u32 s_SPtrans[8][64];
770 __shared__ u32 s_skb[8][64];
774 s_SPtrans[0][lid] = c_SPtrans[0][lid];
775 s_SPtrans[1][lid] = c_SPtrans[1][lid];
776 s_SPtrans[2][lid] = c_SPtrans[2][lid];
777 s_SPtrans[3][lid] = c_SPtrans[3][lid];
778 s_SPtrans[4][lid] = c_SPtrans[4][lid];
779 s_SPtrans[5][lid] = c_SPtrans[5][lid];
780 s_SPtrans[6][lid] = c_SPtrans[6][lid];
781 s_SPtrans[7][lid] = c_SPtrans[7][lid];
783 s_skb[0][lid] = c_skb[0][lid];
784 s_skb[1][lid] = c_skb[1][lid];
785 s_skb[2][lid] = c_skb[2][lid];
786 s_skb[3][lid] = c_skb[3][lid];
787 s_skb[4][lid] = c_skb[4][lid];
788 s_skb[5][lid] = c_skb[5][lid];
789 s_skb[6][lid] = c_skb[6][lid];
790 s_skb[7][lid] = c_skb[7][lid];
795 if (gid >= gid_max) return;
801 const u32 search[4] =
803 digests_buf[digests_offset].digest_buf[DGST_R0],
804 digests_buf[digests_offset].digest_buf[DGST_R1],
805 digests_buf[digests_offset].digest_buf[DGST_R2],
806 digests_buf[digests_offset].digest_buf[DGST_R3]
813 for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
815 const u32 pw_r_len = c_combs[il_pos].pw_len;
817 u32 pw_len = pw_l_len + pw_r_len;
819 pw_len = (pw_len >= 7) ? 7 : pw_len;
823 wordr0[0] = c_combs[il_pos].i[0];
824 wordr0[1] = c_combs[il_pos].i[1];
849 if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
851 switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
856 w0[0] = wordl0[0] | wordr0[0];
857 w0[1] = wordl0[1] | wordr0[1];
884 transform_netntlmv1_key (w0[0], w0[1], key);
886 const u32x c = key[0];
887 const u32x d = key[1];
892 _des_crypt_keysetup (c, d, Kc, Kd, s_skb);
896 data[0] = LM_IV_0_IP_RR3;
897 data[1] = LM_IV_1_IP_RR3;
901 _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
903 const u32x r0 = iv[0];
904 const u32x r1 = iv[1];
908 #include VECT_COMPARE_S
912 extern "C" __global__ void __launch_bounds__ (256, 1) m03000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
916 extern "C" __global__ void __launch_bounds__ (256, 1) m03000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)