Some final fixes for d_return_buf refactorization; Initial kernels for veracrypts...
[hashcat.git] / OpenCL / types_ocl.c
1 /**
2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * magnum <john.magnum@hushmail.com>
4 *
5 * License.....: MIT
6 */
7
/* Short fixed-width aliases for the OpenCL scalar integer types. */
8 typedef uchar u8;
9 typedef ushort u16;
10 typedef uint u32;
11 typedef ulong u64;
12
/* Without NEW_SIMD_CODE the vector width is forced to 1 (scalar). */
13 #ifndef NEW_SIMD_CODE
14 #undef VECT_SIZE
15 #define VECT_SIZE 1
16 #endif
17
/*
 * VTYPE goes through CONCAT so that its arguments are macro-expanded
 * before pasting: VTYPE(uint, VECT_SIZE) yields e.g. uint4, not
 * uintVECT_SIZE.
 */
18 #define CONCAT(a, b) a##b
19 #define VTYPE(type, width) CONCAT(type, width)
20
/*
 * The "x" types are VECT_SIZE lanes wide. For VECT_SIZE == 1 they are the
 * plain scalar types.
 */
21 #if VECT_SIZE == 1
22 typedef uchar u8x;
23 typedef ushort u16x;
24 typedef uint u32x;
25 typedef ulong u64x;
26 #else
27 typedef VTYPE(uchar, VECT_SIZE) u8x;
28 typedef VTYPE(ushort, VECT_SIZE) u16x;
29 typedef VTYPE(uint, VECT_SIZE) u32x;
30 typedef VTYPE(ulong, VECT_SIZE) u64x;
31 #endif
32
33 inline u32 l32_from_64_S (u64 a)
34 {
35 const u32 r = (u32) (a);
36
37 return r;
38 }
39
40 inline u32 h32_from_64_S (u64 a)
41 {
42 a >>= 32;
43
44 const u32 r = (u32) (a);
45
46 return r;
47 }
48
49 inline u64 hl32_to_64_S (const u32 a, const u32 b)
50 {
51 return as_ulong ((uint2) (b, a));
52 }
53
54 inline u32x l32_from_64 (u64x a)
55 {
56 u32x r;
57
58 #if VECT_SIZE == 1
59 r = (u32) a;
60 #endif
61
62 #if VECT_SIZE >= 2
63 r.s0 = (u32) a.s0;
64 r.s1 = (u32) a.s1;
65 #endif
66
67 #if VECT_SIZE >= 4
68 r.s2 = (u32) a.s2;
69 r.s3 = (u32) a.s3;
70 #endif
71
72 #if VECT_SIZE >= 8
73 r.s4 = (u32) a.s4;
74 r.s5 = (u32) a.s5;
75 r.s6 = (u32) a.s6;
76 r.s7 = (u32) a.s7;
77 #endif
78
79 #if VECT_SIZE >= 16
80 r.s8 = (u32) a.s8;
81 r.s9 = (u32) a.s9;
82 r.sa = (u32) a.sa;
83 r.sb = (u32) a.sb;
84 r.sc = (u32) a.sc;
85 r.sd = (u32) a.sd;
86 r.se = (u32) a.se;
87 r.sf = (u32) a.sf;
88 #endif
89
90 return r;
91 }
92
93 inline u32x h32_from_64 (u64x a)
94 {
95 a >>= 32;
96
97 u32x r;
98
99 #if VECT_SIZE == 1
100 r = (u32) a;
101 #endif
102
103 #if VECT_SIZE >= 2
104 r.s0 = (u32) a.s0;
105 r.s1 = (u32) a.s1;
106 #endif
107
108 #if VECT_SIZE >= 4
109 r.s2 = (u32) a.s2;
110 r.s3 = (u32) a.s3;
111 #endif
112
113 #if VECT_SIZE >= 8
114 r.s4 = (u32) a.s4;
115 r.s5 = (u32) a.s5;
116 r.s6 = (u32) a.s6;
117 r.s7 = (u32) a.s7;
118 #endif
119
120 #if VECT_SIZE >= 16
121 r.s8 = (u32) a.s8;
122 r.s9 = (u32) a.s9;
123 r.sa = (u32) a.sa;
124 r.sb = (u32) a.sb;
125 r.sc = (u32) a.sc;
126 r.sd = (u32) a.sd;
127 r.se = (u32) a.se;
128 r.sf = (u32) a.sf;
129 #endif
130
131 return r;
132 }
133
134 inline u64x hl32_to_64 (const u32x a, const u32x b)
135 {
136 u64x r;
137
138 #if VECT_SIZE == 1
139 r = as_ulong ((uint2) (b, a));
140 #endif
141
142 #if VECT_SIZE >= 2
143 r.s0 = as_ulong ((uint2) (b.s0, a.s0));
144 r.s1 = as_ulong ((uint2) (b.s1, a.s1));
145 #endif
146
147 #if VECT_SIZE >= 4
148 r.s2 = as_ulong ((uint2) (b.s2, a.s2));
149 r.s3 = as_ulong ((uint2) (b.s3, a.s3));
150 #endif
151
152 #if VECT_SIZE >= 8
153 r.s4 = as_ulong ((uint2) (b.s4, a.s4));
154 r.s5 = as_ulong ((uint2) (b.s5, a.s5));
155 r.s6 = as_ulong ((uint2) (b.s6, a.s6));
156 r.s7 = as_ulong ((uint2) (b.s7, a.s7));
157 #endif
158
159 #if VECT_SIZE >= 16
160 r.s8 = as_ulong ((uint2) (b.s8, a.s8));
161 r.s9 = as_ulong ((uint2) (b.s9, a.s9));
162 r.sa = as_ulong ((uint2) (b.sa, a.sa));
163 r.sb = as_ulong ((uint2) (b.sb, a.sb));
164 r.sc = as_ulong ((uint2) (b.sc, a.sc));
165 r.sd = as_ulong ((uint2) (b.sd, a.sd));
166 r.se = as_ulong ((uint2) (b.se, a.se));
167 r.sf = as_ulong ((uint2) (b.sf, a.sf));
168 #endif
169
170 return r;
171 }
172
173 #ifdef IS_AMD
174 inline u32 swap32_S (const u32 v)
175 {
176 return (as_uint (as_uchar4 (v).s3210));
177 }
178
179 inline u64 swap64_S (const u64 v)
180 {
181 return (as_ulong (as_uchar8 (v).s76543210));
182 }
183
184 inline u32 rotr32_S (const u32 a, const u32 n)
185 {
186 return rotate (a, 32 - n);
187 }
188
189 inline u32 rotl32_S (const u32 a, const u32 n)
190 {
191 return rotate (a, n);
192 }
193
194 inline u64 rotr64_S (const u64 a, const u32 n)
195 {
196 const u32 a0 = h32_from_64_S (a);
197 const u32 a1 = l32_from_64_S (a);
198
199 const u32 t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
200 const u32 t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
201
202 const u64 r = hl32_to_64_S (t0, t1);
203
204 return r;
205 }
206
207 inline u64 rotl64_S (const u64 a, const u32 n)
208 {
209 return rotr64_S (a, 64 - n);
210 }
211
212 inline u32x swap32 (const u32x v)
213 {
214 return ((v >> 24) & 0x000000ff)
215 | ((v >> 8) & 0x0000ff00)
216 | ((v << 8) & 0x00ff0000)
217 | ((v << 24) & 0xff000000);
218 }
219
220 inline u64x swap64 (const u64x v)
221 {
222 return ((v >> 56) & 0x00000000000000ff)
223 | ((v >> 40) & 0x000000000000ff00)
224 | ((v >> 24) & 0x0000000000ff0000)
225 | ((v >> 8) & 0x00000000ff000000)
226 | ((v << 8) & 0x000000ff00000000)
227 | ((v << 24) & 0x0000ff0000000000)
228 | ((v << 40) & 0x00ff000000000000)
229 | ((v << 56) & 0xff00000000000000);
230 }
231
232 inline u32x rotr32 (const u32x a, const u32 n)
233 {
234 return rotate (a, 32 - n);
235 }
236
237 inline u32x rotl32 (const u32x a, const u32 n)
238 {
239 return rotate (a, n);
240 }
241
242 inline u64x rotr64 (const u64x a, const u32 n)
243 {
244 const u32x a0 = h32_from_64 (a);
245 const u32x a1 = l32_from_64 (a);
246
247 const u32x t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
248 const u32x t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
249
250 const u64x r = hl32_to_64 (t0, t1);
251
252 return r;
253 }
254
255 inline u64x rotl64 (const u64x a, const u32 n)
256 {
257 return rotr64 (a, 64 - n);
258 }
259
260 inline u32x __bfe (const u32x a, const u32x b, const u32x c)
261 {
262 return amd_bfe (a, b, c);
263 }
264
265 inline u32 __bfe_S (const u32 a, const u32 b, const u32 c)
266 {
267 return amd_bfe (a, b, c);
268 }
269
270 inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
271 {
272 return amd_bytealign (a, b, c);
273 }
274 #endif
275
276 #ifdef IS_NV
277 inline u32 swap32_S (const u32 v)
278 {
279 u32 r;
280
281 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));
282
283 return r;
284 }
285
286 inline u64 swap64_S (const u64 v)
287 {
288 u32 il;
289 u32 ir;
290
291 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(v));
292
293 u32 tl;
294 u32 tr;
295
296 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl) : "r"(il));
297 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));
298
299 u64 r;
300
301 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));
302
303 return r;
304 }
305
306 inline u32 rotr32_S (const u32 a, const u32 n)
307 {
308 return rotate (a, 32 - n);
309 }
310
311 inline u32 rotl32_S (const u32 a, const u32 n)
312 {
313 return rotate (a, n);
314 }
315
316 inline u64 rotr64_S (const u64 a, const u32 n)
317 {
318 return rotate (a, (u64) 64 - n);
319 }
320
321 inline u64 rotl64_S (const u64 a, const u32 n)
322 {
323 return rotr64_S (a, 64 - n);
324 }
325
326 inline u32x swap32 (const u32x v)
327 {
328 return ((v >> 24) & 0x000000ff)
329 | ((v >> 8) & 0x0000ff00)
330 | ((v << 8) & 0x00ff0000)
331 | ((v << 24) & 0xff000000);
332 }
333
334 inline u64x swap64 (const u64x v)
335 {
336 return ((v >> 56) & 0x00000000000000ff)
337 | ((v >> 40) & 0x000000000000ff00)
338 | ((v >> 24) & 0x0000000000ff0000)
339 | ((v >> 8) & 0x00000000ff000000)
340 | ((v << 8) & 0x000000ff00000000)
341 | ((v << 24) & 0x0000ff0000000000)
342 | ((v << 40) & 0x00ff000000000000)
343 | ((v << 56) & 0xff00000000000000);
344 }
345
346 inline u32x rotr32 (const u32x a, const u32 n)
347 {
348 return rotate (a, 32 - n);
349 }
350
351 inline u32x rotl32 (const u32x a, const u32 n)
352 {
353 return rotate (a, n);
354 }
355
356 inline u64x rotr64 (const u64x a, const u32 n)
357 {
358 return rotate (a, (u64) 64 - n);
359 }
360
361 inline u64x rotl64 (const u64x a, const u32 n)
362 {
363 return rotate (a, (u64) n);
364 }
365
366 inline u32x __byte_perm (const u32x a, const u32x b, const u32x c)
367 {
368 u32x r;
369
370 #if VECT_SIZE == 1
371 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c) );
372 #endif
373
374 #if VECT_SIZE >= 2
375 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(a.s0), "r"(b.s0), "r"(c.s0));
376 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(a.s1), "r"(b.s1), "r"(c.s1));
377 #endif
378
379 #if VECT_SIZE >= 4
380 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(a.s2), "r"(b.s2), "r"(c.s2));
381 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(a.s3), "r"(b.s3), "r"(c.s3));
382 #endif
383
384 #if VECT_SIZE >= 8
385 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(a.s4), "r"(b.s4), "r"(c.s4));
386 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(a.s5), "r"(b.s5), "r"(c.s5));
387 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(a.s6), "r"(b.s6), "r"(c.s6));
388 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(a.s7), "r"(b.s7), "r"(c.s7));
389 #endif
390
391 #if VECT_SIZE >= 16
392 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(a.s8), "r"(b.s8), "r"(c.s8));
393 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(a.s9), "r"(b.s9), "r"(c.s9));
394 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(a.sa), "r"(b.sa), "r"(c.sa));
395 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(a.sb), "r"(b.sb), "r"(c.sb));
396 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(a.sc), "r"(b.sc), "r"(c.sc));
397 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(a.sd), "r"(b.sd), "r"(c.sd));
398 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(a.se), "r"(b.se), "r"(c.se));
399 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(a.sf), "r"(b.sf), "r"(c.sf));
400 #endif
401
402 return r;
403 }
404
405 inline u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
406 {
407 u32 r;
408
409 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
410
411 return r;
412 }
413
414 inline u32x __bfe (const u32x a, const u32x b, const u32x c)
415 {
416 u32x r;
417
418 #if VECT_SIZE == 1
419 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
420 #endif
421
422 #if VECT_SIZE >= 2
423 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(a.s0), "r"(b.s0), "r"(c.s0));
424 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(a.s1), "r"(b.s1), "r"(c.s1));
425 #endif
426
427 #if VECT_SIZE >= 4
428 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(a.s2), "r"(b.s2), "r"(c.s2));
429 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(a.s3), "r"(b.s3), "r"(c.s3));
430 #endif
431
432 #if VECT_SIZE >= 8
433 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(a.s4), "r"(b.s4), "r"(c.s4));
434 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(a.s5), "r"(b.s5), "r"(c.s5));
435 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(a.s6), "r"(b.s6), "r"(c.s6));
436 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(a.s7), "r"(b.s7), "r"(c.s7));
437 #endif
438
439 #if VECT_SIZE >= 16
440 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(a.s8), "r"(b.s8), "r"(c.s8));
441 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(a.s9), "r"(b.s9), "r"(c.s9));
442 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(a.sa), "r"(b.sa), "r"(c.sa));
443 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(a.sb), "r"(b.sb), "r"(c.sb));
444 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(a.sc), "r"(b.sc), "r"(c.sc));
445 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(a.sd), "r"(b.sd), "r"(c.sd));
446 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(a.se), "r"(b.se), "r"(c.se));
447 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(a.sf), "r"(b.sf), "r"(c.sf));
448 #endif
449
450 return r;
451 }
452
453 inline u32 __bfe_S (const u32 a, const u32 b, const u32 c)
454 {
455 u32 r;
456
457 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
458
459 return r;
460 }
461
462 inline u32x amd_bytealign (const u32x a, const u32x b, const u32x c)
463 {
464 u32x r;
465
466 #if CUDA_ARCH >= 350
467
468 #if VECT_SIZE == 1
469 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));
470 #endif
471
472 #if VECT_SIZE >= 2
473 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(b.s0), "r"(a.s0), "r"((c.s0 & 3) * 8));
474 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(b.s1), "r"(a.s1), "r"((c.s1 & 3) * 8));
475 #endif
476
477 #if VECT_SIZE >= 4
478 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(b.s2), "r"(a.s2), "r"((c.s2 & 3) * 8));
479 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(b.s3), "r"(a.s3), "r"((c.s3 & 3) * 8));
480 #endif
481
482 #if VECT_SIZE >= 8
483 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(b.s4), "r"(a.s4), "r"((c.s4 & 3) * 8));
484 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(b.s5), "r"(a.s5), "r"((c.s5 & 3) * 8));
485 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(b.s6), "r"(a.s6), "r"((c.s6 & 3) * 8));
486 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(b.s7), "r"(a.s7), "r"((c.s7 & 3) * 8));
487 #endif
488
489 #if VECT_SIZE >= 16
490 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(b.s8), "r"(a.s8), "r"((c.s8 & 3) * 8));
491 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(b.s9), "r"(a.s9), "r"((c.s9 & 3) * 8));
492 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(b.sa), "r"(a.sa), "r"((c.sa & 3) * 8));
493 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(b.sb), "r"(a.sb), "r"((c.sb & 3) * 8));
494 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(b.sc), "r"(a.sc), "r"((c.sc & 3) * 8));
495 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(b.sd), "r"(a.sd), "r"((c.sd & 3) * 8));
496 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(b.se), "r"(a.se), "r"((c.se & 3) * 8));
497 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(b.sf), "r"(a.sf), "r"((c.sf & 3) * 8));
498 #endif
499
500 #else
501
502 r = __byte_perm (b, a, ((u32x) (0x76543210) >> ((c & 3) * 4)) & 0xffff);
503
504 #endif
505
506 return r;
507 }
508
509 inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
510 {
511 u32 r;
512
513 #if CUDA_ARCH >= 350
514
515 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));
516
517 #else
518
519 r = __byte_perm_S (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);
520
521 #endif
522
523 return r;
524 }
525 #endif
526
527 #ifdef IS_GENERIC
528 inline u32 swap32_S (const u32 v)
529 {
530 return (as_uint (as_uchar4 (v).s3210));
531 }
532
533 inline u64 swap64_S (const u64 v)
534 {
535 return (as_ulong (as_uchar8 (v).s76543210));
536 }
537
538 inline u32 rotr32_S (const u32 a, const u32 n)
539 {
540 return rotate (a, 32 - n);
541 }
542
543 inline u32 rotl32_S (const u32 a, const u32 n)
544 {
545 return rotate (a, n);
546 }
547
548 inline u64 rotr64_S (const u64 a, const u32 n)
549 {
550 return rotate (a, (u64) 64 - n);
551 }
552
553 inline u64 rotl64_S (const u64 a, const u32 n)
554 {
555 return rotate (a, (u64) n);
556 }
557
558 inline u32x swap32 (const u32x v)
559 {
560 return ((v >> 24) & 0x000000ff)
561 | ((v >> 8) & 0x0000ff00)
562 | ((v << 8) & 0x00ff0000)
563 | ((v << 24) & 0xff000000);
564 }
565
566 inline u64x swap64 (const u64x v)
567 {
568 return ((v >> 56) & 0x00000000000000ff)
569 | ((v >> 40) & 0x000000000000ff00)
570 | ((v >> 24) & 0x0000000000ff0000)
571 | ((v >> 8) & 0x00000000ff000000)
572 | ((v << 8) & 0x000000ff00000000)
573 | ((v << 24) & 0x0000ff0000000000)
574 | ((v << 40) & 0x00ff000000000000)
575 | ((v << 56) & 0xff00000000000000);
576 }
577
578 inline u32x rotr32 (const u32x a, const u32 n)
579 {
580 return rotate (a, 32 - n);
581 }
582
583 inline u32x rotl32 (const u32x a, const u32 n)
584 {
585 return rotate (a, n);
586 }
587
588 inline u64x rotr64 (const u64x a, const u32 n)
589 {
590 return rotate (a, (u64) 64 - n);
591 }
592
593 inline u64x rotl64 (const u64x a, const u32 n)
594 {
595 return rotate (a, (u64) n);
596 }
597
598 inline u32x __bfe (const u32x a, const u32x b, const u32x c)
599 {
600 #define BIT(x) ((u32x) (1u) << (x))
601 #define BIT_MASK(x) (BIT (x) - 1)
602 #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z))
603
604 return BFE (a, b, c);
605
606 #undef BIT
607 #undef BIT_MASK
608 #undef BFE
609 }
610
611 inline u32 __bfe_S (const u32 a, const u32 b, const u32 c)
612 {
613 #define BIT(x) (1u << (x))
614 #define BIT_MASK(x) (BIT (x) - 1)
615 #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z))
616
617 return BFE (a, b, c);
618
619 #undef BIT
620 #undef BIT_MASK
621 #undef BFE
622 }
623
624 inline u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
625 {
626 #if VECT_SIZE == 1
627 const u64x tmp = ((((u64x) (a)) << 32) | ((u64x) (b))) >> ((c & 3) * 8);
628
629 return (u32x) (tmp);
630 #endif
631
632 #if VECT_SIZE == 2
633 const u64x tmp = ((((u64x) (a.s0, a.s1)) << 32) | ((u64x) (b.s0, b.s1))) >> ((c & 3) * 8);
634
635 return (u32x) (tmp.s0, tmp.s1);
636 #endif
637
638 #if VECT_SIZE == 4
639 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3))) >> ((c & 3) * 8);
640
641 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3);
642 #endif
643
644 #if VECT_SIZE == 8
645 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7))) >> ((c & 3) * 8);
646
647 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7);
648 #endif
649
650 #if VECT_SIZE == 16
651 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7, a.s8, a.s9, a.sa, a.sb, a.sc, a.sd, a.se, a.sf)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7, b.s8, b.s9, b.sa, b.sb, b.sc, b.sd, b.se, b.sf))) >> ((c & 3) * 8);
652
653 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7, tmp.s8, tmp.s9, tmp.sa, tmp.sb, tmp.sc, tmp.sd, tmp.se, tmp.sf);
654 #endif
655 }
656
657 inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
658 {
659 const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);
660
661 return (u32) (tmp);
662 }
663
664 #endif
665
/**
 * Digest buffer whose length (in u32 words) is chosen at compile time by
 * whichever hash-family macro is defined. The first matching branch of the
 * chain wins, so the order below is significant when more than one macro
 * is defined.
 *
 * Two branches that could never be selected were removed because an
 * earlier branch tests the same macro: a second _SCRYPT_ entry (which
 * declared 4 words; the effective size is 8) and a second _LOTUS8_ entry.
 *
 * There is no fallback branch: if none of the macros is defined the
 * struct has no members.
 */
typedef struct
{
  #if   defined _DES_
  u32 digest_buf[4];
  #elif defined _MD4_
  u32 digest_buf[4];
  #elif defined _MD5_
  u32 digest_buf[4];
  #elif defined _MD5H_
  u32 digest_buf[4];
  #elif defined _SHA1_
  u32 digest_buf[5];
  #elif defined _BCRYPT_
  u32 digest_buf[6];
  #elif defined _SHA256_
  u32 digest_buf[8];
  #elif defined _SHA384_
  u32 digest_buf[16];
  #elif defined _SHA512_
  u32 digest_buf[16];
  #elif defined _KECCAK_
  u32 digest_buf[50];
  #elif defined _RIPEMD160_
  u32 digest_buf[5];
  #elif defined _WHIRLPOOL_
  u32 digest_buf[16];
  #elif defined _GOST_
  u32 digest_buf[8];
  #elif defined _GOST2012_256_
  u32 digest_buf[8];
  #elif defined _GOST2012_512_
  u32 digest_buf[16];
  #elif defined _SAPB_
  u32 digest_buf[4];
  #elif defined _SAPG_
  u32 digest_buf[5];
  #elif defined _MYSQL323_
  u32 digest_buf[4];
  #elif defined _LOTUS5_
  u32 digest_buf[4];
  #elif defined _LOTUS6_
  u32 digest_buf[4];
  #elif defined _SCRYPT_
  u32 digest_buf[8];
  #elif defined _LOTUS8_
  u32 digest_buf[4];
  #elif defined _OFFICE2007_
  u32 digest_buf[4];
  #elif defined _OFFICE2010_
  u32 digest_buf[4];
  #elif defined _OFFICE2013_
  u32 digest_buf[4];
  #elif defined _OLDOFFICE01_
  u32 digest_buf[4];
  #elif defined _OLDOFFICE34_
  u32 digest_buf[4];
  #elif defined _SIPHASH_
  u32 digest_buf[4];
  #elif defined _PBKDF2_MD5_
  u32 digest_buf[32];
  #elif defined _PBKDF2_SHA1_
  u32 digest_buf[32];
  #elif defined _PBKDF2_SHA256_
  u32 digest_buf[32];
  #elif defined _PBKDF2_SHA512_
  u32 digest_buf[32];
  #elif defined _PDF17L8_
  u32 digest_buf[8];
  #elif defined _CRC32_
  u32 digest_buf[4];
  #elif defined _SEVEN_ZIP_
  u32 digest_buf[4];
  #elif defined _ANDROIDFDE_
  u32 digest_buf[4];
  #elif defined _DCC2_
  u32 digest_buf[4];
  #elif defined _WPA_
  u32 digest_buf[4];
  #elif defined _MD5_SHA1_
  u32 digest_buf[4];
  #elif defined _SHA1_MD5_
  u32 digest_buf[5];
  #elif defined _NETNTLMV2_
  u32 digest_buf[4];
  #elif defined _KRB5PA_
  u32 digest_buf[4];
  #elif defined _CLOUDKEY_
  u32 digest_buf[8];
  #elif defined _PSAFE2_
  u32 digest_buf[5];
  #elif defined _RAR3_
  u32 digest_buf[4];
  #elif defined _SHA256_SHA1_
  u32 digest_buf[8];
  #elif defined _MS_DRSR_
  u32 digest_buf[8];
  #elif defined _ANDROIDFDE_SAMSUNG_
  u32 digest_buf[8];
  #elif defined _RAR5_
  u32 digest_buf[4];
  #elif defined _KRB5TGS_
  u32 digest_buf[4];
  #elif defined _AXCRYPT_
  u32 digest_buf[4];
  #elif defined _KEEPASS_
  u32 digest_buf[4];
  #elif defined _ZIP2_
  u32 digest_buf[4];
  #endif

} digest_t;
781
/*
 * Per-salt record. All members are u32, 38 words (152 bytes) in total:
 * two salt buffers, length/iteration/sign fields, two *_mdlen fields,
 * digest counters with an offset, and five scrypt_* parameters.
 * NOTE(review): presumably this layout has to match a host-side
 * definition of the same struct; confirm before reordering or resizing.
 */
782 typedef struct
783 {
784 u32 salt_buf[16];
785 u32 salt_buf_pc[8];
786
787 u32 salt_len;
788 u32 salt_iter;
789 u32 salt_sign[2];
790
791 u32 keccak_mdlen;
792 u32 truecrypt_mdlen;
793
794 u32 digests_cnt;
795 u32 digests_done;
796
797 u32 digests_offset;
798
799 u32 scrypt_N;
800 u32 scrypt_r;
801 u32 scrypt_p;
802 u32 scrypt_tmto;
803 u32 scrypt_phy;
804
805 } salt_t;
806
807 typedef struct
808 {
809 int V;
810 int R;
811 int P;
812
813 int enc_md;
814
815 u32 id_buf[8];
816 u32 u_buf[32];
817 u32 o_buf[32];
818
819 int id_len;
820 int o_len;
821 int u_len;
822
823 u32 rc4key[2];
824 u32 rc4data[2];
825
826 } pdf_t;
827
828 typedef struct
829 {
830 u32 pke[25];
831 u32 eapol[64];
832 int eapol_size;
833 int keyver;
834 u8 orig_mac1[6];
835 u8 orig_mac2[6];
836 u8 orig_nonce1[32];
837 u8 orig_nonce2[32];
838
839 } wpa_t;
840
841 typedef struct
842 {
843 u32 cry_master_buf[64];
844 u32 ckey_buf[64];
845 u32 public_key_buf[64];
846
847 u32 cry_master_len;
848 u32 ckey_len;
849 u32 public_key_len;
850
851 } bitcoin_wallet_t;
852
853 typedef struct
854 {
855 u32 salt_buf[30];
856 u32 salt_len;
857
858 u32 esalt_buf[38];
859 u32 esalt_len;
860
861 } sip_t;
862
863 typedef struct
864 {
865 u32 data[384];
866
867 } androidfde_t;
868
869 typedef struct
870 {
871 u32 nr_buf[16];
872 u32 nr_len;
873
874 u32 msg_buf[128];
875 u32 msg_len;
876
877 } ikepsk_t;
878
879 typedef struct
880 {
881 u32 user_len;
882 u32 domain_len;
883 u32 srvchall_len;
884 u32 clichall_len;
885
886 u32 userdomain_buf[64];
887 u32 chall_buf[256];
888
889 } netntlm_t;
890
891 typedef struct
892 {
893 u32 user[16];
894 u32 realm[16];
895 u32 salt[32];
896 u32 timestamp[16];
897 u32 checksum[4];
898
899 } krb5pa_t;
900
901 typedef struct
902 {
903 u32 account_info[512];
904 u32 checksum[4];
905 u32 edata2[2560];
906 u32 edata2_len;
907
908 } krb5tgs_t;
909
910 typedef struct
911 {
912 u32 salt_buf[16];
913 u32 data_buf[112];
914 u32 keyfile_buf[16];
915 u32 signature;
916
917 } tc_t;
918
919 typedef struct
920 {
921 u32 salt_buf[16];
922
923 } pbkdf2_md5_t;
924
925 typedef struct
926 {
927 u32 salt_buf[16];
928
929 } pbkdf2_sha1_t;
930
931 typedef struct
932 {
933 u32 salt_buf[16];
934
935 } pbkdf2_sha256_t;
936
937 typedef struct
938 {
939 u32 salt_buf[32];
940
941 } pbkdf2_sha512_t;
942
943 typedef struct
944 {
945 u32 salt_buf[128];
946 u32 salt_len;
947
948 } rakp_t;
949
950 typedef struct
951 {
952 u32 data_len;
953 u32 data_buf[512];
954
955 } cloudkey_t;
956
957 typedef struct
958 {
959 u32 encryptedVerifier[4];
960 u32 encryptedVerifierHash[5];
961
962 u32 keySize;
963
964 } office2007_t;
965
966 typedef struct
967 {
968 u32 encryptedVerifier[4];
969 u32 encryptedVerifierHash[8];
970
971 } office2010_t;
972
973 typedef struct
974 {
975 u32 encryptedVerifier[4];
976 u32 encryptedVerifierHash[8];
977
978 } office2013_t;
979
980 typedef struct
981 {
982 u32 version;
983 u32 encryptedVerifier[4];
984 u32 encryptedVerifierHash[4];
985 u32 rc4key[2];
986
987 } oldoffice01_t;
988
989 typedef struct
990 {
991 u32 version;
992 u32 encryptedVerifier[4];
993 u32 encryptedVerifierHash[5];
994 u32 rc4key[2];
995
996 } oldoffice34_t;
997
998 typedef struct
999 {
1000 u32 salt_buf[128];
1001 u32 salt_len;
1002
1003 u32 pc_digest[5];
1004 u32 pc_offset;
1005
1006 } pstoken_t;
1007
1008 typedef struct
1009 {
1010 u32 type;
1011 u32 mode;
1012 u32 magic;
1013 u32 salt_len;
1014 u32 salt_buf[4];
1015 u32 verify_bytes;
1016 u32 compress_length;
1017 u32 data_len;
1018 u32 data_buf[2048];
1019 u32 auth_len;
1020 u32 auth_buf[4];
1021
1022 } zip2_t;
1023
1024 typedef struct
1025 {
1026 u32 version;
1027 u32 algorithm;
1028
1029 /* key-file handling */
1030 u32 keyfile_len;
1031 u32 keyfile[8];
1032
1033 u32 final_random_seed[8];
1034 u32 transf_random_seed[8];
1035 u32 enc_iv[4];
1036 u32 contents_hash[8];
1037
1038 /* specific to version 1 */
1039 u32 contents_len;
1040 u32 contents[75000];
1041
1042 /* specific to version 2 */
1043 u32 expected_bytes[8];
1044
1045 } keepass_t;
1046
1047 typedef struct
1048 {
1049 u32 digest[4];
1050 u32 out[4];
1051
1052 } pdf14_tmp_t;
1053
/*
 * Temporary state whose digest storage can be addressed either as sixteen
 * 32-bit words or as eight 64-bit words: both views alias the same
 * 64 bytes through the anonymous union. Two length fields follow.
 */
1054 typedef struct
1055 {
1056 union
1057 {
1058 u32 dgst32[16];
1059 u64 dgst64[8];
1060 };
1061
1062 u32 dgst_len;
1063 u32 W_len;
1064
1065 } pdf17l8_tmp_t;
1066
1067 typedef struct
1068 {
1069 u32 digest_buf[4];
1070
1071 } phpass_tmp_t;
1072
1073 typedef struct
1074 {
1075 u32 digest_buf[4];
1076
1077 } md5crypt_tmp_t;
1078
1079 typedef struct
1080 {
1081 u32 alt_result[8];
1082
1083 u32 p_bytes[4];
1084 u32 s_bytes[4];
1085
1086 } sha256crypt_tmp_t;
1087
1088 typedef struct
1089 {
1090 u64 l_alt_result[8];
1091
1092 u64 l_p_bytes[2];
1093 u64 l_s_bytes[2];
1094
1095 } sha512crypt_tmp_t;
1096
1097 typedef struct
1098 {
1099 u32 ipad[5];
1100 u32 opad[5];
1101
1102 u32 dgst[10];
1103 u32 out[10];
1104
1105 } wpa_tmp_t;
1106
1107 typedef struct
1108 {
1109 u64 dgst[8];
1110
1111 } bitcoin_wallet_tmp_t;
1112
1113 typedef struct
1114 {
1115 u32 ipad[5];
1116 u32 opad[5];
1117
1118 u32 dgst[5];
1119 u32 out[4];
1120
1121 } dcc2_tmp_t;
1122
1123 typedef struct
1124 {
1125 u32 E[18];
1126
1127 u32 P[18];
1128
1129 u32 S0[256];
1130 u32 S1[256];
1131 u32 S2[256];
1132 u32 S3[256];
1133
1134 } bcrypt_tmp_t;
1135
1136 typedef struct
1137 {
1138 u32 digest[2];
1139
1140 u32 P[18];
1141
1142 u32 S0[256];
1143 u32 S1[256];
1144 u32 S2[256];
1145 u32 S3[256];
1146
1147 } pwsafe2_tmp_t;
1148
1149 typedef struct
1150 {
1151 u32 digest_buf[8];
1152
1153 } pwsafe3_tmp_t;
1154
1155 typedef struct
1156 {
1157 u32 digest_buf[5];
1158
1159 } androidpin_tmp_t;
1160
1161 typedef struct
1162 {
1163 u32 ipad[5];
1164 u32 opad[5];
1165
1166 u32 dgst[10];
1167 u32 out[10];
1168
1169 } androidfde_tmp_t;
1170
1171 typedef struct
1172 {
1173 u32 ipad[16];
1174 u32 opad[16];
1175
1176 u32 dgst[64];
1177 u32 out[64];
1178
1179 } tc_tmp_t;
1180
1181 typedef struct
1182 {
1183 u64 ipad[8];
1184 u64 opad[8];
1185
1186 u64 dgst[32];
1187 u64 out[32];
1188
1189 } tc64_tmp_t;
1190
1191 typedef struct
1192 {
1193 u32 ipad[4];
1194 u32 opad[4];
1195
1196 u32 dgst[32];
1197 u32 out[32];
1198
1199 } pbkdf2_md5_tmp_t;
1200
1201 typedef struct
1202 {
1203 u32 ipad[5];
1204 u32 opad[5];
1205
1206 u32 dgst[32];
1207 u32 out[32];
1208
1209 } pbkdf2_sha1_tmp_t;
1210
1211 typedef struct
1212 {
1213 u32 ipad[8];
1214 u32 opad[8];
1215
1216 u32 dgst[32];
1217 u32 out[32];
1218
1219 } pbkdf2_sha256_tmp_t;
1220
1221 typedef struct
1222 {
1223 u64 ipad[8];
1224 u64 opad[8];
1225
1226 u64 dgst[16];
1227 u64 out[16];
1228
1229 } pbkdf2_sha512_tmp_t;
1230
1231 typedef struct
1232 {
1233 u64 out[8];
1234
1235 } ecryptfs_tmp_t;
1236
1237 typedef struct
1238 {
1239 u64 ipad[8];
1240 u64 opad[8];
1241
1242 u64 dgst[16];
1243 u64 out[16];
1244
1245 } oraclet_tmp_t;
1246
1247 typedef struct
1248 {
1249 u32 ipad[5];
1250 u32 opad[5];
1251
1252 u32 dgst[5];
1253 u32 out[5];
1254
1255 } agilekey_tmp_t;
1256
1257 typedef struct
1258 {
1259 u32 ipad[5];
1260 u32 opad[5];
1261
1262 u32 dgst1[5];
1263 u32 out1[5];
1264
1265 u32 dgst2[5];
1266 u32 out2[5];
1267
1268 } mywallet_tmp_t;
1269
1270 typedef struct
1271 {
1272 u32 ipad[5];
1273 u32 opad[5];
1274
1275 u32 dgst[5];
1276 u32 out[5];
1277
1278 } sha1aix_tmp_t;
1279
1280 typedef struct
1281 {
1282 u32 ipad[8];
1283 u32 opad[8];
1284
1285 u32 dgst[8];
1286 u32 out[8];
1287
1288 } sha256aix_tmp_t;
1289
1290 typedef struct
1291 {
1292 u64 ipad[8];
1293 u64 opad[8];
1294
1295 u64 dgst[8];
1296 u64 out[8];
1297
1298 } sha512aix_tmp_t;
1299
1300 typedef struct
1301 {
1302 u32 ipad[8];
1303 u32 opad[8];
1304
1305 u32 dgst[8];
1306 u32 out[8];
1307
1308 } lastpass_tmp_t;
1309
1310 typedef struct
1311 {
1312 u64 digest_buf[8];
1313
1314 } drupal7_tmp_t;
1315
1316 typedef struct
1317 {
1318 u32 ipad[5];
1319 u32 opad[5];
1320
1321 u32 dgst[5];
1322 u32 out[5];
1323
1324 } lotus8_tmp_t;
1325
1326 typedef struct
1327 {
1328 u32 out[5];
1329
1330 } office2007_tmp_t;
1331
1332 typedef struct
1333 {
1334 u32 out[5];
1335
1336 } office2010_tmp_t;
1337
1338 typedef struct
1339 {
1340 u64 out[8];
1341
1342 } office2013_tmp_t;
1343
1344 typedef struct
1345 {
1346 u32 digest_buf[5];
1347
1348 } saph_sha1_tmp_t;
1349
1350 typedef struct
1351 {
1352 u32 block[16];
1353
1354 u32 dgst[8];
1355
1356 u32 block_len;
1357 u32 final_len;
1358
1359 } seven_zip_tmp_t;
1360
1361 typedef struct
1362 {
1363 u32 KEK[5];
1364
1365 u32 lsb[4];
1366 u32 cipher[4];
1367
1368 } axcrypt_tmp_t;
1369
1370 typedef struct
1371 {
1372 u32 tmp_digest[8];
1373
1374 } keepass_tmp_t;
1375
1376 typedef struct
1377 {
1378 u32 Kc[16];
1379 u32 Kd[16];
1380
1381 u32 iv[2];
1382
1383 } bsdicrypt_tmp_t;
1384
1385 typedef struct
1386 {
1387 u32 dgst[17][5];
1388
1389 } rar3_tmp_t;
1390
1391 typedef struct
1392 {
1393 u32 user[16];
1394
1395 } cram_md5_t;
1396
1397 typedef struct
1398 {
1399 u32 iv_buf[4];
1400 u32 iv_len;
1401
1402 u32 salt_buf[4];
1403 u32 salt_len;
1404
1405 u32 crc;
1406
1407 u32 data_buf[96];
1408 u32 data_len;
1409
1410 u32 unpack_size;
1411
1412 } seven_zip_t;
1413
1414 typedef struct
1415 {
1416 u32 key;
1417 u64 val;
1418
1419 } hcstat_table_t;
1420
1421 typedef struct
1422 {
1423 u32 cs_buf[0x100];
1424 u32 cs_len;
1425
1426 } cs_t;
1427
1428 typedef struct
1429 {
1430 u32 cmds[0x100];
1431
1432 } kernel_rule_t;
1433
1434 typedef struct
1435 {
1436 u32 salt_pos;
1437 u32 digest_pos;
1438 u32 hash_pos;
1439 u32 gidvid;
1440 u32 il_pos;
1441
1442 } plain_t;
1443
/*
 * Password candidate record: sixteen u32 words of data (64 bytes) plus a
 * length field. The three alignment_placeholder_* words bring the struct
 * to 20 words (80 bytes), a multiple of 16 bytes.
 * NOTE(review): pw_len is presumably a byte count; confirm against the
 * kernels that read it.
 */
1444 typedef struct
1445 {
1446 u32 i[16];
1447
1448 u32 pw_len;
1449
1450 u32 alignment_placeholder_1;
1451 u32 alignment_placeholder_2;
1452 u32 alignment_placeholder_3;
1453
1454 } pw_t;
1455
1456 typedef struct
1457 {
1458 u32 i;
1459
1460 } bf_t;
1461
1462 typedef struct
1463 {
1464 u32 i[8];
1465
1466 u32 pw_len;
1467
1468 } comb_t;
1469
1470 typedef struct
1471 {
1472 u32 b[32];
1473
1474 } bs_word_t;
1475
1476 typedef struct
1477 {
1478 uint4 P[64];
1479
1480 } scrypt_tmp_t;