Remove no longer needed allx() macro
[hashcat.git] / OpenCL / types_ocl.c
1 /**
2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * magnum <john.magnum@hushmail.com>
4 *
5 * License.....: MIT
6 */
7
// Device class IDs, compared against the host-defined DEVICE_TYPE macro
// to select CPU- vs GPU-specific code paths below.
#define DEVICE_TYPE_CPU 2
#define DEVICE_TYPE_GPU 4

// Short aliases for the OpenCL unsigned scalar integer types.
typedef uchar u8;
typedef ushort u16;
typedef uint u32;
typedef ulong u64;

// Kernels not yet ported to the SIMD code path always run scalar.
#ifndef NEW_SIMD_CODE
#undef VECT_SIZE
#define VECT_SIZE 1
#endif

// VTYPE pastes a scalar type name and a vector width into an OpenCL
// vector type name, e.g. VTYPE(uint, 4) -> uint4.
#define CONCAT(a, b) a##b
#define VTYPE(type, width) CONCAT(type, width)

// uNx: the VECT_SIZE-wide working types used throughout the kernels.
// Plain scalars when VECT_SIZE == 1 (OpenCL has no 1-element vector type).
#if VECT_SIZE == 1
typedef uchar u8x;
typedef ushort u16x;
typedef uint u32x;
typedef ulong u64x;
#else
typedef VTYPE(uchar, VECT_SIZE) u8x;
typedef VTYPE(ushort, VECT_SIZE) u16x;
typedef VTYPE(uint, VECT_SIZE) u32x;
typedef VTYPE(ulong, VECT_SIZE) u64x;
#endif
35
36 static inline u32 l32_from_64_S (u64 a)
37 {
38 const u32 r = (u32) (a);
39
40 return r;
41 }
42
43 static inline u32 h32_from_64_S (u64 a)
44 {
45 a >>= 32;
46
47 const u32 r = (u32) (a);
48
49 return r;
50 }
51
52 static inline u64 hl32_to_64_S (const u32 a, const u32 b)
53 {
54 return as_ulong ((uint2) (b, a));
55 }
56
// Low 32 bits of each 64-bit lane of a vector.
// Vector component access (.s0 .. .sf) only exists for widths >= 2, so
// each supported VECT_SIZE needs its own explicitly unrolled branch.
static inline u32x l32_from_64 (u64x a)
{
  u32x r;

#if VECT_SIZE == 1
  r = (u32) a;
#endif

#if VECT_SIZE >= 2
  r.s0 = (u32) a.s0;
  r.s1 = (u32) a.s1;
#endif

#if VECT_SIZE >= 4
  r.s2 = (u32) a.s2;
  r.s3 = (u32) a.s3;
#endif

#if VECT_SIZE >= 8
  r.s4 = (u32) a.s4;
  r.s5 = (u32) a.s5;
  r.s6 = (u32) a.s6;
  r.s7 = (u32) a.s7;
#endif

#if VECT_SIZE >= 16
  r.s8 = (u32) a.s8;
  r.s9 = (u32) a.s9;
  r.sa = (u32) a.sa;
  r.sb = (u32) a.sb;
  r.sc = (u32) a.sc;
  r.sd = (u32) a.sd;
  r.se = (u32) a.se;
  r.sf = (u32) a.sf;
#endif

  return r;
}
95
// High 32 bits of each 64-bit lane of a vector: shift the whole vector
// right by 32 (applies per lane), then truncate each lane to 32 bits.
static inline u32x h32_from_64 (u64x a)
{
  a >>= 32;

  u32x r;

#if VECT_SIZE == 1
  r = (u32) a;
#endif

#if VECT_SIZE >= 2
  r.s0 = (u32) a.s0;
  r.s1 = (u32) a.s1;
#endif

#if VECT_SIZE >= 4
  r.s2 = (u32) a.s2;
  r.s3 = (u32) a.s3;
#endif

#if VECT_SIZE >= 8
  r.s4 = (u32) a.s4;
  r.s5 = (u32) a.s5;
  r.s6 = (u32) a.s6;
  r.s7 = (u32) a.s7;
#endif

#if VECT_SIZE >= 16
  r.s8 = (u32) a.s8;
  r.s9 = (u32) a.s9;
  r.sa = (u32) a.sa;
  r.sb = (u32) a.sb;
  r.sc = (u32) a.sc;
  r.sd = (u32) a.sd;
  r.se = (u32) a.se;
  r.sf = (u32) a.sf;
#endif

  return r;
}
136
// Combine per-lane high words (a) and low words (b) into 64-bit lanes by
// reinterpreting each (low, high) uint2 pair as a ulong; component 0 of
// the pair occupies the low bytes (the low word on little-endian devices).
static inline u64x hl32_to_64 (const u32x a, const u32x b)
{
  u64x r;

#if VECT_SIZE == 1
  r = as_ulong ((uint2) (b, a));
#endif

#if VECT_SIZE >= 2
  r.s0 = as_ulong ((uint2) (b.s0, a.s0));
  r.s1 = as_ulong ((uint2) (b.s1, a.s1));
#endif

#if VECT_SIZE >= 4
  r.s2 = as_ulong ((uint2) (b.s2, a.s2));
  r.s3 = as_ulong ((uint2) (b.s3, a.s3));
#endif

#if VECT_SIZE >= 8
  r.s4 = as_ulong ((uint2) (b.s4, a.s4));
  r.s5 = as_ulong ((uint2) (b.s5, a.s5));
  r.s6 = as_ulong ((uint2) (b.s6, a.s6));
  r.s7 = as_ulong ((uint2) (b.s7, a.s7));
#endif

#if VECT_SIZE >= 16
  r.s8 = as_ulong ((uint2) (b.s8, a.s8));
  r.s9 = as_ulong ((uint2) (b.s9, a.s9));
  r.sa = as_ulong ((uint2) (b.sa, a.sa));
  r.sb = as_ulong ((uint2) (b.sb, a.sb));
  r.sc = as_ulong ((uint2) (b.sc, a.sc));
  r.sd = as_ulong ((uint2) (b.sd, a.sd));
  r.se = as_ulong ((uint2) (b.se, a.se));
  r.sf = as_ulong ((uint2) (b.sf, a.sf));
#endif

  return r;
}
175
176 #ifdef IS_AMD
177 static inline u32 swap32_S (const u32 v)
178 {
179 return (as_uint (as_uchar4 (v).s3210));
180 }
181
182 static inline u64 swap64_S (const u64 v)
183 {
184 return (as_ulong (as_uchar8 (v).s76543210));
185 }
186
187 static inline u32 rotr32_S (const u32 a, const u32 n)
188 {
189 return rotate (a, 32 - n);
190 }
191
192 static inline u32 rotl32_S (const u32 a, const u32 n)
193 {
194 return rotate (a, n);
195 }
196
// 64-bit right rotate. On AMD GPUs with the amd_bitalign intrinsic
// (cl_amd_media_ops) the rotate is built from two 32-bit funnel shifts;
// everywhere else fall back to the generic OpenCL rotate() builtin.
static inline u64 rotr64_S (const u64 a, const u32 n)
{
#if (DEVICE_TYPE == DEVICE_TYPE_GPU)

#ifdef amd_bitalign

  const u32 a0 = h32_from_64_S (a);
  const u32 a1 = l32_from_64_S (a);

  // amd_bitalign (hi, lo, s) yields 32 bits of hi:lo shifted right by s.
  // For n >= 32 the halves exchange roles and the shift reduces by 32.
  const u32 t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
  const u32 t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);

  const u64 r = hl32_to_64_S (t0, t1);

#else

  const u64 r = rotate (a, (u64) 64 - n);

#endif

#else

  const u64 r = rotate (a, (u64) 64 - n);

#endif

  return r;
}
225
226 static inline u64 rotl64_S (const u64 a, const u32 n)
227 {
228 return rotr64_S (a, 64 - n);
229 }
230
231 static inline u32x swap32 (const u32x v)
232 {
233 return ((v >> 24) & 0x000000ff)
234 | ((v >> 8) & 0x0000ff00)
235 | ((v << 8) & 0x00ff0000)
236 | ((v << 24) & 0xff000000);
237 }
238
239 static inline u64x swap64 (const u64x v)
240 {
241 return ((v >> 56) & 0x00000000000000ff)
242 | ((v >> 40) & 0x000000000000ff00)
243 | ((v >> 24) & 0x0000000000ff0000)
244 | ((v >> 8) & 0x00000000ff000000)
245 | ((v << 8) & 0x000000ff00000000)
246 | ((v << 24) & 0x0000ff0000000000)
247 | ((v << 40) & 0x00ff000000000000)
248 | ((v << 56) & 0xff00000000000000);
249 }
250
251 static inline u32x rotr32 (const u32x a, const u32 n)
252 {
253 return rotate (a, 32 - n);
254 }
255
256 static inline u32x rotl32 (const u32x a, const u32 n)
257 {
258 return rotate (a, n);
259 }
260
// 64-bit right rotate, applied per vector lane. Mirrors rotr64_S: on AMD
// GPUs with amd_bitalign the rotate is assembled from two 32-bit funnel
// shifts; otherwise the generic rotate() builtin is used.
static inline u64x rotr64 (const u64x a, const u32 n)
{
#if (DEVICE_TYPE == DEVICE_TYPE_GPU)

#ifdef amd_bitalign
  const u32x a0 = h32_from_64 (a);
  const u32x a1 = l32_from_64 (a);

  // For n >= 32 the halves exchange roles and the shift reduces by 32.
  const u32x t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
  const u32x t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);

  const u64x r = hl32_to_64 (t0, t1);

#else

  const u64x r = rotate (a, (u64) 64 - n);

#endif

#else

  const u64x r = rotate (a, (u64) 64 - n);

#endif

  return r;
}
288
289 static inline u64x rotl64 (const u64x a, const u32 n)
290 {
291 return rotr64 (a, 64 - n);
292 }
293
// Bit-field extract: return c bits of a starting at bit position b,
// zero-extended. Uses the amd_bfe intrinsic when available, otherwise
// a shift-and-mask fallback.
static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
#ifdef amd_bfe
  return amd_bfe (a, b, c);
#else
// NOTE(review): these helper macros are never #undef'd, so they leak
// into the rest of the translation unit — confirm nothing later relies
// on BIT/BIT_MASK/BFE before scoping them.
#define BIT(x) (1 << (x))
#define BIT_MASK(x) (BIT (x) - 1)
#define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z))

  return BFE (a, b, c);
#endif
}
306
307 static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
308 {
309 #ifdef amd_bytealign
310 return amd_bytealign (a, b, c);
311 #else
312 const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);
313
314 return (u32) (tmp);
315 #endif
316 }
317 #endif
318
319 #ifdef IS_NV
// Byte-reverse a 32-bit word using the PTX prmt (byte permute)
// instruction; selector 0x0123 picks the source bytes in reverse order.
static inline u32 swap32_S (const u32 v)
{
  u32 r;

  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));

  return r;
}
328
// Byte-reverse a 64-bit word: unpack into two 32-bit halves, byte-reverse
// each half with prmt, then repack with the halves exchanged.
static inline u64 swap64_S (const u64 v)
{
  u32 il;
  u32 ir;

  // Unpack the 64-bit value into two 32-bit registers.
  asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(v));

  u32 tl;
  u32 tr;

  // Byte-reverse each half (selector 0x0123 reverses the four bytes).
  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl) : "r"(il));
  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));

  u64 r;

  // Repack in the opposite order to exchange the two halves.
  asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));

  return r;
}
348
349 static inline u32 rotr32_S (const u32 a, const u32 n)
350 {
351 return rotate (a, 32 - n);
352 }
353
354 static inline u32 rotl32_S (const u32 a, const u32 n)
355 {
356 return rotate (a, n);
357 }
358
#if CUDA_ARCH >= 350
// 64-bit right rotate using the sm_35+ funnel-shift instruction
// (shf.r.wrap.b32), which concatenates two 32-bit sources, shifts right
// and wraps the shift amount.
static inline u64 rotr64_S (const u64 a, const u32 n)
{
  u32 il;
  u32 ir;

  // Unpack the 64-bit value into two 32-bit halves.
  asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));

  u32 tl;
  u32 tr;

  if (n >= 32)
  {
    // Rotating by 32 or more: the halves exchange roles, shift by n - 32.
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
  }
  else
  {
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
  }

  u64 r;

  // Repack the rotated halves.
  asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));

  return r;
}
#else
// Pre-sm_35 fallback: generic rotate; a right rotate by n is a left
// rotate by 64 - n.
static inline u64 rotr64_S (const u64 a, const u32 n)
{
  return rotate (a, (u64) 64 - n);
}
#endif
393
394 static inline u64 rotl64_S (const u64 a, const u32 n)
395 {
396 return rotr64_S (a, 64 - n);
397 }
398
#if CUDA_ARCH >= 500
// Scalar three-input logic ops via the Maxwell (sm_50+) LOP3.LUT
// instruction. The immediate byte is the 8-entry truth table over the
// inputs with a = 0xf0, b = 0xcc, c = 0xaa; e.g. 0x96 = a ^ b ^ c and
// 0xe8 = MAJ (a, b, c).
static inline u32 lut3_2d_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_39_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_59_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

// 0x96: three-way XOR, a ^ b ^ c.
static inline u32 lut3_96_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_e4_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

// 0xe8: bitwise majority, (a & b) | (a & c) | (b & c).
static inline u32 lut3_e8_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_ca_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}
#endif
463
// PTX prmt.b32: build a 32-bit result by selecting arbitrary bytes from
// the 64-bit pair {b, a}, one selector nibble of c per result byte
// (same contract as CUDA's __byte_perm).
static inline u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));

  return r;
}
472
473 static inline u32x swap32 (const u32x v)
474 {
475 return ((v >> 24) & 0x000000ff)
476 | ((v >> 8) & 0x0000ff00)
477 | ((v << 8) & 0x00ff0000)
478 | ((v << 24) & 0xff000000);
479 }
480
481 static inline u64x swap64 (const u64x v)
482 {
483 return ((v >> 56) & 0x00000000000000ff)
484 | ((v >> 40) & 0x000000000000ff00)
485 | ((v >> 24) & 0x0000000000ff0000)
486 | ((v >> 8) & 0x00000000ff000000)
487 | ((v << 8) & 0x000000ff00000000)
488 | ((v << 24) & 0x0000ff0000000000)
489 | ((v << 40) & 0x00ff000000000000)
490 | ((v << 56) & 0xff00000000000000);
491 }
492
493 static inline u32x rotr32 (const u32x a, const u32 n)
494 {
495 return rotate (a, 32 - n);
496 }
497
498 static inline u32x rotl32 (const u32x a, const u32 n)
499 {
500 return rotate (a, n);
501 }
502
503 #if CUDA_ARCH >= 350
504 static inline u64x rotr64 (const u64x a, const u32 n)
505 {
506 u64x r;
507
508 u32 il;
509 u32 ir;
510 u32 tl;
511 u32 tr;
512
513 #if VECT_SIZE == 1
514
515 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
516
517 if (n >= 32)
518 {
519 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
520 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
521 }
522 else
523 {
524 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
525 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
526 }
527
528 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
529
530 #endif
531
532 #if VECT_SIZE >= 2
533
534 {
535 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s0));
536
537 if (n >= 32)
538 {
539 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
540 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
541 }
542 else
543 {
544 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
545 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
546 }
547
548 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s0) : "r"(tl), "r"(tr));
549 }
550
551 {
552 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s1));
553
554 if (n >= 32)
555 {
556 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
557 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
558 }
559 else
560 {
561 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
562 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
563 }
564
565 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s1) : "r"(tl), "r"(tr));
566 }
567
568 #endif
569
570 #if VECT_SIZE >= 4
571
572 {
573 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s2));
574
575 if (n >= 32)
576 {
577 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
578 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
579 }
580 else
581 {
582 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
583 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
584 }
585
586 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s2) : "r"(tl), "r"(tr));
587 }
588
589 {
590 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s3));
591
592 if (n >= 32)
593 {
594 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
595 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
596 }
597 else
598 {
599 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
600 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
601 }
602
603 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s3) : "r"(tl), "r"(tr));
604 }
605
606 #endif
607
608 #if VECT_SIZE >= 8
609
610 {
611 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s4));
612
613 if (n >= 32)
614 {
615 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
616 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
617 }
618 else
619 {
620 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
621 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
622 }
623
624 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s4) : "r"(tl), "r"(tr));
625 }
626
627 {
628 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s5));
629
630 if (n >= 32)
631 {
632 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
633 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
634 }
635 else
636 {
637 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
638 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
639 }
640
641 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s5) : "r"(tl), "r"(tr));
642 }
643
644 {
645 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s6));
646
647 if (n >= 32)
648 {
649 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
650 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
651 }
652 else
653 {
654 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
655 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
656 }
657
658 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s6) : "r"(tl), "r"(tr));
659 }
660
661 {
662 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s7));
663
664 if (n >= 32)
665 {
666 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
667 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
668 }
669 else
670 {
671 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
672 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
673 }
674
675 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s7) : "r"(tl), "r"(tr));
676 }
677
678 #endif
679
680 #if VECT_SIZE >= 16
681
682 {
683 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s8));
684
685 if (n >= 32)
686 {
687 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
688 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
689 }
690 else
691 {
692 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
693 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
694 }
695
696 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s8) : "r"(tl), "r"(tr));
697 }
698
699 {
700 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s9));
701
702 if (n >= 32)
703 {
704 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
705 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
706 }
707 else
708 {
709 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
710 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
711 }
712
713 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s9) : "r"(tl), "r"(tr));
714 }
715
716 {
717 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sa));
718
719 if (n >= 32)
720 {
721 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
722 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
723 }
724 else
725 {
726 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
727 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
728 }
729
730 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sa) : "r"(tl), "r"(tr));
731 }
732
733 {
734 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sb));
735
736 if (n >= 32)
737 {
738 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
739 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
740 }
741 else
742 {
743 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
744 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
745 }
746
747 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sb) : "r"(tl), "r"(tr));
748 }
749
750 {
751 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sc));
752
753 if (n >= 32)
754 {
755 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
756 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
757 }
758 else
759 {
760 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
761 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
762 }
763
764 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sc) : "r"(tl), "r"(tr));
765 }
766
767 {
768 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sd));
769
770 if (n >= 32)
771 {
772 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
773 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
774 }
775 else
776 {
777 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
778 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
779 }
780
781 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sd) : "r"(tl), "r"(tr));
782 }
783
784 {
785 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.se));
786
787 if (n >= 32)
788 {
789 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
790 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
791 }
792 else
793 {
794 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
795 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
796 }
797
798 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.se) : "r"(tl), "r"(tr));
799 }
800
801 {
802 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sf));
803
804 if (n >= 32)
805 {
806 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
807 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
808 }
809 else
810 {
811 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
812 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
813 }
814
815 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sf) : "r"(tl), "r"(tr));
816 }
817
818 #endif
819
820 return r;
821 }
822 #else
823 static inline u64x rotr64 (const u64x a, const u32 n)
824 {
825 return rotate (a, (u64) 64 - n);
826 }
827 #endif
828
829 static inline u64x rotl64 (const u64x a, const u32 n)
830 {
831 return rotr64 (a, (u64) 64 - n);
832 }
833
834 static inline u32x __byte_perm (const u32x a, const u32x b, const u32x c)
835 {
836 u32x r;
837
838 #if VECT_SIZE == 1
839 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c) );
840 #endif
841
842 #if VECT_SIZE >= 2
843 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(a.s0), "r"(b.s0), "r"(c.s0));
844 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(a.s1), "r"(b.s1), "r"(c.s1));
845 #endif
846
847 #if VECT_SIZE >= 4
848 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(a.s2), "r"(b.s2), "r"(c.s2));
849 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(a.s3), "r"(b.s3), "r"(c.s3));
850 #endif
851
852 #if VECT_SIZE >= 8
853 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(a.s4), "r"(b.s4), "r"(c.s4));
854 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(a.s5), "r"(b.s5), "r"(c.s5));
855 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(a.s6), "r"(b.s6), "r"(c.s6));
856 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(a.s7), "r"(b.s7), "r"(c.s7));
857 #endif
858
859 #if VECT_SIZE >= 16
860 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(a.s8), "r"(b.s8), "r"(c.s8));
861 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(a.s9), "r"(b.s9), "r"(c.s9));
862 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(a.sa), "r"(b.sa), "r"(c.sa));
863 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(a.sb), "r"(b.sb), "r"(c.sb));
864 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(a.sc), "r"(b.sc), "r"(c.sc));
865 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(a.sd), "r"(b.sd), "r"(c.sd));
866 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(a.se), "r"(b.se), "r"(c.se));
867 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(a.sf), "r"(b.sf), "r"(c.sf));
868 #endif
869
870 return r;
871 }
872
// PTX bfe.u32: extract c bits of a starting at bit position b,
// zero-extended into the result.
static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));

  return r;
}
881
#if CUDA_ARCH >= 350
// Emulate AMD's amd_bytealign on NV: extract 32 bits of the pair b:a
// shifted right by (c & 3) bytes, using the sm_35+ funnel shift.
static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));

  return r;
}
#else
// Pre-sm_35: the same byte extraction via byte permute; shifting the
// 0x76543210 nibble pattern selects bytes (c & 3) .. (c & 3) + 3 from
// the pair {b, a}.
static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
{
  return __byte_perm_S (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);
}
#endif
897
898 #if CUDA_ARCH >= 500
899 static inline u32x lut3_2d (const u32x a, const u32x b, const u32x c)
900 {
901 u32x r;
902
903 #if VECT_SIZE == 1
904 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
905 #endif
906
907 #if VECT_SIZE >= 2
908 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
909 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
910 #endif
911
912 #if VECT_SIZE >= 4
913 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
914 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
915 #endif
916
917 #if VECT_SIZE >= 8
918 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
919 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
920 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
921 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
922 #endif
923
924 #if VECT_SIZE >= 16
925 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
926 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
927 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
928 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
929 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
930 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
931 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
932 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
933 #endif
934
935 return r;
936 }
937
938 static inline u32x lut3_39 (const u32x a, const u32x b, const u32x c)
939 {
940 u32x r;
941
942 #if VECT_SIZE == 1
943 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
944 #endif
945
946 #if VECT_SIZE >= 2
947 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
948 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
949 #endif
950
951 #if VECT_SIZE >= 4
952 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
953 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
954 #endif
955
956 #if VECT_SIZE >= 8
957 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
958 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
959 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
960 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
961 #endif
962
963 #if VECT_SIZE >= 16
964 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
965 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
966 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
967 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
968 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
969 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
970 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
971 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
972 #endif
973
974 return r;
975 }
976
977 static inline u32x lut3_59 (const u32x a, const u32x b, const u32x c)
978 {
979 u32x r;
980
981 #if VECT_SIZE == 1
982 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
983 #endif
984
985 #if VECT_SIZE >= 2
986 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
987 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
988 #endif
989
990 #if VECT_SIZE >= 4
991 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
992 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
993 #endif
994
995 #if VECT_SIZE >= 8
996 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
997 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
998 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
999 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1000 #endif
1001
1002 #if VECT_SIZE >= 16
1003 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1004 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1005 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1006 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1007 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1008 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1009 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1010 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1011 #endif
1012
1013 return r;
1014 }
1015
1016 static inline u32x lut3_96 (const u32x a, const u32x b, const u32x c)
1017 {
1018 u32x r;
1019
1020 #if VECT_SIZE == 1
1021 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1022 #endif
1023
1024 #if VECT_SIZE >= 2
1025 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1026 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1027 #endif
1028
1029 #if VECT_SIZE >= 4
1030 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1031 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1032 #endif
1033
1034 #if VECT_SIZE >= 8
1035 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1036 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1037 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1038 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1039 #endif
1040
1041 #if VECT_SIZE >= 16
1042 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1043 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1044 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1045 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1046 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1047 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1048 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1049 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1050 #endif
1051
1052 return r;
1053 }
1054
/**
 * Per-lane 3-input bitwise function with truth table 0xe4, emitted as a
 * single NVIDIA LOP3.B32 instruction per 32-bit component.  Each result bit
 * is imm[(a_bit << 2) | (b_bit << 1) | c_bit] with imm = 0xe4.
 * NOTE(review): inline PTX — only valid on NVIDIA targets; assumed guarded
 * by an NV-specific #ifdef opened before this chunk (confirm against full file).
 */
static inline u32x lut3_e4 (const u32x a, const u32x b, const u32x c)
{
  u32x r;

  /* LOP3 is a scalar instruction, so each vector component needs its own
     asm statement; exactly one of the following groups is active. */

#if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
#endif

#if VECT_SIZE >= 2
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
#endif

#if VECT_SIZE >= 4
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
#endif

#if VECT_SIZE >= 8
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
#endif

#if VECT_SIZE >= 16
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
#endif

  return r;
}
1093
/**
 * Per-lane 3-input bitwise function with truth table 0xe8 — the bitwise
 * majority MAJ(a,b,c) = (a&b) | (a&c) | (b&c) — emitted as one NVIDIA
 * LOP3.B32 instruction per 32-bit component.
 * NOTE(review): inline PTX — NVIDIA-only; assumed guarded by an NV-specific
 * #ifdef opened before this chunk (confirm against full file).
 */
static inline u32x lut3_e8 (const u32x a, const u32x b, const u32x c)
{
  u32x r;

  /* One asm statement per component; exactly one group is active. */

#if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
#endif

#if VECT_SIZE >= 2
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
#endif

#if VECT_SIZE >= 4
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
#endif

#if VECT_SIZE >= 8
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
#endif

#if VECT_SIZE >= 16
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
#endif

  return r;
}
1132
/**
 * Per-lane 3-input bitwise function with truth table 0xca — the bitwise
 * select a ? b : c, i.e. (a & b) | (~a & c) — emitted as one NVIDIA
 * LOP3.B32 instruction per 32-bit component.
 * NOTE(review): inline PTX — NVIDIA-only; assumed guarded by an NV-specific
 * #ifdef opened before this chunk (confirm against full file).
 */
static inline u32x lut3_ca (const u32x a, const u32x b, const u32x c)
{
  u32x r;

  /* One asm statement per component; exactly one group is active. */

#if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
#endif

#if VECT_SIZE >= 2
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
#endif

#if VECT_SIZE >= 4
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
#endif

#if VECT_SIZE >= 8
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
#endif

#if VECT_SIZE >= 16
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
#endif

  return r;
}
1171
1172 #endif
1173 #endif
1174
1175 #ifdef IS_GENERIC
/* Scalar 32-bit byte swap (endianness flip) via OpenCL as_type
   reinterpretation: view v as 4 bytes and reverse their order. */
static inline u32 swap32_S (const u32 v)
{
  return (as_uint (as_uchar4 (v).s3210));
}
1180
/* Scalar 64-bit byte swap via as_type reinterpretation: view v as 8 bytes
   and reverse their order. */
static inline u64 swap64_S (const u64 v)
{
  return (as_ulong (as_uchar8 (v).s76543210));
}
1185
/* Scalar rotate-right by n bits, expressed as the OpenCL left-rotate
   builtin by (32 - n).  rotate() takes the count modulo the bit width,
   so n == 0 is safe. */
static inline u32 rotr32_S (const u32 a, const u32 n)
{
  return rotate (a, 32 - n);
}
1190
/* Scalar rotate-left by n bits (thin wrapper over the OpenCL builtin). */
static inline u32 rotl32_S (const u32 a, const u32 n)
{
  return rotate (a, n);
}
1195
/* Scalar 64-bit rotate-right by n bits via left-rotate by (64 - n).
   The (u64) cast makes the count operand 64-bit to match rotate()'s
   gentype; the count is taken modulo 64, so n == 0 is safe. */
static inline u64 rotr64_S (const u64 a, const u32 n)
{
  return rotate (a, (u64) 64 - n);
}
1200
/* Scalar 64-bit rotate-left by n bits; count widened to u64 to match
   rotate()'s gentype. */
static inline u64 rotl64_S (const u64 a, const u32 n)
{
  return rotate (a, (u64) n);
}
1205
/* Generic (non-AMD) emulation of AMD's amd_bytealign: form the 64-bit
   value a:b (a in the high word), shift right by (c & 3) bytes, and
   return the low 32 bits.  Only the low 2 bits of c are honored, matching
   the hardware intrinsic's byte-offset semantics. */
static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
{
  const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);

  return (u32) (tmp);
}
1212
1213 static inline u32x swap32 (const u32x v)
1214 {
1215 return ((v >> 24) & 0x000000ff)
1216 | ((v >> 8) & 0x0000ff00)
1217 | ((v << 8) & 0x00ff0000)
1218 | ((v << 24) & 0xff000000);
1219 }
1220
1221 static inline u64x swap64 (const u64x v)
1222 {
1223 return ((v >> 56) & 0x00000000000000ff)
1224 | ((v >> 40) & 0x000000000000ff00)
1225 | ((v >> 24) & 0x0000000000ff0000)
1226 | ((v >> 8) & 0x00000000ff000000)
1227 | ((v << 8) & 0x000000ff00000000)
1228 | ((v << 24) & 0x0000ff0000000000)
1229 | ((v << 40) & 0x00ff000000000000)
1230 | ((v << 56) & 0xff00000000000000);
1231 }
1232
/* Vector rotate-right by n bits via left-rotate by (32 - n); the scalar
   count broadcasts across all lanes.  Count is taken modulo 32, so n == 0
   is safe. */
static inline u32x rotr32 (const u32x a, const u32 n)
{
  return rotate (a, 32 - n);
}
1237
/* Vector rotate-left by n bits (thin wrapper over the OpenCL builtin). */
static inline u32x rotl32 (const u32x a, const u32 n)
{
  return rotate (a, n);
}
1242
/* Vector 64-bit rotate-right by n bits via left-rotate by (64 - n);
   count widened to u64 and broadcast across lanes. */
static inline u64x rotr64 (const u64x a, const u32 n)
{
  return rotate (a, (u64) 64 - n);
}
1247
/* Vector 64-bit rotate-left by n bits; count widened to u64 and broadcast
   across lanes. */
static inline u64x rotl64 (const u64x a, const u32 n)
{
  return rotate (a, (u64) n);
}
1252
/* Generic bit-field extract: returns c bits of a starting at bit position b,
   emulating the hardware BFE intrinsic available on other targets.
   NOTE(review): the BIT/BIT_MASK/BFE macros are not #undef'd and therefore
   remain defined for the rest of the translation unit — left as-is in case
   later code relies on them.  BIT_MASK(32) would be UB (shift by width);
   presumably callers keep c < 32 — confirm at call sites. */
static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
  #define BIT(x)      (1 << (x))
  #define BIT_MASK(x) (BIT (x) - 1)
  #define BFE(x,y,z)  (((x) >> (y)) & BIT_MASK (z))

  return BFE (a, b, c);
}
1261
/* Vectorized generic emulation of AMD's amd_bytealign: per lane, form the
   64-bit value a:b (a high), shift right by (c & 3) bytes, return the low
   32 bits.  Exactly one branch compiles per build; each widens the u32x
   lanes into a u64x lane-by-lane via vector-literal construction, because
   a direct (u64x) cast would convert values rather than pair them up.
   NOTE(review): the function relies on VECT_SIZE being one of 1/2/4/8/16 —
   any other value would fall through without a return. */
static inline u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
{
#if VECT_SIZE == 1
  const u64x tmp = ((((u64x) (a)) << 32) | ((u64x) (b))) >> ((c & 3) * 8);

  return (u32x) (tmp);
#endif

#if VECT_SIZE == 2
  const u64x tmp = ((((u64x) (a.s0, a.s1)) << 32) | ((u64x) (b.s0, b.s1))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1);
#endif

#if VECT_SIZE == 4
  const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3);
#endif

#if VECT_SIZE == 8
  const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7);
#endif

#if VECT_SIZE == 16
  const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7, a.s8, a.s9, a.sa, a.sb, a.sc, a.sd, a.se, a.sf)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7, b.s8, b.s9, b.sa, b.sb, b.sc, b.sd, b.se, b.sf))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7, tmp.s8, tmp.s9, tmp.sa, tmp.sb, tmp.sc, tmp.sd, tmp.se, tmp.sf);
#endif
}
1294 #endif
1295
/**
 * Per-algorithm raw digest buffer as stored and compared by the kernels.
 * Exactly one _X_ guard is defined per kernel build; it selects the digest
 * size (in 32-bit words) for that hash mode.
 *
 * Fix: removed two unreachable duplicate branches further down the chain —
 * a second `#elif defined _SCRYPT_` (which contradicted the live [8] size
 * with [4]) and a second `#elif defined _LOTUS8_`.  In an #if/#elif chain
 * only the first matching branch is taken, so the duplicates were dead code.
 */
typedef struct
{
#if   defined _DES_
  u32  digest_buf[4];
#elif defined _MD4_
  u32  digest_buf[4];
#elif defined _MD5_
  u32  digest_buf[4];
#elif defined _MD5H_
  u32  digest_buf[4];
#elif defined _SHA1_
  u32  digest_buf[5];
#elif defined _BCRYPT_
  u32  digest_buf[6];
#elif defined _SHA256_
  u32  digest_buf[8];
#elif defined _SHA384_
  u32  digest_buf[16];
#elif defined _SHA512_
  u32  digest_buf[16];
#elif defined _KECCAK_
  u32  digest_buf[50];
#elif defined _RIPEMD160_
  u32  digest_buf[5];
#elif defined _WHIRLPOOL_
  u32  digest_buf[16];
#elif defined _GOST_
  u32  digest_buf[8];
#elif defined _GOST2012_256_
  u32  digest_buf[8];
#elif defined _GOST2012_512_
  u32  digest_buf[16];
#elif defined _SAPB_
  u32  digest_buf[4];
#elif defined _SAPG_
  u32  digest_buf[5];
#elif defined _MYSQL323_
  u32  digest_buf[4];
#elif defined _LOTUS5_
  u32  digest_buf[4];
#elif defined _LOTUS6_
  u32  digest_buf[4];
#elif defined _SCRYPT_
  u32  digest_buf[8];
#elif defined _LOTUS8_
  u32  digest_buf[4];
#elif defined _OFFICE2007_
  u32  digest_buf[4];
#elif defined _OFFICE2010_
  u32  digest_buf[4];
#elif defined _OFFICE2013_
  u32  digest_buf[4];
#elif defined _OLDOFFICE01_
  u32  digest_buf[4];
#elif defined _OLDOFFICE34_
  u32  digest_buf[4];
#elif defined _SIPHASH_
  u32  digest_buf[4];
#elif defined _PBKDF2_MD5_
  u32  digest_buf[32];
#elif defined _PBKDF2_SHA1_
  u32  digest_buf[32];
#elif defined _PBKDF2_SHA256_
  u32  digest_buf[32];
#elif defined _PBKDF2_SHA512_
  u32  digest_buf[32];
#elif defined _PDF17L8_
  u32  digest_buf[8];
#elif defined _CRC32_
  u32  digest_buf[4];
#elif defined _SEVEN_ZIP_
  u32  digest_buf[4];
#elif defined _ANDROIDFDE_
  u32  digest_buf[4];
#elif defined _DCC2_
  u32  digest_buf[4];
#elif defined _WPA_
  u32  digest_buf[4];
#elif defined _MD5_SHA1_
  u32  digest_buf[4];
#elif defined _SHA1_MD5_
  u32  digest_buf[5];
#elif defined _NETNTLMV2_
  u32  digest_buf[4];
#elif defined _KRB5PA_
  u32  digest_buf[4];
#elif defined _CLOUDKEY_
  u32  digest_buf[8];
#elif defined _PSAFE2_
  u32  digest_buf[5];
#elif defined _RAR3_
  u32  digest_buf[4];
#elif defined _SHA256_SHA1_
  u32  digest_buf[8];
#elif defined _MS_DRSR_
  u32  digest_buf[8];
#elif defined _ANDROIDFDE_SAMSUNG_
  u32  digest_buf[8];
#elif defined _RAR5_
  u32  digest_buf[4];
#elif defined _KRB5TGS_
  u32  digest_buf[4];
#elif defined _AXCRYPT_
  u32  digest_buf[4];
#elif defined _KEEPASS_
  u32  digest_buf[4];
#endif

} digest_t;
1409
/**
 * Per-hash salt plus attack bookkeeping shared between host and device.
 * Layout must match the host-side definition exactly — do not reorder.
 */
typedef struct
{
  u32 salt_buf[16];     // raw salt data
  u32 salt_buf_pc[8];   // precomputed salt variant (algorithm-specific; confirm against host code)

  u32 salt_len;         // length of salt_buf in bytes
  u32 salt_iter;        // iteration count for slow-hash loops
  u32 salt_sign[2];

  u32 keccak_mdlen;     // digest length override used by Keccak modes
  u32 truecrypt_mdlen;  // digest length override used by TrueCrypt modes

  u32 digests_cnt;      // digests bound to this salt
  u32 digests_done;     // how many of them are already cracked

  u32 digests_offset;   // index of this salt's first digest in the global list

  // scrypt cost parameters and tuning
  u32 scrypt_N;
  u32 scrypt_r;
  u32 scrypt_p;
  u32 scrypt_tmto;      // time-memory trade-off factor
  u32 scrypt_phy;

} salt_t;
1434
/**
 * PDF encryption parameters (per-hash extra data).  V/R/P are the PDF
 * security-handler version, revision and permission flags; id/u/o are the
 * document ID and user/owner password blobs with their lengths.
 */
typedef struct
{
  int  V;
  int  R;
  int  P;

  int  enc_md;          // encryption metadata flag (presumably /EncryptMetadata — confirm)

  u32  id_buf[8];
  u32  u_buf[32];
  u32  o_buf[32];

  int  id_len;
  int  o_len;
  int  u_len;

  u32  rc4key[2];
  u32  rc4data[2];

} pdf_t;
1455
/**
 * WPA/WPA2 handshake extra data: PKE expansion buffer, the captured EAPOL
 * frame, key version, and the original MACs/nonces of both stations.
 */
typedef struct
{
  u32 pke[25];
  u32 eapol[64];
  int eapol_size;       // EAPOL frame length in bytes
  int keyver;           // 1 = HMAC-MD5, 2 = HMAC-SHA1 — presumably; confirm against host code
  u8  orig_mac1[6];
  u8  orig_mac2[6];
  u8  orig_nonce1[32];
  u8  orig_nonce2[32];

} wpa_t;
1468
/*
 * Per-algorithm extra data ("esalt") structs.  Each is copied verbatim from
 * the host, so field order and sizes are ABI — do not reorder or resize.
 * Comments describe the fields as visible here; semantics marked
 * "presumably" should be confirmed against the host-side parsers.
 */

// Bitcoin wallet: encrypted master key, ckey and public key blobs plus lengths.
typedef struct
{
  u32 cry_master_buf[64];
  u32 ckey_buf[64];
  u32 public_key_buf[64];

  u32 cry_master_len;
  u32 ckey_len;
  u32 public_key_len;

} bitcoin_wallet_t;

// SIP digest authentication: static salt portion plus per-hash esalt portion.
typedef struct
{
  u32 salt_buf[30];
  u32 salt_len;

  u32 esalt_buf[38];
  u32 esalt_len;

} sip_t;

// Android FDE: raw encrypted disk data block.
typedef struct
{
  u32 data[384];

} androidfde_t;

// IKE PSK: nonce and message buffers with byte lengths.
typedef struct
{
  u32 nr_buf[16];
  u32 nr_len;

  u32 msg_buf[128];
  u32 msg_len;

} ikepsk_t;

// NetNTLM: user/domain identity and server/client challenge buffers.
typedef struct
{
  u32 user_len;
  u32 domain_len;
  u32 srvchall_len;
  u32 clichall_len;

  u32 userdomain_buf[64];
  u32 chall_buf[256];

} netntlm_t;

// Kerberos 5 pre-auth (AS-REQ): principal, realm, salt, encrypted timestamp
// and its checksum.
typedef struct
{
  u32 user[16];
  u32 realm[16];
  u32 salt[32];
  u32 timestamp[16];
  u32 checksum[4];

} krb5pa_t;

// Kerberos 5 TGS-REP: account info, ticket checksum and encrypted data.
typedef struct
{
  u32 account_info[512];
  u32 checksum[4];
  u32 edata2[2560];
  u32 edata2_len;

} krb5tgs_t;

// TrueCrypt/VeraCrypt: header salt, encrypted header data, optional keyfile.
typedef struct
{
  u32 salt_buf[16];
  u32 data_buf[112];
  u32 keyfile_buf[16];

} tc_t;

// PBKDF2-HMAC-MD5 salt.
typedef struct
{
  u32 salt_buf[16];

} pbkdf2_md5_t;

// PBKDF2-HMAC-SHA1 salt.
typedef struct
{
  u32 salt_buf[16];

} pbkdf2_sha1_t;

// PBKDF2-HMAC-SHA256 salt.
typedef struct
{
  u32 salt_buf[16];

} pbkdf2_sha256_t;

// PBKDF2-HMAC-SHA512 salt (wider buffer for the larger block size).
typedef struct
{
  u32 salt_buf[32];

} pbkdf2_sha512_t;

// IPMI2 RAKP: long session salt plus its byte length.
typedef struct
{
  u32 salt_buf[128];
  u32 salt_len;

} rakp_t;

// CloudKeychain: data blob plus byte length.
typedef struct
{
  u32 data_len;
  u32 data_buf[512];

} cloudkey_t;

// MS Office 2007: verifier pair and AES key size.
typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[5];

  u32 keySize;

} office2007_t;

// MS Office 2010: verifier pair.
typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[8];

} office2010_t;

// MS Office 2013: verifier pair.
typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[8];

} office2013_t;

// Legacy Office (MD5/RC4 era): version, verifier pair, optional RC4 key material.
typedef struct
{
  u32 version;
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[4];
  u32 rc4key[2];

} oldoffice01_t;

// Legacy Office (SHA1/RC4 era): version, verifier pair, optional RC4 key material.
typedef struct
{
  u32 version;
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[5];
  u32 rc4key[2];

} oldoffice34_t;

// KeePass database: header material for both format versions; the large
// contents[] buffer is only used by version 1, expected_bytes by version 2.
typedef struct
{
  u32 version;
  u32 algorithm;

  /* key-file handling */
  u32 keyfile_len;
  u32 keyfile[8];

  u32 final_random_seed[8];
  u32 transf_random_seed[8];
  u32 enc_iv[4];
  u32 contents_hash[8];

  /* specific to version 1 */
  u32 contents_len;
  u32 contents[75000];

  /* specific to version 2 */
  u32 expected_bytes[8];

} keepass_t;
1647
/*
 * Per-work-item scratch ("tmp") structs carried between kernel invocations
 * of iterated algorithms (init -> loop -> comp).  Layout is ABI with the
 * host buffer allocation — do not reorder or resize.  The recurring
 * ipad/opad fields are, by naming convention, precomputed HMAC inner/outer
 * pad states; dgst/out hold the running digest and accumulated output.
 */

// PDF 1.4 (RC4/MD5): current digest and RC4 output state.
typedef struct
{
  u32 digest[4];
  u32 out[4];

} pdf14_tmp_t;

// PDF 1.7 level 8: digest viewable as 32- or 64-bit words, plus lengths.
typedef struct
{
  union
  {
    u32 dgst32[16];
    u64 dgst64[8];
  };

  u32 dgst_len;
  u32 W_len;

} pdf17l8_tmp_t;

// phpass: running MD5 digest.
typedef struct
{
  u32 digest_buf[4];

} phpass_tmp_t;

// md5crypt: running MD5 digest.
typedef struct
{
  u32 digest_buf[4];

} md5crypt_tmp_t;

// sha256crypt: alternate result plus P/S byte sequences.
typedef struct
{
  u32 alt_result[8];

  u32 p_bytes[4];
  u32 s_bytes[4];

} sha256crypt_tmp_t;

// sha512crypt: 64-bit variant of the above.
typedef struct
{
  u64 l_alt_result[8];

  u64 l_p_bytes[2];
  u64 l_s_bytes[2];

} sha512crypt_tmp_t;

// WPA PBKDF2-SHA1 state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[10];
  u32 out[10];

} wpa_tmp_t;

// Bitcoin wallet: running SHA512 digest.
typedef struct
{
  u64 dgst[8];

} bitcoin_wallet_tmp_t;

// DCC2 PBKDF2-SHA1 state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[4];

} dcc2_tmp_t;

// bcrypt: expanded key E plus Blowfish P-array and S-boxes.
typedef struct
{
  u32 E[18];

  u32 P[18];

  u32 S0[256];
  u32 S1[256];
  u32 S2[256];
  u32 S3[256];

} bcrypt_tmp_t;

// Password Safe v2: digest plus Blowfish P-array and S-boxes.
typedef struct
{
  u32 digest[2];

  u32 P[18];

  u32 S0[256];
  u32 S1[256];
  u32 S2[256];
  u32 S3[256];

} pwsafe2_tmp_t;

// Password Safe v3: running SHA256 digest.
typedef struct
{
  u32 digest_buf[8];

} pwsafe3_tmp_t;

// Android PIN: running SHA1 digest.
typedef struct
{
  u32 digest_buf[5];

} androidpin_tmp_t;

// Android FDE PBKDF2-SHA1 state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[10];
  u32 out[10];

} androidfde_tmp_t;

// TrueCrypt PBKDF2 state (32-bit digest variant).
typedef struct
{
  u32 ipad[16];
  u32 opad[16];

  u32 dgst[64];
  u32 out[64];

} tc_tmp_t;

// TrueCrypt PBKDF2 state (64-bit digest variant).
typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[32];
  u64 out[32];

} tc64_tmp_t;

// PBKDF2-HMAC-MD5 state.
typedef struct
{
  u32 ipad[4];
  u32 opad[4];

  u32 dgst[32];
  u32 out[32];

} pbkdf2_md5_tmp_t;

// PBKDF2-HMAC-SHA1 state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[32];
  u32 out[32];

} pbkdf2_sha1_tmp_t;

// PBKDF2-HMAC-SHA256 state.
typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[32];
  u32 out[32];

} pbkdf2_sha256_tmp_t;

// PBKDF2-HMAC-SHA512 state.
typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[16];
  u64 out[16];

} pbkdf2_sha512_tmp_t;

// eCryptfs: running SHA512 output.
typedef struct
{
  u64 out[8];

} ecryptfs_tmp_t;

// Oracle T-type PBKDF2-SHA512 state.
typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[16];
  u64 out[16];

} oraclet_tmp_t;

// 1Password agilekeychain PBKDF2-SHA1 state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];

} agilekey_tmp_t;

// blockchain.info "my wallet": two interleaved PBKDF2-SHA1 states.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst1[5];
  u32 out1[5];

  u32 dgst2[5];
  u32 out2[5];

} mywallet_tmp_t;

// AIX {ssha1} PBKDF2 state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];

} sha1aix_tmp_t;

// AIX {ssha256} PBKDF2 state.
typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[8];
  u32 out[8];

} sha256aix_tmp_t;

// AIX {ssha512} PBKDF2 state.
typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[8];
  u64 out[8];

} sha512aix_tmp_t;

// LastPass PBKDF2-SHA256 state.
typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[8];
  u32 out[8];

} lastpass_tmp_t;

// Drupal 7: running SHA512 digest.
typedef struct
{
  u64 digest_buf[8];

} drupal7_tmp_t;

// Lotus Notes/Domino 8 PBKDF2-SHA1 state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];

} lotus8_tmp_t;

// Office 2007: iterated SHA1 state.
typedef struct
{
  u32 out[5];

} office2007_tmp_t;

// Office 2010: iterated SHA1 state.
typedef struct
{
  u32 out[5];

} office2010_tmp_t;

// Office 2013: iterated SHA512 state.
typedef struct
{
  u64 out[8];

} office2013_tmp_t;

// SAP CODVN H (PWDSALTEDHASH): running SHA1 digest.
typedef struct
{
  u32 digest_buf[5];

} saph_sha1_tmp_t;

// 7-Zip: streaming SHA256 state — current block, digest, and lengths.
typedef struct
{
  u32 block[16];

  u32 dgst[8];

  u32 block_len;
  u32 final_len;

} seven_zip_tmp_t;

// AxCrypt: key-encrypting key and wrap cipher state.
typedef struct
{
  u32 KEK[5];

  u32 lsb[4];
  u32 cipher[4];

} axcrypt_tmp_t;

// KeePass: running AES-KDF digest.
typedef struct
{
  u32 tmp_digest[8];

} keepass_tmp_t;

// BSDi crypt: DES key schedule halves plus IV.
typedef struct
{
  u32 Kc[16];
  u32 Kd[16];

  u32 iv[2];

} bsdicrypt_tmp_t;

// RAR3: 17 intermediate SHA1 digests kept across the iteration loop.
typedef struct
{
  u32 dgst[17][5];

} rar3_tmp_t;
1991
/*
 * Attack-infrastructure structs shared between host and device.  Layout is
 * ABI — do not reorder or resize.
 */

// CRAM-MD5: username/response buffer.
typedef struct
{
  u32 user[16];

} cram_md5_t;

// 7-Zip archive esalt: IV, salt, CRC and the encrypted data stream.
typedef struct
{
  u32 iv_buf[4];
  u32 iv_len;

  u32 salt_buf[4];
  u32 salt_len;

  u32 crc;

  u32 data_buf[96];
  u32 data_len;

  u32 unpack_size;      // decompressed size used for the CRC check

} seven_zip_t;

// One markov-statistics table entry: key plus 64-bit occurrence value.
typedef struct
{
  u32 key;
  u64 val;

} hcstat_table_t;

// One charset: up to 0x100 candidate characters plus the count in use.
typedef struct
{
  u32 cs_buf[0x100];
  u32 cs_len;

} cs_t;

// One compiled rule: a fixed-size program of rule opcodes.
typedef struct
{
  u32 cmds[0x100];

} kernel_rule_t;

// Location of a cracked plain: packed gid/vid plus inner-loop position.
typedef struct
{
  u32 gidvid;
  u32 il_pos;

} plain_t;

// One candidate password: 64 bytes of data plus its length; padded so the
// struct size stays alignment-friendly for coalesced device access.
typedef struct
{
  u32 i[16];

  u32 pw_len;

  u32 alignment_placeholder_1;
  u32 alignment_placeholder_2;
  u32 alignment_placeholder_3;

} pw_t;

// One brute-force position value.
typedef struct
{
  u32 i;

} bf_t;

// One combinator right-hand word: 32 bytes plus length.
typedef struct
{
  u32 i[8];

  u32 pw_len;

} comb_t;

// One bitsliced word: 32 bit-planes.
typedef struct
{
  u32 b[32];

} bs_word_t;

// scrypt scratch: V array chunk held as uint4 for vectorized ROMix access.
typedef struct
{
  uint4 P[64];

} scrypt_tmp_t;