// Commit subject: Fix broken #ifdef in types_ocl.c
// File: [hashcat.git] / OpenCL / types_ocl.c
1 /**
2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * magnum <john.magnum@hushmail.com>
4 *
5 * License.....: MIT
6 */
7
// Device-type identifiers; compared against the DEVICE_TYPE symbol,
// which is expected to be supplied by the host at kernel build time.
#define DEVICE_TYPE_CPU 2
#define DEVICE_TYPE_GPU 4

// Short fixed-width aliases for the OpenCL built-in scalar types.
typedef uchar u8;
typedef ushort u16;
typedef uint u32;
typedef ulong u64;

// Kernels not ported to the new SIMD code path are always built with a
// vector width of 1, regardless of what the host requested.
#ifndef NEW_SIMD_CODE
#undef VECT_SIZE
#define VECT_SIZE 1
#endif

// Token pasting: VTYPE (uint, 4) expands to the OpenCL type uint4.
#define CONCAT(a, b) a##b
#define VTYPE(type, width) CONCAT(type, width)

// Vector aliases: plain scalars when VECT_SIZE == 1, otherwise the
// OpenCL vector types of the configured width (e.g. uint2 .. uint16).
#if VECT_SIZE == 1
typedef uchar u8x;
typedef ushort u16x;
typedef uint u32x;
typedef ulong u64x;
#else
typedef VTYPE(uchar, VECT_SIZE) u8x;
typedef VTYPE(ushort, VECT_SIZE) u16x;
typedef VTYPE(uint, VECT_SIZE) u32x;
typedef VTYPE(ulong, VECT_SIZE) u64x;
#endif
35
36 static inline u32 l32_from_64_S (u64 a)
37 {
38 const u32 r = (u32) (a);
39
40 return r;
41 }
42
43 static inline u32 h32_from_64_S (u64 a)
44 {
45 a >>= 32;
46
47 const u32 r = (u32) (a);
48
49 return r;
50 }
51
// Combine two 32-bit halves into a 64-bit scalar: a is the high word,
// b the low word. NOTE(review): the (uint2)(b, a) reinterpretation
// assumes little-endian device layout, as on the GPUs targeted here.
static inline u64 hl32_to_64_S (const u32 a, const u32 b)
{
  return as_ulong ((uint2) (b, a));
}
56
// Vectorized l32_from_64_S: take the low 32 bits of each 64-bit lane.
// The "#if VECT_SIZE >= k" sections stack: e.g. for VECT_SIZE == 8 the
// >= 2, >= 4 and >= 8 sections together initialize .s0 through .s7.
static inline u32x l32_from_64 (u64x a)
{
  u32x r;

  #if VECT_SIZE == 1
  r = (u32) a;
  #endif

  #if VECT_SIZE >= 2
  r.s0 = (u32) a.s0;
  r.s1 = (u32) a.s1;
  #endif

  #if VECT_SIZE >= 4
  r.s2 = (u32) a.s2;
  r.s3 = (u32) a.s3;
  #endif

  #if VECT_SIZE >= 8
  r.s4 = (u32) a.s4;
  r.s5 = (u32) a.s5;
  r.s6 = (u32) a.s6;
  r.s7 = (u32) a.s7;
  #endif

  #if VECT_SIZE >= 16
  r.s8 = (u32) a.s8;
  r.s9 = (u32) a.s9;
  r.sa = (u32) a.sa;
  r.sb = (u32) a.sb;
  r.sc = (u32) a.sc;
  r.sd = (u32) a.sd;
  r.se = (u32) a.se;
  r.sf = (u32) a.sf;
  #endif

  return r;
}
95
// Vectorized h32_from_64_S: take the high 32 bits of each 64-bit lane.
// The shift is applied to the whole vector first; the per-lane casts
// then truncate to the (shifted-down) high word.
static inline u32x h32_from_64 (u64x a)
{
  a >>= 32;

  u32x r;

  #if VECT_SIZE == 1
  r = (u32) a;
  #endif

  #if VECT_SIZE >= 2
  r.s0 = (u32) a.s0;
  r.s1 = (u32) a.s1;
  #endif

  #if VECT_SIZE >= 4
  r.s2 = (u32) a.s2;
  r.s3 = (u32) a.s3;
  #endif

  #if VECT_SIZE >= 8
  r.s4 = (u32) a.s4;
  r.s5 = (u32) a.s5;
  r.s6 = (u32) a.s6;
  r.s7 = (u32) a.s7;
  #endif

  #if VECT_SIZE >= 16
  r.s8 = (u32) a.s8;
  r.s9 = (u32) a.s9;
  r.sa = (u32) a.sa;
  r.sb = (u32) a.sb;
  r.sc = (u32) a.sc;
  r.sd = (u32) a.sd;
  r.se = (u32) a.se;
  r.sf = (u32) a.sf;
  #endif

  return r;
}
136
// Vectorized hl32_to_64_S: per lane, combine high word a and low word b
// into a 64-bit value via a little-endian uint2 reinterpretation.
static inline u64x hl32_to_64 (const u32x a, const u32x b)
{
  u64x r;

  #if VECT_SIZE == 1
  r = as_ulong ((uint2) (b, a));
  #endif

  #if VECT_SIZE >= 2
  r.s0 = as_ulong ((uint2) (b.s0, a.s0));
  r.s1 = as_ulong ((uint2) (b.s1, a.s1));
  #endif

  #if VECT_SIZE >= 4
  r.s2 = as_ulong ((uint2) (b.s2, a.s2));
  r.s3 = as_ulong ((uint2) (b.s3, a.s3));
  #endif

  #if VECT_SIZE >= 8
  r.s4 = as_ulong ((uint2) (b.s4, a.s4));
  r.s5 = as_ulong ((uint2) (b.s5, a.s5));
  r.s6 = as_ulong ((uint2) (b.s6, a.s6));
  r.s7 = as_ulong ((uint2) (b.s7, a.s7));
  #endif

  #if VECT_SIZE >= 16
  r.s8 = as_ulong ((uint2) (b.s8, a.s8));
  r.s9 = as_ulong ((uint2) (b.s9, a.s9));
  r.sa = as_ulong ((uint2) (b.sa, a.sa));
  r.sb = as_ulong ((uint2) (b.sb, a.sb));
  r.sc = as_ulong ((uint2) (b.sc, a.sc));
  r.sd = as_ulong ((uint2) (b.sd, a.sd));
  r.se = as_ulong ((uint2) (b.se, a.se));
  r.sf = as_ulong ((uint2) (b.sf, a.sf));
  #endif

  return r;
}
175
176 #ifdef IS_AMD
// Byte-swap a 32-bit value (endianness conversion) via a uchar4 swizzle.
static inline u32 swap32_S (const u32 v)
{
  return (as_uint (as_uchar4 (v).s3210));
}
181
// Byte-swap a 64-bit value (endianness conversion) via a uchar8 swizzle.
static inline u64 swap64_S (const u64 v)
{
  return (as_ulong (as_uchar8 (v).s76543210));
}
186
187 static inline u32 rotr32_S (const u32 a, const u32 n)
188 {
189 return rotate (a, 32 - n);
190 }
191
192 static inline u32 rotl32_S (const u32 a, const u32 n)
193 {
194 return rotate (a, n);
195 }
196
// Rotate a 64-bit scalar right by n.
// On GPU devices with the amd_media_ops extension the rotation is built
// from two amd_bitalign funnel shifts on the 32-bit halves; otherwise
// (and on CPU devices) it falls back to the OpenCL rotate() builtin.
static inline u64 rotr64_S (const u64 a, const u32 n)
{
  #if (DEVICE_TYPE == DEVICE_TYPE_GPU)

  #ifdef cl_amd_media_ops

  // Split into high (a0) and low (a1) 32-bit halves.
  const u32 a0 = h32_from_64_S (a);
  const u32 a1 = l32_from_64_S (a);

  // amd_bitalign (hi, lo, s) yields (({hi:lo} as 64-bit) >> s) for
  // s < 32; swapping the halves covers rotations of 32 bits or more.
  const u32 t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
  const u32 t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);

  const u64 r = hl32_to_64_S (t0, t1);

  #else

  const u64 r = rotate (a, (u64) 64 - n);

  #endif

  #else

  const u64 r = rotate (a, (u64) 64 - n);

  #endif

  return r;
}
225
226 static inline u64 rotl64_S (const u64 a, const u32 n)
227 {
228 return rotr64_S (a, 64 - n);
229 }
230
231 static inline u32x swap32 (const u32x v)
232 {
233 return ((v >> 24) & 0x000000ff)
234 | ((v >> 8) & 0x0000ff00)
235 | ((v << 8) & 0x00ff0000)
236 | ((v << 24) & 0xff000000);
237 }
238
239 static inline u64x swap64 (const u64x v)
240 {
241 return ((v >> 56) & 0x00000000000000ff)
242 | ((v >> 40) & 0x000000000000ff00)
243 | ((v >> 24) & 0x0000000000ff0000)
244 | ((v >> 8) & 0x00000000ff000000)
245 | ((v << 8) & 0x000000ff00000000)
246 | ((v << 24) & 0x0000ff0000000000)
247 | ((v << 40) & 0x00ff000000000000)
248 | ((v << 56) & 0xff00000000000000);
249 }
250
251 static inline u32x rotr32 (const u32x a, const u32 n)
252 {
253 return rotate (a, 32 - n);
254 }
255
256 static inline u32x rotl32 (const u32x a, const u32 n)
257 {
258 return rotate (a, n);
259 }
260
// Vectorized rotr64_S: rotate each 64-bit lane right by the scalar n.
// Same strategy as the scalar version: amd_bitalign funnel shifts on
// the 32-bit half-vectors on GPU+amd_media_ops, rotate() otherwise.
static inline u64x rotr64 (const u64x a, const u32 n)
{
  #if (DEVICE_TYPE == DEVICE_TYPE_GPU)

  #ifdef cl_amd_media_ops

  // High and low 32-bit halves of every lane.
  const u32x a0 = h32_from_64 (a);
  const u32x a1 = l32_from_64 (a);

  // Swap the halves when rotating by 32 or more bits (n is uniform
  // across lanes, so one branch covers the whole vector).
  const u32x t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
  const u32x t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);

  const u64x r = hl32_to_64 (t0, t1);

  #else

  const u64x r = rotate (a, (u64) 64 - n);

  #endif

  #else

  const u64x r = rotate (a, (u64) 64 - n);

  #endif

  return r;
}
289
290 static inline u64x rotl64 (const u64x a, const u32 n)
291 {
292 return rotr64 (a, 64 - n);
293 }
294
295 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
296 {
297 #ifdef cl_amd_media_ops2
298
299 return amd_bfe (a, b, c);
300
301 #else
302
303 #define BIT(x) (1 << (x))
304 #define BIT_MASK(x) (BIT (x) - 1)
305 #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z))
306
307 return BFE (a, b, c);
308
309 #endif
310 }
311
// amd_bytealign: treat {a:b} as one 8-byte value (a high, b low), shift
// it right by (c & 3) bytes and return the low 32 bits. Uses the native
// instruction when amd_media_ops is present, else a 64-bit shift.
static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
{
  #ifdef cl_amd_media_ops

  return amd_bytealign (a, b, c);

  #else

  const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);

  return (u32) (tmp);

  #endif
}
326 #endif
327
328 #ifdef IS_NV
// Byte-swap a 32-bit value using the PTX byte-permute instruction;
// selector 0x0123 reverses the four bytes of v.
static inline u32 swap32_S (const u32 v)
{
  u32 r;

  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));

  return r;
}
337
// Byte-swap a 64-bit value: unpack into 32-bit words, byte-swap each
// with prmt, then repack with the two words exchanged.
static inline u64 swap64_S (const u64 v)
{
  u32 il;
  u32 ir;

  // Unpack: il receives the low word, ir the high word.
  asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(v));

  u32 tl;
  u32 tr;

  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl) : "r"(il));
  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));

  u64 r;

  // Repack with the words exchanged: tr is now the low word of r.
  asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));

  return r;
}
357
358 static inline u32 rotr32_S (const u32 a, const u32 n)
359 {
360 return rotate (a, 32 - n);
361 }
362
363 static inline u32 rotl32_S (const u32 a, const u32 n)
364 {
365 return rotate (a, n);
366 }
367
368 #if CUDA_ARCH >= 350
// Rotate a 64-bit scalar right by n using the sm_35+ funnel-shift
// instruction shf.r.wrap, which shifts a 64-bit {high:low} pair right
// with the shift count wrapped modulo 32 -- hence the explicit n >= 32
// case that swaps the halves.
static inline u64 rotr64_S (const u64 a, const u32 n)
{
  u32 il;
  u32 ir;

  // Unpack: il = low word, ir = high word.
  asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));

  u32 tl;
  u32 tr;

  if (n >= 32)
  {
    // A rotation by 32..63 equals a rotation by n - 32 with the words
    // exchanged.
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
  }
  else
  {
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
  }

  u64 r;

  asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));

  return r;
}
396 #else
397 static inline u64 rotr64_S (const u64 a, const u32 n)
398 {
399 return rotate (a, (u64) 64 - n);
400 }
401 #endif
402
403 static inline u64 rotl64_S (const u64 a, const u32 n)
404 {
405 return rotr64_S (a, 64 - n);
406 }
407
408 #if CUDA_ARCH >= 500
// LOP3.LUT scalar helpers (sm_50+): each computes the bitwise ternary
// function of (a, b, c) whose 8-bit truth table is the hex suffix,
// e.g. 0x96 = a ^ b ^ c and 0xe8 = bitwise majority of (a, b, c).
static inline u32 lut3_2d_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_39_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_59_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_96_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_e4_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_e8_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_ca_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}
471 #endif
472
// PTX prmt.b32: build a 32-bit result from four arbitrary bytes of the
// 8-byte pair {b:a}, selected by the low nibbles of c.
static inline u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));

  return r;
}
481
482 static inline u32x swap32 (const u32x v)
483 {
484 return ((v >> 24) & 0x000000ff)
485 | ((v >> 8) & 0x0000ff00)
486 | ((v << 8) & 0x00ff0000)
487 | ((v << 24) & 0xff000000);
488 }
489
490 static inline u64x swap64 (const u64x v)
491 {
492 return ((v >> 56) & 0x00000000000000ff)
493 | ((v >> 40) & 0x000000000000ff00)
494 | ((v >> 24) & 0x0000000000ff0000)
495 | ((v >> 8) & 0x00000000ff000000)
496 | ((v << 8) & 0x000000ff00000000)
497 | ((v << 24) & 0x0000ff0000000000)
498 | ((v << 40) & 0x00ff000000000000)
499 | ((v << 56) & 0xff00000000000000);
500 }
501
502 static inline u32x rotr32 (const u32x a, const u32 n)
503 {
504 return rotate (a, 32 - n);
505 }
506
507 static inline u32x rotl32 (const u32x a, const u32 n)
508 {
509 return rotate (a, n);
510 }
511
512 #if CUDA_ARCH >= 350
513 static inline u64x rotr64 (const u64x a, const u32 n)
514 {
515 u64x r;
516
517 u32 il;
518 u32 ir;
519 u32 tl;
520 u32 tr;
521
522 #if VECT_SIZE == 1
523
524 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
525
526 if (n >= 32)
527 {
528 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
529 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
530 }
531 else
532 {
533 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
534 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
535 }
536
537 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
538
539 #endif
540
541 #if VECT_SIZE >= 2
542
543 {
544 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s0));
545
546 if (n >= 32)
547 {
548 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
549 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
550 }
551 else
552 {
553 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
554 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
555 }
556
557 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s0) : "r"(tl), "r"(tr));
558 }
559
560 {
561 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s1));
562
563 if (n >= 32)
564 {
565 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
566 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
567 }
568 else
569 {
570 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
571 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
572 }
573
574 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s1) : "r"(tl), "r"(tr));
575 }
576
577 #endif
578
579 #if VECT_SIZE >= 4
580
581 {
582 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s2));
583
584 if (n >= 32)
585 {
586 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
587 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
588 }
589 else
590 {
591 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
592 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
593 }
594
595 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s2) : "r"(tl), "r"(tr));
596 }
597
598 {
599 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s3));
600
601 if (n >= 32)
602 {
603 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
604 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
605 }
606 else
607 {
608 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
609 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
610 }
611
612 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s3) : "r"(tl), "r"(tr));
613 }
614
615 #endif
616
617 #if VECT_SIZE >= 8
618
619 {
620 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s4));
621
622 if (n >= 32)
623 {
624 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
625 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
626 }
627 else
628 {
629 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
630 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
631 }
632
633 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s4) : "r"(tl), "r"(tr));
634 }
635
636 {
637 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s5));
638
639 if (n >= 32)
640 {
641 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
642 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
643 }
644 else
645 {
646 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
647 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
648 }
649
650 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s5) : "r"(tl), "r"(tr));
651 }
652
653 {
654 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s6));
655
656 if (n >= 32)
657 {
658 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
659 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
660 }
661 else
662 {
663 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
664 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
665 }
666
667 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s6) : "r"(tl), "r"(tr));
668 }
669
670 {
671 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s7));
672
673 if (n >= 32)
674 {
675 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
676 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
677 }
678 else
679 {
680 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
681 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
682 }
683
684 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s7) : "r"(tl), "r"(tr));
685 }
686
687 #endif
688
689 #if VECT_SIZE >= 16
690
691 {
692 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s8));
693
694 if (n >= 32)
695 {
696 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
697 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
698 }
699 else
700 {
701 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
702 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
703 }
704
705 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s8) : "r"(tl), "r"(tr));
706 }
707
708 {
709 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s9));
710
711 if (n >= 32)
712 {
713 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
714 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
715 }
716 else
717 {
718 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
719 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
720 }
721
722 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s9) : "r"(tl), "r"(tr));
723 }
724
725 {
726 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sa));
727
728 if (n >= 32)
729 {
730 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
731 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
732 }
733 else
734 {
735 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
736 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
737 }
738
739 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sa) : "r"(tl), "r"(tr));
740 }
741
742 {
743 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sb));
744
745 if (n >= 32)
746 {
747 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
748 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
749 }
750 else
751 {
752 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
753 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
754 }
755
756 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sb) : "r"(tl), "r"(tr));
757 }
758
759 {
760 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sc));
761
762 if (n >= 32)
763 {
764 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
765 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
766 }
767 else
768 {
769 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
770 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
771 }
772
773 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sc) : "r"(tl), "r"(tr));
774 }
775
776 {
777 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sd));
778
779 if (n >= 32)
780 {
781 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
782 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
783 }
784 else
785 {
786 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
787 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
788 }
789
790 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sd) : "r"(tl), "r"(tr));
791 }
792
793 {
794 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.se));
795
796 if (n >= 32)
797 {
798 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
799 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
800 }
801 else
802 {
803 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
804 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
805 }
806
807 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.se) : "r"(tl), "r"(tr));
808 }
809
810 {
811 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sf));
812
813 if (n >= 32)
814 {
815 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
816 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
817 }
818 else
819 {
820 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
821 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
822 }
823
824 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sf) : "r"(tl), "r"(tr));
825 }
826
827 #endif
828
829 return r;
830 }
831 #else
832 static inline u64x rotr64 (const u64x a, const u32 n)
833 {
834 return rotate (a, (u64) 64 - n);
835 }
836 #endif
837
838 static inline u64x rotl64 (const u64x a, const u32 n)
839 {
840 return rotr64 (a, (u64) 64 - n);
841 }
842
843 static inline u32x __byte_perm (const u32x a, const u32x b, const u32x c)
844 {
845 u32x r;
846
847 #if VECT_SIZE == 1
848 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c) );
849 #endif
850
851 #if VECT_SIZE >= 2
852 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(a.s0), "r"(b.s0), "r"(c.s0));
853 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(a.s1), "r"(b.s1), "r"(c.s1));
854 #endif
855
856 #if VECT_SIZE >= 4
857 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(a.s2), "r"(b.s2), "r"(c.s2));
858 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(a.s3), "r"(b.s3), "r"(c.s3));
859 #endif
860
861 #if VECT_SIZE >= 8
862 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(a.s4), "r"(b.s4), "r"(c.s4));
863 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(a.s5), "r"(b.s5), "r"(c.s5));
864 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(a.s6), "r"(b.s6), "r"(c.s6));
865 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(a.s7), "r"(b.s7), "r"(c.s7));
866 #endif
867
868 #if VECT_SIZE >= 16
869 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(a.s8), "r"(b.s8), "r"(c.s8));
870 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(a.s9), "r"(b.s9), "r"(c.s9));
871 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(a.sa), "r"(b.sa), "r"(c.sa));
872 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(a.sb), "r"(b.sb), "r"(c.sb));
873 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(a.sc), "r"(b.sc), "r"(c.sc));
874 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(a.sd), "r"(b.sd), "r"(c.sd));
875 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(a.se), "r"(b.se), "r"(c.se));
876 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(a.sf), "r"(b.sf), "r"(c.sf));
877 #endif
878
879 return r;
880 }
881
// Bit-field extract via PTX bfe.u32: return c bits of a starting at
// bit offset b.
static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));

  return r;
}
890
891 #if CUDA_ARCH >= 350
// Emulate AMD's amd_bytealign on NVIDIA sm_35+: shift the 64-bit pair
// {a:b} (a high, b low) right by (c & 3) bytes with the funnel-shift
// instruction and return the low 32 bits.
static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));

  return r;
}
900 #else
// Pre-sm_35 fallback: the shifted selector picks the same four bytes
// out of {a:b} that a right shift by (c & 3) bytes would produce.
static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
{
  return __byte_perm_S (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);
}
905 #endif
906
907 #if CUDA_ARCH >= 500
908 static inline u32x lut3_2d (const u32x a, const u32x b, const u32x c)
909 {
910 u32x r;
911
912 #if VECT_SIZE == 1
913 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
914 #endif
915
916 #if VECT_SIZE >= 2
917 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
918 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
919 #endif
920
921 #if VECT_SIZE >= 4
922 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
923 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
924 #endif
925
926 #if VECT_SIZE >= 8
927 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
928 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
929 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
930 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
931 #endif
932
933 #if VECT_SIZE >= 16
934 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
935 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
936 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
937 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
938 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
939 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
940 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
941 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
942 #endif
943
944 return r;
945 }
946
947 static inline u32x lut3_39 (const u32x a, const u32x b, const u32x c)
948 {
949 u32x r;
950
951 #if VECT_SIZE == 1
952 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
953 #endif
954
955 #if VECT_SIZE >= 2
956 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
957 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
958 #endif
959
960 #if VECT_SIZE >= 4
961 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
962 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
963 #endif
964
965 #if VECT_SIZE >= 8
966 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
967 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
968 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
969 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
970 #endif
971
972 #if VECT_SIZE >= 16
973 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
974 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
975 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
976 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
977 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
978 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
979 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
980 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
981 #endif
982
983 return r;
984 }
985
986 static inline u32x lut3_59 (const u32x a, const u32x b, const u32x c)
987 {
988 u32x r;
989
990 #if VECT_SIZE == 1
991 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
992 #endif
993
994 #if VECT_SIZE >= 2
995 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
996 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
997 #endif
998
999 #if VECT_SIZE >= 4
1000 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1001 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1002 #endif
1003
1004 #if VECT_SIZE >= 8
1005 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1006 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1007 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1008 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1009 #endif
1010
1011 #if VECT_SIZE >= 16
1012 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1013 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1014 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1015 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1016 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1017 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1018 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1019 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1020 #endif
1021
1022 return r;
1023 }
1024
1025 static inline u32x lut3_96 (const u32x a, const u32x b, const u32x c)
1026 {
1027 u32x r;
1028
1029 #if VECT_SIZE == 1
1030 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1031 #endif
1032
1033 #if VECT_SIZE >= 2
1034 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1035 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1036 #endif
1037
1038 #if VECT_SIZE >= 4
1039 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1040 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1041 #endif
1042
1043 #if VECT_SIZE >= 8
1044 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1045 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1046 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1047 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1048 #endif
1049
1050 #if VECT_SIZE >= 16
1051 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1052 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1053 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1054 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1055 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1056 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1057 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1058 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1059 #endif
1060
1061 return r;
1062 }
1063
// Three-input bitwise LUT via the NVIDIA PTX lop3.b32 instruction with
// immediate truth table 0xe4. With the standard operand encodings
// (a=0xF0, b=0xCC, c=0xAA), 0xe4 computes the per-bit select
// (c & a) | (~c & b), i.e. "c chooses between a and b".
// One lop3 is emitted per active vector lane for the configured VECT_SIZE.
static inline u32x lut3_e4 (const u32x a, const u32x b, const u32x c)
{
  u32x r;

#if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
#endif

#if VECT_SIZE >= 2
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
#endif

#if VECT_SIZE >= 4
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
#endif

#if VECT_SIZE >= 8
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
#endif

#if VECT_SIZE >= 16
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
#endif

  return r;
}
1102
// Three-input bitwise LUT via PTX lop3.b32 with truth table 0xe8: the
// per-bit majority function (a & b) | (a & c) | (b & c), the MAJ primitive
// used by SHA-family compression functions.
// One lop3 is emitted per active vector lane for the configured VECT_SIZE.
static inline u32x lut3_e8 (const u32x a, const u32x b, const u32x c)
{
  u32x r;

#if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
#endif

#if VECT_SIZE >= 2
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
#endif

#if VECT_SIZE >= 4
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
#endif

#if VECT_SIZE >= 8
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
#endif

#if VECT_SIZE >= 16
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
#endif

  return r;
}
1141
// Three-input bitwise LUT via PTX lop3.b32 with truth table 0xca: the
// per-bit select (a & b) | (~a & c), i.e. "a chooses between b and c" —
// the CH/bitselect primitive of MD5/SHA-family rounds.
// One lop3 is emitted per active vector lane for the configured VECT_SIZE.
static inline u32x lut3_ca (const u32x a, const u32x b, const u32x c)
{
  u32x r;

#if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
#endif

#if VECT_SIZE >= 2
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
#endif

#if VECT_SIZE >= 4
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
#endif

#if VECT_SIZE >= 8
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
#endif

#if VECT_SIZE >= 16
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
#endif

  return r;
}
1180
1181 #endif
1182 #endif
1183
1184 #ifdef IS_GENERIC
1185 static inline u32 swap32_S (const u32 v)
1186 {
1187 return (as_uint (as_uchar4 (v).s3210));
1188 }
1189
1190 static inline u64 swap64_S (const u64 v)
1191 {
1192 return (as_ulong (as_uchar8 (v).s76543210));
1193 }
1194
// Generic scalar rotate helpers. OpenCL's rotate() builtin rotates LEFT and
// takes the rotate amount modulo the operand's bit width, so a right-rotate
// by n is expressed as a left-rotate by (width - n); n == 0 yields
// rotate (a, width) == a.

// Rotate a right by n bits (0 <= n <= 32).
static inline u32 rotr32_S (const u32 a, const u32 n)
{
  return rotate (a, 32 - n);
}

// Rotate a left by n bits.
static inline u32 rotl32_S (const u32 a, const u32 n)
{
  return rotate (a, n);
}

// Rotate a right by n bits; the (u64) cast keeps the amount in 64-bit
// arithmetic so the builtin's operand types match.
static inline u64 rotr64_S (const u64 a, const u32 n)
{
  return rotate (a, (u64) 64 - n);
}

// Rotate a left by n bits.
static inline u64 rotl64_S (const u64 a, const u32 n)
{
  return rotate (a, (u64) n);
}
1214
1215 static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
1216 {
1217 const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);
1218
1219 return (u32) (tmp);
1220 }
1221
1222 static inline u32x swap32 (const u32x v)
1223 {
1224 return ((v >> 24) & 0x000000ff)
1225 | ((v >> 8) & 0x0000ff00)
1226 | ((v << 8) & 0x00ff0000)
1227 | ((v << 24) & 0xff000000);
1228 }
1229
1230 static inline u64x swap64 (const u64x v)
1231 {
1232 return ((v >> 56) & 0x00000000000000ff)
1233 | ((v >> 40) & 0x000000000000ff00)
1234 | ((v >> 24) & 0x0000000000ff0000)
1235 | ((v >> 8) & 0x00000000ff000000)
1236 | ((v << 8) & 0x000000ff00000000)
1237 | ((v << 24) & 0x0000ff0000000000)
1238 | ((v << 40) & 0x00ff000000000000)
1239 | ((v << 56) & 0xff00000000000000);
1240 }
1241
// Generic vector rotate helpers, lane-wise counterparts of the *_S scalar
// versions above. OpenCL rotate() rotates LEFT with the amount taken modulo
// the element width, so right-rotate is left-rotate by (width - n).

// Rotate each lane of a right by n bits.
static inline u32x rotr32 (const u32x a, const u32 n)
{
  return rotate (a, 32 - n);
}

// Rotate each lane of a left by n bits.
static inline u32x rotl32 (const u32x a, const u32 n)
{
  return rotate (a, n);
}

// Rotate each lane of a right by n bits (64-bit lanes).
static inline u64x rotr64 (const u64x a, const u32 n)
{
  return rotate (a, (u64) 64 - n);
}

// Rotate each lane of a left by n bits (64-bit lanes).
static inline u64x rotl64 (const u64x a, const u32 n)
{
  return rotate (a, (u64) n);
}
1261
// Bit-field extract: returns the c bits of a starting at bit offset b,
// i.e. (a >> b) & ((1 << c) - 1) — a software stand-in for the hardware
// BFE instruction available on some GPUs.
// NOTE(review): BIT/BIT_MASK/BFE are not #undef'd here, so they remain
// visible for the rest of the translation unit — keep that in mind before
// refactoring them away.
// NOTE(review): c == 32 would shift by the full type width (undefined in C);
// presumably callers always pass c < 32 — confirm.
static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
#define BIT(x) (1 << (x))
#define BIT_MASK(x) (BIT (x) - 1)
#define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z))

  return BFE (a, b, c);
}
1270
// Software fallback for AMD's amd_bytealign intrinsic, vectorized: for each
// lane, concatenate a (high 32 bits) and b (low 32 bits) into a 64-bit value
// and return the 32 bits starting at byte offset (c & 3) from the low end.
// Exactly one #if branch is active per compilation, selected by VECT_SIZE;
// the vector literals widen each 32-bit lane to the 64-bit lane of a u64x.
static inline u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
{
#if VECT_SIZE == 1
  const u64x tmp = ((((u64x) (a)) << 32) | ((u64x) (b))) >> ((c & 3) * 8);

  return (u32x) (tmp);
#endif

#if VECT_SIZE == 2
  const u64x tmp = ((((u64x) (a.s0, a.s1)) << 32) | ((u64x) (b.s0, b.s1))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1);
#endif

#if VECT_SIZE == 4
  const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3);
#endif

#if VECT_SIZE == 8
  const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7);
#endif

#if VECT_SIZE == 16
  const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7, a.s8, a.s9, a.sa, a.sb, a.sc, a.sd, a.se, a.sf)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7, b.s8, b.s9, b.sa, b.sb, b.sc, b.sd, b.se, b.sf))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7, tmp.s8, tmp.s9, tmp.sa, tmp.sb, tmp.sc, tmp.sd, tmp.se, tmp.sf);
#endif
}
1303 #endif
1304
// Hash digest container. Each kernel build defines exactly one hash-mode
// macro (e.g. _SHA1_), which selects the digest size in 32-bit words; with
// no macro defined the struct would have no members — presumably the build
// system always defines one (confirm against host code).
// NOTE(review): _SCRYPT_ and _LOTUS8_ each appear TWICE in this #elif
// chain; only the first match is taken, so the later branches (including
// the conflicting _SCRYPT_ digest_buf[4]) are dead code.
typedef struct
{
#if defined _DES_
  u32 digest_buf[4];
#elif defined _MD4_
  u32 digest_buf[4];
#elif defined _MD5_
  u32 digest_buf[4];
#elif defined _MD5H_
  u32 digest_buf[4];
#elif defined _SHA1_
  u32 digest_buf[5];
#elif defined _BCRYPT_
  u32 digest_buf[6];
#elif defined _SHA256_
  u32 digest_buf[8];
#elif defined _SHA384_
  u32 digest_buf[16];
#elif defined _SHA512_
  u32 digest_buf[16];
#elif defined _KECCAK_
  u32 digest_buf[50];
#elif defined _RIPEMD160_
  u32 digest_buf[5];
#elif defined _WHIRLPOOL_
  u32 digest_buf[16];
#elif defined _GOST_
  u32 digest_buf[8];
#elif defined _GOST2012_256_
  u32 digest_buf[8];
#elif defined _GOST2012_512_
  u32 digest_buf[16];
#elif defined _SAPB_
  u32 digest_buf[4];
#elif defined _SAPG_
  u32 digest_buf[5];
#elif defined _MYSQL323_
  u32 digest_buf[4];
#elif defined _LOTUS5_
  u32 digest_buf[4];
#elif defined _LOTUS6_
  u32 digest_buf[4];
#elif defined _SCRYPT_
  u32 digest_buf[8];
#elif defined _LOTUS8_
  u32 digest_buf[4];
#elif defined _OFFICE2007_
  u32 digest_buf[4];
#elif defined _OFFICE2010_
  u32 digest_buf[4];
#elif defined _OFFICE2013_
  u32 digest_buf[4];
#elif defined _OLDOFFICE01_
  u32 digest_buf[4];
#elif defined _OLDOFFICE34_
  u32 digest_buf[4];
#elif defined _SIPHASH_
  u32 digest_buf[4];
#elif defined _PBKDF2_MD5_
  u32 digest_buf[32];
#elif defined _PBKDF2_SHA1_
  u32 digest_buf[32];
#elif defined _PBKDF2_SHA256_
  u32 digest_buf[32];
#elif defined _PBKDF2_SHA512_
  u32 digest_buf[32];
#elif defined _PDF17L8_
  u32 digest_buf[8];
#elif defined _CRC32_
  u32 digest_buf[4];
#elif defined _SEVEN_ZIP_
  u32 digest_buf[4];
#elif defined _ANDROIDFDE_
  u32 digest_buf[4];
#elif defined _DCC2_
  u32 digest_buf[4];
#elif defined _WPA_
  u32 digest_buf[4];
#elif defined _MD5_SHA1_
  u32 digest_buf[4];
#elif defined _SHA1_MD5_
  u32 digest_buf[5];
#elif defined _NETNTLMV2_
  u32 digest_buf[4];
#elif defined _KRB5PA_
  u32 digest_buf[4];
#elif defined _CLOUDKEY_
  u32 digest_buf[8];
#elif defined _SCRYPT_
  u32 digest_buf[4];
#elif defined _PSAFE2_
  u32 digest_buf[5];
#elif defined _LOTUS8_
  u32 digest_buf[4];
#elif defined _RAR3_
  u32 digest_buf[4];
#elif defined _SHA256_SHA1_
  u32 digest_buf[8];
#elif defined _MS_DRSR_
  u32 digest_buf[8];
#elif defined _ANDROIDFDE_SAMSUNG_
  u32 digest_buf[8];
#elif defined _RAR5_
  u32 digest_buf[4];
#elif defined _KRB5TGS_
  u32 digest_buf[4];
#elif defined _AXCRYPT_
  u32 digest_buf[4];
#elif defined _KEEPASS_
  u32 digest_buf[4];
#endif

} digest_t;
1418
// Per-hash salt record shared between host and kernels. Layout must match
// the host-side definition exactly — do not reorder fields.
typedef struct
{
  u32 salt_buf[16];     // raw salt bytes, packed as 32-bit words
  u32 salt_buf_pc[8];   // precomputed/transformed salt (algorithm-specific)

  u32 salt_len;         // salt length in bytes
  u32 salt_iter;        // iteration count for slow-hash loops
  u32 salt_sign[2];

  u32 keccak_mdlen;     // digest length parameter (Keccak modes)
  u32 truecrypt_mdlen;  // digest length parameter (TrueCrypt modes)

  u32 digests_cnt;      // digests bound to this salt
  u32 digests_done;     // digests already cracked

  u32 digests_offset;   // offset of this salt's digests in the digest buffer

  u32 scrypt_N;         // scrypt cost parameters
  u32 scrypt_r;
  u32 scrypt_p;
  u32 scrypt_tmto;      // time-memory trade-off factor
  u32 scrypt_phy;

} salt_t;
1443
// esalt: PDF encryption parameters (field names follow the PDF standard
// security handler: /V, /R, /P, /ID, /U, /O entries).
typedef struct
{
  int V;                // encryption dictionary version
  int R;                // revision
  int P;                // permission flags

  int enc_md;           // encrypt-metadata flag / method

  u32 id_buf[8];        // document /ID
  u32 u_buf[32];        // /U (user password) entry
  u32 o_buf[32];        // /O (owner password) entry

  int id_len;           // lengths in bytes of the buffers above
  int o_len;
  int u_len;

  u32 rc4key[2];        // recovered RC4 key material
  u32 rc4data[2];

} pdf_t;
1464
// esalt: WPA/WPA2 handshake material — per field names: PKE blob, EAPOL
// frame, key version, and the captured MACs/nonces (confirm semantics
// against the host-side parser).
typedef struct
{
  u32 pke[25];
  u32 eapol[64];
  int eapol_size;       // EAPOL frame length in bytes
  int keyver;           // key descriptor version (HMAC-MD5 vs HMAC-SHA1)
  u8 orig_mac1[6];
  u8 orig_mac2[6];
  u8 orig_nonce1[32];
  u8 orig_nonce2[32];

} wpa_t;

// esalt: Bitcoin wallet.dat encrypted blobs plus their byte lengths.
typedef struct
{
  u32 cry_master_buf[64];
  u32 ckey_buf[64];
  u32 public_key_buf[64];

  u32 cry_master_len;
  u32 ckey_len;
  u32 public_key_len;

} bitcoin_wallet_t;

// esalt: SIP digest authentication — base salt plus extended salt buffer.
typedef struct
{
  u32 salt_buf[30];
  u32 salt_len;

  u32 esalt_buf[38];
  u32 esalt_len;

} sip_t;
1499
// esalt: Android full-disk-encryption data blob.
typedef struct
{
  u32 data[384];

} androidfde_t;

// esalt: IKE-PSK — nonce and message buffers with byte lengths.
typedef struct
{
  u32 nr_buf[16];
  u32 nr_len;

  u32 msg_buf[128];
  u32 msg_len;

} ikepsk_t;

// esalt: NetNTLM — user/domain and server/client challenge material.
typedef struct
{
  u32 user_len;
  u32 domain_len;
  u32 srvchall_len;
  u32 clichall_len;

  u32 userdomain_buf[64];
  u32 chall_buf[256];

} netntlm_t;

// esalt: Kerberos 5 AS-REQ pre-auth (etype 23) — principal data,
// encrypted timestamp and its checksum.
typedef struct
{
  u32 user[16];
  u32 realm[16];
  u32 salt[32];
  u32 timestamp[16];
  u32 checksum[4];

} krb5pa_t;

// esalt: Kerberos 5 TGS-REP (etype 23) — ticket checksum and edata2
// ciphertext with its length.
typedef struct
{
  u32 account_info[512];
  u32 checksum[4];
  u32 edata2[2560];
  u32 edata2_len;

} krb5tgs_t;

// esalt: TrueCrypt/VeraCrypt-style container header — salt, encrypted
// header data, and optional keyfile material.
typedef struct
{
  u32 salt_buf[16];
  u32 data_buf[112];
  u32 keyfile_buf[16];

} tc_t;
1554
// esalt: PBKDF2-HMAC-MD5 salt buffer.
typedef struct
{
  u32 salt_buf[16];

} pbkdf2_md5_t;

// esalt: PBKDF2-HMAC-SHA1 salt buffer.
typedef struct
{
  u32 salt_buf[16];

} pbkdf2_sha1_t;

// esalt: PBKDF2-HMAC-SHA256 salt buffer.
typedef struct
{
  u32 salt_buf[16];

} pbkdf2_sha256_t;

// esalt: PBKDF2-HMAC-SHA512 salt buffer (wider salt).
typedef struct
{
  u32 salt_buf[32];

} pbkdf2_sha512_t;

// esalt: IPMI2 RAKP — session data to be HMAC'd, with byte length.
typedef struct
{
  u32 salt_buf[128];
  u32 salt_len;

} rakp_t;

// esalt: CloudKeychain — data blob and its byte length.
typedef struct
{
  u32 data_len;
  u32 data_buf[512];

} cloudkey_t;

// esalt: MS Office 2007 — verifier pair and AES key size.
typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[5];

  u32 keySize;

} office2007_t;
1601
// esalt: MS Office 2010 — encrypted verifier pair.
typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[8];

} office2010_t;

// esalt: MS Office 2013 — encrypted verifier pair.
typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[8];

} office2013_t;

// esalt: legacy Office (MD5/RC4 era) — verifier pair plus RC4 key scratch.
typedef struct
{
  u32 version;
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[4];
  u32 rc4key[2];

} oldoffice01_t;

// esalt: legacy Office (SHA1/RC4 era) — verifier pair plus RC4 key scratch.
typedef struct
{
  u32 version;
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[5];
  u32 rc4key[2];

} oldoffice34_t;

// esalt: KeePass database header. The contents[75000] buffer makes this a
// very large per-hash record — note before duplicating it anywhere.
typedef struct
{
  u32 version;
  u32 algorithm;

  /* key-file handling */
  u32 keyfile_len;
  u32 keyfile[8];

  u32 final_random_seed[8];
  u32 transf_random_seed[8];
  u32 enc_iv[4];
  u32 contents_hash[8];

  /* specific to version 1 */
  u32 contents_len;
  u32 contents[75000];

  /* specific to version 2 */
  u32 expected_bytes[8];

} keepass_t;
1656
// *_tmp_t structs: per-candidate scratch state carried between invocations
// of a slow-hash kernel's init/loop/comp stages (by naming convention;
// confirm usage in the corresponding m*-kernels).

// PDF 1.4 loop state.
typedef struct
{
  u32 digest[4];
  u32 out[4];

} pdf14_tmp_t;

// PDF 1.7 level 8 loop state; the union lets the same storage be viewed as
// 32-bit or 64-bit digest words.
typedef struct
{
  union
  {
    u32 dgst32[16];
    u64 dgst64[8];
  };

  u32 dgst_len;
  u32 W_len;

} pdf17l8_tmp_t;

// phpass (portable PHP hash) loop state.
typedef struct
{
  u32 digest_buf[4];

} phpass_tmp_t;

// md5crypt loop state.
typedef struct
{
  u32 digest_buf[4];

} md5crypt_tmp_t;

// sha256crypt loop state: alternate result plus P/S byte sequences.
typedef struct
{
  u32 alt_result[8];

  u32 p_bytes[4];
  u32 s_bytes[4];

} sha256crypt_tmp_t;

// sha512crypt loop state (64-bit words).
typedef struct
{
  u64 l_alt_result[8];

  u64 l_p_bytes[2];
  u64 l_s_bytes[2];

} sha512crypt_tmp_t;

// WPA PBKDF2 loop state: HMAC-SHA1 ipad/opad plus running digest/output.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[10];
  u32 out[10];

} wpa_tmp_t;

// Bitcoin wallet loop state (SHA-512 chaining value).
typedef struct
{
  u64 dgst[8];

} bitcoin_wallet_tmp_t;

// DCC2 (mscash2) PBKDF2-HMAC-SHA1 loop state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[4];

} dcc2_tmp_t;
1732
// bcrypt loop state: expanded key E plus the Blowfish P-array and S-boxes.
typedef struct
{
  u32 E[18];

  u32 P[18];

  u32 S0[256];
  u32 S1[256];
  u32 S2[256];
  u32 S3[256];

} bcrypt_tmp_t;

// Password Safe v2 loop state (Blowfish-based).
typedef struct
{
  u32 digest[2];

  u32 P[18];

  u32 S0[256];
  u32 S1[256];
  u32 S2[256];
  u32 S3[256];

} pwsafe2_tmp_t;

// Password Safe v3 loop state (SHA-256 chaining value).
typedef struct
{
  u32 digest_buf[8];

} pwsafe3_tmp_t;

// Android PIN loop state (SHA-1 chaining value).
typedef struct
{
  u32 digest_buf[5];

} androidpin_tmp_t;

// Android FDE PBKDF2-HMAC-SHA1 loop state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[10];
  u32 out[10];

} androidfde_tmp_t;

// TrueCrypt PBKDF2 loop state (32-bit hash variants).
typedef struct
{
  u32 ipad[16];
  u32 opad[16];

  u32 dgst[64];
  u32 out[64];

} tc_tmp_t;

// TrueCrypt PBKDF2 loop state (64-bit hash variants).
typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[32];
  u64 out[32];

} tc64_tmp_t;
1800
// PBKDF2-HMAC-MD5 loop state: precomputed ipad/opad plus running
// digest and accumulated output.
typedef struct
{
  u32 ipad[4];
  u32 opad[4];

  u32 dgst[32];
  u32 out[32];

} pbkdf2_md5_tmp_t;

// PBKDF2-HMAC-SHA1 loop state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[32];
  u32 out[32];

} pbkdf2_sha1_tmp_t;

// PBKDF2-HMAC-SHA256 loop state.
typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[32];
  u32 out[32];

} pbkdf2_sha256_tmp_t;

// PBKDF2-HMAC-SHA512 loop state (64-bit words).
typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[16];
  u64 out[16];

} pbkdf2_sha512_tmp_t;

// eCryptfs loop state (SHA-512 chaining value).
typedef struct
{
  u64 out[8];

} ecryptfs_tmp_t;

// Oracle T (12c) PBKDF2-HMAC-SHA512 loop state.
typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[16];
  u64 out[16];

} oraclet_tmp_t;

// 1Password agilekeychain PBKDF2-HMAC-SHA1 loop state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];

} agilekey_tmp_t;
1866
// Blockchain "My Wallet" loop state: two PBKDF2 output blocks are derived,
// hence the paired dgst1/out1 and dgst2/out2.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst1[5];
  u32 out1[5];

  u32 dgst2[5];
  u32 out2[5];

} mywallet_tmp_t;

// AIX {ssha1} PBKDF2 loop state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];

} sha1aix_tmp_t;

// AIX {ssha256} PBKDF2 loop state.
typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[8];
  u32 out[8];

} sha256aix_tmp_t;

// AIX {ssha512} PBKDF2 loop state (64-bit words).
typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[8];
  u64 out[8];

} sha512aix_tmp_t;

// LastPass PBKDF2-HMAC-SHA256 loop state.
typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[8];
  u32 out[8];

} lastpass_tmp_t;

// Drupal 7 loop state (SHA-512 chaining value).
typedef struct
{
  u64 digest_buf[8];

} drupal7_tmp_t;

// Lotus Notes/Domino 8 PBKDF2-HMAC-SHA1 loop state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];

} lotus8_tmp_t;
1935
// MS Office 2007 iterated-SHA1 loop state.
typedef struct
{
  u32 out[5];

} office2007_tmp_t;

// MS Office 2010 iterated-SHA1 loop state.
typedef struct
{
  u32 out[5];

} office2010_tmp_t;

// MS Office 2013 iterated-SHA512 loop state.
typedef struct
{
  u64 out[8];

} office2013_tmp_t;

// SAP CODVN H (PBKDF2-SHA1) loop state.
typedef struct
{
  u32 digest_buf[5];

} saph_sha1_tmp_t;

// 7-Zip iterated-SHA256 loop state: current message block, chaining value,
// and the running block/final lengths.
typedef struct
{
  u32 block[16];

  u32 dgst[8];

  u32 block_len;
  u32 final_len;

} seven_zip_tmp_t;

// AxCrypt key-wrap loop state.
typedef struct
{
  u32 KEK[5];

  u32 lsb[4];
  u32 cipher[4];

} axcrypt_tmp_t;

// KeePass AES key-transformation loop state.
typedef struct
{
  u32 tmp_digest[8];

} keepass_tmp_t;

// BSDi crypt loop state: DES key schedule halves plus the running IV.
typedef struct
{
  u32 Kc[16];
  u32 Kd[16];

  u32 iv[2];

} bsdicrypt_tmp_t;

// RAR3 iterated-SHA1 loop state (17 intermediate digests).
typedef struct
{
  u32 dgst[17][5];

} rar3_tmp_t;
2000
// esalt: CRAM-MD5 user/challenge data.
typedef struct
{
  u32 user[16];

} cram_md5_t;

// esalt: 7-Zip archive header — IV, salt, CRC, and the encrypted data
// block with lengths and the expected unpacked size.
typedef struct
{
  u32 iv_buf[4];
  u32 iv_len;

  u32 salt_buf[4];
  u32 salt_len;

  u32 crc;

  u32 data_buf[96];
  u32 data_len;

  u32 unpack_size;

} seven_zip_t;

// Markov-chain statistics entry used by the candidate generator.
typedef struct
{
  u32 key;
  u64 val;

} hcstat_table_t;

// Charset: up to 0x100 code points plus the count in use.
typedef struct
{
  u32 cs_buf[0x100];
  u32 cs_len;

} cs_t;

// One mangling rule: a fixed-size program of rule opcodes.
typedef struct
{
  u32 cmds[0x100];

} kernel_rule_t;

// Location of a cracked plain: candidate index and inner-loop position.
typedef struct
{
  u32 gidvid;
  u32 il_pos;

} plain_t;

// One password candidate: packed word buffer plus its byte length.
// The trailing placeholders pad the struct to a fixed size/alignment
// shared with the host — do not remove.
typedef struct
{
  u32 i[16];

  u32 pw_len;

  u32 alignment_placeholder_1;
  u32 alignment_placeholder_2;
  u32 alignment_placeholder_3;

} pw_t;

// Brute-force position marker.
typedef struct
{
  u32 i;

} bf_t;

// Combinator-attack word: half-candidate buffer plus byte length.
typedef struct
{
  u32 i[8];

  u32 pw_len;

} comb_t;

// Bitsliced word: 32 bit-planes.
typedef struct
{
  u32 b[32];

} bs_word_t;

// scrypt scratch: 64 uint4 lanes of the V/X working buffer.
typedef struct
{
  uint4 P[64];

} scrypt_tmp_t;