d935b68b70a8261bd9cf1a0e0f6fa0b2bf87a91f
[hashcat.git] / OpenCL / types_ocl.c
1 /**
2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * magnum <john.magnum@hushmail.com>
4 *
5 * License.....: MIT
6 */
7
8 #define DEVICE_TYPE_CPU 2
9 #define DEVICE_TYPE_GPU 4
10
11 typedef uchar u8;
12 typedef ushort u16;
13 typedef uint u32;
14 typedef ulong u64;
15
16 #ifndef NEW_SIMD_CODE
17 #undef VECT_SIZE
18 #define VECT_SIZE 1
19 #endif
20
21 #define CONCAT(a, b) a##b
22 #define VTYPE(type, width) CONCAT(type, width)
23
24 #if VECT_SIZE == 1
25 typedef uchar u8x;
26 typedef ushort u16x;
27 typedef uint u32x;
28 typedef ulong u64x;
29 #else
30 typedef VTYPE(uchar, VECT_SIZE) u8x;
31 typedef VTYPE(ushort, VECT_SIZE) u16x;
32 typedef VTYPE(uint, VECT_SIZE) u32x;
33 typedef VTYPE(ulong, VECT_SIZE) u64x;
34 #endif
35
36 // this one needs to die
37 #define allx(r) r
38
39 static inline u32 l32_from_64_S (u64 a)
40 {
41 const u32 r = (u32) (a);
42
43 return r;
44 }
45
46 static inline u32 h32_from_64_S (u64 a)
47 {
48 a >>= 32;
49
50 const u32 r = (u32) (a);
51
52 return r;
53 }
54
55 static inline u64 hl32_to_64_S (const u32 a, const u32 b)
56 {
57 return as_ulong ((uint2) (b, a));
58 }
59
60 static inline u32x l32_from_64 (u64x a)
61 {
62 u32x r;
63
64 #if VECT_SIZE == 1
65 r = (u32) a;
66 #endif
67
68 #if VECT_SIZE >= 2
69 r.s0 = (u32) a.s0;
70 r.s1 = (u32) a.s1;
71 #endif
72
73 #if VECT_SIZE >= 4
74 r.s2 = (u32) a.s2;
75 r.s3 = (u32) a.s3;
76 #endif
77
78 #if VECT_SIZE >= 8
79 r.s4 = (u32) a.s4;
80 r.s5 = (u32) a.s5;
81 r.s6 = (u32) a.s6;
82 r.s7 = (u32) a.s7;
83 #endif
84
85 #if VECT_SIZE >= 16
86 r.s8 = (u32) a.s8;
87 r.s9 = (u32) a.s9;
88 r.sa = (u32) a.sa;
89 r.sb = (u32) a.sb;
90 r.sc = (u32) a.sc;
91 r.sd = (u32) a.sd;
92 r.se = (u32) a.se;
93 r.sf = (u32) a.sf;
94 #endif
95
96 return r;
97 }
98
99 static inline u32x h32_from_64 (u64x a)
100 {
101 a >>= 32;
102
103 u32x r;
104
105 #if VECT_SIZE == 1
106 r = (u32) a;
107 #endif
108
109 #if VECT_SIZE >= 2
110 r.s0 = (u32) a.s0;
111 r.s1 = (u32) a.s1;
112 #endif
113
114 #if VECT_SIZE >= 4
115 r.s2 = (u32) a.s2;
116 r.s3 = (u32) a.s3;
117 #endif
118
119 #if VECT_SIZE >= 8
120 r.s4 = (u32) a.s4;
121 r.s5 = (u32) a.s5;
122 r.s6 = (u32) a.s6;
123 r.s7 = (u32) a.s7;
124 #endif
125
126 #if VECT_SIZE >= 16
127 r.s8 = (u32) a.s8;
128 r.s9 = (u32) a.s9;
129 r.sa = (u32) a.sa;
130 r.sb = (u32) a.sb;
131 r.sc = (u32) a.sc;
132 r.sd = (u32) a.sd;
133 r.se = (u32) a.se;
134 r.sf = (u32) a.sf;
135 #endif
136
137 return r;
138 }
139
140 static inline u64x hl32_to_64 (const u32x a, const u32x b)
141 {
142 u64x r;
143
144 #if VECT_SIZE == 1
145 r = as_ulong ((uint2) (b, a));
146 #endif
147
148 #if VECT_SIZE >= 2
149 r.s0 = as_ulong ((uint2) (b.s0, a.s0));
150 r.s1 = as_ulong ((uint2) (b.s1, a.s1));
151 #endif
152
153 #if VECT_SIZE >= 4
154 r.s2 = as_ulong ((uint2) (b.s2, a.s2));
155 r.s3 = as_ulong ((uint2) (b.s3, a.s3));
156 #endif
157
158 #if VECT_SIZE >= 8
159 r.s4 = as_ulong ((uint2) (b.s4, a.s4));
160 r.s5 = as_ulong ((uint2) (b.s5, a.s5));
161 r.s6 = as_ulong ((uint2) (b.s6, a.s6));
162 r.s7 = as_ulong ((uint2) (b.s7, a.s7));
163 #endif
164
165 #if VECT_SIZE >= 16
166 r.s8 = as_ulong ((uint2) (b.s8, a.s8));
167 r.s9 = as_ulong ((uint2) (b.s9, a.s9));
168 r.sa = as_ulong ((uint2) (b.sa, a.sa));
169 r.sb = as_ulong ((uint2) (b.sb, a.sb));
170 r.sc = as_ulong ((uint2) (b.sc, a.sc));
171 r.sd = as_ulong ((uint2) (b.sd, a.sd));
172 r.se = as_ulong ((uint2) (b.se, a.se));
173 r.sf = as_ulong ((uint2) (b.sf, a.sf));
174 #endif
175
176 return r;
177 }
178
179 #ifdef IS_AMD
180 static inline u32 swap32_S (const u32 v)
181 {
182 return (as_uint (as_uchar4 (v).s3210));
183 }
184
185 static inline u64 swap64_S (const u64 v)
186 {
187 return (as_ulong (as_uchar8 (v).s76543210));
188 }
189
190 static inline u32 rotr32_S (const u32 a, const u32 n)
191 {
192 return rotate (a, 32 - n);
193 }
194
195 static inline u32 rotl32_S (const u32 a, const u32 n)
196 {
197 return rotate (a, n);
198 }
199
200 static inline u64 rotr64_S (const u64 a, const u32 n)
201 {
202 #if (DEVICE_TYPE == DEVICE_TYPE_GPU)
203
204 #ifdef amd_bitalign
205
206 const u32 a0 = h32_from_64_S (a);
207 const u32 a1 = l32_from_64_S (a);
208
209 const u32 t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
210 const u32 t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
211
212 const u64 r = hl32_to_64_S (t0, t1);
213
214 #else
215
216 const u64 r = rotate (a, (u64) 64 - n);
217
218 #endif
219
220 #else
221
222 const u64 r = rotate (a, (u64) 64 - n);
223
224 #endif
225
226 return r;
227 }
228
229 static inline u64 rotl64_S (const u64 a, const u32 n)
230 {
231 return rotr64_S (a, 64 - n);
232 }
233
234 static inline u32x swap32 (const u32x v)
235 {
236 return ((v >> 24) & 0x000000ff)
237 | ((v >> 8) & 0x0000ff00)
238 | ((v << 8) & 0x00ff0000)
239 | ((v << 24) & 0xff000000);
240 }
241
242 static inline u64x swap64 (const u64x v)
243 {
244 return ((v >> 56) & 0x00000000000000ff)
245 | ((v >> 40) & 0x000000000000ff00)
246 | ((v >> 24) & 0x0000000000ff0000)
247 | ((v >> 8) & 0x00000000ff000000)
248 | ((v << 8) & 0x000000ff00000000)
249 | ((v << 24) & 0x0000ff0000000000)
250 | ((v << 40) & 0x00ff000000000000)
251 | ((v << 56) & 0xff00000000000000);
252 }
253
254 static inline u32x rotr32 (const u32x a, const u32 n)
255 {
256 return rotate (a, 32 - n);
257 }
258
259 static inline u32x rotl32 (const u32x a, const u32 n)
260 {
261 return rotate (a, n);
262 }
263
264 static inline u64x rotr64 (const u64x a, const u32 n)
265 {
266 #if (DEVICE_TYPE == DEVICE_TYPE_GPU)
267
268 #ifdef amd_bitalign
269 const u32x a0 = h32_from_64 (a);
270 const u32x a1 = l32_from_64 (a);
271
272 const u32x t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
273 const u32x t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
274
275 const u64x r = hl32_to_64 (t0, t1);
276
277 #else
278
279 const u64x r = rotate (a, (u64) 64 - n);
280
281 #endif
282
283 #else
284
285 const u64x r = rotate (a, (u64) 64 - n);
286
287 #endif
288
289 return r;
290 }
291
292 static inline u64x rotl64 (const u64x a, const u32 n)
293 {
294 return rotr64 (a, 64 - n);
295 }
296
297 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
298 {
299 #ifdef amd_bfe
300 return amd_bfe (a, b, c);
301 #else
302 #define BIT(x) (1 << (x))
303 #define BIT_MASK(x) (BIT (x) - 1)
304 #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z))
305
306 return BFE (a, b, c);
307 #endif
308 }
309
310 static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
311 {
312 #ifdef amd_bytealign
313 return amd_bytealign (a, b, c);
314 #else
315 const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);
316
317 return (u32) (tmp);
318 #endif
319 }
320 #endif
321
322 #ifdef IS_NV
323 static inline u32 swap32_S (const u32 v)
324 {
325 u32 r;
326
327 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));
328
329 return r;
330 }
331
332 static inline u64 swap64_S (const u64 v)
333 {
334 u32 il;
335 u32 ir;
336
337 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(v));
338
339 u32 tl;
340 u32 tr;
341
342 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl) : "r"(il));
343 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));
344
345 u64 r;
346
347 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));
348
349 return r;
350 }
351
352 static inline u32 rotr32_S (const u32 a, const u32 n)
353 {
354 return rotate (a, 32 - n);
355 }
356
357 static inline u32 rotl32_S (const u32 a, const u32 n)
358 {
359 return rotate (a, n);
360 }
361
362 #if CUDA_ARCH >= 350
363 static inline u64 rotr64_S (const u64 a, const u32 n)
364 {
365 u32 il;
366 u32 ir;
367
368 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
369
370 u32 tl;
371 u32 tr;
372
373 if (n >= 32)
374 {
375 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
376 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
377 }
378 else
379 {
380 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
381 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
382 }
383
384 u64 r;
385
386 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
387
388 return r;
389 }
390 #else
391 static inline u64 rotr64_S (const u64 a, const u32 n)
392 {
393 return rotate (a, (u64) 64 - n);
394 }
395 #endif
396
397 static inline u64 rotl64_S (const u64 a, const u32 n)
398 {
399 return rotr64_S (a, 64 - n);
400 }
401
402 #if CUDA_ARCH >= 500
403 static inline u32 lut3_2d_S (const u32 a, const u32 b, const u32 c)
404 {
405 u32 r;
406
407 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
408
409 return r;
410 }
411
412 static inline u32 lut3_39_S (const u32 a, const u32 b, const u32 c)
413 {
414 u32 r;
415
416 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
417
418 return r;
419 }
420
421 static inline u32 lut3_59_S (const u32 a, const u32 b, const u32 c)
422 {
423 u32 r;
424
425 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
426
427 return r;
428 }
429
430 static inline u32 lut3_96_S (const u32 a, const u32 b, const u32 c)
431 {
432 u32 r;
433
434 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
435
436 return r;
437 }
438
439 static inline u32 lut3_e4_S (const u32 a, const u32 b, const u32 c)
440 {
441 u32 r;
442
443 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
444
445 return r;
446 }
447
448 static inline u32 lut3_e8_S (const u32 a, const u32 b, const u32 c)
449 {
450 u32 r;
451
452 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
453
454 return r;
455 }
456
457 static inline u32 lut3_ca_S (const u32 a, const u32 b, const u32 c)
458 {
459 u32 r;
460
461 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
462
463 return r;
464 }
465 #endif
466
467 static inline u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
468 {
469 u32 r;
470
471 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
472
473 return r;
474 }
475
476 static inline u32x swap32 (const u32x v)
477 {
478 return ((v >> 24) & 0x000000ff)
479 | ((v >> 8) & 0x0000ff00)
480 | ((v << 8) & 0x00ff0000)
481 | ((v << 24) & 0xff000000);
482 }
483
484 static inline u64x swap64 (const u64x v)
485 {
486 return ((v >> 56) & 0x00000000000000ff)
487 | ((v >> 40) & 0x000000000000ff00)
488 | ((v >> 24) & 0x0000000000ff0000)
489 | ((v >> 8) & 0x00000000ff000000)
490 | ((v << 8) & 0x000000ff00000000)
491 | ((v << 24) & 0x0000ff0000000000)
492 | ((v << 40) & 0x00ff000000000000)
493 | ((v << 56) & 0xff00000000000000);
494 }
495
496 static inline u32x rotr32 (const u32x a, const u32 n)
497 {
498 return rotate (a, 32 - n);
499 }
500
501 static inline u32x rotl32 (const u32x a, const u32 n)
502 {
503 return rotate (a, n);
504 }
505
506 #if CUDA_ARCH >= 350
507 static inline u64x rotr64 (const u64x a, const u32 n)
508 {
509 u64x r;
510
511 u32 il;
512 u32 ir;
513 u32 tl;
514 u32 tr;
515
516 #if VECT_SIZE == 1
517
518 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
519
520 if (n >= 32)
521 {
522 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
523 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
524 }
525 else
526 {
527 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
528 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
529 }
530
531 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
532
533 #endif
534
535 #if VECT_SIZE >= 2
536
537 {
538 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s0));
539
540 if (n >= 32)
541 {
542 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
543 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
544 }
545 else
546 {
547 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
548 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
549 }
550
551 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s0) : "r"(tl), "r"(tr));
552 }
553
554 {
555 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s1));
556
557 if (n >= 32)
558 {
559 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
560 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
561 }
562 else
563 {
564 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
565 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
566 }
567
568 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s1) : "r"(tl), "r"(tr));
569 }
570
571 #endif
572
573 #if VECT_SIZE >= 4
574
575 {
576 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s2));
577
578 if (n >= 32)
579 {
580 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
581 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
582 }
583 else
584 {
585 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
586 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
587 }
588
589 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s2) : "r"(tl), "r"(tr));
590 }
591
592 {
593 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s3));
594
595 if (n >= 32)
596 {
597 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
598 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
599 }
600 else
601 {
602 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
603 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
604 }
605
606 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s3) : "r"(tl), "r"(tr));
607 }
608
609 #endif
610
611 #if VECT_SIZE >= 8
612
613 {
614 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s4));
615
616 if (n >= 32)
617 {
618 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
619 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
620 }
621 else
622 {
623 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
624 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
625 }
626
627 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s4) : "r"(tl), "r"(tr));
628 }
629
630 {
631 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s5));
632
633 if (n >= 32)
634 {
635 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
636 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
637 }
638 else
639 {
640 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
641 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
642 }
643
644 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s5) : "r"(tl), "r"(tr));
645 }
646
647 {
648 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s6));
649
650 if (n >= 32)
651 {
652 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
653 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
654 }
655 else
656 {
657 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
658 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
659 }
660
661 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s6) : "r"(tl), "r"(tr));
662 }
663
664 {
665 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s7));
666
667 if (n >= 32)
668 {
669 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
670 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
671 }
672 else
673 {
674 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
675 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
676 }
677
678 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s7) : "r"(tl), "r"(tr));
679 }
680
681 #endif
682
683 #if VECT_SIZE >= 16
684
685 {
686 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s8));
687
688 if (n >= 32)
689 {
690 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
691 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
692 }
693 else
694 {
695 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
696 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
697 }
698
699 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s8) : "r"(tl), "r"(tr));
700 }
701
702 {
703 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s9));
704
705 if (n >= 32)
706 {
707 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
708 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
709 }
710 else
711 {
712 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
713 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
714 }
715
716 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s9) : "r"(tl), "r"(tr));
717 }
718
719 {
720 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sa));
721
722 if (n >= 32)
723 {
724 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
725 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
726 }
727 else
728 {
729 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
730 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
731 }
732
733 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sa) : "r"(tl), "r"(tr));
734 }
735
736 {
737 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sb));
738
739 if (n >= 32)
740 {
741 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
742 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
743 }
744 else
745 {
746 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
747 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
748 }
749
750 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sb) : "r"(tl), "r"(tr));
751 }
752
753 {
754 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sc));
755
756 if (n >= 32)
757 {
758 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
759 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
760 }
761 else
762 {
763 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
764 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
765 }
766
767 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sc) : "r"(tl), "r"(tr));
768 }
769
770 {
771 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sd));
772
773 if (n >= 32)
774 {
775 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
776 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
777 }
778 else
779 {
780 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
781 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
782 }
783
784 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sd) : "r"(tl), "r"(tr));
785 }
786
787 {
788 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.se));
789
790 if (n >= 32)
791 {
792 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
793 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
794 }
795 else
796 {
797 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
798 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
799 }
800
801 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.se) : "r"(tl), "r"(tr));
802 }
803
804 {
805 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sf));
806
807 if (n >= 32)
808 {
809 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
810 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
811 }
812 else
813 {
814 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
815 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
816 }
817
818 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sf) : "r"(tl), "r"(tr));
819 }
820
821 #endif
822
823 return r;
824 }
825 #else
826 static inline u64x rotr64 (const u64x a, const u32 n)
827 {
828 return rotate (a, (u64) 64 - n);
829 }
830 #endif
831
832 static inline u64x rotl64 (const u64x a, const u32 n)
833 {
834 return rotr64 (a, (u64) 64 - n);
835 }
836
837 static inline u32x __byte_perm (const u32x a, const u32x b, const u32x c)
838 {
839 u32x r;
840
841 #if VECT_SIZE == 1
842 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c) );
843 #endif
844
845 #if VECT_SIZE >= 2
846 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(a.s0), "r"(b.s0), "r"(c.s0));
847 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(a.s1), "r"(b.s1), "r"(c.s1));
848 #endif
849
850 #if VECT_SIZE >= 4
851 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(a.s2), "r"(b.s2), "r"(c.s2));
852 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(a.s3), "r"(b.s3), "r"(c.s3));
853 #endif
854
855 #if VECT_SIZE >= 8
856 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(a.s4), "r"(b.s4), "r"(c.s4));
857 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(a.s5), "r"(b.s5), "r"(c.s5));
858 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(a.s6), "r"(b.s6), "r"(c.s6));
859 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(a.s7), "r"(b.s7), "r"(c.s7));
860 #endif
861
862 #if VECT_SIZE >= 16
863 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(a.s8), "r"(b.s8), "r"(c.s8));
864 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(a.s9), "r"(b.s9), "r"(c.s9));
865 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(a.sa), "r"(b.sa), "r"(c.sa));
866 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(a.sb), "r"(b.sb), "r"(c.sb));
867 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(a.sc), "r"(b.sc), "r"(c.sc));
868 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(a.sd), "r"(b.sd), "r"(c.sd));
869 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(a.se), "r"(b.se), "r"(c.se));
870 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(a.sf), "r"(b.sf), "r"(c.sf));
871 #endif
872
873 return r;
874 }
875
876 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
877 {
878 u32 r;
879
880 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
881
882 return r;
883 }
884
885 #if CUDA_ARCH >= 350
886 static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
887 {
888 u32 r;
889
890 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));
891
892 return r;
893 }
894 #else
895 static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
896 {
897 return __byte_perm_S (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);
898 }
899 #endif
900
901 #if CUDA_ARCH >= 500
902 static inline u32x lut3_2d (const u32x a, const u32x b, const u32x c)
903 {
904 u32x r;
905
906 #if VECT_SIZE == 1
907 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
908 #endif
909
910 #if VECT_SIZE >= 2
911 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
912 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
913 #endif
914
915 #if VECT_SIZE >= 4
916 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
917 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
918 #endif
919
920 #if VECT_SIZE >= 8
921 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
922 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
923 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
924 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
925 #endif
926
927 #if VECT_SIZE >= 16
928 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
929 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
930 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
931 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
932 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
933 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
934 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
935 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
936 #endif
937
938 return r;
939 }
940
941 static inline u32x lut3_39 (const u32x a, const u32x b, const u32x c)
942 {
943 u32x r;
944
945 #if VECT_SIZE == 1
946 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
947 #endif
948
949 #if VECT_SIZE >= 2
950 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
951 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
952 #endif
953
954 #if VECT_SIZE >= 4
955 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
956 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
957 #endif
958
959 #if VECT_SIZE >= 8
960 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
961 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
962 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
963 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
964 #endif
965
966 #if VECT_SIZE >= 16
967 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
968 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
969 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
970 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
971 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
972 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
973 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
974 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
975 #endif
976
977 return r;
978 }
979
980 static inline u32x lut3_59 (const u32x a, const u32x b, const u32x c)
981 {
982 u32x r;
983
984 #if VECT_SIZE == 1
985 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
986 #endif
987
988 #if VECT_SIZE >= 2
989 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
990 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
991 #endif
992
993 #if VECT_SIZE >= 4
994 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
995 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
996 #endif
997
998 #if VECT_SIZE >= 8
999 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1000 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1001 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1002 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1003 #endif
1004
1005 #if VECT_SIZE >= 16
1006 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1007 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1008 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1009 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1010 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1011 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1012 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1013 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1014 #endif
1015
1016 return r;
1017 }
1018
1019 static inline u32x lut3_96 (const u32x a, const u32x b, const u32x c)
1020 {
1021 u32x r;
1022
1023 #if VECT_SIZE == 1
1024 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1025 #endif
1026
1027 #if VECT_SIZE >= 2
1028 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1029 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1030 #endif
1031
1032 #if VECT_SIZE >= 4
1033 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1034 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1035 #endif
1036
1037 #if VECT_SIZE >= 8
1038 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1039 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1040 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1041 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1042 #endif
1043
1044 #if VECT_SIZE >= 16
1045 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1046 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1047 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1048 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1049 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1050 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1051 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1052 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1053 #endif
1054
1055 return r;
1056 }
1057
1058 static inline u32x lut3_e4 (const u32x a, const u32x b, const u32x c)
1059 {
1060 u32x r;
1061
1062 #if VECT_SIZE == 1
1063 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1064 #endif
1065
1066 #if VECT_SIZE >= 2
1067 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1068 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1069 #endif
1070
1071 #if VECT_SIZE >= 4
1072 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1073 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1074 #endif
1075
1076 #if VECT_SIZE >= 8
1077 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1078 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1079 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1080 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1081 #endif
1082
1083 #if VECT_SIZE >= 16
1084 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1085 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1086 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1087 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1088 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1089 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1090 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1091 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1092 #endif
1093
1094 return r;
1095 }
1096
1097 static inline u32x lut3_e8 (const u32x a, const u32x b, const u32x c)
1098 {
1099 u32x r;
1100
1101 #if VECT_SIZE == 1
1102 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1103 #endif
1104
1105 #if VECT_SIZE >= 2
1106 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1107 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1108 #endif
1109
1110 #if VECT_SIZE >= 4
1111 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1112 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1113 #endif
1114
1115 #if VECT_SIZE >= 8
1116 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1117 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1118 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1119 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1120 #endif
1121
1122 #if VECT_SIZE >= 16
1123 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1124 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1125 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1126 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1127 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1128 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1129 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1130 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1131 #endif
1132
1133 return r;
1134 }
1135
1136 static inline u32x lut3_ca (const u32x a, const u32x b, const u32x c)
1137 {
1138 u32x r;
1139
1140 #if VECT_SIZE == 1
1141 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1142 #endif
1143
1144 #if VECT_SIZE >= 2
1145 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1146 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1147 #endif
1148
1149 #if VECT_SIZE >= 4
1150 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1151 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1152 #endif
1153
1154 #if VECT_SIZE >= 8
1155 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1156 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1157 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1158 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1159 #endif
1160
1161 #if VECT_SIZE >= 16
1162 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1163 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1164 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1165 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1166 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1167 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1168 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1169 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1170 #endif
1171
1172 return r;
1173 }
1174
1175 #endif
1176 #endif
1177
1178 #ifdef IS_GENERIC
1179 static inline u32 swap32_S (const u32 v)
1180 {
1181 return (as_uint (as_uchar4 (v).s3210));
1182 }
1183
1184 static inline u64 swap64_S (const u64 v)
1185 {
1186 return (as_ulong (as_uchar8 (v).s76543210));
1187 }
1188
1189 static inline u32 rotr32_S (const u32 a, const u32 n)
1190 {
1191 return rotate (a, 32 - n);
1192 }
1193
1194 static inline u32 rotl32_S (const u32 a, const u32 n)
1195 {
1196 return rotate (a, n);
1197 }
1198
1199 static inline u64 rotr64_S (const u64 a, const u32 n)
1200 {
1201 return rotate (a, (u64) 64 - n);
1202 }
1203
1204 static inline u64 rotl64_S (const u64 a, const u32 n)
1205 {
1206 return rotate (a, (u64) n);
1207 }
1208
1209 static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
1210 {
1211 const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);
1212
1213 return (u32) (tmp);
1214 }
1215
1216 static inline u32x swap32 (const u32x v)
1217 {
1218 return ((v >> 24) & 0x000000ff)
1219 | ((v >> 8) & 0x0000ff00)
1220 | ((v << 8) & 0x00ff0000)
1221 | ((v << 24) & 0xff000000);
1222 }
1223
1224 static inline u64x swap64 (const u64x v)
1225 {
1226 return ((v >> 56) & 0x00000000000000ff)
1227 | ((v >> 40) & 0x000000000000ff00)
1228 | ((v >> 24) & 0x0000000000ff0000)
1229 | ((v >> 8) & 0x00000000ff000000)
1230 | ((v << 8) & 0x000000ff00000000)
1231 | ((v << 24) & 0x0000ff0000000000)
1232 | ((v << 40) & 0x00ff000000000000)
1233 | ((v << 56) & 0xff00000000000000);
1234 }
1235
1236 static inline u32x rotr32 (const u32x a, const u32 n)
1237 {
1238 return rotate (a, 32 - n);
1239 }
1240
1241 static inline u32x rotl32 (const u32x a, const u32 n)
1242 {
1243 return rotate (a, n);
1244 }
1245
1246 static inline u64x rotr64 (const u64x a, const u32 n)
1247 {
1248 return rotate (a, (u64) 64 - n);
1249 }
1250
1251 static inline u64x rotl64 (const u64x a, const u32 n)
1252 {
1253 return rotate (a, (u64) n);
1254 }
1255
1256 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
1257 {
1258 #define BIT(x) (1 << (x))
1259 #define BIT_MASK(x) (BIT (x) - 1)
1260 #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z))
1261
1262 return BFE (a, b, c);
1263 }
1264
1265 static inline u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
1266 {
1267 #if VECT_SIZE == 1
1268 const u64x tmp = ((((u64x) (a)) << 32) | ((u64x) (b))) >> ((c & 3) * 8);
1269
1270 return (u32x) (tmp);
1271 #endif
1272
1273 #if VECT_SIZE == 2
1274 const u64x tmp = ((((u64x) (a.s0, a.s1)) << 32) | ((u64x) (b.s0, b.s1))) >> ((c & 3) * 8);
1275
1276 return (u32x) (tmp.s0, tmp.s1);
1277 #endif
1278
1279 #if VECT_SIZE == 4
1280 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3))) >> ((c & 3) * 8);
1281
1282 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3);
1283 #endif
1284
1285 #if VECT_SIZE == 8
1286 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7))) >> ((c & 3) * 8);
1287
1288 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7);
1289 #endif
1290
1291 #if VECT_SIZE == 16
1292 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7, a.s8, a.s9, a.sa, a.sb, a.sc, a.sd, a.se, a.sf)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7, b.s8, b.s9, b.sa, b.sb, b.sc, b.sd, b.se, b.sf))) >> ((c & 3) * 8);
1293
1294 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7, tmp.s8, tmp.s9, tmp.sa, tmp.sb, tmp.sc, tmp.sd, tmp.se, tmp.sf);
1295 #endif
1296 }
1297 #endif
1298
1299 typedef struct
1300 {
1301 #if defined _DES_
1302 u32 digest_buf[4];
1303 #elif defined _MD4_
1304 u32 digest_buf[4];
1305 #elif defined _MD5_
1306 u32 digest_buf[4];
1307 #elif defined _MD5H_
1308 u32 digest_buf[4];
1309 #elif defined _SHA1_
1310 u32 digest_buf[5];
1311 #elif defined _BCRYPT_
1312 u32 digest_buf[6];
1313 #elif defined _SHA256_
1314 u32 digest_buf[8];
1315 #elif defined _SHA384_
1316 u32 digest_buf[16];
1317 #elif defined _SHA512_
1318 u32 digest_buf[16];
1319 #elif defined _KECCAK_
1320 u32 digest_buf[50];
1321 #elif defined _RIPEMD160_
1322 u32 digest_buf[5];
1323 #elif defined _WHIRLPOOL_
1324 u32 digest_buf[16];
1325 #elif defined _GOST_
1326 u32 digest_buf[8];
1327 #elif defined _GOST2012_256_
1328 u32 digest_buf[8];
1329 #elif defined _GOST2012_512_
1330 u32 digest_buf[16];
1331 #elif defined _SAPB_
1332 u32 digest_buf[4];
1333 #elif defined _SAPG_
1334 u32 digest_buf[5];
1335 #elif defined _MYSQL323_
1336 u32 digest_buf[4];
1337 #elif defined _LOTUS5_
1338 u32 digest_buf[4];
1339 #elif defined _LOTUS6_
1340 u32 digest_buf[4];
1341 #elif defined _SCRYPT_
1342 u32 digest_buf[8];
1343 #elif defined _LOTUS8_
1344 u32 digest_buf[4];
1345 #elif defined _OFFICE2007_
1346 u32 digest_buf[4];
1347 #elif defined _OFFICE2010_
1348 u32 digest_buf[4];
1349 #elif defined _OFFICE2013_
1350 u32 digest_buf[4];
1351 #elif defined _OLDOFFICE01_
1352 u32 digest_buf[4];
1353 #elif defined _OLDOFFICE34_
1354 u32 digest_buf[4];
1355 #elif defined _SIPHASH_
1356 u32 digest_buf[4];
1357 #elif defined _PBKDF2_MD5_
1358 u32 digest_buf[32];
1359 #elif defined _PBKDF2_SHA1_
1360 u32 digest_buf[32];
1361 #elif defined _PBKDF2_SHA256_
1362 u32 digest_buf[32];
1363 #elif defined _PBKDF2_SHA512_
1364 u32 digest_buf[32];
1365 #elif defined _PDF17L8_
1366 u32 digest_buf[8];
1367 #elif defined _CRC32_
1368 u32 digest_buf[4];
1369 #elif defined _SEVEN_ZIP_
1370 u32 digest_buf[4];
1371 #elif defined _ANDROIDFDE_
1372 u32 digest_buf[4];
1373 #elif defined _DCC2_
1374 u32 digest_buf[4];
1375 #elif defined _WPA_
1376 u32 digest_buf[4];
1377 #elif defined _MD5_SHA1_
1378 u32 digest_buf[4];
1379 #elif defined _SHA1_MD5_
1380 u32 digest_buf[5];
1381 #elif defined _NETNTLMV2_
1382 u32 digest_buf[4];
1383 #elif defined _KRB5PA_
1384 u32 digest_buf[4];
1385 #elif defined _CLOUDKEY_
1386 u32 digest_buf[8];
1387 #elif defined _SCRYPT_
1388 u32 digest_buf[4];
1389 #elif defined _PSAFE2_
1390 u32 digest_buf[5];
1391 #elif defined _LOTUS8_
1392 u32 digest_buf[4];
1393 #elif defined _RAR3_
1394 u32 digest_buf[4];
1395 #elif defined _SHA256_SHA1_
1396 u32 digest_buf[8];
1397 #elif defined _MS_DRSR_
1398 u32 digest_buf[8];
1399 #elif defined _ANDROIDFDE_SAMSUNG_
1400 u32 digest_buf[8];
1401 #elif defined _RAR5_
1402 u32 digest_buf[4];
1403 #elif defined _KRB5TGS_
1404 u32 digest_buf[4];
1405 #elif defined _AXCRYPT_
1406 u32 digest_buf[4];
1407 #elif defined _KEEPASS_
1408 u32 digest_buf[4];
1409 #endif
1410
1411 } digest_t;
1412
1413 typedef struct
1414 {
1415 u32 salt_buf[16];
1416 u32 salt_buf_pc[8];
1417
1418 u32 salt_len;
1419 u32 salt_iter;
1420 u32 salt_sign[2];
1421
1422 u32 keccak_mdlen;
1423 u32 truecrypt_mdlen;
1424
1425 u32 digests_cnt;
1426 u32 digests_done;
1427
1428 u32 digests_offset;
1429
1430 u32 scrypt_N;
1431 u32 scrypt_r;
1432 u32 scrypt_p;
1433 u32 scrypt_tmto;
1434 u32 scrypt_phy;
1435
1436 } salt_t;
1437
1438 typedef struct
1439 {
1440 int V;
1441 int R;
1442 int P;
1443
1444 int enc_md;
1445
1446 u32 id_buf[8];
1447 u32 u_buf[32];
1448 u32 o_buf[32];
1449
1450 int id_len;
1451 int o_len;
1452 int u_len;
1453
1454 u32 rc4key[2];
1455 u32 rc4data[2];
1456
1457 } pdf_t;
1458
1459 typedef struct
1460 {
1461 u32 pke[25];
1462 u32 eapol[64];
1463 int eapol_size;
1464 int keyver;
1465 u8 orig_mac1[6];
1466 u8 orig_mac2[6];
1467 u8 orig_nonce1[32];
1468 u8 orig_nonce2[32];
1469
1470 } wpa_t;
1471
1472 typedef struct
1473 {
1474 u32 cry_master_buf[64];
1475 u32 ckey_buf[64];
1476 u32 public_key_buf[64];
1477
1478 u32 cry_master_len;
1479 u32 ckey_len;
1480 u32 public_key_len;
1481
1482 } bitcoin_wallet_t;
1483
1484 typedef struct
1485 {
1486 u32 salt_buf[30];
1487 u32 salt_len;
1488
1489 u32 esalt_buf[38];
1490 u32 esalt_len;
1491
1492 } sip_t;
1493
1494 typedef struct
1495 {
1496 u32 data[384];
1497
1498 } androidfde_t;
1499
1500 typedef struct
1501 {
1502 u32 nr_buf[16];
1503 u32 nr_len;
1504
1505 u32 msg_buf[128];
1506 u32 msg_len;
1507
1508 } ikepsk_t;
1509
1510 typedef struct
1511 {
1512 u32 user_len;
1513 u32 domain_len;
1514 u32 srvchall_len;
1515 u32 clichall_len;
1516
1517 u32 userdomain_buf[64];
1518 u32 chall_buf[256];
1519
1520 } netntlm_t;
1521
1522 typedef struct
1523 {
1524 u32 user[16];
1525 u32 realm[16];
1526 u32 salt[32];
1527 u32 timestamp[16];
1528 u32 checksum[4];
1529
1530 } krb5pa_t;
1531
1532 typedef struct
1533 {
1534 u32 account_info[512];
1535 u32 checksum[4];
1536 u32 edata2[2560];
1537 u32 edata2_len;
1538
1539 } krb5tgs_t;
1540
1541 typedef struct
1542 {
1543 u32 salt_buf[16];
1544 u32 data_buf[112];
1545 u32 keyfile_buf[16];
1546
1547 } tc_t;
1548
1549 typedef struct
1550 {
1551 u32 salt_buf[16];
1552
1553 } pbkdf2_md5_t;
1554
1555 typedef struct
1556 {
1557 u32 salt_buf[16];
1558
1559 } pbkdf2_sha1_t;
1560
1561 typedef struct
1562 {
1563 u32 salt_buf[16];
1564
1565 } pbkdf2_sha256_t;
1566
1567 typedef struct
1568 {
1569 u32 salt_buf[32];
1570
1571 } pbkdf2_sha512_t;
1572
1573 typedef struct
1574 {
1575 u32 salt_buf[128];
1576 u32 salt_len;
1577
1578 } rakp_t;
1579
1580 typedef struct
1581 {
1582 u32 data_len;
1583 u32 data_buf[512];
1584
1585 } cloudkey_t;
1586
1587 typedef struct
1588 {
1589 u32 encryptedVerifier[4];
1590 u32 encryptedVerifierHash[5];
1591
1592 u32 keySize;
1593
1594 } office2007_t;
1595
1596 typedef struct
1597 {
1598 u32 encryptedVerifier[4];
1599 u32 encryptedVerifierHash[8];
1600
1601 } office2010_t;
1602
1603 typedef struct
1604 {
1605 u32 encryptedVerifier[4];
1606 u32 encryptedVerifierHash[8];
1607
1608 } office2013_t;
1609
1610 typedef struct
1611 {
1612 u32 version;
1613 u32 encryptedVerifier[4];
1614 u32 encryptedVerifierHash[4];
1615 u32 rc4key[2];
1616
1617 } oldoffice01_t;
1618
1619 typedef struct
1620 {
1621 u32 version;
1622 u32 encryptedVerifier[4];
1623 u32 encryptedVerifierHash[5];
1624 u32 rc4key[2];
1625
1626 } oldoffice34_t;
1627
1628 typedef struct
1629 {
1630 u32 version;
1631 u32 algorithm;
1632
1633 /* key-file handling */
1634 u32 keyfile_len;
1635 u32 keyfile[8];
1636
1637 u32 final_random_seed[8];
1638 u32 transf_random_seed[8];
1639 u32 enc_iv[4];
1640 u32 contents_hash[8];
1641
1642 /* specific to version 1 */
1643 u32 contents_len;
1644 u32 contents[75000];
1645
1646 /* specific to version 2 */
1647 u32 expected_bytes[8];
1648
1649 } keepass_t;
1650
1651 typedef struct
1652 {
1653 u32 digest[4];
1654 u32 out[4];
1655
1656 } pdf14_tmp_t;
1657
1658 typedef struct
1659 {
1660 union
1661 {
1662 u32 dgst32[16];
1663 u64 dgst64[8];
1664 };
1665
1666 u32 dgst_len;
1667 u32 W_len;
1668
1669 } pdf17l8_tmp_t;
1670
1671 typedef struct
1672 {
1673 u32 digest_buf[4];
1674
1675 } phpass_tmp_t;
1676
1677 typedef struct
1678 {
1679 u32 digest_buf[4];
1680
1681 } md5crypt_tmp_t;
1682
1683 typedef struct
1684 {
1685 u32 alt_result[8];
1686
1687 u32 p_bytes[4];
1688 u32 s_bytes[4];
1689
1690 } sha256crypt_tmp_t;
1691
1692 typedef struct
1693 {
1694 u64 l_alt_result[8];
1695
1696 u64 l_p_bytes[2];
1697 u64 l_s_bytes[2];
1698
1699 } sha512crypt_tmp_t;
1700
1701 typedef struct
1702 {
1703 u32 ipad[5];
1704 u32 opad[5];
1705
1706 u32 dgst[10];
1707 u32 out[10];
1708
1709 } wpa_tmp_t;
1710
1711 typedef struct
1712 {
1713 u64 dgst[8];
1714
1715 } bitcoin_wallet_tmp_t;
1716
1717 typedef struct
1718 {
1719 u32 ipad[5];
1720 u32 opad[5];
1721
1722 u32 dgst[5];
1723 u32 out[4];
1724
1725 } dcc2_tmp_t;
1726
1727 typedef struct
1728 {
1729 u32 E[18];
1730
1731 u32 P[18];
1732
1733 u32 S0[256];
1734 u32 S1[256];
1735 u32 S2[256];
1736 u32 S3[256];
1737
1738 } bcrypt_tmp_t;
1739
1740 typedef struct
1741 {
1742 u32 digest[2];
1743
1744 u32 P[18];
1745
1746 u32 S0[256];
1747 u32 S1[256];
1748 u32 S2[256];
1749 u32 S3[256];
1750
1751 } pwsafe2_tmp_t;
1752
1753 typedef struct
1754 {
1755 u32 digest_buf[8];
1756
1757 } pwsafe3_tmp_t;
1758
1759 typedef struct
1760 {
1761 u32 digest_buf[5];
1762
1763 } androidpin_tmp_t;
1764
1765 typedef struct
1766 {
1767 u32 ipad[5];
1768 u32 opad[5];
1769
1770 u32 dgst[10];
1771 u32 out[10];
1772
1773 } androidfde_tmp_t;
1774
1775 typedef struct
1776 {
1777 u32 ipad[16];
1778 u32 opad[16];
1779
1780 u32 dgst[64];
1781 u32 out[64];
1782
1783 } tc_tmp_t;
1784
1785 typedef struct
1786 {
1787 u64 ipad[8];
1788 u64 opad[8];
1789
1790 u64 dgst[32];
1791 u64 out[32];
1792
1793 } tc64_tmp_t;
1794
1795 typedef struct
1796 {
1797 u32 ipad[4];
1798 u32 opad[4];
1799
1800 u32 dgst[32];
1801 u32 out[32];
1802
1803 } pbkdf2_md5_tmp_t;
1804
1805 typedef struct
1806 {
1807 u32 ipad[5];
1808 u32 opad[5];
1809
1810 u32 dgst[32];
1811 u32 out[32];
1812
1813 } pbkdf2_sha1_tmp_t;
1814
1815 typedef struct
1816 {
1817 u32 ipad[8];
1818 u32 opad[8];
1819
1820 u32 dgst[32];
1821 u32 out[32];
1822
1823 } pbkdf2_sha256_tmp_t;
1824
1825 typedef struct
1826 {
1827 u64 ipad[8];
1828 u64 opad[8];
1829
1830 u64 dgst[16];
1831 u64 out[16];
1832
1833 } pbkdf2_sha512_tmp_t;
1834
1835 typedef struct
1836 {
1837 u64 out[8];
1838
1839 } ecryptfs_tmp_t;
1840
1841 typedef struct
1842 {
1843 u64 ipad[8];
1844 u64 opad[8];
1845
1846 u64 dgst[16];
1847 u64 out[16];
1848
1849 } oraclet_tmp_t;
1850
1851 typedef struct
1852 {
1853 u32 ipad[5];
1854 u32 opad[5];
1855
1856 u32 dgst[5];
1857 u32 out[5];
1858
1859 } agilekey_tmp_t;
1860
1861 typedef struct
1862 {
1863 u32 ipad[5];
1864 u32 opad[5];
1865
1866 u32 dgst1[5];
1867 u32 out1[5];
1868
1869 u32 dgst2[5];
1870 u32 out2[5];
1871
1872 } mywallet_tmp_t;
1873
1874 typedef struct
1875 {
1876 u32 ipad[5];
1877 u32 opad[5];
1878
1879 u32 dgst[5];
1880 u32 out[5];
1881
1882 } sha1aix_tmp_t;
1883
1884 typedef struct
1885 {
1886 u32 ipad[8];
1887 u32 opad[8];
1888
1889 u32 dgst[8];
1890 u32 out[8];
1891
1892 } sha256aix_tmp_t;
1893
1894 typedef struct
1895 {
1896 u64 ipad[8];
1897 u64 opad[8];
1898
1899 u64 dgst[8];
1900 u64 out[8];
1901
1902 } sha512aix_tmp_t;
1903
1904 typedef struct
1905 {
1906 u32 ipad[8];
1907 u32 opad[8];
1908
1909 u32 dgst[8];
1910 u32 out[8];
1911
1912 } lastpass_tmp_t;
1913
1914 typedef struct
1915 {
1916 u64 digest_buf[8];
1917
1918 } drupal7_tmp_t;
1919
1920 typedef struct
1921 {
1922 u32 ipad[5];
1923 u32 opad[5];
1924
1925 u32 dgst[5];
1926 u32 out[5];
1927
1928 } lotus8_tmp_t;
1929
1930 typedef struct
1931 {
1932 u32 out[5];
1933
1934 } office2007_tmp_t;
1935
1936 typedef struct
1937 {
1938 u32 out[5];
1939
1940 } office2010_tmp_t;
1941
1942 typedef struct
1943 {
1944 u64 out[8];
1945
1946 } office2013_tmp_t;
1947
1948 typedef struct
1949 {
1950 u32 digest_buf[5];
1951
1952 } saph_sha1_tmp_t;
1953
1954 typedef struct
1955 {
1956 u32 block[16];
1957
1958 u32 dgst[8];
1959
1960 u32 block_len;
1961 u32 final_len;
1962
1963 } seven_zip_tmp_t;
1964
1965 typedef struct
1966 {
1967 u32 KEK[5];
1968
1969 u32 lsb[4];
1970 u32 cipher[4];
1971
1972 } axcrypt_tmp_t;
1973
1974 typedef struct
1975 {
1976 u32 tmp_digest[8];
1977
1978 } keepass_tmp_t;
1979
1980 typedef struct
1981 {
1982 u32 Kc[16];
1983 u32 Kd[16];
1984
1985 u32 iv[2];
1986
1987 } bsdicrypt_tmp_t;
1988
1989 typedef struct
1990 {
1991 u32 dgst[17][5];
1992
1993 } rar3_tmp_t;
1994
1995 typedef struct
1996 {
1997 u32 user[16];
1998
1999 } cram_md5_t;
2000
2001 typedef struct
2002 {
2003 u32 iv_buf[4];
2004 u32 iv_len;
2005
2006 u32 salt_buf[4];
2007 u32 salt_len;
2008
2009 u32 crc;
2010
2011 u32 data_buf[96];
2012 u32 data_len;
2013
2014 u32 unpack_size;
2015
2016 } seven_zip_t;
2017
2018 typedef struct
2019 {
2020 u32 key;
2021 u64 val;
2022
2023 } hcstat_table_t;
2024
2025 typedef struct
2026 {
2027 u32 cs_buf[0x100];
2028 u32 cs_len;
2029
2030 } cs_t;
2031
2032 typedef struct
2033 {
2034 u32 cmds[0x100];
2035
2036 } kernel_rule_t;
2037
2038 typedef struct
2039 {
2040 u32 gidvid;
2041 u32 il_pos;
2042
2043 } plain_t;
2044
2045 typedef struct
2046 {
2047 u32 i[16];
2048
2049 u32 pw_len;
2050
2051 u32 alignment_placeholder_1;
2052 u32 alignment_placeholder_2;
2053 u32 alignment_placeholder_3;
2054
2055 } pw_t;
2056
2057 typedef struct
2058 {
2059 u32 i;
2060
2061 } bf_t;
2062
2063 typedef struct
2064 {
2065 u32 i[8];
2066
2067 u32 pw_len;
2068
2069 } comb_t;
2070
2071 typedef struct
2072 {
2073 u32 b[32];
2074
2075 } bs_word_t;
2076
2077 typedef struct
2078 {
2079 uint4 P[64];
2080
2081 } scrypt_tmp_t;