New format -m 13400, Keepass 1 (AES/Twofish) and Keepass 2 (AES)
[hashcat.git] / OpenCL / types_ocl.c
1 /**
2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * magnum <john.magnum@hushmail.com>
4 *
5 * License.....: MIT
6 */
7
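// Note: these match the CL_DEVICE_TYPE_CPU / CL_DEVICE_TYPE_GPU bit values, presumably so
// the host code can hand the runtime device type straight through as a compile-time
// DEVICE_TYPE define.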
8 #define DEVICE_TYPE_CPU 2
9 #define DEVICE_TYPE_GPU 4
10
11 typedef uchar u8;
12 typedef ushort u16;
13 typedef uint u32;
14 typedef ulong u64;
15
16 #ifndef NEW_SIMD_CODE
17 #undef VECT_SIZE
18 #define VECT_SIZE 1
19 #endif
20
21 #define CONCAT(a, b) a##b
22 #define VTYPE(type, width) CONCAT(type, width)
23
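// u8x..u64x are either plain scalars (VECT_SIZE == 1) or OpenCL vector types such as
// uint4, produced by pasting the width onto the base type name. Kernel code written
// against the *x types therefore processes VECT_SIZE values per work-item unchanged.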
24 #if VECT_SIZE == 1
25 typedef uchar u8x;
26 typedef ushort u16x;
27 typedef uint u32x;
28 typedef ulong u64x;
29 #else
30 typedef VTYPE(uchar, VECT_SIZE) u8x;
31 typedef VTYPE(ushort, VECT_SIZE) u16x;
32 typedef VTYPE(uint, VECT_SIZE) u32x;
33 typedef VTYPE(ulong, VECT_SIZE) u64x;
34 #endif
35
36 // this one needs to die
37 #define allx(r) r
38
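// Scalar helpers for splitting a 64-bit value into its 32-bit halves and joining them
// again; hl32_to_64_S reinterprets the (low, high) pair as a ulong, so on the
// little-endian devices hashcat targets:
//
//   hl32_to_64_S (0x11223344, 0x55667788) == 0x1122334455667788UL
//
// The unsuffixed l32_from_64 / h32_from_64 / hl32_to_64 variants below do the same
// per vector component.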
39 static inline u32 l32_from_64_S (u64 a)
40 {
41 const u32 r = (u32) (a);
42
43 return r;
44 }
45
46 static inline u32 h32_from_64_S (u64 a)
47 {
48 a >>= 32;
49
50 const u32 r = (u32) (a);
51
52 return r;
53 }
54
55 static inline u64 hl32_to_64_S (const u32 a, const u32 b)
56 {
57 return as_ulong ((uint2) (b, a));
58 }
59
60 static inline u32x l32_from_64 (u64x a)
61 {
62 u32x r;
63
64 #if VECT_SIZE == 1
65 r = (u32) a;
66 #endif
67
68 #if VECT_SIZE >= 2
69 r.s0 = (u32) a.s0;
70 r.s1 = (u32) a.s1;
71 #endif
72
73 #if VECT_SIZE >= 4
74 r.s2 = (u32) a.s2;
75 r.s3 = (u32) a.s3;
76 #endif
77
78 #if VECT_SIZE >= 8
79 r.s4 = (u32) a.s4;
80 r.s5 = (u32) a.s5;
81 r.s6 = (u32) a.s6;
82 r.s7 = (u32) a.s7;
83 #endif
84
85 #if VECT_SIZE >= 16
86 r.s8 = (u32) a.s8;
87 r.s9 = (u32) a.s9;
88 r.sa = (u32) a.sa;
89 r.sb = (u32) a.sb;
90 r.sc = (u32) a.sc;
91 r.sd = (u32) a.sd;
92 r.se = (u32) a.se;
93 r.sf = (u32) a.sf;
94 #endif
95
96 return r;
97 }
98
99 static inline u32x h32_from_64 (u64x a)
100 {
101 a >>= 32;
102
103 u32x r;
104
105 #if VECT_SIZE == 1
106 r = (u32) a;
107 #endif
108
109 #if VECT_SIZE >= 2
110 r.s0 = (u32) a.s0;
111 r.s1 = (u32) a.s1;
112 #endif
113
114 #if VECT_SIZE >= 4
115 r.s2 = (u32) a.s2;
116 r.s3 = (u32) a.s3;
117 #endif
118
119 #if VECT_SIZE >= 8
120 r.s4 = (u32) a.s4;
121 r.s5 = (u32) a.s5;
122 r.s6 = (u32) a.s6;
123 r.s7 = (u32) a.s7;
124 #endif
125
126 #if VECT_SIZE >= 16
127 r.s8 = (u32) a.s8;
128 r.s9 = (u32) a.s9;
129 r.sa = (u32) a.sa;
130 r.sb = (u32) a.sb;
131 r.sc = (u32) a.sc;
132 r.sd = (u32) a.sd;
133 r.se = (u32) a.se;
134 r.sf = (u32) a.sf;
135 #endif
136
137 return r;
138 }
139
140 static inline u64x hl32_to_64 (const u32x a, const u32x b)
141 {
142 u64x r;
143
144 #if VECT_SIZE == 1
145 r = as_ulong ((uint2) (b, a));
146 #endif
147
148 #if VECT_SIZE >= 2
149 r.s0 = as_ulong ((uint2) (b.s0, a.s0));
150 r.s1 = as_ulong ((uint2) (b.s1, a.s1));
151 #endif
152
153 #if VECT_SIZE >= 4
154 r.s2 = as_ulong ((uint2) (b.s2, a.s2));
155 r.s3 = as_ulong ((uint2) (b.s3, a.s3));
156 #endif
157
158 #if VECT_SIZE >= 8
159 r.s4 = as_ulong ((uint2) (b.s4, a.s4));
160 r.s5 = as_ulong ((uint2) (b.s5, a.s5));
161 r.s6 = as_ulong ((uint2) (b.s6, a.s6));
162 r.s7 = as_ulong ((uint2) (b.s7, a.s7));
163 #endif
164
165 #if VECT_SIZE >= 16
166 r.s8 = as_ulong ((uint2) (b.s8, a.s8));
167 r.s9 = as_ulong ((uint2) (b.s9, a.s9));
168 r.sa = as_ulong ((uint2) (b.sa, a.sa));
169 r.sb = as_ulong ((uint2) (b.sb, a.sb));
170 r.sc = as_ulong ((uint2) (b.sc, a.sc));
171 r.sd = as_ulong ((uint2) (b.sd, a.sd));
172 r.se = as_ulong ((uint2) (b.se, a.se));
173 r.sf = as_ulong ((uint2) (b.sf, a.sf));
174 #endif
175
176 return r;
177 }
178
179 #ifdef IS_AMD
180 static inline u32 swap32_S (const u32 v)
181 {
182 return (as_uint (as_uchar4 (v).s3210));
183 }
184
185 static inline u64 swap64_S (const u64 v)
186 {
187 return (as_ulong (as_uchar8 (v).s76543210));
188 }
189
190 static inline u32 rotr32_S (const u32 a, const u32 n)
191 {
192 return rotate (a, 32 - n);
193 }
194
195 static inline u32 rotl32_S (const u32 a, const u32 n)
196 {
197 return rotate (a, n);
198 }
199
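// 64-bit right-rotate built from two 32-bit funnel shifts: amd_bitalign (hi, lo, n)
// returns the low 32 bits of (((u64) hi << 32) | lo) >> n for n in 0..31, so the two
// halves swap roles once n reaches 32. On CPU devices the native 64-bit rotate is
// used instead.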
200 static inline u64 rotr64_S (const u64 a, const u32 n)
201 {
202 #if DEVICE_TYPE == DEVICE_TYPE_CPU
203
204 const u64 r = rotate (a, (u64) 64 - n);
205
206 #else
207
208 const u32 a0 = h32_from_64_S (a);
209 const u32 a1 = l32_from_64_S (a);
210
211 const u32 t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
212 const u32 t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
213
214 const u64 r = hl32_to_64_S (t0, t1);
215
216 #endif
217
218 return r;
219 }
220
221 static inline u64 rotl64_S (const u64 a, const u32 n)
222 {
223 return rotr64_S (a, 64 - n);
224 }
225
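// Component-wise byte swaps (endianness conversion) for the vector types,
// e.g. swap32 (0x11223344) == 0x44332211.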
226 static inline u32x swap32 (const u32x v)
227 {
228 return ((v >> 24) & 0x000000ff)
229 | ((v >> 8) & 0x0000ff00)
230 | ((v << 8) & 0x00ff0000)
231 | ((v << 24) & 0xff000000);
232 }
233
234 static inline u64x swap64 (const u64x v)
235 {
236 return ((v >> 56) & 0x00000000000000ff)
237 | ((v >> 40) & 0x000000000000ff00)
238 | ((v >> 24) & 0x0000000000ff0000)
239 | ((v >> 8) & 0x00000000ff000000)
240 | ((v << 8) & 0x000000ff00000000)
241 | ((v << 24) & 0x0000ff0000000000)
242 | ((v << 40) & 0x00ff000000000000)
243 | ((v << 56) & 0xff00000000000000);
244 }
245
246 static inline u32x rotr32 (const u32x a, const u32 n)
247 {
248 return rotate (a, 32 - n);
249 }
250
251 static inline u32x rotl32 (const u32x a, const u32 n)
252 {
253 return rotate (a, n);
254 }
255
256 static inline u64x rotr64 (const u64x a, const u32 n)
257 {
258 #if DEVICE_TYPE == DEVICE_TYPE_CPU
259
260 const u64x r = rotate (a, (u64) 64 - n);
261
262 #else
263
264 const u32x a0 = h32_from_64 (a);
265 const u32x a1 = l32_from_64 (a);
266
267 const u32x t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
268 const u32x t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
269
270 const u64x r = hl32_to_64 (t0, t1);
271
272 #endif
273
274 return r;
275 }
276
277 static inline u64x rotl64 (const u64x a, const u32 n)
278 {
279 return rotr64 (a, 64 - n);
280 }
281
282 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
283 {
284 return amd_bfe (a, b, c);
285 }
286
287 static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
288 {
289 return amd_bytealign (a, b, c);
290 }
291 #endif
292
293 #ifdef IS_NV
294 static inline u32 swap32_S (const u32 v)
295 {
296 u32 r;
297
298 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));
299
300 return r;
301 }
302
303 static inline u64 swap64_S (const u64 v)
304 {
305 u32 il;
306 u32 ir;
307
308 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(v));
309
310 u32 tl;
311 u32 tr;
312
313 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl) : "r"(il));
314 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));
315
316 u64 r;
317
318 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));
319
320 return r;
321 }
322
323 static inline u32 rotr32_S (const u32 a, const u32 n)
324 {
325 return rotate (a, 32 - n);
326 }
327
328 static inline u32 rotl32_S (const u32 a, const u32 n)
329 {
330 return rotate (a, n);
331 }
332
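// Scalar 64-bit right-rotate. On sm_35 and newer the SHF.R.WRAP funnel shift merges
// the two 32-bit halves directly; older devices fall back to the generic 64-bit rotate.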
333 #if CUDA_ARCH >= 350
334 static inline u64 rotr64_S (const u64 a, const u32 n)
335 {
336 u32 il;
337 u32 ir;
338
339 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
340
341 u32 tl;
342 u32 tr;
343
344 if (n >= 32)
345 {
346 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
347 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
348 }
349 else
350 {
351 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
352 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
353 }
354
355 u64 r;
356
357 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
358
359 return r;
360 }
361 #else
362 static inline u64 rotr64_S (const u64 a, const u32 n)
363 {
364 return rotate (a, (u64) 64 - n);
365 }
366 #endif
367
368 static inline u64 rotl64_S (const u64 a, const u32 n)
369 {
370 return rotr64_S (a, 64 - n);
371 }
372
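// lut3_* map to the sm_50+ LOP3.LUT instruction, which evaluates an arbitrary
// three-input boolean function given as an 8-bit truth table (inputs weighted
// a = 0xf0, b = 0xcc, c = 0xaa). The immediates used here work out to:
//
//   0x96 = a ^ b ^ c             0xe8 = (a & b) | (a & c) | (b & c)
//   0xca = (a & b) | (~a & c)    0xe4 = (c & a) | (~c & b)
//   0x2d = a ^ (b | ~c)          0x39 = b ^ (a | ~c)        0x59 = c ^ (a | ~b)
//
// i.e. the XOR / MAJ / select style primitives used by the hash round functions.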
373 #if CUDA_ARCH >= 500
374 static inline u32 lut3_2d_S (const u32 a, const u32 b, const u32 c)
375 {
376 u32 r;
377
378 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
379
380 return r;
381 }
382
383 static inline u32 lut3_39_S (const u32 a, const u32 b, const u32 c)
384 {
385 u32 r;
386
387 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
388
389 return r;
390 }
391
392 static inline u32 lut3_59_S (const u32 a, const u32 b, const u32 c)
393 {
394 u32 r;
395
396 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
397
398 return r;
399 }
400
401 static inline u32 lut3_96_S (const u32 a, const u32 b, const u32 c)
402 {
403 u32 r;
404
405 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
406
407 return r;
408 }
409
410 static inline u32 lut3_e4_S (const u32 a, const u32 b, const u32 c)
411 {
412 u32 r;
413
414 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
415
416 return r;
417 }
418
419 static inline u32 lut3_e8_S (const u32 a, const u32 b, const u32 c)
420 {
421 u32 r;
422
423 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
424
425 return r;
426 }
427
428 static inline u32 lut3_ca_S (const u32 a, const u32 b, const u32 c)
429 {
430 u32 r;
431
432 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
433
434 return r;
435 }
436 #endif
437
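// PRMT picks four arbitrary bytes out of the 64-bit pair {b:a}; each nibble of c
// selects one source byte. A selector of 0x0123 reverses the bytes of a, which is
// how swap32_S above is implemented.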
438 static inline u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
439 {
440 u32 r;
441
442 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
443
444 return r;
445 }
446
447 static inline u32x swap32 (const u32x v)
448 {
449 return ((v >> 24) & 0x000000ff)
450 | ((v >> 8) & 0x0000ff00)
451 | ((v << 8) & 0x00ff0000)
452 | ((v << 24) & 0xff000000);
453 }
454
455 static inline u64x swap64 (const u64x v)
456 {
457 return ((v >> 56) & 0x00000000000000ff)
458 | ((v >> 40) & 0x000000000000ff00)
459 | ((v >> 24) & 0x0000000000ff0000)
460 | ((v >> 8) & 0x00000000ff000000)
461 | ((v << 8) & 0x000000ff00000000)
462 | ((v << 24) & 0x0000ff0000000000)
463 | ((v << 40) & 0x00ff000000000000)
464 | ((v << 56) & 0xff00000000000000);
465 }
466
467 static inline u32x rotr32 (const u32x a, const u32 n)
468 {
469 return rotate (a, 32 - n);
470 }
471
472 static inline u32x rotl32 (const u32x a, const u32 n)
473 {
474 return rotate (a, n);
475 }
476
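// Vectorized 64-bit rotate for sm_35+. Inline PTX cannot operate on OpenCL vector
// types directly, so the funnel-shift sequence is spelled out once per vector
// component for every supported VECT_SIZE.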
477 #if CUDA_ARCH >= 350
478 static inline u64x rotr64 (const u64x a, const u32 n)
479 {
480 u64x r;
481
482 u32 il;
483 u32 ir;
484 u32 tl;
485 u32 tr;
486
487 #if VECT_SIZE == 1
488
489 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
490
491 if (n >= 32)
492 {
493 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
494 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
495 }
496 else
497 {
498 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
499 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
500 }
501
502 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
503
504 #endif
505
506 #if VECT_SIZE >= 2
507
508 {
509 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s0));
510
511 if (n >= 32)
512 {
513 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
514 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
515 }
516 else
517 {
518 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
519 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
520 }
521
522 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s0) : "r"(tl), "r"(tr));
523 }
524
525 {
526 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s1));
527
528 if (n >= 32)
529 {
530 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
531 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
532 }
533 else
534 {
535 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
536 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
537 }
538
539 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s1) : "r"(tl), "r"(tr));
540 }
541
542 #endif
543
544 #if VECT_SIZE >= 4
545
546 {
547 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s2));
548
549 if (n >= 32)
550 {
551 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
552 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
553 }
554 else
555 {
556 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
557 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
558 }
559
560 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s2) : "r"(tl), "r"(tr));
561 }
562
563 {
564 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s3));
565
566 if (n >= 32)
567 {
568 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
569 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
570 }
571 else
572 {
573 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
574 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
575 }
576
577 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s3) : "r"(tl), "r"(tr));
578 }
579
580 #endif
581
582 #if VECT_SIZE >= 8
583
584 {
585 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s4));
586
587 if (n >= 32)
588 {
589 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
590 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
591 }
592 else
593 {
594 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
595 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
596 }
597
598 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s4) : "r"(tl), "r"(tr));
599 }
600
601 {
602 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s5));
603
604 if (n >= 32)
605 {
606 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
607 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
608 }
609 else
610 {
611 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
612 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
613 }
614
615 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s5) : "r"(tl), "r"(tr));
616 }
617
618 {
619 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s6));
620
621 if (n >= 32)
622 {
623 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
624 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
625 }
626 else
627 {
628 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
629 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
630 }
631
632 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s6) : "r"(tl), "r"(tr));
633 }
634
635 {
636 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s7));
637
638 if (n >= 32)
639 {
640 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
641 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
642 }
643 else
644 {
645 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
646 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
647 }
648
649 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s7) : "r"(tl), "r"(tr));
650 }
651
652 #endif
653
654 #if VECT_SIZE >= 16
655
656 {
657 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s8));
658
659 if (n >= 32)
660 {
661 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
662 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
663 }
664 else
665 {
666 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
667 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
668 }
669
670 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s8) : "r"(tl), "r"(tr));
671 }
672
673 {
674 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s9));
675
676 if (n >= 32)
677 {
678 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
679 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
680 }
681 else
682 {
683 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
684 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
685 }
686
687 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s9) : "r"(tl), "r"(tr));
688 }
689
690 {
691 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sa));
692
693 if (n >= 32)
694 {
695 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
696 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
697 }
698 else
699 {
700 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
701 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
702 }
703
704 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sa) : "r"(tl), "r"(tr));
705 }
706
707 {
708 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sb));
709
710 if (n >= 32)
711 {
712 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
713 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
714 }
715 else
716 {
717 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
718 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
719 }
720
721 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sb) : "r"(tl), "r"(tr));
722 }
723
724 {
725 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sc));
726
727 if (n >= 32)
728 {
729 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
730 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
731 }
732 else
733 {
734 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
735 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
736 }
737
738 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sc) : "r"(tl), "r"(tr));
739 }
740
741 {
742 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sd));
743
744 if (n >= 32)
745 {
746 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
747 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
748 }
749 else
750 {
751 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
752 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
753 }
754
755 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sd) : "r"(tl), "r"(tr));
756 }
757
758 {
759 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.se));
760
761 if (n >= 32)
762 {
763 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
764 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
765 }
766 else
767 {
768 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
769 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
770 }
771
772 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.se) : "r"(tl), "r"(tr));
773 }
774
775 {
776 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sf));
777
778 if (n >= 32)
779 {
780 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
781 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
782 }
783 else
784 {
785 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
786 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
787 }
788
789 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sf) : "r"(tl), "r"(tr));
790 }
791
792 #endif
793
794 return r;
795 }
796 #else
797 static inline u64x rotr64 (const u64x a, const u32 n)
798 {
799 return rotate (a, (u64) 64 - n);
800 }
801 #endif
802
803 static inline u64x rotl64 (const u64x a, const u32 n)
804 {
805 return rotr64 (a, 64 - n);
806 }
807
808 static inline u32x __byte_perm (const u32x a, const u32x b, const u32x c)
809 {
810 u32x r;
811
812 #if VECT_SIZE == 1
813 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c) );
814 #endif
815
816 #if VECT_SIZE >= 2
817 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(a.s0), "r"(b.s0), "r"(c.s0));
818 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(a.s1), "r"(b.s1), "r"(c.s1));
819 #endif
820
821 #if VECT_SIZE >= 4
822 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(a.s2), "r"(b.s2), "r"(c.s2));
823 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(a.s3), "r"(b.s3), "r"(c.s3));
824 #endif
825
826 #if VECT_SIZE >= 8
827 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(a.s4), "r"(b.s4), "r"(c.s4));
828 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(a.s5), "r"(b.s5), "r"(c.s5));
829 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(a.s6), "r"(b.s6), "r"(c.s6));
830 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(a.s7), "r"(b.s7), "r"(c.s7));
831 #endif
832
833 #if VECT_SIZE >= 16
834 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(a.s8), "r"(b.s8), "r"(c.s8));
835 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(a.s9), "r"(b.s9), "r"(c.s9));
836 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(a.sa), "r"(b.sa), "r"(c.sa));
837 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(a.sb), "r"(b.sb), "r"(c.sb));
838 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(a.sc), "r"(b.sc), "r"(c.sc));
839 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(a.sd), "r"(b.sd), "r"(c.sd));
840 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(a.se), "r"(b.se), "r"(c.se));
841 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(a.sf), "r"(b.sf), "r"(c.sf));
842 #endif
843
844 return r;
845 }
846
847 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
848 {
849 u32 r;
850
851 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
852
853 return r;
854 }
855
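// Emulation of AMD's amd_bytealign on NVIDIA: the result is the low 32 bits of
// (((u64) a << 32) | b) >> (8 * (c & 3)). sm_35+ uses the SHF funnel shift; older
// devices fall back to a byte permute with an equivalent selector.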
856 #if CUDA_ARCH >= 350
857 static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
858 {
859 u32 r;
860
861 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));
862
863 return r;
864 }
865 #else
866 static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
867 {
868 return __byte_perm_S (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);
869 }
870 #endif
871
872 #if CUDA_ARCH >= 500
873 static inline u32x lut3_2d (const u32x a, const u32x b, const u32x c)
874 {
875 u32x r;
876
877 #if VECT_SIZE == 1
878 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
879 #endif
880
881 #if VECT_SIZE >= 2
882 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
883 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
884 #endif
885
886 #if VECT_SIZE >= 4
887 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
888 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
889 #endif
890
891 #if VECT_SIZE >= 8
892 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
893 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
894 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
895 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
896 #endif
897
898 #if VECT_SIZE >= 16
899 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
900 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
901 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
902 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
903 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
904 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
905 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
906 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
907 #endif
908
909 return r;
910 }
911
912 static inline u32x lut3_39 (const u32x a, const u32x b, const u32x c)
913 {
914 u32x r;
915
916 #if VECT_SIZE == 1
917 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
918 #endif
919
920 #if VECT_SIZE >= 2
921 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
922 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
923 #endif
924
925 #if VECT_SIZE >= 4
926 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
927 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
928 #endif
929
930 #if VECT_SIZE >= 8
931 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
932 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
933 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
934 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
935 #endif
936
937 #if VECT_SIZE >= 16
938 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
939 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
940 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
941 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
942 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
943 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
944 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
945 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
946 #endif
947
948 return r;
949 }
950
951 static inline u32x lut3_59 (const u32x a, const u32x b, const u32x c)
952 {
953 u32x r;
954
955 #if VECT_SIZE == 1
956 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
957 #endif
958
959 #if VECT_SIZE >= 2
960 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
961 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
962 #endif
963
964 #if VECT_SIZE >= 4
965 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
966 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
967 #endif
968
969 #if VECT_SIZE >= 8
970 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
971 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
972 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
973 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
974 #endif
975
976 #if VECT_SIZE >= 16
977 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
978 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
979 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
980 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
981 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
982 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
983 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
984 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
985 #endif
986
987 return r;
988 }
989
990 static inline u32x lut3_96 (const u32x a, const u32x b, const u32x c)
991 {
992 u32x r;
993
994 #if VECT_SIZE == 1
995 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
996 #endif
997
998 #if VECT_SIZE >= 2
999 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1000 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1001 #endif
1002
1003 #if VECT_SIZE >= 4
1004 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1005 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1006 #endif
1007
1008 #if VECT_SIZE >= 8
1009 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1010 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1011 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1012 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1013 #endif
1014
1015 #if VECT_SIZE >= 16
1016 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1017 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1018 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1019 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1020 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1021 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1022 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1023 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1024 #endif
1025
1026 return r;
1027 }
1028
1029 static inline u32x lut3_e4 (const u32x a, const u32x b, const u32x c)
1030 {
1031 u32x r;
1032
1033 #if VECT_SIZE == 1
1034 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1035 #endif
1036
1037 #if VECT_SIZE >= 2
1038 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1039 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1040 #endif
1041
1042 #if VECT_SIZE >= 4
1043 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1044 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1045 #endif
1046
1047 #if VECT_SIZE >= 8
1048 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1049 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1050 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1051 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1052 #endif
1053
1054 #if VECT_SIZE >= 16
1055 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1056 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1057 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1058 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1059 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1060 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1061 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1062 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1063 #endif
1064
1065 return r;
1066 }
1067
1068 static inline u32x lut3_e8 (const u32x a, const u32x b, const u32x c)
1069 {
1070 u32x r;
1071
1072 #if VECT_SIZE == 1
1073 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1074 #endif
1075
1076 #if VECT_SIZE >= 2
1077 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1078 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1079 #endif
1080
1081 #if VECT_SIZE >= 4
1082 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1083 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1084 #endif
1085
1086 #if VECT_SIZE >= 8
1087 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1088 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1089 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1090 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1091 #endif
1092
1093 #if VECT_SIZE >= 16
1094 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1095 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1096 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1097 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1098 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1099 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1100 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1101 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1102 #endif
1103
1104 return r;
1105 }
1106
1107 static inline u32x lut3_ca (const u32x a, const u32x b, const u32x c)
1108 {
1109 u32x r;
1110
1111 #if VECT_SIZE == 1
1112 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1113 #endif
1114
1115 #if VECT_SIZE >= 2
1116 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1117 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1118 #endif
1119
1120 #if VECT_SIZE >= 4
1121 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1122 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1123 #endif
1124
1125 #if VECT_SIZE >= 8
1126 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1127 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1128 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1129 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1130 #endif
1131
1132 #if VECT_SIZE >= 16
1133 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1134 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1135 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1136 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1137 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1138 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1139 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1140 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1141 #endif
1142
1143 return r;
1144 }
1145
1146 #endif
1147 #endif
1148
1149 #ifdef IS_GENERIC
1150 static inline u32 swap32_S (const u32 v)
1151 {
1152 return (as_uint (as_uchar4 (v).s3210));
1153 }
1154
1155 static inline u64 swap64_S (const u64 v)
1156 {
1157 return (as_ulong (as_uchar8 (v).s76543210));
1158 }
1159
1160 static inline u32 rotr32_S (const u32 a, const u32 n)
1161 {
1162 return rotate (a, 32 - n);
1163 }
1164
1165 static inline u32 rotl32_S (const u32 a, const u32 n)
1166 {
1167 return rotate (a, n);
1168 }
1169
1170 static inline u64 rotr64_S (const u64 a, const u32 n)
1171 {
1172 return rotate (a, (u64) 64 - n);
1173 }
1174
1175 static inline u64 rotl64_S (const u64 a, const u32 n)
1176 {
1177 return rotate (a, (u64) n);
1178 }
1179
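// Portable byte-align: concatenate a (high word) and b (low word) and shift right by
// (c & 3) bytes, e.g. amd_bytealign_S (0xaabbccdd, 0x11223344, 1) == 0xdd112233.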
1180 static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
1181 {
1182 const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);
1183
1184 return (u32) (tmp);
1185 }
1186
1187 static inline u32x swap32 (const u32x v)
1188 {
1189 return ((v >> 24) & 0x000000ff)
1190 | ((v >> 8) & 0x0000ff00)
1191 | ((v << 8) & 0x00ff0000)
1192 | ((v << 24) & 0xff000000);
1193 }
1194
1195 static inline u64x swap64 (const u64x v)
1196 {
1197 return ((v >> 56) & 0x00000000000000ff)
1198 | ((v >> 40) & 0x000000000000ff00)
1199 | ((v >> 24) & 0x0000000000ff0000)
1200 | ((v >> 8) & 0x00000000ff000000)
1201 | ((v << 8) & 0x000000ff00000000)
1202 | ((v << 24) & 0x0000ff0000000000)
1203 | ((v << 40) & 0x00ff000000000000)
1204 | ((v << 56) & 0xff00000000000000);
1205 }
1206
1207 static inline u32x rotr32 (const u32x a, const u32 n)
1208 {
1209 return rotate (a, 32 - n);
1210 }
1211
1212 static inline u32x rotl32 (const u32x a, const u32 n)
1213 {
1214 return rotate (a, n);
1215 }
1216
1217 static inline u64x rotr64 (const u64x a, const u32 n)
1218 {
1219 return rotate (a, (u64) 64 - n);
1220 }
1221
1222 static inline u64x rotl64 (const u64x a, const u32 n)
1223 {
1224 return rotate (a, (u64) n);
1225 }
1226
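// Bit-field extract: returns c bits of a starting at bit b,
// e.g. __bfe (0x12345678, 8, 8) == 0x56.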
1227 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
1228 {
1229 #define BIT(x) (1 << (x))
1230 #define BIT_MASK(x) (BIT (x) - 1)
1231 #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z))
1232
1233 return BFE (a, b, c);
1234 }
1235
1236 static inline u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
1237 {
1238 #if VECT_SIZE == 1
1239 const u64x tmp = ((((u64x) (a)) << 32) | ((u64x) (b))) >> ((c & 3) * 8);
1240
1241 return (u32x) (tmp);
1242 #endif
1243
1244 #if VECT_SIZE == 2
1245 const u64x tmp = ((((u64x) (a.s0, a.s1)) << 32) | ((u64x) (b.s0, b.s1))) >> ((c & 3) * 8);
1246
1247 return (u32x) (tmp.s0, tmp.s1);
1248 #endif
1249
1250 #if VECT_SIZE == 4
1251 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3))) >> ((c & 3) * 8);
1252
1253 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3);
1254 #endif
1255
1256 #if VECT_SIZE == 8
1257 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7))) >> ((c & 3) * 8);
1258
1259 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7);
1260 #endif
1261
1262 #if VECT_SIZE == 16
1263 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7, a.s8, a.s9, a.sa, a.sb, a.sc, a.sd, a.se, a.sf)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7, b.s8, b.s9, b.sa, b.sb, b.sc, b.sd, b.se, b.sf))) >> ((c & 3) * 8);
1264
1265 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7, tmp.s8, tmp.s9, tmp.sa, tmp.sb, tmp.sc, tmp.sd, tmp.se, tmp.sf);
1266 #endif
1267 }
1268 #endif
1269
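// Per-hash-mode digest container. Each kernel is built with exactly one of the _XXX_
// defines set, so digest_buf gets the size that hash type needs, in u32 words
// (e.g. 4 for MD5, 5 for SHA1, 16 for SHA512).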
1270 typedef struct
1271 {
1272 #if defined _DES_
1273 u32 digest_buf[4];
1274 #elif defined _MD4_
1275 u32 digest_buf[4];
1276 #elif defined _MD5_
1277 u32 digest_buf[4];
1278 #elif defined _MD5H_
1279 u32 digest_buf[4];
1280 #elif defined _SHA1_
1281 u32 digest_buf[5];
1282 #elif defined _BCRYPT_
1283 u32 digest_buf[6];
1284 #elif defined _SHA256_
1285 u32 digest_buf[8];
1286 #elif defined _SHA384_
1287 u32 digest_buf[16];
1288 #elif defined _SHA512_
1289 u32 digest_buf[16];
1290 #elif defined _KECCAK_
1291 u32 digest_buf[50];
1292 #elif defined _RIPEMD160_
1293 u32 digest_buf[5];
1294 #elif defined _WHIRLPOOL_
1295 u32 digest_buf[16];
1296 #elif defined _GOST_
1297 u32 digest_buf[8];
1298 #elif defined _GOST2012_256_
1299 u32 digest_buf[8];
1300 #elif defined _GOST2012_512_
1301 u32 digest_buf[16];
1302 #elif defined _SAPB_
1303 u32 digest_buf[4];
1304 #elif defined _SAPG_
1305 u32 digest_buf[5];
1306 #elif defined _MYSQL323_
1307 u32 digest_buf[4];
1308 #elif defined _LOTUS5_
1309 u32 digest_buf[4];
1310 #elif defined _LOTUS6_
1311 u32 digest_buf[4];
1312 #elif defined _SCRYPT_
1313 u32 digest_buf[8];
1314 #elif defined _LOTUS8_
1315 u32 digest_buf[4];
1316 #elif defined _OFFICE2007_
1317 u32 digest_buf[4];
1318 #elif defined _OFFICE2010_
1319 u32 digest_buf[4];
1320 #elif defined _OFFICE2013_
1321 u32 digest_buf[4];
1322 #elif defined _OLDOFFICE01_
1323 u32 digest_buf[4];
1324 #elif defined _OLDOFFICE34_
1325 u32 digest_buf[4];
1326 #elif defined _SIPHASH_
1327 u32 digest_buf[4];
1328 #elif defined _PBKDF2_MD5_
1329 u32 digest_buf[32];
1330 #elif defined _PBKDF2_SHA1_
1331 u32 digest_buf[32];
1332 #elif defined _PBKDF2_SHA256_
1333 u32 digest_buf[32];
1334 #elif defined _PBKDF2_SHA512_
1335 u32 digest_buf[32];
1336 #elif defined _PDF17L8_
1337 u32 digest_buf[8];
1338 #elif defined _CRC32_
1339 u32 digest_buf[4];
1340 #elif defined _SEVEN_ZIP_
1341 u32 digest_buf[4];
1342 #elif defined _ANDROIDFDE_
1343 u32 digest_buf[4];
1344 #elif defined _DCC2_
1345 u32 digest_buf[4];
1346 #elif defined _WPA_
1347 u32 digest_buf[4];
1348 #elif defined _MD5_SHA1_
1349 u32 digest_buf[4];
1350 #elif defined _SHA1_MD5_
1351 u32 digest_buf[5];
1352 #elif defined _NETNTLMV2_
1353 u32 digest_buf[4];
1354 #elif defined _KRB5PA_
1355 u32 digest_buf[4];
1356 #elif defined _CLOUDKEY_
1357 u32 digest_buf[8];
1358 #elif defined _SCRYPT_
1359 u32 digest_buf[4];
1360 #elif defined _PSAFE2_
1361 u32 digest_buf[5];
1362 #elif defined _LOTUS8_
1363 u32 digest_buf[4];
1364 #elif defined _RAR3_
1365 u32 digest_buf[4];
1366 #elif defined _SHA256_SHA1_
1367 u32 digest_buf[8];
1368 #elif defined _MS_DRSR_
1369 u32 digest_buf[8];
1370 #elif defined _ANDROIDFDE_SAMSUNG_
1371 u32 digest_buf[8];
1372 #elif defined _RAR5_
1373 u32 digest_buf[4];
1374 #elif defined _KRB5TGS_
1375 u32 digest_buf[4];
1376 #elif defined _AXCRYPT_
1377 u32 digest_buf[4];
1378 #elif defined _KEEPASS_
1379 u32 digest_buf[4];
1380 #endif
1381
1382 } digest_t;
1383
1384 typedef struct
1385 {
1386 u32 salt_buf[16];
1387 u32 salt_buf_pc[8];
1388
1389 u32 salt_len;
1390 u32 salt_iter;
1391 u32 salt_sign[2];
1392
1393 u32 keccak_mdlen;
1394 u32 truecrypt_mdlen;
1395
1396 u32 digests_cnt;
1397 u32 digests_done;
1398
1399 u32 digests_offset;
1400
1401 u32 scrypt_N;
1402 u32 scrypt_r;
1403 u32 scrypt_p;
1404 u32 scrypt_tmto;
1405 u32 scrypt_phy;
1406
1407 } salt_t;
1408
1409 typedef struct
1410 {
1411 int V;
1412 int R;
1413 int P;
1414
1415 int enc_md;
1416
1417 u32 id_buf[8];
1418 u32 u_buf[32];
1419 u32 o_buf[32];
1420
1421 int id_len;
1422 int o_len;
1423 int u_len;
1424
1425 u32 rc4key[2];
1426 u32 rc4data[2];
1427
1428 } pdf_t;
1429
1430 typedef struct
1431 {
1432 u32 pke[25];
1433 u32 eapol[64];
1434 int eapol_size;
1435 int keyver;
1436
1437 } wpa_t;
1438
1439 typedef struct
1440 {
1441 u32 cry_master_buf[64];
1442 u32 ckey_buf[64];
1443 u32 public_key_buf[64];
1444
1445 u32 cry_master_len;
1446 u32 ckey_len;
1447 u32 public_key_len;
1448
1449 } bitcoin_wallet_t;
1450
1451 typedef struct
1452 {
1453 u32 salt_buf[30];
1454 u32 salt_len;
1455
1456 u32 esalt_buf[38];
1457 u32 esalt_len;
1458
1459 } sip_t;
1460
1461 typedef struct
1462 {
1463 u32 data[384];
1464
1465 } androidfde_t;
1466
1467 typedef struct
1468 {
1469 u32 nr_buf[16];
1470 u32 nr_len;
1471
1472 u32 msg_buf[128];
1473 u32 msg_len;
1474
1475 } ikepsk_t;
1476
1477 typedef struct
1478 {
1479 u32 user_len;
1480 u32 domain_len;
1481 u32 srvchall_len;
1482 u32 clichall_len;
1483
1484 u32 userdomain_buf[64];
1485 u32 chall_buf[256];
1486
1487 } netntlm_t;
1488
1489 typedef struct
1490 {
1491 u32 user[16];
1492 u32 realm[16];
1493 u32 salt[32];
1494 u32 timestamp[16];
1495 u32 checksum[4];
1496
1497 } krb5pa_t;
1498
1499 typedef struct
1500 {
1501 u32 account_info[512];
1502 u32 checksum[4];
1503 u32 edata2[2560];
1504 u32 edata2_len;
1505
1506 } krb5tgs_t;
1507
1508 typedef struct
1509 {
1510 u32 salt_buf[16];
1511 u32 data_buf[112];
1512 u32 keyfile_buf[16];
1513
1514 } tc_t;
1515
1516 typedef struct
1517 {
1518 u32 salt_buf[16];
1519
1520 } pbkdf2_md5_t;
1521
1522 typedef struct
1523 {
1524 u32 salt_buf[16];
1525
1526 } pbkdf2_sha1_t;
1527
1528 typedef struct
1529 {
1530 u32 salt_buf[16];
1531
1532 } pbkdf2_sha256_t;
1533
1534 typedef struct
1535 {
1536 u32 salt_buf[32];
1537
1538 } pbkdf2_sha512_t;
1539
1540 typedef struct
1541 {
1542 u32 salt_buf[128];
1543 u32 salt_len;
1544
1545 } rakp_t;
1546
1547 typedef struct
1548 {
1549 u32 data_len;
1550 u32 data_buf[512];
1551
1552 } cloudkey_t;
1553
1554 typedef struct
1555 {
1556 u32 encryptedVerifier[4];
1557 u32 encryptedVerifierHash[5];
1558
1559 u32 keySize;
1560
1561 } office2007_t;
1562
1563 typedef struct
1564 {
1565 u32 encryptedVerifier[4];
1566 u32 encryptedVerifierHash[8];
1567
1568 } office2010_t;
1569
1570 typedef struct
1571 {
1572 u32 encryptedVerifier[4];
1573 u32 encryptedVerifierHash[8];
1574
1575 } office2013_t;
1576
1577 typedef struct
1578 {
1579 u32 version;
1580 u32 encryptedVerifier[4];
1581 u32 encryptedVerifierHash[4];
1582 u32 rc4key[2];
1583
1584 } oldoffice01_t;
1585
1586 typedef struct
1587 {
1588 u32 version;
1589 u32 encryptedVerifier[4];
1590 u32 encryptedVerifierHash[5];
1591 u32 rc4key[2];
1592
1593 } oldoffice34_t;
1594
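// Essential salt data for -m 13400, the KeePass format added by this commit. One
// struct covers both database generations: as the version-specific fields suggest,
// KeePass 1.x additionally carries the raw encrypted contents plus their hash (the
// key is verified by decrypting and hashing the whole database), while KeePass 2.x
// only needs expected_bytes, the known plaintext of the stream start bytes.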
1595 typedef struct
1596 {
1597 u32 version;
1598 u32 algorithm;
1599
1600 u32 final_random_seed[8];
1601 u32 transf_random_seed[8];
1602 u32 enc_iv[4];
1603 u32 contents_hash[8];
1604
1605 /* specific to version 1 */
1606 u32 contents_len;
1607 u32 contents[12500];
1608
1609 /* specific to version 2 */
1610 u32 expected_bytes[8];
1611
1612 } keepass_t;
1613
1614 typedef struct
1615 {
1616 u32 digest[4];
1617 u32 out[4];
1618
1619 } pdf14_tmp_t;
1620
1621 typedef struct
1622 {
1623 union
1624 {
1625 u32 dgst32[16];
1626 u64 dgst64[8];
1627 };
1628
1629 u32 dgst_len;
1630 u32 W_len;
1631
1632 } pdf17l8_tmp_t;
1633
1634 typedef struct
1635 {
1636 u32 digest_buf[4];
1637
1638 } phpass_tmp_t;
1639
1640 typedef struct
1641 {
1642 u32 digest_buf[4];
1643
1644 } md5crypt_tmp_t;
1645
1646 typedef struct
1647 {
1648 u32 alt_result[8];
1649
1650 u32 p_bytes[4];
1651 u32 s_bytes[4];
1652
1653 } sha256crypt_tmp_t;
1654
1655 typedef struct
1656 {
1657 u64 l_alt_result[8];
1658
1659 u64 l_p_bytes[2];
1660 u64 l_s_bytes[2];
1661
1662 } sha512crypt_tmp_t;
1663
1664 typedef struct
1665 {
1666 u32 ipad[5];
1667 u32 opad[5];
1668
1669 u32 dgst[10];
1670 u32 out[10];
1671
1672 } wpa_tmp_t;
1673
1674 typedef struct
1675 {
1676 u64 dgst[8];
1677
1678 } bitcoin_wallet_tmp_t;
1679
1680 typedef struct
1681 {
1682 u32 ipad[5];
1683 u32 opad[5];
1684
1685 u32 dgst[5];
1686 u32 out[4];
1687
1688 } dcc2_tmp_t;
1689
1690 typedef struct
1691 {
1692 u32 E[18];
1693
1694 u32 P[18];
1695
1696 u32 S0[256];
1697 u32 S1[256];
1698 u32 S2[256];
1699 u32 S3[256];
1700
1701 } bcrypt_tmp_t;
1702
1703 typedef struct
1704 {
1705 u32 digest[2];
1706
1707 u32 P[18];
1708
1709 u32 S0[256];
1710 u32 S1[256];
1711 u32 S2[256];
1712 u32 S3[256];
1713
1714 } pwsafe2_tmp_t;
1715
1716 typedef struct
1717 {
1718 u32 digest_buf[8];
1719
1720 } pwsafe3_tmp_t;
1721
1722 typedef struct
1723 {
1724 u32 digest_buf[5];
1725
1726 } androidpin_tmp_t;
1727
1728 typedef struct
1729 {
1730 u32 ipad[5];
1731 u32 opad[5];
1732
1733 u32 dgst[10];
1734 u32 out[10];
1735
1736 } androidfde_tmp_t;
1737
1738 typedef struct
1739 {
1740 u32 ipad[16];
1741 u32 opad[16];
1742
1743 u32 dgst[64];
1744 u32 out[64];
1745
1746 } tc_tmp_t;
1747
1748 typedef struct
1749 {
1750 u64 ipad[8];
1751 u64 opad[8];
1752
1753 u64 dgst[32];
1754 u64 out[32];
1755
1756 } tc64_tmp_t;
1757
1758 typedef struct
1759 {
1760 u32 ipad[4];
1761 u32 opad[4];
1762
1763 u32 dgst[32];
1764 u32 out[32];
1765
1766 } pbkdf2_md5_tmp_t;
1767
1768 typedef struct
1769 {
1770 u32 ipad[5];
1771 u32 opad[5];
1772
1773 u32 dgst[32];
1774 u32 out[32];
1775
1776 } pbkdf2_sha1_tmp_t;
1777
1778 typedef struct
1779 {
1780 u32 ipad[8];
1781 u32 opad[8];
1782
1783 u32 dgst[32];
1784 u32 out[32];
1785
1786 } pbkdf2_sha256_tmp_t;
1787
1788 typedef struct
1789 {
1790 u64 ipad[8];
1791 u64 opad[8];
1792
1793 u64 dgst[16];
1794 u64 out[16];
1795
1796 } pbkdf2_sha512_tmp_t;
1797
1798 typedef struct
1799 {
1800 u64 out[8];
1801
1802 } ecryptfs_tmp_t;
1803
1804 typedef struct
1805 {
1806 u64 ipad[8];
1807 u64 opad[8];
1808
1809 u64 dgst[16];
1810 u64 out[16];
1811
1812 } oraclet_tmp_t;
1813
1814 typedef struct
1815 {
1816 u32 ipad[5];
1817 u32 opad[5];
1818
1819 u32 dgst[5];
1820 u32 out[5];
1821
1822 } agilekey_tmp_t;
1823
1824 typedef struct
1825 {
1826 u32 ipad[5];
1827 u32 opad[5];
1828
1829 u32 dgst1[5];
1830 u32 out1[5];
1831
1832 u32 dgst2[5];
1833 u32 out2[5];
1834
1835 } mywallet_tmp_t;
1836
1837 typedef struct
1838 {
1839 u32 ipad[5];
1840 u32 opad[5];
1841
1842 u32 dgst[5];
1843 u32 out[5];
1844
1845 } sha1aix_tmp_t;
1846
1847 typedef struct
1848 {
1849 u32 ipad[8];
1850 u32 opad[8];
1851
1852 u32 dgst[8];
1853 u32 out[8];
1854
1855 } sha256aix_tmp_t;
1856
1857 typedef struct
1858 {
1859 u64 ipad[8];
1860 u64 opad[8];
1861
1862 u64 dgst[8];
1863 u64 out[8];
1864
1865 } sha512aix_tmp_t;
1866
1867 typedef struct
1868 {
1869 u32 ipad[8];
1870 u32 opad[8];
1871
1872 u32 dgst[8];
1873 u32 out[8];
1874
1875 } lastpass_tmp_t;
1876
1877 typedef struct
1878 {
1879 u64 digest_buf[8];
1880
1881 } drupal7_tmp_t;
1882
1883 typedef struct
1884 {
1885 u32 ipad[5];
1886 u32 opad[5];
1887
1888 u32 dgst[5];
1889 u32 out[5];
1890
1891 } lotus8_tmp_t;
1892
1893 typedef struct
1894 {
1895 u32 out[5];
1896
1897 } office2007_tmp_t;
1898
1899 typedef struct
1900 {
1901 u32 out[5];
1902
1903 } office2010_tmp_t;
1904
1905 typedef struct
1906 {
1907 u64 out[8];
1908
1909 } office2013_tmp_t;
1910
1911 typedef struct
1912 {
1913 u32 digest_buf[5];
1914
1915 } saph_sha1_tmp_t;
1916
1917 typedef struct
1918 {
1919 u32 block[16];
1920
1921 u32 dgst[8];
1922
1923 u32 block_len;
1924 u32 final_len;
1925
1926 } seven_zip_tmp_t;
1927
1928 typedef struct
1929 {
1930 u32 KEK[5];
1931
1932 u32 lsb[4];
1933 u32 cipher[4];
1934
1935 } axcrypt_tmp_t;
1936
1937 typedef struct
1938 {
1939 u32 tmp_digest[8];
1940
1941 } keepass_tmp_t;
1942
1943 typedef struct
1944 {
1945 u32 Kc[16];
1946 u32 Kd[16];
1947
1948 u32 iv[2];
1949
1950 } bsdicrypt_tmp_t;
1951
1952 typedef struct
1953 {
1954 u32 dgst[17][5];
1955
1956 } rar3_tmp_t;
1957
1958 typedef struct
1959 {
1960 u32 user[16];
1961
1962 } cram_md5_t;
1963
1964 typedef struct
1965 {
1966 u32 iv_buf[4];
1967 u32 iv_len;
1968
1969 u32 salt_buf[4];
1970 u32 salt_len;
1971
1972 u32 crc;
1973
1974 u32 data_buf[96];
1975 u32 data_len;
1976
1977 u32 unpack_size;
1978
1979 } seven_zip_t;
1980
1981 typedef struct
1982 {
1983 u32 key;
1984 u64 val;
1985
1986 } hcstat_table_t;
1987
1988 typedef struct
1989 {
1990 u32 cs_buf[0x100];
1991 u32 cs_len;
1992
1993 } cs_t;
1994
1995 typedef struct
1996 {
1997 u32 cmds[0x100];
1998
1999 } kernel_rule_t;
2000
2001 typedef struct
2002 {
2003 u32 gidvid;
2004 u32 il_pos;
2005
2006 } plain_t;
2007
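// One password candidate: up to 16 u32 words (64 bytes) of input data plus its
// length; the placeholder words keep the struct at a fixed 80 bytes.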
2008 typedef struct
2009 {
2010 u32 i[16];
2011
2012 u32 pw_len;
2013
2014 u32 alignment_placeholder_1;
2015 u32 alignment_placeholder_2;
2016 u32 alignment_placeholder_3;
2017
2018 } pw_t;
2019
2020 typedef struct
2021 {
2022 u32 i;
2023
2024 } bf_t;
2025
2026 typedef struct
2027 {
2028 u32 i[8];
2029
2030 u32 pw_len;
2031
2032 } comb_t;
2033
2034 typedef struct
2035 {
2036 u32 b[32];
2037
2038 } bs_word_t;
2039
2040 typedef struct
2041 {
2042 uint4 P[64];
2043
2044 } scrypt_tmp_t;