/* hashcat — OpenCL/types_ocl.c: shared scalar/vector type and bit-manipulation helpers */
/**
 * Authors.....: Jens Steube <jens.steube@gmail.com>
 *               magnum <john.magnum@hushmail.com>
 *
 * License.....: MIT
 */

// Numeric device-class tags compared against DEVICE_TYPE below to pick
// between native-rotate (CPU) and bit-align (GPU) code paths.
#define DEVICE_TYPE_CPU 2
#define DEVICE_TYPE_GPU 4

// Scalar shorthands for OpenCL's unsigned integer types.
typedef uchar u8;
typedef ushort u16;
typedef uint u32;
typedef ulong u64;

// Kernels that do not opt into the SIMD code path are forced to scalar width.
#ifndef NEW_SIMD_CODE
#undef VECT_SIZE
#define VECT_SIZE 1
#endif

// Token pasting: VTYPE(uint, 4) expands to the built-in vector type uint4.
#define CONCAT(a, b) a##b
#define VTYPE(type, width) CONCAT(type, width)

// Vector shorthands; with VECT_SIZE == 1 they alias the scalar types so the
// same kernel source compiles at any width.
#if VECT_SIZE == 1
typedef uchar u8x;
typedef ushort u16x;
typedef uint u32x;
typedef ulong u64x;
#else
typedef VTYPE(uchar, VECT_SIZE) u8x;
typedef VTYPE(ushort, VECT_SIZE) u16x;
typedef VTYPE(uint, VECT_SIZE) u32x;
typedef VTYPE(ulong, VECT_SIZE) u64x;
#endif

// this one needs to die — legacy no-op wrapper kept only for older kernels
#define allx(r) r
38
39 static inline u32 l32_from_64_S (u64 a)
40 {
41 const u32 r = (u32) (a);
42
43 return r;
44 }
45
46 static inline u32 h32_from_64_S (u64 a)
47 {
48 a >>= 32;
49
50 const u32 r = (u32) (a);
51
52 return r;
53 }
54
55 static inline u64 hl32_to_64_S (const u32 a, const u32 b)
56 {
57 return as_ulong ((uint2) (b, a));
58 }
59
60 static inline u32x l32_from_64 (u64x a)
61 {
62 u32x r;
63
64 #if VECT_SIZE == 1
65 r = (u32) a;
66 #endif
67
68 #if VECT_SIZE >= 2
69 r.s0 = (u32) a.s0;
70 r.s1 = (u32) a.s1;
71 #endif
72
73 #if VECT_SIZE >= 4
74 r.s2 = (u32) a.s2;
75 r.s3 = (u32) a.s3;
76 #endif
77
78 #if VECT_SIZE >= 8
79 r.s4 = (u32) a.s4;
80 r.s5 = (u32) a.s5;
81 r.s6 = (u32) a.s6;
82 r.s7 = (u32) a.s7;
83 #endif
84
85 #if VECT_SIZE >= 16
86 r.s8 = (u32) a.s8;
87 r.s9 = (u32) a.s9;
88 r.sa = (u32) a.sa;
89 r.sb = (u32) a.sb;
90 r.sc = (u32) a.sc;
91 r.sd = (u32) a.sd;
92 r.se = (u32) a.se;
93 r.sf = (u32) a.sf;
94 #endif
95
96 return r;
97 }
98
99 static inline u32x h32_from_64 (u64x a)
100 {
101 a >>= 32;
102
103 u32x r;
104
105 #if VECT_SIZE == 1
106 r = (u32) a;
107 #endif
108
109 #if VECT_SIZE >= 2
110 r.s0 = (u32) a.s0;
111 r.s1 = (u32) a.s1;
112 #endif
113
114 #if VECT_SIZE >= 4
115 r.s2 = (u32) a.s2;
116 r.s3 = (u32) a.s3;
117 #endif
118
119 #if VECT_SIZE >= 8
120 r.s4 = (u32) a.s4;
121 r.s5 = (u32) a.s5;
122 r.s6 = (u32) a.s6;
123 r.s7 = (u32) a.s7;
124 #endif
125
126 #if VECT_SIZE >= 16
127 r.s8 = (u32) a.s8;
128 r.s9 = (u32) a.s9;
129 r.sa = (u32) a.sa;
130 r.sb = (u32) a.sb;
131 r.sc = (u32) a.sc;
132 r.sd = (u32) a.sd;
133 r.se = (u32) a.se;
134 r.sf = (u32) a.sf;
135 #endif
136
137 return r;
138 }
139
140 static inline u64x hl32_to_64 (const u32x a, const u32x b)
141 {
142 u64x r;
143
144 #if VECT_SIZE == 1
145 r = as_ulong ((uint2) (b, a));
146 #endif
147
148 #if VECT_SIZE >= 2
149 r.s0 = as_ulong ((uint2) (b.s0, a.s0));
150 r.s1 = as_ulong ((uint2) (b.s1, a.s1));
151 #endif
152
153 #if VECT_SIZE >= 4
154 r.s2 = as_ulong ((uint2) (b.s2, a.s2));
155 r.s3 = as_ulong ((uint2) (b.s3, a.s3));
156 #endif
157
158 #if VECT_SIZE >= 8
159 r.s4 = as_ulong ((uint2) (b.s4, a.s4));
160 r.s5 = as_ulong ((uint2) (b.s5, a.s5));
161 r.s6 = as_ulong ((uint2) (b.s6, a.s6));
162 r.s7 = as_ulong ((uint2) (b.s7, a.s7));
163 #endif
164
165 #if VECT_SIZE >= 16
166 r.s8 = as_ulong ((uint2) (b.s8, a.s8));
167 r.s9 = as_ulong ((uint2) (b.s9, a.s9));
168 r.sa = as_ulong ((uint2) (b.sa, a.sa));
169 r.sb = as_ulong ((uint2) (b.sb, a.sb));
170 r.sc = as_ulong ((uint2) (b.sc, a.sc));
171 r.sd = as_ulong ((uint2) (b.sd, a.sd));
172 r.se = as_ulong ((uint2) (b.se, a.se));
173 r.sf = as_ulong ((uint2) (b.sf, a.sf));
174 #endif
175
176 return r;
177 }
178
179 #ifdef IS_AMD
180 static inline u32 swap32_S (const u32 v)
181 {
182 return (as_uint (as_uchar4 (v).s3210));
183 }
184
185 static inline u64 swap64_S (const u64 v)
186 {
187 return (as_ulong (as_uchar8 (v).s76543210));
188 }
189
190 static inline u32 rotr32_S (const u32 a, const u32 n)
191 {
192 return rotate (a, 32 - n);
193 }
194
195 static inline u32 rotl32_S (const u32 a, const u32 n)
196 {
197 return rotate (a, n);
198 }
199
// 64-bit rotate right for scalars.  CPU devices use the native rotate;
// GPU devices build it from two 32-bit amd_bitalign funnel shifts over
// the split halves.
static inline u64 rotr64_S (const u64 a, const u32 n)
{
#if DEVICE_TYPE == DEVICE_TYPE_CPU

  const u64 r = rotate (a, (u64) 64 - n);

#else

  const u32 a0 = h32_from_64_S (a);
  const u32 a1 = l32_from_64_S (a);

  // amd_bitalign (hi, lo, s) extracts 32 bits of (hi:lo) shifted right by s;
  // operand order flips once the rotation crosses the 32-bit boundary.
  const u32 t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
  const u32 t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);

  const u64 r = hl32_to_64_S (t0, t1);

#endif

  return r;
}
220
221 static inline u64 rotl64_S (const u64 a, const u32 n)
222 {
223 return rotr64_S (a, 64 - n);
224 }
225
226 static inline u32x swap32 (const u32x v)
227 {
228 return ((v >> 24) & 0x000000ff)
229 | ((v >> 8) & 0x0000ff00)
230 | ((v << 8) & 0x00ff0000)
231 | ((v << 24) & 0xff000000);
232 }
233
234 static inline u64x swap64 (const u64x v)
235 {
236 return ((v >> 56) & 0x00000000000000ff)
237 | ((v >> 40) & 0x000000000000ff00)
238 | ((v >> 24) & 0x0000000000ff0000)
239 | ((v >> 8) & 0x00000000ff000000)
240 | ((v << 8) & 0x000000ff00000000)
241 | ((v << 24) & 0x0000ff0000000000)
242 | ((v << 40) & 0x00ff000000000000)
243 | ((v << 56) & 0xff00000000000000);
244 }
245
246 static inline u32x rotr32 (const u32x a, const u32 n)
247 {
248 return rotate (a, 32 - n);
249 }
250
251 static inline u32x rotl32 (const u32x a, const u32 n)
252 {
253 return rotate (a, n);
254 }
255
// Lane-wise 64-bit rotate right.  CPU devices use the native rotate;
// GPU devices combine two amd_bitalign funnel shifts per lane, exactly
// mirroring the scalar rotr64_S above.
static inline u64x rotr64 (const u64x a, const u32 n)
{
#if DEVICE_TYPE == DEVICE_TYPE_CPU

  const u64x r = rotate (a, (u64) 64 - n);

#else

  const u32x a0 = h32_from_64 (a);
  const u32x a1 = l32_from_64 (a);

  // operand order flips once the rotation crosses the 32-bit boundary
  const u32x t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
  const u32x t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);

  const u64x r = hl32_to_64 (t0, t1);

#endif

  return r;
}
276
277 static inline u64x rotl64 (const u64x a, const u32 n)
278 {
279 return rotr64 (a, 64 - n);
280 }
281
282 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
283 {
284 return amd_bfe (a, b, c);
285 }
286
287 static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
288 {
289 return amd_bytealign (a, b, c);
290 }
291 #endif
292
293 #ifdef IS_NV
// Byte-swap a 32-bit scalar with PRMT; selector 0x0123 picks the
// source bytes in reverse order.
static inline u32 swap32_S (const u32 v)
{
  u32 r;

  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));

  return r;
}
302
// Byte-swap a 64-bit scalar: split into 32-bit halves, byte-swap each
// half with PRMT, then reassemble with the halves exchanged.
static inline u64 swap64_S (const u64 v)
{
  u32 il;
  u32 ir;

  asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(v));

  u32 tl;
  u32 tr;

  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl) : "r"(il));
  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));

  u64 r;

  // operands intentionally swapped: high half becomes low and vice versa
  asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));

  return r;
}
322
323 static inline u32 rotr32_S (const u32 a, const u32 n)
324 {
325 return rotate (a, 32 - n);
326 }
327
328 static inline u32 rotl32_S (const u32 a, const u32 n)
329 {
330 return rotate (a, n);
331 }
332
333 #if CUDA_ARCH >= 350
// 64-bit rotate right via SHF funnel shifts (sm_35+).  The value is
// split into 32-bit halves; for n >= 32 the halves are pre-swapped so
// the wrap-mode shift amount stays below 32.
static inline u64 rotr64_S (const u64 a, const u32 n)
{
  u32 il;
  u32 ir;

  asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));

  u32 tl;
  u32 tr;

  if (n >= 32)
  {
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
  }
  else
  {
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
  }

  u64 r;

  asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));

  return r;
}
361 #else
362 static inline u64 rotr64_S (const u64 a, const u32 n)
363 {
364 return rotate (a, (u64) 64 - n);
365 }
366 #endif
367
368 static inline u64 rotl64_S (const u64 a, const u32 n)
369 {
370 return rotr64_S (a, 64 - n);
371 }
372
#if CUDA_ARCH >= 500
// Scalar LOP3 helpers (sm_50+): each computes a three-input bitwise
// function of (a, b, c); the hex suffix is the 8-bit truth-table
// immediate passed to lop3.b32 (e.g. 0x96 is the XOR3 table).
static inline u32 lut3_2d_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_39_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_59_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_96_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_e4_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_e8_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_ca_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}
#endif
437
// PRMT byte permute over the 8-byte pool {a, b} with selector c
// (CUDA __byte_perm equivalent).
static inline u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));

  return r;
}
446
447 static inline u32x swap32 (const u32x v)
448 {
449 return ((v >> 24) & 0x000000ff)
450 | ((v >> 8) & 0x0000ff00)
451 | ((v << 8) & 0x00ff0000)
452 | ((v << 24) & 0xff000000);
453 }
454
455 static inline u64x swap64 (const u64x v)
456 {
457 return ((v >> 56) & 0x00000000000000ff)
458 | ((v >> 40) & 0x000000000000ff00)
459 | ((v >> 24) & 0x0000000000ff0000)
460 | ((v >> 8) & 0x00000000ff000000)
461 | ((v << 8) & 0x000000ff00000000)
462 | ((v << 24) & 0x0000ff0000000000)
463 | ((v << 40) & 0x00ff000000000000)
464 | ((v << 56) & 0xff00000000000000);
465 }
466
467 static inline u32x rotr32 (const u32x a, const u32 n)
468 {
469 return rotate (a, 32 - n);
470 }
471
472 static inline u32x rotl32 (const u32x a, const u32 n)
473 {
474 return rotate (a, n);
475 }
476
477 #if CUDA_ARCH >= 350
478 static inline u64x rotr64 (const u64x a, const u32 n)
479 {
480 u64x r;
481
482 u32 il;
483 u32 ir;
484 u32 tl;
485 u32 tr;
486
487 #if VECT_SIZE == 1
488
489 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
490
491 if (n >= 32)
492 {
493 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
494 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
495 }
496 else
497 {
498 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
499 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
500 }
501
502 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
503
504 #endif
505
506 #if VECT_SIZE >= 2
507
508 {
509 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s0));
510
511 if (n >= 32)
512 {
513 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
514 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
515 }
516 else
517 {
518 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
519 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
520 }
521
522 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s0) : "r"(tl), "r"(tr));
523 }
524
525 {
526 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s1));
527
528 if (n >= 32)
529 {
530 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
531 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
532 }
533 else
534 {
535 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
536 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
537 }
538
539 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s1) : "r"(tl), "r"(tr));
540 }
541
542 #endif
543
544 #if VECT_SIZE >= 4
545
546 {
547 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s2));
548
549 if (n >= 32)
550 {
551 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
552 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
553 }
554 else
555 {
556 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
557 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
558 }
559
560 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s2) : "r"(tl), "r"(tr));
561 }
562
563 {
564 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s3));
565
566 if (n >= 32)
567 {
568 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
569 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
570 }
571 else
572 {
573 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
574 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
575 }
576
577 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s3) : "r"(tl), "r"(tr));
578 }
579
580 #endif
581
582 #if VECT_SIZE >= 8
583
584 {
585 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s4));
586
587 if (n >= 32)
588 {
589 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
590 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
591 }
592 else
593 {
594 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
595 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
596 }
597
598 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s4) : "r"(tl), "r"(tr));
599 }
600
601 {
602 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s5));
603
604 if (n >= 32)
605 {
606 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
607 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
608 }
609 else
610 {
611 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
612 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
613 }
614
615 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s5) : "r"(tl), "r"(tr));
616 }
617
618 {
619 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s6));
620
621 if (n >= 32)
622 {
623 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
624 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
625 }
626 else
627 {
628 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
629 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
630 }
631
632 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s6) : "r"(tl), "r"(tr));
633 }
634
635 {
636 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s7));
637
638 if (n >= 32)
639 {
640 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
641 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
642 }
643 else
644 {
645 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
646 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
647 }
648
649 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s7) : "r"(tl), "r"(tr));
650 }
651
652 #endif
653
654 #if VECT_SIZE >= 16
655
656 {
657 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s8));
658
659 if (n >= 32)
660 {
661 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
662 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
663 }
664 else
665 {
666 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
667 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
668 }
669
670 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s8) : "r"(tl), "r"(tr));
671 }
672
673 {
674 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s9));
675
676 if (n >= 32)
677 {
678 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
679 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
680 }
681 else
682 {
683 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
684 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
685 }
686
687 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s9) : "r"(tl), "r"(tr));
688 }
689
690 {
691 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sa));
692
693 if (n >= 32)
694 {
695 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
696 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
697 }
698 else
699 {
700 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
701 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
702 }
703
704 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sa) : "r"(tl), "r"(tr));
705 }
706
707 {
708 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sb));
709
710 if (n >= 32)
711 {
712 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
713 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
714 }
715 else
716 {
717 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
718 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
719 }
720
721 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sb) : "r"(tl), "r"(tr));
722 }
723
724 {
725 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sc));
726
727 if (n >= 32)
728 {
729 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
730 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
731 }
732 else
733 {
734 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
735 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
736 }
737
738 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sc) : "r"(tl), "r"(tr));
739 }
740
741 {
742 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sd));
743
744 if (n >= 32)
745 {
746 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
747 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
748 }
749 else
750 {
751 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
752 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
753 }
754
755 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sd) : "r"(tl), "r"(tr));
756 }
757
758 {
759 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.se));
760
761 if (n >= 32)
762 {
763 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
764 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
765 }
766 else
767 {
768 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
769 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
770 }
771
772 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.se) : "r"(tl), "r"(tr));
773 }
774
775 {
776 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sf));
777
778 if (n >= 32)
779 {
780 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
781 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
782 }
783 else
784 {
785 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
786 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
787 }
788
789 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sf) : "r"(tl), "r"(tr));
790 }
791
792 #endif
793
794 return r;
795 }
796 #else
797 static inline u64x rotr64 (const u64x a, const u32 n)
798 {
799 return rotate (a, (u64) 64 - n);
800 }
801 #endif
802
803 static inline u64x rotl64 (const u64x a, const u32 n)
804 {
805 return rotr64 (a, (u64) 64 - n);
806 }
807
808 static inline u32x __byte_perm (const u32x a, const u32x b, const u32x c)
809 {
810 u32x r;
811
812 #if VECT_SIZE == 1
813 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c) );
814 #endif
815
816 #if VECT_SIZE >= 2
817 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(a.s0), "r"(b.s0), "r"(c.s0));
818 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(a.s1), "r"(b.s1), "r"(c.s1));
819 #endif
820
821 #if VECT_SIZE >= 4
822 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(a.s2), "r"(b.s2), "r"(c.s2));
823 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(a.s3), "r"(b.s3), "r"(c.s3));
824 #endif
825
826 #if VECT_SIZE >= 8
827 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(a.s4), "r"(b.s4), "r"(c.s4));
828 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(a.s5), "r"(b.s5), "r"(c.s5));
829 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(a.s6), "r"(b.s6), "r"(c.s6));
830 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(a.s7), "r"(b.s7), "r"(c.s7));
831 #endif
832
833 #if VECT_SIZE >= 16
834 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(a.s8), "r"(b.s8), "r"(c.s8));
835 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(a.s9), "r"(b.s9), "r"(c.s9));
836 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(a.sa), "r"(b.sa), "r"(c.sa));
837 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(a.sb), "r"(b.sb), "r"(c.sb));
838 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(a.sc), "r"(b.sc), "r"(c.sc));
839 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(a.sd), "r"(b.sd), "r"(c.sd));
840 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(a.se), "r"(b.se), "r"(c.se));
841 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(a.sf), "r"(b.sf), "r"(c.sf));
842 #endif
843
844 return r;
845 }
846
// Bit-field extract: c bits of a starting at bit position b (bfe.u32).
static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));

  return r;
}
855
856 #if CUDA_ARCH >= 350
// AMD-style byte alignment emulated with a funnel shift: the 64-bit
// pool (a:b) is shifted right by (c & 3) bytes (sm_35+ SHF path).
static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));

  return r;
}
865 #else
866 static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
867 {
868 return __byte_perm_S (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);
869 }
870 #endif
871
872 #if CUDA_ARCH >= 500
873 static inline u32x lut3_2d (const u32x a, const u32x b, const u32x c)
874 {
875 u32x r;
876
877 #if VECT_SIZE == 1
878 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
879 #endif
880
881 #if VECT_SIZE >= 2
882 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
883 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
884 #endif
885
886 #if VECT_SIZE >= 4
887 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
888 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
889 #endif
890
891 #if VECT_SIZE >= 8
892 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
893 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
894 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
895 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
896 #endif
897
898 #if VECT_SIZE >= 16
899 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
900 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
901 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
902 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
903 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
904 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
905 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
906 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
907 #endif
908
909 return r;
910 }
911
912 static inline u32x lut3_39 (const u32x a, const u32x b, const u32x c)
913 {
914 u32x r;
915
916 #if VECT_SIZE == 1
917 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
918 #endif
919
920 #if VECT_SIZE >= 2
921 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
922 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
923 #endif
924
925 #if VECT_SIZE >= 4
926 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
927 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
928 #endif
929
930 #if VECT_SIZE >= 8
931 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
932 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
933 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
934 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
935 #endif
936
937 #if VECT_SIZE >= 16
938 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
939 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
940 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
941 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
942 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
943 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
944 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
945 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
946 #endif
947
948 return r;
949 }
950
951 static inline u32x lut3_59 (const u32x a, const u32x b, const u32x c)
952 {
953 u32x r;
954
955 #if VECT_SIZE == 1
956 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
957 #endif
958
959 #if VECT_SIZE >= 2
960 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
961 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
962 #endif
963
964 #if VECT_SIZE >= 4
965 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
966 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
967 #endif
968
969 #if VECT_SIZE >= 8
970 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
971 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
972 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
973 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
974 #endif
975
976 #if VECT_SIZE >= 16
977 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
978 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
979 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
980 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
981 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
982 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
983 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
984 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
985 #endif
986
987 return r;
988 }
989
990 static inline u32x lut3_96 (const u32x a, const u32x b, const u32x c)
991 {
992 u32x r;
993
994 #if VECT_SIZE == 1
995 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
996 #endif
997
998 #if VECT_SIZE >= 2
999 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1000 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1001 #endif
1002
1003 #if VECT_SIZE >= 4
1004 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1005 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1006 #endif
1007
1008 #if VECT_SIZE >= 8
1009 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1010 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1011 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1012 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1013 #endif
1014
1015 #if VECT_SIZE >= 16
1016 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1017 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1018 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1019 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1020 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1021 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1022 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1023 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1024 #endif
1025
1026 return r;
1027 }
1028
// 3-input LUT via NVIDIA inline PTX (lop3.b32), immediate 0xe4.
// Truth table 0xe4 is the bitwise select r = (c & a) | (~c & b):
// each result bit comes from a where c is 1 and from b where c is 0.
// One asm statement per vector component; the set of components written
// is selected at compile time by VECT_SIZE.
static inline u32x lut3_e4 (const u32x a, const u32x b, const u32x c)
{
  u32x r;

#if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
#endif

#if VECT_SIZE >= 2
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
#endif

#if VECT_SIZE >= 4
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
#endif

#if VECT_SIZE >= 8
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
#endif

#if VECT_SIZE >= 16
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
#endif

  return r;
}
1067
// 3-input LUT via NVIDIA inline PTX (lop3.b32), immediate 0xe8.
// Truth table 0xe8 is the majority function MAJ(a, b, c) =
// (a & b) | (a & c) | (b & c), used e.g. by the SHA family.
// One asm statement per vector component, selected by VECT_SIZE.
static inline u32x lut3_e8 (const u32x a, const u32x b, const u32x c)
{
  u32x r;

#if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
#endif

#if VECT_SIZE >= 2
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
#endif

#if VECT_SIZE >= 4
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
#endif

#if VECT_SIZE >= 8
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
#endif

#if VECT_SIZE >= 16
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
#endif

  return r;
}
1106
// 3-input LUT via NVIDIA inline PTX (lop3.b32), immediate 0xca.
// Truth table 0xca is the bitwise select r = (a & b) | (~a & c):
// each result bit comes from b where a is 1 and from c where a is 0
// (same operation as OpenCL bitselect with a as the mask).
// One asm statement per vector component, selected by VECT_SIZE.
static inline u32x lut3_ca (const u32x a, const u32x b, const u32x c)
{
  u32x r;

#if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
#endif

#if VECT_SIZE >= 2
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
#endif

#if VECT_SIZE >= 4
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
#endif

#if VECT_SIZE >= 8
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
#endif

#if VECT_SIZE >= 16
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
#endif

  return r;
}
1145
1146 #endif
1147 #endif
1148
1149 #ifdef IS_GENERIC
// Byte-swap a 32-bit word: reinterpret it as 4 bytes, reverse their
// order, reinterpret back. Endianness-agnostic byte reversal.
static inline u32 swap32_S (const u32 v)
{
  return (as_uint (as_uchar4 (v).s3210));
}

// Byte-swap a 64-bit word via the same reinterpret-and-reverse trick.
static inline u64 swap64_S (const u64 v)
{
  return (as_ulong (as_uchar8 (v).s76543210));
}

// Rotate right by n. OpenCL rotate() only rotates left, so rotate by
// (32 - n); rotate() takes the count modulo the component width, so
// n == 0 (count 32) is still well-defined.
static inline u32 rotr32_S (const u32 a, const u32 n)
{
  return rotate (a, 32 - n);
}

// Rotate left by n (direct mapping to OpenCL rotate()).
static inline u32 rotl32_S (const u32 a, const u32 n)
{
  return rotate (a, n);
}

// 64-bit rotate right, implemented as a left rotate by (64 - n).
static inline u64 rotr64_S (const u64 a, const u32 n)
{
  return rotate (a, (u64) 64 - n);
}

// 64-bit rotate left.
static inline u64 rotl64_S (const u64 a, const u32 n)
{
  return rotate (a, (u64) n);
}

// Software emulation of AMD's amd_bytealign: concatenate a (high) and
// b (low) into a 64-bit value and extract the 32 bits starting at byte
// offset (c & 3) from the low end.
static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
{
  const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);

  return (u32) (tmp);
}
1186
1187 static inline u32x swap32 (const u32x v)
1188 {
1189 return ((v >> 24) & 0x000000ff)
1190 | ((v >> 8) & 0x0000ff00)
1191 | ((v << 8) & 0x00ff0000)
1192 | ((v << 24) & 0xff000000);
1193 }
1194
1195 static inline u64x swap64 (const u64x v)
1196 {
1197 return ((v >> 56) & 0x00000000000000ff)
1198 | ((v >> 40) & 0x000000000000ff00)
1199 | ((v >> 24) & 0x0000000000ff0000)
1200 | ((v >> 8) & 0x00000000ff000000)
1201 | ((v << 8) & 0x000000ff00000000)
1202 | ((v << 24) & 0x0000ff0000000000)
1203 | ((v << 40) & 0x00ff000000000000)
1204 | ((v << 56) & 0xff00000000000000);
1205 }
1206
// Vector rotate right by n: left-rotate by (32 - n); rotate() takes the
// count modulo the component width, so n == 0 is well-defined.
static inline u32x rotr32 (const u32x a, const u32 n)
{
  return rotate (a, 32 - n);
}

// Vector rotate left by n.
static inline u32x rotl32 (const u32x a, const u32 n)
{
  return rotate (a, n);
}

// 64-bit vector rotate right, as a left rotate by (64 - n).
static inline u64x rotr64 (const u64x a, const u32 n)
{
  return rotate (a, (u64) 64 - n);
}

// 64-bit vector rotate left.
static inline u64x rotl64 (const u64x a, const u32 n)
{
  return rotate (a, (u64) n);
}
1226
// Bit-field extract: returns the c-bit field of a starting at bit b,
// i.e. (a >> b) & ((1 << c) - 1). Software stand-in for the hardware
// __bfe instruction on platforms without it.
// NOTE(review): c == 32 makes BIT(32) shift by the full type width
// (undefined); callers appear to use c < 32 — confirm.
// NOTE(review): BIT/BIT_MASK/BFE are not #undef'd and stay visible for
// the rest of the translation unit — possibly relied upon later.
static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
  #define BIT(x)      (1 << (x))
  #define BIT_MASK(x) (BIT (x) - 1)
  #define BFE(x,y,z)  (((x) >> (y)) & BIT_MASK (z))

  return BFE (a, b, c);
}
1235
// Vectorized emulation of AMD's amd_bytealign: per lane, concatenate
// a (high 32 bits) and b (low 32 bits) and extract 32 bits starting at
// byte offset (c & 3). Each VECT_SIZE branch widens the u32x lanes into
// a u64x via a vector literal, shifts, and narrows back; exactly one
// branch is compiled in. A VECT_SIZE outside {1,2,4,8,16} leaves the
// function without a return statement and fails at kernel compile time.
static inline u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
{
#if VECT_SIZE == 1
  const u64x tmp = ((((u64x) (a)) << 32) | ((u64x) (b))) >> ((c & 3) * 8);

  return (u32x) (tmp);
#endif

#if VECT_SIZE == 2
  const u64x tmp = ((((u64x) (a.s0, a.s1)) << 32) | ((u64x) (b.s0, b.s1))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1);
#endif

#if VECT_SIZE == 4
  const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3);
#endif

#if VECT_SIZE == 8
  const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7);
#endif

#if VECT_SIZE == 16
  const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7, a.s8, a.s9, a.sa, a.sb, a.sc, a.sd, a.se, a.sf)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7, b.s8, b.s9, b.sa, b.sb, b.sc, b.sd, b.se, b.sf))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7, tmp.s8, tmp.s9, tmp.sa, tmp.sb, tmp.sc, tmp.sd, tmp.se, tmp.sf);
#endif
}
1268 #endif
1269
/**
 * Per-hash digest container. Exactly one _HASHTYPE_ macro is defined at
 * kernel compile time and selects the digest_buf size (in 32-bit words)
 * for that hash type.
 *
 * Fix: the original chain contained a second `#elif defined _SCRYPT_`
 * (size 4) and a second `#elif defined _LOTUS8_` branch further down.
 * An #elif chain takes the first match, so those later branches were
 * unreachable dead code (and the duplicate _SCRYPT_ size even disagreed
 * with the live size-8 branch). They have been removed; every surviving
 * branch keeps its original size, so the struct layout is unchanged for
 * all hash types.
 */
typedef struct
{
#if   defined _DES_
  u32  digest_buf[4];
#elif defined _MD4_
  u32  digest_buf[4];
#elif defined _MD5_
  u32  digest_buf[4];
#elif defined _MD5H_
  u32  digest_buf[4];
#elif defined _SHA1_
  u32  digest_buf[5];
#elif defined _BCRYPT_
  u32  digest_buf[6];
#elif defined _SHA256_
  u32  digest_buf[8];
#elif defined _SHA384_
  u32  digest_buf[16];
#elif defined _SHA512_
  u32  digest_buf[16];
#elif defined _KECCAK_
  u32  digest_buf[50];
#elif defined _RIPEMD160_
  u32  digest_buf[5];
#elif defined _WHIRLPOOL_
  u32  digest_buf[16];
#elif defined _GOST_
  u32  digest_buf[8];
#elif defined _GOST2012_256_
  u32  digest_buf[8];
#elif defined _GOST2012_512_
  u32  digest_buf[16];
#elif defined _SAPB_
  u32  digest_buf[4];
#elif defined _SAPG_
  u32  digest_buf[5];
#elif defined _MYSQL323_
  u32  digest_buf[4];
#elif defined _LOTUS5_
  u32  digest_buf[4];
#elif defined _LOTUS6_
  u32  digest_buf[4];
#elif defined _SCRYPT_
  u32  digest_buf[8];
#elif defined _LOTUS8_
  u32  digest_buf[4];
#elif defined _OFFICE2007_
  u32  digest_buf[4];
#elif defined _OFFICE2010_
  u32  digest_buf[4];
#elif defined _OFFICE2013_
  u32  digest_buf[4];
#elif defined _OLDOFFICE01_
  u32  digest_buf[4];
#elif defined _OLDOFFICE34_
  u32  digest_buf[4];
#elif defined _SIPHASH_
  u32  digest_buf[4];
#elif defined _PBKDF2_MD5_
  u32  digest_buf[32];
#elif defined _PBKDF2_SHA1_
  u32  digest_buf[32];
#elif defined _PBKDF2_SHA256_
  u32  digest_buf[32];
#elif defined _PBKDF2_SHA512_
  u32  digest_buf[32];
#elif defined _PDF17L8_
  u32  digest_buf[8];
#elif defined _CRC32_
  u32  digest_buf[4];
#elif defined _SEVEN_ZIP_
  u32  digest_buf[4];
#elif defined _ANDROIDFDE_
  u32  digest_buf[4];
#elif defined _DCC2_
  u32  digest_buf[4];
#elif defined _WPA_
  u32  digest_buf[4];
#elif defined _MD5_SHA1_
  u32  digest_buf[4];
#elif defined _SHA1_MD5_
  u32  digest_buf[5];
#elif defined _NETNTLMV2_
  u32  digest_buf[4];
#elif defined _KRB5PA_
  u32  digest_buf[4];
#elif defined _CLOUDKEY_
  u32  digest_buf[8];
#elif defined _PSAFE2_
  u32  digest_buf[5];
#elif defined _RAR3_
  u32  digest_buf[4];
#elif defined _SHA256_SHA1_
  u32  digest_buf[8];
#elif defined _MS_DRSR_
  u32  digest_buf[8];
#elif defined _ANDROIDFDE_SAMSUNG_
  u32  digest_buf[8];
#elif defined _RAR5_
  u32  digest_buf[4];
#elif defined _KRB5TGS_
  u32  digest_buf[4];
#elif defined _AXCRYPT_
  u32  digest_buf[4];
#elif defined _KEEPASS_
  u32  digest_buf[4];
#endif

} digest_t;
1383
// Salt record shared by all kernels: raw salt, a precomputed variant,
// lengths/iteration counts, digest bookkeeping and scrypt parameters.
typedef struct
{
  u32 salt_buf[16];
  u32 salt_buf_pc[8];    // precomputed salt variant

  u32 salt_len;
  u32 salt_iter;         // iteration count for slow-hash kernels
  u32 salt_sign[2];

  u32 keccak_mdlen;
  u32 truecrypt_mdlen;

  u32 digests_cnt;       // digests bound to this salt
  u32 digests_done;

  u32 digests_offset;    // offset into the global digest array

  u32 scrypt_N;
  u32 scrypt_r;
  u32 scrypt_p;
  u32 scrypt_tmto;       // time-memory trade-off factor
  u32 scrypt_phy;

} salt_t;

// PDF esalt: encryption revision/parameters plus the document's
// /ID, /U and /O strings and recovered RC4 material.
typedef struct
{
  int  V;        // PDF encryption version
  int  R;        // revision
  int  P;        // permission flags

  int  enc_md;

  u32  id_buf[8];
  u32  u_buf[32];
  u32  o_buf[32];

  int  id_len;
  int  o_len;
  int  u_len;

  u32  rc4key[2];
  u32  rc4data[2];

} pdf_t;

// WPA esalt: PKE element and the raw EAPOL frame used for MIC checking.
typedef struct
{
  u32 pke[25];
  u32 eapol[64];
  int eapol_size;
  int keyver;    // key descriptor version

} wpa_t;

// Bitcoin wallet esalt: encrypted master key, ckey and public key blobs.
typedef struct
{
  u32 cry_master_buf[64];
  u32 ckey_buf[64];
  u32 public_key_buf[64];

  u32 cry_master_len;
  u32 ckey_len;
  u32 public_key_len;

} bitcoin_wallet_t;

// SIP digest-auth esalt: salt and extended salt buffers with lengths.
typedef struct
{
  u32 salt_buf[30];
  u32 salt_len;

  u32 esalt_buf[38];
  u32 esalt_len;

} sip_t;
1460
// Android FDE esalt: raw encrypted disk data.
typedef struct
{
  u32 data[384];

} androidfde_t;

// IKE-PSK esalt: nonce and message buffers with lengths.
typedef struct
{
  u32 nr_buf[16];
  u32 nr_len;

  u32 msg_buf[128];
  u32 msg_len;

} ikepsk_t;

// NetNTLM esalt: user/domain plus server and client challenges.
typedef struct
{
  u32 user_len;
  u32 domain_len;
  u32 srvchall_len;
  u32 clichall_len;

  u32 userdomain_buf[64];
  u32 chall_buf[256];

} netntlm_t;

// Kerberos 5 pre-auth esalt: principal, realm, salt, encrypted
// timestamp and its checksum.
typedef struct
{
  u32 user[16];
  u32 realm[16];
  u32 salt[32];
  u32 timestamp[16];
  u32 checksum[4];

} krb5pa_t;

// Kerberos 5 TGS-REP esalt: account info, checksum and edata2 blob.
typedef struct
{
  u32 account_info[512];
  u32 checksum[4];
  u32 edata2[2560];
  u32 edata2_len;

} krb5tgs_t;

// TrueCrypt esalt: salt, encrypted header data and optional keyfile.
typedef struct
{
  u32 salt_buf[16];
  u32 data_buf[112];
  u32 keyfile_buf[16];

} tc_t;

// PBKDF2-HMAC-MD5 esalt: salt only.
typedef struct
{
  u32 salt_buf[16];

} pbkdf2_md5_t;

// PBKDF2-HMAC-SHA1 esalt: salt only.
typedef struct
{
  u32 salt_buf[16];

} pbkdf2_sha1_t;

// PBKDF2-HMAC-SHA256 esalt: salt only.
typedef struct
{
  u32 salt_buf[16];

} pbkdf2_sha256_t;

// PBKDF2-HMAC-SHA512 esalt: salt only (wider buffer).
typedef struct
{
  u32 salt_buf[32];

} pbkdf2_sha512_t;

// RAKP (IPMI 2.0) esalt: salt buffer and length.
typedef struct
{
  u32 salt_buf[128];
  u32 salt_len;

} rakp_t;

// CloudKey esalt: raw data blob and its length.
typedef struct
{
  u32 data_len;
  u32 data_buf[512];

} cloudkey_t;
1553
// MS Office 2007 esalt: encrypted verifier pair and AES key size.
typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[5];

  u32 keySize;

} office2007_t;

// MS Office 2010 esalt: encrypted verifier pair.
typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[8];

} office2010_t;

// MS Office 2013 esalt: encrypted verifier pair.
typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[8];

} office2013_t;

// Legacy Office (types 0/1) esalt: verifier pair plus recovered RC4 key.
typedef struct
{
  u32 version;
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[4];
  u32 rc4key[2];

} oldoffice01_t;

// Legacy Office (types 3/4) esalt: verifier pair plus recovered RC4 key.
typedef struct
{
  u32 version;
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[5];
  u32 rc4key[2];

} oldoffice34_t;

// KeePass esalt: cipher setup, optional keyfile, seeds/IV and, per
// database version, either the raw contents (v1) or expected bytes (v2).
typedef struct
{
  u32 version;
  u32 algorithm;

  /* key-file handling */
  u32 keyfile_len;
  u32 keyfile[8];

  u32 final_random_seed[8];
  u32 transf_random_seed[8];
  u32 enc_iv[4];
  u32 contents_hash[8];

  /* specific to version 1 */
  u32 contents_len;
  u32 contents[12500];

  /* specific to version 2 */
  u32 expected_bytes[8];

} keepass_t;
1617
// Per-iteration scratch ("tmp") state carried between slow-hash kernel
// invocations. One struct per algorithm family below.

// PDF 1.4 loop state.
typedef struct
{
  u32 digest[4];
  u32 out[4];

} pdf14_tmp_t;

// PDF 1.7 Level 8 loop state; digest aliased as 32- or 64-bit words.
typedef struct
{
  union
  {
    u32 dgst32[16];
    u64 dgst64[8];
  };

  u32 dgst_len;
  u32 W_len;

} pdf17l8_tmp_t;

// phpass (portable PHP hash) loop state.
typedef struct
{
  u32 digest_buf[4];

} phpass_tmp_t;

// md5crypt loop state.
typedef struct
{
  u32 digest_buf[4];

} md5crypt_tmp_t;

// sha256crypt loop state: alternate result plus P/S byte sequences.
typedef struct
{
  u32 alt_result[8];

  u32 p_bytes[4];
  u32 s_bytes[4];

} sha256crypt_tmp_t;

// sha512crypt loop state (64-bit words).
typedef struct
{
  u64 l_alt_result[8];

  u64 l_p_bytes[2];
  u64 l_s_bytes[2];

} sha512crypt_tmp_t;

// WPA PBKDF2 loop state: HMAC pads plus running digest/output.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[10];
  u32 out[10];

} wpa_tmp_t;

// Bitcoin wallet loop state.
typedef struct
{
  u64 dgst[8];

} bitcoin_wallet_tmp_t;

// DCC2 (mscash2) PBKDF2 loop state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[4];

} dcc2_tmp_t;

// bcrypt loop state: expanded key plus Blowfish P-array and S-boxes.
typedef struct
{
  u32 E[18];

  u32 P[18];

  u32 S0[256];
  u32 S1[256];
  u32 S2[256];
  u32 S3[256];

} bcrypt_tmp_t;
1706
// Password Safe v2 loop state: digest plus Blowfish P-array and S-boxes.
typedef struct
{
  u32 digest[2];

  u32 P[18];

  u32 S0[256];
  u32 S1[256];
  u32 S2[256];
  u32 S3[256];

} pwsafe2_tmp_t;

// Password Safe v3 loop state.
typedef struct
{
  u32 digest_buf[8];

} pwsafe3_tmp_t;

// Android PIN loop state.
typedef struct
{
  u32 digest_buf[5];

} androidpin_tmp_t;

// Android FDE PBKDF2 loop state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[10];
  u32 out[10];

} androidfde_tmp_t;

// TrueCrypt PBKDF2 loop state (32-bit digests).
typedef struct
{
  u32 ipad[16];
  u32 opad[16];

  u32 dgst[64];
  u32 out[64];

} tc_tmp_t;

// TrueCrypt PBKDF2 loop state (64-bit digests).
typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[32];
  u64 out[32];

} tc64_tmp_t;

// PBKDF2-HMAC-MD5 loop state.
typedef struct
{
  u32 ipad[4];
  u32 opad[4];

  u32 dgst[32];
  u32 out[32];

} pbkdf2_md5_tmp_t;

// PBKDF2-HMAC-SHA1 loop state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[32];
  u32 out[32];

} pbkdf2_sha1_tmp_t;

// PBKDF2-HMAC-SHA256 loop state.
typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[32];
  u32 out[32];

} pbkdf2_sha256_tmp_t;

// PBKDF2-HMAC-SHA512 loop state.
typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[16];
  u64 out[16];

} pbkdf2_sha512_tmp_t;
1801
// eCryptfs loop state.
typedef struct
{
  u64 out[8];

} ecryptfs_tmp_t;

// Oracle T (12c) PBKDF2-SHA512 loop state.
typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[16];
  u64 out[16];

} oraclet_tmp_t;

// 1Password agilekeychain loop state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];

} agilekey_tmp_t;

// Blockchain "My Wallet" loop state: two digest/output pairs.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst1[5];
  u32 out1[5];

  u32 dgst2[5];
  u32 out2[5];

} mywallet_tmp_t;

// AIX {ssha1} loop state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];

} sha1aix_tmp_t;

// AIX {ssha256} loop state.
typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[8];
  u32 out[8];

} sha256aix_tmp_t;

// AIX {ssha512} loop state.
typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[8];
  u64 out[8];

} sha512aix_tmp_t;

// LastPass loop state.
typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[8];
  u32 out[8];

} lastpass_tmp_t;
1880
// Drupal 7 loop state (SHA-512 words).
typedef struct
{
  u64 digest_buf[8];

} drupal7_tmp_t;

// Lotus Notes/Domino 8 loop state.
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];

} lotus8_tmp_t;

// Office 2007 loop state.
typedef struct
{
  u32 out[5];

} office2007_tmp_t;

// Office 2010 loop state.
typedef struct
{
  u32 out[5];

} office2010_tmp_t;

// Office 2013 loop state (SHA-512 words).
typedef struct
{
  u64 out[8];

} office2013_tmp_t;

// SAP CODVN H (PWDSALTEDHASH) iSSHA-1 loop state.
typedef struct
{
  u32 digest_buf[5];

} saph_sha1_tmp_t;

// 7-Zip loop state: current message block, running digest and lengths.
typedef struct
{
  u32 block[16];

  u32 dgst[8];

  u32 block_len;
  u32 final_len;

} seven_zip_tmp_t;

// AxCrypt loop state: key-encryption key plus wrap buffers.
typedef struct
{
  u32 KEK[5];

  u32 lsb[4];
  u32 cipher[4];

} axcrypt_tmp_t;

// KeePass loop state.
typedef struct
{
  u32 tmp_digest[8];

} keepass_tmp_t;

// BSDi crypt loop state: DES key schedule halves plus IV.
typedef struct
{
  u32 Kc[16];
  u32 Kd[16];

  u32 iv[2];

} bsdicrypt_tmp_t;

// RAR3 loop state: 17 intermediate SHA-1 digests.
typedef struct
{
  u32 dgst[17][5];

} rar3_tmp_t;
1961
// CRAM-MD5 esalt: username buffer.
typedef struct
{
  u32 user[16];

} cram_md5_t;

// 7-Zip esalt: IV, salt, CRC and the encrypted data stream.
typedef struct
{
  u32 iv_buf[4];
  u32 iv_len;

  u32 salt_buf[4];
  u32 salt_len;

  u32 crc;

  u32 data_buf[96];
  u32 data_len;

  u32 unpack_size;

} seven_zip_t;

// Markov-statistics table entry used by the mask generator.
typedef struct
{
  u32 key;
  u64 val;

} hcstat_table_t;

// Charset: up to 0x100 candidate characters plus the count in use.
typedef struct
{
  u32 cs_buf[0x100];
  u32 cs_len;

} cs_t;

// One rule program for the on-GPU rule engine: a list of encoded commands.
typedef struct
{
  u32 cmds[0x100];

} kernel_rule_t;

// Location of a cracked plain: generator id/vector id and inner-loop position.
typedef struct
{
  u32 gidvid;
  u32 il_pos;

} plain_t;

// Candidate password: 64 bytes of data plus its length; padded with
// placeholder words (presumably for alignment — confirm against host code).
typedef struct
{
  u32 i[16];

  u32 pw_len;

  u32 alignment_placeholder_1;
  u32 alignment_placeholder_2;
  u32 alignment_placeholder_3;

} pw_t;

// Brute-force element: a single word of keyspace material.
typedef struct
{
  u32 i;

} bf_t;

// Combinator element: 32 bytes of word data plus its length.
typedef struct
{
  u32 i[8];

  u32 pw_len;

} comb_t;

// Bit-sliced word: 32 bit-planes.
typedef struct
{
  u32 b[32];

} bs_word_t;

// scrypt scratch state: 64 uint4 lanes.
typedef struct
{
  uint4 P[64];

} scrypt_tmp_t;