Get rid of old pw_cache mechanism to control host-based vector data-types
[hashcat.git] / OpenCL / types_ocl.c
1 /**
2  * Authors.....: Jens Steube <jens.steube@gmail.com>
3  *               magnum <john.magnum@hushmail.com>
4  *
5  * License.....: MIT
6  */
7
8 #define DEVICE_TYPE_CPU 2
9 #define DEVICE_TYPE_GPU 4
10
11 typedef uchar u8;
12 typedef ushort u16;
13 typedef uint u32;
14 typedef ulong u64;
15
16 #ifndef NEW_SIMD_CODE
17 #undef VECT_SIZE
18 #define VECT_SIZE 1
19 #endif
20
21 #define CONCAT(a, b) a##b
22 #define VTYPE(type, width) CONCAT(type, width)
23
24 #if VECT_SIZE == 1
25 typedef uchar u8x;
26 typedef ushort u16x;
27 typedef uint u32x;
28 typedef ulong u64x;
29 #else
30 typedef VTYPE(uchar, VECT_SIZE) u8x;
31 typedef VTYPE(ushort, VECT_SIZE) u16x;
32 typedef VTYPE(uint, VECT_SIZE) u32x;
33 typedef VTYPE(ulong, VECT_SIZE) u64x;
34 #endif
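// Note: CONCAT/VTYPE paste the scalar type name and the vector width together
// into an OpenCL built-in vector type, e.g. with VECT_SIZE == 4,
// VTYPE (uint, VECT_SIZE) expands to uint4. The VECT_SIZE == 1 case is
// special-cased above because "uint1" is not a valid OpenCL type.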
35
36 // this one needs to die
37 #define allx(r) r
38
39 static inline u32 l32_from_64_S (u64 a)
40 {
41 const u32 r = (u32) (a);
42
43 return r;
44 }
45
46 static inline u32 h32_from_64_S (u64 a)
47 {
48 a >>= 32;
49
50 const u32 r = (u32) (a);
51
52 return r;
53 }
54
55 static inline u64 hl32_to_64_S (const u32 a, const u32 b)
56 {
57 return as_ulong ((uint2) (b, a));
58 }
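// The three helpers above split and join 64-bit values via their 32-bit halves:
// on a little-endian device, as_ulong ((uint2) (b, a)) reinterprets b as the low
// 32 bits and a as the high 32 bits, i.e. roughly ((u64) a << 32) | b.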
59
60 static inline u32x l32_from_64 (u64x a)
61 {
62 u32x r;
63
64 #if VECT_SIZE == 1
65 r = (u32) a;
66 #endif
67
68 #if VECT_SIZE >= 2
69 r.s0 = (u32) a.s0;
70 r.s1 = (u32) a.s1;
71 #endif
72
73 #if VECT_SIZE >= 4
74 r.s2 = (u32) a.s2;
75 r.s3 = (u32) a.s3;
76 #endif
77
78 #if VECT_SIZE >= 8
79 r.s4 = (u32) a.s4;
80 r.s5 = (u32) a.s5;
81 r.s6 = (u32) a.s6;
82 r.s7 = (u32) a.s7;
83 #endif
84
85 #if VECT_SIZE >= 16
86 r.s8 = (u32) a.s8;
87 r.s9 = (u32) a.s9;
88 r.sa = (u32) a.sa;
89 r.sb = (u32) a.sb;
90 r.sc = (u32) a.sc;
91 r.sd = (u32) a.sd;
92 r.se = (u32) a.se;
93 r.sf = (u32) a.sf;
94 #endif
95
96 return r;
97 }
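// Note: the element-wise casts in l32_from_64 / h32_from_64 are needed because
// OpenCL C does not allow a plain cast between vector types (e.g. (u32x) applied
// to a ulong4); only the VECT_SIZE == 1 scalar case can be cast directly.
// convert_uintN () would be another way to narrow per component.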
98
99 static inline u32x h32_from_64 (u64x a)
100 {
101 a >>= 32;
102
103 u32x r;
104
105 #if VECT_SIZE == 1
106 r = (u32) a;
107 #endif
108
109 #if VECT_SIZE >= 2
110 r.s0 = (u32) a.s0;
111 r.s1 = (u32) a.s1;
112 #endif
113
114 #if VECT_SIZE >= 4
115 r.s2 = (u32) a.s2;
116 r.s3 = (u32) a.s3;
117 #endif
118
119 #if VECT_SIZE >= 8
120 r.s4 = (u32) a.s4;
121 r.s5 = (u32) a.s5;
122 r.s6 = (u32) a.s6;
123 r.s7 = (u32) a.s7;
124 #endif
125
126 #if VECT_SIZE >= 16
127 r.s8 = (u32) a.s8;
128 r.s9 = (u32) a.s9;
129 r.sa = (u32) a.sa;
130 r.sb = (u32) a.sb;
131 r.sc = (u32) a.sc;
132 r.sd = (u32) a.sd;
133 r.se = (u32) a.se;
134 r.sf = (u32) a.sf;
135 #endif
136
137 return r;
138 }
139
140 static inline u64x hl32_to_64 (const u32x a, const u32x b)
141 {
142 u64x r;
143
144 #if VECT_SIZE == 1
145 r = as_ulong ((uint2) (b, a));
146 #endif
147
148 #if VECT_SIZE >= 2
149 r.s0 = as_ulong ((uint2) (b.s0, a.s0));
150 r.s1 = as_ulong ((uint2) (b.s1, a.s1));
151 #endif
152
153 #if VECT_SIZE >= 4
154 r.s2 = as_ulong ((uint2) (b.s2, a.s2));
155 r.s3 = as_ulong ((uint2) (b.s3, a.s3));
156 #endif
157
158 #if VECT_SIZE >= 8
159 r.s4 = as_ulong ((uint2) (b.s4, a.s4));
160 r.s5 = as_ulong ((uint2) (b.s5, a.s5));
161 r.s6 = as_ulong ((uint2) (b.s6, a.s6));
162 r.s7 = as_ulong ((uint2) (b.s7, a.s7));
163 #endif
164
165 #if VECT_SIZE >= 16
166 r.s8 = as_ulong ((uint2) (b.s8, a.s8));
167 r.s9 = as_ulong ((uint2) (b.s9, a.s9));
168 r.sa = as_ulong ((uint2) (b.sa, a.sa));
169 r.sb = as_ulong ((uint2) (b.sb, a.sb));
170 r.sc = as_ulong ((uint2) (b.sc, a.sc));
171 r.sd = as_ulong ((uint2) (b.sd, a.sd));
172 r.se = as_ulong ((uint2) (b.se, a.se));
173 r.sf = as_ulong ((uint2) (b.sf, a.sf));
174 #endif
175
176 return r;
177 }
178
179 #ifdef IS_AMD
180 static inline u32 swap32_S (const u32 v)
181 {
182 return (as_uint (as_uchar4 (v).s3210));
183 }
184
185 static inline u64 swap64_S (const u64 v)
186 {
187 return (as_ulong (as_uchar8 (v).s76543210));
188 }
189
190 static inline u32 rotr32_S (const u32 a, const u32 n)
191 {
192 return rotate (a, 32 - n);
193 }
194
195 static inline u32 rotl32_S (const u32 a, const u32 n)
196 {
197 return rotate (a, n);
198 }
199
200 static inline u64 rotr64_S (const u64 a, const u32 n)
201 {
202 #if DEVICE_TYPE == DEVICE_TYPE_CPU
203
204 const u64 r = rotate (a, (u64) 64 - n);
205
206 #else
207
208 const u32 a0 = h32_from_64_S (a);
209 const u32 a1 = l32_from_64_S (a);
210
211 const u32 t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
212 const u32 t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
213
214 const u64 r = hl32_to_64_S (t0, t1);
215
216 #endif
217
218 return r;
219 }
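// On AMD GPUs the 64-bit rotate above is assembled from two 32-bit funnel shifts:
// amd_bitalign (hi, lo, s) returns the low 32 bits of (((u64) hi << 32) | lo) >> (s & 31),
// so one call produces the new high word and one the new low word of rotr64 (a, n)
// without a full 64-bit shift. The n >= 32 case just swaps which half feeds which call.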
220
221 static inline u64 rotl64_S (const u64 a, const u32 n)
222 {
223 return rotr64_S (a, 64 - n);
224 }
225
226 static inline u32x swap32 (const u32x v)
227 {
228 return ((v >> 24) & 0x000000ff)
229 | ((v >> 8) & 0x0000ff00)
230 | ((v << 8) & 0x00ff0000)
231 | ((v << 24) & 0xff000000);
232 }
233
234 static inline u64x swap64 (const u64x v)
235 {
236 return ((v >> 56) & 0x00000000000000ff)
237 | ((v >> 40) & 0x000000000000ff00)
238 | ((v >> 24) & 0x0000000000ff0000)
239 | ((v >> 8) & 0x00000000ff000000)
240 | ((v << 8) & 0x000000ff00000000)
241 | ((v << 24) & 0x0000ff0000000000)
242 | ((v << 40) & 0x00ff000000000000)
243 | ((v << 56) & 0xff00000000000000);
244 }
245
246 static inline u32x rotr32 (const u32x a, const u32 n)
247 {
248 return rotate (a, 32 - n);
249 }
250
251 static inline u32x rotl32 (const u32x a, const u32 n)
252 {
253 return rotate (a, n);
254 }
255
256 static inline u64x rotr64 (const u64x a, const u32 n)
257 {
258 #if DEVICE_TYPE == DEVICE_TYPE_CPU
259
260 const u64x r = rotate (a, (u64) 64 - n);
261
262 #else
263
264 const u32x a0 = h32_from_64 (a);
265 const u32x a1 = l32_from_64 (a);
266
267 const u32x t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
268 const u32x t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
269
270 const u64x r = hl32_to_64 (t0, t1);
271
272 #endif
273
274 return r;
275 }
276
277 static inline u64x rotl64 (const u64x a, const u32 n)
278 {
279 return rotr64 (a, 64 - n);
280 }
281
282 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
283 {
284 return amd_bfe (a, b, c);
285 }
286
287 static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
288 {
289 return amd_bytealign (a, b, c);
290 }
291 #endif
292
293 #ifdef IS_NV
294 static inline u32 swap32_S (const u32 v)
295 {
296 u32 r;
297
298 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));
299
300 return r;
301 }
302
303 static inline u64 swap64_S (const u64 v)
304 {
305 u32 il;
306 u32 ir;
307
308 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(v));
309
310 u32 tl;
311 u32 tr;
312
313 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl) : "r"(il));
314 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));
315
316 u64 r;
317
318 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));
319
320 return r;
321 }
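// prmt.b32 with selector 0x0123 writes source bytes 3,2,1,0 into result bytes
// 0,1,2,3, i.e. it byte-reverses one 32-bit word. swap64_S therefore unpacks the
// 64-bit value, byte-swaps each half, and repacks the two halves in swapped order.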
322
323 static inline u32 rotr32_S (const u32 a, const u32 n)
324 {
325 return rotate (a, 32 - n);
326 }
327
328 static inline u32 rotl32_S (const u32 a, const u32 n)
329 {
330 return rotate (a, n);
331 }
332
333 #if CUDA_ARCH >= 350
334 static inline u64 rotr64_S (const u64 a, const u32 n)
335 {
336 u32 il;
337 u32 ir;
338
339 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
340
341 u32 tl;
342 u32 tr;
343
344 if (n >= 32)
345 {
346 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
347 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
348 }
349 else
350 {
351 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
352 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
353 }
354
355 u64 r;
356
357 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
358
359 return r;
360 }
361 #else
362 static inline u64 rotr64_S (const u64 a, const u32 n)
363 {
364 return rotate (a, (u64) 64 - n);
365 }
366 #endif
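// shf.r.wrap.b32 d, lo, hi, n is the sm_35+ funnel shift: it yields the low 32
// bits of (((u64) hi << 32) | lo) >> (n & 31) ("wrap" masks the shift amount),
// which is why the n >= 32 case above swaps the two halves and shifts by n - 32.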
367
368 static inline u64 rotl64_S (const u64 a, const u32 n)
369 {
370 return rotr64_S (a, 64 - n);
371 }
372
373 #if CUDA_ARCH >= 500
374 static inline u32 lut3_2d_S (const u32 a, const u32 b, const u32 c)
375 {
376 u32 r;
377
378 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
379
380 return r;
381 }
382
383 static inline u32 lut3_39_S (const u32 a, const u32 b, const u32 c)
384 {
385 u32 r;
386
387 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
388
389 return r;
390 }
391
392 static inline u32 lut3_59_S (const u32 a, const u32 b, const u32 c)
393 {
394 u32 r;
395
396 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
397
398 return r;
399 }
400
401 static inline u32 lut3_96_S (const u32 a, const u32 b, const u32 c)
402 {
403 u32 r;
404
405 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
406
407 return r;
408 }
409
410 static inline u32 lut3_e4_S (const u32 a, const u32 b, const u32 c)
411 {
412 u32 r;
413
414 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
415
416 return r;
417 }
418
419 static inline u32 lut3_e8_S (const u32 a, const u32 b, const u32 c)
420 {
421 u32 r;
422
423 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
424
425 return r;
426 }
427
428 static inline u32 lut3_ca_S (const u32 a, const u32 b, const u32 c)
429 {
430 u32 r;
431
432 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
433
434 return r;
435 }
436 #endif
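// lop3.b32 (sm_50+) evaluates an arbitrary three-input boolean function; the
// immediate is the 8-bit truth table over the inputs. The constants used here
// include some well-known ones, e.g. 0x96 = a ^ b ^ c, 0xe8 = the majority
// function (a & b) | (a & c) | (b & c), and 0xca = bitselect, (a & b) | (~a & c).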
437
438 static inline u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
439 {
440 u32 r;
441
442 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
443
444 return r;
445 }
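// prmt.b32 d, a, b, c treats {b:a} as an 8-byte pool (a supplies bytes 0-3,
// b bytes 4-7); each of the four selector nibbles in c picks one pool byte for
// the corresponding result byte. __byte_perm_S exposes this directly and is
// also used below to emulate AMD's byte-alignment intrinsic on pre-sm_35 parts.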
446
447 static inline u32x swap32 (const u32x v)
448 {
449 return ((v >> 24) & 0x000000ff)
450 | ((v >> 8) & 0x0000ff00)
451 | ((v << 8) & 0x00ff0000)
452 | ((v << 24) & 0xff000000);
453 }
454
455 static inline u64x swap64 (const u64x v)
456 {
457 return ((v >> 56) & 0x00000000000000ff)
458 | ((v >> 40) & 0x000000000000ff00)
459 | ((v >> 24) & 0x0000000000ff0000)
460 | ((v >> 8) & 0x00000000ff000000)
461 | ((v << 8) & 0x000000ff00000000)
462 | ((v << 24) & 0x0000ff0000000000)
463 | ((v << 40) & 0x00ff000000000000)
464 | ((v << 56) & 0xff00000000000000);
465 }
466
467 static inline u32x rotr32 (const u32x a, const u32 n)
468 {
469 return rotate (a, 32 - n);
470 }
471
472 static inline u32x rotl32 (const u32x a, const u32 n)
473 {
474 return rotate (a, n);
475 }
476
477 #if CUDA_ARCH >= 350
478 static inline u64x rotr64 (const u64x a, const u32 n)
479 {
480 u64x r;
481
482 u32 il;
483 u32 ir;
484 u32 tl;
485 u32 tr;
486
487 #if VECT_SIZE == 1
488
489 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
490
491 if (n >= 32)
492 {
493 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
494 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
495 }
496 else
497 {
498 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
499 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
500 }
501
502 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
503
504 #endif
505
506 #if VECT_SIZE >= 2
507
508 {
509 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s0));
510
511 if (n >= 32)
512 {
513 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
514 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
515 }
516 else
517 {
518 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
519 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
520 }
521
522 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s0) : "r"(tl), "r"(tr));
523 }
524
525 {
526 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s1));
527
528 if (n >= 32)
529 {
530 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
531 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
532 }
533 else
534 {
535 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
536 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
537 }
538
539 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s1) : "r"(tl), "r"(tr));
540 }
541
542 #endif
543
544 #if VECT_SIZE >= 4
545
546 {
547 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s2));
548
549 if (n >= 32)
550 {
551 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
552 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
553 }
554 else
555 {
556 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
557 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
558 }
559
560 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s2) : "r"(tl), "r"(tr));
561 }
562
563 {
564 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s3));
565
566 if (n >= 32)
567 {
568 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
569 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
570 }
571 else
572 {
573 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
574 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
575 }
576
577 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s3) : "r"(tl), "r"(tr));
578 }
579
580 #endif
581
582 #if VECT_SIZE >= 8
583
584 {
585 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s4));
586
587 if (n >= 32)
588 {
589 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
590 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
591 }
592 else
593 {
594 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
595 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
596 }
597
598 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s4) : "r"(tl), "r"(tr));
599 }
600
601 {
602 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s5));
603
604 if (n >= 32)
605 {
606 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
607 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
608 }
609 else
610 {
611 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
612 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
613 }
614
615 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s5) : "r"(tl), "r"(tr));
616 }
617
618 {
619 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s6));
620
621 if (n >= 32)
622 {
623 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
624 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
625 }
626 else
627 {
628 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
629 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
630 }
631
632 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s6) : "r"(tl), "r"(tr));
633 }
634
635 {
636 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s7));
637
638 if (n >= 32)
639 {
640 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
641 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
642 }
643 else
644 {
645 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
646 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
647 }
648
649 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s7) : "r"(tl), "r"(tr));
650 }
651
652 #endif
653
654 #if VECT_SIZE >= 16
655
656 {
657 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s8));
658
659 if (n >= 32)
660 {
661 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
662 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
663 }
664 else
665 {
666 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
667 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
668 }
669
670 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s8) : "r"(tl), "r"(tr));
671 }
672
673 {
674 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s9));
675
676 if (n >= 32)
677 {
678 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
679 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
680 }
681 else
682 {
683 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
684 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
685 }
686
687 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s9) : "r"(tl), "r"(tr));
688 }
689
690 {
691 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sa));
692
693 if (n >= 32)
694 {
695 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
696 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
697 }
698 else
699 {
700 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
701 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
702 }
703
704 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sa) : "r"(tl), "r"(tr));
705 }
706
707 {
708 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sb));
709
710 if (n >= 32)
711 {
712 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
713 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
714 }
715 else
716 {
717 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
718 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
719 }
720
721 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sb) : "r"(tl), "r"(tr));
722 }
723
724 {
725 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sc));
726
727 if (n >= 32)
728 {
729 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
730 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
731 }
732 else
733 {
734 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
735 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
736 }
737
738 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sc) : "r"(tl), "r"(tr));
739 }
740
741 {
742 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sd));
743
744 if (n >= 32)
745 {
746 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
747 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
748 }
749 else
750 {
751 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
752 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
753 }
754
755 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sd) : "r"(tl), "r"(tr));
756 }
757
758 {
759 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.se));
760
761 if (n >= 32)
762 {
763 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
764 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
765 }
766 else
767 {
768 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
769 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
770 }
771
772 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.se) : "r"(tl), "r"(tr));
773 }
774
775 {
776 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sf));
777
778 if (n >= 32)
779 {
780 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
781 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
782 }
783 else
784 {
785 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
786 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
787 }
788
789 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sf) : "r"(tl), "r"(tr));
790 }
791
792 #endif
793
794 return r;
795 }
796 #else
797 static inline u64x rotr64 (const u64x a, const u32 n)
798 {
799 return rotate (a, (u64) 64 - n);
800 }
801 #endif
802
803 static inline u64x rotl64 (const u64x a, const u32 n)
804 {
805 return rotr64 (a, (u64) 64 - n);
806 }
807
808 static inline u32x __byte_perm (const u32x a, const u32x b, const u32x c)
809 {
810 u32x r;
811
812 #if VECT_SIZE == 1
813 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c) );
814 #endif
815
816 #if VECT_SIZE >= 2
817 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(a.s0), "r"(b.s0), "r"(c.s0));
818 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(a.s1), "r"(b.s1), "r"(c.s1));
819 #endif
820
821 #if VECT_SIZE >= 4
822 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(a.s2), "r"(b.s2), "r"(c.s2));
823 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(a.s3), "r"(b.s3), "r"(c.s3));
824 #endif
825
826 #if VECT_SIZE >= 8
827 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(a.s4), "r"(b.s4), "r"(c.s4));
828 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(a.s5), "r"(b.s5), "r"(c.s5));
829 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(a.s6), "r"(b.s6), "r"(c.s6));
830 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(a.s7), "r"(b.s7), "r"(c.s7));
831 #endif
832
833 #if VECT_SIZE >= 16
834 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(a.s8), "r"(b.s8), "r"(c.s8));
835 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(a.s9), "r"(b.s9), "r"(c.s9));
836 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(a.sa), "r"(b.sa), "r"(c.sa));
837 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(a.sb), "r"(b.sb), "r"(c.sb));
838 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(a.sc), "r"(b.sc), "r"(c.sc));
839 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(a.sd), "r"(b.sd), "r"(c.sd));
840 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(a.se), "r"(b.se), "r"(c.se));
841 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(a.sf), "r"(b.sf), "r"(c.sf));
842 #endif
843
844 return r;
845 }
846
847 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
848 {
849 u32 r;
850
851 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
852
853 return r;
854 }
855
856 #if CUDA_ARCH >= 350
857 static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
858 {
859 u32 r;
860
861 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));
862
863 return r;
864 }
865 #else
866 static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
867 {
868 return __byte_perm_S (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);
869 }
870 #endif
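// Both variants above emulate AMD's amd_bytealign (a, b, c), i.e. the low 32 bits
// of (((u64) a << 32) | b) >> ((c & 3) * 8). On sm_35+ the funnel shift does it in
// one instruction; otherwise the selector (0x76543210 >> ((c & 3) * 4)) & 0xffff
// makes prmt pick the 4 consecutive bytes of {a:b} starting at byte offset c & 3.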
871
872 #if CUDA_ARCH >= 500
873 static inline u32x lut3_2d (const u32x a, const u32x b, const u32x c)
874 {
875 u32x r;
876
877 #if VECT_SIZE == 1
878 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
879 #endif
880
881 #if VECT_SIZE >= 2
882 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
883 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
884 #endif
885
886 #if VECT_SIZE >= 4
887 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
888 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
889 #endif
890
891 #if VECT_SIZE >= 8
892 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
893 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
894 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
895 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
896 #endif
897
898 #if VECT_SIZE >= 16
899 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
900 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
901 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
902 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
903 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
904 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
905 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
906 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
907 #endif
908
909 return r;
910 }
911
912 static inline u32x lut3_39 (const u32x a, const u32x b, const u32x c)
913 {
914 u32x r;
915
916 #if VECT_SIZE == 1
917 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
918 #endif
919
920 #if VECT_SIZE >= 2
921 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
922 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
923 #endif
924
925 #if VECT_SIZE >= 4
926 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
927 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
928 #endif
929
930 #if VECT_SIZE >= 8
931 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
932 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
933 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
934 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
935 #endif
936
937 #if VECT_SIZE >= 16
938 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
939 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
940 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
941 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
942 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
943 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
944 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
945 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
946 #endif
947
948 return r;
949 }
950
951 static inline u32x lut3_59 (const u32x a, const u32x b, const u32x c)
952 {
953 u32x r;
954
955 #if VECT_SIZE == 1
956 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
957 #endif
958
959 #if VECT_SIZE >= 2
960 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
961 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
962 #endif
963
964 #if VECT_SIZE >= 4
965 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
966 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
967 #endif
968
969 #if VECT_SIZE >= 8
970 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
971 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
972 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
973 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
974 #endif
975
976 #if VECT_SIZE >= 16
977 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
978 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
979 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
980 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
981 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
982 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
983 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
984 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
985 #endif
986
987 return r;
988 }
989
990 static inline u32x lut3_96 (const u32x a, const u32x b, const u32x c)
991 {
992 u32x r;
993
994 #if VECT_SIZE == 1
995 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
996 #endif
997
998 #if VECT_SIZE >= 2
999 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1000 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1001 #endif
1002
1003 #if VECT_SIZE >= 4
1004 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1005 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1006 #endif
1007
1008 #if VECT_SIZE >= 8
1009 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1010 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1011 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1012 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1013 #endif
1014
1015 #if VECT_SIZE >= 16
1016 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1017 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1018 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1019 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1020 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1021 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1022 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1023 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1024 #endif
1025
1026 return r;
1027 }
1028
1029 static inline u32x lut3_e4 (const u32x a, const u32x b, const u32x c)
1030 {
1031 u32x r;
1032
1033 #if VECT_SIZE == 1
1034 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1035 #endif
1036
1037 #if VECT_SIZE >= 2
1038 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1039 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1040 #endif
1041
1042 #if VECT_SIZE >= 4
1043 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1044 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1045 #endif
1046
1047 #if VECT_SIZE >= 8
1048 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1049 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1050 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1051 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1052 #endif
1053
1054 #if VECT_SIZE >= 16
1055 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1056 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1057 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1058 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1059 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1060 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1061 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1062 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1063 #endif
1064
1065 return r;
1066 }
1067
1068 static inline u32x lut3_e8 (const u32x a, const u32x b, const u32x c)
1069 {
1070 u32x r;
1071
1072 #if VECT_SIZE == 1
1073 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1074 #endif
1075
1076 #if VECT_SIZE >= 2
1077 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1078 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1079 #endif
1080
1081 #if VECT_SIZE >= 4
1082 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1083 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1084 #endif
1085
1086 #if VECT_SIZE >= 8
1087 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1088 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1089 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1090 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1091 #endif
1092
1093 #if VECT_SIZE >= 16
1094 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1095 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1096 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1097 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1098 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1099 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1100 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1101 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1102 #endif
1103
1104 return r;
1105 }
1106
1107 static inline u32x lut3_ca (const u32x a, const u32x b, const u32x c)
1108 {
1109 u32x r;
1110
1111 #if VECT_SIZE == 1
1112 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1113 #endif
1114
1115 #if VECT_SIZE >= 2
1116 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1117 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1118 #endif
1119
1120 #if VECT_SIZE >= 4
1121 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1122 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1123 #endif
1124
1125 #if VECT_SIZE >= 8
1126 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1127 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1128 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1129 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1130 #endif
1131
1132 #if VECT_SIZE >= 16
1133 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1134 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1135 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1136 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1137 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1138 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1139 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1140 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1141 #endif
1142
1143 return r;
1144 }
1145
1146 #endif
1147 #endif
1148
1149 #ifdef IS_GENERIC
1150 static inline u32 swap32_S (const u32 v)
1151 {
1152 return (as_uint (as_uchar4 (v).s3210));
1153 }
1154
1155 static inline u64 swap64_S (const u64 v)
1156 {
1157 return (as_ulong (as_uchar8 (v).s76543210));
1158 }
1159
1160 static inline u32 rotr32_S (const u32 a, const u32 n)
1161 {
1162 return rotate (a, 32 - n);
1163 }
1164
1165 static inline u32 rotl32_S (const u32 a, const u32 n)
1166 {
1167 return rotate (a, n);
1168 }
1169
1170 static inline u64 rotr64_S (const u64 a, const u32 n)
1171 {
1172 return rotate (a, (u64) 64 - n);
1173 }
1174
1175 static inline u64 rotl64_S (const u64 a, const u32 n)
1176 {
1177 return rotate (a, (u64) n);
1178 }
1179
1180 static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
1181 {
1182 const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);
1183
1184 return (u32) (tmp);
1185 }
1186
1187 static inline u32x swap32 (const u32x v)
1188 {
1189 return ((v >> 24) & 0x000000ff)
1190 | ((v >> 8) & 0x0000ff00)
1191 | ((v << 8) & 0x00ff0000)
1192 | ((v << 24) & 0xff000000);
1193 }
1194
1195 static inline u64x swap64 (const u64x v)
1196 {
1197 return ((v >> 56) & 0x00000000000000ff)
1198 | ((v >> 40) & 0x000000000000ff00)
1199 | ((v >> 24) & 0x0000000000ff0000)
1200 | ((v >> 8) & 0x00000000ff000000)
1201 | ((v << 8) & 0x000000ff00000000)
1202 | ((v << 24) & 0x0000ff0000000000)
1203 | ((v << 40) & 0x00ff000000000000)
1204 | ((v << 56) & 0xff00000000000000);
1205 }
1206
1207 static inline u32x rotr32 (const u32x a, const u32 n)
1208 {
1209 return rotate (a, 32 - n);
1210 }
1211
1212 static inline u32x rotl32 (const u32x a, const u32 n)
1213 {
1214 return rotate (a, n);
1215 }
1216
1217 static inline u64x rotr64 (const u64x a, const u32 n)
1218 {
1219 return rotate (a, (u64) 64 - n);
1220 }
1221
1222 static inline u64x rotl64 (const u64x a, const u32 n)
1223 {
1224 return rotate (a, (u64) n);
1225 }
1226
1227 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
1228 {
1229 #define BIT(x) (1 << (x))
1230 #define BIT_MASK(x) (BIT (x) - 1)
1231 #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z))
1232
1233 return BFE (a, b, c);
1234 }
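// BFE (x, y, z) extracts z bits of x starting at bit y, zero-extended, matching
// the PTX bfe.u32 / AMD amd_bfe paths above for in-range arguments.
// Example: __bfe (0xdeadbeef, 8, 8) == 0xbe.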
1235
1236 static inline u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
1237 {
1238 #if VECT_SIZE == 1
1239 const u64x tmp = ((((u64x) (a)) << 32) | ((u64x) (b))) >> ((c & 3) * 8);
1240
1241 return (u32x) (tmp);
1242 #endif
1243
1244 #if VECT_SIZE == 2
1245 const u64x tmp = ((((u64x) (a.s0, a.s1)) << 32) | ((u64x) (b.s0, b.s1))) >> ((c & 3) * 8);
1246
1247 return (u32x) (tmp.s0, tmp.s1);
1248 #endif
1249
1250 #if VECT_SIZE == 4
1251 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3))) >> ((c & 3) * 8);
1252
1253 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3);
1254 #endif
1255
1256 #if VECT_SIZE == 8
1257 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7))) >> ((c & 3) * 8);
1258
1259 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7);
1260 #endif
1261
1262 #if VECT_SIZE == 16
1263 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7, a.s8, a.s9, a.sa, a.sb, a.sc, a.sd, a.se, a.sf)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7, b.s8, b.s9, b.sa, b.sb, b.sc, b.sd, b.se, b.sf))) >> ((c & 3) * 8);
1264
1265 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7, tmp.s8, tmp.s9, tmp.sa, tmp.sb, tmp.sc, tmp.sd, tmp.se, tmp.sf);
1266 #endif
1267 }
1268 #endif
1269
1270 typedef struct
1271 {
1272 #if defined _DES_
1273 u32 digest_buf[4];
1274 #elif defined _MD4_
1275 u32 digest_buf[4];
1276 #elif defined _MD5_
1277 u32 digest_buf[4];
1278 #elif defined _MD5H_
1279 u32 digest_buf[4];
1280 #elif defined _SHA1_
1281 u32 digest_buf[5];
1282 #elif defined _BCRYPT_
1283 u32 digest_buf[6];
1284 #elif defined _SHA256_
1285 u32 digest_buf[8];
1286 #elif defined _SHA384_
1287 u32 digest_buf[16];
1288 #elif defined _SHA512_
1289 u32 digest_buf[16];
1290 #elif defined _KECCAK_
1291 u32 digest_buf[50];
1292 #elif defined _RIPEMD160_
1293 u32 digest_buf[5];
1294 #elif defined _WHIRLPOOL_
1295 u32 digest_buf[16];
1296 #elif defined _GOST_
1297 u32 digest_buf[8];
1298 #elif defined _GOST2012_256_
1299 u32 digest_buf[8];
1300 #elif defined _GOST2012_512_
1301 u32 digest_buf[16];
1302 #elif defined _SAPB_
1303 u32 digest_buf[4];
1304 #elif defined _SAPG_
1305 u32 digest_buf[5];
1306 #elif defined _MYSQL323_
1307 u32 digest_buf[4];
1308 #elif defined _LOTUS5_
1309 u32 digest_buf[4];
1310 #elif defined _LOTUS6_
1311 u32 digest_buf[4];
1312 #elif defined _SCRYPT_
1313 u32 digest_buf[8];
1314 #elif defined _LOTUS8_
1315 u32 digest_buf[4];
1316 #elif defined _OFFICE2007_
1317 u32 digest_buf[4];
1318 #elif defined _OFFICE2010_
1319 u32 digest_buf[4];
1320 #elif defined _OFFICE2013_
1321 u32 digest_buf[4];
1322 #elif defined _OLDOFFICE01_
1323 u32 digest_buf[4];
1324 #elif defined _OLDOFFICE34_
1325 u32 digest_buf[4];
1326 #elif defined _SIPHASH_
1327 u32 digest_buf[4];
1328 #elif defined _PBKDF2_MD5_
1329 u32 digest_buf[32];
1330 #elif defined _PBKDF2_SHA1_
1331 u32 digest_buf[32];
1332 #elif defined _PBKDF2_SHA256_
1333 u32 digest_buf[32];
1334 #elif defined _PBKDF2_SHA512_
1335 u32 digest_buf[32];
1336 #elif defined _PDF17L8_
1337 u32 digest_buf[8];
1338 #elif defined _CRC32_
1339 u32 digest_buf[4];
1340 #elif defined _SEVEN_ZIP_
1341 u32 digest_buf[4];
1342 #elif defined _ANDROIDFDE_
1343 u32 digest_buf[4];
1344 #elif defined _DCC2_
1345 u32 digest_buf[4];
1346 #elif defined _WPA_
1347 u32 digest_buf[4];
1348 #elif defined _MD5_SHA1_
1349 u32 digest_buf[4];
1350 #elif defined _SHA1_MD5_
1351 u32 digest_buf[5];
1352 #elif defined _NETNTLMV2_
1353 u32 digest_buf[4];
1354 #elif defined _KRB5PA_
1355 u32 digest_buf[4];
1356 #elif defined _CLOUDKEY_
1357 u32 digest_buf[8];
1358 #elif defined _SCRYPT_
1359 u32 digest_buf[4];
1360 #elif defined _PSAFE2_
1361 u32 digest_buf[5];
1362 #elif defined _LOTUS8_
1363 u32 digest_buf[4];
1364 #elif defined _RAR3_
1365 u32 digest_buf[4];
1366 #elif defined _SHA256_SHA1_
1367 u32 digest_buf[8];
1368 #elif defined _MS_DRSR_
1369 u32 digest_buf[8];
1370 #elif defined _ANDROIDFDE_SAMSUNG_
1371 u32 digest_buf[8];
1372 #elif defined _RAR5_
1373 u32 digest_buf[4];
1374 #elif defined _KRB5TGS_
1375 u32 digest_buf[4];
1376 #endif
1377
1378 } digest_t;
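// digest_buf is sized in 32-bit words for the hash type selected at build time,
// e.g. _SHA256_ gets 8 words (256 bits) and _SHA512_ / _WHIRLPOOL_ get 16 words
// (512 bits); each kernel source is expected to define exactly one of these
// hash-type macros.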
1379
1380 typedef struct
1381 {
1382 u32 salt_buf[16];
1383 u32 salt_buf_pc[8];
1384
1385 u32 salt_len;
1386 u32 salt_iter;
1387 u32 salt_sign[2];
1388
1389 u32 keccak_mdlen;
1390 u32 truecrypt_mdlen;
1391
1392 u32 digests_cnt;
1393 u32 digests_done;
1394
1395 u32 digests_offset;
1396
1397 u32 scrypt_N;
1398 u32 scrypt_r;
1399 u32 scrypt_p;
1400 u32 scrypt_tmto;
1401 u32 scrypt_phy;
1402
1403 } salt_t;
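// scrypt_N / scrypt_r / scrypt_p are the standard scrypt cost parameters
// (CPU/memory cost, block size, parallelization). scrypt_tmto and scrypt_phy
// look like hashcat-internal tuning fields (time-memory trade-off and physical
// memory hints) rather than scrypt inputs.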
1404
1405 typedef struct
1406 {
1407 int V;
1408 int R;
1409 int P;
1410
1411 int enc_md;
1412
1413 u32 id_buf[8];
1414 u32 u_buf[32];
1415 u32 o_buf[32];
1416
1417 int id_len;
1418 int o_len;
1419 int u_len;
1420
1421 u32 rc4key[2];
1422 u32 rc4data[2];
1423
1424 } pdf_t;
1425
1426 typedef struct
1427 {
1428 u32 pke[25];
1429 u32 eapol[64];
1430 int eapol_size;
1431 int keyver;
1432
1433 } wpa_t;
1434
1435 typedef struct
1436 {
1437 u32 cry_master_buf[64];
1438 u32 ckey_buf[64];
1439 u32 public_key_buf[64];
1440
1441 u32 cry_master_len;
1442 u32 ckey_len;
1443 u32 public_key_len;
1444
1445 } bitcoin_wallet_t;
1446
1447 typedef struct
1448 {
1449 u32 salt_buf[30];
1450 u32 salt_len;
1451
1452 u32 esalt_buf[38];
1453 u32 esalt_len;
1454
1455 } sip_t;
1456
1457 typedef struct
1458 {
1459 u32 data[384];
1460
1461 } androidfde_t;
1462
1463 typedef struct
1464 {
1465 u32 nr_buf[16];
1466 u32 nr_len;
1467
1468 u32 msg_buf[128];
1469 u32 msg_len;
1470
1471 } ikepsk_t;
1472
1473 typedef struct
1474 {
1475 u32 user_len;
1476 u32 domain_len;
1477 u32 srvchall_len;
1478 u32 clichall_len;
1479
1480 u32 userdomain_buf[64];
1481 u32 chall_buf[256];
1482
1483 } netntlm_t;
1484
1485 typedef struct
1486 {
1487 u32 user[16];
1488 u32 realm[16];
1489 u32 salt[32];
1490 u32 timestamp[16];
1491 u32 checksum[4];
1492
1493 } krb5pa_t;
1494
1495 typedef struct
1496 {
1497 u32 account_info[512];
1498 u32 checksum[4];
1499 u32 edata2[2560];
1500 u32 edata2_len;
1501
1502 } krb5tgs_t;
1503
1504 typedef struct
1505 {
1506 u32 salt_buf[16];
1507 u32 data_buf[112];
1508 u32 keyfile_buf[16];
1509
1510 } tc_t;
1511
1512 typedef struct
1513 {
1514 u32 salt_buf[16];
1515
1516 } pbkdf2_md5_t;
1517
1518 typedef struct
1519 {
1520 u32 salt_buf[16];
1521
1522 } pbkdf2_sha1_t;
1523
1524 typedef struct
1525 {
1526 u32 salt_buf[16];
1527
1528 } pbkdf2_sha256_t;
1529
1530 typedef struct
1531 {
1532 u32 salt_buf[32];
1533
1534 } pbkdf2_sha512_t;
1535
1536 typedef struct
1537 {
1538 u32 salt_buf[128];
1539 u32 salt_len;
1540
1541 } rakp_t;
1542
1543 typedef struct
1544 {
1545 u32 data_len;
1546 u32 data_buf[512];
1547
1548 } cloudkey_t;
1549
1550 typedef struct
1551 {
1552 u32 encryptedVerifier[4];
1553 u32 encryptedVerifierHash[5];
1554
1555 u32 keySize;
1556
1557 } office2007_t;
1558
1559 typedef struct
1560 {
1561 u32 encryptedVerifier[4];
1562 u32 encryptedVerifierHash[8];
1563
1564 } office2010_t;
1565
1566 typedef struct
1567 {
1568 u32 encryptedVerifier[4];
1569 u32 encryptedVerifierHash[8];
1570
1571 } office2013_t;
1572
1573 typedef struct
1574 {
1575 u32 version;
1576 u32 encryptedVerifier[4];
1577 u32 encryptedVerifierHash[4];
1578 u32 rc4key[2];
1579
1580 } oldoffice01_t;
1581
1582 typedef struct
1583 {
1584 u32 version;
1585 u32 encryptedVerifier[4];
1586 u32 encryptedVerifierHash[5];
1587 u32 rc4key[2];
1588
1589 } oldoffice34_t;
1590
1591 typedef struct
1592 {
1593 u32 digest[4];
1594 u32 out[4];
1595
1596 } pdf14_tmp_t;
1597
1598 typedef struct
1599 {
1600 union
1601 {
1602 u32 dgst32[16];
1603 u64 dgst64[8];
1604 };
1605
1606 u32 dgst_len;
1607 u32 W_len;
1608
1609 } pdf17l8_tmp_t;
1610
1611 typedef struct
1612 {
1613 u32 digest_buf[4];
1614
1615 } phpass_tmp_t;
1616
1617 typedef struct
1618 {
1619 u32 digest_buf[4];
1620
1621 } md5crypt_tmp_t;
1622
1623 typedef struct
1624 {
1625 u32 alt_result[8];
1626
1627 u32 p_bytes[4];
1628 u32 s_bytes[4];
1629
1630 } sha256crypt_tmp_t;
1631
1632 typedef struct
1633 {
1634 u64 l_alt_result[8];
1635
1636 u64 l_p_bytes[2];
1637 u64 l_s_bytes[2];
1638
1639 } sha512crypt_tmp_t;
1640
1641 typedef struct
1642 {
1643 u32 ipad[5];
1644 u32 opad[5];
1645
1646 u32 dgst[10];
1647 u32 out[10];
1648
1649 } wpa_tmp_t;
1650
1651 typedef struct
1652 {
1653 u64 dgst[8];
1654
1655 } bitcoin_wallet_tmp_t;
1656
1657 typedef struct
1658 {
1659 u32 ipad[5];
1660 u32 opad[5];
1661
1662 u32 dgst[5];
1663 u32 out[4];
1664
1665 } dcc2_tmp_t;
1666
1667 typedef struct
1668 {
1669 u32 E[18];
1670
1671 u32 P[18];
1672
1673 u32 S0[256];
1674 u32 S1[256];
1675 u32 S2[256];
1676 u32 S3[256];
1677
1678 } bcrypt_tmp_t;
1679
1680 typedef struct
1681 {
1682 u32 digest[2];
1683
1684 u32 P[18];
1685
1686 u32 S0[256];
1687 u32 S1[256];
1688 u32 S2[256];
1689 u32 S3[256];
1690
1691 } pwsafe2_tmp_t;
1692
1693 typedef struct
1694 {
1695 u32 digest_buf[8];
1696
1697 } pwsafe3_tmp_t;
1698
1699 typedef struct
1700 {
1701 u32 digest_buf[5];
1702
1703 } androidpin_tmp_t;
1704
1705 typedef struct
1706 {
1707 u32 ipad[5];
1708 u32 opad[5];
1709
1710 u32 dgst[10];
1711 u32 out[10];
1712
1713 } androidfde_tmp_t;
1714
1715 typedef struct
1716 {
1717 u32 ipad[16];
1718 u32 opad[16];
1719
1720 u32 dgst[64];
1721 u32 out[64];
1722
1723 } tc_tmp_t;
1724
1725 typedef struct
1726 {
1727 u64 ipad[8];
1728 u64 opad[8];
1729
1730 u64 dgst[32];
1731 u64 out[32];
1732
1733 } tc64_tmp_t;
1734
1735 typedef struct
1736 {
1737 u32 ipad[4];
1738 u32 opad[4];
1739
1740 u32 dgst[32];
1741 u32 out[32];
1742
1743 } pbkdf2_md5_tmp_t;
1744
1745 typedef struct
1746 {
1747 u32 ipad[5];
1748 u32 opad[5];
1749
1750 u32 dgst[32];
1751 u32 out[32];
1752
1753 } pbkdf2_sha1_tmp_t;
1754
1755 typedef struct
1756 {
1757 u32 ipad[8];
1758 u32 opad[8];
1759
1760 u32 dgst[32];
1761 u32 out[32];
1762
1763 } pbkdf2_sha256_tmp_t;
1764
1765 typedef struct
1766 {
1767 u64 ipad[8];
1768 u64 opad[8];
1769
1770 u64 dgst[16];
1771 u64 out[16];
1772
1773 } pbkdf2_sha512_tmp_t;
1774
1775 typedef struct
1776 {
1777 u64 out[8];
1778
1779 } ecryptfs_tmp_t;
1780
1781 typedef struct
1782 {
1783 u64 ipad[8];
1784 u64 opad[8];
1785
1786 u64 dgst[16];
1787 u64 out[16];
1788
1789 } oraclet_tmp_t;
1790
1791 typedef struct
1792 {
1793 u32 ipad[5];
1794 u32 opad[5];
1795
1796 u32 dgst[5];
1797 u32 out[5];
1798
1799 } agilekey_tmp_t;
1800
1801 typedef struct
1802 {
1803 u32 ipad[5];
1804 u32 opad[5];
1805
1806 u32 dgst1[5];
1807 u32 out1[5];
1808
1809 u32 dgst2[5];
1810 u32 out2[5];
1811
1812 } mywallet_tmp_t;
1813
1814 typedef struct
1815 {
1816 u32 ipad[5];
1817 u32 opad[5];
1818
1819 u32 dgst[5];
1820 u32 out[5];
1821
1822 } sha1aix_tmp_t;
1823
1824 typedef struct
1825 {
1826 u32 ipad[8];
1827 u32 opad[8];
1828
1829 u32 dgst[8];
1830 u32 out[8];
1831
1832 } sha256aix_tmp_t;
1833
1834 typedef struct
1835 {
1836 u64 ipad[8];
1837 u64 opad[8];
1838
1839 u64 dgst[8];
1840 u64 out[8];
1841
1842 } sha512aix_tmp_t;
1843
1844 typedef struct
1845 {
1846 u32 ipad[8];
1847 u32 opad[8];
1848
1849 u32 dgst[8];
1850 u32 out[8];
1851
1852 } lastpass_tmp_t;
1853
1854 typedef struct
1855 {
1856 u64 digest_buf[8];
1857
1858 } drupal7_tmp_t;
1859
1860 typedef struct
1861 {
1862 u32 ipad[5];
1863 u32 opad[5];
1864
1865 u32 dgst[5];
1866 u32 out[5];
1867
1868 } lotus8_tmp_t;
1869
1870 typedef struct
1871 {
1872 u32 out[5];
1873
1874 } office2007_tmp_t;
1875
1876 typedef struct
1877 {
1878 u32 out[5];
1879
1880 } office2010_tmp_t;
1881
1882 typedef struct
1883 {
1884 u64 out[8];
1885
1886 } office2013_tmp_t;
1887
1888 typedef struct
1889 {
1890 u32 digest_buf[5];
1891
1892 } saph_sha1_tmp_t;
1893
1894 typedef struct
1895 {
1896 u32 block[16];
1897
1898 u32 dgst[8];
1899
1900 u32 block_len;
1901 u32 final_len;
1902
1903 } seven_zip_tmp_t;
1904
1905 typedef struct
1906 {
1907 u32 Kc[16];
1908 u32 Kd[16];
1909
1910 u32 iv[2];
1911
1912 } bsdicrypt_tmp_t;
1913
1914 typedef struct
1915 {
1916 u32 dgst[17][5];
1917
1918 } rar3_tmp_t;
1919
1920 typedef struct
1921 {
1922 u32 user[16];
1923
1924 } cram_md5_t;
1925
1926 typedef struct
1927 {
1928 u32 iv_buf[4];
1929 u32 iv_len;
1930
1931 u32 salt_buf[4];
1932 u32 salt_len;
1933
1934 u32 crc;
1935
1936 u32 data_buf[96];
1937 u32 data_len;
1938
1939 u32 unpack_size;
1940
1941 } seven_zip_t;
1942
1943 typedef struct
1944 {
1945 u32 key;
1946 u64 val;
1947
1948 } hcstat_table_t;
1949
1950 typedef struct
1951 {
1952 u32 cs_buf[0x100];
1953 u32 cs_len;
1954
1955 } cs_t;
1956
1957 typedef struct
1958 {
1959 u32 cmds[0x100];
1960
1961 } kernel_rule_t;
1962
1963 typedef struct
1964 {
1965 u32 gidvid;
1966 u32 il_pos;
1967
1968 } plain_t;
1969
1970 typedef struct
1971 {
1972 u32 i[16];
1973
1974 u32 pw_len;
1975
1976 u32 alignment_placeholder_1;
1977 u32 alignment_placeholder_2;
1978 u32 alignment_placeholder_3;
1979
1980 } pw_t;
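// pw_t carries one password candidate: up to 16 32-bit words of data plus its
// length. The three placeholder words pad the struct to 80 bytes, presumably so
// the host- and device-side layouts stay in sync now that the old pw_cache
// indirection is gone (see the commit subject above).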
1981
1982 typedef struct
1983 {
1984 u32 i;
1985
1986 } bf_t;
1987
1988 typedef struct
1989 {
1990 u32 i[8];
1991
1992 u32 pw_len;
1993
1994 } comb_t;
1995
1996 typedef struct
1997 {
1998 u32 b[32];
1999
2000 } bs_word_t;
2001
2002 typedef struct
2003 {
2004 uint4 P[64];
2005
2006 } scrypt_tmp_t;