173c468454597841e6a19f500501b376179315a3
[hashcat.git] / OpenCL / types_ocl.c
1 /**
2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * magnum <john.magnum@hushmail.com>
4 *
5 * License.....: MIT
6 */
7
8 #define DEVICE_TYPE_CPU 2
9 #define DEVICE_TYPE_GPU 4
10
11 typedef uchar u8;
12 typedef ushort u16;
13 typedef uint u32;
14 typedef ulong u64;
15
16 #ifndef NEW_SIMD_CODE
17 #undef VECT_SIZE
18 #define VECT_SIZE 1
19 #endif
20
21 #define CONCAT(a, b) a##b
22 #define VTYPE(type, width) CONCAT(type, width)
23
24 #if VECT_SIZE == 1
25 typedef uchar u8x;
26 typedef ushort u16x;
27 typedef uint u32x;
28 typedef ulong u64x;
29 #else
30 typedef VTYPE(uchar, VECT_SIZE) u8x;
31 typedef VTYPE(ushort, VECT_SIZE) u16x;
32 typedef VTYPE(uint, VECT_SIZE) u32x;
33 typedef VTYPE(ulong, VECT_SIZE) u64x;
34 #endif
35
36 // this one needs to die
37 #define allx(r) r
38
39 static inline u32 l32_from_64_S (u64 a)
40 {
41 const u32 r = (u32) (a);
42
43 return r;
44 }
45
46 static inline u32 h32_from_64_S (u64 a)
47 {
48 a >>= 32;
49
50 const u32 r = (u32) (a);
51
52 return r;
53 }
54
55 static inline u64 hl32_to_64_S (const u32 a, const u32 b)
56 {
57 return as_ulong ((uint2) (b, a));
58 }
59
60 static inline u32x l32_from_64 (u64x a)
61 {
62 u32x r;
63
64 #if VECT_SIZE == 1
65 r = (u32) a;
66 #endif
67
68 #if VECT_SIZE >= 2
69 r.s0 = (u32) a.s0;
70 r.s1 = (u32) a.s1;
71 #endif
72
73 #if VECT_SIZE >= 4
74 r.s2 = (u32) a.s2;
75 r.s3 = (u32) a.s3;
76 #endif
77
78 #if VECT_SIZE >= 8
79 r.s4 = (u32) a.s4;
80 r.s5 = (u32) a.s5;
81 r.s6 = (u32) a.s6;
82 r.s7 = (u32) a.s7;
83 #endif
84
85 #if VECT_SIZE >= 16
86 r.s8 = (u32) a.s8;
87 r.s9 = (u32) a.s9;
88 r.sa = (u32) a.sa;
89 r.sb = (u32) a.sb;
90 r.sc = (u32) a.sc;
91 r.sd = (u32) a.sd;
92 r.se = (u32) a.se;
93 r.sf = (u32) a.sf;
94 #endif
95
96 return r;
97 }
98
99 static inline u32x h32_from_64 (u64x a)
100 {
101 a >>= 32;
102
103 u32x r;
104
105 #if VECT_SIZE == 1
106 r = (u32) a;
107 #endif
108
109 #if VECT_SIZE >= 2
110 r.s0 = (u32) a.s0;
111 r.s1 = (u32) a.s1;
112 #endif
113
114 #if VECT_SIZE >= 4
115 r.s2 = (u32) a.s2;
116 r.s3 = (u32) a.s3;
117 #endif
118
119 #if VECT_SIZE >= 8
120 r.s4 = (u32) a.s4;
121 r.s5 = (u32) a.s5;
122 r.s6 = (u32) a.s6;
123 r.s7 = (u32) a.s7;
124 #endif
125
126 #if VECT_SIZE >= 16
127 r.s8 = (u32) a.s8;
128 r.s9 = (u32) a.s9;
129 r.sa = (u32) a.sa;
130 r.sb = (u32) a.sb;
131 r.sc = (u32) a.sc;
132 r.sd = (u32) a.sd;
133 r.se = (u32) a.se;
134 r.sf = (u32) a.sf;
135 #endif
136
137 return r;
138 }
139
140 static inline u64x hl32_to_64 (const u32x a, const u32x b)
141 {
142 u64x r;
143
144 #if VECT_SIZE == 1
145 r = as_ulong ((uint2) (b, a));
146 #endif
147
148 #if VECT_SIZE >= 2
149 r.s0 = as_ulong ((uint2) (b.s0, a.s0));
150 r.s1 = as_ulong ((uint2) (b.s1, a.s1));
151 #endif
152
153 #if VECT_SIZE >= 4
154 r.s2 = as_ulong ((uint2) (b.s2, a.s2));
155 r.s3 = as_ulong ((uint2) (b.s3, a.s3));
156 #endif
157
158 #if VECT_SIZE >= 8
159 r.s4 = as_ulong ((uint2) (b.s4, a.s4));
160 r.s5 = as_ulong ((uint2) (b.s5, a.s5));
161 r.s6 = as_ulong ((uint2) (b.s6, a.s6));
162 r.s7 = as_ulong ((uint2) (b.s7, a.s7));
163 #endif
164
165 #if VECT_SIZE >= 16
166 r.s8 = as_ulong ((uint2) (b.s8, a.s8));
167 r.s9 = as_ulong ((uint2) (b.s9, a.s9));
168 r.sa = as_ulong ((uint2) (b.sa, a.sa));
169 r.sb = as_ulong ((uint2) (b.sb, a.sb));
170 r.sc = as_ulong ((uint2) (b.sc, a.sc));
171 r.sd = as_ulong ((uint2) (b.sd, a.sd));
172 r.se = as_ulong ((uint2) (b.se, a.se));
173 r.sf = as_ulong ((uint2) (b.sf, a.sf));
174 #endif
175
176 return r;
177 }
178
179 #ifdef IS_AMD
180 static inline u32 swap32_S (const u32 v)
181 {
182 return (as_uint (as_uchar4 (v).s3210));
183 }
184
185 static inline u64 swap64_S (const u64 v)
186 {
187 return (as_ulong (as_uchar8 (v).s76543210));
188 }
189
190 static inline u32 rotr32_S (const u32 a, const u32 n)
191 {
192 return rotate (a, 32 - n);
193 }
194
195 static inline u32 rotl32_S (const u32 a, const u32 n)
196 {
197 return rotate (a, n);
198 }
199
200 static inline u64 rotr64_S (const u64 a, const u32 n)
201 {
202 #if DEVICE_TYPE == DEVICE_TYPE_CPU
203
204 const u64 r = rotate (a, (u64) 64 - n);
205
206 #else
207
208 const u32 a0 = h32_from_64_S (a);
209 const u32 a1 = l32_from_64_S (a);
210
211 const u32 t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
212 const u32 t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
213
214 const u64 r = hl32_to_64_S (t0, t1);
215
216 #endif
217
218 return r;
219 }
220
221 static inline u64 rotl64_S (const u64 a, const u32 n)
222 {
223 return rotr64_S (a, 64 - n);
224 }
225
226 static inline u32x swap32 (const u32x v)
227 {
228 return ((v >> 24) & 0x000000ff)
229 | ((v >> 8) & 0x0000ff00)
230 | ((v << 8) & 0x00ff0000)
231 | ((v << 24) & 0xff000000);
232 }
233
234 static inline u64x swap64 (const u64x v)
235 {
236 return ((v >> 56) & 0x00000000000000ff)
237 | ((v >> 40) & 0x000000000000ff00)
238 | ((v >> 24) & 0x0000000000ff0000)
239 | ((v >> 8) & 0x00000000ff000000)
240 | ((v << 8) & 0x000000ff00000000)
241 | ((v << 24) & 0x0000ff0000000000)
242 | ((v << 40) & 0x00ff000000000000)
243 | ((v << 56) & 0xff00000000000000);
244 }
245
246 static inline u32x rotr32 (const u32x a, const u32 n)
247 {
248 return rotate (a, 32 - n);
249 }
250
251 static inline u32x rotl32 (const u32x a, const u32 n)
252 {
253 return rotate (a, n);
254 }
255
256 static inline u64x rotr64 (const u64x a, const u32 n)
257 {
258 #if DEVICE_TYPE == DEVICE_TYPE_CPU
259
260 const u64x r = rotate (a, (u64) 64 - n);
261
262 #else
263
264 const u32x a0 = h32_from_64 (a);
265 const u32x a1 = l32_from_64 (a);
266
267 const u32x t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
268 const u32x t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
269
270 const u64x r = hl32_to_64 (t0, t1);
271
272 #endif
273
274 return r;
275 }
276
277 static inline u64x rotl64 (const u64x a, const u32 n)
278 {
279 return rotr64 (a, 64 - n);
280 }
281
282 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
283 {
284 return amd_bfe (a, b, c);
285 }
286
287 static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
288 {
289 return amd_bytealign (a, b, c);
290 }
291 #endif
292
293 #ifdef IS_NV
294 static inline u32 swap32_S (const u32 v)
295 {
296 u32 r;
297
298 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));
299
300 return r;
301 }
302
303 static inline u64 swap64_S (const u64 v)
304 {
305 u32 il;
306 u32 ir;
307
308 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(v));
309
310 u32 tl;
311 u32 tr;
312
313 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl) : "r"(il));
314 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));
315
316 u64 r;
317
318 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));
319
320 return r;
321 }
322
323 static inline u32 rotr32_S (const u32 a, const u32 n)
324 {
325 return rotate (a, 32 - n);
326 }
327
328 static inline u32 rotl32_S (const u32 a, const u32 n)
329 {
330 return rotate (a, n);
331 }
332
333 #if CUDA_ARCH >= 350
334 static inline u64 rotr64_S (const u64 a, const u32 n)
335 {
336 u32 il;
337 u32 ir;
338
339 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
340
341 u32 tl;
342 u32 tr;
343
344 if (n >= 32)
345 {
346 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
347 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
348 }
349 else
350 {
351 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
352 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
353 }
354
355 u64 r;
356
357 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
358
359 return r;
360 }
361 #else
362 static inline u64 rotr64_S (const u64 a, const u32 n)
363 {
364 return rotate (a, (u64) 64 - n);
365 }
366 #endif
367
368 static inline u64 rotl64_S (const u64 a, const u32 n)
369 {
370 return rotr64_S (a, 64 - n);
371 }
372
373 #if CUDA_ARCH >= 500
374 static inline u32 lut3_2d_S (const u32 a, const u32 b, const u32 c)
375 {
376 u32 r;
377
378 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
379
380 return r;
381 }
382
383 static inline u32 lut3_39_S (const u32 a, const u32 b, const u32 c)
384 {
385 u32 r;
386
387 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
388
389 return r;
390 }
391
392 static inline u32 lut3_59_S (const u32 a, const u32 b, const u32 c)
393 {
394 u32 r;
395
396 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
397
398 return r;
399 }
400
401 static inline u32 lut3_96_S (const u32 a, const u32 b, const u32 c)
402 {
403 u32 r;
404
405 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
406
407 return r;
408 }
409
410 static inline u32 lut3_e4_S (const u32 a, const u32 b, const u32 c)
411 {
412 u32 r;
413
414 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
415
416 return r;
417 }
418
419 static inline u32 lut3_e8_S (const u32 a, const u32 b, const u32 c)
420 {
421 u32 r;
422
423 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
424
425 return r;
426 }
427
428 static inline u32 lut3_ca_S (const u32 a, const u32 b, const u32 c)
429 {
430 u32 r;
431
432 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
433
434 return r;
435 }
436 #endif
437
438 static inline u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
439 {
440 u32 r;
441
442 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
443
444 return r;
445 }
446
447 static inline u32x swap32 (const u32x v)
448 {
449 return ((v >> 24) & 0x000000ff)
450 | ((v >> 8) & 0x0000ff00)
451 | ((v << 8) & 0x00ff0000)
452 | ((v << 24) & 0xff000000);
453 }
454
455 static inline u64x swap64 (const u64x v)
456 {
457 return ((v >> 56) & 0x00000000000000ff)
458 | ((v >> 40) & 0x000000000000ff00)
459 | ((v >> 24) & 0x0000000000ff0000)
460 | ((v >> 8) & 0x00000000ff000000)
461 | ((v << 8) & 0x000000ff00000000)
462 | ((v << 24) & 0x0000ff0000000000)
463 | ((v << 40) & 0x00ff000000000000)
464 | ((v << 56) & 0xff00000000000000);
465 }
466
467 static inline u32x rotr32 (const u32x a, const u32 n)
468 {
469 return rotate (a, 32 - n);
470 }
471
472 static inline u32x rotl32 (const u32x a, const u32 n)
473 {
474 return rotate (a, n);
475 }
476
477 #if CUDA_ARCH >= 350
478 static inline u64x rotr64 (const u64x a, const u32 n)
479 {
480 u64x r;
481
482 u32 il;
483 u32 ir;
484 u32 tl;
485 u32 tr;
486
487 #if VECT_SIZE == 1
488
489 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
490
491 if (n >= 32)
492 {
493 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
494 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
495 }
496 else
497 {
498 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
499 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
500 }
501
502 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
503
504 #endif
505
506 #if VECT_SIZE >= 2
507
508 {
509 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s0));
510
511 if (n >= 32)
512 {
513 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
514 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
515 }
516 else
517 {
518 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
519 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
520 }
521
522 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s0) : "r"(tl), "r"(tr));
523 }
524
525 {
526 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s1));
527
528 if (n >= 32)
529 {
530 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
531 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
532 }
533 else
534 {
535 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
536 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
537 }
538
539 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s1) : "r"(tl), "r"(tr));
540 }
541
542 #endif
543
544 #if VECT_SIZE >= 4
545
546 {
547 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s2));
548
549 if (n >= 32)
550 {
551 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
552 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
553 }
554 else
555 {
556 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
557 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
558 }
559
560 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s2) : "r"(tl), "r"(tr));
561 }
562
563 {
564 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s3));
565
566 if (n >= 32)
567 {
568 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
569 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
570 }
571 else
572 {
573 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
574 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
575 }
576
577 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s3) : "r"(tl), "r"(tr));
578 }
579
580 #endif
581
582 #if VECT_SIZE >= 8
583
584 {
585 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s4));
586
587 if (n >= 32)
588 {
589 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
590 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
591 }
592 else
593 {
594 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
595 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
596 }
597
598 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s4) : "r"(tl), "r"(tr));
599 }
600
601 {
602 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s5));
603
604 if (n >= 32)
605 {
606 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
607 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
608 }
609 else
610 {
611 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
612 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
613 }
614
615 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s5) : "r"(tl), "r"(tr));
616 }
617
618 {
619 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s6));
620
621 if (n >= 32)
622 {
623 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
624 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
625 }
626 else
627 {
628 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
629 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
630 }
631
632 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s6) : "r"(tl), "r"(tr));
633 }
634
635 {
636 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s7));
637
638 if (n >= 32)
639 {
640 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
641 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
642 }
643 else
644 {
645 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
646 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
647 }
648
649 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s7) : "r"(tl), "r"(tr));
650 }
651
652 #endif
653
654 #if VECT_SIZE >= 16
655
656 {
657 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s8));
658
659 if (n >= 32)
660 {
661 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
662 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
663 }
664 else
665 {
666 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
667 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
668 }
669
670 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s8) : "r"(tl), "r"(tr));
671 }
672
673 {
674 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s9));
675
676 if (n >= 32)
677 {
678 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
679 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
680 }
681 else
682 {
683 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
684 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
685 }
686
687 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s9) : "r"(tl), "r"(tr));
688 }
689
690 {
691 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sa));
692
693 if (n >= 32)
694 {
695 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
696 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
697 }
698 else
699 {
700 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
701 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
702 }
703
704 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sa) : "r"(tl), "r"(tr));
705 }
706
707 {
708 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sb));
709
710 if (n >= 32)
711 {
712 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
713 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
714 }
715 else
716 {
717 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
718 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
719 }
720
721 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sb) : "r"(tl), "r"(tr));
722 }
723
724 {
725 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sc));
726
727 if (n >= 32)
728 {
729 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
730 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
731 }
732 else
733 {
734 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
735 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
736 }
737
738 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sc) : "r"(tl), "r"(tr));
739 }
740
741 {
742 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sd));
743
744 if (n >= 32)
745 {
746 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
747 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
748 }
749 else
750 {
751 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
752 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
753 }
754
755 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sd) : "r"(tl), "r"(tr));
756 }
757
758 {
759 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.se));
760
761 if (n >= 32)
762 {
763 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
764 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
765 }
766 else
767 {
768 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
769 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
770 }
771
772 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.se) : "r"(tl), "r"(tr));
773 }
774
775 {
776 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sf));
777
778 if (n >= 32)
779 {
780 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
781 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
782 }
783 else
784 {
785 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
786 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
787 }
788
789 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sf) : "r"(tl), "r"(tr));
790 }
791
792 #endif
793
794 return r;
795 }
796 #else
797 static inline u64x rotr64 (const u64x a, const u32 n)
798 {
799 return rotate (a, (u64) 64 - n);
800 }
801 #endif
802
803 static inline u64x rotl64 (const u64x a, const u32 n)
804 {
805 return rotr64 (a, (u64) 64 - n);
806 }
807
808 static inline u32x __byte_perm (const u32x a, const u32x b, const u32x c)
809 {
810 u32x r;
811
812 #if VECT_SIZE == 1
813 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c) );
814 #endif
815
816 #if VECT_SIZE >= 2
817 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(a.s0), "r"(b.s0), "r"(c.s0));
818 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(a.s1), "r"(b.s1), "r"(c.s1));
819 #endif
820
821 #if VECT_SIZE >= 4
822 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(a.s2), "r"(b.s2), "r"(c.s2));
823 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(a.s3), "r"(b.s3), "r"(c.s3));
824 #endif
825
826 #if VECT_SIZE >= 8
827 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(a.s4), "r"(b.s4), "r"(c.s4));
828 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(a.s5), "r"(b.s5), "r"(c.s5));
829 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(a.s6), "r"(b.s6), "r"(c.s6));
830 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(a.s7), "r"(b.s7), "r"(c.s7));
831 #endif
832
833 #if VECT_SIZE >= 16
834 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(a.s8), "r"(b.s8), "r"(c.s8));
835 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(a.s9), "r"(b.s9), "r"(c.s9));
836 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(a.sa), "r"(b.sa), "r"(c.sa));
837 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(a.sb), "r"(b.sb), "r"(c.sb));
838 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(a.sc), "r"(b.sc), "r"(c.sc));
839 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(a.sd), "r"(b.sd), "r"(c.sd));
840 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(a.se), "r"(b.se), "r"(c.se));
841 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(a.sf), "r"(b.sf), "r"(c.sf));
842 #endif
843
844 return r;
845 }
846
847 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
848 {
849 u32 r;
850
851 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
852
853 return r;
854 }
855
856 #if CUDA_ARCH >= 350
857 static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
858 {
859 u32 r;
860
861 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));
862
863 return r;
864 }
865 #else
866 static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
867 {
868 return __byte_perm_S (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);
869 }
870 #endif
871
872 #if CUDA_ARCH >= 500
873 static inline u32x lut3_2d (const u32x a, const u32x b, const u32x c)
874 {
875 u32x r;
876
877 #if VECT_SIZE == 1
878 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
879 #endif
880
881 #if VECT_SIZE >= 2
882 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
883 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
884 #endif
885
886 #if VECT_SIZE >= 4
887 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
888 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
889 #endif
890
891 #if VECT_SIZE >= 8
892 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
893 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
894 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
895 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
896 #endif
897
898 #if VECT_SIZE >= 16
899 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
900 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
901 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
902 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
903 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
904 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
905 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
906 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
907 #endif
908
909 return r;
910 }
911
912 static inline u32x lut3_39 (const u32x a, const u32x b, const u32x c)
913 {
914 u32x r;
915
916 #if VECT_SIZE == 1
917 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
918 #endif
919
920 #if VECT_SIZE >= 2
921 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
922 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
923 #endif
924
925 #if VECT_SIZE >= 4
926 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
927 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
928 #endif
929
930 #if VECT_SIZE >= 8
931 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
932 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
933 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
934 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
935 #endif
936
937 #if VECT_SIZE >= 16
938 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
939 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
940 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
941 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
942 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
943 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
944 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
945 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
946 #endif
947
948 return r;
949 }
950
951 static inline u32x lut3_59 (const u32x a, const u32x b, const u32x c)
952 {
953 u32x r;
954
955 #if VECT_SIZE == 1
956 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
957 #endif
958
959 #if VECT_SIZE >= 2
960 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
961 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
962 #endif
963
964 #if VECT_SIZE >= 4
965 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
966 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
967 #endif
968
969 #if VECT_SIZE >= 8
970 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
971 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
972 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
973 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
974 #endif
975
976 #if VECT_SIZE >= 16
977 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
978 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
979 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
980 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
981 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
982 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
983 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
984 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
985 #endif
986
987 return r;
988 }
989
990 static inline u32x lut3_96 (const u32x a, const u32x b, const u32x c)
991 {
992 u32x r;
993
994 #if VECT_SIZE == 1
995 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
996 #endif
997
998 #if VECT_SIZE >= 2
999 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1000 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1001 #endif
1002
1003 #if VECT_SIZE >= 4
1004 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1005 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1006 #endif
1007
1008 #if VECT_SIZE >= 8
1009 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1010 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1011 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1012 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1013 #endif
1014
1015 #if VECT_SIZE >= 16
1016 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1017 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1018 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1019 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1020 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1021 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1022 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1023 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1024 #endif
1025
1026 return r;
1027 }
1028
/**
 * Three-input bitwise LUT 0xe4 using the PTX lop3.b32 instruction.
 *
 * Per bit the table evaluates to (a & c) | (b & ~c): the result bit comes
 * from a where c is set and from b where c is clear. Every enabled vector
 * lane is handled by its own scalar lop3.b32.
 */
static inline u32x lut3_e4 (const u32x a, const u32x b, const u32x c)
{
  // applies the LUT to lane .s<n> of r, a, b and c
  #define LUT3_E4_LANE(n) asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s##n) : "r" (a.s##n), "r" (b.s##n), "r" (c.s##n))

  u32x r;

  #if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
  #endif

  #if VECT_SIZE >= 2
  LUT3_E4_LANE (0);
  LUT3_E4_LANE (1);
  #endif

  #if VECT_SIZE >= 4
  LUT3_E4_LANE (2);
  LUT3_E4_LANE (3);
  #endif

  #if VECT_SIZE >= 8
  LUT3_E4_LANE (4);
  LUT3_E4_LANE (5);
  LUT3_E4_LANE (6);
  LUT3_E4_LANE (7);
  #endif

  #if VECT_SIZE >= 16
  LUT3_E4_LANE (8);
  LUT3_E4_LANE (9);
  LUT3_E4_LANE (a);
  LUT3_E4_LANE (b);
  LUT3_E4_LANE (c);
  LUT3_E4_LANE (d);
  LUT3_E4_LANE (e);
  LUT3_E4_LANE (f);
  #endif

  #undef LUT3_E4_LANE

  return r;
}
1067
/**
 * Three-input bitwise LUT 0xe8 using the PTX lop3.b32 instruction.
 *
 * Per bit the table evaluates to the majority of the three inputs:
 * (a & b) | (a & c) | (b & c). Every enabled vector lane is handled by its
 * own scalar lop3.b32.
 */
static inline u32x lut3_e8 (const u32x a, const u32x b, const u32x c)
{
  // applies the LUT to lane .s<n> of r, a, b and c
  #define LUT3_E8_LANE(n) asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s##n) : "r" (a.s##n), "r" (b.s##n), "r" (c.s##n))

  u32x r;

  #if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
  #endif

  #if VECT_SIZE >= 2
  LUT3_E8_LANE (0);
  LUT3_E8_LANE (1);
  #endif

  #if VECT_SIZE >= 4
  LUT3_E8_LANE (2);
  LUT3_E8_LANE (3);
  #endif

  #if VECT_SIZE >= 8
  LUT3_E8_LANE (4);
  LUT3_E8_LANE (5);
  LUT3_E8_LANE (6);
  LUT3_E8_LANE (7);
  #endif

  #if VECT_SIZE >= 16
  LUT3_E8_LANE (8);
  LUT3_E8_LANE (9);
  LUT3_E8_LANE (a);
  LUT3_E8_LANE (b);
  LUT3_E8_LANE (c);
  LUT3_E8_LANE (d);
  LUT3_E8_LANE (e);
  LUT3_E8_LANE (f);
  #endif

  #undef LUT3_E8_LANE

  return r;
}
1106
/**
 * Three-input bitwise LUT 0xca using the PTX lop3.b32 instruction.
 *
 * Per bit the table evaluates to (a & b) | (~a & c): a selects between b
 * (bit set) and c (bit clear). Every enabled vector lane is handled by its
 * own scalar lop3.b32.
 */
static inline u32x lut3_ca (const u32x a, const u32x b, const u32x c)
{
  // applies the LUT to lane .s<n> of r, a, b and c
  #define LUT3_CA_LANE(n) asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s##n) : "r" (a.s##n), "r" (b.s##n), "r" (c.s##n))

  u32x r;

  #if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
  #endif

  #if VECT_SIZE >= 2
  LUT3_CA_LANE (0);
  LUT3_CA_LANE (1);
  #endif

  #if VECT_SIZE >= 4
  LUT3_CA_LANE (2);
  LUT3_CA_LANE (3);
  #endif

  #if VECT_SIZE >= 8
  LUT3_CA_LANE (4);
  LUT3_CA_LANE (5);
  LUT3_CA_LANE (6);
  LUT3_CA_LANE (7);
  #endif

  #if VECT_SIZE >= 16
  LUT3_CA_LANE (8);
  LUT3_CA_LANE (9);
  LUT3_CA_LANE (a);
  LUT3_CA_LANE (b);
  LUT3_CA_LANE (c);
  LUT3_CA_LANE (d);
  LUT3_CA_LANE (e);
  LUT3_CA_LANE (f);
  #endif

  #undef LUT3_CA_LANE

  return r;
}
1145
1146 #endif
1147 #endif
1148
1149 #ifdef IS_GENERIC
/**
 * Reverse the byte order of a 32-bit scalar by reinterpreting it as four
 * bytes and swizzling them into the opposite order.
 */
static inline u32 swap32_S (const u32 v)
{
  const u32 swapped = as_uint (as_uchar4 (v).s3210);

  return swapped;
}
1154
/**
 * Reverse the byte order of a 64-bit scalar by reinterpreting it as eight
 * bytes and swizzling them into the opposite order.
 */
static inline u64 swap64_S (const u64 v)
{
  const u64 swapped = as_ulong (as_uchar8 (v).s76543210);

  return swapped;
}
1159
/**
 * Rotate the 32-bit scalar a right by n bits, expressed as a left rotate
 * by (32 - n) through the OpenCL rotate() built-in.
 */
static inline u32 rotr32_S (const u32 a, const u32 n)
{
  const u32 left = 32 - n;

  return rotate (a, left);
}
1164
/**
 * Rotate the 32-bit scalar a left by n bits via the OpenCL rotate() built-in.
 */
static inline u32 rotl32_S (const u32 a, const u32 n)
{
  const u32 r = rotate (a, n);

  return r;
}
1169
/**
 * Rotate the 64-bit scalar a right by n bits, expressed as a left rotate
 * by (64 - n); the count is widened to 64 bit to match rotate()'s operand type.
 */
static inline u64 rotr64_S (const u64 a, const u32 n)
{
  const u64 left = (u64) 64 - n;

  return rotate (a, left);
}
1174
/**
 * Rotate the 64-bit scalar a left by n bits; the count is widened to 64 bit
 * to match rotate()'s operand type.
 */
static inline u64 rotl64_S (const u64 a, const u32 n)
{
  const u64 left = (u64) n;

  return rotate (a, left);
}
1179
/**
 * Generic scalar replacement for amd_bytealign.
 *
 * Concatenates a (high word) and b (low word) into one 64-bit value, shifts
 * it right by (c & 3) bytes and returns the low 32 bits.
 */
static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
{
  const u64 hi = ((u64) a) << 32;
  const u64 lo = (u64) b;

  // only the two low bits of c are used: 0..3 bytes
  const u32 shift = (c & 3) * 8;

  return (u32) ((hi | lo) >> shift);
}
1186
/**
 * Reverse the byte order of every 32-bit lane of v using shifts and masks.
 */
static inline u32x swap32 (const u32x v)
{
  const u32x b0 = (v >> 24) & 0x000000ff; // byte 3 -> byte 0
  const u32x b1 = (v >>  8) & 0x0000ff00; // byte 2 -> byte 1
  const u32x b2 = (v <<  8) & 0x00ff0000; // byte 1 -> byte 2
  const u32x b3 = (v << 24) & 0xff000000; // byte 0 -> byte 3

  return b0 | b1 | b2 | b3;
}
1194
/**
 * Reverse the byte order of every 64-bit lane of v using shifts and masks.
 */
static inline u64x swap64 (const u64x v)
{
  const u64x b0 = (v >> 56) & 0x00000000000000ff; // byte 7 -> byte 0
  const u64x b1 = (v >> 40) & 0x000000000000ff00; // byte 6 -> byte 1
  const u64x b2 = (v >> 24) & 0x0000000000ff0000; // byte 5 -> byte 2
  const u64x b3 = (v >>  8) & 0x00000000ff000000; // byte 4 -> byte 3
  const u64x b4 = (v <<  8) & 0x000000ff00000000; // byte 3 -> byte 4
  const u64x b5 = (v << 24) & 0x0000ff0000000000; // byte 2 -> byte 5
  const u64x b6 = (v << 40) & 0x00ff000000000000; // byte 1 -> byte 6
  const u64x b7 = (v << 56) & 0xff00000000000000; // byte 0 -> byte 7

  return b0 | b1 | b2 | b3 | b4 | b5 | b6 | b7;
}
1206
/**
 * Rotate every 32-bit lane of a right by n bits, expressed as a left rotate
 * by (32 - n). The same count is used for all lanes.
 */
static inline u32x rotr32 (const u32x a, const u32 n)
{
  const u32 left = 32 - n;

  return rotate (a, left);
}
1211
/**
 * Rotate every 32-bit lane of a left by n bits. The same count is used for
 * all lanes.
 */
static inline u32x rotl32 (const u32x a, const u32 n)
{
  const u32x r = rotate (a, n);

  return r;
}
1216
/**
 * Rotate every 64-bit lane of a right by n bits, expressed as a left rotate
 * by (64 - n). The same count is used for all lanes.
 */
static inline u64x rotr64 (const u64x a, const u32 n)
{
  const u64 left = (u64) 64 - n;

  return rotate (a, left);
}
1221
/**
 * Rotate every 64-bit lane of a left by n bits. The same count is used for
 * all lanes.
 */
static inline u64x rotl64 (const u64x a, const u32 n)
{
  const u64 left = (u64) n;

  return rotate (a, left);
}
1226
/**
 * Generic bit-field extract: shifts a right by b and keeps the low c bits.
 *
 * The helper macros BIT, BIT_MASK and BFE are defined here and remain defined
 * for the rest of the translation unit.
 * NOTE(review): BIT shifts a signed int literal, so the mask presumably is
 * only meaningful for c < 32 - confirm against callers.
 */
static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
  #define BIT(x)      (1 << (x))
  #define BIT_MASK(x) (BIT (x) - 1)
  #define BFE(x,y,z)  (((x) >> (y)) & BIT_MASK (z))

  const u32 field = BFE (a, b, c);

  return field;
}
1235
/**
 * Generic vector replacement for amd_bytealign.
 *
 * Per lane, concatenates a (high word) and b (low word) into one 64-bit
 * value, shifts it right by (c & 3) bytes and returns the low 32 bits.
 * The same byte offset c is applied to every lane.
 */
static inline u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
{
  // only the two low bits of c are used: 0..3 bytes
  const u32 shift = (c & 3) * 8;

  #if VECT_SIZE == 1
  const u64x hi = (u64x) (a);
  const u64x lo = (u64x) (b);

  const u64x tmp = ((hi << 32) | lo) >> shift;

  return (u32x) (tmp);
  #elif VECT_SIZE == 2
  const u64x hi = (u64x) (a.s0, a.s1);
  const u64x lo = (u64x) (b.s0, b.s1);

  const u64x tmp = ((hi << 32) | lo) >> shift;

  return (u32x) (tmp.s0, tmp.s1);
  #elif VECT_SIZE == 4
  const u64x hi = (u64x) (a.s0, a.s1, a.s2, a.s3);
  const u64x lo = (u64x) (b.s0, b.s1, b.s2, b.s3);

  const u64x tmp = ((hi << 32) | lo) >> shift;

  return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3);
  #elif VECT_SIZE == 8
  const u64x hi = (u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7);
  const u64x lo = (u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7);

  const u64x tmp = ((hi << 32) | lo) >> shift;

  return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7);
  #elif VECT_SIZE == 16
  const u64x hi = (u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7, a.s8, a.s9, a.sa, a.sb, a.sc, a.sd, a.se, a.sf);
  const u64x lo = (u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7, b.s8, b.s9, b.sa, b.sb, b.sc, b.sd, b.se, b.sf);

  const u64x tmp = ((hi << 32) | lo) >> shift;

  return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7, tmp.s8, tmp.s9, tmp.sa, tmp.sb, tmp.sc, tmp.sd, tmp.se, tmp.sf);
  #endif
}
1268 #endif
1269
/**
 * Digest buffer whose size (in 32-bit words) is selected at compile time by
 * the hash-family macro that is defined when this file is compiled.
 *
 * The chain is evaluated top to bottom and the first defined macro wins.
 * Each macro therefore appears exactly once: the former second occurrences
 * of _SCRYPT_ (with a conflicting size of 4) and _LOTUS8_ could never be
 * selected and have been dropped. If none of the macros is defined the
 * struct has no members.
 */
typedef struct
{
  #if   defined _DES_
  u32 digest_buf[4];
  #elif defined _MD4_
  u32 digest_buf[4];
  #elif defined _MD5_
  u32 digest_buf[4];
  #elif defined _MD5H_
  u32 digest_buf[4];
  #elif defined _SHA1_
  u32 digest_buf[5];
  #elif defined _BCRYPT_
  u32 digest_buf[6];
  #elif defined _SHA256_
  u32 digest_buf[8];
  #elif defined _SHA384_
  u32 digest_buf[16];
  #elif defined _SHA512_
  u32 digest_buf[16];
  #elif defined _KECCAK_
  u32 digest_buf[50];
  #elif defined _RIPEMD160_
  u32 digest_buf[5];
  #elif defined _WHIRLPOOL_
  u32 digest_buf[16];
  #elif defined _GOST_
  u32 digest_buf[8];
  #elif defined _GOST2012_256_
  u32 digest_buf[8];
  #elif defined _GOST2012_512_
  u32 digest_buf[16];
  #elif defined _SAPB_
  u32 digest_buf[4];
  #elif defined _SAPG_
  u32 digest_buf[5];
  #elif defined _MYSQL323_
  u32 digest_buf[4];
  #elif defined _LOTUS5_
  u32 digest_buf[4];
  #elif defined _LOTUS6_
  u32 digest_buf[4];
  #elif defined _SCRYPT_
  u32 digest_buf[8];
  #elif defined _LOTUS8_
  u32 digest_buf[4];
  #elif defined _OFFICE2007_
  u32 digest_buf[4];
  #elif defined _OFFICE2010_
  u32 digest_buf[4];
  #elif defined _OFFICE2013_
  u32 digest_buf[4];
  #elif defined _OLDOFFICE01_
  u32 digest_buf[4];
  #elif defined _OLDOFFICE34_
  u32 digest_buf[4];
  #elif defined _SIPHASH_
  u32 digest_buf[4];
  #elif defined _PBKDF2_MD5_
  u32 digest_buf[32];
  #elif defined _PBKDF2_SHA1_
  u32 digest_buf[32];
  #elif defined _PBKDF2_SHA256_
  u32 digest_buf[32];
  #elif defined _PBKDF2_SHA512_
  u32 digest_buf[32];
  #elif defined _PDF17L8_
  u32 digest_buf[8];
  #elif defined _CRC32_
  u32 digest_buf[4];
  #elif defined _SEVEN_ZIP_
  u32 digest_buf[4];
  #elif defined _ANDROIDFDE_
  u32 digest_buf[4];
  #elif defined _DCC2_
  u32 digest_buf[4];
  #elif defined _WPA_
  u32 digest_buf[4];
  #elif defined _MD5_SHA1_
  u32 digest_buf[4];
  #elif defined _SHA1_MD5_
  u32 digest_buf[5];
  #elif defined _NETNTLMV2_
  u32 digest_buf[4];
  #elif defined _KRB5PA_
  u32 digest_buf[4];
  #elif defined _CLOUDKEY_
  u32 digest_buf[8];
  #elif defined _PSAFE2_
  u32 digest_buf[5];
  #elif defined _RAR3_
  u32 digest_buf[4];
  #elif defined _SHA256_SHA1_
  u32 digest_buf[8];
  #elif defined _MS_DRSR_
  u32 digest_buf[8];
  #elif defined _ANDROIDFDE_SAMSUNG_
  u32 digest_buf[8];
  #elif defined _RAR5_
  u32 digest_buf[4];
  #elif defined _KRB5TGS_
  u32 digest_buf[4];
  #elif defined _AXCRYPT_
  u32 digest_buf[4];
  #endif

} digest_t;
1381
/**
 * Salt record: salt words, their length, iteration count and sign, plus
 * digest bookkeeping and scrypt parameters.
 * NOTE(review): presumably mirrors a host-side struct - keep field order
 * and sizes in sync.
 */
typedef struct
{
  u32 salt_buf[16];
  u32 salt_buf_pc[8];

  u32 salt_len;
  u32 salt_iter;
  u32 salt_sign[2];

  u32 keccak_mdlen;
  u32 truecrypt_mdlen;

  // digest bookkeeping
  u32 digests_cnt;
  u32 digests_done;

  u32 digests_offset;

  // scrypt parameters
  u32 scrypt_N;
  u32 scrypt_r;
  u32 scrypt_p;
  u32 scrypt_tmto;
  u32 scrypt_phy;
} salt_t;
1406
/**
 * PDF-specific extra salt data: V/R/P values, an enc_md flag, the id, u and
 * o buffers with their lengths, and an RC4 key/data pair.
 */
typedef struct
{
  int V;
  int R;
  int P;

  int enc_md;

  u32 id_buf[8];
  u32 u_buf[32];
  u32 o_buf[32];

  // lengths are stored in the order id, o, u
  int id_len;
  int o_len;
  int u_len;

  u32 rc4key[2];
  u32 rc4data[2];
} pdf_t;
1427
/**
 * WPA extra salt data: 25-word pke buffer, 64-word eapol buffer with its
 * size, and the key version.
 */
typedef struct
{
  u32 pke[25];
  u32 eapol[64];
  int eapol_size;
  int keyver;
} wpa_t;
1436
/**
 * Bitcoin wallet extra salt data: three 64-word buffers (cry_master, ckey,
 * public_key), each with a matching length field.
 */
typedef struct
{
  u32 cry_master_buf[64];
  u32 ckey_buf[64];
  u32 public_key_buf[64];

  u32 cry_master_len;
  u32 ckey_len;
  u32 public_key_len;
} bitcoin_wallet_t;
1448
/**
 * SIP extra salt data: a 30-word salt buffer and a 38-word esalt buffer,
 * each followed by its length.
 */
typedef struct
{
  u32 salt_buf[30];
  u32 salt_len;

  u32 esalt_buf[38];
  u32 esalt_len;
} sip_t;
1458
/**
 * Android FDE extra salt data: a single 384-word data buffer.
 */
typedef struct
{
  u32 data[384];
} androidfde_t;
1464
/**
 * IKE-PSK extra salt data: a 16-word nr buffer and a 128-word msg buffer,
 * each followed by its length.
 */
typedef struct
{
  u32 nr_buf[16];
  u32 nr_len;

  u32 msg_buf[128];
  u32 msg_len;
} ikepsk_t;
1474
/**
 * NetNTLM extra salt data: four length fields followed by the 64-word
 * userdomain buffer and the 256-word challenge buffer.
 */
typedef struct
{
  u32 user_len;
  u32 domain_len;
  u32 srvchall_len;
  u32 clichall_len;

  u32 userdomain_buf[64];
  u32 chall_buf[256];
} netntlm_t;
1486
/**
 * Kerberos 5 pre-auth extra salt data: user, realm, salt, timestamp and
 * checksum buffers.
 */
typedef struct
{
  u32 user[16];
  u32 realm[16];
  u32 salt[32];
  u32 timestamp[16];
  u32 checksum[4];
} krb5pa_t;
1496
/**
 * Kerberos 5 TGS extra salt data: account info, checksum and the edata2
 * buffer (2560 words) together with its length.
 */
typedef struct
{
  u32 account_info[512];
  u32 checksum[4];
  u32 edata2[2560];
  u32 edata2_len;
} krb5tgs_t;
1505
/**
 * Extra salt data with a 16-word salt, 112-word data and 16-word keyfile
 * buffer (the name suggests TrueCrypt - not confirmed from this chunk).
 */
typedef struct
{
  u32 salt_buf[16];
  u32 data_buf[112];
  u32 keyfile_buf[16];
} tc_t;
1513
/**
 * PBKDF2-MD5 extra salt data: a 16-word salt buffer.
 */
typedef struct
{
  u32 salt_buf[16];
} pbkdf2_md5_t;
1519
/**
 * PBKDF2-SHA1 extra salt data: a 16-word salt buffer.
 */
typedef struct
{
  u32 salt_buf[16];
} pbkdf2_sha1_t;
1525
/**
 * PBKDF2-SHA256 extra salt data: a 16-word salt buffer.
 */
typedef struct
{
  u32 salt_buf[16];
} pbkdf2_sha256_t;
1531
/**
 * PBKDF2-SHA512 extra salt data: a 32-word salt buffer (twice the size used
 * by the other PBKDF2 variants in this file).
 */
typedef struct
{
  u32 salt_buf[32];
} pbkdf2_sha512_t;
1537
/**
 * RAKP extra salt data: a 128-word salt buffer and its length.
 */
typedef struct
{
  u32 salt_buf[128];
  u32 salt_len;
} rakp_t;
1544
/**
 * Cloudkey extra salt data: the length comes first, followed by a 512-word
 * data buffer.
 */
typedef struct
{
  u32 data_len;
  u32 data_buf[512];
} cloudkey_t;
1551
/**
 * Office 2007 extra salt data: encrypted verifier (4 words), its hash
 * (5 words) and the key size.
 */
typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[5];

  u32 keySize;
} office2007_t;
1560
/**
 * Office 2010 extra salt data: encrypted verifier (4 words) and its hash
 * (8 words).
 */
typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[8];
} office2010_t;
1567
/**
 * Office 2013 extra salt data: encrypted verifier (4 words) and its hash
 * (8 words).
 */
typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[8];
} office2013_t;
1574
/**
 * Old Office (0/1) extra salt data: version, encrypted verifier (4 words),
 * its hash (4 words) and a 2-word RC4 key.
 */
typedef struct
{
  u32 version;
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[4];
  u32 rc4key[2];
} oldoffice01_t;
1583
/**
 * Old Office (3/4) extra salt data: version, encrypted verifier (4 words),
 * its hash (5 words) and a 2-word RC4 key.
 */
typedef struct
{
  u32 version;
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[5];
  u32 rc4key[2];
} oldoffice34_t;
1592
/**
 * PDF 1.4 temporary state: a 4-word digest and a 4-word output buffer.
 */
typedef struct
{
  u32 digest[4];
  u32 out[4];
} pdf14_tmp_t;
1599
/**
 * PDF 1.7 level 8 temporary state: a 64-byte digest accessible either as
 * 16 x 32-bit or 8 x 64-bit words, plus the digest and W lengths.
 */
typedef struct
{
  // two views onto the same 64 bytes
  union
  {
    u32 dgst32[16];
    u64 dgst64[8];
  };

  u32 dgst_len;
  u32 W_len;
} pdf17l8_tmp_t;
1612
/**
 * phpass temporary state: a 4-word digest buffer.
 */
typedef struct
{
  u32 digest_buf[4];
} phpass_tmp_t;
1618
/**
 * md5crypt temporary state: a 4-word digest buffer.
 */
typedef struct
{
  u32 digest_buf[4];
} md5crypt_tmp_t;
1624
/**
 * sha256crypt temporary state: 8-word alt_result plus 4-word p_bytes and
 * s_bytes buffers.
 */
typedef struct
{
  u32 alt_result[8];

  u32 p_bytes[4];
  u32 s_bytes[4];
} sha256crypt_tmp_t;
1633
/**
 * sha512crypt temporary state, held in 64-bit words: 8-word l_alt_result
 * plus 2-word l_p_bytes and l_s_bytes buffers.
 */
typedef struct
{
  u64 l_alt_result[8];

  u64 l_p_bytes[2];
  u64 l_s_bytes[2];
} sha512crypt_tmp_t;
1642
/**
 * WPA temporary state: 5-word ipad/opad and 10-word dgst/out buffers
 * (presumably HMAC pad states plus running digest and output - confirm
 * against the kernel).
 */
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[10];
  u32 out[10];
} wpa_tmp_t;
1652
/**
 * Bitcoin wallet temporary state: an 8 x 64-bit digest.
 */
typedef struct
{
  u64 dgst[8];
} bitcoin_wallet_tmp_t;
1658
/**
 * DCC2 temporary state: 5-word ipad/opad, a 5-word dgst and a 4-word out
 * buffer (note that out is one word shorter than dgst).
 */
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[4];
} dcc2_tmp_t;
1668
/**
 * bcrypt temporary state: 18-word E and P arrays followed by four 256-word
 * tables S0..S3 (presumably the Blowfish P-array and S-boxes - confirm
 * against the kernel).
 */
typedef struct
{
  u32 E[18];

  u32 P[18];

  u32 S0[256];
  u32 S1[256];
  u32 S2[256];
  u32 S3[256];
} bcrypt_tmp_t;
1681
/**
 * Password Safe v2 temporary state: a 2-word digest, an 18-word P array and
 * four 256-word tables S0..S3 (same P/S shape as bcrypt_tmp_t).
 */
typedef struct
{
  u32 digest[2];

  u32 P[18];

  u32 S0[256];
  u32 S1[256];
  u32 S2[256];
  u32 S3[256];
} pwsafe2_tmp_t;
1694
/**
 * Password Safe v3 temporary state: an 8-word digest buffer.
 */
typedef struct
{
  u32 digest_buf[8];
} pwsafe3_tmp_t;
1700
/**
 * Android PIN temporary state: a 5-word digest buffer.
 */
typedef struct
{
  u32 digest_buf[5];
} androidpin_tmp_t;
1706
/**
 * Android FDE temporary state: 5-word ipad/opad and 10-word dgst/out
 * buffers (same shape as wpa_tmp_t).
 */
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[10];
  u32 out[10];
} androidfde_tmp_t;
1716
/**
 * 32-bit temporary state paired with tc_t by name: 16-word ipad/opad and
 * 64-word dgst/out buffers.
 */
typedef struct
{
  u32 ipad[16];
  u32 opad[16];

  u32 dgst[64];
  u32 out[64];
} tc_tmp_t;
1726
/**
 * 64-bit counterpart of tc_tmp_t: 8-word ipad/opad and 32-word dgst/out
 * buffers, all in 64-bit words.
 */
typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[32];
  u64 out[32];
} tc64_tmp_t;
1736
/**
 * PBKDF2-MD5 temporary state: 4-word ipad/opad and 32-word dgst/out buffers.
 */
typedef struct
{
  u32 ipad[4];
  u32 opad[4];

  u32 dgst[32];
  u32 out[32];
} pbkdf2_md5_tmp_t;
1746
/**
 * PBKDF2-SHA1 temporary state: 5-word ipad/opad and 32-word dgst/out buffers.
 */
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[32];
  u32 out[32];
} pbkdf2_sha1_tmp_t;
1756
/**
 * PBKDF2-SHA256 temporary state: 8-word ipad/opad and 32-word dgst/out
 * buffers.
 */
typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[32];
  u32 out[32];
} pbkdf2_sha256_tmp_t;
1766
/**
 * PBKDF2-SHA512 temporary state in 64-bit words: 8-word ipad/opad and
 * 16-word dgst/out buffers.
 */
typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[16];
  u64 out[16];
} pbkdf2_sha512_tmp_t;
1776
/**
 * eCryptfs temporary state: an 8 x 64-bit output buffer.
 */
typedef struct
{
  u64 out[8];
} ecryptfs_tmp_t;
1782
/**
 * Oracle T temporary state in 64-bit words: 8-word ipad/opad and 16-word
 * dgst/out buffers (same shape as pbkdf2_sha512_tmp_t).
 */
typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[16];
  u64 out[16];
} oraclet_tmp_t;
1792
/**
 * Agilekey temporary state: 5-word ipad, opad, dgst and out buffers.
 */
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];
} agilekey_tmp_t;
1802
/**
 * MyWallet temporary state: 5-word ipad/opad shared by two independent
 * 5-word dgst/out pairs.
 */
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  // first dgst/out pair
  u32 dgst1[5];
  u32 out1[5];

  // second dgst/out pair
  u32 dgst2[5];
  u32 out2[5];
} mywallet_tmp_t;
1815
/**
 * AIX SHA1 temporary state: 5-word ipad, opad, dgst and out buffers.
 */
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];
} sha1aix_tmp_t;
1825
/**
 * AIX SHA256 temporary state: 8-word ipad, opad, dgst and out buffers.
 */
typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[8];
  u32 out[8];
} sha256aix_tmp_t;
1835
/**
 * AIX SHA512 temporary state in 64-bit words: 8-word ipad, opad, dgst and
 * out buffers.
 */
typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[8];
  u64 out[8];
} sha512aix_tmp_t;
1845
/**
 * LastPass temporary state: 8-word ipad, opad, dgst and out buffers.
 */
typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[8];
  u32 out[8];
} lastpass_tmp_t;
1855
/**
 * Drupal 7 temporary state: an 8 x 64-bit digest buffer.
 */
typedef struct
{
  u64 digest_buf[8];
} drupal7_tmp_t;
1861
/**
 * Lotus 8 temporary state: 5-word ipad, opad, dgst and out buffers.
 */
typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];
} lotus8_tmp_t;
1871
/**
 * Office 2007 temporary state: a 5-word output buffer.
 */
typedef struct
{
  u32 out[5];
} office2007_tmp_t;
1877
/**
 * Office 2010 temporary state: a 5-word output buffer.
 */
typedef struct
{
  u32 out[5];
} office2010_tmp_t;
1883
/**
 * Office 2013 temporary state: an 8 x 64-bit output buffer.
 */
typedef struct
{
  u64 out[8];
} office2013_tmp_t;
1889
/**
 * SAP H (SHA1) temporary state: a 5-word digest buffer.
 */
typedef struct
{
  u32 digest_buf[5];
} saph_sha1_tmp_t;
1895
/**
 * 7-Zip temporary state: a 16-word block, an 8-word digest and the block
 * and final length counters.
 */
typedef struct
{
  u32 block[16];

  u32 dgst[8];

  u32 block_len;
  u32 final_len;
} seven_zip_tmp_t;
1906
/**
 * AxCrypt temporary state: a 5-word KEK plus 4-word lsb and cipher buffers.
 */
typedef struct
{
  u32 KEK[5];

  u32 lsb[4];
  u32 cipher[4];
} axcrypt_tmp_t;
1915
/**
 * BSDi crypt temporary state: two 16-word key arrays (Kc, Kd) and a 2-word
 * iv.
 */
typedef struct
{
  u32 Kc[16];
  u32 Kd[16];

  u32 iv[2];
} bsdicrypt_tmp_t;
1924
/**
 * RAR3 temporary state: 17 digests of 5 words each.
 */
typedef struct
{
  u32 dgst[17][5];
} rar3_tmp_t;
1930
/**
 * CRAM-MD5 extra salt data: a 16-word user buffer.
 */
typedef struct
{
  u32 user[16];
} cram_md5_t;
1936
/**
 * 7-Zip extra salt data: iv and salt (4 words each, with lengths), a crc,
 * a 96-word data buffer with its length, and the unpack size.
 */
typedef struct
{
  u32 iv_buf[4];
  u32 iv_len;

  u32 salt_buf[4];
  u32 salt_len;

  u32 crc;

  u32 data_buf[96];
  u32 data_len;

  u32 unpack_size;
} seven_zip_t;
1953
/**
 * One hcstat table entry: a 32-bit key paired with a 64-bit value.
 */
typedef struct
{
  u32 key;
  u64 val;
} hcstat_table_t;
1960
/**
 * Buffer of 0x100 (256) words together with a length field (the name
 * suggests a charset - not confirmed from this chunk).
 */
typedef struct
{
  u32 cs_buf[0x100];
  u32 cs_len;
} cs_t;
1967
/**
 * Kernel rule: up to 0x100 (256) command words.
 */
typedef struct
{
  u32 cmds[0x100];
} kernel_rule_t;
1973
/**
 * Plain reference made of two 32-bit indices, gidvid and il_pos.
 */
typedef struct
{
  u32 gidvid;
  u32 il_pos;
} plain_t;
1980
/**
 * Password candidate: 16 words of input and its length, padded with three
 * placeholder words so the struct spans 20 words in total.
 */
typedef struct
{
  u32 i[16];

  u32 pw_len;

  // padding only, see the field names
  u32 alignment_placeholder_1;
  u32 alignment_placeholder_2;
  u32 alignment_placeholder_3;
} pw_t;
1992
/**
 * Single-word record holding one 32-bit value i.
 */
typedef struct
{
  u32 i;
} bf_t;
1998
/**
 * Combinator word: 8 words of input and its length.
 */
typedef struct
{
  u32 i[8];

  u32 pw_len;
} comb_t;
2006
/**
 * Record of 32 words named b (the type name suggests a bitslice word - not
 * confirmed from this chunk).
 */
typedef struct
{
  u32 b[32];
} bs_word_t;
2012
/**
 * scrypt temporary state: 64 uint4 vectors.
 */
typedef struct
{
  uint4 P[64];
} scrypt_tmp_t;