OpenCL/types_ocl.c  (hashcat.git, commit 3a57b12661944b4fa720343bc0a1f68a98d59015)
1 /**
2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * magnum <john.magnum@hushmail.com>
4 *
5 * License.....: MIT
6 */
7
8 #define DEVICE_TYPE_CPU 2
9 #define DEVICE_TYPE_GPU 4
10
11 typedef uchar u8;
12 typedef ushort u16;
13 typedef uint u32;
14 typedef ulong u64;
15
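// Kernels that have not been ported to the SIMD code path are built without
// NEW_SIMD_CODE, which forces VECT_SIZE back to 1 so the vector types below
// collapse to plain scalars.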
16 #ifndef NEW_SIMD_CODE
17 #undef VECT_SIZE
18 #define VECT_SIZE 1
19 #endif
20
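// CONCAT token-pastes its arguments, so VTYPE(uint, VECT_SIZE) expands to an
// OpenCL vector type such as uint4. VECT_SIZE == 1 is special-cased to plain
// scalars because OpenCL has no 1-component vector types.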
21 #define CONCAT(a, b) a##b
22 #define VTYPE(type, width) CONCAT(type, width)
23
24 #if VECT_SIZE == 1
25 typedef uchar u8x;
26 typedef ushort u16x;
27 typedef uint u32x;
28 typedef ulong u64x;
29 #else
30 typedef VTYPE(uchar, VECT_SIZE) u8x;
31 typedef VTYPE(ushort, VECT_SIZE) u16x;
32 typedef VTYPE(uint, VECT_SIZE) u32x;
33 typedef VTYPE(ulong, VECT_SIZE) u64x;
34 #endif
35
36 // this one needs to die
37 #define allx(r) r
38
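// helpers that split a 64-bit word into its 32-bit halves and put it back
// together: hl32_to_64_S (a, b) returns a in the high and b in the low 32 bits
// by reinterpreting (uint2) (b, a) as a ulong (.s0 maps to the low word on the
// little-endian devices these kernels target)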
39 static inline u32 l32_from_64_S (u64 a)
40 {
41 const u32 r = (u32) (a);
42
43 return r;
44 }
45
46 static inline u32 h32_from_64_S (u64 a)
47 {
48 a >>= 32;
49
50 const u32 r = (u32) (a);
51
52 return r;
53 }
54
55 static inline u64 hl32_to_64_S (const u32 a, const u32 b)
56 {
57 return as_ulong ((uint2) (b, a));
58 }
59
60 static inline u32x l32_from_64 (u64x a)
61 {
62 u32x r;
63
64 #if VECT_SIZE == 1
65 r = (u32) a;
66 #endif
67
68 #if VECT_SIZE >= 2
69 r.s0 = (u32) a.s0;
70 r.s1 = (u32) a.s1;
71 #endif
72
73 #if VECT_SIZE >= 4
74 r.s2 = (u32) a.s2;
75 r.s3 = (u32) a.s3;
76 #endif
77
78 #if VECT_SIZE >= 8
79 r.s4 = (u32) a.s4;
80 r.s5 = (u32) a.s5;
81 r.s6 = (u32) a.s6;
82 r.s7 = (u32) a.s7;
83 #endif
84
85 #if VECT_SIZE >= 16
86 r.s8 = (u32) a.s8;
87 r.s9 = (u32) a.s9;
88 r.sa = (u32) a.sa;
89 r.sb = (u32) a.sb;
90 r.sc = (u32) a.sc;
91 r.sd = (u32) a.sd;
92 r.se = (u32) a.se;
93 r.sf = (u32) a.sf;
94 #endif
95
96 return r;
97 }
98
99 static inline u32x h32_from_64 (u64x a)
100 {
101 a >>= 32;
102
103 u32x r;
104
105 #if VECT_SIZE == 1
106 r = (u32) a;
107 #endif
108
109 #if VECT_SIZE >= 2
110 r.s0 = (u32) a.s0;
111 r.s1 = (u32) a.s1;
112 #endif
113
114 #if VECT_SIZE >= 4
115 r.s2 = (u32) a.s2;
116 r.s3 = (u32) a.s3;
117 #endif
118
119 #if VECT_SIZE >= 8
120 r.s4 = (u32) a.s4;
121 r.s5 = (u32) a.s5;
122 r.s6 = (u32) a.s6;
123 r.s7 = (u32) a.s7;
124 #endif
125
126 #if VECT_SIZE >= 16
127 r.s8 = (u32) a.s8;
128 r.s9 = (u32) a.s9;
129 r.sa = (u32) a.sa;
130 r.sb = (u32) a.sb;
131 r.sc = (u32) a.sc;
132 r.sd = (u32) a.sd;
133 r.se = (u32) a.se;
134 r.sf = (u32) a.sf;
135 #endif
136
137 return r;
138 }
139
140 static inline u64x hl32_to_64 (const u32x a, const u32x b)
141 {
142 u64x r;
143
144 #if VECT_SIZE == 1
145 r = as_ulong ((uint2) (b, a));
146 #endif
147
148 #if VECT_SIZE >= 2
149 r.s0 = as_ulong ((uint2) (b.s0, a.s0));
150 r.s1 = as_ulong ((uint2) (b.s1, a.s1));
151 #endif
152
153 #if VECT_SIZE >= 4
154 r.s2 = as_ulong ((uint2) (b.s2, a.s2));
155 r.s3 = as_ulong ((uint2) (b.s3, a.s3));
156 #endif
157
158 #if VECT_SIZE >= 8
159 r.s4 = as_ulong ((uint2) (b.s4, a.s4));
160 r.s5 = as_ulong ((uint2) (b.s5, a.s5));
161 r.s6 = as_ulong ((uint2) (b.s6, a.s6));
162 r.s7 = as_ulong ((uint2) (b.s7, a.s7));
163 #endif
164
165 #if VECT_SIZE >= 16
166 r.s8 = as_ulong ((uint2) (b.s8, a.s8));
167 r.s9 = as_ulong ((uint2) (b.s9, a.s9));
168 r.sa = as_ulong ((uint2) (b.sa, a.sa));
169 r.sb = as_ulong ((uint2) (b.sb, a.sb));
170 r.sc = as_ulong ((uint2) (b.sc, a.sc));
171 r.sd = as_ulong ((uint2) (b.sd, a.sd));
172 r.se = as_ulong ((uint2) (b.se, a.se));
173 r.sf = as_ulong ((uint2) (b.sf, a.sf));
174 #endif
175
176 return r;
177 }
178
179 #ifdef IS_AMD
180 static inline u32 swap32_S (const u32 v)
181 {
182 return (as_uint (as_uchar4 (v).s3210));
183 }
184
185 static inline u64 swap64_S (const u64 v)
186 {
187 return (as_ulong (as_uchar8 (v).s76543210));
188 }
189
190 static inline u32 rotr32_S (const u32 a, const u32 n)
191 {
192 return rotate (a, 32 - n);
193 }
194
195 static inline u32 rotl32_S (const u32 a, const u32 n)
196 {
197 return rotate (a, n);
198 }
199
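// 64-bit rotate right. On CPU devices the rotate () builtin is used directly.
// On GPU devices the rotate is assembled from two amd_bitalign calls on the
// 32-bit halves: amd_bitalign (x, y, s) returns the low 32 bits of ((x:y) >> s)
// for s < 32, and the halves swap roles once n reaches 32.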
200 static inline u64 rotr64_S (const u64 a, const u32 n)
201 {
202 #if DEVICE_TYPE == DEVICE_TYPE_CPU
203
204 const u64 r = rotate (a, (u64) 64 - n);
205
206 #else
207
208 const u32 a0 = h32_from_64_S (a);
209 const u32 a1 = l32_from_64_S (a);
210
211 const u32 t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
212 const u32 t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
213
214 const u64 r = hl32_to_64_S (t0, t1);
215
216 #endif
217
218 return r;
219 }
220
221 static inline u64 rotl64_S (const u64 a, const u32 n)
222 {
223 return rotr64_S (a, 64 - n);
224 }
225
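// byte swap written with shifts and masks so the same expression works for
// every VECT_SIZE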
226 static inline u32x swap32 (const u32x v)
227 {
228 return ((v >> 24) & 0x000000ff)
229 | ((v >> 8) & 0x0000ff00)
230 | ((v << 8) & 0x00ff0000)
231 | ((v << 24) & 0xff000000);
232 }
233
234 static inline u64x swap64 (const u64x v)
235 {
236 return ((v >> 56) & 0x00000000000000ff)
237 | ((v >> 40) & 0x000000000000ff00)
238 | ((v >> 24) & 0x0000000000ff0000)
239 | ((v >> 8) & 0x00000000ff000000)
240 | ((v << 8) & 0x000000ff00000000)
241 | ((v << 24) & 0x0000ff0000000000)
242 | ((v << 40) & 0x00ff000000000000)
243 | ((v << 56) & 0xff00000000000000);
244 }
245
246 static inline u32x rotr32 (const u32x a, const u32 n)
247 {
248 return rotate (a, 32 - n);
249 }
250
251 static inline u32x rotl32 (const u32x a, const u32 n)
252 {
253 return rotate (a, n);
254 }
255
256 static inline u64x rotr64 (const u64x a, const u32 n)
257 {
258 #if DEVICE_TYPE == DEVICE_TYPE_CPU
259
260 const u64x r = rotate (a, (u64) 64 - n);
261
262 #else
263
264 const u32x a0 = h32_from_64 (a);
265 const u32x a1 = l32_from_64 (a);
266
267 const u32x t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
268 const u32x t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
269
270 const u64x r = hl32_to_64 (t0, t1);
271
272 #endif
273
274 return r;
275 }
276
277 static inline u64x rotl64 (const u64x a, const u32 n)
278 {
279 return rotr64 (a, 64 - n);
280 }
281
282 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
283 {
284 return amd_bfe (a, b, c);
285 }
286
287 static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
288 {
289 return amd_bytealign (a, b, c);
290 }
291 #endif
292
293 #ifdef IS_NV
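// prmt.b32 with selector 0x0123 reverses the four bytes of a register, i.e. a
// 32-bit byte swap; swap64_S applies it to both halves of a 64-bit value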
294 static inline u32 swap32_S (const u32 v)
295 {
296 u32 r;
297
298 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));
299
300 return r;
301 }
302
303 static inline u64 swap64_S (const u64 v)
304 {
305 u32 il;
306 u32 ir;
307
308 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(v));
309
310 u32 tl;
311 u32 tr;
312
313 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl) : "r"(il));
314 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));
315
316 u64 r;
317
318 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));
319
320 return r;
321 }
322
323 static inline u32 rotr32_S (const u32 a, const u32 n)
324 {
325 return rotate (a, 32 - n);
326 }
327
328 static inline u32 rotl32_S (const u32 a, const u32 n)
329 {
330 return rotate (a, n);
331 }
332
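// 64-bit rotate right. shf.r.wrap.b32 d, a, b, c yields the low 32 bits of
// ((b:a) >> (c mod 32)) (a funnel shift), so the rotate is built from two
// funnel shifts over the 32-bit halves; the instruction only exists on newer
// architectures, hence the CUDA_ARCH guard and the rotate () fallback below.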
333 #if CUDA_ARCH >= 350
334 static inline u64 rotr64_S (const u64 a, const u32 n)
335 {
336 u32 il;
337 u32 ir;
338
339 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
340
341 u32 tl;
342 u32 tr;
343
344 if (n >= 32)
345 {
346 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
347 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
348 }
349 else
350 {
351 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
352 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
353 }
354
355 u64 r;
356
357 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
358
359 return r;
360 }
361 #else
362 static inline u64 rotr64_S (const u64 a, const u32 n)
363 {
364 return rotate (a, (u64) 64 - n);
365 }
366 #endif
367
368 static inline u64 rotl64_S (const u64 a, const u32 n)
369 {
370 return rotr64_S (a, 64 - n);
371 }
372
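// lop3.b32 evaluates an arbitrary three-input bitwise function in a single
// instruction; the 8-bit immediate is the function's truth table (for example
// 0x96 encodes a ^ b ^ c and 0xe8 the majority function), which is why these
// helpers are named after the immediate. The instruction needs a recent
// architecture, hence the CUDA_ARCH >= 500 guard.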
373 #if CUDA_ARCH >= 500
374 static inline u32 lut3_2d_S (const u32 a, const u32 b, const u32 c)
375 {
376 u32 r;
377
378 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
379
380 return r;
381 }
382
383 static inline u32 lut3_39_S (const u32 a, const u32 b, const u32 c)
384 {
385 u32 r;
386
387 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
388
389 return r;
390 }
391
392 static inline u32 lut3_59_S (const u32 a, const u32 b, const u32 c)
393 {
394 u32 r;
395
396 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
397
398 return r;
399 }
400
401 static inline u32 lut3_96_S (const u32 a, const u32 b, const u32 c)
402 {
403 u32 r;
404
405 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
406
407 return r;
408 }
409
410 static inline u32 lut3_e4_S (const u32 a, const u32 b, const u32 c)
411 {
412 u32 r;
413
414 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
415
416 return r;
417 }
418
419 static inline u32 lut3_e8_S (const u32 a, const u32 b, const u32 c)
420 {
421 u32 r;
422
423 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
424
425 return r;
426 }
427
428 static inline u32 lut3_ca_S (const u32 a, const u32 b, const u32 c)
429 {
430 u32 r;
431
432 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
433
434 return r;
435 }
436 #endif
437
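// equivalent of CUDA's __byte_perm (): each nibble of c selects one byte from
// the 8-byte pool formed by a (bytes 0-3) and b (bytes 4-7)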
438 static inline u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
439 {
440 u32 r;
441
442 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
443
444 return r;
445 }
446
447 static inline u32x swap32 (const u32x v)
448 {
449 return ((v >> 24) & 0x000000ff)
450 | ((v >> 8) & 0x0000ff00)
451 | ((v << 8) & 0x00ff0000)
452 | ((v << 24) & 0xff000000);
453 }
454
455 static inline u64x swap64 (const u64x v)
456 {
457 return ((v >> 56) & 0x00000000000000ff)
458 | ((v >> 40) & 0x000000000000ff00)
459 | ((v >> 24) & 0x0000000000ff0000)
460 | ((v >> 8) & 0x00000000ff000000)
461 | ((v << 8) & 0x000000ff00000000)
462 | ((v << 24) & 0x0000ff0000000000)
463 | ((v << 40) & 0x00ff000000000000)
464 | ((v << 56) & 0xff00000000000000);
465 }
466
467 static inline u32x rotr32 (const u32x a, const u32 n)
468 {
469 return rotate (a, 32 - n);
470 }
471
472 static inline u32x rotl32 (const u32x a, const u32 n)
473 {
474 return rotate (a, n);
475 }
476
477 #if CUDA_ARCH >= 350
478 static inline u64x rotr64 (const u64x a, const u32 n)
479 {
480 u64x r;
481
482 u32 il;
483 u32 ir;
484 u32 tl;
485 u32 tr;
486
487 #if VECT_SIZE == 1
488
489 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
490
491 if (n >= 32)
492 {
493 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
494 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
495 }
496 else
497 {
498 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
499 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
500 }
501
502 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
503
504 #endif
505
506 #if VECT_SIZE >= 2
507
508 {
509 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s0));
510
511 if (n >= 32)
512 {
513 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
514 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
515 }
516 else
517 {
518 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
519 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
520 }
521
522 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s0) : "r"(tl), "r"(tr));
523 }
524
525 {
526 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s1));
527
528 if (n >= 32)
529 {
530 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
531 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
532 }
533 else
534 {
535 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
536 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
537 }
538
539 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s1) : "r"(tl), "r"(tr));
540 }
541
542 #endif
543
544 #if VECT_SIZE >= 4
545
546 {
547 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s2));
548
549 if (n >= 32)
550 {
551 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
552 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
553 }
554 else
555 {
556 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
557 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
558 }
559
560 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s2) : "r"(tl), "r"(tr));
561 }
562
563 {
564 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s3));
565
566 if (n >= 32)
567 {
568 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
569 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
570 }
571 else
572 {
573 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
574 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
575 }
576
577 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s3) : "r"(tl), "r"(tr));
578 }
579
580 #endif
581
582 #if VECT_SIZE >= 8
583
584 {
585 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s4));
586
587 if (n >= 32)
588 {
589 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
590 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
591 }
592 else
593 {
594 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
595 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
596 }
597
598 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s4) : "r"(tl), "r"(tr));
599 }
600
601 {
602 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s5));
603
604 if (n >= 32)
605 {
606 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
607 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
608 }
609 else
610 {
611 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
612 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
613 }
614
615 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s5) : "r"(tl), "r"(tr));
616 }
617
618 {
619 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s6));
620
621 if (n >= 32)
622 {
623 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
624 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
625 }
626 else
627 {
628 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
629 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
630 }
631
632 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s6) : "r"(tl), "r"(tr));
633 }
634
635 {
636 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s7));
637
638 if (n >= 32)
639 {
640 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
641 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
642 }
643 else
644 {
645 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
646 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
647 }
648
649 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s7) : "r"(tl), "r"(tr));
650 }
651
652 #endif
653
654 #if VECT_SIZE >= 16
655
656 {
657 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s8));
658
659 if (n >= 32)
660 {
661 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
662 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
663 }
664 else
665 {
666 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
667 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
668 }
669
670 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s8) : "r"(tl), "r"(tr));
671 }
672
673 {
674 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s9));
675
676 if (n >= 32)
677 {
678 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
679 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
680 }
681 else
682 {
683 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
684 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
685 }
686
687 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s9) : "r"(tl), "r"(tr));
688 }
689
690 {
691 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sa));
692
693 if (n >= 32)
694 {
695 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
696 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
697 }
698 else
699 {
700 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
701 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
702 }
703
704 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sa) : "r"(tl), "r"(tr));
705 }
706
707 {
708 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sb));
709
710 if (n >= 32)
711 {
712 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
713 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
714 }
715 else
716 {
717 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
718 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
719 }
720
721 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sb) : "r"(tl), "r"(tr));
722 }
723
724 {
725 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sc));
726
727 if (n >= 32)
728 {
729 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
730 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
731 }
732 else
733 {
734 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
735 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
736 }
737
738 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sc) : "r"(tl), "r"(tr));
739 }
740
741 {
742 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sd));
743
744 if (n >= 32)
745 {
746 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
747 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
748 }
749 else
750 {
751 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
752 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
753 }
754
755 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sd) : "r"(tl), "r"(tr));
756 }
757
758 {
759 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.se));
760
761 if (n >= 32)
762 {
763 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
764 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
765 }
766 else
767 {
768 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
769 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
770 }
771
772 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.se) : "r"(tl), "r"(tr));
773 }
774
775 {
776 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sf));
777
778 if (n >= 32)
779 {
780 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
781 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
782 }
783 else
784 {
785 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
786 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
787 }
788
789 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sf) : "r"(tl), "r"(tr));
790 }
791
792 #endif
793
794 return r;
795 }
796 #else
797 static inline u64x rotr64 (const u64x a, const u32 n)
798 {
799 return rotate (a, (u64) 64 - n);
800 }
801 #endif
802
803 static inline u64x rotl64 (const u64x a, const u32 n)
804 {
805 return rotr64 (a, 64 - n);
806 }
807
808 static inline u32x __byte_perm (const u32x a, const u32x b, const u32x c)
809 {
810 u32x r;
811
812 #if VECT_SIZE == 1
813 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c) );
814 #endif
815
816 #if VECT_SIZE >= 2
817 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(a.s0), "r"(b.s0), "r"(c.s0));
818 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(a.s1), "r"(b.s1), "r"(c.s1));
819 #endif
820
821 #if VECT_SIZE >= 4
822 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(a.s2), "r"(b.s2), "r"(c.s2));
823 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(a.s3), "r"(b.s3), "r"(c.s3));
824 #endif
825
826 #if VECT_SIZE >= 8
827 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(a.s4), "r"(b.s4), "r"(c.s4));
828 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(a.s5), "r"(b.s5), "r"(c.s5));
829 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(a.s6), "r"(b.s6), "r"(c.s6));
830 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(a.s7), "r"(b.s7), "r"(c.s7));
831 #endif
832
833 #if VECT_SIZE >= 16
834 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(a.s8), "r"(b.s8), "r"(c.s8));
835 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(a.s9), "r"(b.s9), "r"(c.s9));
836 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(a.sa), "r"(b.sa), "r"(c.sa));
837 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(a.sb), "r"(b.sb), "r"(c.sb));
838 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(a.sc), "r"(b.sc), "r"(c.sc));
839 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(a.sd), "r"(b.sd), "r"(c.sd));
840 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(a.se), "r"(b.se), "r"(c.se));
841 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(a.sf), "r"(b.sf), "r"(c.sf));
842 #endif
843
844 return r;
845 }
846
847 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
848 {
849 u32 r;
850
851 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
852
853 return r;
854 }
855
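// emulation of AMD's amd_bytealign: returns the low 32 bits of
// ((a:b) >> ((c & 3) * 8)). On newer architectures this is a single funnel
// shift; older ones build the same result with a byte permutation.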
856 #if CUDA_ARCH >= 350
857 static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
858 {
859 u32 r;
860
861 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));
862
863 return r;
864 }
865 #else
866 static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
867 {
868 return __byte_perm_S (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);
869 }
870 #endif
871
872 #if CUDA_ARCH >= 500
873 static inline u32x lut3_2d (const u32x a, const u32x b, const u32x c)
874 {
875 u32x r;
876
877 #if VECT_SIZE == 1
878 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
879 #endif
880
881 #if VECT_SIZE >= 2
882 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
883 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
884 #endif
885
886 #if VECT_SIZE >= 4
887 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
888 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
889 #endif
890
891 #if VECT_SIZE >= 8
892 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
893 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
894 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
895 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
896 #endif
897
898 #if VECT_SIZE >= 16
899 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
900 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
901 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
902 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
903 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
904 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
905 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
906 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
907 #endif
908
909 return r;
910 }
911
912 static inline u32x lut3_39 (const u32x a, const u32x b, const u32x c)
913 {
914 u32x r;
915
916 #if VECT_SIZE == 1
917 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
918 #endif
919
920 #if VECT_SIZE >= 2
921 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
922 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
923 #endif
924
925 #if VECT_SIZE >= 4
926 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
927 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
928 #endif
929
930 #if VECT_SIZE >= 8
931 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
932 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
933 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
934 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
935 #endif
936
937 #if VECT_SIZE >= 16
938 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
939 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
940 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
941 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
942 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
943 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
944 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
945 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
946 #endif
947
948 return r;
949 }
950
951 static inline u32x lut3_59 (const u32x a, const u32x b, const u32x c)
952 {
953 u32x r;
954
955 #if VECT_SIZE == 1
956 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
957 #endif
958
959 #if VECT_SIZE >= 2
960 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
961 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
962 #endif
963
964 #if VECT_SIZE >= 4
965 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
966 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
967 #endif
968
969 #if VECT_SIZE >= 8
970 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
971 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
972 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
973 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
974 #endif
975
976 #if VECT_SIZE >= 16
977 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
978 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
979 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
980 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
981 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
982 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
983 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
984 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
985 #endif
986
987 return r;
988 }
989
990 static inline u32x lut3_96 (const u32x a, const u32x b, const u32x c)
991 {
992 u32x r;
993
994 #if VECT_SIZE == 1
995 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
996 #endif
997
998 #if VECT_SIZE >= 2
999 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1000 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1001 #endif
1002
1003 #if VECT_SIZE >= 4
1004 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1005 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1006 #endif
1007
1008 #if VECT_SIZE >= 8
1009 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1010 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1011 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1012 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1013 #endif
1014
1015 #if VECT_SIZE >= 16
1016 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1017 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1018 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1019 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1020 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1021 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1022 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1023 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1024 #endif
1025
1026 return r;
1027 }
1028
1029 static inline u32x lut3_e4 (const u32x a, const u32x b, const u32x c)
1030 {
1031 u32x r;
1032
1033 #if VECT_SIZE == 1
1034 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1035 #endif
1036
1037 #if VECT_SIZE >= 2
1038 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1039 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1040 #endif
1041
1042 #if VECT_SIZE >= 4
1043 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1044 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1045 #endif
1046
1047 #if VECT_SIZE >= 8
1048 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1049 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1050 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1051 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1052 #endif
1053
1054 #if VECT_SIZE >= 16
1055 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1056 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1057 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1058 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1059 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1060 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1061 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1062 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1063 #endif
1064
1065 return r;
1066 }
1067
1068 static inline u32x lut3_e8 (const u32x a, const u32x b, const u32x c)
1069 {
1070 u32x r;
1071
1072 #if VECT_SIZE == 1
1073 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1074 #endif
1075
1076 #if VECT_SIZE >= 2
1077 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1078 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1079 #endif
1080
1081 #if VECT_SIZE >= 4
1082 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1083 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1084 #endif
1085
1086 #if VECT_SIZE >= 8
1087 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1088 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1089 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1090 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1091 #endif
1092
1093 #if VECT_SIZE >= 16
1094 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1095 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1096 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1097 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1098 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1099 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1100 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1101 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1102 #endif
1103
1104 return r;
1105 }
1106
1107 static inline u32x lut3_ca (const u32x a, const u32x b, const u32x c)
1108 {
1109 u32x r;
1110
1111 #if VECT_SIZE == 1
1112 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1113 #endif
1114
1115 #if VECT_SIZE >= 2
1116 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1117 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1118 #endif
1119
1120 #if VECT_SIZE >= 4
1121 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1122 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1123 #endif
1124
1125 #if VECT_SIZE >= 8
1126 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1127 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1128 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1129 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1130 #endif
1131
1132 #if VECT_SIZE >= 16
1133 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1134 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1135 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1136 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1137 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1138 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1139 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1140 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1141 #endif
1142
1143 return r;
1144 }
1145
1146 #endif
1147 #endif
1148
1149 #ifdef IS_GENERIC
1150 static inline u32 swap32_S (const u32 v)
1151 {
1152 return (as_uint (as_uchar4 (v).s3210));
1153 }
1154
1155 static inline u64 swap64_S (const u64 v)
1156 {
1157 return (as_ulong (as_uchar8 (v).s76543210));
1158 }
1159
1160 static inline u32 rotr32_S (const u32 a, const u32 n)
1161 {
1162 return rotate (a, 32 - n);
1163 }
1164
1165 static inline u32 rotl32_S (const u32 a, const u32 n)
1166 {
1167 return rotate (a, n);
1168 }
1169
1170 static inline u64 rotr64_S (const u64 a, const u32 n)
1171 {
1172 return rotate (a, (u64) 64 - n);
1173 }
1174
1175 static inline u64 rotl64_S (const u64 a, const u32 n)
1176 {
1177 return rotate (a, (u64) n);
1178 }
1179
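// portable emulation of AMD's amd_bytealign intrinsic: concatenate a:b into a
// 64-bit value and shift right by the byte offset (c & 3) * 8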
1180 static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
1181 {
1182 const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);
1183
1184 return (u32) (tmp);
1185 }
1186
1187 static inline u32x swap32 (const u32x v)
1188 {
1189 return ((v >> 24) & 0x000000ff)
1190 | ((v >> 8) & 0x0000ff00)
1191 | ((v << 8) & 0x00ff0000)
1192 | ((v << 24) & 0xff000000);
1193 }
1194
1195 static inline u64x swap64 (const u64x v)
1196 {
1197 return ((v >> 56) & 0x00000000000000ff)
1198 | ((v >> 40) & 0x000000000000ff00)
1199 | ((v >> 24) & 0x0000000000ff0000)
1200 | ((v >> 8) & 0x00000000ff000000)
1201 | ((v << 8) & 0x000000ff00000000)
1202 | ((v << 24) & 0x0000ff0000000000)
1203 | ((v << 40) & 0x00ff000000000000)
1204 | ((v << 56) & 0xff00000000000000);
1205 }
1206
1207 static inline u32x rotr32 (const u32x a, const u32 n)
1208 {
1209 return rotate (a, 32 - n);
1210 }
1211
1212 static inline u32x rotl32 (const u32x a, const u32 n)
1213 {
1214 return rotate (a, n);
1215 }
1216
1217 static inline u64x rotr64 (const u64x a, const u32 n)
1218 {
1219 return rotate (a, (u64) 64 - n);
1220 }
1221
1222 static inline u64x rotl64 (const u64x a, const u32 n)
1223 {
1224 return rotate (a, (u64) n);
1225 }
1226
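// software bitfield extract: returns the c-bit wide field of a starting at
// bit b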
1227 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
1228 {
1229 #define BIT(x) (1 << (x))
1230 #define BIT_MASK(x) (BIT (x) - 1)
1231 #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z))
1232
1233 return BFE (a, b, c);
1234 }
1235
1236 static inline u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
1237 {
1238 #if VECT_SIZE == 1
1239 const u64x tmp = ((((u64x) (a)) << 32) | ((u64x) (b))) >> ((c & 3) * 8);
1240
1241 return (u32x) (tmp);
1242 #endif
1243
1244 #if VECT_SIZE == 2
1245 const u64x tmp = ((((u64x) (a.s0, a.s1)) << 32) | ((u64x) (b.s0, b.s1))) >> ((c & 3) * 8);
1246
1247 return (u32x) (tmp.s0, tmp.s1);
1248 #endif
1249
1250 #if VECT_SIZE == 4
1251 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3))) >> ((c & 3) * 8);
1252
1253 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3);
1254 #endif
1255
1256 #if VECT_SIZE == 8
1257 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7))) >> ((c & 3) * 8);
1258
1259 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7);
1260 #endif
1261
1262 #if VECT_SIZE == 16
1263 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7, a.s8, a.s9, a.sa, a.sb, a.sc, a.sd, a.se, a.sf)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7, b.s8, b.s9, b.sa, b.sb, b.sc, b.sd, b.se, b.sf))) >> ((c & 3) * 8);
1264
1265 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7, tmp.s8, tmp.s9, tmp.sa, tmp.sb, tmp.sc, tmp.sd, tmp.se, tmp.sf);
1266 #endif
1267 }
1268 #endif
1269
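// digest_t is sized for whichever hash type the kernel is built for; exactly
// one of the following hash-type macros is expected to be defined at kernel
// compile time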
1270 typedef struct
1271 {
1272 #if defined _DES_
1273 u32 digest_buf[4];
1274 #elif defined _MD4_
1275 u32 digest_buf[4];
1276 #elif defined _MD5_
1277 u32 digest_buf[4];
1278 #elif defined _MD5H_
1279 u32 digest_buf[4];
1280 #elif defined _SHA1_
1281 u32 digest_buf[5];
1282 #elif defined _BCRYPT_
1283 u32 digest_buf[6];
1284 #elif defined _SHA256_
1285 u32 digest_buf[8];
1286 #elif defined _SHA384_
1287 u32 digest_buf[16];
1288 #elif defined _SHA512_
1289 u32 digest_buf[16];
1290 #elif defined _KECCAK_
1291 u32 digest_buf[50];
1292 #elif defined _RIPEMD160_
1293 u32 digest_buf[5];
1294 #elif defined _WHIRLPOOL_
1295 u32 digest_buf[16];
1296 #elif defined _GOST_
1297 u32 digest_buf[8];
1298 #elif defined _GOST2012_256_
1299 u32 digest_buf[8];
1300 #elif defined _GOST2012_512_
1301 u32 digest_buf[16];
1302 #elif defined _SAPB_
1303 u32 digest_buf[4];
1304 #elif defined _SAPG_
1305 u32 digest_buf[5];
1306 #elif defined _MYSQL323_
1307 u32 digest_buf[4];
1308 #elif defined _LOTUS5_
1309 u32 digest_buf[4];
1310 #elif defined _LOTUS6_
1311 u32 digest_buf[4];
1312 #elif defined _SCRYPT_
1313 u32 digest_buf[8];
1314 #elif defined _LOTUS8_
1315 u32 digest_buf[4];
1316 #elif defined _OFFICE2007_
1317 u32 digest_buf[4];
1318 #elif defined _OFFICE2010_
1319 u32 digest_buf[4];
1320 #elif defined _OFFICE2013_
1321 u32 digest_buf[4];
1322 #elif defined _OLDOFFICE01_
1323 u32 digest_buf[4];
1324 #elif defined _OLDOFFICE34_
1325 u32 digest_buf[4];
1326 #elif defined _SIPHASH_
1327 u32 digest_buf[4];
1328 #elif defined _PBKDF2_MD5_
1329 u32 digest_buf[32];
1330 #elif defined _PBKDF2_SHA1_
1331 u32 digest_buf[32];
1332 #elif defined _PBKDF2_SHA256_
1333 u32 digest_buf[32];
1334 #elif defined _PBKDF2_SHA512_
1335 u32 digest_buf[32];
1336 #elif defined _PDF17L8_
1337 u32 digest_buf[8];
1338 #elif defined _CRC32_
1339 u32 digest_buf[4];
1340 #elif defined _SEVEN_ZIP_
1341 u32 digest_buf[4];
1342 #elif defined _ANDROIDFDE_
1343 u32 digest_buf[4];
1344 #elif defined _DCC2_
1345 u32 digest_buf[4];
1346 #elif defined _WPA_
1347 u32 digest_buf[4];
1348 #elif defined _MD5_SHA1_
1349 u32 digest_buf[4];
1350 #elif defined _SHA1_MD5_
1351 u32 digest_buf[5];
1352 #elif defined _NETNTLMV2_
1353 u32 digest_buf[4];
1354 #elif defined _KRB5PA_
1355 u32 digest_buf[4];
1356 #elif defined _CLOUDKEY_
1357 u32 digest_buf[8];
1360 #elif defined _PSAFE2_
1361 u32 digest_buf[5];
1364 #elif defined _RAR3_
1365 u32 digest_buf[4];
1366 #elif defined _SHA256_SHA1_
1367 u32 digest_buf[8];
1368 #elif defined _MS_DRSR_
1369 u32 digest_buf[8];
1370 #elif defined _ANDROIDFDE_SAMSUNG_
1371 u32 digest_buf[8];
1372 #elif defined _RAR5_
1373 u32 digest_buf[4];
1374 #elif defined _KRB5TGS_
1375 u32 digest_buf[4];
1376 #elif defined _AXCRYPT_
1377 u32 digest_buf[4];
1378 #elif defined _KEEPASS_
1379 u32 digest_buf[4];
1380 #endif
1381
1382 } digest_t;
1383
1384 typedef struct
1385 {
1386 u32 salt_buf[16];
1387 u32 salt_buf_pc[8];
1388
1389 u32 salt_len;
1390 u32 salt_iter;
1391 u32 salt_sign[2];
1392
1393 u32 keccak_mdlen;
1394 u32 truecrypt_mdlen;
1395
1396 u32 digests_cnt;
1397 u32 digests_done;
1398
1399 u32 digests_offset;
1400
1401 u32 scrypt_N;
1402 u32 scrypt_r;
1403 u32 scrypt_p;
1404 u32 scrypt_tmto;
1405 u32 scrypt_phy;
1406
1407 } salt_t;
1408
1409 typedef struct
1410 {
1411 int V;
1412 int R;
1413 int P;
1414
1415 int enc_md;
1416
1417 u32 id_buf[8];
1418 u32 u_buf[32];
1419 u32 o_buf[32];
1420
1421 int id_len;
1422 int o_len;
1423 int u_len;
1424
1425 u32 rc4key[2];
1426 u32 rc4data[2];
1427
1428 } pdf_t;
1429
1430 typedef struct
1431 {
1432 u32 pke[25];
1433 u32 eapol[64];
1434 int eapol_size;
1435 int keyver;
1436 u8 orig_mac1[6];
1437 u8 orig_mac2[6];
1438 u8 orig_nonce1[32];
1439 u8 orig_nonce2[32];
1440
1441 } wpa_t;
1442
1443 typedef struct
1444 {
1445 u32 cry_master_buf[64];
1446 u32 ckey_buf[64];
1447 u32 public_key_buf[64];
1448
1449 u32 cry_master_len;
1450 u32 ckey_len;
1451 u32 public_key_len;
1452
1453 } bitcoin_wallet_t;
1454
1455 typedef struct
1456 {
1457 u32 salt_buf[30];
1458 u32 salt_len;
1459
1460 u32 esalt_buf[38];
1461 u32 esalt_len;
1462
1463 } sip_t;
1464
1465 typedef struct
1466 {
1467 u32 data[384];
1468
1469 } androidfde_t;
1470
1471 typedef struct
1472 {
1473 u32 nr_buf[16];
1474 u32 nr_len;
1475
1476 u32 msg_buf[128];
1477 u32 msg_len;
1478
1479 } ikepsk_t;
1480
1481 typedef struct
1482 {
1483 u32 user_len;
1484 u32 domain_len;
1485 u32 srvchall_len;
1486 u32 clichall_len;
1487
1488 u32 userdomain_buf[64];
1489 u32 chall_buf[256];
1490
1491 } netntlm_t;
1492
1493 typedef struct
1494 {
1495 u32 user[16];
1496 u32 realm[16];
1497 u32 salt[32];
1498 u32 timestamp[16];
1499 u32 checksum[4];
1500
1501 } krb5pa_t;
1502
1503 typedef struct
1504 {
1505 u32 account_info[512];
1506 u32 checksum[4];
1507 u32 edata2[2560];
1508 u32 edata2_len;
1509
1510 } krb5tgs_t;
1511
1512 typedef struct
1513 {
1514 u32 salt_buf[16];
1515 u32 data_buf[112];
1516 u32 keyfile_buf[16];
1517
1518 } tc_t;
1519
1520 typedef struct
1521 {
1522 u32 salt_buf[16];
1523
1524 } pbkdf2_md5_t;
1525
1526 typedef struct
1527 {
1528 u32 salt_buf[16];
1529
1530 } pbkdf2_sha1_t;
1531
1532 typedef struct
1533 {
1534 u32 salt_buf[16];
1535
1536 } pbkdf2_sha256_t;
1537
1538 typedef struct
1539 {
1540 u32 salt_buf[32];
1541
1542 } pbkdf2_sha512_t;
1543
1544 typedef struct
1545 {
1546 u32 salt_buf[128];
1547 u32 salt_len;
1548
1549 } rakp_t;
1550
1551 typedef struct
1552 {
1553 u32 data_len;
1554 u32 data_buf[512];
1555
1556 } cloudkey_t;
1557
1558 typedef struct
1559 {
1560 u32 encryptedVerifier[4];
1561 u32 encryptedVerifierHash[5];
1562
1563 u32 keySize;
1564
1565 } office2007_t;
1566
1567 typedef struct
1568 {
1569 u32 encryptedVerifier[4];
1570 u32 encryptedVerifierHash[8];
1571
1572 } office2010_t;
1573
1574 typedef struct
1575 {
1576 u32 encryptedVerifier[4];
1577 u32 encryptedVerifierHash[8];
1578
1579 } office2013_t;
1580
1581 typedef struct
1582 {
1583 u32 version;
1584 u32 encryptedVerifier[4];
1585 u32 encryptedVerifierHash[4];
1586 u32 rc4key[2];
1587
1588 } oldoffice01_t;
1589
1590 typedef struct
1591 {
1592 u32 version;
1593 u32 encryptedVerifier[4];
1594 u32 encryptedVerifierHash[5];
1595 u32 rc4key[2];
1596
1597 } oldoffice34_t;
1598
1599 typedef struct
1600 {
1601 u32 version;
1602 u32 algorithm;
1603
1604 /* key-file handling */
1605 u32 keyfile_len;
1606 u32 keyfile[8];
1607
1608 u32 final_random_seed[8];
1609 u32 transf_random_seed[8];
1610 u32 enc_iv[4];
1611 u32 contents_hash[8];
1612
1613 /* specific to version 1 */
1614 u32 contents_len;
1615 u32 contents[12500];
1616
1617 /* specific to version 2 */
1618 u32 expected_bytes[8];
1619
1620 } keepass_t;
1621
1622 typedef struct
1623 {
1624 u32 digest[4];
1625 u32 out[4];
1626
1627 } pdf14_tmp_t;
1628
1629 typedef struct
1630 {
1631 union
1632 {
1633 u32 dgst32[16];
1634 u64 dgst64[8];
1635 };
1636
1637 u32 dgst_len;
1638 u32 W_len;
1639
1640 } pdf17l8_tmp_t;
1641
1642 typedef struct
1643 {
1644 u32 digest_buf[4];
1645
1646 } phpass_tmp_t;
1647
1648 typedef struct
1649 {
1650 u32 digest_buf[4];
1651
1652 } md5crypt_tmp_t;
1653
1654 typedef struct
1655 {
1656 u32 alt_result[8];
1657
1658 u32 p_bytes[4];
1659 u32 s_bytes[4];
1660
1661 } sha256crypt_tmp_t;
1662
1663 typedef struct
1664 {
1665 u64 l_alt_result[8];
1666
1667 u64 l_p_bytes[2];
1668 u64 l_s_bytes[2];
1669
1670 } sha512crypt_tmp_t;
1671
1672 typedef struct
1673 {
1674 u32 ipad[5];
1675 u32 opad[5];
1676
1677 u32 dgst[10];
1678 u32 out[10];
1679
1680 } wpa_tmp_t;
1681
1682 typedef struct
1683 {
1684 u64 dgst[8];
1685
1686 } bitcoin_wallet_tmp_t;
1687
1688 typedef struct
1689 {
1690 u32 ipad[5];
1691 u32 opad[5];
1692
1693 u32 dgst[5];
1694 u32 out[4];
1695
1696 } dcc2_tmp_t;
1697
1698 typedef struct
1699 {
1700 u32 E[18];
1701
1702 u32 P[18];
1703
1704 u32 S0[256];
1705 u32 S1[256];
1706 u32 S2[256];
1707 u32 S3[256];
1708
1709 } bcrypt_tmp_t;
1710
1711 typedef struct
1712 {
1713 u32 digest[2];
1714
1715 u32 P[18];
1716
1717 u32 S0[256];
1718 u32 S1[256];
1719 u32 S2[256];
1720 u32 S3[256];
1721
1722 } pwsafe2_tmp_t;
1723
1724 typedef struct
1725 {
1726 u32 digest_buf[8];
1727
1728 } pwsafe3_tmp_t;
1729
1730 typedef struct
1731 {
1732 u32 digest_buf[5];
1733
1734 } androidpin_tmp_t;
1735
1736 typedef struct
1737 {
1738 u32 ipad[5];
1739 u32 opad[5];
1740
1741 u32 dgst[10];
1742 u32 out[10];
1743
1744 } androidfde_tmp_t;
1745
1746 typedef struct
1747 {
1748 u32 ipad[16];
1749 u32 opad[16];
1750
1751 u32 dgst[64];
1752 u32 out[64];
1753
1754 } tc_tmp_t;
1755
1756 typedef struct
1757 {
1758 u64 ipad[8];
1759 u64 opad[8];
1760
1761 u64 dgst[32];
1762 u64 out[32];
1763
1764 } tc64_tmp_t;
1765
1766 typedef struct
1767 {
1768 u32 ipad[4];
1769 u32 opad[4];
1770
1771 u32 dgst[32];
1772 u32 out[32];
1773
1774 } pbkdf2_md5_tmp_t;
1775
1776 typedef struct
1777 {
1778 u32 ipad[5];
1779 u32 opad[5];
1780
1781 u32 dgst[32];
1782 u32 out[32];
1783
1784 } pbkdf2_sha1_tmp_t;
1785
1786 typedef struct
1787 {
1788 u32 ipad[8];
1789 u32 opad[8];
1790
1791 u32 dgst[32];
1792 u32 out[32];
1793
1794 } pbkdf2_sha256_tmp_t;
1795
1796 typedef struct
1797 {
1798 u64 ipad[8];
1799 u64 opad[8];
1800
1801 u64 dgst[16];
1802 u64 out[16];
1803
1804 } pbkdf2_sha512_tmp_t;
1805
1806 typedef struct
1807 {
1808 u64 out[8];
1809
1810 } ecryptfs_tmp_t;
1811
1812 typedef struct
1813 {
1814 u64 ipad[8];
1815 u64 opad[8];
1816
1817 u64 dgst[16];
1818 u64 out[16];
1819
1820 } oraclet_tmp_t;
1821
1822 typedef struct
1823 {
1824 u32 ipad[5];
1825 u32 opad[5];
1826
1827 u32 dgst[5];
1828 u32 out[5];
1829
1830 } agilekey_tmp_t;
1831
1832 typedef struct
1833 {
1834 u32 ipad[5];
1835 u32 opad[5];
1836
1837 u32 dgst1[5];
1838 u32 out1[5];
1839
1840 u32 dgst2[5];
1841 u32 out2[5];
1842
1843 } mywallet_tmp_t;
1844
1845 typedef struct
1846 {
1847 u32 ipad[5];
1848 u32 opad[5];
1849
1850 u32 dgst[5];
1851 u32 out[5];
1852
1853 } sha1aix_tmp_t;
1854
1855 typedef struct
1856 {
1857 u32 ipad[8];
1858 u32 opad[8];
1859
1860 u32 dgst[8];
1861 u32 out[8];
1862
1863 } sha256aix_tmp_t;
1864
1865 typedef struct
1866 {
1867 u64 ipad[8];
1868 u64 opad[8];
1869
1870 u64 dgst[8];
1871 u64 out[8];
1872
1873 } sha512aix_tmp_t;
1874
1875 typedef struct
1876 {
1877 u32 ipad[8];
1878 u32 opad[8];
1879
1880 u32 dgst[8];
1881 u32 out[8];
1882
1883 } lastpass_tmp_t;
1884
1885 typedef struct
1886 {
1887 u64 digest_buf[8];
1888
1889 } drupal7_tmp_t;
1890
1891 typedef struct
1892 {
1893 u32 ipad[5];
1894 u32 opad[5];
1895
1896 u32 dgst[5];
1897 u32 out[5];
1898
1899 } lotus8_tmp_t;
1900
1901 typedef struct
1902 {
1903 u32 out[5];
1904
1905 } office2007_tmp_t;
1906
1907 typedef struct
1908 {
1909 u32 out[5];
1910
1911 } office2010_tmp_t;
1912
1913 typedef struct
1914 {
1915 u64 out[8];
1916
1917 } office2013_tmp_t;
1918
1919 typedef struct
1920 {
1921 u32 digest_buf[5];
1922
1923 } saph_sha1_tmp_t;
1924
1925 typedef struct
1926 {
1927 u32 block[16];
1928
1929 u32 dgst[8];
1930
1931 u32 block_len;
1932 u32 final_len;
1933
1934 } seven_zip_tmp_t;
1935
1936 typedef struct
1937 {
1938 u32 KEK[5];
1939
1940 u32 lsb[4];
1941 u32 cipher[4];
1942
1943 } axcrypt_tmp_t;
1944
1945 typedef struct
1946 {
1947 u32 tmp_digest[8];
1948
1949 } keepass_tmp_t;
1950
1951 typedef struct
1952 {
1953 u32 Kc[16];
1954 u32 Kd[16];
1955
1956 u32 iv[2];
1957
1958 } bsdicrypt_tmp_t;
1959
1960 typedef struct
1961 {
1962 u32 dgst[17][5];
1963
1964 } rar3_tmp_t;
1965
1966 typedef struct
1967 {
1968 u32 user[16];
1969
1970 } cram_md5_t;
1971
1972 typedef struct
1973 {
1974 u32 iv_buf[4];
1975 u32 iv_len;
1976
1977 u32 salt_buf[4];
1978 u32 salt_len;
1979
1980 u32 crc;
1981
1982 u32 data_buf[96];
1983 u32 data_len;
1984
1985 u32 unpack_size;
1986
1987 } seven_zip_t;
1988
1989 typedef struct
1990 {
1991 u32 key;
1992 u64 val;
1993
1994 } hcstat_table_t;
1995
1996 typedef struct
1997 {
1998 u32 cs_buf[0x100];
1999 u32 cs_len;
2000
2001 } cs_t;
2002
2003 typedef struct
2004 {
2005 u32 cmds[0x100];
2006
2007 } kernel_rule_t;
2008
2009 typedef struct
2010 {
2011 u32 gidvid;
2012 u32 il_pos;
2013
2014 } plain_t;
2015
2016 typedef struct
2017 {
2018 u32 i[16];
2019
2020 u32 pw_len;
2021
2022 u32 alignment_placeholder_1;
2023 u32 alignment_placeholder_2;
2024 u32 alignment_placeholder_3;
2025
2026 } pw_t;
2027
2028 typedef struct
2029 {
2030 u32 i;
2031
2032 } bf_t;
2033
2034 typedef struct
2035 {
2036 u32 i[8];
2037
2038 u32 pw_len;
2039
2040 } comb_t;
2041
2042 typedef struct
2043 {
2044 u32 b[32];
2045
2046 } bs_word_t;
2047
2048 typedef struct
2049 {
2050 uint4 P[64];
2051
2052 } scrypt_tmp_t;