hashcat.git / OpenCL / types_ocl.c (commit: Add platform vendor_id detection)
1 /**
2 * Authors.....: Jens Steube <jens.steube@gmail.com>
3 * magnum <john.magnum@hushmail.com>
4 *
5 * License.....: MIT
6 */
7
8 #define DEVICE_TYPE_CPU 2
9 #define DEVICE_TYPE_GPU 4
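/*
 * Note: these values appear to mirror the OpenCL host-side bitfield, where
 * CL_DEVICE_TYPE_CPU == (1 << 1) == 2 and CL_DEVICE_TYPE_GPU == (1 << 2) == 4,
 * assuming they are kept in sync with the host code that sets them.
 */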
10
11 typedef uchar u8;
12 typedef ushort u16;
13 typedef uint u32;
14 typedef ulong u64;
15
16 #ifndef NEW_SIMD_CODE
17 #undef VECT_SIZE
18 #define VECT_SIZE 1
19 #endif
20
21 #define CONCAT(a, b) a##b
22 #define VTYPE(type, width) CONCAT(type, width)
23
24 #if VECT_SIZE == 1
25 typedef uchar u8x;
26 typedef ushort u16x;
27 typedef uint u32x;
28 typedef ulong u64x;
29 #else
30 typedef VTYPE(uchar, VECT_SIZE) u8x;
31 typedef VTYPE(ushort, VECT_SIZE) u16x;
32 typedef VTYPE(uint, VECT_SIZE) u32x;
33 typedef VTYPE(ulong, VECT_SIZE) u64x;
34 #endif
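/*
 * Note: VTYPE(uint, VECT_SIZE) token-pastes the OpenCL vector type name, so
 * with VECT_SIZE == 4 the u32x alias becomes uint4, while VECT_SIZE == 1
 * keeps the plain scalar typedefs above. Kernels built without NEW_SIMD_CODE
 * are forced back to VECT_SIZE 1 and therefore take the scalar code paths.
 */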
35
36 static inline u32 l32_from_64_S (u64 a)
37 {
38 const u32 r = (u32) (a);
39
40 return r;
41 }
42
43 static inline u32 h32_from_64_S (u64 a)
44 {
45 a >>= 32;
46
47 const u32 r = (u32) (a);
48
49 return r;
50 }
51
52 static inline u64 hl32_to_64_S (const u32 a, const u32 b)
53 {
54 return as_ulong ((uint2) (b, a));
55 }
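/*
 * Example: for a = 0x1122334455667788, h32_from_64_S (a) == 0x11223344 and
 * l32_from_64_S (a) == 0x55667788; hl32_to_64_S (0x11223344, 0x55667788)
 * rebuilds the original value. The (uint2) (b, a) ordering assumes .s0 lands
 * in the low 32 bits of the as_ulong () result, i.e. a little-endian device.
 */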
56
57 static inline u32x l32_from_64 (u64x a)
58 {
59 u32x r;
60
61 #if VECT_SIZE == 1
62 r = (u32) a;
63 #endif
64
65 #if VECT_SIZE >= 2
66 r.s0 = (u32) a.s0;
67 r.s1 = (u32) a.s1;
68 #endif
69
70 #if VECT_SIZE >= 4
71 r.s2 = (u32) a.s2;
72 r.s3 = (u32) a.s3;
73 #endif
74
75 #if VECT_SIZE >= 8
76 r.s4 = (u32) a.s4;
77 r.s5 = (u32) a.s5;
78 r.s6 = (u32) a.s6;
79 r.s7 = (u32) a.s7;
80 #endif
81
82 #if VECT_SIZE >= 16
83 r.s8 = (u32) a.s8;
84 r.s9 = (u32) a.s9;
85 r.sa = (u32) a.sa;
86 r.sb = (u32) a.sb;
87 r.sc = (u32) a.sc;
88 r.sd = (u32) a.sd;
89 r.se = (u32) a.se;
90 r.sf = (u32) a.sf;
91 #endif
92
93 return r;
94 }
95
96 static inline u32x h32_from_64 (u64x a)
97 {
98 a >>= 32;
99
100 u32x r;
101
102 #if VECT_SIZE == 1
103 r = (u32) a;
104 #endif
105
106 #if VECT_SIZE >= 2
107 r.s0 = (u32) a.s0;
108 r.s1 = (u32) a.s1;
109 #endif
110
111 #if VECT_SIZE >= 4
112 r.s2 = (u32) a.s2;
113 r.s3 = (u32) a.s3;
114 #endif
115
116 #if VECT_SIZE >= 8
117 r.s4 = (u32) a.s4;
118 r.s5 = (u32) a.s5;
119 r.s6 = (u32) a.s6;
120 r.s7 = (u32) a.s7;
121 #endif
122
123 #if VECT_SIZE >= 16
124 r.s8 = (u32) a.s8;
125 r.s9 = (u32) a.s9;
126 r.sa = (u32) a.sa;
127 r.sb = (u32) a.sb;
128 r.sc = (u32) a.sc;
129 r.sd = (u32) a.sd;
130 r.se = (u32) a.se;
131 r.sf = (u32) a.sf;
132 #endif
133
134 return r;
135 }
136
137 static inline u64x hl32_to_64 (const u32x a, const u32x b)
138 {
139 u64x r;
140
141 #if VECT_SIZE == 1
142 r = as_ulong ((uint2) (b, a));
143 #endif
144
145 #if VECT_SIZE >= 2
146 r.s0 = as_ulong ((uint2) (b.s0, a.s0));
147 r.s1 = as_ulong ((uint2) (b.s1, a.s1));
148 #endif
149
150 #if VECT_SIZE >= 4
151 r.s2 = as_ulong ((uint2) (b.s2, a.s2));
152 r.s3 = as_ulong ((uint2) (b.s3, a.s3));
153 #endif
154
155 #if VECT_SIZE >= 8
156 r.s4 = as_ulong ((uint2) (b.s4, a.s4));
157 r.s5 = as_ulong ((uint2) (b.s5, a.s5));
158 r.s6 = as_ulong ((uint2) (b.s6, a.s6));
159 r.s7 = as_ulong ((uint2) (b.s7, a.s7));
160 #endif
161
162 #if VECT_SIZE >= 16
163 r.s8 = as_ulong ((uint2) (b.s8, a.s8));
164 r.s9 = as_ulong ((uint2) (b.s9, a.s9));
165 r.sa = as_ulong ((uint2) (b.sa, a.sa));
166 r.sb = as_ulong ((uint2) (b.sb, a.sb));
167 r.sc = as_ulong ((uint2) (b.sc, a.sc));
168 r.sd = as_ulong ((uint2) (b.sd, a.sd));
169 r.se = as_ulong ((uint2) (b.se, a.se));
170 r.sf = as_ulong ((uint2) (b.sf, a.sf));
171 #endif
172
173 return r;
174 }
175
176 #ifdef IS_AMD
177 static inline u32 swap32_S (const u32 v)
178 {
179 return (as_uint (as_uchar4 (v).s3210));
180 }
181
182 static inline u64 swap64_S (const u64 v)
183 {
184 return (as_ulong (as_uchar8 (v).s76543210));
185 }
186
187 static inline u32 rotr32_S (const u32 a, const u32 n)
188 {
189 return rotate (a, 32 - n);
190 }
191
192 static inline u32 rotl32_S (const u32 a, const u32 n)
193 {
194 return rotate (a, n);
195 }
196
197 static inline u64 rotr64_S (const u64 a, const u32 n)
198 {
199 const u32 a0 = h32_from_64_S (a);
200 const u32 a1 = l32_from_64_S (a);
201
202 const u32 t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
203 const u32 t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
204
205 const u64 r = hl32_to_64_S (t0, t1);
206
207 return r;
208 }
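/*
 * Note, assuming amd_bitalign (hi, lo, s) returns the low 32 bits of
 * (((u64) hi << 32 | lo) >> s): the two selects above build a 64-bit rotate
 * out of two 32-bit funnel shifts. Example: rotr64_S (0x0000000100000000, 32)
 * swaps the halves and yields 0x0000000000000001.
 */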
209
210 static inline u64 rotl64_S (const u64 a, const u32 n)
211 {
212 return rotr64_S (a, 64 - n);
213 }
214
215 static inline u32x swap32 (const u32x v)
216 {
217 return ((v >> 24) & 0x000000ff)
218 | ((v >> 8) & 0x0000ff00)
219 | ((v << 8) & 0x00ff0000)
220 | ((v << 24) & 0xff000000);
221 }
222
223 static inline u64x swap64 (const u64x v)
224 {
225 return ((v >> 56) & 0x00000000000000ff)
226 | ((v >> 40) & 0x000000000000ff00)
227 | ((v >> 24) & 0x0000000000ff0000)
228 | ((v >> 8) & 0x00000000ff000000)
229 | ((v << 8) & 0x000000ff00000000)
230 | ((v << 24) & 0x0000ff0000000000)
231 | ((v << 40) & 0x00ff000000000000)
232 | ((v << 56) & 0xff00000000000000);
233 }
234
235 static inline u32x rotr32 (const u32x a, const u32 n)
236 {
237 return rotate (a, 32 - n);
238 }
239
240 static inline u32x rotl32 (const u32x a, const u32 n)
241 {
242 return rotate (a, n);
243 }
244
245 static inline u64x rotr64 (const u64x a, const u32 n)
246 {
247 const u32x a0 = h32_from_64 (a);
248 const u32x a1 = l32_from_64 (a);
249
250 const u32x t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
251 const u32x t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
252
253 const u64x r = hl32_to_64 (t0, t1);
254
255 return r;
256 }
257
258 static inline u64x rotl64 (const u64x a, const u32 n)
259 {
260 return rotr64 (a, 64 - n);
261 }
262
263 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
264 {
265 return amd_bfe (a, b, c);
266 }
267
268 static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
269 {
270 return amd_bytealign (a, b, c);
271 }
272 #endif
273
274 #ifdef IS_NV
275 static inline u32 swap32_S (const u32 v)
276 {
277 u32 r;
278
279 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));
280
281 return r;
282 }
283
284 static inline u64 swap64_S (const u64 v)
285 {
286 u32 il;
287 u32 ir;
288
289 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(v));
290
291 u32 tl;
292 u32 tr;
293
294 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl) : "r"(il));
295 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));
296
297 u64 r;
298
299 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));
300
301 return r;
302 }
303
304 static inline u32 rotr32_S (const u32 a, const u32 n)
305 {
306 return rotate (a, 32 - n);
307 }
308
309 static inline u32 rotl32_S (const u32 a, const u32 n)
310 {
311 return rotate (a, n);
312 }
313
314 #if CUDA_ARCH >= 350
315 static inline u64 rotr64_S (const u64 a, const u32 n)
316 {
317 u32 il;
318 u32 ir;
319
320 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
321
322 u32 tl;
323 u32 tr;
324
325 if (n >= 32)
326 {
327 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
328 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
329 }
330 else
331 {
332 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
333 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
334 }
335
336 u64 r;
337
338 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
339
340 return r;
341 }
342 #else
343 static inline u64 rotr64_S (const u64 a, const u32 n)
344 {
345 return rotate (a, (u64) 64 - n);
346 }
347 #endif
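/*
 * Note: shf.r.wrap.b32 d, lo, hi, n is a funnel shift right, returning the
 * low 32 bits of ((hi:lo) >> n) with the shift amount taken modulo 32; the
 * n >= 32 / n < 32 split above keeps the effective shift inside that range.
 */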
348
349 static inline u64 rotl64_S (const u64 a, const u32 n)
350 {
351 return rotr64_S (a, 64 - n);
352 }
353
354 #if CUDA_ARCH >= 500
355 static inline u32 lut3_2d_S (const u32 a, const u32 b, const u32 c)
356 {
357 u32 r;
358
359 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
360
361 return r;
362 }
363
364 static inline u32 lut3_39_S (const u32 a, const u32 b, const u32 c)
365 {
366 u32 r;
367
368 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
369
370 return r;
371 }
372
373 static inline u32 lut3_59_S (const u32 a, const u32 b, const u32 c)
374 {
375 u32 r;
376
377 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
378
379 return r;
380 }
381
382 static inline u32 lut3_96_S (const u32 a, const u32 b, const u32 c)
383 {
384 u32 r;
385
386 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
387
388 return r;
389 }
390
391 static inline u32 lut3_e4_S (const u32 a, const u32 b, const u32 c)
392 {
393 u32 r;
394
395 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
396
397 return r;
398 }
399
400 static inline u32 lut3_e8_S (const u32 a, const u32 b, const u32 c)
401 {
402 u32 r;
403
404 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
405
406 return r;
407 }
408
409 static inline u32 lut3_ca_S (const u32 a, const u32 b, const u32 c)
410 {
411 u32 r;
412
413 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
414
415 return r;
416 }
417 #endif
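/*
 * Note: the lop3.b32 immediate is the truth table of the three inputs,
 * i.e. f (0xf0, 0xcc, 0xaa). So 0x96 computes a ^ b ^ c, 0xe8 the majority
 * function (a & b) | (a & c) | (b & c), and 0xca the selection
 * (a & b) | (~a & c); the remaining constants follow the same encoding.
 */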
418
419 static inline u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
420 {
421 u32 r;
422
423 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
424
425 return r;
426 }
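/*
 * Note: prmt.b32 treats {b:a} as an eight-byte table (a supplies bytes 0-3,
 * b bytes 4-7) and each selector nibble picks one byte of the result, low
 * nibble first. Example: __byte_perm_S (0x11223344, 0x55667788, 0x5410)
 * returns 0x77883344; selector 0x0123 with b == 0 is the byte swap used in
 * swap32_S () above.
 */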
427
428 static inline u32x swap32 (const u32x v)
429 {
430 return ((v >> 24) & 0x000000ff)
431 | ((v >> 8) & 0x0000ff00)
432 | ((v << 8) & 0x00ff0000)
433 | ((v << 24) & 0xff000000);
434 }
435
436 static inline u64x swap64 (const u64x v)
437 {
438 return ((v >> 56) & 0x00000000000000ff)
439 | ((v >> 40) & 0x000000000000ff00)
440 | ((v >> 24) & 0x0000000000ff0000)
441 | ((v >> 8) & 0x00000000ff000000)
442 | ((v << 8) & 0x000000ff00000000)
443 | ((v << 24) & 0x0000ff0000000000)
444 | ((v << 40) & 0x00ff000000000000)
445 | ((v << 56) & 0xff00000000000000);
446 }
447
448 static inline u32x rotr32 (const u32x a, const u32 n)
449 {
450 return rotate (a, 32 - n);
451 }
452
453 static inline u32x rotl32 (const u32x a, const u32 n)
454 {
455 return rotate (a, n);
456 }
457
458 #if CUDA_ARCH >= 350
459 static inline u64x rotr64 (const u64x a, const u32 n)
460 {
461 u64x r;
462
463 u32 il;
464 u32 ir;
465 u32 tl;
466 u32 tr;
467
468 #if VECT_SIZE == 1
469
470 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
471
472 if (n >= 32)
473 {
474 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
475 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
476 }
477 else
478 {
479 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
480 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
481 }
482
483 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
484
485 #endif
486
487 #if VECT_SIZE >= 2
488
489 {
490 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s0));
491
492 if (n >= 32)
493 {
494 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
495 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
496 }
497 else
498 {
499 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
500 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
501 }
502
503 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s0) : "r"(tl), "r"(tr));
504 }
505
506 {
507 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s1));
508
509 if (n >= 32)
510 {
511 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
512 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
513 }
514 else
515 {
516 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
517 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
518 }
519
520 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s1) : "r"(tl), "r"(tr));
521 }
522
523 #endif
524
525 #if VECT_SIZE >= 4
526
527 {
528 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s2));
529
530 if (n >= 32)
531 {
532 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
533 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
534 }
535 else
536 {
537 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
538 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
539 }
540
541 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s2) : "r"(tl), "r"(tr));
542 }
543
544 {
545 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s3));
546
547 if (n >= 32)
548 {
549 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
550 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
551 }
552 else
553 {
554 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
555 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
556 }
557
558 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s3) : "r"(tl), "r"(tr));
559 }
560
561 #endif
562
563 #if VECT_SIZE >= 8
564
565 {
566 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s4));
567
568 if (n >= 32)
569 {
570 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
571 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
572 }
573 else
574 {
575 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
576 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
577 }
578
579 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s4) : "r"(tl), "r"(tr));
580 }
581
582 {
583 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s5));
584
585 if (n >= 32)
586 {
587 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
588 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
589 }
590 else
591 {
592 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
593 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
594 }
595
596 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s5) : "r"(tl), "r"(tr));
597 }
598
599 {
600 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s6));
601
602 if (n >= 32)
603 {
604 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
605 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
606 }
607 else
608 {
609 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
610 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
611 }
612
613 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s6) : "r"(tl), "r"(tr));
614 }
615
616 {
617 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s7));
618
619 if (n >= 32)
620 {
621 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
622 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
623 }
624 else
625 {
626 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
627 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
628 }
629
630 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s7) : "r"(tl), "r"(tr));
631 }
632
633 #endif
634
635 #if VECT_SIZE >= 16
636
637 {
638 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s8));
639
640 if (n >= 32)
641 {
642 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
643 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
644 }
645 else
646 {
647 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
648 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
649 }
650
651 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s8) : "r"(tl), "r"(tr));
652 }
653
654 {
655 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.s9));
656
657 if (n >= 32)
658 {
659 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
660 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
661 }
662 else
663 {
664 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
665 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
666 }
667
668 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.s9) : "r"(tl), "r"(tr));
669 }
670
671 {
672 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sa));
673
674 if (n >= 32)
675 {
676 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
677 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
678 }
679 else
680 {
681 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
682 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
683 }
684
685 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sa) : "r"(tl), "r"(tr));
686 }
687
688 {
689 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sb));
690
691 if (n >= 32)
692 {
693 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
694 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
695 }
696 else
697 {
698 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
699 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
700 }
701
702 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sb) : "r"(tl), "r"(tr));
703 }
704
705 {
706 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sc));
707
708 if (n >= 32)
709 {
710 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
711 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
712 }
713 else
714 {
715 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
716 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
717 }
718
719 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sc) : "r"(tl), "r"(tr));
720 }
721
722 {
723 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sd));
724
725 if (n >= 32)
726 {
727 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
728 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
729 }
730 else
731 {
732 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
733 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
734 }
735
736 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sd) : "r"(tl), "r"(tr));
737 }
738
739 {
740 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.se));
741
742 if (n >= 32)
743 {
744 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
745 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
746 }
747 else
748 {
749 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
750 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
751 }
752
753 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.se) : "r"(tl), "r"(tr));
754 }
755
756 {
757 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a.sf));
758
759 if (n >= 32)
760 {
761 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
762 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
763 }
764 else
765 {
766 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
767 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
768 }
769
770 asm ("mov.b64 %0, {%1, %2};" : "=l"(r.sf) : "r"(tl), "r"(tr));
771 }
772
773 #endif
774
775 return r;
776 }
777 #else
778 static inline u64x rotr64 (const u64x a, const u32 n)
779 {
780 return rotate (a, (u64) 64 - n);
781 }
782 #endif
783
784 static inline u64x rotl64 (const u64x a, const u32 n)
785 {
786 return rotr64 (a, (u64) 64 - n);
787 }
788
789 static inline u32x __byte_perm (const u32x a, const u32x b, const u32x c)
790 {
791 u32x r;
792
793 #if VECT_SIZE == 1
794 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c) );
795 #endif
796
797 #if VECT_SIZE >= 2
798 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(a.s0), "r"(b.s0), "r"(c.s0));
799 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(a.s1), "r"(b.s1), "r"(c.s1));
800 #endif
801
802 #if VECT_SIZE >= 4
803 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(a.s2), "r"(b.s2), "r"(c.s2));
804 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(a.s3), "r"(b.s3), "r"(c.s3));
805 #endif
806
807 #if VECT_SIZE >= 8
808 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(a.s4), "r"(b.s4), "r"(c.s4));
809 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(a.s5), "r"(b.s5), "r"(c.s5));
810 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(a.s6), "r"(b.s6), "r"(c.s6));
811 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(a.s7), "r"(b.s7), "r"(c.s7));
812 #endif
813
814 #if VECT_SIZE >= 16
815 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(a.s8), "r"(b.s8), "r"(c.s8));
816 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(a.s9), "r"(b.s9), "r"(c.s9));
817 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(a.sa), "r"(b.sa), "r"(c.sa));
818 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(a.sb), "r"(b.sb), "r"(c.sb));
819 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(a.sc), "r"(b.sc), "r"(c.sc));
820 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(a.sd), "r"(b.sd), "r"(c.sd));
821 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(a.se), "r"(b.se), "r"(c.se));
822 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(a.sf), "r"(b.sf), "r"(c.sf));
823 #endif
824
825 return r;
826 }
827
828 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
829 {
830 u32 r;
831
832 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
833
834 return r;
835 }
836
837 #if CUDA_ARCH >= 350
838 static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
839 {
840 u32 r;
841
842 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));
843
844 return r;
845 }
846 #else
847 static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
848 {
849 return __byte_perm_S (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);
850 }
851 #endif
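/*
 * Note: both variants emulate AMD's amd_bytealign (), i.e. the low 32 bits of
 * (((u64) a << 32 | b) >> ((c & 3) * 8)). For c == 1 the selector becomes
 * (0x76543210 >> 4) & 0xffff == 0x4321, which picks bytes b1, b2, b3, a0,
 * exactly a one-byte right shift across the a:b pair.
 */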
852
853 #if CUDA_ARCH >= 500
854 static inline u32x lut3_2d (const u32x a, const u32x b, const u32x c)
855 {
856 u32x r;
857
858 #if VECT_SIZE == 1
859 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
860 #endif
861
862 #if VECT_SIZE >= 2
863 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
864 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
865 #endif
866
867 #if VECT_SIZE >= 4
868 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
869 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
870 #endif
871
872 #if VECT_SIZE >= 8
873 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
874 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
875 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
876 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
877 #endif
878
879 #if VECT_SIZE >= 16
880 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
881 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
882 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
883 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
884 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
885 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
886 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
887 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
888 #endif
889
890 return r;
891 }
892
893 static inline u32x lut3_39 (const u32x a, const u32x b, const u32x c)
894 {
895 u32x r;
896
897 #if VECT_SIZE == 1
898 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
899 #endif
900
901 #if VECT_SIZE >= 2
902 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
903 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
904 #endif
905
906 #if VECT_SIZE >= 4
907 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
908 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
909 #endif
910
911 #if VECT_SIZE >= 8
912 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
913 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
914 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
915 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
916 #endif
917
918 #if VECT_SIZE >= 16
919 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
920 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
921 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
922 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
923 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
924 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
925 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
926 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
927 #endif
928
929 return r;
930 }
931
932 static inline u32x lut3_59 (const u32x a, const u32x b, const u32x c)
933 {
934 u32x r;
935
936 #if VECT_SIZE == 1
937 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
938 #endif
939
940 #if VECT_SIZE >= 2
941 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
942 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
943 #endif
944
945 #if VECT_SIZE >= 4
946 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
947 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
948 #endif
949
950 #if VECT_SIZE >= 8
951 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
952 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
953 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
954 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
955 #endif
956
957 #if VECT_SIZE >= 16
958 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
959 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
960 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
961 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
962 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
963 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
964 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
965 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
966 #endif
967
968 return r;
969 }
970
971 static inline u32x lut3_96 (const u32x a, const u32x b, const u32x c)
972 {
973 u32x r;
974
975 #if VECT_SIZE == 1
976 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
977 #endif
978
979 #if VECT_SIZE >= 2
980 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
981 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
982 #endif
983
984 #if VECT_SIZE >= 4
985 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
986 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
987 #endif
988
989 #if VECT_SIZE >= 8
990 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
991 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
992 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
993 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
994 #endif
995
996 #if VECT_SIZE >= 16
997 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
998 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
999 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1000 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1001 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1002 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1003 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1004 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1005 #endif
1006
1007 return r;
1008 }
1009
1010 static inline u32x lut3_e4 (const u32x a, const u32x b, const u32x c)
1011 {
1012 u32x r;
1013
1014 #if VECT_SIZE == 1
1015 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1016 #endif
1017
1018 #if VECT_SIZE >= 2
1019 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1020 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1021 #endif
1022
1023 #if VECT_SIZE >= 4
1024 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1025 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1026 #endif
1027
1028 #if VECT_SIZE >= 8
1029 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1030 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1031 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1032 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1033 #endif
1034
1035 #if VECT_SIZE >= 16
1036 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1037 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1038 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1039 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1040 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1041 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1042 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1043 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1044 #endif
1045
1046 return r;
1047 }
1048
1049 static inline u32x lut3_e8 (const u32x a, const u32x b, const u32x c)
1050 {
1051 u32x r;
1052
1053 #if VECT_SIZE == 1
1054 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1055 #endif
1056
1057 #if VECT_SIZE >= 2
1058 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1059 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1060 #endif
1061
1062 #if VECT_SIZE >= 4
1063 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1064 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1065 #endif
1066
1067 #if VECT_SIZE >= 8
1068 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1069 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1070 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1071 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1072 #endif
1073
1074 #if VECT_SIZE >= 16
1075 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1076 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1077 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1078 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1079 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1080 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1081 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1082 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1083 #endif
1084
1085 return r;
1086 }
1087
1088 static inline u32x lut3_ca (const u32x a, const u32x b, const u32x c)
1089 {
1090 u32x r;
1091
1092 #if VECT_SIZE == 1
1093 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
1094 #endif
1095
1096 #if VECT_SIZE >= 2
1097 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
1098 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
1099 #endif
1100
1101 #if VECT_SIZE >= 4
1102 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
1103 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
1104 #endif
1105
1106 #if VECT_SIZE >= 8
1107 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
1108 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
1109 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
1110 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
1111 #endif
1112
1113 #if VECT_SIZE >= 16
1114 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s8) : "r" (a.s8), "r" (b.s8), "r" (c.s8));
1115 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s9) : "r" (a.s9), "r" (b.s9), "r" (c.s9));
1116 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sa) : "r" (a.sa), "r" (b.sa), "r" (c.sa));
1117 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sb) : "r" (a.sb), "r" (b.sb), "r" (c.sb));
1118 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sc) : "r" (a.sc), "r" (b.sc), "r" (c.sc));
1119 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sd) : "r" (a.sd), "r" (b.sd), "r" (c.sd));
1120 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.se) : "r" (a.se), "r" (b.se), "r" (c.se));
1121 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.sf) : "r" (a.sf), "r" (b.sf), "r" (c.sf));
1122 #endif
1123
1124 return r;
1125 }
1126
1127 #endif
1128 #endif
1129
1130 #ifdef IS_GENERIC
1131 static inline u32 swap32_S (const u32 v)
1132 {
1133 return (as_uint (as_uchar4 (v).s3210));
1134 }
1135
1136 static inline u64 swap64_S (const u64 v)
1137 {
1138 return (as_ulong (as_uchar8 (v).s76543210));
1139 }
1140
1141 static inline u32 rotr32_S (const u32 a, const u32 n)
1142 {
1143 return rotate (a, 32 - n);
1144 }
1145
1146 static inline u32 rotl32_S (const u32 a, const u32 n)
1147 {
1148 return rotate (a, n);
1149 }
1150
1151 static inline u64 rotr64_S (const u64 a, const u32 n)
1152 {
1153 return rotate (a, (u64) 64 - n);
1154 }
1155
1156 static inline u64 rotl64_S (const u64 a, const u32 n)
1157 {
1158 return rotate (a, (u64) n);
1159 }
1160
1161 static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
1162 {
1163 const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);
1164
1165 return (u32) (tmp);
1166 }
1167
1168 static inline u32x swap32 (const u32x v)
1169 {
1170 return ((v >> 24) & 0x000000ff)
1171 | ((v >> 8) & 0x0000ff00)
1172 | ((v << 8) & 0x00ff0000)
1173 | ((v << 24) & 0xff000000);
1174 }
1175
1176 static inline u64x swap64 (const u64x v)
1177 {
1178 return ((v >> 56) & 0x00000000000000ff)
1179 | ((v >> 40) & 0x000000000000ff00)
1180 | ((v >> 24) & 0x0000000000ff0000)
1181 | ((v >> 8) & 0x00000000ff000000)
1182 | ((v << 8) & 0x000000ff00000000)
1183 | ((v << 24) & 0x0000ff0000000000)
1184 | ((v << 40) & 0x00ff000000000000)
1185 | ((v << 56) & 0xff00000000000000);
1186 }
1187
1188 static inline u32x rotr32 (const u32x a, const u32 n)
1189 {
1190 return rotate (a, 32 - n);
1191 }
1192
1193 static inline u32x rotl32 (const u32x a, const u32 n)
1194 {
1195 return rotate (a, n);
1196 }
1197
1198 static inline u64x rotr64 (const u64x a, const u32 n)
1199 {
1200 return rotate (a, (u64) 64 - n);
1201 }
1202
1203 static inline u64x rotl64 (const u64x a, const u32 n)
1204 {
1205 return rotate (a, (u64) n);
1206 }
1207
1208 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
1209 {
1210 #define BIT(x) (1 << (x))
1211 #define BIT_MASK(x) (BIT (x) - 1)
1212 #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z))
1213
1214 return BFE (a, b, c);
1215 }
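/*
 * Example: __bfe (0xaabbccdd, 8, 8) extracts the 8-bit field starting at bit
 * 8 and returns 0xcc. This macro fallback assumes c < 32, since BIT (32)
 * would shift past the width of a u32.
 */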
1216
1217 static inline u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
1218 {
1219 #if VECT_SIZE == 1
1220 const u64x tmp = ((((u64x) (a)) << 32) | ((u64x) (b))) >> ((c & 3) * 8);
1221
1222 return (u32x) (tmp);
1223 #endif
1224
1225 #if VECT_SIZE == 2
1226 const u64x tmp = ((((u64x) (a.s0, a.s1)) << 32) | ((u64x) (b.s0, b.s1))) >> ((c & 3) * 8);
1227
1228 return (u32x) (tmp.s0, tmp.s1);
1229 #endif
1230
1231 #if VECT_SIZE == 4
1232 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3))) >> ((c & 3) * 8);
1233
1234 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3);
1235 #endif
1236
1237 #if VECT_SIZE == 8
1238 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7))) >> ((c & 3) * 8);
1239
1240 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7);
1241 #endif
1242
1243 #if VECT_SIZE == 16
1244 const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7, a.s8, a.s9, a.sa, a.sb, a.sc, a.sd, a.se, a.sf)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7, b.s8, b.s9, b.sa, b.sb, b.sc, b.sd, b.se, b.sf))) >> ((c & 3) * 8);
1245
1246 return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7, tmp.s8, tmp.s9, tmp.sa, tmp.sb, tmp.sc, tmp.sd, tmp.se, tmp.sf);
1247 #endif
1248 }
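/*
 * Note: this portable path widens each lane to 64 bits and shifts, matching
 * the AMD intrinsic for c in 0..3. Example: amd_bytealign (0x11223344,
 * 0xaabbccdd, 2) == 0x3344aabb.
 */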
1249 #endif
1250
1251 typedef struct
1252 {
1253 #if defined _DES_
1254 u32 digest_buf[4];
1255 #elif defined _MD4_
1256 u32 digest_buf[4];
1257 #elif defined _MD5_
1258 u32 digest_buf[4];
1259 #elif defined _MD5H_
1260 u32 digest_buf[4];
1261 #elif defined _SHA1_
1262 u32 digest_buf[5];
1263 #elif defined _BCRYPT_
1264 u32 digest_buf[6];
1265 #elif defined _SHA256_
1266 u32 digest_buf[8];
1267 #elif defined _SHA384_
1268 u32 digest_buf[16];
1269 #elif defined _SHA512_
1270 u32 digest_buf[16];
1271 #elif defined _KECCAK_
1272 u32 digest_buf[50];
1273 #elif defined _RIPEMD160_
1274 u32 digest_buf[5];
1275 #elif defined _WHIRLPOOL_
1276 u32 digest_buf[16];
1277 #elif defined _GOST_
1278 u32 digest_buf[8];
1279 #elif defined _GOST2012_256_
1280 u32 digest_buf[8];
1281 #elif defined _GOST2012_512_
1282 u32 digest_buf[16];
1283 #elif defined _SAPB_
1284 u32 digest_buf[4];
1285 #elif defined _SAPG_
1286 u32 digest_buf[5];
1287 #elif defined _MYSQL323_
1288 u32 digest_buf[4];
1289 #elif defined _LOTUS5_
1290 u32 digest_buf[4];
1291 #elif defined _LOTUS6_
1292 u32 digest_buf[4];
1293 #elif defined _SCRYPT_
1294 u32 digest_buf[8];
1295 #elif defined _LOTUS8_
1296 u32 digest_buf[4];
1297 #elif defined _OFFICE2007_
1298 u32 digest_buf[4];
1299 #elif defined _OFFICE2010_
1300 u32 digest_buf[4];
1301 #elif defined _OFFICE2013_
1302 u32 digest_buf[4];
1303 #elif defined _OLDOFFICE01_
1304 u32 digest_buf[4];
1305 #elif defined _OLDOFFICE34_
1306 u32 digest_buf[4];
1307 #elif defined _SIPHASH_
1308 u32 digest_buf[4];
1309 #elif defined _PBKDF2_MD5_
1310 u32 digest_buf[32];
1311 #elif defined _PBKDF2_SHA1_
1312 u32 digest_buf[32];
1313 #elif defined _PBKDF2_SHA256_
1314 u32 digest_buf[32];
1315 #elif defined _PBKDF2_SHA512_
1316 u32 digest_buf[32];
1317 #elif defined _PDF17L8_
1318 u32 digest_buf[8];
1319 #elif defined _CRC32_
1320 u32 digest_buf[4];
1321 #elif defined _SEVEN_ZIP_
1322 u32 digest_buf[4];
1323 #elif defined _ANDROIDFDE_
1324 u32 digest_buf[4];
1325 #elif defined _DCC2_
1326 u32 digest_buf[4];
1327 #elif defined _WPA_
1328 u32 digest_buf[4];
1329 #elif defined _MD5_SHA1_
1330 u32 digest_buf[4];
1331 #elif defined _SHA1_MD5_
1332 u32 digest_buf[5];
1333 #elif defined _NETNTLMV2_
1334 u32 digest_buf[4];
1335 #elif defined _KRB5PA_
1336 u32 digest_buf[4];
1337 #elif defined _CLOUDKEY_
1338 u32 digest_buf[8];
1339 #elif defined _SCRYPT_
1340 u32 digest_buf[4];
1341 #elif defined _PSAFE2_
1342 u32 digest_buf[5];
1343 #elif defined _LOTUS8_
1344 u32 digest_buf[4];
1345 #elif defined _RAR3_
1346 u32 digest_buf[4];
1347 #elif defined _SHA256_SHA1_
1348 u32 digest_buf[8];
1349 #elif defined _MS_DRSR_
1350 u32 digest_buf[8];
1351 #elif defined _ANDROIDFDE_SAMSUNG_
1352 u32 digest_buf[8];
1353 #elif defined _RAR5_
1354 u32 digest_buf[4];
1355 #elif defined _KRB5TGS_
1356 u32 digest_buf[4];
1357 #elif defined _AXCRYPT_
1358 u32 digest_buf[4];
1359 #elif defined _KEEPASS_
1360 u32 digest_buf[4];
1361 #endif
1362
1363 } digest_t;
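/*
 * Note: exactly one of the _<ALGO>_ symbols above is expected to be defined
 * per kernel build, so digest_t collapses to a single fixed-size digest
 * buffer sized for the hash mode being compiled.
 */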
1364
1365 typedef struct
1366 {
1367 u32 salt_buf[16];
1368 u32 salt_buf_pc[8];
1369
1370 u32 salt_len;
1371 u32 salt_iter;
1372 u32 salt_sign[2];
1373
1374 u32 keccak_mdlen;
1375 u32 truecrypt_mdlen;
1376
1377 u32 digests_cnt;
1378 u32 digests_done;
1379
1380 u32 digests_offset;
1381
1382 u32 scrypt_N;
1383 u32 scrypt_r;
1384 u32 scrypt_p;
1385 u32 scrypt_tmto;
1386 u32 scrypt_phy;
1387
1388 } salt_t;
1389
1390 typedef struct
1391 {
1392 int V;
1393 int R;
1394 int P;
1395
1396 int enc_md;
1397
1398 u32 id_buf[8];
1399 u32 u_buf[32];
1400 u32 o_buf[32];
1401
1402 int id_len;
1403 int o_len;
1404 int u_len;
1405
1406 u32 rc4key[2];
1407 u32 rc4data[2];
1408
1409 } pdf_t;
1410
1411 typedef struct
1412 {
1413 u32 pke[25];
1414 u32 eapol[64];
1415 int eapol_size;
1416 int keyver;
1417 u8 orig_mac1[6];
1418 u8 orig_mac2[6];
1419 u8 orig_nonce1[32];
1420 u8 orig_nonce2[32];
1421
1422 } wpa_t;
1423
1424 typedef struct
1425 {
1426 u32 cry_master_buf[64];
1427 u32 ckey_buf[64];
1428 u32 public_key_buf[64];
1429
1430 u32 cry_master_len;
1431 u32 ckey_len;
1432 u32 public_key_len;
1433
1434 } bitcoin_wallet_t;
1435
1436 typedef struct
1437 {
1438 u32 salt_buf[30];
1439 u32 salt_len;
1440
1441 u32 esalt_buf[38];
1442 u32 esalt_len;
1443
1444 } sip_t;
1445
1446 typedef struct
1447 {
1448 u32 data[384];
1449
1450 } androidfde_t;
1451
1452 typedef struct
1453 {
1454 u32 nr_buf[16];
1455 u32 nr_len;
1456
1457 u32 msg_buf[128];
1458 u32 msg_len;
1459
1460 } ikepsk_t;
1461
1462 typedef struct
1463 {
1464 u32 user_len;
1465 u32 domain_len;
1466 u32 srvchall_len;
1467 u32 clichall_len;
1468
1469 u32 userdomain_buf[64];
1470 u32 chall_buf[256];
1471
1472 } netntlm_t;
1473
1474 typedef struct
1475 {
1476 u32 user[16];
1477 u32 realm[16];
1478 u32 salt[32];
1479 u32 timestamp[16];
1480 u32 checksum[4];
1481
1482 } krb5pa_t;
1483
1484 typedef struct
1485 {
1486 u32 account_info[512];
1487 u32 checksum[4];
1488 u32 edata2[2560];
1489 u32 edata2_len;
1490
1491 } krb5tgs_t;
1492
1493 typedef struct
1494 {
1495 u32 salt_buf[16];
1496 u32 data_buf[112];
1497 u32 keyfile_buf[16];
1498
1499 } tc_t;
1500
1501 typedef struct
1502 {
1503 u32 salt_buf[16];
1504
1505 } pbkdf2_md5_t;
1506
1507 typedef struct
1508 {
1509 u32 salt_buf[16];
1510
1511 } pbkdf2_sha1_t;
1512
1513 typedef struct
1514 {
1515 u32 salt_buf[16];
1516
1517 } pbkdf2_sha256_t;
1518
1519 typedef struct
1520 {
1521 u32 salt_buf[32];
1522
1523 } pbkdf2_sha512_t;
1524
1525 typedef struct
1526 {
1527 u32 salt_buf[128];
1528 u32 salt_len;
1529
1530 } rakp_t;
1531
1532 typedef struct
1533 {
1534 u32 data_len;
1535 u32 data_buf[512];
1536
1537 } cloudkey_t;
1538
1539 typedef struct
1540 {
1541 u32 encryptedVerifier[4];
1542 u32 encryptedVerifierHash[5];
1543
1544 u32 keySize;
1545
1546 } office2007_t;
1547
1548 typedef struct
1549 {
1550 u32 encryptedVerifier[4];
1551 u32 encryptedVerifierHash[8];
1552
1553 } office2010_t;
1554
1555 typedef struct
1556 {
1557 u32 encryptedVerifier[4];
1558 u32 encryptedVerifierHash[8];
1559
1560 } office2013_t;
1561
1562 typedef struct
1563 {
1564 u32 version;
1565 u32 encryptedVerifier[4];
1566 u32 encryptedVerifierHash[4];
1567 u32 rc4key[2];
1568
1569 } oldoffice01_t;
1570
1571 typedef struct
1572 {
1573 u32 version;
1574 u32 encryptedVerifier[4];
1575 u32 encryptedVerifierHash[5];
1576 u32 rc4key[2];
1577
1578 } oldoffice34_t;
1579
1580 typedef struct
1581 {
1582 u32 version;
1583 u32 algorithm;
1584
1585 /* key-file handling */
1586 u32 keyfile_len;
1587 u32 keyfile[8];
1588
1589 u32 final_random_seed[8];
1590 u32 transf_random_seed[8];
1591 u32 enc_iv[4];
1592 u32 contents_hash[8];
1593
1594 /* specific to version 1 */
1595 u32 contents_len;
1596 u32 contents[75000];
1597
1598 /* specific to version 2 */
1599 u32 expected_bytes[8];
1600
1601 } keepass_t;
1602
1603 typedef struct
1604 {
1605 u32 digest[4];
1606 u32 out[4];
1607
1608 } pdf14_tmp_t;
1609
1610 typedef struct
1611 {
1612 union
1613 {
1614 u32 dgst32[16];
1615 u64 dgst64[8];
1616 };
1617
1618 u32 dgst_len;
1619 u32 W_len;
1620
1621 } pdf17l8_tmp_t;
1622
1623 typedef struct
1624 {
1625 u32 digest_buf[4];
1626
1627 } phpass_tmp_t;
1628
1629 typedef struct
1630 {
1631 u32 digest_buf[4];
1632
1633 } md5crypt_tmp_t;
1634
1635 typedef struct
1636 {
1637 u32 alt_result[8];
1638
1639 u32 p_bytes[4];
1640 u32 s_bytes[4];
1641
1642 } sha256crypt_tmp_t;
1643
1644 typedef struct
1645 {
1646 u64 l_alt_result[8];
1647
1648 u64 l_p_bytes[2];
1649 u64 l_s_bytes[2];
1650
1651 } sha512crypt_tmp_t;
1652
1653 typedef struct
1654 {
1655 u32 ipad[5];
1656 u32 opad[5];
1657
1658 u32 dgst[10];
1659 u32 out[10];
1660
1661 } wpa_tmp_t;
1662
1663 typedef struct
1664 {
1665 u64 dgst[8];
1666
1667 } bitcoin_wallet_tmp_t;
1668
1669 typedef struct
1670 {
1671 u32 ipad[5];
1672 u32 opad[5];
1673
1674 u32 dgst[5];
1675 u32 out[4];
1676
1677 } dcc2_tmp_t;
1678
1679 typedef struct
1680 {
1681 u32 E[18];
1682
1683 u32 P[18];
1684
1685 u32 S0[256];
1686 u32 S1[256];
1687 u32 S2[256];
1688 u32 S3[256];
1689
1690 } bcrypt_tmp_t;
1691
1692 typedef struct
1693 {
1694 u32 digest[2];
1695
1696 u32 P[18];
1697
1698 u32 S0[256];
1699 u32 S1[256];
1700 u32 S2[256];
1701 u32 S3[256];
1702
1703 } pwsafe2_tmp_t;
1704
1705 typedef struct
1706 {
1707 u32 digest_buf[8];
1708
1709 } pwsafe3_tmp_t;
1710
1711 typedef struct
1712 {
1713 u32 digest_buf[5];
1714
1715 } androidpin_tmp_t;
1716
1717 typedef struct
1718 {
1719 u32 ipad[5];
1720 u32 opad[5];
1721
1722 u32 dgst[10];
1723 u32 out[10];
1724
1725 } androidfde_tmp_t;
1726
1727 typedef struct
1728 {
1729 u32 ipad[16];
1730 u32 opad[16];
1731
1732 u32 dgst[64];
1733 u32 out[64];
1734
1735 } tc_tmp_t;
1736
1737 typedef struct
1738 {
1739 u64 ipad[8];
1740 u64 opad[8];
1741
1742 u64 dgst[32];
1743 u64 out[32];
1744
1745 } tc64_tmp_t;
1746
1747 typedef struct
1748 {
1749 u32 ipad[4];
1750 u32 opad[4];
1751
1752 u32 dgst[32];
1753 u32 out[32];
1754
1755 } pbkdf2_md5_tmp_t;
1756
1757 typedef struct
1758 {
1759 u32 ipad[5];
1760 u32 opad[5];
1761
1762 u32 dgst[32];
1763 u32 out[32];
1764
1765 } pbkdf2_sha1_tmp_t;
1766
1767 typedef struct
1768 {
1769 u32 ipad[8];
1770 u32 opad[8];
1771
1772 u32 dgst[32];
1773 u32 out[32];
1774
1775 } pbkdf2_sha256_tmp_t;
1776
1777 typedef struct
1778 {
1779 u64 ipad[8];
1780 u64 opad[8];
1781
1782 u64 dgst[16];
1783 u64 out[16];
1784
1785 } pbkdf2_sha512_tmp_t;
1786
1787 typedef struct
1788 {
1789 u64 out[8];
1790
1791 } ecryptfs_tmp_t;
1792
1793 typedef struct
1794 {
1795 u64 ipad[8];
1796 u64 opad[8];
1797
1798 u64 dgst[16];
1799 u64 out[16];
1800
1801 } oraclet_tmp_t;
1802
1803 typedef struct
1804 {
1805 u32 ipad[5];
1806 u32 opad[5];
1807
1808 u32 dgst[5];
1809 u32 out[5];
1810
1811 } agilekey_tmp_t;
1812
1813 typedef struct
1814 {
1815 u32 ipad[5];
1816 u32 opad[5];
1817
1818 u32 dgst1[5];
1819 u32 out1[5];
1820
1821 u32 dgst2[5];
1822 u32 out2[5];
1823
1824 } mywallet_tmp_t;
1825
1826 typedef struct
1827 {
1828 u32 ipad[5];
1829 u32 opad[5];
1830
1831 u32 dgst[5];
1832 u32 out[5];
1833
1834 } sha1aix_tmp_t;
1835
1836 typedef struct
1837 {
1838 u32 ipad[8];
1839 u32 opad[8];
1840
1841 u32 dgst[8];
1842 u32 out[8];
1843
1844 } sha256aix_tmp_t;
1845
1846 typedef struct
1847 {
1848 u64 ipad[8];
1849 u64 opad[8];
1850
1851 u64 dgst[8];
1852 u64 out[8];
1853
1854 } sha512aix_tmp_t;
1855
1856 typedef struct
1857 {
1858 u32 ipad[8];
1859 u32 opad[8];
1860
1861 u32 dgst[8];
1862 u32 out[8];
1863
1864 } lastpass_tmp_t;
1865
1866 typedef struct
1867 {
1868 u64 digest_buf[8];
1869
1870 } drupal7_tmp_t;
1871
1872 typedef struct
1873 {
1874 u32 ipad[5];
1875 u32 opad[5];
1876
1877 u32 dgst[5];
1878 u32 out[5];
1879
1880 } lotus8_tmp_t;
1881
1882 typedef struct
1883 {
1884 u32 out[5];
1885
1886 } office2007_tmp_t;
1887
1888 typedef struct
1889 {
1890 u32 out[5];
1891
1892 } office2010_tmp_t;
1893
1894 typedef struct
1895 {
1896 u64 out[8];
1897
1898 } office2013_tmp_t;
1899
1900 typedef struct
1901 {
1902 u32 digest_buf[5];
1903
1904 } saph_sha1_tmp_t;
1905
1906 typedef struct
1907 {
1908 u32 block[16];
1909
1910 u32 dgst[8];
1911
1912 u32 block_len;
1913 u32 final_len;
1914
1915 } seven_zip_tmp_t;
1916
1917 typedef struct
1918 {
1919 u32 KEK[5];
1920
1921 u32 lsb[4];
1922 u32 cipher[4];
1923
1924 } axcrypt_tmp_t;
1925
1926 typedef struct
1927 {
1928 u32 tmp_digest[8];
1929
1930 } keepass_tmp_t;
1931
1932 typedef struct
1933 {
1934 u32 Kc[16];
1935 u32 Kd[16];
1936
1937 u32 iv[2];
1938
1939 } bsdicrypt_tmp_t;
1940
1941 typedef struct
1942 {
1943 u32 dgst[17][5];
1944
1945 } rar3_tmp_t;
1946
1947 typedef struct
1948 {
1949 u32 user[16];
1950
1951 } cram_md5_t;
1952
1953 typedef struct
1954 {
1955 u32 iv_buf[4];
1956 u32 iv_len;
1957
1958 u32 salt_buf[4];
1959 u32 salt_len;
1960
1961 u32 crc;
1962
1963 u32 data_buf[96];
1964 u32 data_len;
1965
1966 u32 unpack_size;
1967
1968 } seven_zip_t;
1969
1970 typedef struct
1971 {
1972 u32 key;
1973 u64 val;
1974
1975 } hcstat_table_t;
1976
1977 typedef struct
1978 {
1979 u32 cs_buf[0x100];
1980 u32 cs_len;
1981
1982 } cs_t;
1983
1984 typedef struct
1985 {
1986 u32 cmds[0x100];
1987
1988 } kernel_rule_t;
1989
1990 typedef struct
1991 {
1992 u32 gidvid;
1993 u32 il_pos;
1994
1995 } plain_t;
1996
1997 typedef struct
1998 {
1999 u32 i[16];
2000
2001 u32 pw_len;
2002
2003 u32 alignment_placeholder_1;
2004 u32 alignment_placeholder_2;
2005 u32 alignment_placeholder_3;
2006
2007 } pw_t;
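/*
 * Note: the three placeholder words appear to pad pw_t to a fixed 80-byte
 * layout (16 + 1 + 3 words of 4 bytes each), presumably so host and kernel
 * code agree on the buffer stride.
 */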
2008
2009 typedef struct
2010 {
2011 u32 i;
2012
2013 } bf_t;
2014
2015 typedef struct
2016 {
2017 u32 i[8];
2018
2019 u32 pw_len;
2020
2021 } comb_t;
2022
2023 typedef struct
2024 {
2025 u32 b[32];
2026
2027 } bs_word_t;
2028
2029 typedef struct
2030 {
2031 uint4 P[64];
2032
2033 } scrypt_tmp_t;