/**
 * Author......: Jens Steube <jens.steube@gmail.com>
 * License.....: MIT
 */

#define DEVICE_TYPE_CPU 2
#define DEVICE_TYPE_GPU 4

typedef uchar  u8;
typedef ushort u16;
typedef uint   u32;
typedef ulong  u64;

#ifndef NEW_SIMD_CODE
#undef  VECT_SIZE
#define VECT_SIZE 1
#endif

#if VECT_SIZE == 1
typedef uchar  u8x;
typedef ushort u16x;
typedef uint   u32x;
typedef ulong  u64x;
#endif

#if VECT_SIZE == 2
typedef uchar2  u8x;
typedef ushort2 u16x;
typedef uint2   u32x;
typedef ulong2  u64x;
#endif

#if VECT_SIZE == 4
typedef uchar4  u8x;
typedef ushort4 u16x;
typedef uint4   u32x;
typedef ulong4  u64x;
#endif

#if VECT_SIZE == 8
typedef uchar8  u8x;
typedef ushort8 u16x;
typedef uint8   u32x;
typedef ulong8  u64x;
#endif

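/**
 * Note on the u8x/u16x/u32x/u64x types: when the including kernel defines
 * NEW_SIMD_CODE, VECT_SIZE selects an OpenCL vector width (1, 2, 4 or 8) and
 * every operation on these types then processes that many password candidates
 * per work-item; without NEW_SIMD_CODE the types collapse back to scalars.
 * A minimal, hypothetical usage sketch (identifiers are illustrative only,
 * not part of this file):
 *
 *   u32x w0 = (u32x) (0);       // one lane per candidate
 *   w0 ^= 0x80000000;           // operators apply element-wise
 *   u32x t  = rotl32 (w0, 5);   // vector rotate defined below
 *
 * Helpers with an _S suffix are the scalar (single-lane) counterparts.
 */
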
// this one needs to die
#define allx(r) r

static inline u32 l32_from_64_S (u64 a)
{
  const u32 r = (u32) (a);

  return r;
}

static inline u32 h32_from_64_S (u64 a)
{
  a >>= 32;

  const u32 r = (u32) (a);

  return r;
}

static inline u64 hl32_to_64_S (const u32 a, const u32 b)
{
  return as_ulong ((uint2) (b, a));
}

static inline u32x l32_from_64 (u64x a)
{
  u32x r;

#if VECT_SIZE == 1
  r = (u32) a;
#endif

#if VECT_SIZE >= 2
  r.s0 = (u32) a.s0;
  r.s1 = (u32) a.s1;
#endif

#if VECT_SIZE >= 4
  r.s2 = (u32) a.s2;
  r.s3 = (u32) a.s3;
#endif

#if VECT_SIZE >= 8
  r.s4 = (u32) a.s4;
  r.s5 = (u32) a.s5;
  r.s6 = (u32) a.s6;
  r.s7 = (u32) a.s7;
#endif

  return r;
}

static inline u32x h32_from_64 (u64x a)
{
  a >>= 32;

  u32x r;

#if VECT_SIZE == 1
  r = (u32) a;
#endif

#if VECT_SIZE >= 2
  r.s0 = (u32) a.s0;
  r.s1 = (u32) a.s1;
#endif

#if VECT_SIZE >= 4
  r.s2 = (u32) a.s2;
  r.s3 = (u32) a.s3;
#endif

#if VECT_SIZE >= 8
  r.s4 = (u32) a.s4;
  r.s5 = (u32) a.s5;
  r.s6 = (u32) a.s6;
  r.s7 = (u32) a.s7;
#endif

  return r;
}

static inline u64x hl32_to_64 (const u32x a, const u32x b)
{
  u64x r;

#if VECT_SIZE == 1
  r = as_ulong ((uint2) (b, a));
#endif

#if VECT_SIZE >= 2
  r.s0 = as_ulong ((uint2) (b.s0, a.s0));
  r.s1 = as_ulong ((uint2) (b.s1, a.s1));
#endif

#if VECT_SIZE >= 4
  r.s2 = as_ulong ((uint2) (b.s2, a.s2));
  r.s3 = as_ulong ((uint2) (b.s3, a.s3));
#endif

#if VECT_SIZE >= 8
  r.s4 = as_ulong ((uint2) (b.s4, a.s4));
  r.s5 = as_ulong ((uint2) (b.s5, a.s5));
  r.s6 = as_ulong ((uint2) (b.s6, a.s6));
  r.s7 = as_ulong ((uint2) (b.s7, a.s7));
#endif

  return r;
}

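/*
 * The l32/h32/hl32 helpers split a 64-bit word into its 32-bit halves and
 * join them again; the vector variants do the same per lane. Quick sanity
 * check (illustrative values only):
 *
 *   l32_from_64_S (0x0123456789abcdefUL)   == 0x89abcdef
 *   h32_from_64_S (0x0123456789abcdefUL)   == 0x01234567
 *   hl32_to_64_S  (0x01234567, 0x89abcdef) == 0x0123456789abcdefUL
 *
 * i.e. hl32_to_64_S takes the high half first and the low half second.
 */
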
#ifdef IS_AMD
static inline u32 swap32_S (const u32 v)
{
  return (as_uint (as_uchar4 (v).s3210));
}

static inline u64 swap64_S (const u64 v)
{
  return (as_ulong (as_uchar8 (v).s76543210));
}

static inline u32 rotr32_S (const u32 a, const u32 n)
{
  return rotate (a, 32 - n);
}

static inline u32 rotl32_S (const u32 a, const u32 n)
{
  return rotate (a, n);
}

static inline u64 rotr64_S (const u64 a, const u32 n)
{
  u64 r;

#if DEVICE_TYPE == DEVICE_TYPE_CPU

  r = rotate (a, (u64) 64 - n);

#else

  uint2 a2 = as_uint2 (a);

  uint2 t;

  t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32)
                   : amd_bitalign (a2.s1, a2.s0, n);
  t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32)
                   : amd_bitalign (a2.s0, a2.s1, n);

  r = as_ulong (t);

#endif

  return r;
}

static inline u64 rotl64_S (const u64 a, const u32 n)
{
  return rotr64_S (a, 64 - n);
}

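/*
 * On the AMD GPU path a 64-bit rotate is assembled from two 32-bit funnel
 * shifts: amd_bitalign (hi, lo, n) yields the low 32 bits of ((hi:lo) >> n)
 * for n in 0..31, so each half of the result costs one instruction, and the
 * n >= 32 case is handled by swapping the halves and shifting by n - 32.
 * As an illustrative check, rotr64_S (0x0123456789abcdefUL, 8) should give
 * 0xef0123456789abcdUL, matching the plain rotate()-based CPU branch.
 */
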
static inline u32x swap32 (const u32x v)
{
  return ((v >> 24) & 0x000000ff)
       | ((v >>  8) & 0x0000ff00)
       | ((v <<  8) & 0x00ff0000)
       | ((v << 24) & 0xff000000);
}

static inline u64x swap64 (const u64x v)
{
  return ((v >> 56) & 0x00000000000000ff)
       | ((v >> 40) & 0x000000000000ff00)
       | ((v >> 24) & 0x0000000000ff0000)
       | ((v >>  8) & 0x00000000ff000000)
       | ((v <<  8) & 0x000000ff00000000)
       | ((v << 24) & 0x0000ff0000000000)
       | ((v << 40) & 0x00ff000000000000)
       | ((v << 56) & 0xff00000000000000);
}

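/*
 * swap32/swap64 reverse the byte order of each element with plain shifts and
 * masks, e.g. swap32 (0x11223344) == 0x44332211 and
 * swap64 (0x1122334455667788UL) == 0x8877665544332211UL.
 */
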
static inline u32x rotr32 (const u32x a, const u32 n)
{
  return rotate (a, 32 - n);
}

static inline u32x rotl32 (const u32x a, const u32 n)
{
  return rotate (a, n);
}

static inline u64x rotr64 (const u64x a, const u32 n)
{
  u64x r;

#if DEVICE_TYPE == DEVICE_TYPE_CPU

  r = rotate (a, (u64) 64 - n);

#else

  uint2 a2;
  uint2 t;

#if VECT_SIZE == 1

  a2 = as_uint2 (a);

  t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32) : amd_bitalign (a2.s1, a2.s0, n);
  t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32) : amd_bitalign (a2.s0, a2.s1, n);

  r = as_ulong (t);

#elif VECT_SIZE == 2

  {
    a2 = as_uint2 (a.s0);

    t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32) : amd_bitalign (a2.s1, a2.s0, n);
    t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32) : amd_bitalign (a2.s0, a2.s1, n);

    r.s0 = as_ulong (t);
  }

  {
    a2 = as_uint2 (a.s1);

    t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32) : amd_bitalign (a2.s1, a2.s0, n);
    t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32) : amd_bitalign (a2.s0, a2.s1, n);

    r.s1 = as_ulong (t);
  }

#elif VECT_SIZE == 4

  {
    a2 = as_uint2 (a.s0);

    t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32) : amd_bitalign (a2.s1, a2.s0, n);
    t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32) : amd_bitalign (a2.s0, a2.s1, n);

    r.s0 = as_ulong (t);
  }

  {
    a2 = as_uint2 (a.s1);

    t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32) : amd_bitalign (a2.s1, a2.s0, n);
    t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32) : amd_bitalign (a2.s0, a2.s1, n);

    r.s1 = as_ulong (t);
  }

  {
    a2 = as_uint2 (a.s2);

    t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32) : amd_bitalign (a2.s1, a2.s0, n);
    t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32) : amd_bitalign (a2.s0, a2.s1, n);

    r.s2 = as_ulong (t);
  }

  {
    a2 = as_uint2 (a.s3);

    t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32) : amd_bitalign (a2.s1, a2.s0, n);
    t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32) : amd_bitalign (a2.s0, a2.s1, n);

    r.s3 = as_ulong (t);
  }

#elif VECT_SIZE == 8

  {
    a2 = as_uint2 (a.s0);

    t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32) : amd_bitalign (a2.s1, a2.s0, n);
    t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32) : amd_bitalign (a2.s0, a2.s1, n);

    r.s0 = as_ulong (t);
  }

  {
    a2 = as_uint2 (a.s1);

    t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32) : amd_bitalign (a2.s1, a2.s0, n);
    t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32) : amd_bitalign (a2.s0, a2.s1, n);

    r.s1 = as_ulong (t);
  }

  {
    a2 = as_uint2 (a.s2);

    t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32) : amd_bitalign (a2.s1, a2.s0, n);
    t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32) : amd_bitalign (a2.s0, a2.s1, n);

    r.s2 = as_ulong (t);
  }

  {
    a2 = as_uint2 (a.s3);

    t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32) : amd_bitalign (a2.s1, a2.s0, n);
    t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32) : amd_bitalign (a2.s0, a2.s1, n);

    r.s3 = as_ulong (t);
  }

  {
    a2 = as_uint2 (a.s4);

    t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32) : amd_bitalign (a2.s1, a2.s0, n);
    t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32) : amd_bitalign (a2.s0, a2.s1, n);

    r.s4 = as_ulong (t);
  }

  {
    a2 = as_uint2 (a.s5);

    t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32) : amd_bitalign (a2.s1, a2.s0, n);
    t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32) : amd_bitalign (a2.s0, a2.s1, n);

    r.s5 = as_ulong (t);
  }

  {
    a2 = as_uint2 (a.s6);

    t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32) : amd_bitalign (a2.s1, a2.s0, n);
    t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32) : amd_bitalign (a2.s0, a2.s1, n);

    r.s6 = as_ulong (t);
  }

  {
    a2 = as_uint2 (a.s7);

    t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32) : amd_bitalign (a2.s1, a2.s0, n);
    t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32) : amd_bitalign (a2.s0, a2.s1, n);

    r.s7 = as_ulong (t);
  }

#endif
#endif

  return r;
}

static inline u64x rotl64 (const u64x a, const u32 n)
{
  return rotr64 (a, 64 - n);
}

static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
  return amd_bfe (a, b, c);
}

static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
{
  return amd_bytealign (a, b, c);
}
#endif

#ifdef IS_NV
static inline u32 swap32_S (const u32 v)
{
  u32 r;

  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));

  return r;
}

static inline u64 swap64_S (const u64 v)
{
  u32 il;
  u32 ir;

  asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(v));

  u32 tl;
  u32 tr;

  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl) : "r"(il));
  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));

  u64 r;

  asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));

  return r;
}

static inline u32 rotr32_S (const u32 a, const u32 n)
{
  return rotate (a, 32 - n);
}

static inline u32 rotl32_S (const u32 a, const u32 n)
{
  return rotate (a, n);
}

#if CUDA_ARCH >= 350
static inline u64 rotr64_S (const u64 a, const u32 n)
{
  u32 il;
  u32 ir;

  asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));

  u32 tl;
  u32 tr;

  if (n >= 32)
  {
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
  }
  else
  {
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
  }

  u64 r;

  asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));

  return r;
}
#else
static inline u64 rotr64_S (const u64 a, const u32 n)
{
  return rotate (a, (u64) 64 - n);
}
#endif

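/*
 * On sm_35 and newer the 64-bit rotate uses the PTX funnel shift:
 * shf.r.wrap.b32 d, lo, hi, n shifts the pair (hi:lo) right by n mod 32 and
 * returns the low 32 bits, so two such instructions (with the halves swapped
 * once n >= 32) build the full 64-bit rotate without 64-bit ALU operations.
 * Older devices fall back to the generic rotate() form above.
 */
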
static inline u64 rotl64_S (const u64 a, const u32 n)
{
  return rotr64_S (a, 64 - n);
}

#if CUDA_ARCH >= 500
static inline u32 lut3_2d_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_39_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_59_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_96_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_e4_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_e8_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_ca_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}
#endif

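/*
 * The lut3_*_S helpers expose the sm_50+ LOP3.LUT instruction: the immediate
 * byte is the truth table of an arbitrary three-input boolean function, with
 * inputs a, b, c contributing the bit patterns 0xf0, 0xcc and 0xaa. For
 * example 0x96 = 0xf0 ^ 0xcc ^ 0xaa is a three-way XOR, 0xe8 is the majority
 * function (a & b) | (a & c) | (b & c), and 0xca selects b where a is set and
 * c elsewhere, so typical hash round functions collapse to one instruction.
 */
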
static inline u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));

  return r;
}

static inline u32x swap32 (const u32x v)
{
  return ((v >> 24) & 0x000000ff)
       | ((v >>  8) & 0x0000ff00)
       | ((v <<  8) & 0x00ff0000)
       | ((v << 24) & 0xff000000);
}

static inline u64x swap64 (const u64x v)
{
  return ((v >> 56) & 0x00000000000000ff)
       | ((v >> 40) & 0x000000000000ff00)
       | ((v >> 24) & 0x0000000000ff0000)
       | ((v >>  8) & 0x00000000ff000000)
       | ((v <<  8) & 0x000000ff00000000)
       | ((v << 24) & 0x0000ff0000000000)
       | ((v << 40) & 0x00ff000000000000)
       | ((v << 56) & 0xff00000000000000);
}

static inline u32x rotr32 (const u32x a, const u32 n)
{
  return rotate (a, 32 - n);
}

static inline u32x rotl32 (const u32x a, const u32 n)
{
  return rotate (a, n);
}

static inline u64x rotr64 (const u64x a, const u32 n)
{
  return rotate (a, (u64) 64 - n);
}

static inline u64x rotl64 (const u64x a, const u32 n)
{
  return rotate (a, (u64) n);
}

static inline u32x __byte_perm (const u32x a, const u32x b, const u32x c)
{
  u32x r;

#if VECT_SIZE == 1
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
#endif

#if VECT_SIZE >= 2
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(a.s0), "r"(b.s0), "r"(c.s0));
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(a.s1), "r"(b.s1), "r"(c.s1));
#endif

#if VECT_SIZE >= 4
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(a.s2), "r"(b.s2), "r"(c.s2));
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(a.s3), "r"(b.s3), "r"(c.s3));
#endif

#if VECT_SIZE >= 8
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(a.s4), "r"(b.s4), "r"(c.s4));
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(a.s5), "r"(b.s5), "r"(c.s5));
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(a.s6), "r"(b.s6), "r"(c.s6));
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(a.s7), "r"(b.s7), "r"(c.s7));
#endif

  return r;
}

static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));

  return r;
}

#if CUDA_ARCH >= 350
static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));

  return r;
}
#else
static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
{
  return __byte_perm_S (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);
}
#endif

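/*
 * amd_bytealign (a, b, c) extracts four bytes from the 64-bit concatenation
 * (a:b) starting at byte offset c & 3. On sm_35+ this maps to one funnel
 * shift; on older devices it is emulated with prmt (byte permute), whose
 * selector picks bytes c .. c+3 of the pair, e.g. for c == 1 the selector
 * (0x76543210 >> 4) & 0xffff == 0x4321 returns bytes 1..4.
 */
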
#if CUDA_ARCH >= 500
static inline u32x lut3_2d (const u32x a, const u32x b, const u32x c)
{
  u32x r;

#if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
#endif

#if VECT_SIZE >= 2
  asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
#endif

#if VECT_SIZE >= 4
  asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
#endif

#if VECT_SIZE >= 8
  asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
  asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
  asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
  asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
#endif

  return r;
}

static inline u32x lut3_39 (const u32x a, const u32x b, const u32x c)
{
  u32x r;

#if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
#endif

#if VECT_SIZE == 2
  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
#endif

#if VECT_SIZE == 4
  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
#endif

#if VECT_SIZE == 8
  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
#endif

  return r;
}

static inline u32x lut3_59 (const u32x a, const u32x b, const u32x c)
{
  u32x r;

#if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
#endif

#if VECT_SIZE == 2
  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
#endif

#if VECT_SIZE == 4
  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
#endif

#if VECT_SIZE == 8
  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
#endif

  return r;
}

static inline u32x lut3_96 (const u32x a, const u32x b, const u32x c)
{
  u32x r;

#if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
#endif

#if VECT_SIZE == 2
  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
#endif

#if VECT_SIZE == 4
  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
#endif

#if VECT_SIZE == 8
  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
#endif

  return r;
}

static inline u32x lut3_e4 (const u32x a, const u32x b, const u32x c)
{
  u32x r;

#if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
#endif

#if VECT_SIZE == 2
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
#endif

#if VECT_SIZE == 4
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
#endif

#if VECT_SIZE == 8
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
#endif

  return r;
}

static inline u32x lut3_e8 (const u32x a, const u32x b, const u32x c)
{
  u32x r;

#if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
#endif

#if VECT_SIZE == 2
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
#endif

#if VECT_SIZE == 4
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
#endif

#if VECT_SIZE == 8
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
#endif

  return r;
}

static inline u32x lut3_ca (const u32x a, const u32x b, const u32x c)
{
  u32x r;

#if VECT_SIZE == 1
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
#endif

#if VECT_SIZE == 2
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
#endif

#if VECT_SIZE == 4
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
#endif

#if VECT_SIZE == 8
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s0) : "r" (a.s0), "r" (b.s0), "r" (c.s0));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s1) : "r" (a.s1), "r" (b.s1), "r" (c.s1));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s2) : "r" (a.s2), "r" (b.s2), "r" (c.s2));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s3) : "r" (a.s3), "r" (b.s3), "r" (c.s3));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s4) : "r" (a.s4), "r" (b.s4), "r" (c.s4));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s5) : "r" (a.s5), "r" (b.s5), "r" (c.s5));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s6) : "r" (a.s6), "r" (b.s6), "r" (c.s6));
  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r.s7) : "r" (a.s7), "r" (b.s7), "r" (c.s7));
#endif

  return r;
}

#endif
#endif

#ifdef IS_GENERIC
static inline u32 swap32_S (const u32 v)
{
  return (as_uint (as_uchar4 (v).s3210));
}

static inline u64 swap64_S (const u64 v)
{
  return (as_ulong (as_uchar8 (v).s76543210));
}

static inline u32 rotr32_S (const u32 a, const u32 n)
{
  return rotate (a, 32 - n);
}

static inline u32 rotl32_S (const u32 a, const u32 n)
{
  return rotate (a, n);
}

static inline u64 rotr64_S (const u64 a, const u32 n)
{
  return rotate (a, (u64) 64 - n);
}

static inline u64 rotl64_S (const u64 a, const u32 n)
{
  return rotate (a, (u64) n);
}

static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
{
  const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);

  return (u32) (tmp);
}

static inline u32x swap32 (const u32x v)
{
  return ((v >> 24) & 0x000000ff)
       | ((v >>  8) & 0x0000ff00)
       | ((v <<  8) & 0x00ff0000)
       | ((v << 24) & 0xff000000);
}

static inline u64x swap64 (const u64x v)
{
  return ((v >> 56) & 0x00000000000000ff)
       | ((v >> 40) & 0x000000000000ff00)
       | ((v >> 24) & 0x0000000000ff0000)
       | ((v >>  8) & 0x00000000ff000000)
       | ((v <<  8) & 0x000000ff00000000)
       | ((v << 24) & 0x0000ff0000000000)
       | ((v << 40) & 0x00ff000000000000)
       | ((v << 56) & 0xff00000000000000);
}

static inline u32x rotr32 (const u32x a, const u32 n)
{
  return rotate (a, 32 - n);
}

static inline u32x rotl32 (const u32x a, const u32 n)
{
  return rotate (a, n);
}

static inline u64x rotr64 (const u64x a, const u32 n)
{
  return rotate (a, (u64) 64 - n);
}

static inline u64x rotl64 (const u64x a, const u32 n)
{
  return rotate (a, (u64) n);
}

static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
  #define BIT(x)      (1 << (x))
  #define BIT_MASK(x) (BIT (x) - 1)
  #define BFE(x,y,z)  (((x) >> (y)) & BIT_MASK (z))

  return BFE (a, b, c);
}

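/*
 * Generic bitfield extract: __bfe (a, b, c) returns c bits of a starting at
 * bit position b, mirroring the hardware bfe/amd_bfe paths above, e.g.
 * __bfe (0x12345678, 8, 8) == 0x56.
 */
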
static inline u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
{
#if VECT_SIZE == 1
  const u64x tmp = ((((u64x) (a)) << 32) | ((u64x) (b))) >> ((c & 3) * 8);

  return (u32x) (tmp);
#endif

#if VECT_SIZE == 2
  const u64x tmp = ((((u64x) (a.s0, a.s1)) << 32) | ((u64x) (b.s0, b.s1))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1);
#endif

#if VECT_SIZE == 4
  const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3);
#endif

#if VECT_SIZE == 8
  const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7);
#endif
}
#endif

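/*
 * The generic amd_bytealign above emulates the AMD intrinsic in plain OpenCL
 * C: per lane it computes ((a:b) >> ((c & 3) * 8)) truncated to 32 bits,
 * e.g. amd_bytealign_S (0x11223344, 0x55667788, 2) == 0x33445566.
 */
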
typedef struct
{
#if   defined _DES_
  u32 digest_buf[4];
#elif defined _MD4_
  u32 digest_buf[4];
#elif defined _MD5_
  u32 digest_buf[4];
#elif defined _MD5H_
  u32 digest_buf[4];
#elif defined _SHA1_
  u32 digest_buf[5];
#elif defined _BCRYPT_
  u32 digest_buf[6];
#elif defined _SHA256_
  u32 digest_buf[8];
#elif defined _SHA384_
  u32 digest_buf[16];
#elif defined _SHA512_
  u32 digest_buf[16];
#elif defined _KECCAK_
  u32 digest_buf[50];
#elif defined _RIPEMD160_
  u32 digest_buf[5];
#elif defined _WHIRLPOOL_
  u32 digest_buf[16];
#elif defined _GOST_
  u32 digest_buf[8];
#elif defined _GOST2012_256_
  u32 digest_buf[8];
#elif defined _GOST2012_512_
  u32 digest_buf[16];
#elif defined _SAPB_
  u32 digest_buf[4];
#elif defined _SAPG_
  u32 digest_buf[5];
#elif defined _MYSQL323_
  u32 digest_buf[4];
#elif defined _LOTUS5_
  u32 digest_buf[4];
#elif defined _LOTUS6_
  u32 digest_buf[4];
#elif defined _SCRYPT_
  u32 digest_buf[8];
#elif defined _LOTUS8_
  u32 digest_buf[4];
#elif defined _OFFICE2007_
  u32 digest_buf[4];
#elif defined _OFFICE2010_
  u32 digest_buf[4];
#elif defined _OFFICE2013_
  u32 digest_buf[4];
#elif defined _OLDOFFICE01_
  u32 digest_buf[4];
#elif defined _OLDOFFICE34_
  u32 digest_buf[4];
#elif defined _SIPHASH_
  u32 digest_buf[4];
#elif defined _PBKDF2_MD5_
  u32 digest_buf[32];
#elif defined _PBKDF2_SHA1_
  u32 digest_buf[32];
#elif defined _PBKDF2_SHA256_
  u32 digest_buf[32];
#elif defined _PBKDF2_SHA512_
  u32 digest_buf[32];
#elif defined _PDF17L8_
  u32 digest_buf[8];
#elif defined _CRC32_
  u32 digest_buf[4];
#elif defined _SEVEN_ZIP_
  u32 digest_buf[4];
#elif defined _ANDROIDFDE_
  u32 digest_buf[4];
#elif defined _DCC2_
  u32 digest_buf[4];
#elif defined _WPA_
  u32 digest_buf[4];
#elif defined _MD5_SHA1_
  u32 digest_buf[4];
#elif defined _SHA1_MD5_
  u32 digest_buf[5];
#elif defined _NETNTLMV2_
  u32 digest_buf[4];
#elif defined _KRB5PA_
  u32 digest_buf[4];
#elif defined _CLOUDKEY_
  u32 digest_buf[8];
#elif defined _SCRYPT_
  u32 digest_buf[4];
#elif defined _PSAFE2_
  u32 digest_buf[5];
#elif defined _LOTUS8_
  u32 digest_buf[4];
#elif defined _RAR3_
  u32 digest_buf[4];
#elif defined _SHA256_SHA1_
  u32 digest_buf[8];
#elif defined _MS_DRSR_
  u32 digest_buf[8];
#elif defined _ANDROIDFDE_SAMSUNG_
  u32 digest_buf[8];
#elif defined _RAR5_
  u32 digest_buf[4];
#endif

} digest_t;

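/*
 * digest_t is sized at kernel compile time: the _<ALGO>_ macro selected by
 * the hash mode picks how many 32-bit words the digest occupies, e.g. 5
 * words (160 bits) for _SHA1_ and 16 words (512 bits) for _SHA512_.
 */
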
typedef struct
{
  u32 salt_buf[16];
  u32 salt_buf_pc[8];

  u32 salt_len;
  u32 salt_iter;
  u32 salt_sign[2];

  u32 keccak_mdlen;
  u32 truecrypt_mdlen;

  u32 digests_cnt;
  u32 digests_done;

  u32 digests_offset;

  u32 scrypt_N;
  u32 scrypt_r;
  u32 scrypt_p;
  u32 scrypt_tmto;
  u32 scrypt_phy;

} salt_t;

typedef struct
{
  int V;
  int R;
  int P;

  int enc_md;

  u32 id_buf[8];
  u32 u_buf[32];
  u32 o_buf[32];

  int id_len;
  int o_len;
  int u_len;

  u32 rc4key[2];
  u32 rc4data[2];

} pdf_t;

typedef struct
{
  u32 pke[25];
  u32 eapol[64];
  int eapol_size;
  int keyver;

} wpa_t;

typedef struct
{
  u32 cry_master_buf[64];
  u32 ckey_buf[64];
  u32 public_key_buf[64];

  u32 cry_master_len;
  u32 ckey_len;
  u32 public_key_len;

} bitcoin_wallet_t;

typedef struct
{
  u32 salt_buf[30];
  u32 salt_len;

  u32 esalt_buf[38];
  u32 esalt_len;

} sip_t;

typedef struct
{
  u32 data[384];

} androidfde_t;

typedef struct
{
  u32 nr_buf[16];
  u32 nr_len;

  u32 msg_buf[128];
  u32 msg_len;

} ikepsk_t;

typedef struct
{
  u32 user_len;
  u32 domain_len;
  u32 srvchall_len;
  u32 clichall_len;

  u32 userdomain_buf[64];
  u32 chall_buf[256];

} netntlm_t;

typedef struct
{
  u32 user[16];
  u32 realm[16];
  u32 salt[32];
  u32 timestamp[16];
  u32 checksum[4];

} krb5pa_t;

typedef struct
{
  u32 salt_buf[16];
  u32 data_buf[112];
  u32 keyfile_buf[16];

} tc_t;

typedef struct
{
  u32 salt_buf[16];

} pbkdf2_md5_t;

typedef struct
{
  u32 salt_buf[16];

} pbkdf2_sha1_t;

typedef struct
{
  u32 salt_buf[16];

} pbkdf2_sha256_t;

typedef struct
{
  u32 salt_buf[32];

} pbkdf2_sha512_t;

typedef struct
{
  u32 salt_buf[128];
  u32 salt_len;

} rakp_t;

typedef struct
{
  u32 data_len;
  u32 data_buf[512];

} cloudkey_t;

typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[5];

  u32 keySize;

} office2007_t;

typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[8];

} office2010_t;

typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[8];

} office2013_t;

typedef struct
{
  u32 version;
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[4];
  u32 rc4key[2];

} oldoffice01_t;

typedef struct
{
  u32 version;
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[5];
  u32 rc4key[2];

} oldoffice34_t;

typedef struct
{
  u32 digest[4];
  u32 out[4];

} pdf14_tmp_t;

typedef struct
{
  union
  {
    u32 dgst32[16];
    u64 dgst64[8];
  };

  u32 dgst_len;
  u32 W_len;

} pdf17l8_tmp_t;

typedef struct
{
  u32 digest_buf[4];

} phpass_tmp_t;

typedef struct
{
  u32 digest_buf[4];

} md5crypt_tmp_t;

typedef struct
{
  u32 alt_result[8];

  u32 p_bytes[4];
  u32 s_bytes[4];

} sha256crypt_tmp_t;

typedef struct
{
  u64 l_alt_result[8];

  u64 l_p_bytes[2];
  u64 l_s_bytes[2];

} sha512crypt_tmp_t;

typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[10];
  u32 out[10];

} wpa_tmp_t;

typedef struct
{
  u64 dgst[8];

} bitcoin_wallet_tmp_t;

typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[4];

} dcc2_tmp_t;

typedef struct
{
  u32 E[18];

  u32 P[18];

  u32 S0[256];
  u32 S1[256];
  u32 S2[256];
  u32 S3[256];

} bcrypt_tmp_t;

typedef struct
{
  u32 digest[2];

  u32 P[18];

  u32 S0[256];
  u32 S1[256];
  u32 S2[256];
  u32 S3[256];

} pwsafe2_tmp_t;

typedef struct
{
  u32 digest_buf[8];

} pwsafe3_tmp_t;

typedef struct
{
  u32 digest_buf[5];

} androidpin_tmp_t;

typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[10];
  u32 out[10];

} androidfde_tmp_t;

typedef struct
{
  u32 ipad[16];
  u32 opad[16];

  u32 dgst[64];
  u32 out[64];

} tc_tmp_t;

typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[32];
  u64 out[32];

} tc64_tmp_t;

typedef struct
{
  u32 ipad[4];
  u32 opad[4];

  u32 dgst[32];
  u32 out[32];

} pbkdf2_md5_tmp_t;

typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[32];
  u32 out[32];

} pbkdf2_sha1_tmp_t;

typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[32];
  u32 out[32];

} pbkdf2_sha256_tmp_t;

typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[16];
  u64 out[16];

} pbkdf2_sha512_tmp_t;

typedef struct
{
  u64 out[8];

} ecryptfs_tmp_t;

typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[16];
  u64 out[16];

} oraclet_tmp_t;

typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];

} agilekey_tmp_t;

typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst1[5];
  u32 out1[5];

  u32 dgst2[5];
  u32 out2[5];

} mywallet_tmp_t;

typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];

} sha1aix_tmp_t;

typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[8];
  u32 out[8];

} sha256aix_tmp_t;

typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[8];
  u64 out[8];

} sha512aix_tmp_t;

typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[8];
  u32 out[8];

} lastpass_tmp_t;

typedef struct
{
  u64 digest_buf[8];

} drupal7_tmp_t;

typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];

} lotus8_tmp_t;

typedef struct
{
  u32 out[5];

} office2007_tmp_t;

typedef struct
{
  u32 out[5];

} office2010_tmp_t;

typedef struct
{
  u64 out[8];

} office2013_tmp_t;

typedef struct
{
  u32 digest_buf[5];

} saph_sha1_tmp_t;

typedef struct
{
  u32 block[16];

  u32 dgst[8];

  u32 block_len;
  u32 final_len;

} seven_zip_tmp_t;

typedef struct
{
  u32 Kc[16];
  u32 Kd[16];

  u32 iv[2];

} bsdicrypt_tmp_t;

typedef struct
{
  u32 dgst[17][5];

} rar3_tmp_t;

typedef struct
{
  u32 user[16];

} cram_md5_t;

typedef struct
{
  u32 iv_buf[4];
  u32 iv_len;

  u32 salt_buf[4];
  u32 salt_len;

  u32 crc;

  u32 data_buf[96];
  u32 data_len;

  u32 unpack_size;

} seven_zip_t;

typedef struct
{
  u32 key;
  u64 val;

} hcstat_table_t;

typedef struct
{
  u32 cs_buf[0x100];
  u32 cs_len;

} cs_t;

typedef struct
{
  u32 cmds[0x100];

} kernel_rule_t;

typedef struct
{
  u32 gidvid;
  u32 il_pos;

} plain_t;

typedef struct
{
  u32 i[64];

  u32 pw_len;

  u32 alignment_placeholder_1;
  u32 alignment_placeholder_2;
  u32 alignment_placeholder_3;

} pw_t;

typedef struct
{
  u32 i;

} bf_t;

typedef struct
{
  u32 i[8];

  u32 pw_len;

} comb_t;

typedef struct
{
  u32 b[32];

} bs_word_t;

typedef struct
{
  uint4 P[64];

} scrypt_tmp_t;