/**
 * Authors.....: Jens Steube <jens.steube@gmail.com>
 *               magnum <john.magnum@hushmail.com>
 *
 * License.....: MIT
 */

typedef uchar  u8;
typedef ushort u16;
typedef uint   u32;
typedef ulong  u64;

#ifndef NEW_SIMD_CODE
#undef  VECT_SIZE
#define VECT_SIZE 1
#endif

#define CONCAT(a, b)       a##b
#define VTYPE(type, width) CONCAT(type, width)

#if VECT_SIZE == 1
typedef uchar  u8x;
typedef ushort u16x;
typedef uint   u32x;
typedef ulong  u64x;
#else
typedef VTYPE(uchar,  VECT_SIZE) u8x;
typedef VTYPE(ushort, VECT_SIZE) u16x;
typedef VTYPE(uint,   VECT_SIZE) u32x;
typedef VTYPE(ulong,  VECT_SIZE) u64x;
#endif

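/**
 * Scalar helpers (the "_S" suffix marks the single-element variants): split a
 * 64-bit value into its low/high 32-bit halves, or join two 32-bit halves
 * back into a 64-bit value by reinterpreting a uint2.
 */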
inline u32 l32_from_64_S (u64 a)
{
  const u32 r = (u32) (a);

  return r;
}

inline u32 h32_from_64_S (u64 a)
{
  a >>= 32;

  const u32 r = (u32) (a);

  return r;
}

inline u64 hl32_to_64_S (const u32 a, const u32 b)
{
  return as_ulong ((uint2) (b, a));
}

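/**
 * Vector variants of the split/join helpers above: the same operation,
 * unrolled per component for every supported VECT_SIZE (1, 2, 4, 8, 16).
 */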
inline u32x l32_from_64 (u64x a)
{
  u32x r;

#if VECT_SIZE == 1
  r = (u32) a;
#endif

#if VECT_SIZE >= 2
  r.s0 = (u32) a.s0;
  r.s1 = (u32) a.s1;
#endif

#if VECT_SIZE >= 4
  r.s2 = (u32) a.s2;
  r.s3 = (u32) a.s3;
#endif

#if VECT_SIZE >= 8
  r.s4 = (u32) a.s4;
  r.s5 = (u32) a.s5;
  r.s6 = (u32) a.s6;
  r.s7 = (u32) a.s7;
#endif

#if VECT_SIZE >= 16
  r.s8 = (u32) a.s8;
  r.s9 = (u32) a.s9;
  r.sa = (u32) a.sa;
  r.sb = (u32) a.sb;
  r.sc = (u32) a.sc;
  r.sd = (u32) a.sd;
  r.se = (u32) a.se;
  r.sf = (u32) a.sf;
#endif

  return r;
}

inline u32x h32_from_64 (u64x a)
{
  a >>= 32;

  u32x r;

#if VECT_SIZE == 1
  r = (u32) a;
#endif

#if VECT_SIZE >= 2
  r.s0 = (u32) a.s0;
  r.s1 = (u32) a.s1;
#endif

#if VECT_SIZE >= 4
  r.s2 = (u32) a.s2;
  r.s3 = (u32) a.s3;
#endif

#if VECT_SIZE >= 8
  r.s4 = (u32) a.s4;
  r.s5 = (u32) a.s5;
  r.s6 = (u32) a.s6;
  r.s7 = (u32) a.s7;
#endif

#if VECT_SIZE >= 16
  r.s8 = (u32) a.s8;
  r.s9 = (u32) a.s9;
  r.sa = (u32) a.sa;
  r.sb = (u32) a.sb;
  r.sc = (u32) a.sc;
  r.sd = (u32) a.sd;
  r.se = (u32) a.se;
  r.sf = (u32) a.sf;
#endif

  return r;
}

inline u64x hl32_to_64 (const u32x a, const u32x b)
{
  u64x r;

#if VECT_SIZE == 1
  r = as_ulong ((uint2) (b, a));
#endif

#if VECT_SIZE >= 2
  r.s0 = as_ulong ((uint2) (b.s0, a.s0));
  r.s1 = as_ulong ((uint2) (b.s1, a.s1));
#endif

#if VECT_SIZE >= 4
  r.s2 = as_ulong ((uint2) (b.s2, a.s2));
  r.s3 = as_ulong ((uint2) (b.s3, a.s3));
#endif

#if VECT_SIZE >= 8
  r.s4 = as_ulong ((uint2) (b.s4, a.s4));
  r.s5 = as_ulong ((uint2) (b.s5, a.s5));
  r.s6 = as_ulong ((uint2) (b.s6, a.s6));
  r.s7 = as_ulong ((uint2) (b.s7, a.s7));
#endif

#if VECT_SIZE >= 16
  r.s8 = as_ulong ((uint2) (b.s8, a.s8));
  r.s9 = as_ulong ((uint2) (b.s9, a.s9));
  r.sa = as_ulong ((uint2) (b.sa, a.sa));
  r.sb = as_ulong ((uint2) (b.sb, a.sb));
  r.sc = as_ulong ((uint2) (b.sc, a.sc));
  r.sd = as_ulong ((uint2) (b.sd, a.sd));
  r.se = as_ulong ((uint2) (b.se, a.se));
  r.sf = as_ulong ((uint2) (b.sf, a.sf));
#endif

  return r;
}

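/**
 * Byte swaps, rotates and bitfield extraction, implemented once per platform.
 * The IS_AMD branch below maps them onto the AMD media-ops intrinsics
 * amd_bitalign, amd_bfe and amd_bytealign.
 */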
#ifdef IS_AMD
inline u32 swap32_S (const u32 v)
{
  return (as_uint (as_uchar4 (v).s3210));
}

inline u64 swap64_S (const u64 v)
{
  return (as_ulong (as_uchar8 (v).s76543210));
}

inline u32 rotr32_S (const u32 a, const u32 n)
{
  return rotate (a, 32 - n);
}

inline u32 rotl32_S (const u32 a, const u32 n)
{
  return rotate (a, n);
}

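/**
 * 64-bit right rotate built from two 32-bit bitalign operations:
 * amd_bitalign (hi, lo, n) is expected to return the low 32 bits of
 * ((hi:lo) >> n) for n < 32, so each output half is a funnel shift of the
 * matching pair of input halves; for n >= 32 the halves are swapped and the
 * shift amount reduced by 32.
 */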
inline u64 rotr64_S (const u64 a, const u32 n)
{
  const u32 a0 = h32_from_64_S (a);
  const u32 a1 = l32_from_64_S (a);

  const u32 t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
  const u32 t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);

  const u64 r = hl32_to_64_S (t0, t1);

  return r;
}

inline u64 rotl64_S (const u64 a, const u32 n)
{
  return rotr64_S (a, 64 - n);
}

inline u32x swap32 (const u32x v)
{
  return ((v >> 24) & 0x000000ff)
       | ((v >>  8) & 0x0000ff00)
       | ((v <<  8) & 0x00ff0000)
       | ((v << 24) & 0xff000000);
}

inline u64x swap64 (const u64x v)
{
  return ((v >> 56) & 0x00000000000000ff)
       | ((v >> 40) & 0x000000000000ff00)
       | ((v >> 24) & 0x0000000000ff0000)
       | ((v >>  8) & 0x00000000ff000000)
       | ((v <<  8) & 0x000000ff00000000)
       | ((v << 24) & 0x0000ff0000000000)
       | ((v << 40) & 0x00ff000000000000)
       | ((v << 56) & 0xff00000000000000);
}

inline u32x rotr32 (const u32x a, const u32 n)
{
  return rotate (a, 32 - n);
}

inline u32x rotl32 (const u32x a, const u32 n)
{
  return rotate (a, n);
}

inline u64x rotr64 (const u64x a, const u32 n)
{
  const u32x a0 = h32_from_64 (a);
  const u32x a1 = l32_from_64 (a);

  const u32x t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
  const u32x t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);

  const u64x r = hl32_to_64 (t0, t1);

  return r;
}

inline u64x rotl64 (const u64x a, const u32 n)
{
  return rotr64 (a, 64 - n);
}

inline u32x __bfe (const u32x a, const u32x b, const u32x c)
{
  return amd_bfe (a, b, c);
}

inline u32 __bfe_S (const u32 a, const u32 b, const u32 c)
{
  return amd_bfe (a, b, c);
}

inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
{
  return amd_bytealign (a, b, c);
}
#endif

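/**
 * NVIDIA path: the same helpers expressed as inline PTX. prmt.b32 permutes
 * bytes (byte swaps and the __byte_perm wrapper), bfe.u32 extracts bit
 * fields, and shf.r.wrap.b32 (used only when CUDA_ARCH >= 350) provides a
 * 32-bit funnel shift.
 */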
#ifdef IS_NV
inline u32 swap32_S (const u32 v)
{
  u32 r;

  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));

  return r;
}

inline u64 swap64_S (const u64 v)
{
  u32 il;
  u32 ir;

  asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(v));

  u32 tl;
  u32 tr;

  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl) : "r"(il));
  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));

  u64 r;

  asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));

  return r;
}

inline u32 rotr32_S (const u32 a, const u32 n)
{
  return rotate (a, 32 - n);
}

inline u32 rotl32_S (const u32 a, const u32 n)
{
  return rotate (a, n);
}

inline u64 rotr64_S (const u64 a, const u32 n)
{
  return rotate (a, (u64) 64 - n);
}

inline u64 rotl64_S (const u64 a, const u32 n)
{
  return rotr64_S (a, 64 - n);
}

inline u32x swap32 (const u32x v)
{
  return ((v >> 24) & 0x000000ff)
       | ((v >>  8) & 0x0000ff00)
       | ((v <<  8) & 0x00ff0000)
       | ((v << 24) & 0xff000000);
}

inline u64x swap64 (const u64x v)
{
  return ((v >> 56) & 0x00000000000000ff)
       | ((v >> 40) & 0x000000000000ff00)
       | ((v >> 24) & 0x0000000000ff0000)
       | ((v >>  8) & 0x00000000ff000000)
       | ((v <<  8) & 0x000000ff00000000)
       | ((v << 24) & 0x0000ff0000000000)
       | ((v << 40) & 0x00ff000000000000)
       | ((v << 56) & 0xff00000000000000);
}

inline u32x rotr32 (const u32x a, const u32 n)
{
  return rotate (a, 32 - n);
}

inline u32x rotl32 (const u32x a, const u32 n)
{
  return rotate (a, n);
}

inline u64x rotr64 (const u64x a, const u32 n)
{
  return rotate (a, (u64) 64 - n);
}

inline u64x rotl64 (const u64x a, const u32 n)
{
  return rotate (a, (u64) n);
}

inline u32x __byte_perm (const u32x a, const u32x b, const u32x c)
{
  u32x r;

#if VECT_SIZE == 1
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
#endif

#if VECT_SIZE >= 2
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(a.s0), "r"(b.s0), "r"(c.s0));
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(a.s1), "r"(b.s1), "r"(c.s1));
#endif

#if VECT_SIZE >= 4
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(a.s2), "r"(b.s2), "r"(c.s2));
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(a.s3), "r"(b.s3), "r"(c.s3));
#endif

#if VECT_SIZE >= 8
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(a.s4), "r"(b.s4), "r"(c.s4));
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(a.s5), "r"(b.s5), "r"(c.s5));
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(a.s6), "r"(b.s6), "r"(c.s6));
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(a.s7), "r"(b.s7), "r"(c.s7));
#endif

#if VECT_SIZE >= 16
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(a.s8), "r"(b.s8), "r"(c.s8));
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(a.s9), "r"(b.s9), "r"(c.s9));
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(a.sa), "r"(b.sa), "r"(c.sa));
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(a.sb), "r"(b.sb), "r"(c.sb));
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(a.sc), "r"(b.sc), "r"(c.sc));
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(a.sd), "r"(b.sd), "r"(c.sd));
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(a.se), "r"(b.se), "r"(c.se));
  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(a.sf), "r"(b.sf), "r"(c.sf));
#endif

  return r;
}

inline u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));

  return r;
}

inline u32x __bfe (const u32x a, const u32x b, const u32x c)
{
  u32x r;

#if VECT_SIZE == 1
  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
#endif

#if VECT_SIZE >= 2
  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(a.s0), "r"(b.s0), "r"(c.s0));
  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(a.s1), "r"(b.s1), "r"(c.s1));
#endif

#if VECT_SIZE >= 4
  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(a.s2), "r"(b.s2), "r"(c.s2));
  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(a.s3), "r"(b.s3), "r"(c.s3));
#endif

#if VECT_SIZE >= 8
  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(a.s4), "r"(b.s4), "r"(c.s4));
  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(a.s5), "r"(b.s5), "r"(c.s5));
  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(a.s6), "r"(b.s6), "r"(c.s6));
  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(a.s7), "r"(b.s7), "r"(c.s7));
#endif

#if VECT_SIZE >= 16
  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(a.s8), "r"(b.s8), "r"(c.s8));
  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(a.s9), "r"(b.s9), "r"(c.s9));
  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(a.sa), "r"(b.sa), "r"(c.sa));
  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(a.sb), "r"(b.sb), "r"(c.sb));
  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(a.sc), "r"(b.sc), "r"(c.sc));
  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(a.sd), "r"(b.sd), "r"(c.sd));
  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(a.se), "r"(b.se), "r"(c.se));
  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(a.sf), "r"(b.sf), "r"(c.sf));
#endif

  return r;
}

inline u32 __bfe_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));

  return r;
}

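/**
 * amd_bytealign emulation: on sm_35+ devices a funnel shift extracts the
 * 32 bits starting at byte offset (c & 3) of the 64-bit pair (a:b); on older
 * devices the same byte selection is done through __byte_perm, with the
 * selector nibbles taken from 0x76543210 shifted by the byte offset.
 */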
inline u32x amd_bytealign (const u32x a, const u32x b, const u32x c)
{
  u32x r;

#if CUDA_ARCH >= 350

#if VECT_SIZE == 1
  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));
#endif

#if VECT_SIZE >= 2
  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s0) : "r"(b.s0), "r"(a.s0), "r"((c.s0 & 3) * 8));
  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s1) : "r"(b.s1), "r"(a.s1), "r"((c.s1 & 3) * 8));
#endif

#if VECT_SIZE >= 4
  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(b.s2), "r"(a.s2), "r"((c.s2 & 3) * 8));
  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(b.s3), "r"(a.s3), "r"((c.s3 & 3) * 8));
#endif

#if VECT_SIZE >= 8
  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(b.s4), "r"(a.s4), "r"((c.s4 & 3) * 8));
  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(b.s5), "r"(a.s5), "r"((c.s5 & 3) * 8));
  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(b.s6), "r"(a.s6), "r"((c.s6 & 3) * 8));
  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(b.s7), "r"(a.s7), "r"((c.s7 & 3) * 8));
#endif

#if VECT_SIZE >= 16
  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(b.s8), "r"(a.s8), "r"((c.s8 & 3) * 8));
  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(b.s9), "r"(a.s9), "r"((c.s9 & 3) * 8));
  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(b.sa), "r"(a.sa), "r"((c.sa & 3) * 8));
  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(b.sb), "r"(a.sb), "r"((c.sb & 3) * 8));
  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(b.sc), "r"(a.sc), "r"((c.sc & 3) * 8));
  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(b.sd), "r"(a.sd), "r"((c.sd & 3) * 8));
  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(b.se), "r"(a.se), "r"((c.se & 3) * 8));
  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(b.sf), "r"(a.sf), "r"((c.sf & 3) * 8));
#endif

#else

  r = __byte_perm (b, a, ((u32x) (0x76543210) >> ((c & 3) * 4)) & 0xffff);

#endif

  return r;
}

inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
{
  u32 r;

#if CUDA_ARCH >= 350

  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));

#else

  r = __byte_perm_S (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);

#endif

  return r;
}
#endif

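/**
 * Generic fallback: portable OpenCL only, no vendor intrinsics. Here
 * amd_bytealign is re-implemented by widening (a:b) to 64 bits and shifting
 * right by (c & 3) * 8 bits, and __bfe by plain shift-and-mask macros.
 */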
#ifdef IS_GENERIC
inline u32 swap32_S (const u32 v)
{
  return (as_uint (as_uchar4 (v).s3210));
}

inline u64 swap64_S (const u64 v)
{
  return (as_ulong (as_uchar8 (v).s76543210));
}

inline u32 rotr32_S (const u32 a, const u32 n)
{
  return rotate (a, 32 - n);
}

inline u32 rotl32_S (const u32 a, const u32 n)
{
  return rotate (a, n);
}

inline u64 rotr64_S (const u64 a, const u32 n)
{
  return rotate (a, (u64) 64 - n);
}

inline u64 rotl64_S (const u64 a, const u32 n)
{
  return rotate (a, (u64) n);
}

inline u32x swap32 (const u32x v)
{
  return ((v >> 24) & 0x000000ff)
       | ((v >>  8) & 0x0000ff00)
       | ((v <<  8) & 0x00ff0000)
       | ((v << 24) & 0xff000000);
}

inline u64x swap64 (const u64x v)
{
  return ((v >> 56) & 0x00000000000000ff)
       | ((v >> 40) & 0x000000000000ff00)
       | ((v >> 24) & 0x0000000000ff0000)
       | ((v >>  8) & 0x00000000ff000000)
       | ((v <<  8) & 0x000000ff00000000)
       | ((v << 24) & 0x0000ff0000000000)
       | ((v << 40) & 0x00ff000000000000)
       | ((v << 56) & 0xff00000000000000);
}

inline u32x rotr32 (const u32x a, const u32 n)
{
  return rotate (a, 32 - n);
}

inline u32x rotl32 (const u32x a, const u32 n)
{
  return rotate (a, n);
}

inline u64x rotr64 (const u64x a, const u32 n)
{
  return rotate (a, (u64) 64 - n);
}

inline u64x rotl64 (const u64x a, const u32 n)
{
  return rotate (a, (u64) n);
}

inline u32x __bfe (const u32x a, const u32x b, const u32x c)
{
#define BIT(x)      (1 << (x))
#define BIT_MASK(x) (BIT (x) - 1)
#define BFE(x,y,z)  (((x) >> (y)) & BIT_MASK (z))

  return BFE (a, b, c);

#undef BIT
#undef BIT_MASK
#undef BFE
}

inline u32 __bfe_S (const u32 a, const u32 b, const u32 c)
{
#define BIT(x)      (1 << (x))
#define BIT_MASK(x) (BIT (x) - 1)
#define BFE(x,y,z)  (((x) >> (y)) & BIT_MASK (z))

  return BFE (a, b, c);

#undef BIT
#undef BIT_MASK
#undef BFE
}

inline u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
{
#if VECT_SIZE == 1
  const u64x tmp = ((((u64x) (a)) << 32) | ((u64x) (b))) >> ((c & 3) * 8);

  return (u32x) (tmp);
#endif

#if VECT_SIZE == 2
  const u64x tmp = ((((u64x) (a.s0, a.s1)) << 32) | ((u64x) (b.s0, b.s1))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1);
#endif

#if VECT_SIZE == 4
  const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3);
#endif

#if VECT_SIZE == 8
  const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7);
#endif

#if VECT_SIZE == 16
  const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7, a.s8, a.s9, a.sa, a.sb, a.sc, a.sd, a.se, a.sf)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7, b.s8, b.s9, b.sa, b.sb, b.sc, b.sd, b.se, b.sf))) >> ((c & 3) * 8);

  return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7, tmp.s8, tmp.s9, tmp.sa, tmp.sb, tmp.sc, tmp.sd, tmp.se, tmp.sf);
#endif
}

inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
{
  const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);

  return (u32) (tmp);
}

#endif

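/**
 * digest_t: the digest buffer size is selected at compile time by the
 * hash-mode define the kernel is built with (e.g. 4 u32 words for MD4/MD5,
 * 5 for SHA1, 8 for SHA256, 16 for SHA512), so each kernel only carries the
 * storage its digest actually needs.
 */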
typedef struct
{
#if   defined _DES_
  u32 digest_buf[4];
#elif defined _MD4_
  u32 digest_buf[4];
#elif defined _MD5_
  u32 digest_buf[4];
#elif defined _MD5H_
  u32 digest_buf[4];
#elif defined _SHA1_
  u32 digest_buf[5];
#elif defined _BCRYPT_
  u32 digest_buf[6];
#elif defined _SHA256_
  u32 digest_buf[8];
#elif defined _SHA384_
  u32 digest_buf[16];
#elif defined _SHA512_
  u32 digest_buf[16];
#elif defined _KECCAK_
  u32 digest_buf[50];
#elif defined _RIPEMD160_
  u32 digest_buf[5];
#elif defined _WHIRLPOOL_
  u32 digest_buf[16];
#elif defined _GOST_
  u32 digest_buf[8];
#elif defined _GOST2012_256_
  u32 digest_buf[8];
#elif defined _GOST2012_512_
  u32 digest_buf[16];
#elif defined _SAPB_
  u32 digest_buf[4];
#elif defined _SAPG_
  u32 digest_buf[5];
#elif defined _MYSQL323_
  u32 digest_buf[4];
#elif defined _LOTUS5_
  u32 digest_buf[4];
#elif defined _LOTUS6_
  u32 digest_buf[4];
#elif defined _SCRYPT_
  u32 digest_buf[8];
#elif defined _LOTUS8_
  u32 digest_buf[4];
#elif defined _OFFICE2007_
  u32 digest_buf[4];
#elif defined _OFFICE2010_
  u32 digest_buf[4];
#elif defined _OFFICE2013_
  u32 digest_buf[4];
#elif defined _OLDOFFICE01_
  u32 digest_buf[4];
#elif defined _OLDOFFICE34_
  u32 digest_buf[4];
#elif defined _SIPHASH_
  u32 digest_buf[4];
#elif defined _PBKDF2_MD5_
  u32 digest_buf[32];
#elif defined _PBKDF2_SHA1_
  u32 digest_buf[32];
#elif defined _PBKDF2_SHA256_
  u32 digest_buf[32];
#elif defined _PBKDF2_SHA512_
  u32 digest_buf[32];
#elif defined _PDF17L8_
  u32 digest_buf[8];
#elif defined _CRC32_
  u32 digest_buf[4];
#elif defined _SEVEN_ZIP_
  u32 digest_buf[4];
#elif defined _ANDROIDFDE_
  u32 digest_buf[4];
#elif defined _DCC2_
  u32 digest_buf[4];
#elif defined _WPA_
  u32 digest_buf[4];
#elif defined _MD5_SHA1_
  u32 digest_buf[4];
#elif defined _SHA1_MD5_
  u32 digest_buf[5];
#elif defined _NETNTLMV2_
  u32 digest_buf[4];
#elif defined _KRB5PA_
  u32 digest_buf[4];
#elif defined _CLOUDKEY_
  u32 digest_buf[8];
#elif defined _SCRYPT_
  u32 digest_buf[4];
#elif defined _PSAFE2_
  u32 digest_buf[5];
#elif defined _LOTUS8_
  u32 digest_buf[4];
#elif defined _RAR3_
  u32 digest_buf[4];
#elif defined _SHA256_SHA1_
  u32 digest_buf[8];
#elif defined _MS_DRSR_
  u32 digest_buf[8];
#elif defined _ANDROIDFDE_SAMSUNG_
  u32 digest_buf[8];
#elif defined _RAR5_
  u32 digest_buf[4];
#elif defined _KRB5TGS_
  u32 digest_buf[4];
#elif defined _AXCRYPT_
  u32 digest_buf[4];
#elif defined _KEEPASS_
  u32 digest_buf[4];
#elif defined _ZIP2_
  u32 digest_buf[4];
#endif

} digest_t;

typedef struct
{
  u32 salt_buf[16];
  u32 salt_buf_pc[8];

  u32 salt_len;
  u32 salt_iter;
  u32 salt_sign[2];

  u32 keccak_mdlen;
  u32 truecrypt_mdlen;

  u32 digests_cnt;
  u32 digests_done;

  u32 digests_offset;

  u32 scrypt_N;
  u32 scrypt_r;
  u32 scrypt_p;
  u32 scrypt_tmto;
  u32 scrypt_phy;

} salt_t;

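/**
 * The structures below hold algorithm-specific salt/metadata ("esalt") for
 * individual hash modes (PDF, WPA, Office, KeePass, ...), filled in on the
 * host side and consumed by the matching kernels.
 */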
typedef struct
{
  int V;
  int R;
  int P;

  int enc_md;

  u32 id_buf[8];
  u32 u_buf[32];
  u32 o_buf[32];

  int id_len;
  int o_len;
  int u_len;

  u32 rc4key[2];
  u32 rc4data[2];

} pdf_t;

typedef struct
{
  u32 pke[25];
  u32 eapol[64];
  int eapol_size;
  int keyver;
  u8 orig_mac1[6];
  u8 orig_mac2[6];
  u8 orig_nonce1[32];
  u8 orig_nonce2[32];

} wpa_t;

typedef struct
{
  u32 cry_master_buf[64];
  u32 ckey_buf[64];
  u32 public_key_buf[64];

  u32 cry_master_len;
  u32 ckey_len;
  u32 public_key_len;

} bitcoin_wallet_t;

typedef struct
{
  u32 salt_buf[30];
  u32 salt_len;

  u32 esalt_buf[38];
  u32 esalt_len;

} sip_t;

typedef struct
{
  u32 data[384];

} androidfde_t;

typedef struct
{
  u32 nr_buf[16];
  u32 nr_len;

  u32 msg_buf[128];
  u32 msg_len;

} ikepsk_t;

typedef struct
{
  u32 user_len;
  u32 domain_len;
  u32 srvchall_len;
  u32 clichall_len;

  u32 userdomain_buf[64];
  u32 chall_buf[256];

} netntlm_t;

typedef struct
{
  u32 user[16];
  u32 realm[16];
  u32 salt[32];
  u32 timestamp[16];
  u32 checksum[4];

} krb5pa_t;

typedef struct
{
  u32 account_info[512];
  u32 checksum[4];
  u32 edata2[2560];
  u32 edata2_len;

} krb5tgs_t;

typedef struct
{
  u32 salt_buf[16];
  u32 data_buf[112];
  u32 keyfile_buf[16];

} tc_t;

typedef struct
{
  u32 salt_buf[16];

} pbkdf2_md5_t;

typedef struct
{
  u32 salt_buf[16];

} pbkdf2_sha1_t;

typedef struct
{
  u32 salt_buf[16];

} pbkdf2_sha256_t;

typedef struct
{
  u32 salt_buf[32];

} pbkdf2_sha512_t;

typedef struct
{
  u32 salt_buf[128];
  u32 salt_len;

} rakp_t;

typedef struct
{
  u32 data_len;
  u32 data_buf[512];

} cloudkey_t;

typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[5];

  u32 keySize;

} office2007_t;

typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[8];

} office2010_t;

typedef struct
{
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[8];

} office2013_t;

typedef struct
{
  u32 version;
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[4];
  u32 rc4key[2];

} oldoffice01_t;

typedef struct
{
  u32 version;
  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[5];
  u32 rc4key[2];

} oldoffice34_t;

typedef struct
{
  u32 salt_buf[128];
  u32 salt_len;

  u32 pc_digest[5];
  u32 pc_offset;

} pstoken_t;

typedef struct
{
  u32 type;
  u32 mode;
  u32 magic;
  u32 salt_len;
  u32 salt_buf[4];
  u32 verify_bytes;
  u32 compress_length;
  u32 data_len;
  u32 data_buf[2048];
  u32 auth_len;
  u32 auth_buf[4];

} zip2_t;

typedef struct
{
  u32 version;
  u32 algorithm;

  /* key-file handling */
  u32 keyfile_len;
  u32 keyfile[8];

  u32 final_random_seed[8];
  u32 transf_random_seed[8];
  u32 enc_iv[4];
  u32 contents_hash[8];

  /* specific to version 1 */
  u32 contents_len;
  u32 contents[75000];

  /* specific to version 2 */
  u32 expected_bytes[8];

} keepass_t;

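/**
 * The *_tmp_t structures hold per-candidate intermediate state for slow,
 * iterated hashes, so that state can be carried across the successive
 * kernel invocations of the init/loop/comp phases.
 */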
typedef struct
{
  u32 digest[4];
  u32 out[4];

} pdf14_tmp_t;

typedef struct
{
  union
  {
    u32 dgst32[16];
    u64 dgst64[8];
  };

  u32 dgst_len;
  u32 W_len;

} pdf17l8_tmp_t;

typedef struct
{
  u32 digest_buf[4];

} phpass_tmp_t;

typedef struct
{
  u32 digest_buf[4];

} md5crypt_tmp_t;

typedef struct
{
  u32 alt_result[8];

  u32 p_bytes[4];
  u32 s_bytes[4];

} sha256crypt_tmp_t;

typedef struct
{
  u64 l_alt_result[8];

  u64 l_p_bytes[2];
  u64 l_s_bytes[2];

} sha512crypt_tmp_t;

typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[10];
  u32 out[10];

} wpa_tmp_t;

typedef struct
{
  u64 dgst[8];

} bitcoin_wallet_tmp_t;

typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[4];

} dcc2_tmp_t;

typedef struct
{
  u32 E[18];

  u32 P[18];

  u32 S0[256];
  u32 S1[256];
  u32 S2[256];
  u32 S3[256];

} bcrypt_tmp_t;

typedef struct
{
  u32 digest[2];

  u32 P[18];

  u32 S0[256];
  u32 S1[256];
  u32 S2[256];
  u32 S3[256];

} pwsafe2_tmp_t;

typedef struct
{
  u32 digest_buf[8];

} pwsafe3_tmp_t;

typedef struct
{
  u32 digest_buf[5];

} androidpin_tmp_t;

typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[10];
  u32 out[10];

} androidfde_tmp_t;

typedef struct
{
  u32 ipad[16];
  u32 opad[16];

  u32 dgst[64];
  u32 out[64];

} tc_tmp_t;

typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[32];
  u64 out[32];

} tc64_tmp_t;

typedef struct
{
  u32 ipad[4];
  u32 opad[4];

  u32 dgst[32];
  u32 out[32];

} pbkdf2_md5_tmp_t;

typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[32];
  u32 out[32];

} pbkdf2_sha1_tmp_t;

typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[32];
  u32 out[32];

} pbkdf2_sha256_tmp_t;

typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[16];
  u64 out[16];

} pbkdf2_sha512_tmp_t;

typedef struct
{
  u64 out[8];

} ecryptfs_tmp_t;

typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[16];
  u64 out[16];

} oraclet_tmp_t;

typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];

} agilekey_tmp_t;

typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst1[5];
  u32 out1[5];

  u32 dgst2[5];
  u32 out2[5];

} mywallet_tmp_t;

typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];

} sha1aix_tmp_t;

typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[8];
  u32 out[8];

} sha256aix_tmp_t;

typedef struct
{
  u64 ipad[8];
  u64 opad[8];

  u64 dgst[8];
  u64 out[8];

} sha512aix_tmp_t;

typedef struct
{
  u32 ipad[8];
  u32 opad[8];

  u32 dgst[8];
  u32 out[8];

} lastpass_tmp_t;

typedef struct
{
  u64 digest_buf[8];

} drupal7_tmp_t;

typedef struct
{
  u32 ipad[5];
  u32 opad[5];

  u32 dgst[5];
  u32 out[5];

} lotus8_tmp_t;

typedef struct
{
  u32 out[5];

} office2007_tmp_t;

typedef struct
{
  u32 out[5];

} office2010_tmp_t;

typedef struct
{
  u64 out[8];

} office2013_tmp_t;

typedef struct
{
  u32 digest_buf[5];

} saph_sha1_tmp_t;

typedef struct
{
  u32 block[16];

  u32 dgst[8];

  u32 block_len;
  u32 final_len;

} seven_zip_tmp_t;

typedef struct
{
  u32 KEK[5];

  u32 lsb[4];
  u32 cipher[4];

} axcrypt_tmp_t;

typedef struct
{
  u32 tmp_digest[8];

} keepass_tmp_t;

typedef struct
{
  u32 Kc[16];
  u32 Kd[16];

  u32 iv[2];

} bsdicrypt_tmp_t;

typedef struct
{
  u32 dgst[17][5];

} rar3_tmp_t;

typedef struct
{
  u32 user[16];

} cram_md5_t;

typedef struct
{
  u32 iv_buf[4];
  u32 iv_len;

  u32 salt_buf[4];
  u32 salt_len;

  u32 crc;

  u32 data_buf[96];
  u32 data_len;

  u32 unpack_size;

} seven_zip_t;

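/**
 * Remaining shared types passed in from the host: markov statistics
 * (hcstat_table_t), charsets (cs_t), rule programs (kernel_rule_t),
 * cracked-plain references (plain_t), password candidates (pw_t), per-attack
 * data for brute-force (bf_t) and combinator (comb_t) kernels, bitslice words
 * (bs_word_t), and scrypt scratch memory (scrypt_tmp_t).
 */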
typedef struct
{
  u32 key;
  u64 val;

} hcstat_table_t;

typedef struct
{
  u32 cs_buf[0x100];
  u32 cs_len;

} cs_t;

typedef struct
{
  u32 cmds[0x100];

} kernel_rule_t;

typedef struct
{
  u32 gidvid;
  u32 il_pos;

} plain_t;

typedef struct
{
  u32 i[16];

  u32 pw_len;

  u32 alignment_placeholder_1;
  u32 alignment_placeholder_2;
  u32 alignment_placeholder_3;

} pw_t;

typedef struct
{
  u32 i;

} bf_t;

typedef struct
{
  u32 i[8];

  u32 pw_len;

} comb_t;

typedef struct
{
  u32 b[32];

} bs_word_t;

typedef struct
{
  uint4 P[64];

} scrypt_tmp_t;