Initial commit
[hashcat.git] / nv / types_nv.c
1 /**
2 * Author......: Jens Steube <jens.steube@gmail.com>
3 * License.....: MIT
4 */
5
6 #include <stdint.h>
7
8 typedef uint8_t u8;
9 typedef uint16_t u16;
10 typedef uint32_t u32;
11 typedef uint64_t u64;
12
13 __device__ static u32 lut3_2d (const u32 a, const u32 b, const u32 c)
14 {
15 u32 r;
16
17 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
18
19 return r;
20 }
21
22 __device__ static u32 lut3_39 (const u32 a, const u32 b, const u32 c)
23 {
24 u32 r;
25
26 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
27
28 return r;
29 }
30
31 __device__ static u32 lut3_59 (const u32 a, const u32 b, const u32 c)
32 {
33 u32 r;
34
35 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
36
37 return r;
38 }
39
40 __device__ static u32 lut3_96 (const u32 a, const u32 b, const u32 c)
41 {
42 u32 r;
43
44 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
45
46 return r;
47 }
48
49 __device__ static u32 lut3_e4 (const u32 a, const u32 b, const u32 c)
50 {
51 u32 r;
52
53 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
54
55 return r;
56 }
57
58 __device__ static u32 lut3_e8 (const u32 a, const u32 b, const u32 c)
59 {
60 u32 r;
61
62 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
63
64 return r;
65 }
66
67 __device__ static u32 lut3_ca (const u32 a, const u32 b, const u32 c)
68 {
69 u32 r;
70
71 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));
72
73 return r;
74 }
75
76 #if __CUDA_ARCH__ >= 350
77
78 __device__ static u32 rotr32 (const u32 a, const u32 n)
79 {
80 return __funnelshift_r (a, a, n);
81 }
82
83 __device__ static u32 rotl32 (const u32 a, const u32 n)
84 {
85 return rotr32 (a, 32 - n);
86 }
87
88 __device__ static u64 rotr64 (const u64 a, const u32 n)
89 {
90 u32 il;
91 u32 ir;
92
93 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
94
95 u32 tl;
96 u32 tr;
97
98 if (n >= 32)
99 {
100 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
101 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
102 }
103 else
104 {
105 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
106 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
107 }
108
109 u64 r;
110
111 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
112
113 return r;
114 }
115
116 __device__ static u64 rotl64 (const u64 a, const u32 n)
117 {
118 return rotr64 (a, 64 - n);
119 }
120
121 #else
122
123 __device__ static u32 rotr32 (const u32 a, const u32 n)
124 {
125 return (((a) >> (n)) + ((a) << (32 - (n))));
126 }
127
128 __device__ static u32 rotl32 (const u32 a, const u32 n)
129 {
130 return rotr32 (a, 32 - n);
131 }
132
133 __device__ static u64 rotr64 (const u64 a, const u32 n)
134 {
135 return (((a) >> (n)) + ((a) << (64 - (n))));
136 }
137
138 __device__ static u64 rotl64 (const u64 a, const u32 n)
139 {
140 return rotr64 (a, 64 - n);
141 }
142
143
144 #endif
145
146 #ifdef VECT_SIZE1
147 #define VECT_SHIFT 0
148 #define VECT_DIV 1
149
150 typedef u8 u8x;
151 typedef u16 u16x;
152 typedef u32 u32x;
153 typedef u64 u64x;
154
155 __device__ static u32 l32_from_64 (u64 a)
156 {
157 const u32 r = (u32) a;
158
159 return r;
160 }
161
162 __device__ static u32 h32_from_64 (u64 a)
163 {
164 a >>= 32;
165
166 const u32 r = (u32) a;
167
168 return r;
169 }
170
171 __device__ static u64 hl32_to_64 (const u32x a, const u32x b)
172 {
173 u64 r;
174
175 asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(b), "r"(a));
176
177 return r;
178 }
179
180 #endif
181
182 #ifdef VECT_SIZE2
183 #define VECT_SHIFT 1
184 #define VECT_DIV 2
185
186 class u8x
187 {
188 private:
189 public:
190
191 u8 x;
192 u8 y;
193
194 inline __device__ u8x (const u8 a, const u8 b) : x(a), y(b) { }
195 inline __device__ u8x (const u8 a) : x(a), y(a) { }
196
197 inline __device__ u8x (void) { }
198 inline __device__ ~u8x (void) { }
199 };
200
201 class u16x
202 {
203 private:
204 public:
205
206 u16 x;
207 u16 y;
208
209 inline __device__ u16x (const u16 a, const u16 b) : x(a), y(b) { }
210 inline __device__ u16x (const u16 a) : x(a), y(a) { }
211
212 inline __device__ u16x (void) { }
213 inline __device__ ~u16x (void) { }
214 };
215
216 class u32x
217 {
218 private:
219 public:
220
221 u32 x;
222 u32 y;
223
224 inline __device__ u32x (const u32 a, const u32 b) : x(a), y(b) { }
225 inline __device__ u32x (const u32 a) : x(a), y(a) { }
226
227 inline __device__ u32x (void) { }
228 inline __device__ ~u32x (void) { }
229 };
230
231 class u64x
232 {
233 private:
234 public:
235
236 u64 x;
237 u64 y;
238
239 inline __device__ u64x (const u32x a) : x(a.x), y(a.y) { }
240
241 inline __device__ u64x (const u64 a, const u64 b) : x(a), y(b) { }
242 inline __device__ u64x (const u64 a) : x(a), y(a) { }
243
244 inline __device__ u64x (void) { }
245 inline __device__ ~u64x (void) { }
246 };
247
248 inline __device__ bool operator != (const u32x a, const u32 b) { return ((a.x != b ) && (a.y != b )); }
249 inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.x != b.x) && (a.y != b.y)); }
250
251 inline __device__ void operator ^= (u32x &a, const u32 b) { a.x ^= b; a.y ^= b; }
252 inline __device__ void operator ^= (u32x &a, const u32x b) { a.x ^= b.x; a.y ^= b.y; }
253
254 inline __device__ void operator |= (u32x &a, const u32 b) { a.x |= b; a.y |= b; }
255 inline __device__ void operator |= (u32x &a, const u32x b) { a.x |= b.x; a.y |= b.y; }
256
257 inline __device__ void operator &= (u32x &a, const u32 b) { a.x &= b; a.y &= b; }
258 inline __device__ void operator &= (u32x &a, const u32x b) { a.x &= b.x; a.y &= b.y; }
259
260 inline __device__ void operator += (u32x &a, const u32 b) { a.x += b; a.y += b; }
261 inline __device__ void operator += (u32x &a, const u32x b) { a.x += b.x; a.y += b.y; }
262
263 inline __device__ void operator -= (u32x &a, const u32 b) { a.x -= b; a.y -= b; }
264 inline __device__ void operator -= (u32x &a, const u32x b) { a.x -= b.x; a.y -= b.y; }
265
266 inline __device__ u32x operator << (const u32x a, const u32 b) { return u32x ((a.x << b ), (a.y << b )); }
267 inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.x << b.x), (a.y << b.y)); }
268
269 inline __device__ u32x operator >> (const u32x a, const u32 b) { return u32x ((a.x >> b ), (a.y >> b )); }
270 inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.x >> b.x), (a.y >> b.y)); }
271
272 inline __device__ u32x operator ^ (const u32x a, const u32 b) { return u32x ((a.x ^ b ), (a.y ^ b )); }
273 inline __device__ u32x operator ^ (const u32x a, const u32x b) { return u32x ((a.x ^ b.x), (a.y ^ b.y)); }
274
275 inline __device__ u32x operator | (const u32x a, const u32 b) { return u32x ((a.x | b ), (a.y | b )); }
276 inline __device__ u32x operator | (const u32x a, const u32x b) { return u32x ((a.x | b.x), (a.y | b.y)); }
277
278 inline __device__ u32x operator & (const u32x a, const u32 b) { return u32x ((a.x & b ), (a.y & b )); }
279 inline __device__ u32x operator & (const u32x a, const u32x b) { return u32x ((a.x & b.x), (a.y & b.y)); }
280
281 inline __device__ u32x operator + (const u32x a, const u32 b) { return u32x ((a.x + b ), (a.y + b )); }
282 inline __device__ u32x operator + (const u32x a, const u32x b) { return u32x ((a.x + b.x), (a.y + b.y)); }
283
284 inline __device__ u32x operator - (const u32x a, const u32 b) { return u32x ((a.x - b ), (a.y - b )); }
285 inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x ((a.x - b.x), (a.y - b.y)); }
286
287 inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.x * b ), (a.y * b )); }
288 inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.x * b.x), (a.y * b.y)); }
289
290 inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.x, ~a.y); }
291
292 inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.x != b ) && (a.y != b )); }
293 inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.x != b.x) && (a.y != b.y)); }
294
295 inline __device__ void operator ^= (u64x &a, const u64 b) { a.x ^= b; a.y ^= b; }
296 inline __device__ void operator ^= (u64x &a, const u64x b) { a.x ^= b.x; a.y ^= b.y; }
297
298 inline __device__ void operator |= (u64x &a, const u64 b) { a.x |= b; a.y |= b; }
299 inline __device__ void operator |= (u64x &a, const u64x b) { a.x |= b.x; a.y |= b.y; }
300
301 inline __device__ void operator &= (u64x &a, const u64 b) { a.x &= b; a.y &= b; }
302 inline __device__ void operator &= (u64x &a, const u64x b) { a.x &= b.x; a.y &= b.y; }
303
304 inline __device__ void operator += (u64x &a, const u64 b) { a.x += b; a.y += b; }
305 inline __device__ void operator += (u64x &a, const u64x b) { a.x += b.x; a.y += b.y; }
306
307 inline __device__ void operator -= (u64x &a, const u64 b) { a.x -= b; a.y -= b; }
308 inline __device__ void operator -= (u64x &a, const u64x b) { a.x -= b.x; a.y -= b.y; }
309
310 inline __device__ u64x operator << (const u64x a, const u64 b) { return u64x ((a.x << b ), (a.y << b )); }
311 inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.x << b.x), (a.y << b.y)); }
312
313 inline __device__ u64x operator >> (const u64x a, const u64 b) { return u64x ((a.x >> b ), (a.y >> b )); }
314 inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.x >> b.x), (a.y >> b.y)); }
315
316 inline __device__ u64x operator ^ (const u64x a, const u64 b) { return u64x ((a.x ^ b ), (a.y ^ b )); }
317 inline __device__ u64x operator ^ (const u64x a, const u64x b) { return u64x ((a.x ^ b.x), (a.y ^ b.y)); }
318
319 inline __device__ u64x operator | (const u64x a, const u64 b) { return u64x ((a.x | b ), (a.y | b )); }
320 inline __device__ u64x operator | (const u64x a, const u64x b) { return u64x ((a.x | b.x), (a.y | b.y)); }
321
322 inline __device__ u64x operator & (const u64x a, const u64 b) { return u64x ((a.x & b ), (a.y & b )); }
323 inline __device__ u64x operator & (const u64x a, const u64x b) { return u64x ((a.x & b.x), (a.y & b.y)); }
324
325 inline __device__ u64x operator + (const u64x a, const u64 b) { return u64x ((a.x + b ), (a.y + b )); }
326 inline __device__ u64x operator + (const u64x a, const u64x b) { return u64x ((a.x + b.x), (a.y + b.y)); }
327
328 inline __device__ u64x operator - (const u64x a, const u64 b) { return u64x ((a.x - b ), (a.y - b )); }
329 inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x ((a.x - b.x), (a.y - b.y)); }
330
331 inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.x, ~a.y); }
332
333 __device__ static u32x lut3_2d (const u32x a, const u32x b, const u32x c)
334 {
335 return u32x (lut3_2d (a.x, b.x, c.x),
336 lut3_2d (a.y, b.y, c.y));
337 }
338
339 __device__ static u32x lut3_39 (const u32x a, const u32x b, const u32x c)
340 {
341 return u32x (lut3_39 (a.x, b.x, c.x),
342 lut3_39 (a.y, b.y, c.y));
343 }
344
345 __device__ static u32x lut3_59 (const u32x a, const u32x b, const u32x c)
346 {
347 return u32x (lut3_59 (a.x, b.x, c.x),
348 lut3_59 (a.y, b.y, c.y));
349 }
350
351 __device__ static u32x lut3_96 (const u32x a, const u32x b, const u32x c)
352 {
353 return u32x (lut3_96 (a.x, b.x, c.x),
354 lut3_96 (a.y, b.y, c.y));
355 }
356
357 __device__ static u32x lut3_e4 (const u32x a, const u32x b, const u32x c)
358 {
359 return u32x (lut3_e4 (a.x, b.x, c.x),
360 lut3_e4 (a.y, b.y, c.y));
361 }
362
363 __device__ static u32x lut3_e8 (const u32x a, const u32x b, const u32x c)
364 {
365 return u32x (lut3_e8 (a.x, b.x, c.x),
366 lut3_e8 (a.y, b.y, c.y));
367 }
368
369 __device__ static u32x lut3_ca (const u32x a, const u32x b, const u32x c)
370 {
371 return u32x (lut3_ca (a.x, b.x, c.x),
372 lut3_ca (a.y, b.y, c.y));
373 }
374
375 __device__ static u32x rotl32(const u32x a, const u32 n)
376 {
377 return u32x (rotl32 (a.x, n),
378 rotl32 (a.y, n));
379 }
380
381 __device__ static u32x rotr32(const u32x a, const u32 n)
382 {
383 return u32x (rotr32 (a.x, n),
384 rotr32 (a.y, n));
385 }
386
387 __device__ static u64x rotl64(const u64x a, const u32 n)
388 {
389 return u64x (rotl64 (a.x, n),
390 rotl64 (a.y, n));
391 }
392
393 __device__ static u64x rotr64(const u64x a, const u32 n)
394 {
395 return u64x (rotr64 (a.x, n),
396 rotr64 (a.y, n));
397 }
398
399 __device__ static u32x __byte_perm (const u32x a, const u32x b, const u32 c)
400 {
401 return u32x (__byte_perm (a.x, b.x, c),
402 __byte_perm (a.y, b.y, c));
403 }
404
405 #endif
406
407 #ifdef VECT_SIZE4
408 #define VECT_SHIFT 2
409 #define VECT_DIV 4
410
411 class u8x
412 {
413 private:
414 public:
415
416 u8 x;
417 u8 y;
418 u8 z;
419 u8 w;
420
421 inline __device__ u8x (const u8 a, const u8 b, const u8 c, const u8 d) : x(a), y(b), z(c), w(d) { }
422 inline __device__ u8x (const u8 a) : x(a), y(a), z(a), w(a) { }
423
424 inline __device__ u8x (void) { }
425 inline __device__ ~u8x (void) { }
426 };
427
428 class u16x
429 {
430 private:
431 public:
432
433 u16 x;
434 u16 y;
435 u16 z;
436 u16 w;
437
438 inline __device__ u16x (const u16 a, const u16 b, const u16 c, const u16 d) : x(a), y(b), z(c), w(d) { }
439 inline __device__ u16x (const u16 a) : x(a), y(a), z(a), w(a) { }
440
441 inline __device__ u16x (void) { }
442 inline __device__ ~u16x (void) { }
443 };
444
445 class u32x
446 {
447 private:
448 public:
449
450 u32 x;
451 u32 y;
452 u32 z;
453 u32 w;
454
455 inline __device__ u32x (const u32 a, const u32 b, const u32 c, const u32 d) : x(a), y(b), z(c), w(d) { }
456 inline __device__ u32x (const u32 a) : x(a), y(a), z(a), w(a) { }
457
458 inline __device__ u32x (void) { }
459 inline __device__ ~u32x (void) { }
460 };
461
462 class u64x
463 {
464 private:
465 public:
466
467 u64 x;
468 u64 y;
469 u64 z;
470 u64 w;
471
472 inline __device__ u64x (const u32x a) : x(a.x), y(a.y), z(a.z), w(a.w) { }
473
474 inline __device__ u64x (const u64 a, const u64 b, const u64 c, const u64 d) : x(a), y(b), z(c), w(d) { }
475 inline __device__ u64x (const u64 a) : x(a), y(a), z(a), w(a) { }
476
477 inline __device__ u64x (void) { }
478 inline __device__ ~u64x (void) { }
479 };
480
481 inline __device__ bool operator != (const u32x a, const u32 b) { return ((a.x != b ) && (a.y != b ) && (a.z != b ) && (a.w != b )); }
482 inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.x != b.x) && (a.y != b.y) && (a.z != b.z) && (a.w != b.w)); }
483
484 inline __device__ void operator ^= (u32x &a, const u32 b) { a.x ^= b; a.y ^= b; a.z ^= b; a.w ^= b; }
485 inline __device__ void operator ^= (u32x &a, const u32x b) { a.x ^= b.x; a.y ^= b.y; a.z ^= b.z; a.w ^= b.w; }
486
487 inline __device__ void operator |= (u32x &a, const u32 b) { a.x |= b; a.y |= b; a.z |= b; a.w |= b; }
488 inline __device__ void operator |= (u32x &a, const u32x b) { a.x |= b.x; a.y |= b.y; a.z |= b.z; a.w |= b.w; }
489
490 inline __device__ void operator &= (u32x &a, const u32 b) { a.x &= b; a.y &= b; a.z &= b; a.w &= b; }
491 inline __device__ void operator &= (u32x &a, const u32x b) { a.x &= b.x; a.y &= b.y; a.z &= b.z; a.w &= b.w; }
492
493 inline __device__ void operator += (u32x &a, const u32 b) { a.x += b; a.y += b; a.z += b; a.w += b; }
494 inline __device__ void operator += (u32x &a, const u32x b) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; }
495
496 inline __device__ void operator -= (u32x &a, const u32 b) { a.x -= b; a.y -= b; a.z -= b; a.w -= b; }
497 inline __device__ void operator -= (u32x &a, const u32x b) { a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; }
498
499 inline __device__ u32x operator << (const u32x a, const u32 b) { return u32x ((a.x << b ), (a.y << b ), (a.z << b ), (a.w << b )); }
500 inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.x << b.x), (a.y << b.y), (a.z << b.z), (a.w << b.w)); }
501
502 inline __device__ u32x operator >> (const u32x a, const u32 b) { return u32x ((a.x >> b ), (a.y >> b ), (a.z >> b ), (a.w >> b )); }
503 inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.x >> b.x), (a.y >> b.y), (a.z >> b.z), (a.w >> b.w)); }
504
505 inline __device__ u32x operator ^ (const u32x a, const u32 b) { return u32x ((a.x ^ b ), (a.y ^ b ), (a.z ^ b ), (a.w ^ b )); }
506 inline __device__ u32x operator ^ (const u32x a, const u32x b) { return u32x ((a.x ^ b.x), (a.y ^ b.y), (a.z ^ b.z), (a.w ^ b.w)); }
507
508 inline __device__ u32x operator | (const u32x a, const u32 b) { return u32x ((a.x | b ), (a.y | b ), (a.z | b ), (a.w | b )); }
509 inline __device__ u32x operator | (const u32x a, const u32x b) { return u32x ((a.x | b.x), (a.y | b.y), (a.z | b.z), (a.w | b.w)); }
510
511 inline __device__ u32x operator & (const u32x a, const u32 b) { return u32x ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); }
512 inline __device__ u32x operator & (const u32x a, const u32x b) { return u32x ((a.x & b.x), (a.y & b.y), (a.z & b.z), (a.w & b.w)); }
513
514 inline __device__ u32x operator + (const u32x a, const u32 b) { return u32x ((a.x + b ), (a.y + b ), (a.z + b ), (a.w + b )); }
515 inline __device__ u32x operator + (const u32x a, const u32x b) { return u32x ((a.x + b.x), (a.y + b.y), (a.z + b.z), (a.w + b.w)); }
516
517 inline __device__ u32x operator - (const u32x a, const u32 b) { return u32x ((a.x - b ), (a.y - b ), (a.z - b ), (a.w - b )); }
518 inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x ((a.x - b.x), (a.y - b.y), (a.z - b.z), (a.w - b.w)); }
519
520 inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.x * b ), (a.y * b ), (a.z * b ), (a.w * b )); }
521 inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.x * b.x), (a.y * b.y), (a.z * b.z), (a.w * b.w)); }
522
523 inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.x, ~a.y, ~a.z, ~a.w); }
524
525 inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.x != b ) && (a.y != b ) && (a.z != b ) && (a.w != b )); }
526 inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.x != b.x) && (a.y != b.y) && (a.z != b.z) && (a.w != b.w)); }
527
528 inline __device__ void operator ^= (u64x &a, const u64 b) { a.x ^= b; a.y ^= b; a.z ^= b; a.w ^= b; }
529 inline __device__ void operator ^= (u64x &a, const u64x b) { a.x ^= b.x; a.y ^= b.y; a.z ^= b.z; a.w ^= b.w; }
530
531 inline __device__ void operator |= (u64x &a, const u64 b) { a.x |= b; a.y |= b; a.z |= b; a.w |= b; }
532 inline __device__ void operator |= (u64x &a, const u64x b) { a.x |= b.x; a.y |= b.y; a.z |= b.z; a.w |= b.w; }
533
534 inline __device__ void operator &= (u64x &a, const u64 b) { a.x &= b; a.y &= b; a.z &= b; a.w &= b; }
535 inline __device__ void operator &= (u64x &a, const u64x b) { a.x &= b.x; a.y &= b.y; a.z &= b.z; a.w &= b.w; }
536
537 inline __device__ void operator += (u64x &a, const u64 b) { a.x += b; a.y += b; a.z += b; a.w += b; }
538 inline __device__ void operator += (u64x &a, const u64x b) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; }
539
540 inline __device__ void operator -= (u64x &a, const u64 b) { a.x -= b; a.y -= b; a.z -= b; a.w -= b; }
541 inline __device__ void operator -= (u64x &a, const u64x b) { a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; }
542
543 inline __device__ u64x operator << (const u64x a, const u64 b) { return u64x ((a.x << b ), (a.y << b ), (a.z << b ), (a.w << b )); }
544 inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.x << b.x), (a.y << b.y), (a.z << b.z), (a.w << b.w)); }
545
546 inline __device__ u64x operator >> (const u64x a, const u64 b) { return u64x ((a.x >> b ), (a.y >> b ), (a.z >> b ), (a.w >> b )); }
547 inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.x >> b.x), (a.y >> b.y), (a.z >> b.z), (a.w >> b.w)); }
548
549 inline __device__ u64x operator ^ (const u64x a, const u64 b) { return u64x ((a.x ^ b ), (a.y ^ b ), (a.z ^ b ), (a.w ^ b )); }
550 inline __device__ u64x operator ^ (const u64x a, const u64x b) { return u64x ((a.x ^ b.x), (a.y ^ b.y), (a.z ^ b.z), (a.w ^ b.w)); }
551
552 inline __device__ u64x operator | (const u64x a, const u64 b) { return u64x ((a.x | b ), (a.y | b ), (a.z | b ), (a.w | b )); }
553 inline __device__ u64x operator | (const u64x a, const u64x b) { return u64x ((a.x | b.x), (a.y | b.y), (a.z | b.z), (a.w | b.w)); }
554
555 inline __device__ u64x operator & (const u64x a, const u64 b) { return u64x ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); }
556 inline __device__ u64x operator & (const u64x a, const u64x b) { return u64x ((a.x & b.x), (a.y & b.y), (a.z & b.z), (a.w & b.w)); }
557
558 inline __device__ u64x operator + (const u64x a, const u64 b) { return u64x ((a.x + b ), (a.y + b ), (a.z + b ), (a.w + b )); }
559 inline __device__ u64x operator + (const u64x a, const u64x b) { return u64x ((a.x + b.x), (a.y + b.y), (a.z + b.z), (a.w + b.w)); }
560
561 inline __device__ u64x operator - (const u64x a, const u64 b) { return u64x ((a.x - b ), (a.y - b ), (a.z - b ), (a.w - b )); }
562 inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x ((a.x - b.x), (a.y - b.y), (a.z - b.z), (a.w - b.w)); }
563
564 inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.x * b ), (a.y * b ), (a.z * b ), (a.w * b )); }
565 inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.x * b.x), (a.y * b.y), (a.z * b.z), (a.w * b.w)); }
566
567 inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.x, ~a.y, ~a.z, ~a.w); }
568
569 __device__ static u32x lut3_2d (const u32x a, const u32x b, const u32x c)
570 {
571 return u32x (lut3_2d(a.x, b.x, c.x),
572 lut3_2d (a.y, b.y, c.y),
573 lut3_2d (a.z, b.z, c.z),
574 lut3_2d (a.w, b.w, c.w));
575 }
576
577 __device__ static u32x lut3_39 (const u32x a, const u32x b, const u32x c)
578 {
579 return u32x (lut3_39 (a.x, b.x, c.x),
580 lut3_39 (a.y, b.y, c.y),
581 lut3_39 (a.z, b.z, c.z),
582 lut3_39 (a.w, b.w, c.w));
583 }
584
585 __device__ static u32x lut3_59 (const u32x a, const u32x b, const u32x c)
586 {
587 return u32x (lut3_59 (a.x, b.x, c.x),
588 lut3_59 (a.y, b.y, c.y),
589 lut3_59 (a.z, b.z, c.z),
590 lut3_59 (a.w, b.w, c.w));
591 }
592
593 __device__ static u32x lut3_96 (const u32x a, const u32x b, const u32x c)
594 {
595 return u32x (lut3_96 (a.x, b.x, c.x),
596 lut3_96 (a.y, b.y, c.y),
597 lut3_96 (a.z, b.z, c.z),
598 lut3_96 (a.w, b.w, c.w));
599 }
600
601 __device__ static u32x lut3_e4 (const u32x a, const u32x b, const u32x c)
602 {
603 return u32x (lut3_e4 (a.x, b.x, c.x),
604 lut3_e4 (a.y, b.y, c.y),
605 lut3_e4 (a.z, b.z, c.z),
606 lut3_e4 (a.w, b.w, c.w));
607 }
608
609 __device__ static u32x lut3_e8 (const u32x a, const u32x b, const u32x c)
610 {
611 return u32x (lut3_e8 (a.x, b.x, c.x),
612 lut3_e8 (a.y, b.y, c.y),
613 lut3_e8 (a.z, b.z, c.z),
614 lut3_e8 (a.w, b.w, c.w));
615 }
616
617 __device__ static u32x lut3_ca (const u32x a, const u32x b, const u32x c)
618 {
619 return u32x (lut3_ca (a.x, b.x, c.x),
620 lut3_ca (a.y, b.y, c.y),
621 lut3_ca (a.z, b.z, c.z),
622 lut3_ca (a.w, b.w, c.w));
623 }
624
625 __device__ static u32x rotl32(const u32x a, const u32 n)
626 {
627 return u32x (rotl32 (a.x, n),
628 rotl32 (a.y, n),
629 rotl32 (a.z, n),
630 rotl32 (a.w, n));
631 }
632
633 __device__ static u32x rotr32(const u32x a, const u32 n)
634 {
635 return u32x (rotr32 (a.x, n),
636 rotr32 (a.y, n),
637 rotr32 (a.z, n),
638 rotr32 (a.w, n));
639 }
640
641 __device__ static u64x rotl64(const u64x a, const u32 n)
642 {
643 return u64x (rotl64 (a.x, n),
644 rotl64 (a.y, n),
645 rotl64 (a.z, n),
646 rotl64 (a.w, n));
647 }
648
649 __device__ static u64x rotr64(const u64x a, const u32 n)
650 {
651 return u64x (rotr64 (a.x, n),
652 rotr64 (a.y, n),
653 rotr64 (a.z, n),
654 rotr64 (a.w, n));
655 }
656
657 __device__ static u32x __byte_perm (const u32x a, const u32x b, const u32 c)
658 {
659 return u32x (__byte_perm (a.x, b.x, c),
660 __byte_perm (a.y, b.y, c),
661 __byte_perm (a.z, b.z, c),
662 __byte_perm (a.w, b.w, c));
663 }
664
665 #endif
666
667 typedef struct
668 {
669 #if defined _DES_
670 u32 digest_buf[4];
671 #elif defined _MD4_
672 u32 digest_buf[4];
673 #elif defined _MD5_
674 u32 digest_buf[4];
675 #elif defined _MD5H_
676 u32 digest_buf[4];
677 #elif defined _SHA1_
678 u32 digest_buf[5];
679 #elif defined _BCRYPT_
680 u32 digest_buf[6];
681 #elif defined _SHA256_
682 u32 digest_buf[8];
683 #elif defined _SHA384_
684 u32 digest_buf[16];
685 #elif defined _SHA512_
686 u32 digest_buf[16];
687 #elif defined _KECCAK_
688 u32 digest_buf[50];
689 #elif defined _RIPEMD160_
690 u32 digest_buf[5];
691 #elif defined _WHIRLPOOL_
692 u32 digest_buf[16];
693 #elif defined _GOST_
694 u32 digest_buf[8];
695 #elif defined _GOST2012_256_
696 u32 digest_buf[8];
697 #elif defined _GOST2012_512_
698 u32 digest_buf[16];
699 #elif defined _SAPB_
700 u32 digest_buf[4];
701 #elif defined _SAPG_
702 u32 digest_buf[5];
703 #elif defined _MYSQL323_
704 u32 digest_buf[4];
705 #elif defined _LOTUS5_
706 u32 digest_buf[4];
707 #elif defined _LOTUS6_
708 u32 digest_buf[4];
709 #elif defined _SCRYPT_
710 u32 digest_buf[8];
711 #elif defined _LOTUS8_
712 u32 digest_buf[4];
713 #elif defined _OFFICE2007_
714 u32 digest_buf[4];
715 #elif defined _OFFICE2010_
716 u32 digest_buf[4];
717 #elif defined _OFFICE2013_
718 u32 digest_buf[4];
719 #elif defined _OLDOFFICE01_
720 u32 digest_buf[4];
721 #elif defined _OLDOFFICE34_
722 u32 digest_buf[4];
723 #elif defined _SIPHASH_
724 u32 digest_buf[4];
725 #elif defined _PBKDF2_MD5_
726 u32 digest_buf[32];
727 #elif defined _PBKDF2_SHA1_
728 u32 digest_buf[32];
729 #elif defined _PBKDF2_SHA256_
730 u32 digest_buf[32];
731 #elif defined _PBKDF2_SHA512_
732 u32 digest_buf[32];
733 #elif defined _PDF17L8_
734 u32 digest_buf[8];
735 #elif defined _CRC32_
736 u32 digest_buf[4];
737 #elif defined _SEVEN_ZIP_
738 u32 digest_buf[4];
739 #elif defined _ANDROIDFDE_
740 u32 digest_buf[4];
741 #elif defined _DCC2_
742 u32 digest_buf[4];
743 #elif defined _WPA_
744 u32 digest_buf[4];
745 #elif defined _MD5_SHA1_
746 u32 digest_buf[4];
747 #elif defined _SHA1_MD5_
748 u32 digest_buf[5];
749 #elif defined _NETNTLMV2_
750 u32 digest_buf[4];
751 #elif defined _KRB5PA_
752 u32 digest_buf[4];
753 #elif defined _CLOUDKEY_
754 u32 digest_buf[8];
755 #elif defined _SCRYPT_
756 u32 digest_buf[4];
757 #elif defined _PSAFE2_
758 u32 digest_buf[5];
759 #elif defined _LOTUS8_
760 u32 digest_buf[4];
761 #elif defined _RAR3_
762 u32 digest_buf[4];
763 #elif defined _SHA256_SHA1_
764 u32 digest_buf[8];
765 #elif defined _MS_DRSR_
766 u32 digest_buf[8];
767 #endif
768
769 } digest_t;
770
771 typedef struct
772 {
773 u32 salt_buf[16];
774 u32 salt_buf_pc[8];
775
776 u32 salt_len;
777 u32 salt_iter;
778 u32 salt_sign[2];
779
780 u32 keccak_mdlen;
781 u32 truecrypt_mdlen;
782
783 u32 digests_cnt;
784 u32 digests_done;
785
786 u32 digests_offset;
787
788 u32 scrypt_N;
789 u32 scrypt_r;
790 u32 scrypt_p;
791 u32 scrypt_tmto;
792 u32 scrypt_phy;
793
794 } salt_t;
795
796 typedef struct
797 {
798 int V;
799 int R;
800 int P;
801
802 int enc_md;
803
804 u32 id_buf[8];
805 u32 u_buf[32];
806 u32 o_buf[32];
807
808 int id_len;
809 int o_len;
810 int u_len;
811
812 u32 rc4key[2];
813 u32 rc4data[2];
814
815 } pdf_t;
816
817 typedef struct
818 {
819 u32 pke[25];
820 u32 eapol[64];
821 int eapol_size;
822 int keyver;
823
824 } wpa_t;
825
826 typedef struct
827 {
828 u32 cry_master_buf[64];
829 u32 ckey_buf[64];
830 u32 public_key_buf[64];
831
832 u32 cry_master_len;
833 u32 ckey_len;
834 u32 public_key_len;
835
836 } bitcoin_wallet_t;
837
838 typedef struct
839 {
840 u32 salt_buf[30];
841 u32 salt_len;
842
843 u32 esalt_buf[38];
844 u32 esalt_len;
845
846 } sip_t;
847
848 typedef struct
849 {
850 u32 data[384];
851
852 } androidfde_t;
853
854 typedef struct
855 {
856 u32 nr_buf[16];
857 u32 nr_len;
858
859 u32 msg_buf[128];
860 u32 msg_len;
861
862 } ikepsk_t;
863
864 typedef struct
865 {
866 u32 user_len;
867 u32 domain_len;
868 u32 srvchall_len;
869 u32 clichall_len;
870
871 u32 userdomain_buf[64];
872 u32 chall_buf[256];
873
874 } netntlm_t;
875
876 typedef struct
877 {
878 u32 user[16];
879 u32 realm[16];
880 u32 salt[32];
881 u32 timestamp[16];
882 u32 checksum[4];
883
884 } krb5pa_t;
885
886 typedef struct
887 {
888 u32 salt_buf[16];
889 u32 data_buf[112];
890 u32 keyfile_buf[16];
891
892 } tc_t;
893
894 typedef struct
895 {
896 u32 salt_buf[16];
897
898 } pbkdf2_md5_t;
899
900 typedef struct
901 {
902 u32 salt_buf[16];
903
904 } pbkdf2_sha1_t;
905
906 typedef struct
907 {
908 u32 salt_buf[16];
909
910 } pbkdf2_sha256_t;
911
912 typedef struct
913 {
914 u32 salt_buf[32];
915
916 } pbkdf2_sha512_t;
917
918 typedef struct
919 {
920 u32 salt_buf[128];
921 u32 salt_len;
922
923 } rakp_t;
924
925 typedef struct
926 {
927 u32 data_len;
928 u32 data_buf[512];
929
930 } cloudkey_t;
931
932 typedef struct
933 {
934 u32 encryptedVerifier[4];
935 u32 encryptedVerifierHash[5];
936
937 u32 keySize;
938
939 } office2007_t;
940
941 typedef struct
942 {
943 u32 encryptedVerifier[4];
944 u32 encryptedVerifierHash[8];
945
946 } office2010_t;
947
948 typedef struct
949 {
950 u32 encryptedVerifier[4];
951 u32 encryptedVerifierHash[8];
952
953 } office2013_t;
954
955 typedef struct
956 {
957 u32 version;
958 u32 encryptedVerifier[4];
959 u32 encryptedVerifierHash[4];
960 u32 rc4key[2];
961
962 } oldoffice01_t;
963
964 typedef struct
965 {
966 u32 version;
967 u32 encryptedVerifier[4];
968 u32 encryptedVerifierHash[5];
969 u32 rc4key[2];
970
971 } oldoffice34_t;
972
973 typedef struct
974 {
975 u32x digest[4];
976 u32x out[4];
977
978 } pdf14_tmp_t;
979
980 typedef struct
981 {
982 union
983 {
984 u32 dgst32[16];
985 u64 dgst64[8];
986 };
987
988 u32 dgst_len;
989 u32 W_len;
990
991 } pdf17l8_tmp_t;
992
993 typedef struct
994 {
995 u32x digest_buf[4];
996
997 } phpass_tmp_t;
998
999 typedef struct
1000 {
1001 u32x digest_buf[4];
1002
1003 } md5crypt_tmp_t;
1004
1005 typedef struct
1006 {
1007 u32x alt_result[8];
1008
1009 u32x p_bytes[4];
1010 u32x s_bytes[4];
1011
1012 } sha256crypt_tmp_t;
1013
1014 typedef struct
1015 {
1016 u64x l_alt_result[8];
1017
1018 u64x l_p_bytes[2];
1019 u64x l_s_bytes[2];
1020
1021 } sha512crypt_tmp_t;
1022
1023 typedef struct
1024 {
1025 u32x ipad[5];
1026 u32x opad[5];
1027
1028 u32x dgst[10];
1029 u32x out[10];
1030
1031 } wpa_tmp_t;
1032
1033 typedef struct
1034 {
1035 u64x dgst[8];
1036
1037 } bitcoin_wallet_tmp_t;
1038
1039 typedef struct
1040 {
1041 u32x ipad[5];
1042 u32x opad[5];
1043
1044 u32x dgst[5];
1045 u32x out[4];
1046
1047 } dcc2_tmp_t;
1048
1049 typedef struct
1050 {
1051 u32x P[18];
1052
1053 u32x S0[256];
1054 u32x S1[256];
1055 u32x S2[256];
1056 u32x S3[256];
1057
1058 } bcrypt_tmp_t;
1059
1060 typedef struct
1061 {
1062 u32x digest[2];
1063
1064 u32x P[18];
1065
1066 u32x S0[256];
1067 u32x S1[256];
1068 u32x S2[256];
1069 u32x S3[256];
1070
1071 } pwsafe2_tmp_t;
1072
1073 typedef struct
1074 {
1075 u32x digest_buf[8];
1076
1077 } pwsafe3_tmp_t;
1078
1079 typedef struct
1080 {
1081 u32x digest_buf[5];
1082
1083 } androidpin_tmp_t;
1084
1085 typedef struct
1086 {
1087 u32x ipad[5];
1088 u32x opad[5];
1089
1090 u32x dgst[10];
1091 u32x out[10];
1092
1093 } androidfde_tmp_t;
1094
1095 typedef struct
1096 {
1097 u32x ipad[16];
1098 u32x opad[16];
1099
1100 u32x dgst[64];
1101 u32x out[64];
1102
1103 } tc_tmp_t;
1104
1105 typedef struct
1106 {
1107 u64x ipad[8];
1108 u64x opad[8];
1109
1110 u64x dgst[32];
1111 u64x out[32];
1112
1113 } tc64_tmp_t;
1114
1115 typedef struct
1116 {
1117 u32x ipad[4];
1118 u32x opad[4];
1119
1120 u32x dgst[32];
1121 u32x out[32];
1122
1123 } pbkdf2_md5_tmp_t;
1124
1125 typedef struct
1126 {
1127 u32x ipad[5];
1128 u32x opad[5];
1129
1130 u32x dgst[32];
1131 u32x out[32];
1132
1133 } pbkdf2_sha1_tmp_t;
1134
1135 typedef struct
1136 {
1137 u32x ipad[8];
1138 u32x opad[8];
1139
1140 u32x dgst[32];
1141 u32x out[32];
1142
1143 } pbkdf2_sha256_tmp_t;
1144
1145 typedef struct
1146 {
1147 u64x ipad[8];
1148 u64x opad[8];
1149
1150 u64x dgst[16];
1151 u64x out[16];
1152
1153 } pbkdf2_sha512_tmp_t;
1154
1155 typedef struct
1156 {
1157 u64x out[8];
1158
1159 } ecryptfs_tmp_t;
1160
1161 typedef struct
1162 {
1163 u64x ipad[8];
1164 u64x opad[8];
1165
1166 u64x dgst[16];
1167 u64x out[16];
1168
1169 } oraclet_tmp_t;
1170
1171 typedef struct
1172 {
1173 u32x ipad[5];
1174 u32x opad[5];
1175
1176 u32x dgst[5];
1177 u32x out[5];
1178
1179 } agilekey_tmp_t;
1180
1181 typedef struct
1182 {
1183 u32 ipad[5];
1184 u32 opad[5];
1185
1186 u32 dgst1[5];
1187 u32 out1[5];
1188
1189 u32 dgst2[5];
1190 u32 out2[5];
1191
1192 } mywallet_tmp_t;
1193
1194 typedef struct
1195 {
1196 u32x ipad[5];
1197 u32x opad[5];
1198
1199 u32x dgst[5];
1200 u32x out[5];
1201
1202 } sha1aix_tmp_t;
1203
1204 typedef struct
1205 {
1206 u32x ipad[8];
1207 u32x opad[8];
1208
1209 u32x dgst[8];
1210 u32x out[8];
1211
1212 } sha256aix_tmp_t;
1213
1214 typedef struct
1215 {
1216 u64x ipad[8];
1217 u64x opad[8];
1218
1219 u64x dgst[8];
1220 u64x out[8];
1221
1222 } sha512aix_tmp_t;
1223
1224 typedef struct
1225 {
1226 u32x ipad[8];
1227 u32x opad[8];
1228
1229 u32x dgst[8];
1230 u32x out[8];
1231
1232 } lastpass_tmp_t;
1233
1234 typedef struct
1235 {
1236 u64x digest_buf[8];
1237
1238 } drupal7_tmp_t;
1239
1240 typedef struct
1241 {
1242 u32x ipad[5];
1243 u32x opad[5];
1244
1245 u32x dgst[5];
1246 u32x out[5];
1247
1248 } lotus8_tmp_t;
1249
1250 typedef struct
1251 {
1252 u32x out[5];
1253
1254 } office2007_tmp_t;
1255
1256 typedef struct
1257 {
1258 u32x out[5];
1259
1260 } office2010_tmp_t;
1261
1262 typedef struct
1263 {
1264 u64x out[8];
1265
1266 } office2013_tmp_t;
1267
1268 typedef struct
1269 {
1270 u32x digest_buf[5];
1271
1272 } saph_sha1_tmp_t;
1273
1274 typedef struct
1275 {
1276 u32x block[16];
1277
1278 u32x dgst[8];
1279
1280 u32x block_len;
1281 u32x final_len;
1282
1283 } seven_zip_tmp_t;
1284
1285 typedef struct
1286 {
1287 u32x Kc[16];
1288 u32x Kd[16];
1289
1290 u32x iv[2];
1291
1292 } bsdicrypt_tmp_t;
1293
1294 typedef struct
1295 {
1296 u32 dgst[17][5];
1297
1298 } rar3_tmp_t;
1299
1300 typedef struct
1301 {
1302 u32 user[16];
1303
1304 } cram_md5_t;
1305
1306 typedef struct
1307 {
1308 u32 iv_buf[4];
1309 u32 iv_len;
1310
1311 u32 salt_buf[4];
1312 u32 salt_len;
1313
1314 u32 crc;
1315
1316 u32 data_buf[96];
1317 u32 data_len;
1318
1319 u32 unpack_size;
1320
1321 } seven_zip_t;
1322
1323 typedef struct
1324 {
1325 u32 key;
1326 u64 val;
1327
1328 } hcstat_table_t;
1329
1330 typedef struct
1331 {
1332 u32 cs_buf[0x100];
1333 u32 cs_len;
1334
1335 } cs_t;
1336
1337 typedef struct
1338 {
1339 u32 cmds[15];
1340
1341 } gpu_rule_t;
1342
1343 /*
1344 typedef struct
1345 {
1346 u32 plain_buf[16];
1347 u32 plailen;
1348
1349 } plain_t;
1350 */
1351
1352 typedef struct
1353 {
1354 u32 gidvid;
1355 u32 il_pos;
1356
1357 } plain_t;
1358
1359 typedef struct
1360 {
1361 #ifdef _SCALAR_
1362 u32 i[64];
1363 #else
1364 #ifdef VECT_SIZE4
1365 u32x i[16];
1366 #endif
1367
1368 #ifdef VECT_SIZE2
1369 u32x i[32];
1370 #endif
1371
1372 #ifdef VECT_SIZE1
1373 u32x i[64];
1374 #endif
1375 #endif
1376
1377 u32 pw_len;
1378 u32 alignment_placeholder_1;
1379 u32 alignment_placeholder_2;
1380 u32 alignment_placeholder_3;
1381
1382 } pw_t;
1383
1384 typedef struct
1385 {
1386 u32 i;
1387
1388 } bf_t;
1389
1390 typedef struct
1391 {
1392 u32 i[8];
1393
1394 u32 pw_len;
1395
1396 } comb_t;
1397
1398 typedef struct
1399 {
1400 u32 b[32];
1401
1402 } bs_word_t;