2 * Author......: Jens Steube <jens.steube@gmail.com>
13 __device__
static u32
lut3_2d (const u32 a
, const u32 b
, const u32 c
)
17 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r
) : "r" (a
), "r" (b
), "r" (c
));
22 __device__
static u32
lut3_39 (const u32 a
, const u32 b
, const u32 c
)
26 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r
) : "r" (a
), "r" (b
), "r" (c
));
31 __device__
static u32
lut3_59 (const u32 a
, const u32 b
, const u32 c
)
35 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r
) : "r" (a
), "r" (b
), "r" (c
));
40 __device__
static u32
lut3_96 (const u32 a
, const u32 b
, const u32 c
)
44 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r
) : "r" (a
), "r" (b
), "r" (c
));
49 __device__
static u32
lut3_e4 (const u32 a
, const u32 b
, const u32 c
)
53 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r
) : "r" (a
), "r" (b
), "r" (c
));
58 __device__
static u32
lut3_e8 (const u32 a
, const u32 b
, const u32 c
)
62 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r
) : "r" (a
), "r" (b
), "r" (c
));
67 __device__
static u32
lut3_ca (const u32 a
, const u32 b
, const u32 c
)
71 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r
) : "r" (a
), "r" (b
), "r" (c
));
76 #if __CUDA_ARCH__ >= 350
78 __device__
static u32
rotr32 (const u32 a
, const u32 n
)
80 return __funnelshift_r (a
, a
, n
);
83 __device__
static u32
rotl32 (const u32 a
, const u32 n
)
85 return rotr32 (a
, 32 - n
);
88 __device__
static u64
rotr64 (const u64 a
, const u32 n
)
93 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il
), "=r"(ir
) : "l"(a
));
100 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl
) : "r"(ir
), "r"(il
), "r"(n
- 32));
101 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr
) : "r"(il
), "r"(ir
), "r"(n
- 32));
105 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl
) : "r"(il
), "r"(ir
), "r"(n
));
106 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr
) : "r"(ir
), "r"(il
), "r"(n
));
111 asm ("mov.b64 %0, {%1, %2};" : "=l"(r
) : "r"(tl
), "r"(tr
));
116 __device__
static u64
rotl64 (const u64 a
, const u32 n
)
118 return rotr64 (a
, 64 - n
);
123 __device__
static u32
rotr32 (const u32 a
, const u32 n
)
125 return (((a
) >> (n
)) + ((a
) << (32 - (n
))));
128 __device__
static u32
rotl32 (const u32 a
, const u32 n
)
130 return rotr32 (a
, 32 - n
);
133 __device__
static u64
rotr64 (const u64 a
, const u32 n
)
135 return (((a
) >> (n
)) + ((a
) << (64 - (n
))));
138 __device__
static u64
rotl64 (const u64 a
, const u32 n
)
140 return rotr64 (a
, 64 - n
);
155 __device__
static u32
l32_from_64 (u64 a
)
157 const u32 r
= (u32
) a
;
162 __device__
static u32
h32_from_64 (u64 a
)
166 const u32 r
= (u32
) a
;
171 __device__
static u64
hl32_to_64 (const u32x a
, const u32x b
)
175 asm ("mov.b64 %0, {%1, %2};" : "=l"(r
) : "r"(b
), "r"(a
));
194 inline __device__
u8x (const u8 a
, const u8 b
) : x(a
), y(b
) { }
195 inline __device__
u8x (const u8 a
) : x(a
), y(a
) { }
197 inline __device__
u8x (void) { }
198 inline __device__
~u8x (void) { }
209 inline __device__
u16x (const u16 a
, const u16 b
) : x(a
), y(b
) { }
210 inline __device__
u16x (const u16 a
) : x(a
), y(a
) { }
212 inline __device__
u16x (void) { }
213 inline __device__
~u16x (void) { }
224 inline __device__
u32x (const u32 a
, const u32 b
) : x(a
), y(b
) { }
225 inline __device__
u32x (const u32 a
) : x(a
), y(a
) { }
227 inline __device__
u32x (void) { }
228 inline __device__
~u32x (void) { }
239 inline __device__
u64x (const u32x a
) : x(a
.x
), y(a
.y
) { }
241 inline __device__
u64x (const u64 a
, const u64 b
) : x(a
), y(b
) { }
242 inline __device__
u64x (const u64 a
) : x(a
), y(a
) { }
244 inline __device__
u64x (void) { }
245 inline __device__
~u64x (void) { }
248 inline __device__
bool operator != (const u32x a
, const u32 b
) { return ((a
.x
!= b
) && (a
.y
!= b
)); }
249 inline __device__
bool operator != (const u32x a
, const u32x b
) { return ((a
.x
!= b
.x
) && (a
.y
!= b
.y
)); }
251 inline __device__
void operator ^= (u32x
&a
, const u32 b
) { a
.x
^= b
; a
.y
^= b
; }
252 inline __device__
void operator ^= (u32x
&a
, const u32x b
) { a
.x
^= b
.x
; a
.y
^= b
.y
; }
254 inline __device__
void operator |= (u32x
&a
, const u32 b
) { a
.x
|= b
; a
.y
|= b
; }
255 inline __device__
void operator |= (u32x
&a
, const u32x b
) { a
.x
|= b
.x
; a
.y
|= b
.y
; }
257 inline __device__
void operator &= (u32x
&a
, const u32 b
) { a
.x
&= b
; a
.y
&= b
; }
258 inline __device__
void operator &= (u32x
&a
, const u32x b
) { a
.x
&= b
.x
; a
.y
&= b
.y
; }
260 inline __device__
void operator += (u32x
&a
, const u32 b
) { a
.x
+= b
; a
.y
+= b
; }
261 inline __device__
void operator += (u32x
&a
, const u32x b
) { a
.x
+= b
.x
; a
.y
+= b
.y
; }
263 inline __device__
void operator -= (u32x
&a
, const u32 b
) { a
.x
-= b
; a
.y
-= b
; }
264 inline __device__
void operator -= (u32x
&a
, const u32x b
) { a
.x
-= b
.x
; a
.y
-= b
.y
; }
266 inline __device__ u32x
operator << (const u32x a
, const u32 b
) { return u32x ((a
.x
<< b
), (a
.y
<< b
)); }
267 inline __device__ u32x
operator << (const u32x a
, const u32x b
) { return u32x ((a
.x
<< b
.x
), (a
.y
<< b
.y
)); }
269 inline __device__ u32x
operator >> (const u32x a
, const u32 b
) { return u32x ((a
.x
>> b
), (a
.y
>> b
)); }
270 inline __device__ u32x
operator >> (const u32x a
, const u32x b
) { return u32x ((a
.x
>> b
.x
), (a
.y
>> b
.y
)); }
272 inline __device__ u32x
operator ^ (const u32x a
, const u32 b
) { return u32x ((a
.x
^ b
), (a
.y
^ b
)); }
273 inline __device__ u32x
operator ^ (const u32x a
, const u32x b
) { return u32x ((a
.x
^ b
.x
), (a
.y
^ b
.y
)); }
275 inline __device__ u32x
operator | (const u32x a
, const u32 b
) { return u32x ((a
.x
| b
), (a
.y
| b
)); }
276 inline __device__ u32x
operator | (const u32x a
, const u32x b
) { return u32x ((a
.x
| b
.x
), (a
.y
| b
.y
)); }
278 inline __device__ u32x
operator & (const u32x a
, const u32 b
) { return u32x ((a
.x
& b
), (a
.y
& b
)); }
279 inline __device__ u32x
operator & (const u32x a
, const u32x b
) { return u32x ((a
.x
& b
.x
), (a
.y
& b
.y
)); }
281 inline __device__ u32x
operator + (const u32x a
, const u32 b
) { return u32x ((a
.x
+ b
), (a
.y
+ b
)); }
282 inline __device__ u32x
operator + (const u32x a
, const u32x b
) { return u32x ((a
.x
+ b
.x
), (a
.y
+ b
.y
)); }
284 inline __device__ u32x
operator - (const u32x a
, const u32 b
) { return u32x ((a
.x
- b
), (a
.y
- b
)); }
285 inline __device__ u32x
operator - (const u32x a
, const u32x b
) { return u32x ((a
.x
- b
.x
), (a
.y
- b
.y
)); }
287 inline __device__ u32x
operator * (const u32x a
, const u32 b
) { return u32x ((a
.x
* b
), (a
.y
* b
)); }
288 inline __device__ u32x
operator * (const u32x a
, const u32x b
) { return u32x ((a
.x
* b
.x
), (a
.y
* b
.y
)); }
290 inline __device__ u32x
operator ~ (const u32x a
) { return u32x (~a
.x
, ~a
.y
); }
292 inline __device__
bool operator != (const u64x a
, const u64 b
) { return ((a
.x
!= b
) && (a
.y
!= b
)); }
293 inline __device__
bool operator != (const u64x a
, const u64x b
) { return ((a
.x
!= b
.x
) && (a
.y
!= b
.y
)); }
295 inline __device__
void operator ^= (u64x
&a
, const u64 b
) { a
.x
^= b
; a
.y
^= b
; }
296 inline __device__
void operator ^= (u64x
&a
, const u64x b
) { a
.x
^= b
.x
; a
.y
^= b
.y
; }
298 inline __device__
void operator |= (u64x
&a
, const u64 b
) { a
.x
|= b
; a
.y
|= b
; }
299 inline __device__
void operator |= (u64x
&a
, const u64x b
) { a
.x
|= b
.x
; a
.y
|= b
.y
; }
301 inline __device__
void operator &= (u64x
&a
, const u64 b
) { a
.x
&= b
; a
.y
&= b
; }
302 inline __device__
void operator &= (u64x
&a
, const u64x b
) { a
.x
&= b
.x
; a
.y
&= b
.y
; }
304 inline __device__
void operator += (u64x
&a
, const u64 b
) { a
.x
+= b
; a
.y
+= b
; }
305 inline __device__
void operator += (u64x
&a
, const u64x b
) { a
.x
+= b
.x
; a
.y
+= b
.y
; }
307 inline __device__
void operator -= (u64x
&a
, const u64 b
) { a
.x
-= b
; a
.y
-= b
; }
308 inline __device__
void operator -= (u64x
&a
, const u64x b
) { a
.x
-= b
.x
; a
.y
-= b
.y
; }
310 inline __device__ u64x
operator << (const u64x a
, const u64 b
) { return u64x ((a
.x
<< b
), (a
.y
<< b
)); }
311 inline __device__ u64x
operator << (const u64x a
, const u64x b
) { return u64x ((a
.x
<< b
.x
), (a
.y
<< b
.y
)); }
313 inline __device__ u64x
operator >> (const u64x a
, const u64 b
) { return u64x ((a
.x
>> b
), (a
.y
>> b
)); }
314 inline __device__ u64x
operator >> (const u64x a
, const u64x b
) { return u64x ((a
.x
>> b
.x
), (a
.y
>> b
.y
)); }
316 inline __device__ u64x
operator ^ (const u64x a
, const u64 b
) { return u64x ((a
.x
^ b
), (a
.y
^ b
)); }
317 inline __device__ u64x
operator ^ (const u64x a
, const u64x b
) { return u64x ((a
.x
^ b
.x
), (a
.y
^ b
.y
)); }
319 inline __device__ u64x
operator | (const u64x a
, const u64 b
) { return u64x ((a
.x
| b
), (a
.y
| b
)); }
320 inline __device__ u64x
operator | (const u64x a
, const u64x b
) { return u64x ((a
.x
| b
.x
), (a
.y
| b
.y
)); }
322 inline __device__ u64x
operator & (const u64x a
, const u64 b
) { return u64x ((a
.x
& b
), (a
.y
& b
)); }
323 inline __device__ u64x
operator & (const u64x a
, const u64x b
) { return u64x ((a
.x
& b
.x
), (a
.y
& b
.y
)); }
325 inline __device__ u64x
operator + (const u64x a
, const u64 b
) { return u64x ((a
.x
+ b
), (a
.y
+ b
)); }
326 inline __device__ u64x
operator + (const u64x a
, const u64x b
) { return u64x ((a
.x
+ b
.x
), (a
.y
+ b
.y
)); }
328 inline __device__ u64x
operator - (const u64x a
, const u64 b
) { return u64x ((a
.x
- b
), (a
.y
- b
)); }
329 inline __device__ u64x
operator - (const u64x a
, const u64x b
) { return u64x ((a
.x
- b
.x
), (a
.y
- b
.y
)); }
331 inline __device__ u64x
operator ~ (const u64x a
) { return u64x (~a
.x
, ~a
.y
); }
333 __device__
static u32x
lut3_2d (const u32x a
, const u32x b
, const u32x c
)
335 return u32x (lut3_2d (a
.x
, b
.x
, c
.x
),
336 lut3_2d (a
.y
, b
.y
, c
.y
));
339 __device__
static u32x
lut3_39 (const u32x a
, const u32x b
, const u32x c
)
341 return u32x (lut3_39 (a
.x
, b
.x
, c
.x
),
342 lut3_39 (a
.y
, b
.y
, c
.y
));
345 __device__
static u32x
lut3_59 (const u32x a
, const u32x b
, const u32x c
)
347 return u32x (lut3_59 (a
.x
, b
.x
, c
.x
),
348 lut3_59 (a
.y
, b
.y
, c
.y
));
351 __device__
static u32x
lut3_96 (const u32x a
, const u32x b
, const u32x c
)
353 return u32x (lut3_96 (a
.x
, b
.x
, c
.x
),
354 lut3_96 (a
.y
, b
.y
, c
.y
));
357 __device__
static u32x
lut3_e4 (const u32x a
, const u32x b
, const u32x c
)
359 return u32x (lut3_e4 (a
.x
, b
.x
, c
.x
),
360 lut3_e4 (a
.y
, b
.y
, c
.y
));
363 __device__
static u32x
lut3_e8 (const u32x a
, const u32x b
, const u32x c
)
365 return u32x (lut3_e8 (a
.x
, b
.x
, c
.x
),
366 lut3_e8 (a
.y
, b
.y
, c
.y
));
369 __device__
static u32x
lut3_ca (const u32x a
, const u32x b
, const u32x c
)
371 return u32x (lut3_ca (a
.x
, b
.x
, c
.x
),
372 lut3_ca (a
.y
, b
.y
, c
.y
));
375 __device__
static u32x
rotl32(const u32x a
, const u32 n
)
377 return u32x (rotl32 (a
.x
, n
),
381 __device__
static u32x
rotr32(const u32x a
, const u32 n
)
383 return u32x (rotr32 (a
.x
, n
),
387 __device__
static u64x
rotl64(const u64x a
, const u32 n
)
389 return u64x (rotl64 (a
.x
, n
),
393 __device__
static u64x
rotr64(const u64x a
, const u32 n
)
395 return u64x (rotr64 (a
.x
, n
),
399 __device__
static u32x
__byte_perm (const u32x a
, const u32x b
, const u32 c
)
401 return u32x (__byte_perm (a
.x
, b
.x
, c
),
402 __byte_perm (a
.y
, b
.y
, c
));
421 inline __device__
u8x (const u8 a
, const u8 b
, const u8 c
, const u8 d
) : x(a
), y(b
), z(c
), w(d
) { }
422 inline __device__
u8x (const u8 a
) : x(a
), y(a
), z(a
), w(a
) { }
424 inline __device__
u8x (void) { }
425 inline __device__
~u8x (void) { }
438 inline __device__
u16x (const u16 a
, const u16 b
, const u16 c
, const u16 d
) : x(a
), y(b
), z(c
), w(d
) { }
439 inline __device__
u16x (const u16 a
) : x(a
), y(a
), z(a
), w(a
) { }
441 inline __device__
u16x (void) { }
442 inline __device__
~u16x (void) { }
455 inline __device__
u32x (const u32 a
, const u32 b
, const u32 c
, const u32 d
) : x(a
), y(b
), z(c
), w(d
) { }
456 inline __device__
u32x (const u32 a
) : x(a
), y(a
), z(a
), w(a
) { }
458 inline __device__
u32x (void) { }
459 inline __device__
~u32x (void) { }
472 inline __device__
u64x (const u32x a
) : x(a
.x
), y(a
.y
), z(a
.z
), w(a
.w
) { }
474 inline __device__
u64x (const u64 a
, const u64 b
, const u64 c
, const u64 d
) : x(a
), y(b
), z(c
), w(d
) { }
475 inline __device__
u64x (const u64 a
) : x(a
), y(a
), z(a
), w(a
) { }
477 inline __device__
u64x (void) { }
478 inline __device__
~u64x (void) { }
481 inline __device__
bool operator != (const u32x a
, const u32 b
) { return ((a
.x
!= b
) && (a
.y
!= b
) && (a
.z
!= b
) && (a
.w
!= b
)); }
482 inline __device__
bool operator != (const u32x a
, const u32x b
) { return ((a
.x
!= b
.x
) && (a
.y
!= b
.y
) && (a
.z
!= b
.z
) && (a
.w
!= b
.w
)); }
484 inline __device__
void operator ^= (u32x
&a
, const u32 b
) { a
.x
^= b
; a
.y
^= b
; a
.z
^= b
; a
.w
^= b
; }
485 inline __device__
void operator ^= (u32x
&a
, const u32x b
) { a
.x
^= b
.x
; a
.y
^= b
.y
; a
.z
^= b
.z
; a
.w
^= b
.w
; }
487 inline __device__
void operator |= (u32x
&a
, const u32 b
) { a
.x
|= b
; a
.y
|= b
; a
.z
|= b
; a
.w
|= b
; }
488 inline __device__
void operator |= (u32x
&a
, const u32x b
) { a
.x
|= b
.x
; a
.y
|= b
.y
; a
.z
|= b
.z
; a
.w
|= b
.w
; }
490 inline __device__
void operator &= (u32x
&a
, const u32 b
) { a
.x
&= b
; a
.y
&= b
; a
.z
&= b
; a
.w
&= b
; }
491 inline __device__
void operator &= (u32x
&a
, const u32x b
) { a
.x
&= b
.x
; a
.y
&= b
.y
; a
.z
&= b
.z
; a
.w
&= b
.w
; }
493 inline __device__
void operator += (u32x
&a
, const u32 b
) { a
.x
+= b
; a
.y
+= b
; a
.z
+= b
; a
.w
+= b
; }
494 inline __device__
void operator += (u32x
&a
, const u32x b
) { a
.x
+= b
.x
; a
.y
+= b
.y
; a
.z
+= b
.z
; a
.w
+= b
.w
; }
496 inline __device__
void operator -= (u32x
&a
, const u32 b
) { a
.x
-= b
; a
.y
-= b
; a
.z
-= b
; a
.w
-= b
; }
497 inline __device__
void operator -= (u32x
&a
, const u32x b
) { a
.x
-= b
.x
; a
.y
-= b
.y
; a
.z
-= b
.z
; a
.w
-= b
.w
; }
499 inline __device__ u32x
operator << (const u32x a
, const u32 b
) { return u32x ((a
.x
<< b
), (a
.y
<< b
), (a
.z
<< b
), (a
.w
<< b
)); }
500 inline __device__ u32x
operator << (const u32x a
, const u32x b
) { return u32x ((a
.x
<< b
.x
), (a
.y
<< b
.y
), (a
.z
<< b
.z
), (a
.w
<< b
.w
)); }
502 inline __device__ u32x
operator >> (const u32x a
, const u32 b
) { return u32x ((a
.x
>> b
), (a
.y
>> b
), (a
.z
>> b
), (a
.w
>> b
)); }
503 inline __device__ u32x
operator >> (const u32x a
, const u32x b
) { return u32x ((a
.x
>> b
.x
), (a
.y
>> b
.y
), (a
.z
>> b
.z
), (a
.w
>> b
.w
)); }
505 inline __device__ u32x
operator ^ (const u32x a
, const u32 b
) { return u32x ((a
.x
^ b
), (a
.y
^ b
), (a
.z
^ b
), (a
.w
^ b
)); }
506 inline __device__ u32x
operator ^ (const u32x a
, const u32x b
) { return u32x ((a
.x
^ b
.x
), (a
.y
^ b
.y
), (a
.z
^ b
.z
), (a
.w
^ b
.w
)); }
508 inline __device__ u32x
operator | (const u32x a
, const u32 b
) { return u32x ((a
.x
| b
), (a
.y
| b
), (a
.z
| b
), (a
.w
| b
)); }
509 inline __device__ u32x
operator | (const u32x a
, const u32x b
) { return u32x ((a
.x
| b
.x
), (a
.y
| b
.y
), (a
.z
| b
.z
), (a
.w
| b
.w
)); }
511 inline __device__ u32x
operator & (const u32x a
, const u32 b
) { return u32x ((a
.x
& b
), (a
.y
& b
), (a
.z
& b
), (a
.w
& b
)); }
512 inline __device__ u32x
operator & (const u32x a
, const u32x b
) { return u32x ((a
.x
& b
.x
), (a
.y
& b
.y
), (a
.z
& b
.z
), (a
.w
& b
.w
)); }
514 inline __device__ u32x
operator + (const u32x a
, const u32 b
) { return u32x ((a
.x
+ b
), (a
.y
+ b
), (a
.z
+ b
), (a
.w
+ b
)); }
515 inline __device__ u32x
operator + (const u32x a
, const u32x b
) { return u32x ((a
.x
+ b
.x
), (a
.y
+ b
.y
), (a
.z
+ b
.z
), (a
.w
+ b
.w
)); }
517 inline __device__ u32x
operator - (const u32x a
, const u32 b
) { return u32x ((a
.x
- b
), (a
.y
- b
), (a
.z
- b
), (a
.w
- b
)); }
518 inline __device__ u32x
operator - (const u32x a
, const u32x b
) { return u32x ((a
.x
- b
.x
), (a
.y
- b
.y
), (a
.z
- b
.z
), (a
.w
- b
.w
)); }
520 inline __device__ u32x
operator * (const u32x a
, const u32 b
) { return u32x ((a
.x
* b
), (a
.y
* b
), (a
.z
* b
), (a
.w
* b
)); }
521 inline __device__ u32x
operator * (const u32x a
, const u32x b
) { return u32x ((a
.x
* b
.x
), (a
.y
* b
.y
), (a
.z
* b
.z
), (a
.w
* b
.w
)); }
523 inline __device__ u32x
operator ~ (const u32x a
) { return u32x (~a
.x
, ~a
.y
, ~a
.z
, ~a
.w
); }
525 inline __device__
bool operator != (const u64x a
, const u64 b
) { return ((a
.x
!= b
) && (a
.y
!= b
) && (a
.z
!= b
) && (a
.w
!= b
)); }
526 inline __device__
bool operator != (const u64x a
, const u64x b
) { return ((a
.x
!= b
.x
) && (a
.y
!= b
.y
) && (a
.z
!= b
.z
) && (a
.w
!= b
.w
)); }
528 inline __device__
void operator ^= (u64x
&a
, const u64 b
) { a
.x
^= b
; a
.y
^= b
; a
.z
^= b
; a
.w
^= b
; }
529 inline __device__
void operator ^= (u64x
&a
, const u64x b
) { a
.x
^= b
.x
; a
.y
^= b
.y
; a
.z
^= b
.z
; a
.w
^= b
.w
; }
531 inline __device__
void operator |= (u64x
&a
, const u64 b
) { a
.x
|= b
; a
.y
|= b
; a
.z
|= b
; a
.w
|= b
; }
532 inline __device__
void operator |= (u64x
&a
, const u64x b
) { a
.x
|= b
.x
; a
.y
|= b
.y
; a
.z
|= b
.z
; a
.w
|= b
.w
; }
534 inline __device__
void operator &= (u64x
&a
, const u64 b
) { a
.x
&= b
; a
.y
&= b
; a
.z
&= b
; a
.w
&= b
; }
535 inline __device__
void operator &= (u64x
&a
, const u64x b
) { a
.x
&= b
.x
; a
.y
&= b
.y
; a
.z
&= b
.z
; a
.w
&= b
.w
; }
537 inline __device__
void operator += (u64x
&a
, const u64 b
) { a
.x
+= b
; a
.y
+= b
; a
.z
+= b
; a
.w
+= b
; }
538 inline __device__
void operator += (u64x
&a
, const u64x b
) { a
.x
+= b
.x
; a
.y
+= b
.y
; a
.z
+= b
.z
; a
.w
+= b
.w
; }
540 inline __device__
void operator -= (u64x
&a
, const u64 b
) { a
.x
-= b
; a
.y
-= b
; a
.z
-= b
; a
.w
-= b
; }
541 inline __device__
void operator -= (u64x
&a
, const u64x b
) { a
.x
-= b
.x
; a
.y
-= b
.y
; a
.z
-= b
.z
; a
.w
-= b
.w
; }
543 inline __device__ u64x
operator << (const u64x a
, const u64 b
) { return u64x ((a
.x
<< b
), (a
.y
<< b
), (a
.z
<< b
), (a
.w
<< b
)); }
544 inline __device__ u64x
operator << (const u64x a
, const u64x b
) { return u64x ((a
.x
<< b
.x
), (a
.y
<< b
.y
), (a
.z
<< b
.z
), (a
.w
<< b
.w
)); }
546 inline __device__ u64x
operator >> (const u64x a
, const u64 b
) { return u64x ((a
.x
>> b
), (a
.y
>> b
), (a
.z
>> b
), (a
.w
>> b
)); }
547 inline __device__ u64x
operator >> (const u64x a
, const u64x b
) { return u64x ((a
.x
>> b
.x
), (a
.y
>> b
.y
), (a
.z
>> b
.z
), (a
.w
>> b
.w
)); }
549 inline __device__ u64x
operator ^ (const u64x a
, const u64 b
) { return u64x ((a
.x
^ b
), (a
.y
^ b
), (a
.z
^ b
), (a
.w
^ b
)); }
550 inline __device__ u64x
operator ^ (const u64x a
, const u64x b
) { return u64x ((a
.x
^ b
.x
), (a
.y
^ b
.y
), (a
.z
^ b
.z
), (a
.w
^ b
.w
)); }
552 inline __device__ u64x
operator | (const u64x a
, const u64 b
) { return u64x ((a
.x
| b
), (a
.y
| b
), (a
.z
| b
), (a
.w
| b
)); }
553 inline __device__ u64x
operator | (const u64x a
, const u64x b
) { return u64x ((a
.x
| b
.x
), (a
.y
| b
.y
), (a
.z
| b
.z
), (a
.w
| b
.w
)); }
555 inline __device__ u64x
operator & (const u64x a
, const u64 b
) { return u64x ((a
.x
& b
), (a
.y
& b
), (a
.z
& b
), (a
.w
& b
)); }
556 inline __device__ u64x
operator & (const u64x a
, const u64x b
) { return u64x ((a
.x
& b
.x
), (a
.y
& b
.y
), (a
.z
& b
.z
), (a
.w
& b
.w
)); }
558 inline __device__ u64x
operator + (const u64x a
, const u64 b
) { return u64x ((a
.x
+ b
), (a
.y
+ b
), (a
.z
+ b
), (a
.w
+ b
)); }
559 inline __device__ u64x
operator + (const u64x a
, const u64x b
) { return u64x ((a
.x
+ b
.x
), (a
.y
+ b
.y
), (a
.z
+ b
.z
), (a
.w
+ b
.w
)); }
561 inline __device__ u64x
operator - (const u64x a
, const u64 b
) { return u64x ((a
.x
- b
), (a
.y
- b
), (a
.z
- b
), (a
.w
- b
)); }
562 inline __device__ u64x
operator - (const u64x a
, const u64x b
) { return u64x ((a
.x
- b
.x
), (a
.y
- b
.y
), (a
.z
- b
.z
), (a
.w
- b
.w
)); }
564 inline __device__ u64x
operator * (const u64x a
, const u64 b
) { return u64x ((a
.x
* b
), (a
.y
* b
), (a
.z
* b
), (a
.w
* b
)); }
565 inline __device__ u64x
operator * (const u64x a
, const u64x b
) { return u64x ((a
.x
* b
.x
), (a
.y
* b
.y
), (a
.z
* b
.z
), (a
.w
* b
.w
)); }
567 inline __device__ u64x
operator ~ (const u64x a
) { return u64x (~a
.x
, ~a
.y
, ~a
.z
, ~a
.w
); }
569 __device__
static u32x
lut3_2d (const u32x a
, const u32x b
, const u32x c
)
571 return u32x (lut3_2d(a
.x
, b
.x
, c
.x
),
572 lut3_2d (a
.y
, b
.y
, c
.y
),
573 lut3_2d (a
.z
, b
.z
, c
.z
),
574 lut3_2d (a
.w
, b
.w
, c
.w
));
577 __device__
static u32x
lut3_39 (const u32x a
, const u32x b
, const u32x c
)
579 return u32x (lut3_39 (a
.x
, b
.x
, c
.x
),
580 lut3_39 (a
.y
, b
.y
, c
.y
),
581 lut3_39 (a
.z
, b
.z
, c
.z
),
582 lut3_39 (a
.w
, b
.w
, c
.w
));
585 __device__
static u32x
lut3_59 (const u32x a
, const u32x b
, const u32x c
)
587 return u32x (lut3_59 (a
.x
, b
.x
, c
.x
),
588 lut3_59 (a
.y
, b
.y
, c
.y
),
589 lut3_59 (a
.z
, b
.z
, c
.z
),
590 lut3_59 (a
.w
, b
.w
, c
.w
));
593 __device__
static u32x
lut3_96 (const u32x a
, const u32x b
, const u32x c
)
595 return u32x (lut3_96 (a
.x
, b
.x
, c
.x
),
596 lut3_96 (a
.y
, b
.y
, c
.y
),
597 lut3_96 (a
.z
, b
.z
, c
.z
),
598 lut3_96 (a
.w
, b
.w
, c
.w
));
601 __device__
static u32x
lut3_e4 (const u32x a
, const u32x b
, const u32x c
)
603 return u32x (lut3_e4 (a
.x
, b
.x
, c
.x
),
604 lut3_e4 (a
.y
, b
.y
, c
.y
),
605 lut3_e4 (a
.z
, b
.z
, c
.z
),
606 lut3_e4 (a
.w
, b
.w
, c
.w
));
609 __device__
static u32x
lut3_e8 (const u32x a
, const u32x b
, const u32x c
)
611 return u32x (lut3_e8 (a
.x
, b
.x
, c
.x
),
612 lut3_e8 (a
.y
, b
.y
, c
.y
),
613 lut3_e8 (a
.z
, b
.z
, c
.z
),
614 lut3_e8 (a
.w
, b
.w
, c
.w
));
617 __device__
static u32x
lut3_ca (const u32x a
, const u32x b
, const u32x c
)
619 return u32x (lut3_ca (a
.x
, b
.x
, c
.x
),
620 lut3_ca (a
.y
, b
.y
, c
.y
),
621 lut3_ca (a
.z
, b
.z
, c
.z
),
622 lut3_ca (a
.w
, b
.w
, c
.w
));
625 __device__
static u32x
rotl32(const u32x a
, const u32 n
)
627 return u32x (rotl32 (a
.x
, n
),
633 __device__
static u32x
rotr32(const u32x a
, const u32 n
)
635 return u32x (rotr32 (a
.x
, n
),
641 __device__
static u64x
rotl64(const u64x a
, const u32 n
)
643 return u64x (rotl64 (a
.x
, n
),
649 __device__
static u64x
rotr64(const u64x a
, const u32 n
)
651 return u64x (rotr64 (a
.x
, n
),
657 __device__
static u32x
__byte_perm (const u32x a
, const u32x b
, const u32 c
)
659 return u32x (__byte_perm (a
.x
, b
.x
, c
),
660 __byte_perm (a
.y
, b
.y
, c
),
661 __byte_perm (a
.z
, b
.z
, c
),
662 __byte_perm (a
.w
, b
.w
, c
));
679 #elif defined _BCRYPT_
681 #elif defined _SHA256_
683 #elif defined _SHA384_
685 #elif defined _SHA512_
687 #elif defined _KECCAK_
689 #elif defined _RIPEMD160_
691 #elif defined _WHIRLPOOL_
695 #elif defined _GOST2012_256_
697 #elif defined _GOST2012_512_
703 #elif defined _MYSQL323_
705 #elif defined _LOTUS5_
707 #elif defined _LOTUS6_
709 #elif defined _SCRYPT_
711 #elif defined _LOTUS8_
713 #elif defined _OFFICE2007_
715 #elif defined _OFFICE2010_
717 #elif defined _OFFICE2013_
719 #elif defined _OLDOFFICE01_
721 #elif defined _OLDOFFICE34_
723 #elif defined _SIPHASH_
725 #elif defined _PBKDF2_MD5_
727 #elif defined _PBKDF2_SHA1_
729 #elif defined _PBKDF2_SHA256_
731 #elif defined _PBKDF2_SHA512_
733 #elif defined _PDF17L8_
735 #elif defined _CRC32_
737 #elif defined _SEVEN_ZIP_
739 #elif defined _ANDROIDFDE_
745 #elif defined _MD5_SHA1_
747 #elif defined _SHA1_MD5_
749 #elif defined _NETNTLMV2_
751 #elif defined _KRB5PA_
753 #elif defined _CLOUDKEY_
755 #elif defined _SCRYPT_
757 #elif defined _PSAFE2_
759 #elif defined _LOTUS8_
763 #elif defined _SHA256_SHA1_
765 #elif defined _MS_DRSR_
828 u32 cry_master_buf
[64];
830 u32 public_key_buf
[64];
871 u32 userdomain_buf
[64];
934 u32 encryptedVerifier
[4];
935 u32 encryptedVerifierHash
[5];
943 u32 encryptedVerifier
[4];
944 u32 encryptedVerifierHash
[8];
950 u32 encryptedVerifier
[4];
951 u32 encryptedVerifierHash
[8];
958 u32 encryptedVerifier
[4];
959 u32 encryptedVerifierHash
[4];
967 u32 encryptedVerifier
[4];
968 u32 encryptedVerifierHash
[5];
1012 } sha256crypt_tmp_t
;
1016 u64x l_alt_result
[8];
1021 } sha512crypt_tmp_t
;
1037 } bitcoin_wallet_tmp_t
;
1133 } pbkdf2_sha1_tmp_t
;
1143 } pbkdf2_sha256_tmp_t
;
1153 } pbkdf2_sha512_tmp_t
;
1378 u32 alignment_placeholder_1
;
1379 u32 alignment_placeholder_2
;
1380 u32 alignment_placeholder_3
;