 * Author......: Jens Steube <jens.steube@gmail.com>
12 static inline u32
swap32 (const u32 v
)
14 return (as_uint (as_uchar4 (v
).s3210
));
17 static inline u64
swap64 (const u64 v
)
19 return (as_ulong (as_uchar8 (v
).s76543210
));
24 static inline u32
swap32 (const u32 v
)
28 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r
) : "r"(v
));
33 static inline u64
swap64 (const u64 v
)
38 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il
), "=r"(ir
) : "l"(v
));
43 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl
) : "r"(il
));
44 asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr
) : "r"(ir
));
48 asm ("mov.b64 %0, {%1, %2};" : "=l"(r
) : "r"(tr
), "r"(tl
));
55 static inline u32
swap32 (const u32 v
)
57 return (as_uint (as_uchar4 (v
).s3210
));
60 static inline u64
swap64 (const u64 v
)
62 return (as_ulong (as_uchar8 (v
).s76543210
));
67 static inline u32
__bfe (const u32 a
, const u32 b
, const u32 c
)
69 return amd_bfe (a
, b
, c
);
74 static inline u32
__byte_perm (const u32 a
, const u32 b
, const u32 c
)
78 asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r
) : "r"(a
), "r"(b
), "r"(c
));
83 static inline u32
__bfe (const u32 a
, const u32 b
, const u32 c
)
87 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r
) : "r"(a
), "r"(b
), "r"(c
));
94 static inline u32
amd_bytealign (const u32 a
, const u32 b
, const u32 c
)
98 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r
) : "r"(b
), "r"(a
), "r"((c
& 3) * 8));
105 static inline u32
amd_bytealign (const u32 a
, const u32 b
, const u32 c
)
107 return __byte_perm (b
, a
, (0x76543210 >> ((c
& 3) * 4)) & 0xffff);
112 static inline u32
lut3_2d (const u32 a
, const u32 b
, const u32 c
)
116 asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r
) : "r" (a
), "r" (b
), "r" (c
));
121 static inline u32
lut3_39 (const u32 a
, const u32 b
, const u32 c
)
125 asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r
) : "r" (a
), "r" (b
), "r" (c
));
130 static inline u32
lut3_59 (const u32 a
, const u32 b
, const u32 c
)
134 asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r
) : "r" (a
), "r" (b
), "r" (c
));
139 static inline u32
lut3_96 (const u32 a
, const u32 b
, const u32 c
)
143 asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r
) : "r" (a
), "r" (b
), "r" (c
));
148 static inline u32
lut3_e4 (const u32 a
, const u32 b
, const u32 c
)
152 asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r
) : "r" (a
), "r" (b
), "r" (c
));
157 static inline u32
lut3_e8 (const u32 a
, const u32 b
, const u32 c
)
161 asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r
) : "r" (a
), "r" (b
), "r" (c
));
166 static inline u32
lut3_ca (const u32 a
, const u32 b
, const u32 c
)
170 asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r
) : "r" (a
), "r" (b
), "r" (c
));
180 static u32 allx (const u32 r)
186 static inline u32
l32_from_64 (u64 a
)
188 const u32 r
= (uint
) (a
);
193 static inline u32
h32_from_64 (u64 a
)
197 const u32 r
= (uint
) (a
);
202 static inline u64
hl32_to_64 (const u32 a
, const u32 b
)
204 return as_ulong ((uint2
) (b
, a
));
209 static inline u32
rotr32 (const u32 a
, const u32 n
)
211 return rotate (a
, 32 - n
);
214 static inline u32
rotl32 (const u32 a
, const u32 n
)
216 return rotate (a
, n
);
219 static inline u64
rotr64 (const u64 a
, const u32 n
)
221 uint2 a2
= as_uint2 (a
);
225 t
.s0
= (n
>= 32) ? amd_bitalign (a2
.s0
, a2
.s1
, n
- 32)
226 : amd_bitalign (a2
.s1
, a2
.s0
, n
);
227 t
.s1
= (n
>= 32) ? amd_bitalign (a2
.s1
, a2
.s0
, n
- 32)
228 : amd_bitalign (a2
.s0
, a2
.s1
, n
);
233 static inline u64
rotl64 (const u64 a
, const u32 n
)
235 return rotr64 (a
, 64 - n
);
this version reduced the number of registers, but for some unknown reason the whole kernel became slower... instruction cache monster?
246 static inline u32 rotr32 (const u32 a, const u32 n)
252 case 0: asm ("shf.r.wrap.b32 %0, %1, %1, 0;" : "=r"(r) : "r"(a)); break;
253 case 1: asm ("shf.r.wrap.b32 %0, %1, %1, 1;" : "=r"(r) : "r"(a)); break;
254 case 2: asm ("shf.r.wrap.b32 %0, %1, %1, 2;" : "=r"(r) : "r"(a)); break;
255 case 3: asm ("shf.r.wrap.b32 %0, %1, %1, 3;" : "=r"(r) : "r"(a)); break;
256 case 4: asm ("shf.r.wrap.b32 %0, %1, %1, 4;" : "=r"(r) : "r"(a)); break;
257 case 5: asm ("shf.r.wrap.b32 %0, %1, %1, 5;" : "=r"(r) : "r"(a)); break;
258 case 6: asm ("shf.r.wrap.b32 %0, %1, %1, 6;" : "=r"(r) : "r"(a)); break;
259 case 7: asm ("shf.r.wrap.b32 %0, %1, %1, 7;" : "=r"(r) : "r"(a)); break;
260 case 8: asm ("shf.r.wrap.b32 %0, %1, %1, 8;" : "=r"(r) : "r"(a)); break;
261 case 9: asm ("shf.r.wrap.b32 %0, %1, %1, 9;" : "=r"(r) : "r"(a)); break;
262 case 10: asm ("shf.r.wrap.b32 %0, %1, %1, 10;" : "=r"(r) : "r"(a)); break;
263 case 11: asm ("shf.r.wrap.b32 %0, %1, %1, 11;" : "=r"(r) : "r"(a)); break;
264 case 12: asm ("shf.r.wrap.b32 %0, %1, %1, 12;" : "=r"(r) : "r"(a)); break;
265 case 13: asm ("shf.r.wrap.b32 %0, %1, %1, 13;" : "=r"(r) : "r"(a)); break;
266 case 14: asm ("shf.r.wrap.b32 %0, %1, %1, 14;" : "=r"(r) : "r"(a)); break;
267 case 15: asm ("shf.r.wrap.b32 %0, %1, %1, 15;" : "=r"(r) : "r"(a)); break;
268 case 16: asm ("shf.r.wrap.b32 %0, %1, %1, 16;" : "=r"(r) : "r"(a)); break;
269 case 17: asm ("shf.r.wrap.b32 %0, %1, %1, 17;" : "=r"(r) : "r"(a)); break;
270 case 18: asm ("shf.r.wrap.b32 %0, %1, %1, 18;" : "=r"(r) : "r"(a)); break;
271 case 19: asm ("shf.r.wrap.b32 %0, %1, %1, 19;" : "=r"(r) : "r"(a)); break;
272 case 20: asm ("shf.r.wrap.b32 %0, %1, %1, 20;" : "=r"(r) : "r"(a)); break;
273 case 21: asm ("shf.r.wrap.b32 %0, %1, %1, 21;" : "=r"(r) : "r"(a)); break;
274 case 22: asm ("shf.r.wrap.b32 %0, %1, %1, 22;" : "=r"(r) : "r"(a)); break;
275 case 23: asm ("shf.r.wrap.b32 %0, %1, %1, 23;" : "=r"(r) : "r"(a)); break;
276 case 24: asm ("shf.r.wrap.b32 %0, %1, %1, 24;" : "=r"(r) : "r"(a)); break;
277 case 25: asm ("shf.r.wrap.b32 %0, %1, %1, 25;" : "=r"(r) : "r"(a)); break;
278 case 26: asm ("shf.r.wrap.b32 %0, %1, %1, 26;" : "=r"(r) : "r"(a)); break;
279 case 27: asm ("shf.r.wrap.b32 %0, %1, %1, 27;" : "=r"(r) : "r"(a)); break;
280 case 28: asm ("shf.r.wrap.b32 %0, %1, %1, 28;" : "=r"(r) : "r"(a)); break;
281 case 29: asm ("shf.r.wrap.b32 %0, %1, %1, 29;" : "=r"(r) : "r"(a)); break;
282 case 30: asm ("shf.r.wrap.b32 %0, %1, %1, 30;" : "=r"(r) : "r"(a)); break;
283 case 31: asm ("shf.r.wrap.b32 %0, %1, %1, 31;" : "=r"(r) : "r"(a)); break;
290 static inline u32
rotr32 (const u32 a
, const u32 n
)
294 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r
) : "r"(a
), "r"(a
), "r"(n
));
299 static inline u32
rotl32 (const u32 a
, const u32 n
)
301 return rotr32 (a
, 32 - n
);
304 static inline u64
rotr64 (const u64 a
, const u32 n
)
309 asm ("mov.b64 {%0, %1}, %2;" : "=r"(il
), "=r"(ir
) : "l"(a
));
316 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl
) : "r"(ir
), "r"(il
), "r"(n
- 32));
317 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr
) : "r"(il
), "r"(ir
), "r"(n
- 32));
321 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl
) : "r"(il
), "r"(ir
), "r"(n
));
322 asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr
) : "r"(ir
), "r"(il
), "r"(n
));
327 asm ("mov.b64 %0, {%1, %2};" : "=l"(r
) : "r"(tl
), "r"(tr
));
332 static inline u64
rotl64 (const u64 a
, const u32 n
)
334 return rotr64 (a
, 64 - n
);
339 static inline u32
rotr32 (const u32 a
, const u32 n
)
341 return rotate (a
, 32 - n
);
344 static inline u32
rotl32 (const u32 a
, const u32 n
)
346 return rotate (a
, n
);
349 static inline u64
rotr64 (const u64 a
, const u32 n
)
351 return rotate (a
, (u64
) 64 - n
);
354 static inline u64
rotl64 (const u64 a
, const u32 n
)
356 return rotr64 (a
, (u64
) 64 - n
);
364 static inline u32
rotr32 (const u32 a
, const u32 n
)
366 return rotate (a
, 32 - n
);
369 static inline u32
rotl32 (const u32 a
, const u32 n
)
371 // return rotate (a, n);
372 return (a
<< n
) | (a
>> (32 - n
));
375 static inline u64
rotr64 (const u64 a
, const u32 n
)
377 return rotate (a
, (u64
) 64 - n
);
380 static inline u64
rotl64 (const u64 a
, const u32 n
)
382 return rotr64 (a
, (u64
) 64 - n
);
399 #elif defined _BCRYPT_
401 #elif defined _SHA256_
403 #elif defined _SHA384_
405 #elif defined _SHA512_
407 #elif defined _KECCAK_
409 #elif defined _RIPEMD160_
411 #elif defined _WHIRLPOOL_
415 #elif defined _GOST2012_256_
417 #elif defined _GOST2012_512_
423 #elif defined _MYSQL323_
425 #elif defined _LOTUS5_
427 #elif defined _LOTUS6_
429 #elif defined _SCRYPT_
431 #elif defined _LOTUS8_
433 #elif defined _OFFICE2007_
435 #elif defined _OFFICE2010_
437 #elif defined _OFFICE2013_
439 #elif defined _OLDOFFICE01_
441 #elif defined _OLDOFFICE34_
443 #elif defined _SIPHASH_
445 #elif defined _PBKDF2_MD5_
447 #elif defined _PBKDF2_SHA1_
449 #elif defined _PBKDF2_SHA256_
451 #elif defined _PBKDF2_SHA512_
453 #elif defined _PDF17L8_
455 #elif defined _CRC32_
457 #elif defined _SEVEN_ZIP_
459 #elif defined _ANDROIDFDE_
465 #elif defined _MD5_SHA1_
467 #elif defined _SHA1_MD5_
469 #elif defined _NETNTLMV2_
471 #elif defined _KRB5PA_
473 #elif defined _CLOUDKEY_
475 #elif defined _SCRYPT_
477 #elif defined _PSAFE2_
479 #elif defined _LOTUS8_
483 #elif defined _SHA256_SHA1_
485 #elif defined _MS_DRSR_
548 u32 cry_master_buf
[64];
550 u32 public_key_buf
[64];
591 u32 userdomain_buf
[64];
654 u32 encryptedVerifier
[4];
655 u32 encryptedVerifierHash
[5];
663 u32 encryptedVerifier
[4];
664 u32 encryptedVerifierHash
[8];
670 u32 encryptedVerifier
[4];
671 u32 encryptedVerifierHash
[8];
678 u32 encryptedVerifier
[4];
679 u32 encryptedVerifierHash
[4];
687 u32 encryptedVerifier
[4];
688 u32 encryptedVerifierHash
[5];
757 } bitcoin_wallet_tmp_t
;
865 } pbkdf2_sha256_tmp_t
;
875 } pbkdf2_sha512_tmp_t
;
1078 u32 alignment_placeholder_1
;
1079 u32 alignment_placeholder_2
;
1080 u32 alignment_placeholder_3
;