/**
 * Author......: Jens Steube <jens.steube@gmail.com>
 */
static inline u32 swap32 (const u32 v)
{
  return (as_uint (as_uchar4 (v).s3210));
}
static inline u64 swap64 (const u64 v)
{
  return (as_ulong (as_uchar8 (v).s76543210));
}
static inline u32 swap32 (const u32 v)
{
  u32 r;

  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));

  return r;
}
static inline u64 swap64 (const u64 v)
{
  u32 il;
  u32 ir;

  asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(v));

  u32 tl;
  u32 tr;

  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl) : "r"(il));
  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));

  u64 r;

  asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));

  return r;
}
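/*
 * Illustrative note, not from the original source: both the portable
 * swizzle path and the prmt path implement a plain byte swap (endianness
 * flip); the prmt selector 0x0123 reads the bytes of %1 in reverse order,
 * matching the .s3210 swizzle, e.g.:
 *
 *   swap32 (0x11223344)         == 0x44332211
 *   swap64 (0x1122334455667788) == 0x8877665544332211
 */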
static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
  return amd_bfe (a, b, c);
}
static inline u32 __byte_perm (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));

  return r;
}
static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));

  return r;
}
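/*
 * Illustrative note, not from the original source: on both paths
 * __bfe (a, b, c) extracts a c-bit field from a starting at bit b
 * (amd_bfe on AMD, bfe.u32 on NVIDIA), equivalent for c < 32 to:
 *
 *   (a >> b) & ((1u << c) - 1)
 *
 * e.g. __bfe (0xaabbccdd, 8, 8) == 0xcc.
 */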
static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));

  return r;
}
static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
{
  return __byte_perm (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);
}
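/*
 * Illustrative note, not from the original source: both variants emulate
 * AMD's amd_bytealign (a, b, c), which returns 32 bits of the 64-bit
 * concatenation a:b shifted right by (c & 3) bytes:
 *
 *   (u32) (((((u64) a) << 32) | (u64) b) >> ((c & 3) * 8))
 *
 * The prmt selector (0x76543210 >> ((c & 3) * 4)) & 0xffff picks the same
 * four consecutive bytes out of the {b, a} byte pool.
 */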
static inline u32 lut3_2d (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_39 (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_59 (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_96 (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_e4 (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_e8 (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_ca (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}
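/*
 * Illustrative note, not from the original source: the 8-bit lop3.b32
 * immediate is the truth table of the inputs a = 0xf0, b = 0xcc, c = 0xaa.
 * For example 0x96 == 0xf0 ^ 0xcc ^ 0xaa (a three-way XOR in a single
 * instruction), 0xe8 is the majority function and 0xca the bitwise choice
 * (a ? b : c), so the usual SHA building blocks map directly:
 *
 *   Ch  (e, f, g) == lut3_ca (e, f, g)   // (e & f) | (~e & g)
 *   Maj (a, b, c) == lut3_e8 (a, b, c)   // (a & b) | (a & c) | (b & c)
 */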
static u32 allx (const u32 r)
static inline u32 l32_from_64 (u64 a)
{
  const u32 r = (uint) (a);

  return r;
}
static inline u32 h32_from_64 (u64 a)
{
  a >>= 32;

  const u32 r = (uint) (a);

  return r;
}
static inline u64 hl32_to_64 (const u32 a, const u32 b)
{
  return as_ulong ((uint2) (b, a));
}
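/*
 * Illustrative note, not from the original source: hl32_to_64 is the
 * inverse of the two extractors above, e.g.
 *
 *   const u64 x = hl32_to_64 (0x11223344, 0x55667788); // 0x1122334455667788
 *
 *   h32_from_64 (x) == 0x11223344
 *   l32_from_64 (x) == 0x55667788
 */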
static inline u32 rotr32 (const u32 a, const u32 n)
{
  return rotate (a, 32 - n);
}
static inline u32 rotl32 (const u32 a, const u32 n)
{
  return rotate (a, n);
}
static inline u64 rotr64 (const u64 a, const u32 n)
{
  uint2 a2 = as_uint2 (a);

  uint2 t;

  t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32)
                   : amd_bitalign (a2.s1, a2.s0, n);
  t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32)
                   : amd_bitalign (a2.s0, a2.s1, n);

  return as_ulong (t);
}
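/*
 * Illustrative note, not from the original source: amd_bitalign (hi, lo, s)
 * returns (u32) (((((u64) hi) << 32) | lo) >> (s & 31)), so the ternaries
 * above swap which half supplies the high bits once the rotate amount
 * crosses the 32-bit boundary.
 */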
static inline u64 rotl64 (const u64 a, const u32 n)
{
  return rotr64 (a, 64 - n);
}
/*
 * This version reduced the number of registers, but for some unknown reason
 * the whole kernel became slower. Instruction cache monster?
 *
static inline u32 rotr32 (const u32 a, const u32 n)
{
  u32 r;

  switch (n)
  {
    case  0: asm ("shf.r.wrap.b32 %0, %1, %1,  0;" : "=r"(r) : "r"(a)); break;
    case  1: asm ("shf.r.wrap.b32 %0, %1, %1,  1;" : "=r"(r) : "r"(a)); break;
    case  2: asm ("shf.r.wrap.b32 %0, %1, %1,  2;" : "=r"(r) : "r"(a)); break;
    case  3: asm ("shf.r.wrap.b32 %0, %1, %1,  3;" : "=r"(r) : "r"(a)); break;
    case  4: asm ("shf.r.wrap.b32 %0, %1, %1,  4;" : "=r"(r) : "r"(a)); break;
    case  5: asm ("shf.r.wrap.b32 %0, %1, %1,  5;" : "=r"(r) : "r"(a)); break;
    case  6: asm ("shf.r.wrap.b32 %0, %1, %1,  6;" : "=r"(r) : "r"(a)); break;
    case  7: asm ("shf.r.wrap.b32 %0, %1, %1,  7;" : "=r"(r) : "r"(a)); break;
    case  8: asm ("shf.r.wrap.b32 %0, %1, %1,  8;" : "=r"(r) : "r"(a)); break;
    case  9: asm ("shf.r.wrap.b32 %0, %1, %1,  9;" : "=r"(r) : "r"(a)); break;
    case 10: asm ("shf.r.wrap.b32 %0, %1, %1, 10;" : "=r"(r) : "r"(a)); break;
    case 11: asm ("shf.r.wrap.b32 %0, %1, %1, 11;" : "=r"(r) : "r"(a)); break;
    case 12: asm ("shf.r.wrap.b32 %0, %1, %1, 12;" : "=r"(r) : "r"(a)); break;
    case 13: asm ("shf.r.wrap.b32 %0, %1, %1, 13;" : "=r"(r) : "r"(a)); break;
    case 14: asm ("shf.r.wrap.b32 %0, %1, %1, 14;" : "=r"(r) : "r"(a)); break;
    case 15: asm ("shf.r.wrap.b32 %0, %1, %1, 15;" : "=r"(r) : "r"(a)); break;
    case 16: asm ("shf.r.wrap.b32 %0, %1, %1, 16;" : "=r"(r) : "r"(a)); break;
    case 17: asm ("shf.r.wrap.b32 %0, %1, %1, 17;" : "=r"(r) : "r"(a)); break;
    case 18: asm ("shf.r.wrap.b32 %0, %1, %1, 18;" : "=r"(r) : "r"(a)); break;
    case 19: asm ("shf.r.wrap.b32 %0, %1, %1, 19;" : "=r"(r) : "r"(a)); break;
    case 20: asm ("shf.r.wrap.b32 %0, %1, %1, 20;" : "=r"(r) : "r"(a)); break;
    case 21: asm ("shf.r.wrap.b32 %0, %1, %1, 21;" : "=r"(r) : "r"(a)); break;
    case 22: asm ("shf.r.wrap.b32 %0, %1, %1, 22;" : "=r"(r) : "r"(a)); break;
    case 23: asm ("shf.r.wrap.b32 %0, %1, %1, 23;" : "=r"(r) : "r"(a)); break;
    case 24: asm ("shf.r.wrap.b32 %0, %1, %1, 24;" : "=r"(r) : "r"(a)); break;
    case 25: asm ("shf.r.wrap.b32 %0, %1, %1, 25;" : "=r"(r) : "r"(a)); break;
    case 26: asm ("shf.r.wrap.b32 %0, %1, %1, 26;" : "=r"(r) : "r"(a)); break;
    case 27: asm ("shf.r.wrap.b32 %0, %1, %1, 27;" : "=r"(r) : "r"(a)); break;
    case 28: asm ("shf.r.wrap.b32 %0, %1, %1, 28;" : "=r"(r) : "r"(a)); break;
    case 29: asm ("shf.r.wrap.b32 %0, %1, %1, 29;" : "=r"(r) : "r"(a)); break;
    case 30: asm ("shf.r.wrap.b32 %0, %1, %1, 30;" : "=r"(r) : "r"(a)); break;
    case 31: asm ("shf.r.wrap.b32 %0, %1, %1, 31;" : "=r"(r) : "r"(a)); break;
  }

  return r;
}
*/
static inline u32 rotr32 (const u32 a, const u32 n)
{
  u32 r;

  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(a), "r"(n));

  return r;
}
static inline u32 rotl32 (const u32 a, const u32 n)
{
  return rotr32 (a, 32 - n);
}
static inline u64 rotr64 (const u64 a, const u32 n)
{
  u32 il;
  u32 ir;

  asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));

  u32 tl;
  u32 tr;

  if (n >= 32)
  {
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
  }
  else
  {
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
  }

  u64 r;

  asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));

  return r;
}
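/*
 * Illustrative note, not from the original source: shf.r.wrap.b32 d, lo, hi, s
 * is a funnel shift right, d = (u32) (((((u64) hi) << 32) | lo) >> (s & 31)).
 * Passing the same register twice (as in rotr32 above) therefore yields a
 * pure 32-bit rotate, and rotr64 swaps the halves once n crosses 32.
 */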
static inline u64 rotl64 (const u64 a, const u32 n)
{
  return rotr64 (a, 64 - n);
}
static inline u32 rotr32 (const u32 a, const u32 n)
{
  return rotate (a, 32 - n);
}

static inline u32 rotl32 (const u32 a, const u32 n)
{
  return rotate (a, n);
}

static inline u64 rotr64 (const u64 a, const u64 n)
{
  return rotate (a, 64 - n);
}

static inline u64 rotl64 (const u64 a, const u64 n)
{
  return rotate (a, n);
}
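/*
 * Illustrative note, not from the original source: OpenCL's rotate (x, n)
 * rotates left, so the right-rotates above are left-rotates by the
 * complementary amount. A minimal self-check sketch; ROT_SELFTEST_EXAMPLE
 * is an assumed guard so this never compiles into the kernels:
 */

#ifdef ROT_SELFTEST_EXAMPLE
static inline u32 rot_selftest ()
{
  // rotr32 (0x80000001, 1) == 0xc0000000 and rotl32 undoes it
  return (rotr32 (0x80000001, 1) == 0xc0000000)
      && (rotl32 (0xc0000000, 1) == 0x80000001);
}
#endif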
#elif defined _BCRYPT_
#elif defined _SHA256_
#elif defined _SHA384_
#elif defined _SHA512_
#elif defined _KECCAK_
#elif defined _RIPEMD160_
#elif defined _WHIRLPOOL_
#elif defined _GOST2012_256_
#elif defined _GOST2012_512_
#elif defined _MYSQL323_
#elif defined _LOTUS5_
#elif defined _LOTUS6_
#elif defined _SCRYPT_
#elif defined _LOTUS8_
#elif defined _OFFICE2007_
#elif defined _OFFICE2010_
#elif defined _OFFICE2013_
#elif defined _OLDOFFICE01_
#elif defined _OLDOFFICE34_
#elif defined _SIPHASH_
#elif defined _PBKDF2_MD5_
#elif defined _PBKDF2_SHA1_
#elif defined _PBKDF2_SHA256_
#elif defined _PBKDF2_SHA512_
#elif defined _PDF17L8_
#elif defined _CRC32_
#elif defined _SEVEN_ZIP_
#elif defined _ANDROIDFDE_
#elif defined _MD5_SHA1_
#elif defined _SHA1_MD5_
#elif defined _NETNTLMV2_
#elif defined _KRB5PA_
#elif defined _CLOUDKEY_
#elif defined _SCRYPT_
#elif defined _PSAFE2_
#elif defined _LOTUS8_
#elif defined _SHA256_SHA1_
#elif defined _MS_DRSR_
u32 cry_master_buf[64];

u32 public_key_buf[64];

u32 userdomain_buf[64];

u32 encryptedVerifier[4];
u32 encryptedVerifierHash[5];

u32 encryptedVerifier[4];
u32 encryptedVerifierHash[8];

u32 encryptedVerifier[4];
u32 encryptedVerifierHash[8];

u32 encryptedVerifier[4];
u32 encryptedVerifierHash[4];

u32 encryptedVerifier[4];
u32 encryptedVerifierHash[5];

} bitcoin_wallet_tmp_t;

} pbkdf2_sha256_tmp_t;

} pbkdf2_sha512_tmp_t;

u32 alignment_placeholder_1;
u32 alignment_placeholder_2;
u32 alignment_placeholder_3;