/**
 * Author......: Jens Steube <jens.steube@gmail.com>
 */
static u32 allx (const u32 r)
{
  // assumption: broadcast a 0/1 flag to an all-zeros/all-ones mask
  return -r;
}
static inline u32 l32_from_64 (u64 a)
{
  const u32 r = (uint) (a);

  return r;
}
static inline u32 h32_from_64 (u64 a)
{
  a >>= 32;

  const u32 r = (uint) (a);

  return r;
}
static inline u64 hl32_to_64 (const u32 a, const u32 b)
{
  return as_ulong ((uint2) (b, a));
}
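/*
 * A minimal sanity sketch (hypothetical helper, not part of the original
 * file): splitting a 64-bit value with h32_from_64/l32_from_64 and re-joining
 * the halves with hl32_to_64 must be the identity, since hl32_to_64 places
 * its first argument in the high word.
 */

static inline u64 split_join_64 (const u64 x)
{
  return hl32_to_64 (h32_from_64 (x), l32_from_64 (x)); // == x
}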
#ifdef IS_AMD
static inline u32 swap32 (const u32 v)
{
  return (as_uint (as_uchar4 (v).s3210));
}

static inline u64 swap64 (const u64 v)
{
  return (as_ulong (as_uchar8 (v).s76543210));
}
#endif
#ifdef IS_NV
static inline u32 swap32 (const u32 v)
{
  u32 r;

  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));

  return r;
}

static inline u64 swap64 (const u64 v)
{
  u32 il;
  u32 ir;

  asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(v));

  u32 tl;
  u32 tr;

  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl) : "r"(il));
  asm ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));

  u64 r;

  asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));

  return r;
}
#endif
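/*
 * Note: the prmt selector 0x0123 writes source bytes 3,2,1,0 into result
 * bytes 0,1,2,3, i.e. a full 32-bit byte reverse. swap64 therefore reverses
 * each 32-bit half and exchanges the halves when packing with mov.b64.
 */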
#ifdef IS_GENERIC
static inline u32 swap32 (const u32 v)
{
  return (as_uint (as_uchar4 (v).s3210));
}

static inline u64 swap64 (const u64 v)
{
  return (as_ulong (as_uchar8 (v).s76543210));
}
#endif
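/*
 * Worked example (not from the original file): swap32 (0x11223344) ==
 * 0x44332211, and both helpers are involutions, so swap64 (swap64 (x)) == x.
 */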
#ifdef IS_AMD
static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
  // requires the cl_amd_media_ops2 extension
  return amd_bfe (a, b, c);
}
#endif
#ifdef IS_NV
static inline u32 __byte_perm (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));

  return r;
}

static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));

  return r;
}

#if CUDA_ARCH >= 350
static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(b), "r"(a), "r"((c & 3) * 8));

  return r;
}
#else
static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
{
  return __byte_perm (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);
}
#endif
#endif
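/*
 * Background note (not from the original file): __byte_perm treats {b:a} as
 * eight bytes indexed 0..7 (a = bytes 0..3, b = bytes 4..7); nibble i of c
 * picks the source byte for result byte i. For example,
 * __byte_perm (a, b, 0x0123) byte-reverses a, exactly like the prmt-based
 * swap32 above.
 */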
#ifdef IS_GENERIC
static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
  #define BIT(x)      (1 << (x))
  #define BIT_MASK(x) (BIT (x) - 1)
  #define BFE(x,y,z)  (((x) >> (y)) & BIT_MASK (z))

  return BFE (a, b, c);
}

static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)
{
  return (u32) (((((u64) a) << 32) | (u64) b) >> ((c & 3) * 8));
}
#endif
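/*
 * Worked example (not from the original file): amd_bytealign slides a 32-bit
 * window across the 64-bit value {a:b}. With a = 0xaabbccdd, b = 0x11223344
 * and c = 1, the window moves right by one byte and the result is 0xdd112233.
 */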
#ifdef IS_AMD
static inline u32 rotr32 (const u32 a, const u32 n)
{
  return rotate (a, 32 - n);
}

static inline u32 rotl32 (const u32 a, const u32 n)
{
  return rotate (a, n);
}

static inline u64 rotr64 (const u64 a, const u32 n)
{
  uint2 a2 = as_uint2 (a);

  uint2 t;

  t.s0 = (n >= 32) ? amd_bitalign (a2.s0, a2.s1, n - 32)
                   : amd_bitalign (a2.s1, a2.s0, n);
  t.s1 = (n >= 32) ? amd_bitalign (a2.s1, a2.s0, n - 32)
                   : amd_bitalign (a2.s0, a2.s1, n);

  return as_ulong (t);
}

static inline u64 rotl64 (const u64 a, const u32 n)
{
  return rotr64 (a, 64 - n);
}
#endif
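/*
 * Background note (not from the original file): amd_bitalign (hi, lo, n)
 * returns the low 32 bits of ({hi:lo} >> n) for shift counts below 32, so
 * the two amd_bitalign calls above assemble each half of a 64-bit rotate;
 * the ternaries swap the halves once the rotate count reaches 32.
 */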
#ifdef IS_NV

/*
This version reduced the number of registers, but for some unknown reason the
whole kernel became slower. Instruction cache monster?

static inline u32 rotr32 (const u32 a, const u32 n)
{
  u32 r;

  switch (n & 31)
  {
    case 0: asm ("shf.r.wrap.b32 %0, %1, %1, 0;" : "=r"(r) : "r"(a)); break;
    case 1: asm ("shf.r.wrap.b32 %0, %1, %1, 1;" : "=r"(r) : "r"(a)); break;
    case 2: asm ("shf.r.wrap.b32 %0, %1, %1, 2;" : "=r"(r) : "r"(a)); break;
    case 3: asm ("shf.r.wrap.b32 %0, %1, %1, 3;" : "=r"(r) : "r"(a)); break;
    case 4: asm ("shf.r.wrap.b32 %0, %1, %1, 4;" : "=r"(r) : "r"(a)); break;
    case 5: asm ("shf.r.wrap.b32 %0, %1, %1, 5;" : "=r"(r) : "r"(a)); break;
    case 6: asm ("shf.r.wrap.b32 %0, %1, %1, 6;" : "=r"(r) : "r"(a)); break;
    case 7: asm ("shf.r.wrap.b32 %0, %1, %1, 7;" : "=r"(r) : "r"(a)); break;
    case 8: asm ("shf.r.wrap.b32 %0, %1, %1, 8;" : "=r"(r) : "r"(a)); break;
    case 9: asm ("shf.r.wrap.b32 %0, %1, %1, 9;" : "=r"(r) : "r"(a)); break;
    case 10: asm ("shf.r.wrap.b32 %0, %1, %1, 10;" : "=r"(r) : "r"(a)); break;
    case 11: asm ("shf.r.wrap.b32 %0, %1, %1, 11;" : "=r"(r) : "r"(a)); break;
    case 12: asm ("shf.r.wrap.b32 %0, %1, %1, 12;" : "=r"(r) : "r"(a)); break;
    case 13: asm ("shf.r.wrap.b32 %0, %1, %1, 13;" : "=r"(r) : "r"(a)); break;
    case 14: asm ("shf.r.wrap.b32 %0, %1, %1, 14;" : "=r"(r) : "r"(a)); break;
    case 15: asm ("shf.r.wrap.b32 %0, %1, %1, 15;" : "=r"(r) : "r"(a)); break;
    case 16: asm ("shf.r.wrap.b32 %0, %1, %1, 16;" : "=r"(r) : "r"(a)); break;
    case 17: asm ("shf.r.wrap.b32 %0, %1, %1, 17;" : "=r"(r) : "r"(a)); break;
    case 18: asm ("shf.r.wrap.b32 %0, %1, %1, 18;" : "=r"(r) : "r"(a)); break;
    case 19: asm ("shf.r.wrap.b32 %0, %1, %1, 19;" : "=r"(r) : "r"(a)); break;
    case 20: asm ("shf.r.wrap.b32 %0, %1, %1, 20;" : "=r"(r) : "r"(a)); break;
    case 21: asm ("shf.r.wrap.b32 %0, %1, %1, 21;" : "=r"(r) : "r"(a)); break;
    case 22: asm ("shf.r.wrap.b32 %0, %1, %1, 22;" : "=r"(r) : "r"(a)); break;
    case 23: asm ("shf.r.wrap.b32 %0, %1, %1, 23;" : "=r"(r) : "r"(a)); break;
    case 24: asm ("shf.r.wrap.b32 %0, %1, %1, 24;" : "=r"(r) : "r"(a)); break;
    case 25: asm ("shf.r.wrap.b32 %0, %1, %1, 25;" : "=r"(r) : "r"(a)); break;
    case 26: asm ("shf.r.wrap.b32 %0, %1, %1, 26;" : "=r"(r) : "r"(a)); break;
    case 27: asm ("shf.r.wrap.b32 %0, %1, %1, 27;" : "=r"(r) : "r"(a)); break;
    case 28: asm ("shf.r.wrap.b32 %0, %1, %1, 28;" : "=r"(r) : "r"(a)); break;
    case 29: asm ("shf.r.wrap.b32 %0, %1, %1, 29;" : "=r"(r) : "r"(a)); break;
    case 30: asm ("shf.r.wrap.b32 %0, %1, %1, 30;" : "=r"(r) : "r"(a)); break;
    case 31: asm ("shf.r.wrap.b32 %0, %1, %1, 31;" : "=r"(r) : "r"(a)); break;
  }

  return r;
}
*/
#if CUDA_ARCH >= 350
static inline u32 rotr32 (const u32 a, const u32 n)
{
  u32 r;

  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(a), "r"(n));

  return r;
}

static inline u32 rotl32 (const u32 a, const u32 n)
{
  return rotr32 (a, 32 - n);
}
static inline u64 rotr64 (const u64 a, const u32 n)
{
  u32 il;
  u32 ir;

  asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));

  u32 tl;
  u32 tr;

  if (n >= 32)
  {
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
  }
  else
  {
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
  }

  u64 r;

  asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));

  return r;
}

static inline u64 rotl64 (const u64 a, const u32 n)
{
  return rotr64 (a, 64 - n);
}
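/*
 * Background note (not from the original file): shf.r.wrap.b32 d, a, b, c
 * funnel-shifts the 64-bit pair {b:a} right by c & 31 and keeps the low 32
 * bits; feeding the same register to both inputs (as in rotr32 above) turns
 * it into a plain 32-bit rotate.
 */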
#else
static inline u32 rotr32 (const u32 a, const u32 n)
{
  return rotate (a, 32 - n);
}

static inline u32 rotl32 (const u32 a, const u32 n)
{
  return rotate (a, n);
}

static inline u64 rotr64 (const u64 a, const u32 n)
{
  return rotate (a, (u64) 64 - n);
}

static inline u64 rotl64 (const u64 a, const u32 n)
{
  return rotate (a, (u64) n);
}
#endif
#endif
#ifdef IS_GENERIC
static inline u32 rotr32 (const u32 a, const u32 n)
{
  return rotate (a, 32 - n);
}

static inline u32 rotl32 (const u32 a, const u32 n)
{
  return rotate (a, n);
}

static inline u64 rotr64 (const u64 a, const u32 n)
{
  return rotate (a, (u64) 64 - n);
}

static inline u64 rotl64 (const u64 a, const u32 n)
{
  return rotate (a, (u64) n);
}
#endif
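/*
 * Worked example (not from the original file): rotr32 (0x80000001, 1) ==
 * 0xc0000000, since the low bit wraps around into the sign position.
 */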
#ifdef IS_NV
#if CUDA_ARCH >= 500 // lop3 needs sm_50 or later
static inline u32 lut3_2d (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_39 (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_59 (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_96 (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_e4 (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_e8 (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}

static inline u32 lut3_ca (const u32 a, const u32 b, const u32 c)
{
  u32 r;

  asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c));

  return r;
}
#endif
#endif
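/*
 * Background note (not from the original file): the lop3.b32 immediate is the
 * truth table of the function applied to A = 0xf0, B = 0xcc, C = 0xaa. That
 * is how the names map to bitwise expressions, e.g. 0x96 = a ^ b ^ c,
 * 0xe8 = (a & b) | (a & c) | (b & c) (the SHA majority function), and
 * 0xca = (a & b) | (~a & c) (bitselect with a as the mask).
 */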
#elif defined _BCRYPT_
#elif defined _SHA256_
#elif defined _SHA384_
#elif defined _SHA512_
#elif defined _KECCAK_
#elif defined _RIPEMD160_
#elif defined _WHIRLPOOL_
#elif defined _GOST2012_256_
#elif defined _GOST2012_512_
#elif defined _MYSQL323_
#elif defined _LOTUS5_
#elif defined _LOTUS6_
#elif defined _SCRYPT_
#elif defined _LOTUS8_
#elif defined _OFFICE2007_
#elif defined _OFFICE2010_
#elif defined _OFFICE2013_
#elif defined _OLDOFFICE01_
#elif defined _OLDOFFICE34_
#elif defined _SIPHASH_
#elif defined _PBKDF2_MD5_
#elif defined _PBKDF2_SHA1_
#elif defined _PBKDF2_SHA256_
#elif defined _PBKDF2_SHA512_
#elif defined _PDF17L8_
#elif defined _CRC32_
#elif defined _SEVEN_ZIP_
#elif defined _ANDROIDFDE_

#elif defined _MD5_SHA1_
#elif defined _SHA1_MD5_
#elif defined _NETNTLMV2_
#elif defined _KRB5PA_
#elif defined _CLOUDKEY_
#elif defined _SCRYPT_
#elif defined _PSAFE2_
#elif defined _LOTUS8_
#elif defined _SHA256_SHA1_
#elif defined _MS_DRSR_
#elif defined _ANDROIDFDE_SAMSUNG_
  u32 cry_master_buf[64];

  u32 public_key_buf[64];

  u32 userdomain_buf[64];

  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[5];

  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[8];

  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[8];

  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[4];

  u32 encryptedVerifier[4];
  u32 encryptedVerifierHash[5];

} bitcoin_wallet_tmp_t;

} pbkdf2_sha256_tmp_t;

} pbkdf2_sha512_tmp_t;

  u32 alignment_placeholder_1;
  u32 alignment_placeholder_2;
  u32 alignment_placeholder_3;