0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6
};
+#ifdef IS_AMD
+#define BF_ROUND(L,R,N) \
+{ \
+  uchar4 c = as_uchar4 ((L)); \
+ \
+  u32 tmp; \
+ \
+  tmp = S0[c.s3]; \
+  tmp += S1[c.s2]; \
+  tmp ^= S2[c.s1]; \
+  tmp += S3[c.s0]; \
+ \
+  (R) ^= tmp ^ P[(N)]; \
+}
+#endif
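+
+// On a little-endian device, as_uchar4 ((L)).s3 is the most significant byte
+// of (L), so the four lookups above are equivalent to the generic
+// S0[((L) >> 24) & 0xff] ... S3[((L) >> 0) & 0xff] form removed below.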
+
+#ifdef IS_NV
+#define BF_ROUND(L,R,N) \
+{ \
+ u32 tmp; \
+ \
+ tmp = S0[__bfe ((L), 24, 8)]; \
+ tmp += S1[__bfe ((L), 16, 8)]; \
+ tmp ^= S2[__bfe ((L), 8, 8)]; \
+ tmp += S3[__bfe ((L), 0, 8)]; \
+ \
+ (R) ^= tmp ^ P[(N)]; \
+}
+#endif
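+
+// __bfe (x, b, n) extracts the n-bit field of x starting at bit b and
+// zero-extends it, so __bfe ((L), 16, 8) == ((L) >> 16) & 0xff, but it
+// compiles to a single BFE instruction on NVIDIA hardware.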
-#define BF_ROUND(L,R,N) \
-{ \
-  u32 tmp; \
- \
-  tmp = S0[((L) >> 24) & 0xff]; \
-  tmp += S1[((L) >> 16) & 0xff]; \
-  tmp ^= S2[((L) >> 8) & 0xff]; \
-  tmp += S3[((L) >> 0) & 0xff]; \
- \
-  (R) ^= tmp ^ P[(N)]; \
-}

#define BF_ENCRYPT(L,R) \
{ \

#ifdef IS_NV
-static inline u32 __byte_perm (const u32 a, const u32 b, const u32 s)
+static inline u32 __byte_perm (const u32 a, const u32 b, const u32 c)
{
  u32 r;

-  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(s));
+  asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));

  return r;
}
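+
+// prmt.b32 forms an 8-byte table from a (result-source bytes 0-3) and b
+// (bytes 4-7); each nibble of c selects one source byte for the matching
+// result byte, so __byte_perm (x, 0, 0x0123) byte-swaps x. __bfe below wraps
+// bfe.u32 the same way for bit-field extraction.
+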
+static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
+{
+ u32 r;
+
+ asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
+
+ return r;
+}
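+
+// bfe.u32 zero-extends the extracted field: __bfe (0x11223344, 8, 8) returns
+// 0x33. BF_ROUND on NV uses it to pull the four S-box indices out of each
+// half-block, one instruction per byte.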
+
#if CUDA_ARCH >= 350
static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)