From 94d6fd3693b0c12ed77a680321c66cd29c2992f7 Mon Sep 17 00:00:00 2001 From: jsteube Date: Sun, 27 Dec 2015 21:22:54 +0100 Subject: [PATCH] revert switch to rotate () --- OpenCL/types_ocl.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/OpenCL/types_ocl.c b/OpenCL/types_ocl.c index d83189e..60c972f 100644 --- a/OpenCL/types_ocl.c +++ b/OpenCL/types_ocl.c @@ -159,6 +159,8 @@ static inline u64 hl32_to_64 (const u32 a, const u32 b) return as_ulong ((uint2) (b, a)); } +#ifdef IS_AMD + static inline u32 rotr32 (const u32 a, const u32 n) { return rotate (a, 32 - n); @@ -169,8 +171,6 @@ static inline u32 rotl32 (const u32 a, const u32 n) return rotate (a, n); } -#ifdef IS_AMD - static inline u64 rotr64 (const u64 a, const u32 n) { uint2 a2 = as_uint2 (a); @@ -196,6 +196,20 @@ static inline u64 rotl64 (const u64 a, const u32 n) #if CUDA_ARCH >= 350 +static inline u32 rotr32 (const u32 a, const u32 n) +{ + u32 r; + + asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(a), "r"(n)); + + return r; +} + +static inline u32 rotl32 (const u32 a, const u32 n) +{ + return rotr32 (a, 32 - n); +} + static inline u64 rotr64 (const u64 a, const u32 n) { u32 il; @@ -231,6 +245,16 @@ static inline u64 rotl64 (const u64 a, const u32 n) #else +static inline u32 rotr32 (const u32 a, const u32 n) +{ + return rotate (a, 32 - n); +} + +static inline u32 rotl32 (const u32 a, const u32 n) +{ + return rotate (a, n); +} + static inline u64 rotr64 (const u64 a, const u64 n) { return rotate (a, 64 - n); -- 2.43.0