From 212395168fc5d3df183ce15183f6ac7f38168097 Mon Sep 17 00:00:00 2001
From: jsteube
Date: Sun, 27 Dec 2015 19:23:55 +0100
Subject: [PATCH] Testchange for speed

---
 OpenCL/types_ocl.c | 32 ++++----------------------------
 1 file changed, 4 insertions(+), 28 deletions(-)

diff --git a/OpenCL/types_ocl.c b/OpenCL/types_ocl.c
index 3ac84b4..d83189e 100644
--- a/OpenCL/types_ocl.c
+++ b/OpenCL/types_ocl.c
@@ -159,8 +159,6 @@ static inline u64 hl32_to_64 (const u32 a, const u32 b)
   return as_ulong ((uint2) (b, a));
 }
 
-#ifdef IS_AMD
-
 static inline u32 rotr32 (const u32 a, const u32 n)
 {
   return rotate (a, 32 - n);
@@ -171,6 +169,8 @@ static inline u32 rotl32 (const u32 a, const u32 n)
   return rotate (a, n);
 }
 
+#ifdef IS_AMD
+
 static inline u64 rotr64 (const u64 a, const u32 n)
 {
   uint2 a2 = as_uint2 (a);
@@ -196,21 +196,7 @@ static inline u64 rotl64 (const u64 a, const u32 n)
 
 #if CUDA_ARCH >= 350
 
-static u32 rotr32 (const u32 a, const u32 n)
-{
-  u32 r;
-
-  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(a), "r"(n));
-
-  return r;
-}
-
-static u32 rotl32 (const u32 a, const u32 n)
-{
-  return rotr32 (a, 32 - n);
-}
-
-static u64 rotr64 (const u64 a, const u32 n)
+static inline u64 rotr64 (const u64 a, const u32 n)
 {
   u32 il;
   u32 ir;
@@ -238,23 +224,13 @@ static u64 rotr64 (const u64 a, const u32 n)
   return r;
 }
 
-static u64 rotl64 (const u64 a, const u32 n)
+static inline u64 rotl64 (const u64 a, const u32 n)
 {
   return rotr64 (a, 64 - n);
 }
 
 #else
 
-static inline u32 rotr32 (const u32 a, const u32 n)
-{
-  return rotate (a, 32 - n);
-}
-
-static inline u32 rotl32 (const u32 a, const u32 n)
-{
-  return rotate (a, n);
-}
-
 static inline u64 rotr64 (const u64 a, const u64 n)
 {
   return rotate (a, 64 - n);
-- 
2.43.0