From 530651e96ae0cd8a73f739af662c43d024ee2c69 Mon Sep 17 00:00:00 2001
From: Mangix
Date: Wed, 20 Apr 2016 19:27:26 -0700
Subject: [PATCH] Disable AMD optimizations for LLVM

Only use the AMD media-ops built-ins when they are visible to the
preprocessor:

- rotr64_S/rotr64: take the amd_bitalign path only when DEVICE_TYPE is
  DEVICE_TYPE_GPU and amd_bitalign is defined; otherwise use rotate().
- __bfe/amd_bytealign_S: fall back to plain shift-and-mask code when
  amd_bfe/amd_bytealign are not defined.
- kernel_vendor.h: do not enable cl_amd_media_ops/cl_amd_media_ops2 when
  cl_clang_storage_class_specifiers is defined.

The __bfe fallback is a single inline expression with an unsigned mask,
so no helper macros (BIT, BIT_MASK, BFE) are left defined after the
function body and the mask is never built from a signed shift.

---
NOTE(review): "#ifdef amd_bitalign" (and amd_bfe / amd_bytealign) only
tests for preprocessor macros. Confirm the targeted compilers expose
these built-ins as macros; if they are plain functions the fallback
branch is always taken.
NOTE(review): the post-image blob id on the "index" line below was not
regenerated after the __bfe fallback was simplified.

 OpenCL/types_ocl.c      | 43 ++++++++++++++++++++++++++++++++++---------
 include/kernel_vendor.h |  2 ++
 2 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/OpenCL/types_ocl.c b/OpenCL/types_ocl.c
index 17e3d69..d935b68 100644
--- a/OpenCL/types_ocl.c
+++ b/OpenCL/types_ocl.c
@@ -199,11 +199,9 @@ static inline u32 rotl32_S (const u32 a, const u32 n)
 
 static inline u64 rotr64_S (const u64 a, const u32 n)
 {
-  #if DEVICE_TYPE == DEVICE_TYPE_CPU
+  #if (DEVICE_TYPE == DEVICE_TYPE_GPU)
 
-  const u64 r = rotate (a, (u64) 64 - n);
-
-  #else
+  #ifdef amd_bitalign
 
   const u32 a0 = h32_from_64_S (a);
   const u32 a1 = l32_from_64_S (a);
@@ -213,6 +211,16 @@ static inline u64 rotr64_S (const u64 a, const u32 n)
 
   const u64 r = hl32_to_64_S (t0, t1);
 
+  #else
+
+  const u64 r = rotate (a, (u64) 64 - n);
+
+  #endif
+
+  #else
+
+  const u64 r = rotate (a, (u64) 64 - n);
+
   #endif
 
   return r;
@@ -255,12 +263,9 @@ static inline u32x rotl32 (const u32x a, const u32 n)
 
 static inline u64x rotr64 (const u64x a, const u32 n)
 {
-  #if DEVICE_TYPE == DEVICE_TYPE_CPU
-
-  const u64x r = rotate (a, (u64) 64 - n);
-
-  #else
+  #if (DEVICE_TYPE == DEVICE_TYPE_GPU)
 
+  #ifdef amd_bitalign
   const u32x a0 = h32_from_64 (a);
   const u32x a1 = l32_from_64 (a);
 
@@ -269,6 +274,16 @@ static inline u64x rotr64 (const u64x a, const u32 n)
 
   const u64x r = hl32_to_64 (t0, t1);
 
+  #else
+
+  const u64x r = rotate (a, (u64) 64 - n);
+
+  #endif
+
+  #else
+
+  const u64x r = rotate (a, (u64) 64 - n);
+
   #endif
 
   return r;
@@ -281,12 +296,22 @@ static inline u64x rotl64 (const u64x a, const u32 n)
 
 static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
 {
+#ifdef amd_bfe
   return amd_bfe (a, b, c);
+#else
+  return (a >> b) & ((1u << c) - 1u);
+#endif
 }
 
 static inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
 {
+#ifdef amd_bytealign
   return amd_bytealign (a, b, c);
+#else
+  const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);
+
+  return (u32) (tmp);
+#endif
 }
 
 #endif
diff --git a/include/kernel_vendor.h b/include/kernel_vendor.h
index 7ee2176..6d0b64a 100644
--- a/include/kernel_vendor.h
+++ b/include/kernel_vendor.h
@@ -28,9 +28,11 @@
  */
 
 #ifdef IS_AMD
+#ifndef cl_clang_storage_class_specifiers
 #pragma OPENCL EXTENSION cl_amd_media_ops : enable
 #pragma OPENCL EXTENSION cl_amd_media_ops2 : enable
 #endif
+#endif
 
 /**
  * NV specific
-- 
2.25.1