From bd1f555c9ced2c4a64bfc391186c6bf56ebd9560 Mon Sep 17 00:00:00 2001 From: jsteube Date: Sun, 8 May 2016 18:29:54 +0200 Subject: [PATCH] Remove some unroll to reduce register pressure --- OpenCL/m01750_a0.cl | 7 +------ OpenCL/m01750_a1.cl | 7 +------ OpenCL/m01750_a3.cl | 7 +------ OpenCL/m01760_a0.cl | 7 +------ OpenCL/m01760_a1.cl | 7 +------ OpenCL/m01760_a3.cl | 7 +------ OpenCL/m01800.cl | 5 +++-- OpenCL/m06221.cl | 7 +------ OpenCL/m06222.cl | 7 +------ OpenCL/m06223.cl | 7 +------ OpenCL/m06500.cl | 7 +------ OpenCL/m07100.cl | 7 +------ OpenCL/m08200.cl | 7 +------ OpenCL/m10700.cl | 4 ++-- OpenCL/m12300.cl | 7 +------ 15 files changed, 18 insertions(+), 82 deletions(-) diff --git a/OpenCL/m01750_a0.cl b/OpenCL/m01750_a0.cl index 5095385..0ad4790 100644 --- a/OpenCL/m01750_a0.cl +++ b/OpenCL/m01750_a0.cl @@ -116,12 +116,7 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - #ifdef IS_AMD - // #pragma unroll - // breaks compiler - #else - #pragma unroll - #endif + //#pragma unroll for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01750_a1.cl b/OpenCL/m01750_a1.cl index d920d14..f2697fc 100644 --- a/OpenCL/m01750_a1.cl +++ b/OpenCL/m01750_a1.cl @@ -114,12 +114,7 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - #ifdef IS_AMD - // #pragma unroll - // breaks compiler - #else - #pragma unroll - #endif + //#pragma unroll for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01750_a3.cl b/OpenCL/m01750_a3.cl index 2f08963..e213850 100644 --- a/OpenCL/m01750_a3.cl +++ b/OpenCL/m01750_a3.cl @@ -114,12 +114,7 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - #ifdef IS_AMD - // #pragma unroll - // breaks compiler - #else - #pragma unroll - #endif + //#pragma unroll for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01760_a0.cl b/OpenCL/m01760_a0.cl index 8e2338f..f487180 100644 --- a/OpenCL/m01760_a0.cl +++ b/OpenCL/m01760_a0.cl @@ -116,12 +116,7 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - #ifdef IS_AMD - // #pragma unroll - // breaks compiler - #else - #pragma unroll - #endif + //#pragma unroll for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01760_a1.cl b/OpenCL/m01760_a1.cl index a0ba4be..d329446 100644 --- a/OpenCL/m01760_a1.cl +++ b/OpenCL/m01760_a1.cl @@ -114,12 +114,7 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - #ifdef IS_AMD - // #pragma unroll - // breaks compiler - #else - #pragma unroll - #endif + //#pragma unroll for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01760_a3.cl b/OpenCL/m01760_a3.cl index cb8ff6b..b45cfab 100644 --- a/OpenCL/m01760_a3.cl +++ b/OpenCL/m01760_a3.cl @@ -114,12 +114,7 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - #ifdef IS_AMD - // #pragma unroll - // breaks compiler - #else - #pragma unroll - #endif + //#pragma unroll for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01800.cl b/OpenCL/m01800.cl index e0c91f8..d3a821c 100644 --- a/OpenCL/m01800.cl +++ b/OpenCL/m01800.cl @@ -23,7 +23,8 @@ // Buggy drivers... #ifdef IS_AMD -#define STATE_DECL volatile +//#define STATE_DECL volatile +#define STATE_DECL #else #define STATE_DECL #endif @@ -133,7 +134,7 @@ void sha512_transform (const u64 w[16], u64 digest[8]) ROUND_STEP (0); - #pragma unroll + //#pragma unroll for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m06221.cl b/OpenCL/m06221.cl index 60bdf54..2949080 100644 --- a/OpenCL/m06221.cl +++ b/OpenCL/m06221.cl @@ -115,12 +115,7 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - #ifdef IS_AMD - // #pragma unroll - // breaks compiler - #else - #pragma unroll - #endif + //#pragma unroll for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m06222.cl b/OpenCL/m06222.cl index 102a890..2620c72 100644 --- a/OpenCL/m06222.cl +++ b/OpenCL/m06222.cl @@ -115,12 +115,7 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - #ifdef IS_AMD - // #pragma unroll - // breaks compiler - #else - #pragma unroll - #endif + //#pragma unroll for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m06223.cl b/OpenCL/m06223.cl index 86afa82..a0eb7c2 100644 --- a/OpenCL/m06223.cl +++ b/OpenCL/m06223.cl @@ -115,12 +115,7 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - #ifdef IS_AMD - // #pragma unroll - // breaks compiler - #else - #pragma unroll - #endif + //#pragma unroll for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m06500.cl b/OpenCL/m06500.cl index 5fa3977..44e4d7f 100644 --- a/OpenCL/m06500.cl +++ b/OpenCL/m06500.cl @@ -114,12 +114,7 @@ void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const ROUND_STEP (0); - #ifdef IS_AMD - // #pragma unroll - // breaks compiler - #else - #pragma unroll - #endif + //#pragma unroll for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m07100.cl b/OpenCL/m07100.cl index 15252e7..a9f0900 100644 --- a/OpenCL/m07100.cl +++ b/OpenCL/m07100.cl @@ -114,12 +114,7 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - #ifdef IS_AMD - // #pragma unroll - // breaks compiler - #else - #pragma unroll - #endif + //#pragma unroll for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m08200.cl b/OpenCL/m08200.cl index da4405e..73e076d 100644 --- a/OpenCL/m08200.cl +++ b/OpenCL/m08200.cl @@ -321,12 +321,7 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND512_STEP (0); - #ifdef IS_AMD - // #pragma unroll - // breaks compiler - #else - #pragma unroll - #endif + //#pragma unroll for (int i = 16; i < 80; i += 16) { ROUND512_EXPAND (); ROUND512_STEP (i); diff --git a/OpenCL/m10700.cl b/OpenCL/m10700.cl index 32ae0e3..c5ec11d 100644 --- a/OpenCL/m10700.cl +++ b/OpenCL/m10700.cl @@ -242,7 +242,7 @@ void sha384_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const ROUND384_STEP (0); - #pragma unroll + //#pragma unroll for (int i = 16; i < 80; i += 16) { ROUND384_EXPAND (); ROUND384_STEP (i); @@ -352,7 +352,7 @@ void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const ROUND512_STEP (0); - #pragma unroll + //#pragma unroll for (int i = 16; i < 80; i += 16) { ROUND512_EXPAND (); ROUND512_STEP (i); diff --git a/OpenCL/m12300.cl b/OpenCL/m12300.cl index 33b410a..5ab6a05 100644 --- a/OpenCL/m12300.cl +++ b/OpenCL/m12300.cl @@ -114,12 +114,7 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - #ifdef IS_AMD - // #pragma unroll - // breaks compiler - #else - #pragma unroll - #endif + //#pragma unroll for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); -- 2.25.1