From c79bed3b7de4ac974a3ea5d3b498f4fe712f93b9 Mon Sep 17 00:00:00 2001 From: jsteube Date: Mon, 9 May 2016 21:32:12 +0200 Subject: [PATCH] Prepare for a more dynamic #pragma unroll use --- OpenCL/kernel_serpent256.c | 4 + OpenCL/m01450_a0.cl | 2 + OpenCL/m01450_a1.cl | 2 + OpenCL/m01450_a3.cl | 2 + OpenCL/m01460_a0.cl | 2 + OpenCL/m01460_a1.cl | 2 + OpenCL/m01460_a3.cl | 2 + OpenCL/m01500_a0.cl | 7 +- OpenCL/m01500_a1.cl | 7 +- OpenCL/m01500_a3.cl | 28 +++--- OpenCL/m01700_a0.cl | 2 + OpenCL/m01700_a1.cl | 2 + OpenCL/m01700_a3.cl | 2 + OpenCL/m01710_a0.cl | 2 + OpenCL/m01710_a1.cl | 2 + OpenCL/m01710_a3.cl | 2 + OpenCL/m01720_a0.cl | 2 + OpenCL/m01720_a1.cl | 2 + OpenCL/m01720_a3.cl | 2 + OpenCL/m01730_a0.cl | 2 + OpenCL/m01730_a1.cl | 2 + OpenCL/m01730_a3.cl | 2 + OpenCL/m01740_a0.cl | 2 + OpenCL/m01740_a1.cl | 2 + OpenCL/m01740_a3.cl | 2 + OpenCL/m01750_a0.cl | 4 +- OpenCL/m01750_a1.cl | 4 +- OpenCL/m01750_a3.cl | 4 +- OpenCL/m01760_a0.cl | 4 +- OpenCL/m01760_a1.cl | 4 +- OpenCL/m01760_a3.cl | 4 +- OpenCL/m01800.cl | 8 +- OpenCL/m03000_a0.cl | 8 +- OpenCL/m03000_a1.cl | 8 +- OpenCL/m03000_a3.cl | 20 ++--- OpenCL/m03100_a0.cl | 8 +- OpenCL/m03100_a1.cl | 8 +- OpenCL/m03100_a3.cl | 8 +- OpenCL/m03200.cl | 4 + OpenCL/m05200.cl | 2 + OpenCL/m05500_a0.cl | 8 +- OpenCL/m05500_a1.cl | 8 +- OpenCL/m05500_a3.cl | 8 +- OpenCL/m06100_a0.cl | 8 +- OpenCL/m06100_a1.cl | 8 +- OpenCL/m06100_a3.cl | 8 +- OpenCL/m06221.cl | 4 +- OpenCL/m06222.cl | 4 +- OpenCL/m06223.cl | 4 +- OpenCL/m06231.cl | 8 ++ OpenCL/m06232.cl | 8 +- OpenCL/m06233.cl | 8 +- OpenCL/m06400.cl | 2 + OpenCL/m06500.cl | 4 +- OpenCL/m06600.cl | 8 +- OpenCL/m06800.cl | 6 +- OpenCL/m07100.cl | 4 +- OpenCL/m07400.cl | 10 ++- OpenCL/m07500_a0.cl | 4 + OpenCL/m07500_a1.cl | 4 + OpenCL/m07500_a3.cl | 4 + OpenCL/m07800_a0.cl | 8 +- OpenCL/m07800_a1.cl | 8 +- OpenCL/m07800_a3.cl | 8 +- OpenCL/m07900.cl | 2 + OpenCL/m08000_a0.cl | 14 +++ OpenCL/m08000_a1.cl | 14 +++ OpenCL/m08000_a3.cl | 14 +++ OpenCL/m08200.cl | 6 +- OpenCL/m08500_a0.cl | 8 +- OpenCL/m08500_a1.cl | 8 +- OpenCL/m08500_a3.cl | 8 +- OpenCL/m08600_a0.cl | 8 +- OpenCL/m08600_a1.cl | 8 +- OpenCL/m08600_a3.cl | 8 +- OpenCL/m08700_a0.cl | 8 +- OpenCL/m08700_a1.cl | 8 +- OpenCL/m08700_a3.cl | 8 +- OpenCL/m08800.cl | 2 + OpenCL/m08900.cl | 10 +++ OpenCL/m09100.cl | 8 +- OpenCL/m09600.cl | 5 +- OpenCL/m09700_a0.cl | 4 + OpenCL/m09700_a1.cl | 4 + OpenCL/m09700_a3.cl | 4 + OpenCL/m09710_a0.cl | 4 + OpenCL/m09710_a1.cl | 4 + OpenCL/m09710_a3.cl | 4 + OpenCL/m09800_a0.cl | 4 + OpenCL/m09800_a1.cl | 4 + OpenCL/m09800_a3.cl | 4 + OpenCL/m09810_a0.cl | 4 + OpenCL/m09810_a1.cl | 4 + OpenCL/m09810_a3.cl | 4 + OpenCL/m10400_a0.cl | 8 +- OpenCL/m10400_a1.cl | 8 +- OpenCL/m10400_a3.cl | 8 +- OpenCL/m10410_a0.cl | 8 +- OpenCL/m10410_a1.cl | 8 +- OpenCL/m10410_a3.cl | 8 +- OpenCL/m10500.cl | 6 ++ OpenCL/m10700.cl | 10 ++- OpenCL/m10800_a0.cl | 2 + OpenCL/m10800_a1.cl | 2 + OpenCL/m10800_a3.cl | 2 + OpenCL/m10900.cl | 2 + OpenCL/m11300.cl | 2 + OpenCL/m11600.cl | 2 + OpenCL/m11700_a0.cl | 12 +++ OpenCL/m11700_a1.cl | 12 +++ OpenCL/m11700_a3.cl | 12 +++ OpenCL/m11800_a0.cl | 12 +++ OpenCL/m11800_a1.cl | 12 +++ OpenCL/m11800_a3.cl | 12 +++ OpenCL/m12200.cl | 5 +- OpenCL/m12300.cl | 4 +- OpenCL/m12400.cl | 2 + OpenCL/m12800.cl | 2 + OpenCL/m12900.cl | 2 + OpenCL/m13000.cl | 2 + OpenCL/m13100_a0.cl | 4 + OpenCL/m13100_a1.cl | 4 + OpenCL/m13100_a3.cl | 4 + OpenCL/m13200.cl | 26 +++--- OpenCL/m13400.cl | 2 + OpenCL/rp.c | 2 + OpenCL/types_ocl.c | 3 - include/kernel_vendor.h | 169 +++++++++++++++++++++++++++++++++++-- src/oclHashcat.c | 2 +- 129 files changed, 776 insertions(+), 140 deletions(-) diff --git a/OpenCL/kernel_serpent256.c b/OpenCL/kernel_serpent256.c index c8a1be8..5c326f5 100644 --- a/OpenCL/kernel_serpent256.c +++ b/OpenCL/kernel_serpent256.c @@ -403,13 +403,17 @@ void serpent256_set_key (u32 *ks, const u32 *ukey) { + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { ks[i] = ukey[i]; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 132; i++) { ks[i + 8] = rotl32 (ks[i + 7] ^ ks[i + 5] ^ ks[i + 3] ^ ks[i + 0] ^ 0x9e3779b9 ^ i, 11); diff --git a/OpenCL/m01450_a0.cl b/OpenCL/m01450_a0.cl index 6f45381..7e12d30 100644 --- a/OpenCL/m01450_a0.cl +++ b/OpenCL/m01450_a0.cl @@ -112,7 +112,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01450_a1.cl b/OpenCL/m01450_a1.cl index 297810c..ab683cf 100644 --- a/OpenCL/m01450_a1.cl +++ b/OpenCL/m01450_a1.cl @@ -110,7 +110,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01450_a3.cl b/OpenCL/m01450_a3.cl index f802ecd..6756571 100644 --- a/OpenCL/m01450_a3.cl +++ b/OpenCL/m01450_a3.cl @@ -110,7 +110,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01460_a0.cl b/OpenCL/m01460_a0.cl index ef35cec..3824c2c 100644 --- a/OpenCL/m01460_a0.cl +++ b/OpenCL/m01460_a0.cl @@ -112,7 +112,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01460_a1.cl b/OpenCL/m01460_a1.cl index f618ee9..6081ef6 100644 --- a/OpenCL/m01460_a1.cl +++ b/OpenCL/m01460_a1.cl @@ -110,7 +110,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01460_a3.cl b/OpenCL/m01460_a3.cl index d623d6a..64c0df5 100644 --- a/OpenCL/m01460_a3.cl +++ b/OpenCL/m01460_a3.cl @@ -110,7 +110,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01500_a0.cl b/OpenCL/m01500_a0.cl index 6bf6f34..0007970 100644 --- a/OpenCL/m01500_a0.cl +++ b/OpenCL/m01500_a0.cl @@ -371,7 +371,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 c = c & 0x0fffffff; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i++) { if ((i < 2) || (i == 8) || (i == 15)) @@ -430,6 +432,9 @@ void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], __local for (u32 i = 0; i < 25; i++) { + #ifdef _unroll + #pragma unroll + #endif for (u32 j = 0; j < 16; j += 2) { u32x t; diff --git a/OpenCL/m01500_a1.cl b/OpenCL/m01500_a1.cl index 4d7a547..135cf96 100644 --- a/OpenCL/m01500_a1.cl +++ b/OpenCL/m01500_a1.cl @@ -368,7 +368,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 c = c & 0x0fffffff; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i++) { if ((i < 2) || (i == 8) || (i == 15)) @@ -427,6 +429,9 @@ void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], __local for (u32 i = 0; i < 25; i++) { + #ifdef _unroll + #pragma unroll + #endif for (u32 j = 0; j < 16; j += 2) { u32x t; diff --git a/OpenCL/m01500_a3.cl b/OpenCL/m01500_a3.cl index 6b29a9a..2585829 100644 --- a/OpenCL/m01500_a3.cl +++ b/OpenCL/m01500_a3.cl @@ -1569,17 +1569,9 @@ void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, cons for (u32 ii = 0; ii < 25; ii++) { - #ifdef IS_NV - #if CUDA_ARCH >= 500 - #else + #ifdef _unroll #pragma unroll #endif - #endif - - #ifdef IS_AMD - #pragma unroll - #endif - for (u32 i = 0; i < 2; i++) { if (i) KEYSET10 else KEYSET00 @@ -1705,17 +1697,9 @@ void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, cons for (u32 ii = 0; ii < 25; ii++) { - #ifdef IS_NV - #if CUDA_ARCH >= 500 - #else - #pragma unroll - #endif - #endif - - #ifdef IS_AMD + #ifdef _unroll #pragma unroll #endif - for (u32 i = 0; i < 2; i++) { if (i) KEYSET10 else KEYSET00 @@ -2222,7 +2206,9 @@ void m01500m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co u32 tmpResult = 0; + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 32; i++) { const u32 b0 = -((search[0] >> i) & 1); @@ -2249,7 +2235,9 @@ void m01500m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co u32 out0[32]; u32 out1[32]; + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 32; i++) { out0[i] = out[ 0 + 31 - i]; @@ -2259,7 +2247,9 @@ void m01500m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co transpose32c (out0); transpose32c (out1); + #ifdef _unroll #pragma unroll + #endif for (int slice = 0; slice < 32; slice++) { const u32 r0 = out0[31 - slice]; @@ -2676,7 +2666,9 @@ __kernel void m01500_tm (__global u32 *mod, __global bs_word_t *words_buf_r) const u32 w0s = (w0 << 1) & 0xfefefefe; + #ifdef _unroll #pragma unroll + #endif for (int i = 0, j = 0; i < 32; i += 8, j += 7) { atomic_or (&words_buf_r[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice)); diff --git a/OpenCL/m01700_a0.cl b/OpenCL/m01700_a0.cl index 47993c2..5324cb0 100644 --- a/OpenCL/m01700_a0.cl +++ b/OpenCL/m01700_a0.cl @@ -116,7 +116,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01700_a1.cl b/OpenCL/m01700_a1.cl index 742a78a..57adc4f 100644 --- a/OpenCL/m01700_a1.cl +++ b/OpenCL/m01700_a1.cl @@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01700_a3.cl b/OpenCL/m01700_a3.cl index fb31e08..c7df44e 100644 --- a/OpenCL/m01700_a3.cl +++ b/OpenCL/m01700_a3.cl @@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01710_a0.cl b/OpenCL/m01710_a0.cl index 7d1b1f8..e2172f0 100644 --- a/OpenCL/m01710_a0.cl +++ b/OpenCL/m01710_a0.cl @@ -116,7 +116,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01710_a1.cl b/OpenCL/m01710_a1.cl index 900f684..794c00f 100644 --- a/OpenCL/m01710_a1.cl +++ b/OpenCL/m01710_a1.cl @@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01710_a3.cl b/OpenCL/m01710_a3.cl index 4bde04c..794b9b3 100644 --- a/OpenCL/m01710_a3.cl +++ b/OpenCL/m01710_a3.cl @@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01720_a0.cl b/OpenCL/m01720_a0.cl index 223b6c7..52eb80c 100644 --- a/OpenCL/m01720_a0.cl +++ b/OpenCL/m01720_a0.cl @@ -116,7 +116,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01720_a1.cl b/OpenCL/m01720_a1.cl index 93a8b9e..0ed7984 100644 --- a/OpenCL/m01720_a1.cl +++ b/OpenCL/m01720_a1.cl @@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01720_a3.cl b/OpenCL/m01720_a3.cl index 802e977..12c1552 100644 --- a/OpenCL/m01720_a3.cl +++ b/OpenCL/m01720_a3.cl @@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01730_a0.cl b/OpenCL/m01730_a0.cl index a9a25cf..2dd7473 100644 --- a/OpenCL/m01730_a0.cl +++ b/OpenCL/m01730_a0.cl @@ -116,7 +116,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01730_a1.cl b/OpenCL/m01730_a1.cl index 68d3a82..e2a0557 100644 --- a/OpenCL/m01730_a1.cl +++ b/OpenCL/m01730_a1.cl @@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01730_a3.cl b/OpenCL/m01730_a3.cl index 795d6d6..517822b 100644 --- a/OpenCL/m01730_a3.cl +++ b/OpenCL/m01730_a3.cl @@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01740_a0.cl b/OpenCL/m01740_a0.cl index 189996c..d6574ce 100644 --- a/OpenCL/m01740_a0.cl +++ b/OpenCL/m01740_a0.cl @@ -116,7 +116,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01740_a1.cl b/OpenCL/m01740_a1.cl index 7677fe6..c859c41 100644 --- a/OpenCL/m01740_a1.cl +++ b/OpenCL/m01740_a1.cl @@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01740_a3.cl b/OpenCL/m01740_a3.cl index b14fe1e..97a98d3 100644 --- a/OpenCL/m01740_a3.cl +++ b/OpenCL/m01740_a3.cl @@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01750_a0.cl b/OpenCL/m01750_a0.cl index 0ad4790..4ba5ea2 100644 --- a/OpenCL/m01750_a0.cl +++ b/OpenCL/m01750_a0.cl @@ -116,7 +116,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - //#pragma unroll + #ifdef _unroll + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01750_a1.cl b/OpenCL/m01750_a1.cl index f2697fc..65112a3 100644 --- a/OpenCL/m01750_a1.cl +++ b/OpenCL/m01750_a1.cl @@ -114,7 +114,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - //#pragma unroll + #ifdef _unroll + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01750_a3.cl b/OpenCL/m01750_a3.cl index e213850..aff816f 100644 --- a/OpenCL/m01750_a3.cl +++ b/OpenCL/m01750_a3.cl @@ -114,7 +114,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - //#pragma unroll + #ifdef _unroll + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01760_a0.cl b/OpenCL/m01760_a0.cl index f487180..8883d47 100644 --- a/OpenCL/m01760_a0.cl +++ b/OpenCL/m01760_a0.cl @@ -116,7 +116,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - //#pragma unroll + #ifdef _unroll + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01760_a1.cl b/OpenCL/m01760_a1.cl index d329446..1d7895f 100644 --- a/OpenCL/m01760_a1.cl +++ b/OpenCL/m01760_a1.cl @@ -114,7 +114,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - //#pragma unroll + #ifdef _unroll + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01760_a3.cl b/OpenCL/m01760_a3.cl index b45cfab..61bb6d7 100644 --- a/OpenCL/m01760_a3.cl +++ b/OpenCL/m01760_a3.cl @@ -114,7 +114,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con ROUND_STEP (0); - //#pragma unroll + #ifdef _unroll + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m01800.cl b/OpenCL/m01800.cl index a557be9..21b1685 100644 --- a/OpenCL/m01800.cl +++ b/OpenCL/m01800.cl @@ -134,7 +134,9 @@ void sha512_transform (const u64 w[16], u64 digest[8]) ROUND_STEP (0); - //#pragma unroll + #ifdef _unroll + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); @@ -500,7 +502,9 @@ __kernel void m01800_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf { const u32 block_len = wpc_len[pc]; - #pragma unroll 64 + #ifdef _unroll + #pragma unroll + #endif for (u32 k = 0, p = block_len - 64; k < 64; k++, p++) { PUTCHAR64_BE (block, p, GETCHAR64_BE (l_alt_result, k)); diff --git a/OpenCL/m03000_a0.cl b/OpenCL/m03000_a0.cl index d2aa49d..dc3653d 100644 --- a/OpenCL/m03000_a0.cl +++ b/OpenCL/m03000_a0.cl @@ -361,7 +361,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l u32x r = data[0]; u32x l = data[1]; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i += 2) { u32x u; @@ -414,7 +416,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 c = c & 0x0fffffff; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i++) { if ((i < 2) || (i == 8) || (i == 15)) diff --git a/OpenCL/m03000_a1.cl b/OpenCL/m03000_a1.cl index 7079a7a..22b6de6 100644 --- a/OpenCL/m03000_a1.cl +++ b/OpenCL/m03000_a1.cl @@ -358,7 +358,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l u32x r = data[0]; u32x l = data[1]; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i += 2) { u32x u; @@ -411,7 +413,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 c = c & 0x0fffffff; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i++) { if ((i < 2) || (i == 8) || (i == 15)) diff --git a/OpenCL/m03000_a3.cl b/OpenCL/m03000_a3.cl index 08a7af1..f5d3380 100644 --- a/OpenCL/m03000_a3.cl +++ b/OpenCL/m03000_a3.cl @@ -1546,21 +1546,9 @@ void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 KXX_DECL u32 k36, k37, k38, k39, k40, k41; KXX_DECL u32 k42, k43, k44, k45, k46, k47; - #ifdef IS_NV - #if CUDA_ARCH >= 500 - #else + #ifdef _unroll #pragma unroll #endif - #endif - - #ifdef IS_AMD - #pragma unroll - #endif - - #ifdef IS_GENERIC - #pragma unroll 1 - #endif - for (u32 i = 0; i < 2; i++) { if (i) KEYSET10 else KEYSET00 @@ -2060,7 +2048,9 @@ void m03000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co u32 tmpResult = 0; + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 32; i++) { const u32 b0 = -((search[0] >> i) & 1); @@ -2087,7 +2077,9 @@ void m03000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co u32 out0[32]; u32 out1[32]; + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 32; i++) { out0[i] = out[ 0 + 31 - i]; @@ -2097,7 +2089,9 @@ void m03000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co transpose32c (out0); transpose32c (out1); + #ifdef _unroll #pragma unroll + #endif for (int slice = 0; slice < 32; slice++) { const u32 r0 = out0[31 - slice]; diff --git a/OpenCL/m03100_a0.cl b/OpenCL/m03100_a0.cl index 9220eb7..5f98fda 100644 --- a/OpenCL/m03100_a0.cl +++ b/OpenCL/m03100_a0.cl @@ -384,7 +384,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l r = rotl32 (r, 3u); l = rotl32 (l, 3u); - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i += 2) { u32x u; @@ -442,7 +444,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 c = c & 0x0fffffff; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i++) { if ((i < 2) || (i == 8) || (i == 15)) diff --git a/OpenCL/m03100_a1.cl b/OpenCL/m03100_a1.cl index 5f3207a..b16db6c 100644 --- a/OpenCL/m03100_a1.cl +++ b/OpenCL/m03100_a1.cl @@ -381,7 +381,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l r = rotl32 (r, 3u); l = rotl32 (l, 3u); - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i += 2) { u32x u; @@ -439,7 +441,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 c = c & 0x0fffffff; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i++) { if ((i < 2) || (i == 8) || (i == 15)) diff --git a/OpenCL/m03100_a3.cl b/OpenCL/m03100_a3.cl index 63a7eea..e1729e3 100644 --- a/OpenCL/m03100_a3.cl +++ b/OpenCL/m03100_a3.cl @@ -381,7 +381,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l r = rotl32 (r, 3u); l = rotl32 (l, 3u); - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i += 2) { u32x u; @@ -439,7 +441,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 c = c & 0x0fffffff; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i++) { if ((i < 2) || (i == 8) || (i == 15)) diff --git a/OpenCL/m03200.cl b/OpenCL/m03200.cl index 9e5c798..a3789cd 100644 --- a/OpenCL/m03200.cl +++ b/OpenCL/m03200.cl @@ -679,7 +679,9 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_loop (__glo L0 = 0; R0 = 0; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 9; i++) { BF_ENCRYPT (L0, R0); @@ -742,7 +744,9 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_loop (__glo L0 = 0; R0 = 0; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 9; i++) { BF_ENCRYPT (L0, R0); diff --git a/OpenCL/m05200.cl b/OpenCL/m05200.cl index 9e51ecb..abd2912 100644 --- a/OpenCL/m05200.cl +++ b/OpenCL/m05200.cl @@ -110,7 +110,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m05500_a0.cl b/OpenCL/m05500_a0.cl index b7769f3..82ad293 100644 --- a/OpenCL/m05500_a0.cl +++ b/OpenCL/m05500_a0.cl @@ -358,7 +358,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l u32x r = data[0]; u32x l = data[1]; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i += 2) { u32x u; @@ -411,7 +413,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 c = c & 0x0fffffff; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i++) { if ((i < 2) || (i == 8) || (i == 15)) diff --git a/OpenCL/m05500_a1.cl b/OpenCL/m05500_a1.cl index e453129..be2994c 100644 --- a/OpenCL/m05500_a1.cl +++ b/OpenCL/m05500_a1.cl @@ -355,7 +355,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l u32x r = data[0]; u32x l = data[1]; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i += 2) { u32x u; @@ -408,7 +410,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 c = c & 0x0fffffff; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i++) { if ((i < 2) || (i == 8) || (i == 15)) diff --git a/OpenCL/m05500_a3.cl b/OpenCL/m05500_a3.cl index 733dbe2..a9fc359 100644 --- a/OpenCL/m05500_a3.cl +++ b/OpenCL/m05500_a3.cl @@ -356,7 +356,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l u32x r = data[0]; u32x l = data[1]; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i += 2) { u32x u; @@ -409,7 +411,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 c = c & 0x0fffffff; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i++) { if ((i < 2) || (i == 8) || (i == 15)) diff --git a/OpenCL/m06100_a0.cl b/OpenCL/m06100_a0.cl index 1e173ca..3b0d23c 100644 --- a/OpenCL/m06100_a0.cl +++ b/OpenCL/m06100_a0.cl @@ -1180,7 +1180,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2 u32x Lh[8]; u32x Ll[8]; + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { const u32x Lp0 = stateh[(i + 8) & 7] >> 24; @@ -1233,7 +1235,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2 u32x Lh[8]; u32x Ll[8]; + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { const u32x Lp0 = Kh[(i + 8) & 7] >> 24; @@ -1281,7 +1285,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2 Kh[7] = Lh[7]; Kl[7] = Ll[7]; - #pragma unroll 8 + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 8; i++) { const u32x Lp0 = stateh[(i + 8) & 7] >> 24; diff --git a/OpenCL/m06100_a1.cl b/OpenCL/m06100_a1.cl index 2ce7c90..35005ae 100644 --- a/OpenCL/m06100_a1.cl +++ b/OpenCL/m06100_a1.cl @@ -1177,7 +1177,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2 u32x Lh[8]; u32x Ll[8]; + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { const u32x Lp0 = stateh[(i + 8) & 7] >> 24; @@ -1230,7 +1232,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2 u32x Lh[8]; u32x Ll[8]; + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { const u32x Lp0 = Kh[(i + 8) & 7] >> 24; @@ -1278,7 +1282,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2 Kh[7] = Lh[7]; Kl[7] = Ll[7]; - #pragma unroll 8 + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 8; i++) { const u32x Lp0 = stateh[(i + 8) & 7] >> 24; diff --git a/OpenCL/m06100_a3.cl b/OpenCL/m06100_a3.cl index b8c30fc..d613d2e 100644 --- a/OpenCL/m06100_a3.cl +++ b/OpenCL/m06100_a3.cl @@ -1178,7 +1178,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2 u32x Lh[8]; u32x Ll[8]; + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { const u32x Lp0 = stateh[(i + 8) & 7] >> 24; @@ -1231,7 +1233,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2 u32x Lh[8]; u32x Ll[8]; + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { const u32x Lp0 = Kh[(i + 8) & 7] >> 24; @@ -1279,7 +1283,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2 Kh[7] = Lh[7]; Kl[7] = Ll[7]; - #pragma unroll 8 + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 8; i++) { const u32x Lp0 = stateh[(i + 8) & 7] >> 24; diff --git a/OpenCL/m06221.cl b/OpenCL/m06221.cl index 2949080..78a82a6 100644 --- a/OpenCL/m06221.cl +++ b/OpenCL/m06221.cl @@ -115,7 +115,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - //#pragma unroll + #ifdef _unroll + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m06222.cl b/OpenCL/m06222.cl index 2620c72..40da089 100644 --- a/OpenCL/m06222.cl +++ b/OpenCL/m06222.cl @@ -115,7 +115,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - //#pragma unroll + #ifdef _unroll + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m06223.cl b/OpenCL/m06223.cl index a0eb7c2..df6c629 100644 --- a/OpenCL/m06223.cl +++ b/OpenCL/m06223.cl @@ -115,7 +115,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - //#pragma unroll + #ifdef _unroll + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m06231.cl b/OpenCL/m06231.cl index e64748f..b134603 100644 --- a/OpenCL/m06231.cl +++ b/OpenCL/m06231.cl @@ -1175,7 +1175,9 @@ void whirlpool_transform_last (u32 dgst[16], __local u32 (*s_Ch)[256], __local u u32 i; + #ifdef _unroll #pragma unroll + #endif for (i = 0; i < 8; i++) { const u32 Lp0 = Kh[(i + 8) & 7] >> 24; @@ -1223,7 +1225,9 @@ void whirlpool_transform_last (u32 dgst[16], __local u32 (*s_Ch)[256], __local u Kh[7] = Lh[7]; Kl[7] = Ll[7]; + #ifdef _unroll #pragma unroll + #endif for (i = 0; i < 8; i++) { const u32 Lp0 = stateh[(i + 8) & 7] >> 24; @@ -1371,7 +1375,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256 u32 i; + #ifdef _unroll #pragma unroll + #endif for (i = 0; i < 8; i++) { const u32 Lp0 = Kh[(i + 8) & 7] >> 24; @@ -1419,7 +1425,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256 Kh[7] = Lh[7]; Kl[7] = Ll[7]; + #ifdef _unroll #pragma unroll + #endif for (i = 0; i < 8; i++) { const u32 Lp0 = stateh[(i + 8) & 7] >> 24; diff --git a/OpenCL/m06232.cl b/OpenCL/m06232.cl index c7b79d4..3563bd0 100644 --- a/OpenCL/m06232.cl +++ b/OpenCL/m06232.cl @@ -1172,7 +1172,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256 u32 i; - #pragma unroll 8 + #ifdef _unroll + #pragma unroll + #endif for (i = 0; i < 8; i++) { const u8 Lp0 = Kh[(i + 8) & 7] >> 24; @@ -1220,7 +1222,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256 Kh[7] = Lh[7]; Kl[7] = Ll[7]; - #pragma unroll 8 + #ifdef _unroll + #pragma unroll + #endif for (i = 0; i < 8; i++) { const u8 Lp0 = stateh[(i + 8) & 7] >> 24; diff --git a/OpenCL/m06233.cl b/OpenCL/m06233.cl index 1fa1e1c..1e11789 100644 --- a/OpenCL/m06233.cl +++ b/OpenCL/m06233.cl @@ -1172,7 +1172,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256 u32 i; - #pragma unroll 8 + #ifdef _unroll + #pragma unroll + #endif for (i = 0; i < 8; i++) { const u8 Lp0 = Kh[(i + 8) & 7] >> 24; @@ -1220,7 +1222,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256 Kh[7] = Lh[7]; Kl[7] = Ll[7]; - #pragma unroll 8 + #ifdef _unroll + #pragma unroll + #endif for (i = 0; i < 8; i++) { const u8 Lp0 = stateh[(i + 8) & 7] >> 24; diff --git a/OpenCL/m06400.cl b/OpenCL/m06400.cl index db12885..99e0dd8 100644 --- a/OpenCL/m06400.cl +++ b/OpenCL/m06400.cl @@ -110,7 +110,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m06500.cl b/OpenCL/m06500.cl index 44e4d7f..b5d77e1 100644 --- a/OpenCL/m06500.cl +++ b/OpenCL/m06500.cl @@ -114,7 +114,9 @@ void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const ROUND_STEP (0); - //#pragma unroll + #ifdef _unroll + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m06600.cl b/OpenCL/m06600.cl index 45b9e8b..6ee0523 100644 --- a/OpenCL/m06600.cl +++ b/OpenCL/m06600.cl @@ -716,7 +716,9 @@ void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __local u32 * rek[2] = userkey[2]; rek[3] = userkey[3]; - #pragma unroll 10 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0, j = 0; i < 10; i += 1, j += 4) { u32 temp = rek[j + 3]; @@ -1391,7 +1393,9 @@ __kernel void m06600_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf AES128_ExpandKey (ukey, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - #pragma unroll KEYLEN + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < KEYLEN; i++) rdk[i] = rek[i]; AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4); diff --git a/OpenCL/m06800.cl b/OpenCL/m06800.cl index 91fecec..32d9a30 100644 --- a/OpenCL/m06800.cl +++ b/OpenCL/m06800.cl @@ -1075,7 +1075,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); @@ -1541,7 +1543,9 @@ __kernel void m06800_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf u32 rdk[KEYLEN]; - #pragma unroll 60 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < KEYLEN; i++) rdk[i] = rek[i]; AES256_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4); diff --git a/OpenCL/m07100.cl b/OpenCL/m07100.cl index a9f0900..1464b0e 100644 --- a/OpenCL/m07100.cl +++ b/OpenCL/m07100.cl @@ -114,7 +114,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - //#pragma unroll + #ifdef _unroll + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m07400.cl b/OpenCL/m07400.cl index 00eb8d2..8ee2fe1 100644 --- a/OpenCL/m07400.cl +++ b/OpenCL/m07400.cl @@ -112,7 +112,9 @@ void sha256_transform (const u32 w[16], u32 digest[8]) ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); @@ -1303,7 +1305,9 @@ void sha256_transform (const u32 w[16], u32 digest[8]) ROUND_STEP (0); - //#pragma unroll + #ifdef _unroll + #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); @@ -1686,7 +1690,9 @@ __kernel void m07400_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf if (j1) { - #pragma unroll 32 + #ifdef _unroll + #pragma unroll + #endif for (u32 k = 0, p = block_len - 32; k < 32; k++, p++) { PUTCHAR32_BE (block, p, GETCHAR32_BE (alt_result, k)); diff --git a/OpenCL/m07500_a0.cl b/OpenCL/m07500_a0.cl index be23099..b337129 100644 --- a/OpenCL/m07500_a0.cl +++ b/OpenCL/m07500_a0.cl @@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m07500_a1.cl b/OpenCL/m07500_a1.cl index 096888a..c99bb8d 100644 --- a/OpenCL/m07500_a1.cl +++ b/OpenCL/m07500_a1.cl @@ -45,7 +45,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -91,7 +93,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m07500_a3.cl b/OpenCL/m07500_a3.cl index ae1a92d..279ad1b 100644 --- a/OpenCL/m07500_a3.cl +++ b/OpenCL/m07500_a3.cl @@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m07800_a0.cl b/OpenCL/m07800_a0.cl index 3ca3be1..b4b98db 100644 --- a/OpenCL/m07800_a0.cl +++ b/OpenCL/m07800_a0.cl @@ -350,7 +350,9 @@ __kernel void m07800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, digest[3] = SHA1M_D; digest[4] = SHA1M_E; - #pragma unroll 32 + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 32; i++) final[i] = 0; final[0] = w0[0]; @@ -594,7 +596,9 @@ __kernel void m07800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, digest[3] = SHA1M_D; digest[4] = SHA1M_E; - #pragma unroll 32 + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 32; i++) final[i] = 0; final[0] = w0[0]; diff --git a/OpenCL/m07800_a1.cl b/OpenCL/m07800_a1.cl index a58c8d9..feb36f1 100644 --- a/OpenCL/m07800_a1.cl +++ b/OpenCL/m07800_a1.cl @@ -408,7 +408,9 @@ __kernel void m07800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, digest[3] = SHA1M_D; digest[4] = SHA1M_E; - #pragma unroll 32 + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 32; i++) final[i] = 0; final[0] = w0[0]; @@ -712,7 +714,9 @@ __kernel void m07800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, digest[3] = SHA1M_D; digest[4] = SHA1M_E; - #pragma unroll 32 + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 32; i++) final[i] = 0; final[0] = w0[0]; diff --git a/OpenCL/m07800_a3.cl b/OpenCL/m07800_a3.cl index 02d7719..3b75ace 100644 --- a/OpenCL/m07800_a3.cl +++ b/OpenCL/m07800_a3.cl @@ -324,7 +324,9 @@ void m07800m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __gl digest[3] = SHA1M_D; digest[4] = SHA1M_E; - #pragma unroll 32 + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 32; i++) final[i] = 0; final[0] = swap32 (w0[0]); @@ -536,7 +538,9 @@ void m07800s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __gl digest[3] = SHA1M_D; digest[4] = SHA1M_E; - #pragma unroll 32 + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 32; i++) final[i] = 0; final[0] = swap32 (w0[0]); diff --git a/OpenCL/m07900.cl b/OpenCL/m07900.cl index 40b13a7..7226bec 100644 --- a/OpenCL/m07900.cl +++ b/OpenCL/m07900.cl @@ -114,7 +114,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m08000_a0.cl b/OpenCL/m08000_a0.cl index d4f62b5..33266e0 100644 --- a/OpenCL/m08000_a0.cl +++ b/OpenCL/m08000_a0.cl @@ -119,7 +119,9 @@ void sha256_transform (u32x digest[8], const u32x w[16]) ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); @@ -168,7 +170,9 @@ void sha256_transform_z (u32x digest[8]) ROUND_STEP_Z (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_STEP_Z (i); @@ -217,7 +221,9 @@ void sha256_transform_s (u32x digest[8], __local u32 *w) ROUND_STEP_S (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_STEP_S (i); @@ -270,7 +276,9 @@ __kernel void m08000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, { w_s1[15] = 0 | salt_buf0 >> 16; + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i++) { w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]); @@ -281,7 +289,9 @@ __kernel void m08000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w_s2[ 2] = salt_buf2 << 16 | 0; w_s2[15] = (510 + 8) * 8; + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i++) { w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]); @@ -437,7 +447,9 @@ __kernel void m08000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, { w_s1[15] = 0 | salt_buf0 >> 16; + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i++) { w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]); @@ -448,7 +460,9 @@ __kernel void m08000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w_s2[ 2] = salt_buf2 << 16 | 0; w_s2[15] = (510 + 8) * 8; + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i++) { w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]); diff --git a/OpenCL/m08000_a1.cl b/OpenCL/m08000_a1.cl index c525626..efbc2bb 100644 --- a/OpenCL/m08000_a1.cl +++ b/OpenCL/m08000_a1.cl @@ -117,7 +117,9 @@ void sha256_transform (u32x digest[8], const u32x w[16]) ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); @@ -166,7 +168,9 @@ void sha256_transform_z (u32x digest[8]) ROUND_STEP_Z (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_STEP_Z (i); @@ -215,7 +219,9 @@ void sha256_transform_s (u32x digest[8], __local u32 *w) ROUND_STEP_S (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_STEP_S (i); @@ -268,7 +274,9 @@ __kernel void m08000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, { w_s1[15] = 0 | salt_buf0 >> 16; + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i++) { w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]); @@ -279,7 +287,9 @@ __kernel void m08000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w_s2[ 2] = salt_buf2 << 16 | 0; w_s2[15] = (510 + 8) * 8; + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i++) { w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]); @@ -489,7 +499,9 @@ __kernel void m08000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, { w_s1[15] = 0 | salt_buf0 >> 16; + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i++) { w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]); @@ -500,7 +512,9 @@ __kernel void m08000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, w_s2[ 2] = salt_buf2 << 16 | 0; w_s2[15] = (510 + 8) * 8; + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i++) { w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]); diff --git a/OpenCL/m08000_a3.cl b/OpenCL/m08000_a3.cl index 5a77ee5..b670305 100644 --- a/OpenCL/m08000_a3.cl +++ b/OpenCL/m08000_a3.cl @@ -117,7 +117,9 @@ void sha256_transform (u32x digest[8], const u32x w[16]) ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); @@ -166,7 +168,9 @@ void sha256_transform_z (u32x digest[8]) ROUND_STEP_Z (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_STEP_Z (i); @@ -215,7 +219,9 @@ void sha256_transform_s (u32x digest[8], __local u32 *w) ROUND_STEP_S (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_STEP_S (i); @@ -265,7 +271,9 @@ void m08000m (__local u32 *w_s1, __local u32 *w_s2, u32 w[16], const u32 pw_len, { w_s1[15] = 0 | salt_buf0 >> 16; + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i++) { w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]); @@ -276,7 +284,9 @@ void m08000m (__local u32 *w_s1, __local u32 *w_s2, u32 w[16], const u32 pw_len, w_s2[ 2] = salt_buf2 << 16 | 0; w_s2[15] = (510 + 8) * 8; + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i++) { w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]); @@ -383,7 +393,9 @@ void m08000s (__local u32 *w_s1, __local u32 *w_s2, u32 w[16], const u32 pw_len, { w_s1[15] = 0 | salt_buf0 >> 16; + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i++) { w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]); @@ -394,7 +406,9 @@ void m08000s (__local u32 *w_s1, __local u32 *w_s2, u32 w[16], const u32 pw_len, w_s2[ 2] = salt_buf2 << 16 | 0; w_s2[15] = (510 + 8) * 8; + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i++) { w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]); diff --git a/OpenCL/m08200.cl b/OpenCL/m08200.cl index 73e076d..dc7b768 100644 --- a/OpenCL/m08200.cl +++ b/OpenCL/m08200.cl @@ -134,7 +134,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); @@ -321,7 +323,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND512_STEP (0); - //#pragma unroll + #ifdef _unroll + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND512_EXPAND (); ROUND512_STEP (i); diff --git a/OpenCL/m08500_a0.cl b/OpenCL/m08500_a0.cl index e9b845e..d21d101 100644 --- a/OpenCL/m08500_a0.cl +++ b/OpenCL/m08500_a0.cl @@ -410,7 +410,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l u32x r = data[0]; u32x l = data[1]; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i += 2) { u32x u; @@ -463,7 +465,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 c = c & 0x0fffffff; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i++) { if ((i < 2) || (i == 8) || (i == 15)) diff --git a/OpenCL/m08500_a1.cl b/OpenCL/m08500_a1.cl index 65bb133..20f2599 100644 --- a/OpenCL/m08500_a1.cl +++ b/OpenCL/m08500_a1.cl @@ -407,7 +407,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l u32x r = data[0]; u32x l = data[1]; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i += 2) { u32x u; @@ -460,7 +462,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 c = c & 0x0fffffff; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i++) { if ((i < 2) || (i == 8) || (i == 15)) diff --git a/OpenCL/m08500_a3.cl b/OpenCL/m08500_a3.cl index dba7ada..a97d62d 100644 --- a/OpenCL/m08500_a3.cl +++ b/OpenCL/m08500_a3.cl @@ -408,7 +408,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l u32x r = data[0]; u32x l = data[1]; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i += 2) { u32x u; @@ -461,7 +463,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 c = c & 0x0fffffff; - #pragma unroll 16 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < 16; i++) { if ((i < 2) || (i == 8) || (i == 15)) diff --git a/OpenCL/m08600_a0.cl b/OpenCL/m08600_a0.cl index 34f2526..c4a043c 100644 --- a/OpenCL/m08600_a0.cl +++ b/OpenCL/m08600_a0.cl @@ -82,7 +82,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table) { u32 s = 48; - #pragma unroll 12 + #ifdef _unroll + #pragma unroll + #endif for (int j = 0; j < 12; j++) { u32x tmp_in = in[j]; @@ -104,7 +106,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag u32x c; - #pragma unroll 4 + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 4; i++) { t ^= (in[i] >> 0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); diff --git a/OpenCL/m08600_a1.cl b/OpenCL/m08600_a1.cl index 90cf979..58be048 100644 --- a/OpenCL/m08600_a1.cl +++ b/OpenCL/m08600_a1.cl @@ -79,7 +79,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table) { u32 s = 48; - #pragma unroll 12 + #ifdef _unroll + #pragma unroll + #endif for (int j = 0; j < 12; j++) { u32x tmp_in = in[j]; @@ -101,7 +103,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag u32x c; - #pragma unroll 4 + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 4; i++) { t ^= (in[i] >> 0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); diff --git a/OpenCL/m08600_a3.cl b/OpenCL/m08600_a3.cl index bd30653..5b7d924 100644 --- a/OpenCL/m08600_a3.cl +++ b/OpenCL/m08600_a3.cl @@ -79,7 +79,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table) { u32 s = 48; - #pragma unroll 12 + #ifdef _unroll + #pragma unroll + #endif for (int j = 0; j < 12; j++) { u32x tmp_in = in[j]; @@ -101,7 +103,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag u32x c; - #pragma unroll 4 + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 4; i++) { t ^= (in[i] >> 0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); diff --git a/OpenCL/m08700_a0.cl b/OpenCL/m08700_a0.cl index 4e37c47..1bcb8d6 100644 --- a/OpenCL/m08700_a0.cl +++ b/OpenCL/m08700_a0.cl @@ -94,7 +94,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table) { u32 s = 48; - #pragma unroll 12 + #ifdef _unroll + #pragma unroll + #endif for (int j = 0; j < 12; j++) { u32x tmp_in = in[j]; @@ -116,7 +118,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag u32x c; - #pragma unroll 4 + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 4; i++) { t ^= (in[i] >> 0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); diff --git a/OpenCL/m08700_a1.cl b/OpenCL/m08700_a1.cl index cc9902d..ed6d197 100644 --- a/OpenCL/m08700_a1.cl +++ b/OpenCL/m08700_a1.cl @@ -91,7 +91,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table) { u32 s = 48; - #pragma unroll 12 + #ifdef _unroll + #pragma unroll + #endif for (int j = 0; j < 12; j++) { u32x tmp_in = in[j]; @@ -113,7 +115,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag u32x c; - #pragma unroll 4 + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 4; i++) { t ^= (in[i] >> 0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); diff --git a/OpenCL/m08700_a3.cl b/OpenCL/m08700_a3.cl index d0a291d..b6fd2d3 100644 --- a/OpenCL/m08700_a3.cl +++ b/OpenCL/m08700_a3.cl @@ -93,7 +93,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table) { u32 s = 48; - #pragma unroll 12 + #ifdef _unroll + #pragma unroll + #endif for (int j = 0; j < 12; j++) { u32x tmp_in = in[j]; @@ -115,7 +117,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag u32x c; - #pragma unroll 4 + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 4; i++) { t ^= (in[i] >> 0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); diff --git a/OpenCL/m08800.cl b/OpenCL/m08800.cl index 62d0cae..934310b 100644 --- a/OpenCL/m08800.cl +++ b/OpenCL/m08800.cl @@ -1212,7 +1212,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m08900.cl b/OpenCL/m08900.cl index 5f90dc9..85ad68d 100644 --- a/OpenCL/m08900.cl +++ b/OpenCL/m08900.cl @@ -110,7 +110,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); @@ -705,7 +707,9 @@ void scrypt_smix (uint4 *X, uint4 *T, const u32 phy, __global uint4 *V) const u32 x = gid % xSIZE; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < STATE_CNT4; i += 4) { T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w); @@ -743,7 +747,9 @@ void scrypt_smix (uint4 *X, uint4 *T, const u32 phy, __global uint4 *V) salsa_r (X); } + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < STATE_CNT4; i += 4) { T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w); @@ -910,12 +916,16 @@ __kernel void m08900_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf uint4 X[STATE_CNT4]; uint4 T[STATE_CNT4]; + #ifdef _unroll #pragma unroll + #endif for (int z = 0; z < STATE_CNT4; z++) X[z] = swap32_4 (tmps[gid].P[z]); scrypt_smix (X, T, scrypt_phy, d_scryptV_buf); + #ifdef _unroll #pragma unroll + #endif for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[z] = swap32_4 (X[z]); #if SCRYPT_P >= 1 diff --git a/OpenCL/m09100.cl b/OpenCL/m09100.cl index eb4333e..4efe528 100644 --- a/OpenCL/m09100.cl +++ b/OpenCL/m09100.cl @@ -75,7 +75,9 @@ void lotus_mix (u32 *in, __local u32 *s_lotus_magic_table) { u32 s = 48; - #pragma unroll 12 + #ifdef _unroll + #pragma unroll + #endif for (int j = 0; j < 12; j++) { u32 tmp_in = in[j]; @@ -97,7 +99,9 @@ void lotus_transform_password (u32 in[4], u32 out[4], __local u32 *s_lotus_magic u32 c; - #pragma unroll 4 + #ifdef _unroll + #pragma unroll + #endif for (int i = 0; i < 4; i++) { t ^= (in[i] >> 0) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); diff --git a/OpenCL/m09600.cl b/OpenCL/m09600.cl index f6d4ba6..eb28f6c 100644 --- a/OpenCL/m09600.cl +++ b/OpenCL/m09600.cl @@ -1073,10 +1073,7 @@ void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const ROUND_STEP (0); - #ifdef IS_AMD - // #pragma unroll - // breaks compiler - #else + #ifdef _unroll #pragma unroll #endif for (int i = 16; i < 80; i += 16) diff --git a/OpenCL/m09700_a0.cl b/OpenCL/m09700_a0.cl index d9ae316..4fa60b9 100644 --- a/OpenCL/m09700_a0.cl +++ b/OpenCL/m09700_a0.cl @@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m09700_a1.cl b/OpenCL/m09700_a1.cl index 67671ad..65162f7 100644 --- a/OpenCL/m09700_a1.cl +++ b/OpenCL/m09700_a1.cl @@ -45,7 +45,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -91,7 +93,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m09700_a3.cl b/OpenCL/m09700_a3.cl index 8e1393e..45128af 100644 --- a/OpenCL/m09700_a3.cl +++ b/OpenCL/m09700_a3.cl @@ -44,7 +44,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -90,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m09710_a0.cl b/OpenCL/m09710_a0.cl index dffcdca..b992e8d 100644 --- a/OpenCL/m09710_a0.cl +++ b/OpenCL/m09710_a0.cl @@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m09710_a1.cl b/OpenCL/m09710_a1.cl index a0353a4..c761e44 100644 --- a/OpenCL/m09710_a1.cl +++ b/OpenCL/m09710_a1.cl @@ -45,7 +45,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -91,7 +93,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m09710_a3.cl b/OpenCL/m09710_a3.cl index 8aa72e2..61f23d1 100644 --- a/OpenCL/m09710_a3.cl +++ b/OpenCL/m09710_a3.cl @@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m09800_a0.cl b/OpenCL/m09800_a0.cl index 8b9ef18..31f6355 100644 --- a/OpenCL/m09800_a0.cl +++ b/OpenCL/m09800_a0.cl @@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m09800_a1.cl b/OpenCL/m09800_a1.cl index 920a1e1..1c11f01 100644 --- a/OpenCL/m09800_a1.cl +++ b/OpenCL/m09800_a1.cl @@ -45,7 +45,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -91,7 +93,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m09800_a3.cl b/OpenCL/m09800_a3.cl index 92589de..6fc92c0 100644 --- a/OpenCL/m09800_a3.cl +++ b/OpenCL/m09800_a3.cl @@ -44,7 +44,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -90,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m09810_a0.cl b/OpenCL/m09810_a0.cl index 278bda6..40fb8a0 100644 --- a/OpenCL/m09810_a0.cl +++ b/OpenCL/m09810_a0.cl @@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m09810_a1.cl b/OpenCL/m09810_a1.cl index 298767a..527b1bd 100644 --- a/OpenCL/m09810_a1.cl +++ b/OpenCL/m09810_a1.cl @@ -45,7 +45,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -91,7 +93,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m09810_a3.cl b/OpenCL/m09810_a3.cl index dac5f94..5bfc896 100644 --- a/OpenCL/m09810_a3.cl +++ b/OpenCL/m09810_a3.cl @@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m10400_a0.cl b/OpenCL/m10400_a0.cl index 27d8718..90cd0bc 100644 --- a/OpenCL/m10400_a0.cl +++ b/OpenCL/m10400_a0.cl @@ -61,7 +61,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { ptr[i] = v; v += a; @@ -75,7 +77,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u32 j = 0; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 255; i += 5) { j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j); @@ -90,7 +94,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4]) { - #pragma unroll 4 + #ifdef _unroll + #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m10400_a1.cl b/OpenCL/m10400_a1.cl index 4a27f7c..093af29 100644 --- a/OpenCL/m10400_a1.cl +++ b/OpenCL/m10400_a1.cl @@ -59,7 +59,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { ptr[i] = v; v += a; @@ -73,7 +75,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u32 j = 0; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 255; i += 5) { j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j); @@ -88,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4]) { - #pragma unroll 4 + #ifdef _unroll + #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m10400_a3.cl b/OpenCL/m10400_a3.cl index f91a348..92ebfe5 100644 --- a/OpenCL/m10400_a3.cl +++ b/OpenCL/m10400_a3.cl @@ -59,7 +59,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { ptr[i] = v; v += a; @@ -73,7 +75,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u32 j = 0; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 255; i += 5) { j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j); @@ -88,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4]) { - #pragma unroll 4 + #ifdef _unroll + #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m10410_a0.cl b/OpenCL/m10410_a0.cl index ee8ca20..518b84c 100644 --- a/OpenCL/m10410_a0.cl +++ b/OpenCL/m10410_a0.cl @@ -61,7 +61,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { ptr[i] = v; v += a; @@ -75,7 +77,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u32 j = 0; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 255; i += 5) { j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j); @@ -90,7 +94,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4]) { - #pragma unroll 4 + #ifdef _unroll + #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m10410_a1.cl b/OpenCL/m10410_a1.cl index 35281fb..ec226b8 100644 --- a/OpenCL/m10410_a1.cl +++ b/OpenCL/m10410_a1.cl @@ -59,7 +59,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { ptr[i] = v; v += a; @@ -73,7 +75,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u32 j = 0; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 255; i += 5) { j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j); @@ -88,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4]) { - #pragma unroll 4 + #ifdef _unroll + #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m10410_a3.cl b/OpenCL/m10410_a3.cl index f5d2567..d43b657 100644 --- a/OpenCL/m10410_a3.cl +++ b/OpenCL/m10410_a3.cl @@ -59,7 +59,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { ptr[i] = v; v += a; @@ -73,7 +75,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u32 j = 0; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 255; i += 5) { j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j); @@ -88,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4]) { - #pragma unroll 4 + #ifdef _unroll + #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m10500.cl b/OpenCL/m10500.cl index cf763af..a24469f 100644 --- a/OpenCL/m10500.cl +++ b/OpenCL/m10500.cl @@ -56,7 +56,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -64,7 +66,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u32 j = 0; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 16; i++) { u32 idx = i * 16; @@ -103,7 +107,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m10700.cl b/OpenCL/m10700.cl index c5ec11d..1ab5fb7 100644 --- a/OpenCL/m10700.cl +++ b/OpenCL/m10700.cl @@ -132,7 +132,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const ROUND256_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND256_EXPAND (); ROUND256_STEP (i); @@ -242,7 +244,9 @@ void sha384_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const ROUND384_STEP (0); - //#pragma unroll + #ifdef _unroll + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND384_EXPAND (); ROUND384_STEP (i); @@ -352,7 +356,9 @@ void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const ROUND512_STEP (0); - //#pragma unroll + #ifdef _unroll + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND512_EXPAND (); ROUND512_STEP (i); diff --git a/OpenCL/m10800_a0.cl b/OpenCL/m10800_a0.cl index 78ad998..17f7540 100644 --- a/OpenCL/m10800_a0.cl +++ b/OpenCL/m10800_a0.cl @@ -116,7 +116,9 @@ void sha384_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m10800_a1.cl b/OpenCL/m10800_a1.cl index 24762c0..bcb0c58 100644 --- a/OpenCL/m10800_a1.cl +++ b/OpenCL/m10800_a1.cl @@ -114,7 +114,9 @@ void sha384_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m10800_a3.cl b/OpenCL/m10800_a3.cl index c6df671..7bfc741 100644 --- a/OpenCL/m10800_a3.cl +++ b/OpenCL/m10800_a3.cl @@ -114,7 +114,9 @@ void sha384_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m10900.cl b/OpenCL/m10900.cl index ef82e3c..0107d84 100644 --- a/OpenCL/m10900.cl +++ b/OpenCL/m10900.cl @@ -111,7 +111,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m11300.cl b/OpenCL/m11300.cl index 0547e57..46af4c2 100644 --- a/OpenCL/m11300.cl +++ b/OpenCL/m11300.cl @@ -986,7 +986,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m11600.cl b/OpenCL/m11600.cl index 16bc140..c83bf04 100644 --- a/OpenCL/m11600.cl +++ b/OpenCL/m11600.cl @@ -985,7 +985,9 @@ void sha256_transform (const u32 w[16], u32 digest[8]) ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m11700_a0.cl b/OpenCL/m11700_a0.cl index ec10233..2e93185 100644 --- a/OpenCL/m11700_a0.cl +++ b/OpenCL/m11700_a0.cl @@ -2250,7 +2250,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) u64x s[8]; u64x t[8]; + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { t[i] = h[i]; @@ -2261,7 +2263,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) k[i] = SBOG_LPSti64; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { s[i] = m[i]; @@ -2269,13 +2273,17 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) for (int r = 0; r < 12; r++) { + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { t[i] = s[i] ^ k[i]; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { s[i] = SBOG_LPSti64; @@ -2286,14 +2294,18 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) t[i] = k[i] ^ sbob_rc64[r][i]; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { k[i] = SBOG_LPSti64; } } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { h[i] ^= s[i] ^ k[i] ^ m[i]; diff --git a/OpenCL/m11700_a1.cl b/OpenCL/m11700_a1.cl index 1d194bb..36ce0b9 100644 --- a/OpenCL/m11700_a1.cl +++ b/OpenCL/m11700_a1.cl @@ -2247,7 +2247,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) u64x s[8]; u64x t[8]; + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { t[i] = h[i]; @@ -2258,7 +2260,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) k[i] = SBOG_LPSti64; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { s[i] = m[i]; @@ -2266,13 +2270,17 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) for (int r = 0; r < 12; r++) { + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { t[i] = s[i] ^ k[i]; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { s[i] = SBOG_LPSti64; @@ -2283,14 +2291,18 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) t[i] = k[i] ^ sbob_rc64[r][i]; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { k[i] = SBOG_LPSti64; } } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { h[i] ^= s[i] ^ k[i] ^ m[i]; diff --git a/OpenCL/m11700_a3.cl b/OpenCL/m11700_a3.cl index 63e4f0a..1569b1f 100644 --- a/OpenCL/m11700_a3.cl +++ b/OpenCL/m11700_a3.cl @@ -2248,7 +2248,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) u64x s[8]; u64x t[8]; + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { t[i] = h[i]; @@ -2259,7 +2261,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) k[i] = SBOG_LPSti64; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { s[i] = m[i]; @@ -2267,13 +2271,17 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) for (int r = 0; r < 12; r++) { + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { t[i] = s[i] ^ k[i]; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { s[i] = SBOG_LPSti64; @@ -2284,14 +2292,18 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) t[i] = k[i] ^ sbob_rc64[r][i]; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { k[i] = SBOG_LPSti64; } } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { h[i] ^= s[i] ^ k[i] ^ m[i]; diff --git a/OpenCL/m11800_a0.cl b/OpenCL/m11800_a0.cl index 9b6b80c..f282c3e 100644 --- a/OpenCL/m11800_a0.cl +++ b/OpenCL/m11800_a0.cl @@ -2250,7 +2250,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) u64x s[8]; u64x t[8]; + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { t[i] = h[i]; @@ -2261,7 +2263,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) k[i] = SBOG_LPSti64; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { s[i] = m[i]; @@ -2269,13 +2273,17 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) for (int r = 0; r < 12; r++) { + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { t[i] = s[i] ^ k[i]; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { s[i] = SBOG_LPSti64; @@ -2286,14 +2294,18 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) t[i] = k[i] ^ sbob_rc64[r][i]; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { k[i] = SBOG_LPSti64; } } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { h[i] ^= s[i] ^ k[i] ^ m[i]; diff --git a/OpenCL/m11800_a1.cl b/OpenCL/m11800_a1.cl index f136fb5..cf8392c 100644 --- a/OpenCL/m11800_a1.cl +++ b/OpenCL/m11800_a1.cl @@ -2247,7 +2247,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) u64x s[8]; u64x t[8]; + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { t[i] = h[i]; @@ -2258,7 +2260,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) k[i] = SBOG_LPSti64; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { s[i] = m[i]; @@ -2266,13 +2270,17 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) for (int r = 0; r < 12; r++) { + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { t[i] = s[i] ^ k[i]; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { s[i] = SBOG_LPSti64; @@ -2283,14 +2291,18 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) t[i] = k[i] ^ sbob_rc64[r][i]; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { k[i] = SBOG_LPSti64; } } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { h[i] ^= s[i] ^ k[i] ^ m[i]; diff --git a/OpenCL/m11800_a3.cl b/OpenCL/m11800_a3.cl index 1028117..2370800 100644 --- a/OpenCL/m11800_a3.cl +++ b/OpenCL/m11800_a3.cl @@ -2248,7 +2248,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) u64x s[8]; u64x t[8]; + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { t[i] = h[i]; @@ -2259,7 +2261,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) k[i] = SBOG_LPSti64; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { s[i] = m[i]; @@ -2267,13 +2271,17 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) for (int r = 0; r < 12; r++) { + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { t[i] = s[i] ^ k[i]; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { s[i] = SBOG_LPSti64; @@ -2284,14 +2292,18 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256]) t[i] = k[i] ^ sbob_rc64[r][i]; } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { k[i] = SBOG_LPSti64; } } + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < 8; i++) { h[i] ^= s[i] ^ k[i] ^ m[i]; diff --git a/OpenCL/m12200.cl b/OpenCL/m12200.cl index 001a5a2..7b789bc 100644 --- a/OpenCL/m12200.cl +++ b/OpenCL/m12200.cl @@ -114,10 +114,7 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - #ifdef IS_AMD - // #pragma unroll - // breaks compiler - #else + #ifdef _unroll #pragma unroll #endif for (int i = 16; i < 80; i += 16) diff --git a/OpenCL/m12300.cl b/OpenCL/m12300.cl index 5ab6a05..5430b4f 100644 --- a/OpenCL/m12300.cl +++ b/OpenCL/m12300.cl @@ -114,7 +114,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8]) ROUND_STEP (0); - //#pragma unroll + #ifdef _unroll + #pragma unroll + #endif for (int i = 16; i < 80; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m12400.cl b/OpenCL/m12400.cl index b755f39..7902c5d 100644 --- a/OpenCL/m12400.cl +++ b/OpenCL/m12400.cl @@ -376,7 +376,9 @@ void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 (*s_ c = c & 0x0fffffff; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 16; i++) { if ((i < 2) || (i == 8) || (i == 15)) diff --git a/OpenCL/m12800.cl b/OpenCL/m12800.cl index 2a9bbfd..a9e3f93 100644 --- a/OpenCL/m12800.cl +++ b/OpenCL/m12800.cl @@ -177,7 +177,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m12900.cl b/OpenCL/m12900.cl index 2ad3de6..817a819 100644 --- a/OpenCL/m12900.cl +++ b/OpenCL/m12900.cl @@ -111,7 +111,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m13000.cl b/OpenCL/m13000.cl index f12f630..267081c 100644 --- a/OpenCL/m13000.cl +++ b/OpenCL/m13000.cl @@ -111,7 +111,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/m13100_a0.cl b/OpenCL/m13100_a0.cl index ef288a5..430e3d9 100644 --- a/OpenCL/m13100_a0.cl +++ b/OpenCL/m13100_a0.cl @@ -50,7 +50,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -96,7 +98,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __global u32 *in, u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m13100_a1.cl b/OpenCL/m13100_a1.cl index 8ddaac7..fc7840a 100644 --- a/OpenCL/m13100_a1.cl +++ b/OpenCL/m13100_a1.cl @@ -48,7 +48,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -94,7 +96,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __global u32 *in, u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m13100_a3.cl b/OpenCL/m13100_a3.cl index 385773c..4a2878e 100644 --- a/OpenCL/m13100_a3.cl +++ b/OpenCL/m13100_a3.cl @@ -48,7 +48,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) __local u32 *ptr = (__local u32 *) rc4_key->S; + #ifdef _unroll #pragma unroll + #endif for (u32 i = 0; i < 64; i++) { *ptr++ = v; v += a; @@ -94,7 +96,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __global u32 *in, u32 out[4]) { + #ifdef _unroll #pragma unroll + #endif for (u32 k = 0; k < 4; k++) { u32 xor4 = 0; diff --git a/OpenCL/m13200.cl b/OpenCL/m13200.cl index c0b4af5..3daa846 100644 --- a/OpenCL/m13200.cl +++ b/OpenCL/m13200.cl @@ -4,7 +4,7 @@ * License.....: MIT */ - + #define _SHA1_ #include "include/constants.h" @@ -713,7 +713,9 @@ void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __local u32 * rek[2] = userkey[2]; rek[3] = userkey[3]; - #pragma unroll 10 + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0, j = 0; i < 10; i += 1, j += 4) { u32 temp = rek[j + 3]; @@ -1016,7 +1018,7 @@ __kernel void m13200_init (__global pw_t *pws, __global kernel_rule_t *rules_buf const u32 pw_len = pws[gid].pw_len; append_0x80_4x4 (w0, w1, w2, w3, pw_len); - + w0[0] = swap32 (w0[0]); w0[1] = swap32 (w0[1]); w0[2] = swap32 (w0[2]); @@ -1035,7 +1037,7 @@ __kernel void m13200_init (__global pw_t *pws, __global kernel_rule_t *rules_buf w3[3] = swap32 (w3[3]); w3[3] = pw_len * 8; - + /** * KEK */ @@ -1072,7 +1074,7 @@ __kernel void m13200_init (__global pw_t *pws, __global kernel_rule_t *rules_buf tmps[gid].cipher[1] = salt_bufs[salt_pos].salt_buf[5]; tmps[gid].cipher[2] = 0; tmps[gid].cipher[3] = 0; - + } __kernel void m13200_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global axcrypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) @@ -1137,17 +1139,19 @@ __kernel void m13200_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf ukeyx[1] = tmps[gid].KEK[1]; ukeyx[2] = tmps[gid].KEK[2]; ukeyx[3] = tmps[gid].KEK[3]; - + AES128_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - #pragma unroll KEYLEN + #ifdef _unroll + #pragma unroll + #endif for (u32 i = 0; i < KEYLEN; i++) rdk[i] = rek[i]; AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4); u32 lsb[4]; u32 cipher[4]; - + lsb[0] = tmps[gid].lsb[0]; lsb[1] = tmps[gid].lsb[1]; lsb[2] = tmps[gid].lsb[2]; @@ -1157,8 +1161,8 @@ __kernel void m13200_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf cipher[1] = tmps[gid].cipher[1]; cipher[2] = tmps[gid].cipher[2]; cipher[3] = tmps[gid].cipher[3]; - - + + /** * AxCrypt main cipher routine */ @@ -1201,7 +1205,7 @@ __kernel void m13200_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf tmps[gid].lsb[1] = lsb[1]; tmps[gid].lsb[2] = lsb[2]; tmps[gid].lsb[3] = lsb[3]; - + tmps[gid].cipher[0] = cipher[0]; tmps[gid].cipher[1] = cipher[1]; tmps[gid].cipher[2] = cipher[2]; diff --git a/OpenCL/m13400.cl b/OpenCL/m13400.cl index e9e36e3..1ba5d64 100644 --- a/OpenCL/m13400.cl +++ b/OpenCL/m13400.cl @@ -1074,7 +1074,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const ROUND_STEP (0); + #ifdef _unroll #pragma unroll + #endif for (int i = 16; i < 64; i += 16) { ROUND_EXPAND (); ROUND_STEP (i); diff --git a/OpenCL/rp.c b/OpenCL/rp.c index cc2f95a..f94b3ee 100644 --- a/OpenCL/rp.c +++ b/OpenCL/rp.c @@ -4126,7 +4126,9 @@ inline u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32x out_len = 0; + #ifdef _unroll #pragma unroll + #endif for (int i = 0; i < VECT_SIZE; i++) { u32 tmp0[4]; diff --git a/OpenCL/types_ocl.c b/OpenCL/types_ocl.c index 8104039..772aba1 100644 --- a/OpenCL/types_ocl.c +++ b/OpenCL/types_ocl.c @@ -5,9 +5,6 @@ * License.....: MIT */ -#define DEVICE_TYPE_CPU 2 -#define DEVICE_TYPE_GPU 4 - typedef uchar u8; typedef ushort u16; typedef uint u32; diff --git a/include/kernel_vendor.h b/include/kernel_vendor.h index d2ffb59..d364256 100644 --- a/include/kernel_vendor.h +++ b/include/kernel_vendor.h @@ -3,15 +3,30 @@ * License.....: MIT */ -#ifdef cl_khr_byte_addressable_store +#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable + +/** + * device type + */ + +#define DEVICE_TYPE_CPU 2 +#define DEVICE_TYPE_GPU 4 +#define DEVICE_TYPE_ACCEL 8 + +#if DEVICE_TYPE == DEVICE_TYPE_CPU +#define IS_CPU +#elif DEVICE_TYPE == DEVICE_TYPE_GPU +#define IS_GPU +#elif DEVICE_TYPE == DEVICE_TYPE_ACCEL +#define IS_ACCEL #endif /** * vendor specific */ -#if VENDOR_ID == (1 << 0) +#if VENDOR_ID == (1 << 0) #define IS_AMD #elif VENDOR_ID == (1 << 6) #define IS_NV @@ -29,15 +44,155 @@ #endif /** - * NV specific + * Unrolling is generally enabled, for all device types and hash modes + * There's a few exception when it's better not to unroll */ +// Some algorithms run into too much register pressure due to loop unrolling + #ifdef IS_NV +#ifdef IS_GPU + +#if KERN_TYPE == 1500 +#undef _unroll +#endif +#if KERN_TYPE == 1800 +#undef _unroll +#endif +#if KERN_TYPE == 3000 +#undef _unroll +#endif +#if KERN_TYPE == 6221 +#undef _unroll +#endif +#if KERN_TYPE == 6222 +#undef _unroll +#endif +#if KERN_TYPE == 6223 +#undef _unroll +#endif +#if KERN_TYPE == 6500 +#undef _unroll +#endif +#if KERN_TYPE == 7100 +#undef _unroll +#endif +#if KERN_TYPE == 7400 +#undef _unroll +#endif +#if KERN_TYPE == 8200 +#undef _unroll +#endif +#if KERN_TYPE == 10400 +#undef _unroll +#endif +#if KERN_TYPE == 10500 +#undef _unroll +#endif +#if KERN_TYPE == 10700 +#undef _unroll +#endif +#if KERN_TYPE == 12300 +#undef _unroll +#endif +#if KERN_TYPE == 12400 +#undef _unroll #endif -/** - * Generic - */ +#endif +#endif + +#ifdef IS_AMD +#ifdef IS_GPU + +#if KERN_TYPE == 3200 +#undef _unroll +#endif +#if KERN_TYPE == 5200 +#undef _unroll +#endif +#if KERN_TYPE == 6100 +#undef _unroll +#endif +#if KERN_TYPE == 6221 +#undef _unroll +#endif +#if KERN_TYPE == 6222 +#undef _unroll +#endif +#if KERN_TYPE == 6223 +#undef _unroll +#endif +#if KERN_TYPE == 6400 +#undef _unroll +#endif +#if KERN_TYPE == 6500 +#undef _unroll +#endif +#if KERN_TYPE == 6800 +#undef _unroll +#endif +#if KERN_TYPE == 7100 +#undef _unroll +#endif +#if KERN_TYPE == 7400 +#undef _unroll +#endif +#if KERN_TYPE == 8000 +#undef _unroll +#endif +#if KERN_TYPE == 8200 +#undef _unroll +#endif +#if KERN_TYPE == 10900 +#undef _unroll +#endif +#if KERN_TYPE == 11600 +#undef _unroll +#endif +#if KERN_TYPE == 12300 +#undef _unroll +#endif +#if KERN_TYPE == 12800 +#undef _unroll +#endif +#if KERN_TYPE == 12900 +#undef _unroll +#endif +#if KERN_TYPE == 13000 +#undef _unroll +#endif -#ifdef IS_GENERIC +#endif +#endif + +// Some algorithms break due to loop unrolling, it's unknown why, probably compiler bugs +// Can overlap with above cases + +#ifdef IS_AMD +#ifdef IS_GPU + +#if KERN_TYPE == 1750 +#undef _unroll +#endif +#if KERN_TYPE == 1760 +#undef _unroll +#endif +#if KERN_TYPE == 6500 +#undef _unroll +#endif +#if KERN_TYPE == 7100 +#undef _unroll +#endif +#if KERN_TYPE == 9600 +#undef _unroll +#endif +#if KERN_TYPE == 12200 +#undef _unroll +#endif +#if KERN_TYPE == 12300 +#undef _unroll +#endif + +#endif #endif diff --git a/src/oclHashcat.c b/src/oclHashcat.c index d624467..242ba90 100644 --- a/src/oclHashcat.c +++ b/src/oclHashcat.c @@ -13971,7 +13971,7 @@ int main (int argc, char **argv) // we don't have sm_* on vendors not NV but it doesn't matter - snprintf (build_opts, sizeof (build_opts) - 1, "-cl-std=CL1.1 -I\"%s/\" -DVENDOR_ID=%u -DCUDA_ARCH=%d -DVECT_SIZE=%u -DDEVICE_TYPE=%u", shared_dir, device_param->vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->vector_width, (u32) device_param->device_type); + snprintf (build_opts, sizeof (build_opts) - 1, "-cl-std=CL1.1 -I\"%s/\" -DVENDOR_ID=%u -DCUDA_ARCH=%d -DVECT_SIZE=%u -DDEVICE_TYPE=%u -DKERN_TYPE=%u -D_unroll", shared_dir, device_param->vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->vector_width, (u32) device_param->device_type, kern_type); /** * main kernel -- 2.25.1