Prepare for a more dynamic #pragma unroll use

author jsteube <jens.steube@gmail.com>

Mon, 9 May 2016 19:32:12 +0000 (21:32 +0200)

committer jsteube <jens.steube@gmail.com>

Mon, 9 May 2016 19:32:12 +0000 (21:32 +0200)
author jsteube <jens.steube@gmail.com>
Mon, 9 May 2016 19:32:12 +0000 (21:32 +0200)
committer jsteube <jens.steube@gmail.com>
Mon, 9 May 2016 19:32:12 +0000 (21:32 +0200)
diff --git a/OpenCL/kernel_serpent256.c b/OpenCL/kernel_serpent256.c

index c8a1be8..5c326f5 100644 (file)
--- a/OpenCL/kernel_serpent256.c
+++ b/OpenCL/kernel_serpent256.c
@@ -403,13 +403,17 @@
  
  void serpent256_set_key (u32 *ks, const u32 *ukey)
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      ks[i] = ukey[i];
    }
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 132; i++)
    {
      ks[i + 8] = rotl32 (ks[i + 7] ^ ks[i + 5] ^ ks[i + 3] ^ ks[i + 0] ^ 0x9e3779b9 ^ i, 11);
diff --git a/OpenCL/m01450_a0.cl b/OpenCL/m01450_a0.cl

index 6f45381..7e12d30 100644 (file)
--- a/OpenCL/m01450_a0.cl
+++ b/OpenCL/m01450_a0.cl
@@ -112,7 +112,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01450_a1.cl b/OpenCL/m01450_a1.cl

index 297810c..ab683cf 100644 (file)
--- a/OpenCL/m01450_a1.cl
+++ b/OpenCL/m01450_a1.cl
@@ -110,7 +110,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01450_a3.cl b/OpenCL/m01450_a3.cl

index f802ecd..6756571 100644 (file)
--- a/OpenCL/m01450_a3.cl
+++ b/OpenCL/m01450_a3.cl
@@ -110,7 +110,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01460_a0.cl b/OpenCL/m01460_a0.cl

index ef35cec..3824c2c 100644 (file)
--- a/OpenCL/m01460_a0.cl
+++ b/OpenCL/m01460_a0.cl
@@ -112,7 +112,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01460_a1.cl b/OpenCL/m01460_a1.cl

index f618ee9..6081ef6 100644 (file)
--- a/OpenCL/m01460_a1.cl
+++ b/OpenCL/m01460_a1.cl
@@ -110,7 +110,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01460_a3.cl b/OpenCL/m01460_a3.cl

index d623d6a..64c0df5 100644 (file)
--- a/OpenCL/m01460_a3.cl
+++ b/OpenCL/m01460_a3.cl
@@ -110,7 +110,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01500_a0.cl b/OpenCL/m01500_a0.cl

index 6bf6f34..0007970 100644 (file)
--- a/OpenCL/m01500_a0.cl
+++ b/OpenCL/m01500_a0.cl
@@ -371,7 +371,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
  
    c = c & 0x0fffffff;
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i++)
    {
      if ((i < 2) || (i == 8) || (i == 15))
@@ -430,6 +432,9 @@ void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], __local
  
    for (u32 i = 0; i < 25; i++)
    {
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (u32 j = 0; j < 16; j += 2)
      {
        u32x t;
diff --git a/OpenCL/m01500_a1.cl b/OpenCL/m01500_a1.cl

index 4d7a547..135cf96 100644 (file)
--- a/OpenCL/m01500_a1.cl
+++ b/OpenCL/m01500_a1.cl
@@ -368,7 +368,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
  
    c = c & 0x0fffffff;
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i++)
    {
      if ((i < 2) || (i == 8) || (i == 15))
@@ -427,6 +429,9 @@ void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], __local
  
    for (u32 i = 0; i < 25; i++)
    {
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (u32 j = 0; j < 16; j += 2)
      {
        u32x t;
diff --git a/OpenCL/m01500_a3.cl b/OpenCL/m01500_a3.cl

index 6b29a9a..2585829 100644 (file)
--- a/OpenCL/m01500_a3.cl
+++ b/OpenCL/m01500_a3.cl
@@ -1569,17 +1569,9 @@ void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, cons
  
    for (u32 ii = 0; ii < 25; ii++)
    {
-    #ifdef IS_NV
-    #if CUDA_ARCH >= 500
-    #else
+    #ifdef _unroll
      #pragma unroll
      #endif
-    #endif
-
-    #ifdef IS_AMD
-    #pragma unroll
-    #endif
-
      for (u32 i = 0; i < 2; i++)
      {
        if (i) KEYSET10 else KEYSET00
@@ -1705,17 +1697,9 @@ void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, cons
  
    for (u32 ii = 0; ii < 25; ii++)
    {
-    #ifdef IS_NV
-    #if CUDA_ARCH >= 500
-    #else
-    #pragma unroll
-    #endif
-    #endif
-
-    #ifdef IS_AMD
+    #ifdef _unroll
      #pragma unroll
      #endif
-
      for (u32 i = 0; i < 2; i++)
      {
        if (i) KEYSET10 else KEYSET00
@@ -2222,7 +2206,9 @@ void m01500m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
  
        u32 tmpResult = 0;
  
+      #ifdef _unroll
        #pragma unroll
+      #endif
        for (int i = 0; i < 32; i++)
        {
          const u32 b0 = -((search[0] >> i) & 1);
@@ -2249,7 +2235,9 @@ void m01500m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
      u32 out0[32];
      u32 out1[32];
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 32; i++)
      {
        out0[i] = out[ 0 + 31 - i];
@@ -2259,7 +2247,9 @@ void m01500m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
      transpose32c (out0);
      transpose32c (out1);
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int slice = 0; slice < 32; slice++)
      {
        const u32 r0 = out0[31 - slice];
@@ -2676,7 +2666,9 @@ __kernel void m01500_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
  
    const u32 w0s = (w0 << 1) & 0xfefefefe;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0, j = 0; i < 32; i += 8, j += 7)
    {
      atomic_or (&words_buf_r[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
diff --git a/OpenCL/m01700_a0.cl b/OpenCL/m01700_a0.cl

index 47993c2..5324cb0 100644 (file)
--- a/OpenCL/m01700_a0.cl
+++ b/OpenCL/m01700_a0.cl
@@ -116,7 +116,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01700_a1.cl b/OpenCL/m01700_a1.cl

index 742a78a..57adc4f 100644 (file)
--- a/OpenCL/m01700_a1.cl
+++ b/OpenCL/m01700_a1.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01700_a3.cl b/OpenCL/m01700_a3.cl

index fb31e08..c7df44e 100644 (file)
--- a/OpenCL/m01700_a3.cl
+++ b/OpenCL/m01700_a3.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01710_a0.cl b/OpenCL/m01710_a0.cl

index 7d1b1f8..e2172f0 100644 (file)
--- a/OpenCL/m01710_a0.cl
+++ b/OpenCL/m01710_a0.cl
@@ -116,7 +116,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01710_a1.cl b/OpenCL/m01710_a1.cl

index 900f684..794c00f 100644 (file)
--- a/OpenCL/m01710_a1.cl
+++ b/OpenCL/m01710_a1.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01710_a3.cl b/OpenCL/m01710_a3.cl

index 4bde04c..794b9b3 100644 (file)
--- a/OpenCL/m01710_a3.cl
+++ b/OpenCL/m01710_a3.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01720_a0.cl b/OpenCL/m01720_a0.cl

index 223b6c7..52eb80c 100644 (file)
--- a/OpenCL/m01720_a0.cl
+++ b/OpenCL/m01720_a0.cl
@@ -116,7 +116,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01720_a1.cl b/OpenCL/m01720_a1.cl

index 93a8b9e..0ed7984 100644 (file)
--- a/OpenCL/m01720_a1.cl
+++ b/OpenCL/m01720_a1.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01720_a3.cl b/OpenCL/m01720_a3.cl

index 802e977..12c1552 100644 (file)
--- a/OpenCL/m01720_a3.cl
+++ b/OpenCL/m01720_a3.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01730_a0.cl b/OpenCL/m01730_a0.cl

index a9a25cf..2dd7473 100644 (file)
--- a/OpenCL/m01730_a0.cl
+++ b/OpenCL/m01730_a0.cl
@@ -116,7 +116,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01730_a1.cl b/OpenCL/m01730_a1.cl

index 68d3a82..e2a0557 100644 (file)
--- a/OpenCL/m01730_a1.cl
+++ b/OpenCL/m01730_a1.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01730_a3.cl b/OpenCL/m01730_a3.cl

index 795d6d6..517822b 100644 (file)
--- a/OpenCL/m01730_a3.cl
+++ b/OpenCL/m01730_a3.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01740_a0.cl b/OpenCL/m01740_a0.cl

index 189996c..d6574ce 100644 (file)
--- a/OpenCL/m01740_a0.cl
+++ b/OpenCL/m01740_a0.cl
@@ -116,7 +116,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01740_a1.cl b/OpenCL/m01740_a1.cl

index 7677fe6..c859c41 100644 (file)
--- a/OpenCL/m01740_a1.cl
+++ b/OpenCL/m01740_a1.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01740_a3.cl b/OpenCL/m01740_a3.cl

index b14fe1e..97a98d3 100644 (file)
--- a/OpenCL/m01740_a3.cl
+++ b/OpenCL/m01740_a3.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01750_a0.cl b/OpenCL/m01750_a0.cl

index 0ad4790..4ba5ea2 100644 (file)
--- a/OpenCL/m01750_a0.cl
+++ b/OpenCL/m01750_a0.cl
@@ -116,7 +116,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con
  
    ROUND_STEP (0);
  
-  //#pragma unroll
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01750_a1.cl b/OpenCL/m01750_a1.cl

index f2697fc..65112a3 100644 (file)
--- a/OpenCL/m01750_a1.cl
+++ b/OpenCL/m01750_a1.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con
  
    ROUND_STEP (0);
  
-  //#pragma unroll
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01750_a3.cl b/OpenCL/m01750_a3.cl

index e213850..aff816f 100644 (file)
--- a/OpenCL/m01750_a3.cl
+++ b/OpenCL/m01750_a3.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con
  
    ROUND_STEP (0);
  
-  //#pragma unroll
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01760_a0.cl b/OpenCL/m01760_a0.cl

index f487180..8883d47 100644 (file)
--- a/OpenCL/m01760_a0.cl
+++ b/OpenCL/m01760_a0.cl
@@ -116,7 +116,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con
  
    ROUND_STEP (0);
  
-  //#pragma unroll
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01760_a1.cl b/OpenCL/m01760_a1.cl

index d329446..1d7895f 100644 (file)
--- a/OpenCL/m01760_a1.cl
+++ b/OpenCL/m01760_a1.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con
  
    ROUND_STEP (0);
  
-  //#pragma unroll
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01760_a3.cl b/OpenCL/m01760_a3.cl

index b45cfab..61bb6d7 100644 (file)
--- a/OpenCL/m01760_a3.cl
+++ b/OpenCL/m01760_a3.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con
  
    ROUND_STEP (0);
  
-  //#pragma unroll
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m01800.cl b/OpenCL/m01800.cl

index a557be9..21b1685 100644 (file)
--- a/OpenCL/m01800.cl
+++ b/OpenCL/m01800.cl
@@ -134,7 +134,9 @@ void sha512_transform (const u64 w[16], u64 digest[8])
  
    ROUND_STEP (0);
  
-  //#pragma unroll
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
@@ -500,7 +502,9 @@ __kernel void m01800_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf
      {
        const u32 block_len = wpc_len[pc];
  
-      #pragma unroll 64
+      #ifdef _unroll
+      #pragma unroll
+      #endif
        for (u32 k = 0, p = block_len - 64; k < 64; k++, p++)
        {
          PUTCHAR64_BE (block, p, GETCHAR64_BE (l_alt_result, k));
diff --git a/OpenCL/m03000_a0.cl b/OpenCL/m03000_a0.cl

index d2aa49d..dc3653d 100644 (file)
--- a/OpenCL/m03000_a0.cl
+++ b/OpenCL/m03000_a0.cl
@@ -361,7 +361,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
    u32x r = data[0];
    u32x l = data[1];
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i += 2)
    {
      u32x u;
@@ -414,7 +416,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
  
    c = c & 0x0fffffff;
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i++)
    {
      if ((i < 2) || (i == 8) || (i == 15))
diff --git a/OpenCL/m03000_a1.cl b/OpenCL/m03000_a1.cl

index 7079a7a..22b6de6 100644 (file)
--- a/OpenCL/m03000_a1.cl
+++ b/OpenCL/m03000_a1.cl
@@ -358,7 +358,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
    u32x r = data[0];
    u32x l = data[1];
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i += 2)
    {
      u32x u;
@@ -411,7 +413,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
  
    c = c & 0x0fffffff;
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i++)
    {
      if ((i < 2) || (i == 8) || (i == 15))
diff --git a/OpenCL/m03000_a3.cl b/OpenCL/m03000_a3.cl

index 08a7af1..f5d3380 100644 (file)
--- a/OpenCL/m03000_a3.cl
+++ b/OpenCL/m03000_a3.cl
@@ -1546,21 +1546,9 @@ void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32
    KXX_DECL u32 k36, k37, k38, k39, k40, k41;
    KXX_DECL u32 k42, k43, k44, k45, k46, k47;
  
-  #ifdef IS_NV
-  #if CUDA_ARCH >= 500
-  #else
+  #ifdef _unroll
    #pragma unroll
    #endif
-  #endif
-
-  #ifdef IS_AMD
-  #pragma unroll
-  #endif
-
-  #ifdef IS_GENERIC
-  #pragma unroll 1
-  #endif
-
    for (u32 i = 0; i < 2; i++)
    {
      if (i) KEYSET10 else KEYSET00
@@ -2060,7 +2048,9 @@ void m03000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
  
        u32 tmpResult = 0;
  
+      #ifdef _unroll
        #pragma unroll
+      #endif
        for (int i = 0; i < 32; i++)
        {
          const u32 b0 = -((search[0] >> i) & 1);
@@ -2087,7 +2077,9 @@ void m03000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
      u32 out0[32];
      u32 out1[32];
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 32; i++)
      {
        out0[i] = out[ 0 + 31 - i];
@@ -2097,7 +2089,9 @@ void m03000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
      transpose32c (out0);
      transpose32c (out1);
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int slice = 0; slice < 32; slice++)
      {
        const u32 r0 = out0[31 - slice];
diff --git a/OpenCL/m03100_a0.cl b/OpenCL/m03100_a0.cl

index 9220eb7..5f98fda 100644 (file)
--- a/OpenCL/m03100_a0.cl
+++ b/OpenCL/m03100_a0.cl
@@ -384,7 +384,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
    r = rotl32 (r, 3u);
    l = rotl32 (l, 3u);
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i += 2)
    {
      u32x u;
@@ -442,7 +444,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
  
    c = c & 0x0fffffff;
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i++)
    {
      if ((i < 2) || (i == 8) || (i == 15))
diff --git a/OpenCL/m03100_a1.cl b/OpenCL/m03100_a1.cl

index 5f3207a..b16db6c 100644 (file)
--- a/OpenCL/m03100_a1.cl
+++ b/OpenCL/m03100_a1.cl
@@ -381,7 +381,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
    r = rotl32 (r, 3u);
    l = rotl32 (l, 3u);
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i += 2)
    {
      u32x u;
@@ -439,7 +441,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
  
    c = c & 0x0fffffff;
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i++)
    {
      if ((i < 2) || (i == 8) || (i == 15))
diff --git a/OpenCL/m03100_a3.cl b/OpenCL/m03100_a3.cl

index 63a7eea..e1729e3 100644 (file)
--- a/OpenCL/m03100_a3.cl
+++ b/OpenCL/m03100_a3.cl
@@ -381,7 +381,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
    r = rotl32 (r, 3u);
    l = rotl32 (l, 3u);
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i += 2)
    {
      u32x u;
@@ -439,7 +441,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
  
    c = c & 0x0fffffff;
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i++)
    {
      if ((i < 2) || (i == 8) || (i == 15))
diff --git a/OpenCL/m03200.cl b/OpenCL/m03200.cl

index 9e5c798..a3789cd 100644 (file)
--- a/OpenCL/m03200.cl
+++ b/OpenCL/m03200.cl
@@ -679,7 +679,9 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_loop (__glo
      L0 = 0;
      R0 = 0;
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (u32 i = 0; i < 9; i++)
      {
        BF_ENCRYPT (L0, R0);
@@ -742,7 +744,9 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_loop (__glo
      L0 = 0;
      R0 = 0;
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (u32 i = 0; i < 9; i++)
      {
        BF_ENCRYPT (L0, R0);
diff --git a/OpenCL/m05200.cl b/OpenCL/m05200.cl

index 9e51ecb..abd2912 100644 (file)
--- a/OpenCL/m05200.cl
+++ b/OpenCL/m05200.cl
@@ -110,7 +110,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m05500_a0.cl b/OpenCL/m05500_a0.cl

index b7769f3..82ad293 100644 (file)
--- a/OpenCL/m05500_a0.cl
+++ b/OpenCL/m05500_a0.cl
@@ -358,7 +358,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
    u32x r = data[0];
    u32x l = data[1];
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i += 2)
    {
      u32x u;
@@ -411,7 +413,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
  
    c = c & 0x0fffffff;
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i++)
    {
      if ((i < 2) || (i == 8) || (i == 15))
diff --git a/OpenCL/m05500_a1.cl b/OpenCL/m05500_a1.cl

index e453129..be2994c 100644 (file)
--- a/OpenCL/m05500_a1.cl
+++ b/OpenCL/m05500_a1.cl
@@ -355,7 +355,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
    u32x r = data[0];
    u32x l = data[1];
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i += 2)
    {
      u32x u;
@@ -408,7 +410,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
  
    c = c & 0x0fffffff;
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i++)
    {
      if ((i < 2) || (i == 8) || (i == 15))
diff --git a/OpenCL/m05500_a3.cl b/OpenCL/m05500_a3.cl

index 733dbe2..a9fc359 100644 (file)
--- a/OpenCL/m05500_a3.cl
+++ b/OpenCL/m05500_a3.cl
@@ -356,7 +356,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
    u32x r = data[0];
    u32x l = data[1];
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i += 2)
    {
      u32x u;
@@ -409,7 +411,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
  
    c = c & 0x0fffffff;
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i++)
    {
      if ((i < 2) || (i == 8) || (i == 15))
diff --git a/OpenCL/m06100_a0.cl b/OpenCL/m06100_a0.cl

index 1e173ca..3b0d23c 100644 (file)
--- a/OpenCL/m06100_a0.cl
+++ b/OpenCL/m06100_a0.cl
@@ -1180,7 +1180,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
    u32x Lh[8];
    u32x Ll[8];
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      const u32x Lp0 = stateh[(i + 8) & 7] >> 24;
@@ -1233,7 +1235,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
      u32x Lh[8];
      u32x Ll[8];
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        const u32x Lp0 = Kh[(i + 8) & 7] >> 24;
@@ -1281,7 +1285,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
      Kh[7] = Lh[7];
      Kl[7] = Ll[7];
  
-    #pragma unroll 8
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        const u32x Lp0 = stateh[(i + 8) & 7] >> 24;
diff --git a/OpenCL/m06100_a1.cl b/OpenCL/m06100_a1.cl

index 2ce7c90..35005ae 100644 (file)
--- a/OpenCL/m06100_a1.cl
+++ b/OpenCL/m06100_a1.cl
@@ -1177,7 +1177,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
    u32x Lh[8];
    u32x Ll[8];
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      const u32x Lp0 = stateh[(i + 8) & 7] >> 24;
@@ -1230,7 +1232,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
      u32x Lh[8];
      u32x Ll[8];
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        const u32x Lp0 = Kh[(i + 8) & 7] >> 24;
@@ -1278,7 +1282,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
      Kh[7] = Lh[7];
      Kl[7] = Ll[7];
  
-    #pragma unroll 8
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        const u32x Lp0 = stateh[(i + 8) & 7] >> 24;
diff --git a/OpenCL/m06100_a3.cl b/OpenCL/m06100_a3.cl

index b8c30fc..d613d2e 100644 (file)
--- a/OpenCL/m06100_a3.cl
+++ b/OpenCL/m06100_a3.cl
@@ -1178,7 +1178,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
    u32x Lh[8];
    u32x Ll[8];
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      const u32x Lp0 = stateh[(i + 8) & 7] >> 24;
@@ -1231,7 +1233,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
      u32x Lh[8];
      u32x Ll[8];
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        const u32x Lp0 = Kh[(i + 8) & 7] >> 24;
@@ -1279,7 +1283,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
      Kh[7] = Lh[7];
      Kl[7] = Ll[7];
  
-    #pragma unroll 8
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        const u32x Lp0 = stateh[(i + 8) & 7] >> 24;
diff --git a/OpenCL/m06221.cl b/OpenCL/m06221.cl

index 2949080..78a82a6 100644 (file)
--- a/OpenCL/m06221.cl
+++ b/OpenCL/m06221.cl
@@ -115,7 +115,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8])
  
    ROUND_STEP (0);
  
-  //#pragma unroll
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m06222.cl b/OpenCL/m06222.cl

index 2620c72..40da089 100644 (file)
--- a/OpenCL/m06222.cl
+++ b/OpenCL/m06222.cl
@@ -115,7 +115,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8])
  
    ROUND_STEP (0);
  
-  //#pragma unroll
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m06223.cl b/OpenCL/m06223.cl

index a0eb7c2..df6c629 100644 (file)
--- a/OpenCL/m06223.cl
+++ b/OpenCL/m06223.cl
@@ -115,7 +115,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8])
  
    ROUND_STEP (0);
  
-  //#pragma unroll
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m06231.cl b/OpenCL/m06231.cl

index e64748f..b134603 100644 (file)
--- a/OpenCL/m06231.cl
+++ b/OpenCL/m06231.cl
@@ -1175,7 +1175,9 @@ void whirlpool_transform_last (u32 dgst[16], __local u32 (*s_Ch)[256], __local u
  
      u32 i;
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (i = 0; i < 8; i++)
      {
        const u32 Lp0 = Kh[(i + 8) & 7] >> 24;
@@ -1223,7 +1225,9 @@ void whirlpool_transform_last (u32 dgst[16], __local u32 (*s_Ch)[256], __local u
      Kh[7] = Lh[7];
      Kl[7] = Ll[7];
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (i = 0; i < 8; i++)
      {
        const u32 Lp0 = stateh[(i + 8) & 7] >> 24;
@@ -1371,7 +1375,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256
  
      u32 i;
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (i = 0; i < 8; i++)
      {
        const u32 Lp0 = Kh[(i + 8) & 7] >> 24;
@@ -1419,7 +1425,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256
      Kh[7] = Lh[7];
      Kl[7] = Ll[7];
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (i = 0; i < 8; i++)
      {
        const u32 Lp0 = stateh[(i + 8) & 7] >> 24;
diff --git a/OpenCL/m06232.cl b/OpenCL/m06232.cl

index c7b79d4..3563bd0 100644 (file)
--- a/OpenCL/m06232.cl
+++ b/OpenCL/m06232.cl
@@ -1172,7 +1172,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256
  
      u32 i;
  
-    #pragma unroll 8
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (i = 0; i < 8; i++)
      {
        const u8 Lp0 = Kh[(i + 8) & 7] >> 24;
@@ -1220,7 +1222,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256
      Kh[7] = Lh[7];
      Kl[7] = Ll[7];
  
-    #pragma unroll 8
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (i = 0; i < 8; i++)
      {
        const u8 Lp0 = stateh[(i + 8) & 7] >> 24;
diff --git a/OpenCL/m06233.cl b/OpenCL/m06233.cl

index 1fa1e1c..1e11789 100644 (file)
--- a/OpenCL/m06233.cl
+++ b/OpenCL/m06233.cl
@@ -1172,7 +1172,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256
  
      u32 i;
  
-    #pragma unroll 8
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (i = 0; i < 8; i++)
      {
        const u8 Lp0 = Kh[(i + 8) & 7] >> 24;
@@ -1220,7 +1222,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256
      Kh[7] = Lh[7];
      Kl[7] = Ll[7];
  
-    #pragma unroll 8
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (i = 0; i < 8; i++)
      {
        const u8 Lp0 = stateh[(i + 8) & 7] >> 24;
diff --git a/OpenCL/m06400.cl b/OpenCL/m06400.cl

index db12885..99e0dd8 100644 (file)
--- a/OpenCL/m06400.cl
+++ b/OpenCL/m06400.cl
@@ -110,7 +110,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m06500.cl b/OpenCL/m06500.cl

index 44e4d7f..b5d77e1 100644 (file)
--- a/OpenCL/m06500.cl
+++ b/OpenCL/m06500.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const
  
    ROUND_STEP (0);
  
-  //#pragma unroll
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m06600.cl b/OpenCL/m06600.cl

index 45b9e8b..6ee0523 100644 (file)
--- a/OpenCL/m06600.cl
+++ b/OpenCL/m06600.cl
@@ -716,7 +716,9 @@ void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __local u32 *
    rek[2] = userkey[2];
    rek[3] = userkey[3];
  
-  #pragma unroll 10
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0, j = 0; i < 10; i += 1, j += 4)
    {
      u32 temp = rek[j + 3];
@@ -1391,7 +1393,9 @@ __kernel void m06600_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf
  
    AES128_ExpandKey (ukey, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
  
-  #pragma unroll KEYLEN
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < KEYLEN; i++) rdk[i] = rek[i];
  
    AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
diff --git a/OpenCL/m06800.cl b/OpenCL/m06800.cl

index 91fecec..32d9a30 100644 (file)
--- a/OpenCL/m06800.cl
+++ b/OpenCL/m06800.cl
@@ -1075,7 +1075,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
@@ -1541,7 +1543,9 @@ __kernel void m06800_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf
  
    u32 rdk[KEYLEN];
  
-  #pragma unroll 60
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < KEYLEN; i++) rdk[i] = rek[i];
  
    AES256_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
diff --git a/OpenCL/m07100.cl b/OpenCL/m07100.cl

index a9f0900..1464b0e 100644 (file)
--- a/OpenCL/m07100.cl
+++ b/OpenCL/m07100.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8])
  
    ROUND_STEP (0);
  
-  //#pragma unroll
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m07400.cl b/OpenCL/m07400.cl

index 00eb8d2..8ee2fe1 100644 (file)
--- a/OpenCL/m07400.cl
+++ b/OpenCL/m07400.cl
@@ -112,7 +112,9 @@ void sha256_transform (const u32 w[16], u32 digest[8])
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
@@ -1303,7 +1305,9 @@ void sha256_transform (const u32 w[16], u32 digest[8])
  
    ROUND_STEP (0);
  
-  //#pragma unroll
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
@@ -1686,7 +1690,9 @@ __kernel void m07400_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf
  
      if (j1)
      {
-      #pragma unroll 32
+      #ifdef _unroll
+      #pragma unroll
+      #endif
        for (u32 k = 0, p = block_len - 32; k < 32; k++, p++)
        {
          PUTCHAR32_BE (block, p, GETCHAR32_BE (alt_result, k));
diff --git a/OpenCL/m07500_a0.cl b/OpenCL/m07500_a0.cl

index be23099..b337129 100644 (file)
--- a/OpenCL/m07500_a0.cl
+++ b/OpenCL/m07500_a0.cl
@@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m07500_a1.cl b/OpenCL/m07500_a1.cl

index 096888a..c99bb8d 100644 (file)
--- a/OpenCL/m07500_a1.cl
+++ b/OpenCL/m07500_a1.cl
@@ -45,7 +45,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -91,7 +93,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m07500_a3.cl b/OpenCL/m07500_a3.cl

index ae1a92d..279ad1b 100644 (file)
--- a/OpenCL/m07500_a3.cl
+++ b/OpenCL/m07500_a3.cl
@@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m07800_a0.cl b/OpenCL/m07800_a0.cl

index 3ca3be1..b4b98db 100644 (file)
--- a/OpenCL/m07800_a0.cl
+++ b/OpenCL/m07800_a0.cl
@@ -350,7 +350,9 @@ __kernel void m07800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
      digest[3] = SHA1M_D;
      digest[4] = SHA1M_E;
  
-    #pragma unroll 32
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (int i = 0; i < 32; i++) final[i] = 0;
  
      final[0] = w0[0];
@@ -594,7 +596,9 @@ __kernel void m07800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
      digest[3] = SHA1M_D;
      digest[4] = SHA1M_E;
  
-    #pragma unroll 32
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (int i = 0; i < 32; i++) final[i] = 0;
  
      final[0] = w0[0];
diff --git a/OpenCL/m07800_a1.cl b/OpenCL/m07800_a1.cl

index a58c8d9..feb36f1 100644 (file)
--- a/OpenCL/m07800_a1.cl
+++ b/OpenCL/m07800_a1.cl
@@ -408,7 +408,9 @@ __kernel void m07800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
      digest[3] = SHA1M_D;
      digest[4] = SHA1M_E;
  
-    #pragma unroll 32
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (int i = 0; i < 32; i++) final[i] = 0;
  
      final[0] = w0[0];
@@ -712,7 +714,9 @@ __kernel void m07800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
      digest[3] = SHA1M_D;
      digest[4] = SHA1M_E;
  
-    #pragma unroll 32
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (int i = 0; i < 32; i++) final[i] = 0;
  
      final[0] = w0[0];
diff --git a/OpenCL/m07800_a3.cl b/OpenCL/m07800_a3.cl

index 02d7719..3b75ace 100644 (file)
--- a/OpenCL/m07800_a3.cl
+++ b/OpenCL/m07800_a3.cl
@@ -324,7 +324,9 @@ void m07800m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __gl
      digest[3] = SHA1M_D;
      digest[4] = SHA1M_E;
  
-    #pragma unroll 32
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (int i = 0; i < 32; i++) final[i] = 0;
  
      final[0] = swap32 (w0[0]);
@@ -536,7 +538,9 @@ void m07800s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __gl
      digest[3] = SHA1M_D;
      digest[4] = SHA1M_E;
  
-    #pragma unroll 32
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (int i = 0; i < 32; i++) final[i] = 0;
  
      final[0] = swap32 (w0[0]);
diff --git a/OpenCL/m07900.cl b/OpenCL/m07900.cl

index 40b13a7..7226bec 100644 (file)
--- a/OpenCL/m07900.cl
+++ b/OpenCL/m07900.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8])
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m08000_a0.cl b/OpenCL/m08000_a0.cl

index d4f62b5..33266e0 100644 (file)
--- a/OpenCL/m08000_a0.cl
+++ b/OpenCL/m08000_a0.cl
@@ -119,7 +119,9 @@ void sha256_transform (u32x digest[8], const u32x w[16])
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
@@ -168,7 +170,9 @@ void sha256_transform_z (u32x digest[8])
  
    ROUND_STEP_Z (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_STEP_Z (i);
@@ -217,7 +221,9 @@ void sha256_transform_s (u32x digest[8], __local u32 *w)
  
    ROUND_STEP_S (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_STEP_S (i);
@@ -270,7 +276,9 @@ __kernel void m08000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
    {
      w_s1[15] =               0 | salt_buf0 >> 16;
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 16; i < 64; i++)
      {
        w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]);
@@ -281,7 +289,9 @@ __kernel void m08000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
      w_s2[ 2] = salt_buf2 << 16 | 0;
      w_s2[15] = (510 + 8) * 8;
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 16; i < 64; i++)
      {
        w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]);
@@ -437,7 +447,9 @@ __kernel void m08000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
    {
      w_s1[15] =               0 | salt_buf0 >> 16;
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 16; i < 64; i++)
      {
        w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]);
@@ -448,7 +460,9 @@ __kernel void m08000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
      w_s2[ 2] = salt_buf2 << 16 | 0;
      w_s2[15] = (510 + 8) * 8;
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 16; i < 64; i++)
      {
        w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]);
diff --git a/OpenCL/m08000_a1.cl b/OpenCL/m08000_a1.cl

index c525626..efbc2bb 100644 (file)
--- a/OpenCL/m08000_a1.cl
+++ b/OpenCL/m08000_a1.cl
@@ -117,7 +117,9 @@ void sha256_transform (u32x digest[8], const u32x w[16])
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
@@ -166,7 +168,9 @@ void sha256_transform_z (u32x digest[8])
  
    ROUND_STEP_Z (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_STEP_Z (i);
@@ -215,7 +219,9 @@ void sha256_transform_s (u32x digest[8], __local u32 *w)
  
    ROUND_STEP_S (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_STEP_S (i);
@@ -268,7 +274,9 @@ __kernel void m08000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
    {
      w_s1[15] =               0 | salt_buf0 >> 16;
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 16; i < 64; i++)
      {
        w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]);
@@ -279,7 +287,9 @@ __kernel void m08000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
      w_s2[ 2] = salt_buf2 << 16 | 0;
      w_s2[15] = (510 + 8) * 8;
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 16; i < 64; i++)
      {
        w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]);
@@ -489,7 +499,9 @@ __kernel void m08000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
    {
      w_s1[15] =               0 | salt_buf0 >> 16;
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 16; i < 64; i++)
      {
        w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]);
@@ -500,7 +512,9 @@ __kernel void m08000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
      w_s2[ 2] = salt_buf2 << 16 | 0;
      w_s2[15] = (510 + 8) * 8;
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 16; i < 64; i++)
      {
        w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]);
diff --git a/OpenCL/m08000_a3.cl b/OpenCL/m08000_a3.cl

index 5a77ee5..b670305 100644 (file)
--- a/OpenCL/m08000_a3.cl
+++ b/OpenCL/m08000_a3.cl
@@ -117,7 +117,9 @@ void sha256_transform (u32x digest[8], const u32x w[16])
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
@@ -166,7 +168,9 @@ void sha256_transform_z (u32x digest[8])
  
    ROUND_STEP_Z (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_STEP_Z (i);
@@ -215,7 +219,9 @@ void sha256_transform_s (u32x digest[8], __local u32 *w)
  
    ROUND_STEP_S (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_STEP_S (i);
@@ -265,7 +271,9 @@ void m08000m (__local u32 *w_s1, __local u32 *w_s2, u32 w[16], const u32 pw_len,
    {
      w_s1[15] =               0 | salt_buf0 >> 16;
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 16; i < 64; i++)
      {
        w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]);
@@ -276,7 +284,9 @@ void m08000m (__local u32 *w_s1, __local u32 *w_s2, u32 w[16], const u32 pw_len,
      w_s2[ 2] = salt_buf2 << 16 | 0;
      w_s2[15] = (510 + 8) * 8;
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 16; i < 64; i++)
      {
        w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]);
@@ -383,7 +393,9 @@ void m08000s (__local u32 *w_s1, __local u32 *w_s2, u32 w[16], const u32 pw_len,
    {
      w_s1[15] =               0 | salt_buf0 >> 16;
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 16; i < 64; i++)
      {
        w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]);
@@ -394,7 +406,9 @@ void m08000s (__local u32 *w_s1, __local u32 *w_s2, u32 w[16], const u32 pw_len,
      w_s2[ 2] = salt_buf2 << 16 | 0;
      w_s2[15] = (510 + 8) * 8;
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 16; i < 64; i++)
      {
        w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]);
diff --git a/OpenCL/m08200.cl b/OpenCL/m08200.cl

index 73e076d..dc7b768 100644 (file)
--- a/OpenCL/m08200.cl
+++ b/OpenCL/m08200.cl
@@ -134,7 +134,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
@@ -321,7 +323,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8])
  
    ROUND512_STEP (0);
  
-  //#pragma unroll
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND512_EXPAND (); ROUND512_STEP (i);
diff --git a/OpenCL/m08500_a0.cl b/OpenCL/m08500_a0.cl

index e9b845e..d21d101 100644 (file)
--- a/OpenCL/m08500_a0.cl
+++ b/OpenCL/m08500_a0.cl
@@ -410,7 +410,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
    u32x r = data[0];
    u32x l = data[1];
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i += 2)
    {
      u32x u;
@@ -463,7 +465,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
  
    c = c & 0x0fffffff;
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i++)
    {
      if ((i < 2) || (i == 8) || (i == 15))
diff --git a/OpenCL/m08500_a1.cl b/OpenCL/m08500_a1.cl

index 65bb133..20f2599 100644 (file)
--- a/OpenCL/m08500_a1.cl
+++ b/OpenCL/m08500_a1.cl
@@ -407,7 +407,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
    u32x r = data[0];
    u32x l = data[1];
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i += 2)
    {
      u32x u;
@@ -460,7 +462,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
  
    c = c & 0x0fffffff;
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i++)
    {
      if ((i < 2) || (i == 8) || (i == 15))
diff --git a/OpenCL/m08500_a3.cl b/OpenCL/m08500_a3.cl

index dba7ada..a97d62d 100644 (file)
--- a/OpenCL/m08500_a3.cl
+++ b/OpenCL/m08500_a3.cl
@@ -408,7 +408,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
    u32x r = data[0];
    u32x l = data[1];
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i += 2)
    {
      u32x u;
@@ -461,7 +463,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
  
    c = c & 0x0fffffff;
  
-  #pragma unroll 16
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i++)
    {
      if ((i < 2) || (i == 8) || (i == 15))
diff --git a/OpenCL/m08600_a0.cl b/OpenCL/m08600_a0.cl

index 34f2526..c4a043c 100644 (file)
--- a/OpenCL/m08600_a0.cl
+++ b/OpenCL/m08600_a0.cl
@@ -82,7 +82,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
    {
      u32 s = 48;
  
-    #pragma unroll 12
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (int j = 0; j < 12; j++)
      {
        u32x tmp_in = in[j];
@@ -104,7 +106,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag
  
    u32x c;
  
-  #pragma unroll 4
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 0; i < 4; i++)
    {
      t ^= (in[i] >>  0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c <<  0; t = ((out[i] >>  0) & 0xff);
diff --git a/OpenCL/m08600_a1.cl b/OpenCL/m08600_a1.cl

index 90cf979..58be048 100644 (file)
--- a/OpenCL/m08600_a1.cl
+++ b/OpenCL/m08600_a1.cl
@@ -79,7 +79,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
    {
      u32 s = 48;
  
-    #pragma unroll 12
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (int j = 0; j < 12; j++)
      {
        u32x tmp_in = in[j];
@@ -101,7 +103,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag
  
    u32x c;
  
-  #pragma unroll 4
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 0; i < 4; i++)
    {
      t ^= (in[i] >>  0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c <<  0; t = ((out[i] >>  0) & 0xff);
diff --git a/OpenCL/m08600_a3.cl b/OpenCL/m08600_a3.cl

index bd30653..5b7d924 100644 (file)
--- a/OpenCL/m08600_a3.cl
+++ b/OpenCL/m08600_a3.cl
@@ -79,7 +79,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
    {
      u32 s = 48;
  
-    #pragma unroll 12
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (int j = 0; j < 12; j++)
      {
        u32x tmp_in = in[j];
@@ -101,7 +103,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag
  
    u32x c;
  
-  #pragma unroll 4
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 0; i < 4; i++)
    {
      t ^= (in[i] >>  0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c <<  0; t = ((out[i] >>  0) & 0xff);
diff --git a/OpenCL/m08700_a0.cl b/OpenCL/m08700_a0.cl

index 4e37c47..1bcb8d6 100644 (file)
--- a/OpenCL/m08700_a0.cl
+++ b/OpenCL/m08700_a0.cl
@@ -94,7 +94,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
    {
      u32 s = 48;
  
-    #pragma unroll 12
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (int j = 0; j < 12; j++)
      {
        u32x tmp_in = in[j];
@@ -116,7 +118,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag
  
    u32x c;
  
-  #pragma unroll 4
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 0; i < 4; i++)
    {
      t ^= (in[i] >>  0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c <<  0; t = ((out[i] >>  0) & 0xff);
diff --git a/OpenCL/m08700_a1.cl b/OpenCL/m08700_a1.cl

index cc9902d..ed6d197 100644 (file)
--- a/OpenCL/m08700_a1.cl
+++ b/OpenCL/m08700_a1.cl
@@ -91,7 +91,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
    {
      u32 s = 48;
  
-    #pragma unroll 12
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (int j = 0; j < 12; j++)
      {
        u32x tmp_in = in[j];
@@ -113,7 +115,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag
  
    u32x c;
  
-  #pragma unroll 4
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 0; i < 4; i++)
    {
      t ^= (in[i] >>  0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c <<  0; t = ((out[i] >>  0) & 0xff);
diff --git a/OpenCL/m08700_a3.cl b/OpenCL/m08700_a3.cl

index d0a291d..b6fd2d3 100644 (file)
--- a/OpenCL/m08700_a3.cl
+++ b/OpenCL/m08700_a3.cl
@@ -93,7 +93,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
    {
      u32 s = 48;
  
-    #pragma unroll 12
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (int j = 0; j < 12; j++)
      {
        u32x tmp_in = in[j];
@@ -115,7 +117,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag
  
    u32x c;
  
-  #pragma unroll 4
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 0; i < 4; i++)
    {
      t ^= (in[i] >>  0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c <<  0; t = ((out[i] >>  0) & 0xff);
diff --git a/OpenCL/m08800.cl b/OpenCL/m08800.cl

index 62d0cae..934310b 100644 (file)
--- a/OpenCL/m08800.cl
+++ b/OpenCL/m08800.cl
@@ -1212,7 +1212,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m08900.cl b/OpenCL/m08900.cl

index 5f90dc9..85ad68d 100644 (file)
--- a/OpenCL/m08900.cl
+++ b/OpenCL/m08900.cl
@@ -110,7 +110,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
@@ -705,7 +707,9 @@ void scrypt_smix (uint4 *X, uint4 *T, const u32 phy, __global uint4 *V)
  
    const u32 x = gid % xSIZE;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < STATE_CNT4; i += 4)
    {
      T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
@@ -743,7 +747,9 @@ void scrypt_smix (uint4 *X, uint4 *T, const u32 phy, __global uint4 *V)
      salsa_r (X);
    }
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < STATE_CNT4; i += 4)
    {
      T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
@@ -910,12 +916,16 @@ __kernel void m08900_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf
    uint4 X[STATE_CNT4];
    uint4 T[STATE_CNT4];
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int z = 0; z < STATE_CNT4; z++) X[z] = swap32_4 (tmps[gid].P[z]);
  
    scrypt_smix (X, T, scrypt_phy, d_scryptV_buf);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[z] = swap32_4 (X[z]);
  
    #if SCRYPT_P >= 1
diff --git a/OpenCL/m09100.cl b/OpenCL/m09100.cl

index eb4333e..4efe528 100644 (file)
--- a/OpenCL/m09100.cl
+++ b/OpenCL/m09100.cl
@@ -75,7 +75,9 @@ void lotus_mix (u32 *in, __local u32 *s_lotus_magic_table)
    {
      u32 s = 48;
  
-    #pragma unroll 12
+    #ifdef _unroll
+    #pragma unroll
+    #endif
      for (int j = 0; j < 12; j++)
      {
        u32 tmp_in = in[j];
@@ -97,7 +99,9 @@ void lotus_transform_password (u32 in[4], u32 out[4], __local u32 *s_lotus_magic
  
    u32 c;
  
-  #pragma unroll 4
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 0; i < 4; i++)
    {
      t ^= (in[i] >>  0) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c <<  0; t = ((out[i] >>  0) & 0xff);
diff --git a/OpenCL/m09600.cl b/OpenCL/m09600.cl

index f6d4ba6..eb28f6c 100644 (file)
--- a/OpenCL/m09600.cl
+++ b/OpenCL/m09600.cl
@@ -1073,10 +1073,7 @@ void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const
  
    ROUND_STEP (0);
  
-  #ifdef IS_AMD
-  // #pragma unroll
-  // breaks compiler
-  #else
+  #ifdef _unroll
    #pragma unroll
    #endif
    for (int i = 16; i < 80; i += 16)
diff --git a/OpenCL/m09700_a0.cl b/OpenCL/m09700_a0.cl

index d9ae316..4fa60b9 100644 (file)
--- a/OpenCL/m09700_a0.cl
+++ b/OpenCL/m09700_a0.cl
@@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m09700_a1.cl b/OpenCL/m09700_a1.cl

index 67671ad..65162f7 100644 (file)
--- a/OpenCL/m09700_a1.cl
+++ b/OpenCL/m09700_a1.cl
@@ -45,7 +45,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -91,7 +93,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m09700_a3.cl b/OpenCL/m09700_a3.cl

index 8e1393e..45128af 100644 (file)
--- a/OpenCL/m09700_a3.cl
+++ b/OpenCL/m09700_a3.cl
@@ -44,7 +44,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -90,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m09710_a0.cl b/OpenCL/m09710_a0.cl

index dffcdca..b992e8d 100644 (file)
--- a/OpenCL/m09710_a0.cl
+++ b/OpenCL/m09710_a0.cl
@@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m09710_a1.cl b/OpenCL/m09710_a1.cl

index a0353a4..c761e44 100644 (file)
--- a/OpenCL/m09710_a1.cl
+++ b/OpenCL/m09710_a1.cl
@@ -45,7 +45,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -91,7 +93,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m09710_a3.cl b/OpenCL/m09710_a3.cl

index 8aa72e2..61f23d1 100644 (file)
--- a/OpenCL/m09710_a3.cl
+++ b/OpenCL/m09710_a3.cl
@@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m09800_a0.cl b/OpenCL/m09800_a0.cl

index 8b9ef18..31f6355 100644 (file)
--- a/OpenCL/m09800_a0.cl
+++ b/OpenCL/m09800_a0.cl
@@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m09800_a1.cl b/OpenCL/m09800_a1.cl

index 920a1e1..1c11f01 100644 (file)
--- a/OpenCL/m09800_a1.cl
+++ b/OpenCL/m09800_a1.cl
@@ -45,7 +45,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -91,7 +93,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m09800_a3.cl b/OpenCL/m09800_a3.cl

index 92589de..6fc92c0 100644 (file)
--- a/OpenCL/m09800_a3.cl
+++ b/OpenCL/m09800_a3.cl
@@ -44,7 +44,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -90,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m09810_a0.cl b/OpenCL/m09810_a0.cl

index 278bda6..40fb8a0 100644 (file)
--- a/OpenCL/m09810_a0.cl
+++ b/OpenCL/m09810_a0.cl
@@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m09810_a1.cl b/OpenCL/m09810_a1.cl

index 298767a..527b1bd 100644 (file)
--- a/OpenCL/m09810_a1.cl
+++ b/OpenCL/m09810_a1.cl
@@ -45,7 +45,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -91,7 +93,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m09810_a3.cl b/OpenCL/m09810_a3.cl

index dac5f94..5bfc896 100644 (file)
--- a/OpenCL/m09810_a3.cl
+++ b/OpenCL/m09810_a3.cl
@@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m10400_a0.cl b/OpenCL/m10400_a0.cl

index 27d8718..90cd0bc 100644 (file)
--- a/OpenCL/m10400_a0.cl
+++ b/OpenCL/m10400_a0.cl
@@ -61,7 +61,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      ptr[i] = v; v += a;
@@ -75,7 +77,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    u32 j = 0;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 255; i += 5)
    {
      j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j);
@@ -90,7 +94,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4])
  {
-  #pragma unroll 4
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m10400_a1.cl b/OpenCL/m10400_a1.cl

index 4a27f7c..093af29 100644 (file)
--- a/OpenCL/m10400_a1.cl
+++ b/OpenCL/m10400_a1.cl
@@ -59,7 +59,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      ptr[i] = v; v += a;
@@ -73,7 +75,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    u32 j = 0;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 255; i += 5)
    {
      j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j);
@@ -88,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4])
  {
-  #pragma unroll 4
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m10400_a3.cl b/OpenCL/m10400_a3.cl

index f91a348..92ebfe5 100644 (file)
--- a/OpenCL/m10400_a3.cl
+++ b/OpenCL/m10400_a3.cl
@@ -59,7 +59,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      ptr[i] = v; v += a;
@@ -73,7 +75,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    u32 j = 0;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 255; i += 5)
    {
      j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j);
@@ -88,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4])
  {
-  #pragma unroll 4
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m10410_a0.cl b/OpenCL/m10410_a0.cl

index ee8ca20..518b84c 100644 (file)
--- a/OpenCL/m10410_a0.cl
+++ b/OpenCL/m10410_a0.cl
@@ -61,7 +61,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      ptr[i] = v; v += a;
@@ -75,7 +77,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    u32 j = 0;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 255; i += 5)
    {
      j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j);
@@ -90,7 +94,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4])
  {
-  #pragma unroll 4
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m10410_a1.cl b/OpenCL/m10410_a1.cl

index 35281fb..ec226b8 100644 (file)
--- a/OpenCL/m10410_a1.cl
+++ b/OpenCL/m10410_a1.cl
@@ -59,7 +59,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      ptr[i] = v; v += a;
@@ -73,7 +75,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    u32 j = 0;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 255; i += 5)
    {
      j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j);
@@ -88,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4])
  {
-  #pragma unroll 4
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m10410_a3.cl b/OpenCL/m10410_a3.cl

index f5d2567..d43b657 100644 (file)
--- a/OpenCL/m10410_a3.cl
+++ b/OpenCL/m10410_a3.cl
@@ -59,7 +59,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      ptr[i] = v; v += a;
@@ -73,7 +75,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    u32 j = 0;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 255; i += 5)
    {
      j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j);
@@ -88,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4])
  {
-  #pragma unroll 4
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m10500.cl b/OpenCL/m10500.cl

index cf763af..a24469f 100644 (file)
--- a/OpenCL/m10500.cl
+++ b/OpenCL/m10500.cl
@@ -56,7 +56,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -64,7 +66,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    u32 j = 0;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i++)
    {
      u32 idx = i * 16;
@@ -103,7 +107,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m10700.cl b/OpenCL/m10700.cl

index c5ec11d..1ab5fb7 100644 (file)
--- a/OpenCL/m10700.cl
+++ b/OpenCL/m10700.cl
@@ -132,7 +132,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
  
    ROUND256_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND256_EXPAND (); ROUND256_STEP (i);
@@ -242,7 +244,9 @@ void sha384_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const
  
    ROUND384_STEP (0);
  
-  //#pragma unroll
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND384_EXPAND (); ROUND384_STEP (i);
@@ -352,7 +356,9 @@ void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const
  
    ROUND512_STEP (0);
  
-  //#pragma unroll
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND512_EXPAND (); ROUND512_STEP (i);
diff --git a/OpenCL/m10800_a0.cl b/OpenCL/m10800_a0.cl

index 78ad998..17f7540 100644 (file)
--- a/OpenCL/m10800_a0.cl
+++ b/OpenCL/m10800_a0.cl
@@ -116,7 +116,9 @@ void sha384_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m10800_a1.cl b/OpenCL/m10800_a1.cl

index 24762c0..bcb0c58 100644 (file)
--- a/OpenCL/m10800_a1.cl
+++ b/OpenCL/m10800_a1.cl
@@ -114,7 +114,9 @@ void sha384_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m10800_a3.cl b/OpenCL/m10800_a3.cl

index c6df671..7bfc741 100644 (file)
--- a/OpenCL/m10800_a3.cl
+++ b/OpenCL/m10800_a3.cl
@@ -114,7 +114,9 @@ void sha384_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m10900.cl b/OpenCL/m10900.cl

index ef82e3c..0107d84 100644 (file)
--- a/OpenCL/m10900.cl
+++ b/OpenCL/m10900.cl
@@ -111,7 +111,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m11300.cl b/OpenCL/m11300.cl

index 0547e57..46af4c2 100644 (file)
--- a/OpenCL/m11300.cl
+++ b/OpenCL/m11300.cl
@@ -986,7 +986,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8])
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m11600.cl b/OpenCL/m11600.cl

index 16bc140..c83bf04 100644 (file)
--- a/OpenCL/m11600.cl
+++ b/OpenCL/m11600.cl
@@ -985,7 +985,9 @@ void sha256_transform (const u32 w[16], u32 digest[8])
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m11700_a0.cl b/OpenCL/m11700_a0.cl

index ec10233..2e93185 100644 (file)
--- a/OpenCL/m11700_a0.cl
+++ b/OpenCL/m11700_a0.cl
@@ -2250,7 +2250,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
    u64x s[8];
    u64x t[8];
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      t[i] = h[i];
@@ -2261,7 +2263,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
      k[i] = SBOG_LPSti64;
    }
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      s[i] = m[i];
@@ -2269,13 +2273,17 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
  
    for (int r = 0; r < 12; r++)
    {
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        t[i] = s[i] ^ k[i];
      }
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        s[i] = SBOG_LPSti64;
@@ -2286,14 +2294,18 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
        t[i] = k[i] ^ sbob_rc64[r][i];
      }
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        k[i] = SBOG_LPSti64;
      }
    }
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      h[i] ^= s[i] ^ k[i] ^ m[i];
diff --git a/OpenCL/m11700_a1.cl b/OpenCL/m11700_a1.cl

index 1d194bb..36ce0b9 100644 (file)
--- a/OpenCL/m11700_a1.cl
+++ b/OpenCL/m11700_a1.cl
@@ -2247,7 +2247,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
    u64x s[8];
    u64x t[8];
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      t[i] = h[i];
@@ -2258,7 +2260,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
      k[i] = SBOG_LPSti64;
    }
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      s[i] = m[i];
@@ -2266,13 +2270,17 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
  
    for (int r = 0; r < 12; r++)
    {
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        t[i] = s[i] ^ k[i];
      }
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        s[i] = SBOG_LPSti64;
@@ -2283,14 +2291,18 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
        t[i] = k[i] ^ sbob_rc64[r][i];
      }
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        k[i] = SBOG_LPSti64;
      }
    }
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      h[i] ^= s[i] ^ k[i] ^ m[i];
diff --git a/OpenCL/m11700_a3.cl b/OpenCL/m11700_a3.cl

index 63e4f0a..1569b1f 100644 (file)
--- a/OpenCL/m11700_a3.cl
+++ b/OpenCL/m11700_a3.cl
@@ -2248,7 +2248,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
    u64x s[8];
    u64x t[8];
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      t[i] = h[i];
@@ -2259,7 +2261,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
      k[i] = SBOG_LPSti64;
    }
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      s[i] = m[i];
@@ -2267,13 +2271,17 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
  
    for (int r = 0; r < 12; r++)
    {
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        t[i] = s[i] ^ k[i];
      }
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        s[i] = SBOG_LPSti64;
@@ -2284,14 +2292,18 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
        t[i] = k[i] ^ sbob_rc64[r][i];
      }
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        k[i] = SBOG_LPSti64;
      }
    }
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      h[i] ^= s[i] ^ k[i] ^ m[i];
diff --git a/OpenCL/m11800_a0.cl b/OpenCL/m11800_a0.cl

index 9b6b80c..f282c3e 100644 (file)
--- a/OpenCL/m11800_a0.cl
+++ b/OpenCL/m11800_a0.cl
@@ -2250,7 +2250,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
    u64x s[8];
    u64x t[8];
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      t[i] = h[i];
@@ -2261,7 +2263,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
      k[i] = SBOG_LPSti64;
    }
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      s[i] = m[i];
@@ -2269,13 +2273,17 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
  
    for (int r = 0; r < 12; r++)
    {
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        t[i] = s[i] ^ k[i];
      }
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        s[i] = SBOG_LPSti64;
@@ -2286,14 +2294,18 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
        t[i] = k[i] ^ sbob_rc64[r][i];
      }
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        k[i] = SBOG_LPSti64;
      }
    }
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      h[i] ^= s[i] ^ k[i] ^ m[i];
diff --git a/OpenCL/m11800_a1.cl b/OpenCL/m11800_a1.cl

index f136fb5..cf8392c 100644 (file)
--- a/OpenCL/m11800_a1.cl
+++ b/OpenCL/m11800_a1.cl
@@ -2247,7 +2247,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
    u64x s[8];
    u64x t[8];
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      t[i] = h[i];
@@ -2258,7 +2260,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
      k[i] = SBOG_LPSti64;
    }
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      s[i] = m[i];
@@ -2266,13 +2270,17 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
  
    for (int r = 0; r < 12; r++)
    {
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        t[i] = s[i] ^ k[i];
      }
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        s[i] = SBOG_LPSti64;
@@ -2283,14 +2291,18 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
        t[i] = k[i] ^ sbob_rc64[r][i];
      }
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        k[i] = SBOG_LPSti64;
      }
    }
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      h[i] ^= s[i] ^ k[i] ^ m[i];
diff --git a/OpenCL/m11800_a3.cl b/OpenCL/m11800_a3.cl

index 1028117..2370800 100644 (file)
--- a/OpenCL/m11800_a3.cl
+++ b/OpenCL/m11800_a3.cl
@@ -2248,7 +2248,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
    u64x s[8];
    u64x t[8];
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      t[i] = h[i];
@@ -2259,7 +2261,9 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
      k[i] = SBOG_LPSti64;
    }
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      s[i] = m[i];
@@ -2267,13 +2271,17 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
  
    for (int r = 0; r < 12; r++)
    {
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        t[i] = s[i] ^ k[i];
      }
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        s[i] = SBOG_LPSti64;
@@ -2284,14 +2292,18 @@ void streebog_g (u64x h[8], const u64x m[8], __local u64 (*s_sbob_sl64)[256])
        t[i] = k[i] ^ sbob_rc64[r][i];
      }
  
+    #ifdef _unroll
      #pragma unroll
+    #endif
      for (int i = 0; i < 8; i++)
      {
        k[i] = SBOG_LPSti64;
      }
    }
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < 8; i++)
    {
      h[i] ^= s[i] ^ k[i] ^ m[i];
diff --git a/OpenCL/m12200.cl b/OpenCL/m12200.cl

index 001a5a2..7b789bc 100644 (file)
--- a/OpenCL/m12200.cl
+++ b/OpenCL/m12200.cl
@@ -114,10 +114,7 @@ void sha512_transform (const u64 w[16], u64 dgst[8])
  
    ROUND_STEP (0);
  
-  #ifdef IS_AMD
-  // #pragma unroll
-  // breaks compiler
-  #else
+  #ifdef _unroll
    #pragma unroll
    #endif
    for (int i = 16; i < 80; i += 16)
diff --git a/OpenCL/m12300.cl b/OpenCL/m12300.cl

index 5ab6a05..5430b4f 100644 (file)
--- a/OpenCL/m12300.cl
+++ b/OpenCL/m12300.cl
@@ -114,7 +114,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8])
  
    ROUND_STEP (0);
  
-  //#pragma unroll
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (int i = 16; i < 80; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m12400.cl b/OpenCL/m12400.cl

index b755f39..7902c5d 100644 (file)
--- a/OpenCL/m12400.cl
+++ b/OpenCL/m12400.cl
@@ -376,7 +376,9 @@ void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 (*s_
  
    c = c & 0x0fffffff;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 16; i++)
    {
      if ((i < 2) || (i == 8) || (i == 15))
diff --git a/OpenCL/m12800.cl b/OpenCL/m12800.cl

index 2a9bbfd..a9e3f93 100644 (file)
--- a/OpenCL/m12800.cl
+++ b/OpenCL/m12800.cl
@@ -177,7 +177,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m12900.cl b/OpenCL/m12900.cl

index 2ad3de6..817a819 100644 (file)
--- a/OpenCL/m12900.cl
+++ b/OpenCL/m12900.cl
@@ -111,7 +111,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m13000.cl b/OpenCL/m13000.cl

index f12f630..267081c 100644 (file)
--- a/OpenCL/m13000.cl
+++ b/OpenCL/m13000.cl
@@ -111,7 +111,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m13100_a0.cl b/OpenCL/m13100_a0.cl

index ef288a5..430e3d9 100644 (file)
--- a/OpenCL/m13100_a0.cl
+++ b/OpenCL/m13100_a0.cl
@@ -50,7 +50,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -96,7 +98,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __global u32 *in, u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m13100_a1.cl b/OpenCL/m13100_a1.cl

index 8ddaac7..fc7840a 100644 (file)
--- a/OpenCL/m13100_a1.cl
+++ b/OpenCL/m13100_a1.cl
@@ -48,7 +48,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -94,7 +96,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __global u32 *in, u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m13100_a3.cl b/OpenCL/m13100_a3.cl

index 385773c..4a2878e 100644 (file)
--- a/OpenCL/m13100_a3.cl
+++ b/OpenCL/m13100_a3.cl
@@ -48,7 +48,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
    __local u32 *ptr = (__local u32 *) rc4_key->S;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 i = 0; i < 64; i++)
    {
      *ptr++ = v; v += a;
@@ -94,7 +96,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
  
  u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __global u32 *in, u32 out[4])
  {
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (u32 k = 0; k < 4; k++)
    {
      u32 xor4 = 0;
diff --git a/OpenCL/m13200.cl b/OpenCL/m13200.cl

index c0b4af5..3daa846 100644 (file)
--- a/OpenCL/m13200.cl
+++ b/OpenCL/m13200.cl
@@ -4,7 +4,7 @@
  
   * License.....: MIT
   */
- 
+
  #define _SHA1_
  
  #include "include/constants.h"
@@ -713,7 +713,9 @@ void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __local u32 *
    rek[2] = userkey[2];
    rek[3] = userkey[3];
  
-  #pragma unroll 10
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0, j = 0; i < 10; i += 1, j += 4)
    {
      u32 temp = rek[j + 3];
@@ -1016,7 +1018,7 @@ __kernel void m13200_init (__global pw_t *pws, __global kernel_rule_t *rules_buf
    const u32 pw_len = pws[gid].pw_len;
  
    append_0x80_4x4 (w0, w1, w2, w3, pw_len);
-  
+
    w0[0] = swap32 (w0[0]);
    w0[1] = swap32 (w0[1]);
    w0[2] = swap32 (w0[2]);
@@ -1035,7 +1037,7 @@ __kernel void m13200_init (__global pw_t *pws, __global kernel_rule_t *rules_buf
    w3[3] = swap32 (w3[3]);
  
    w3[3] = pw_len * 8;
-  
+
    /**
     * KEK
     */
@@ -1072,7 +1074,7 @@ __kernel void m13200_init (__global pw_t *pws, __global kernel_rule_t *rules_buf
    tmps[gid].cipher[1] = salt_bufs[salt_pos].salt_buf[5];
    tmps[gid].cipher[2] = 0;
    tmps[gid].cipher[3] = 0;
-  
+
  }
  
  __kernel void m13200_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global axcrypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
@@ -1137,17 +1139,19 @@ __kernel void m13200_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf
    ukeyx[1] = tmps[gid].KEK[1];
    ukeyx[2] = tmps[gid].KEK[2];
    ukeyx[3] = tmps[gid].KEK[3];
-  
+
    AES128_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
  
-  #pragma unroll KEYLEN
+  #ifdef _unroll
+  #pragma unroll
+  #endif
    for (u32 i = 0; i < KEYLEN; i++) rdk[i] = rek[i];
  
    AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);
  
    u32 lsb[4];
    u32 cipher[4];
-  
+
    lsb[0] = tmps[gid].lsb[0];
    lsb[1] = tmps[gid].lsb[1];
    lsb[2] = tmps[gid].lsb[2];
@@ -1157,8 +1161,8 @@ __kernel void m13200_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf
    cipher[1] = tmps[gid].cipher[1];
    cipher[2] = tmps[gid].cipher[2];
    cipher[3] = tmps[gid].cipher[3];
-  
-  
+
+
    /**
    * AxCrypt main cipher routine
    */
@@ -1201,7 +1205,7 @@ __kernel void m13200_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf
    tmps[gid].lsb[1] = lsb[1];
    tmps[gid].lsb[2] = lsb[2];
    tmps[gid].lsb[3] = lsb[3];
-  
+
    tmps[gid].cipher[0] = cipher[0];
    tmps[gid].cipher[1] = cipher[1];
    tmps[gid].cipher[2] = cipher[2];
diff --git a/OpenCL/m13400.cl b/OpenCL/m13400.cl

index e9e36e3..1ba5d64 100644 (file)
--- a/OpenCL/m13400.cl
+++ b/OpenCL/m13400.cl
@@ -1074,7 +1074,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
  
    ROUND_STEP (0);
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 16; i < 64; i += 16)
    {
      ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/rp.c b/OpenCL/rp.c

index cc2f95a..f94b3ee 100644 (file)
--- a/OpenCL/rp.c
+++ b/OpenCL/rp.c
@@ -4126,7 +4126,9 @@ inline u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const
  
    u32x out_len = 0;
  
+  #ifdef _unroll
    #pragma unroll
+  #endif
    for (int i = 0; i < VECT_SIZE; i++)
    {
      u32 tmp0[4];
diff --git a/OpenCL/types_ocl.c b/OpenCL/types_ocl.c

index 8104039..772aba1 100644 (file)
--- a/OpenCL/types_ocl.c
+++ b/OpenCL/types_ocl.c
@@ -5,9 +5,6 @@
   * License.....: MIT
   */
  
-#define DEVICE_TYPE_CPU 2
-#define DEVICE_TYPE_GPU 4
-
  typedef uchar  u8;
  typedef ushort u16;
  typedef uint   u32;
diff --git a/include/kernel_vendor.h b/include/kernel_vendor.h

index d2ffb59..d364256 100644 (file)
--- a/include/kernel_vendor.h
+++ b/include/kernel_vendor.h
@@ -3,15 +3,30 @@
   * License.....: MIT
   */
  
-#ifdef cl_khr_byte_addressable_store
+#pragma OPENCL EXTENSION cl_khr_int64_base_atomics     : enable
  #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
+
+/**
+ * device type
+ */
+
+#define DEVICE_TYPE_CPU   2
+#define DEVICE_TYPE_GPU   4
+#define DEVICE_TYPE_ACCEL 8
+
+#if   DEVICE_TYPE == DEVICE_TYPE_CPU
+#define IS_CPU
+#elif DEVICE_TYPE == DEVICE_TYPE_GPU
+#define IS_GPU
+#elif DEVICE_TYPE == DEVICE_TYPE_ACCEL
+#define IS_ACCEL
  #endif
  
  /**
   * vendor specific
   */
  
-#if VENDOR_ID == (1 << 0)
+#if   VENDOR_ID == (1 << 0)
  #define IS_AMD
  #elif VENDOR_ID == (1 << 6)
  #define IS_NV
@@ -29,15 +44,155 @@
  #endif
  
  /**
- * NV specific
+ * Unrolling is generally enabled, for all device types and hash modes
+ * There's a few exception when it's better not to unroll
   */
  
+// Some algorithms run into too much register pressure due to loop unrolling
+
  #ifdef IS_NV
+#ifdef IS_GPU
+
+#if KERN_TYPE == 1500
+#undef _unroll
+#endif
+#if KERN_TYPE == 1800
+#undef _unroll
+#endif
+#if KERN_TYPE == 3000
+#undef _unroll
+#endif
+#if KERN_TYPE == 6221
+#undef _unroll
+#endif
+#if KERN_TYPE == 6222
+#undef _unroll
+#endif
+#if KERN_TYPE == 6223
+#undef _unroll
+#endif
+#if KERN_TYPE == 6500
+#undef _unroll
+#endif
+#if KERN_TYPE == 7100
+#undef _unroll
+#endif
+#if KERN_TYPE == 7400
+#undef _unroll
+#endif
+#if KERN_TYPE == 8200
+#undef _unroll
+#endif
+#if KERN_TYPE == 10400
+#undef _unroll
+#endif
+#if KERN_TYPE == 10500
+#undef _unroll
+#endif
+#if KERN_TYPE == 10700
+#undef _unroll
+#endif
+#if KERN_TYPE == 12300
+#undef _unroll
+#endif
+#if KERN_TYPE == 12400
+#undef _unroll
  #endif
  
-/**
- * Generic
- */
+#endif
+#endif
+
+#ifdef IS_AMD
+#ifdef IS_GPU
+
+#if KERN_TYPE == 3200
+#undef _unroll
+#endif
+#if KERN_TYPE == 5200
+#undef _unroll
+#endif
+#if KERN_TYPE == 6100
+#undef _unroll
+#endif
+#if KERN_TYPE == 6221
+#undef _unroll
+#endif
+#if KERN_TYPE == 6222
+#undef _unroll
+#endif
+#if KERN_TYPE == 6223
+#undef _unroll
+#endif
+#if KERN_TYPE == 6400
+#undef _unroll
+#endif
+#if KERN_TYPE == 6500
+#undef _unroll
+#endif
+#if KERN_TYPE == 6800
+#undef _unroll
+#endif
+#if KERN_TYPE == 7100
+#undef _unroll
+#endif
+#if KERN_TYPE == 7400
+#undef _unroll
+#endif
+#if KERN_TYPE == 8000
+#undef _unroll
+#endif
+#if KERN_TYPE == 8200
+#undef _unroll
+#endif
+#if KERN_TYPE == 10900
+#undef _unroll
+#endif
+#if KERN_TYPE == 11600
+#undef _unroll
+#endif
+#if KERN_TYPE == 12300
+#undef _unroll
+#endif
+#if KERN_TYPE == 12800
+#undef _unroll
+#endif
+#if KERN_TYPE == 12900
+#undef _unroll
+#endif
+#if KERN_TYPE == 13000
+#undef _unroll
+#endif
  
-#ifdef IS_GENERIC
+#endif
+#endif
+
+// Some algorithms break due to loop unrolling, it's unknown why, probably compiler bugs
+// Can overlap with above cases
+
+#ifdef IS_AMD
+#ifdef IS_GPU
+
+#if KERN_TYPE == 1750
+#undef _unroll
+#endif
+#if KERN_TYPE == 1760
+#undef _unroll
+#endif
+#if KERN_TYPE == 6500
+#undef _unroll
+#endif
+#if KERN_TYPE == 7100
+#undef _unroll
+#endif
+#if KERN_TYPE == 9600
+#undef _unroll
+#endif
+#if KERN_TYPE == 12200
+#undef _unroll
+#endif
+#if KERN_TYPE == 12300
+#undef _unroll
+#endif
+
+#endif
  #endif
diff --git a/src/oclHashcat.c b/src/oclHashcat.c

index d624467..242ba90 100644 (file)
--- a/src/oclHashcat.c
+++ b/src/oclHashcat.c
@@ -13971,7 +13971,7 @@ int main (int argc, char **argv)
  
        // we don't have sm_* on vendors not NV but it doesn't matter
  
-      snprintf (build_opts, sizeof (build_opts) - 1, "-cl-std=CL1.1 -I\"%s/\" -DVENDOR_ID=%u -DCUDA_ARCH=%d -DVECT_SIZE=%u -DDEVICE_TYPE=%u", shared_dir, device_param->vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->vector_width, (u32) device_param->device_type);
+      snprintf (build_opts, sizeof (build_opts) - 1, "-cl-std=CL1.1 -I\"%s/\" -DVENDOR_ID=%u -DCUDA_ARCH=%d -DVECT_SIZE=%u -DDEVICE_TYPE=%u -DKERN_TYPE=%u -D_unroll", shared_dir, device_param->vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->vector_width, (u32) device_param->device_type, kern_type);
  
        /**
         * main kernel
author	jsteube <jens.steube@gmail.com>
	Mon, 9 May 2016 19:32:12 +0000 (21:32 +0200)
committer	jsteube <jens.steube@gmail.com>
	Mon, 9 May 2016 19:32:12 +0000 (21:32 +0200)
OpenCL/kernel_serpent256.c		patch \| blob \| history
OpenCL/m01450_a0.cl		patch \| blob \| history
OpenCL/m01450_a1.cl		patch \| blob \| history
OpenCL/m01450_a3.cl		patch \| blob \| history
OpenCL/m01460_a0.cl		patch \| blob \| history
OpenCL/m01460_a1.cl		patch \| blob \| history
OpenCL/m01460_a3.cl		patch \| blob \| history
OpenCL/m01500_a0.cl		patch \| blob \| history
OpenCL/m01500_a1.cl		patch \| blob \| history
OpenCL/m01500_a3.cl		patch \| blob \| history
OpenCL/m01700_a0.cl		patch \| blob \| history
OpenCL/m01700_a1.cl		patch \| blob \| history
OpenCL/m01700_a3.cl		patch \| blob \| history
OpenCL/m01710_a0.cl		patch \| blob \| history
OpenCL/m01710_a1.cl		patch \| blob \| history
OpenCL/m01710_a3.cl		patch \| blob \| history
OpenCL/m01720_a0.cl		patch \| blob \| history
OpenCL/m01720_a1.cl		patch \| blob \| history
OpenCL/m01720_a3.cl		patch \| blob \| history
OpenCL/m01730_a0.cl		patch \| blob \| history
OpenCL/m01730_a1.cl		patch \| blob \| history
OpenCL/m01730_a3.cl		patch \| blob \| history
OpenCL/m01740_a0.cl		patch \| blob \| history
OpenCL/m01740_a1.cl		patch \| blob \| history
OpenCL/m01740_a3.cl		patch \| blob \| history
OpenCL/m01750_a0.cl		patch \| blob \| history
OpenCL/m01750_a1.cl		patch \| blob \| history
OpenCL/m01750_a3.cl		patch \| blob \| history
OpenCL/m01760_a0.cl		patch \| blob \| history
OpenCL/m01760_a1.cl		patch \| blob \| history
OpenCL/m01760_a3.cl		patch \| blob \| history
OpenCL/m01800.cl		patch \| blob \| history
OpenCL/m03000_a0.cl		patch \| blob \| history
OpenCL/m03000_a1.cl		patch \| blob \| history
OpenCL/m03000_a3.cl		patch \| blob \| history
OpenCL/m03100_a0.cl		patch \| blob \| history
OpenCL/m03100_a1.cl		patch \| blob \| history
OpenCL/m03100_a3.cl		patch \| blob \| history
OpenCL/m03200.cl		patch \| blob \| history
OpenCL/m05200.cl		patch \| blob \| history
OpenCL/m05500_a0.cl		patch \| blob \| history
OpenCL/m05500_a1.cl		patch \| blob \| history
OpenCL/m05500_a3.cl		patch \| blob \| history
OpenCL/m06100_a0.cl		patch \| blob \| history
OpenCL/m06100_a1.cl		patch \| blob \| history
OpenCL/m06100_a3.cl		patch \| blob \| history
OpenCL/m06221.cl		patch \| blob \| history
OpenCL/m06222.cl		patch \| blob \| history
OpenCL/m06223.cl		patch \| blob \| history
OpenCL/m06231.cl		patch \| blob \| history
OpenCL/m06232.cl		patch \| blob \| history
OpenCL/m06233.cl		patch \| blob \| history
OpenCL/m06400.cl		patch \| blob \| history
OpenCL/m06500.cl		patch \| blob \| history
OpenCL/m06600.cl		patch \| blob \| history
OpenCL/m06800.cl		patch \| blob \| history
OpenCL/m07100.cl		patch \| blob \| history
OpenCL/m07400.cl		patch \| blob \| history
OpenCL/m07500_a0.cl		patch \| blob \| history
OpenCL/m07500_a1.cl		patch \| blob \| history
OpenCL/m07500_a3.cl		patch \| blob \| history
OpenCL/m07800_a0.cl		patch \| blob \| history
OpenCL/m07800_a1.cl		patch \| blob \| history
OpenCL/m07800_a3.cl		patch \| blob \| history
OpenCL/m07900.cl		patch \| blob \| history
OpenCL/m08000_a0.cl		patch \| blob \| history
OpenCL/m08000_a1.cl		patch \| blob \| history
OpenCL/m08000_a3.cl		patch \| blob \| history
OpenCL/m08200.cl		patch \| blob \| history
OpenCL/m08500_a0.cl		patch \| blob \| history
OpenCL/m08500_a1.cl		patch \| blob \| history
OpenCL/m08500_a3.cl		patch \| blob \| history
OpenCL/m08600_a0.cl		patch \| blob \| history
OpenCL/m08600_a1.cl		patch \| blob \| history
OpenCL/m08600_a3.cl		patch \| blob \| history
OpenCL/m08700_a0.cl		patch \| blob \| history
OpenCL/m08700_a1.cl		patch \| blob \| history
OpenCL/m08700_a3.cl		patch \| blob \| history
OpenCL/m08800.cl		patch \| blob \| history
OpenCL/m08900.cl		patch \| blob \| history
OpenCL/m09100.cl		patch \| blob \| history
OpenCL/m09600.cl		patch \| blob \| history
OpenCL/m09700_a0.cl		patch \| blob \| history
OpenCL/m09700_a1.cl		patch \| blob \| history
OpenCL/m09700_a3.cl		patch \| blob \| history
OpenCL/m09710_a0.cl		patch \| blob \| history
OpenCL/m09710_a1.cl		patch \| blob \| history
OpenCL/m09710_a3.cl		patch \| blob \| history
OpenCL/m09800_a0.cl		patch \| blob \| history
OpenCL/m09800_a1.cl		patch \| blob \| history
OpenCL/m09800_a3.cl		patch \| blob \| history
OpenCL/m09810_a0.cl		patch \| blob \| history
OpenCL/m09810_a1.cl		patch \| blob \| history
OpenCL/m09810_a3.cl		patch \| blob \| history
OpenCL/m10400_a0.cl		patch \| blob \| history
OpenCL/m10400_a1.cl		patch \| blob \| history
OpenCL/m10400_a3.cl		patch \| blob \| history
OpenCL/m10410_a0.cl		patch \| blob \| history
OpenCL/m10410_a1.cl		patch \| blob \| history
OpenCL/m10410_a3.cl		patch \| blob \| history
OpenCL/m10500.cl		patch \| blob \| history
OpenCL/m10700.cl		patch \| blob \| history
OpenCL/m10800_a0.cl		patch \| blob \| history
OpenCL/m10800_a1.cl		patch \| blob \| history
OpenCL/m10800_a3.cl		patch \| blob \| history
OpenCL/m10900.cl		patch \| blob \| history
OpenCL/m11300.cl		patch \| blob \| history
OpenCL/m11600.cl		patch \| blob \| history
OpenCL/m11700_a0.cl		patch \| blob \| history
OpenCL/m11700_a1.cl		patch \| blob \| history
OpenCL/m11700_a3.cl		patch \| blob \| history
OpenCL/m11800_a0.cl		patch \| blob \| history
OpenCL/m11800_a1.cl		patch \| blob \| history
OpenCL/m11800_a3.cl		patch \| blob \| history
OpenCL/m12200.cl		patch \| blob \| history
OpenCL/m12300.cl		patch \| blob \| history
OpenCL/m12400.cl		patch \| blob \| history
OpenCL/m12800.cl		patch \| blob \| history
OpenCL/m12900.cl		patch \| blob \| history
OpenCL/m13000.cl		patch \| blob \| history
OpenCL/m13100_a0.cl		patch \| blob \| history
OpenCL/m13100_a1.cl		patch \| blob \| history
OpenCL/m13100_a3.cl		patch \| blob \| history
OpenCL/m13200.cl		patch \| blob \| history
OpenCL/m13400.cl		patch \| blob \| history
OpenCL/rp.c		patch \| blob \| history
OpenCL/types_ocl.c		patch \| blob \| history
include/kernel_vendor.h		patch \| blob \| history
src/oclHashcat.c		patch \| blob \| history