#define _SHA256_
-#include "include/constants.h"
-#include "include/kernel_vendor.h"
+#include "inc_hash_constants.h"
+#include "inc_vendor.cl"
#define DGST_R0 0
#define DGST_R1 1
#define DGST_R2 2
#define DGST_R3 3
-#include "include/kernel_functions.c"
-#include "OpenCL/types_ocl.c"
-#include "OpenCL/common.c"
+#include "inc_hash_functions.cl"
+#include "inc_types.cl"
+#include "inc_common.cl"
-#define COMPARE_S "OpenCL/check_single_comp4.c"
-#define COMPARE_M "OpenCL/check_multi_comp4.c"
+#define COMPARE_S "inc_comp_single.cl"
+#define COMPARE_M "inc_comp_multi.cl"
__constant u32 k_sha256[64] =
{
SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f,
};
-#ifdef IS_AMD
+#if 1
void sha256_transform (const u32 w[16], u32 digest[8])
{
ROUND_STEP (0);
+ #ifdef _unroll
#pragma unroll
+ #endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
#else
+// this is basically a much cleaner version, but apparently it runs at less than half the speed :(
+
#define PUTCHAR32_BE(a,p,c) ((u8 *)(a))[(p) ^ 3] = (u8) (c)
#define GETCHAR32_BE(a,p) ((u8 *)(a))[(p) ^ 3]
ROUND_STEP (0);
- //#pragma unroll
+ #ifdef _unroll
+ #pragma unroll
+ #endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
if (j1)
{
- #pragma unroll 32
+ #ifdef _unroll
+ #pragma unroll
+ #endif
for (u32 k = 0, p = block_len - 32; k < 32; k++, p++)
{
PUTCHAR32_BE (block, p, GETCHAR32_BE (alt_result, k));