#define _SHA256_
-#include "include/constants.h"
-#include "include/kernel_vendor.h"
+#include "inc_hash_constants.h"
+#include "inc_vendor.cl"
#define DGST_R0 0
#define DGST_R1 1
#define DGST_R2 2
#define DGST_R3 3
-#include "include/kernel_functions.c"
-#include "OpenCL/types_ocl.c"
-#include "OpenCL/common.c"
+#include "inc_hash_functions.cl"
+#include "inc_types.cl"
+#include "inc_common.cl"
-#define COMPARE_S "OpenCL/check_single_comp4.c"
-#define COMPARE_M "OpenCL/check_multi_comp4.c"
+#define COMPARE_S "inc_comp_single.cl"
+#define COMPARE_M "inc_comp_multi.cl"
__constant u32 k_sha256[64] =
{
SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f,
};
-#ifdef IS_AMD
+#if 1
void sha256_transform (const u32 w[16], u32 digest[8])
{
ROUND_STEP (0);
+ #ifdef _unroll
#pragma unroll
+ #endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
#else
+// this is basically a much cleaner version, but apparently it runs at less than half the speed :(
+
#define PUTCHAR32_BE(a,p,c) ((u8 *)(a))[(p) ^ 3] = (u8) (c)
#define GETCHAR32_BE(a,p) ((u8 *)(a))[(p) ^ 3]
ROUND_STEP (0);
- //#pragma unroll
+ #ifdef _unroll
+ #pragma unroll
+ #endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
if (j1)
{
- #pragma unroll 32
+ #ifdef _unroll
+ #pragma unroll
+ #endif
for (u32 k = 0, p = block_len - 32; k < 32; k++, p++)
{
PUTCHAR32_BE (block, p, GETCHAR32_BE (alt_result, k));