#define NEW_SIMD_CODE
-#include "include/constants.h"
-#include "include/kernel_vendor.h"
+#include "inc_hash_constants.h"
+#include "inc_vendor.cl"
#define DGST_R0 14
#define DGST_R1 15
#define DGST_R2 6
#define DGST_R3 7
-#include "include/kernel_functions.c"
-#include "OpenCL/types_ocl.c"
-#include "OpenCL/common.c"
-#include "include/rp_kernel.h"
-#include "OpenCL/rp.c"
-#include "OpenCL/simd.c"
+#include "inc_hash_functions.cl"
+#include "inc_types.cl"
+#include "inc_common.cl"
+#include "inc_rp.h"
+#include "inc_rp.cl"
+#include "inc_simd.cl"
/*
 * k_sha512: table declared with 80 u64 entries, qualified __constant.
 *
 * NOTE(review): only four initializers (SHA512C4c .. SHA512C4f) are visible
 * in this excerpt; the remainder of the list appears to be elided by the
 * surrounding diff context rather than missing -- confirm against the full
 * file before relying on the table contents.
 * NOTE(review): presumably these are the SHA-512 round constants used by the
 * ROUND_STEP macro; neither the SHA512C* macros nor ROUND_STEP are defined in
 * this view -- verify.
 */
__constant u64 k_sha512[80] =
{
SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f,
};
-static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8])
+void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8])
{
u64x w0_t = hl32_to_64 (w0[0], w0[1]);
u64x w1_t = hl32_to_64 (w0[2], w0[3]);
ROUND_STEP (0);
+ #ifdef _unroll
#pragma unroll
+ #endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
if (gid >= gid_max) return;
u32 pw_buf0[4];
-
- pw_buf0[0] = pws[gid].i[ 0];
- pw_buf0[1] = pws[gid].i[ 1];
- pw_buf0[2] = pws[gid].i[ 2];
- pw_buf0[3] = pws[gid].i[ 3];
-
u32 pw_buf1[4];
- pw_buf1[0] = pws[gid].i[ 4];
- pw_buf1[1] = pws[gid].i[ 5];
- pw_buf1[2] = pws[gid].i[ 6];
- pw_buf1[3] = pws[gid].i[ 7];
+ pw_buf0[0] = pws[gid].i[0];
+ pw_buf0[1] = pws[gid].i[1];
+ pw_buf0[2] = pws[gid].i[2];
+ pw_buf0[3] = pws[gid].i[3];
+ pw_buf1[0] = pws[gid].i[4];
+ pw_buf1[1] = pws[gid].i[5];
+ pw_buf1[2] = pws[gid].i[6];
+ pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
*/
u32 salt_buf0[4];
-
- salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
- salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
- salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
- salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
-
u32 salt_buf1[4];
-
- salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
- salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
- salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
- salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
+ u32 salt_buf2[4];
+ u32 salt_buf3[4];
+
+ salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
+ salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
+ salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
+ salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
+ salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
+ salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
+ salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
+ salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
+ salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
+ salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
+ salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
+ salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
+ salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
+ salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
+ salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
+ salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
const u32 salt_len = salt_bufs[salt_pos].salt_len;
const u32x out_salt_len = out_len + salt_len;
- switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
+ switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt_len);
w0[0] |= salt_buf0[0];
w0[1] |= salt_buf0[1];
w1[1] |= salt_buf1[1];
w1[2] |= salt_buf1[2];
w1[3] |= salt_buf1[3];
+ w2[0] |= salt_buf2[0];
+ w2[1] |= salt_buf2[1];
+ w2[2] |= salt_buf2[2];
+ w2[3] |= salt_buf2[3];
+ w3[0] |= salt_buf3[0];
+ w3[1] |= salt_buf3[1];
+ w3[2] |= salt_buf3[2];
+ w3[3] |= salt_buf3[3];
append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len);
if (gid >= gid_max) return;
u32 pw_buf0[4];
-
- pw_buf0[0] = pws[gid].i[ 0];
- pw_buf0[1] = pws[gid].i[ 1];
- pw_buf0[2] = pws[gid].i[ 2];
- pw_buf0[3] = pws[gid].i[ 3];
-
u32 pw_buf1[4];
- pw_buf1[0] = pws[gid].i[ 4];
- pw_buf1[1] = pws[gid].i[ 5];
- pw_buf1[2] = pws[gid].i[ 6];
- pw_buf1[3] = pws[gid].i[ 7];
+ pw_buf0[0] = pws[gid].i[0];
+ pw_buf0[1] = pws[gid].i[1];
+ pw_buf0[2] = pws[gid].i[2];
+ pw_buf0[3] = pws[gid].i[3];
+ pw_buf1[0] = pws[gid].i[4];
+ pw_buf1[1] = pws[gid].i[5];
+ pw_buf1[2] = pws[gid].i[6];
+ pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
*/
u32 salt_buf0[4];
-
- salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
- salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
- salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
- salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
-
u32 salt_buf1[4];
-
- salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
- salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
- salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
- salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
+ u32 salt_buf2[4];
+ u32 salt_buf3[4];
+
+ salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
+ salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
+ salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
+ salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
+ salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
+ salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
+ salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
+ salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
+ salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
+ salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
+ salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
+ salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
+ salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
+ salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
+ salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
+ salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
const u32 salt_len = salt_bufs[salt_pos].salt_len;
const u32x out_salt_len = out_len + salt_len;
- switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
+ switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt_len);
w0[0] |= salt_buf0[0];
w0[1] |= salt_buf0[1];
w1[1] |= salt_buf1[1];
w1[2] |= salt_buf1[2];
w1[3] |= salt_buf1[3];
+ w2[0] |= salt_buf2[0];
+ w2[1] |= salt_buf2[1];
+ w2[2] |= salt_buf2[2];
+ w2[3] |= salt_buf2[3];
+ w3[0] |= salt_buf3[0];
+ w3[1] |= salt_buf3[1];
+ w3[2] |= salt_buf3[2];
+ w3[3] |= salt_buf3[3];
append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len);