digest[7] += h;
}
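+// sha512_transform variant compiled only for AMD builds (IS_AMD); the IS_AMD /
+// IS_NV call site further down selects between it and the regular sha512_transform.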
+#ifdef IS_AMD
static void sha512_transform_workaround (const u64 w[16], u64 digest[8])
{
u64 w0_t = w[ 0];
digest[6] += g;
digest[7] += h;
}
+#endif
static void sha512_init (sha512_ctx_t *sha512_ctx)
{
PUTCHAR64_BE (sha512_ctx->buf, pos++, GETCHAR64_BE (buf, i));
}
+ #ifdef IS_AMD
sha512_transform_workaround (sha512_ctx->buf, sha512_ctx->state);
+ #endif
+
+ #ifdef IS_NV
+ sha512_transform (sha512_ctx->buf, sha512_ctx->state);
+ #endif
len -= cnt;
0x9216d5d9, 0x8979fb1b
};
+/*
#define BF_ROUND(L,R,N) \
{ \
uchar4 c = as_uchar4 ((L)); \
\
(R) ^= tmp ^ P[(N)]; \
}
+*/
+
+
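+// Blowfish F function: four S-box lookups indexed by the bytes of L, combined as
+// ((S0 + S1) ^ S2) + S3 and then xor'ed into R together with the round key P[N].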
+#define BF_ROUND(L,R,N) \
+{ \
+ u32 tmp; \
+ \
+ tmp = S0[((L) >> 24) & 0xff]; \
+ tmp += S1[((L) >> 16) & 0xff]; \
+ tmp ^= S2[((L) >> 8) & 0xff]; \
+ tmp += S3[((L) >> 0) & 0xff]; \
+ \
+ (R) ^= tmp ^ P[(N)]; \
+}
#define BF_ENCRYPT(L,R) \
{ \
SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f,
};
-static void sha512_transform (volatile const u64 w0[4], volatile const u64 w1[4], volatile const u64 w2[4], volatile const u64 w3[4], volatile u64 dgst[8])
+static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 dgst[8])
{
u64 a = dgst[0];
u64 b = dgst[1];
ROUND_STEP (0);
- #pragma unroll
+ //#pragma unroll
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
u64 g = dgst[6];
u64 h = dgst[7];
- volatile u64 w0_t = w[ 0];
- volatile u64 w1_t = w[ 1];
- volatile u64 w2_t = w[ 2];
- volatile u64 w3_t = w[ 3];
- volatile u64 w4_t = w[ 4];
- volatile u64 w5_t = w[ 5];
- volatile u64 w6_t = w[ 6];
- volatile u64 w7_t = w[ 7];
- volatile u64 w8_t = w[ 8];
- volatile u64 w9_t = w[ 9];
- volatile u64 wa_t = w[10];
- volatile u64 wb_t = w[11];
- volatile u64 wc_t = w[12];
- volatile u64 wd_t = w[13];
- volatile u64 we_t = w[14];
- volatile u64 wf_t = w[15];
+ u64 w0_t = w[ 0];
+ u64 w1_t = w[ 1];
+ u64 w2_t = w[ 2];
+ u64 w3_t = w[ 3];
+ u64 w4_t = w[ 4];
+ u64 w5_t = w[ 5];
+ u64 w6_t = w[ 6];
+ u64 w7_t = w[ 7];
+ u64 w8_t = w[ 8];
+ u64 w9_t = w[ 9];
+ u64 wa_t = w[10];
+ u64 wb_t = w[11];
+ u64 wc_t = w[12];
+ u64 wd_t = w[13];
+ u64 we_t = w[14];
+ u64 wf_t = w[15];
#define ROUND_EXPAND() \
{ \
ROUND_STEP (0);
- #pragma unroll
+// #pragma unroll
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
#ifdef IS_NV
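+// Rotate helpers built on the OpenCL rotate() built-in (a left rotate); a right
+// rotate by n is expressed as rotate() by width - n. The 64-bit variants take the
+// count as u64 so both arguments of rotate() share the same type.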
-#if CUDA_ARCH >= 350
-
static u32 rotr32 (const u32 a, const u32 n)
{
- u32 r;
-
- asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(a), "r"(n));
-
- return r;
-}
-
-static u32 rotl32 (const u32 a, const u32 n)
-{
- return rotr32 (a, 32 - n);
-}
-
-static u64 rotr64 (const u64 a, const u32 n)
-{
- u32 il;
- u32 ir;
-
- asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
-
- u32 tl;
- u32 tr;
-
- if (n >= 32)
- {
- asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
- asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
- }
- else
- {
- asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
- asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
- }
-
- u64 r;
-
- asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
-
- return r;
-}
-
-static u64 rotl64 (const u64 a, const u32 n)
-{
- return rotr64 (a, 64 - n);
-}
-
-#else
-
-static u32 rotr32 (const u32 a, const u32 n)
-{
- return (((a) >> (n)) | ((a) << (32 - (n))));
+ return rotate (a, 32 - n);
}
static u32 rotl32 (const u32 a, const u32 n)
{
- return rotr32 (a, 32 - n);
+ return rotate (a, n);
}
-static u64 rotr64 (const u64 a, const u32 n)
+static u64 rotr64 (const u64 a, const u64 n)
{
- return (((a) >> (n)) | ((a) << (64 - (n))));
+ return rotate (a, 64 - n);
}
-static u64 rotl64 (const u64 a, const u32 n)
+static u64 rotl64 (const u64 a, const u64 n)
{
- return rotr64 (a, 64 - n);
+ return rotate (a, n);
}
-#endif
#endif
typedef struct
#define SHA512_EXPAND(x,y,z,w) (SHA512_S3 (x) + y + SHA512_S2 (z) + w)
-#define SHA512_S2_WO(x) (rotr64 ((x), 1) ^ rotr64 ((x), 8) ^ SHIFT_RIGHT_64 ((x), 7))
-#define SHA512_S3_WO(x) (rotr64 ((x), 19) ^ rotr64 ((x), 61) ^ SHIFT_RIGHT_64 ((x), 6))
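+// SHA-512 small-sigma functions (rotr by 1,8 plus shr 7, and rotr by 19,61 plus
+// shr 6), with the rotations written as left rotates by 64 - n via rotate().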
+#define SHA512_S2_WO(x) (rotate ((x), 64 -  1ull) ^ rotate ((x), 64 -  8ull) ^ SHIFT_RIGHT_64 ((x), 7))
+#define SHA512_S3_WO(x) (rotate ((x), 64 - 19ull) ^ rotate ((x), 64 - 61ull) ^ SHIFT_RIGHT_64 ((x), 6))
#define SHA512_EXPAND_WO(x,y,z,w) (SHA512_S3_WO (x) + y + SHA512_S2_WO (z) + w)
#endif
{
log_error ("ERROR: %s %d\n", "clBuildProgram()", CL_err);
- exit (-1);
+ // If we exit here, we can't see the error message
+ // exit (-1);
}
}
gpu_accel = 32;
break;
case 1800: gpu_loops = ROUNDS_SHA512CRYPT;
- gpu_accel = 8;
+ gpu_accel = 16;
break;
case 2100: gpu_loops = ROUNDS_DCC2;
gpu_accel = 16;
gpu_accel = 32;
break;
case 3200: gpu_loops = ROUNDS_BCRYPT;
- gpu_accel = 2;
+ gpu_accel = 8;
break;
case 5200: gpu_loops = ROUNDS_PSAFE3;
gpu_accel = 16;
gpu_accel = 64;
break;
case 7100: gpu_loops = ROUNDS_SHA512OSX;
- gpu_accel = 2;
+ gpu_accel = 8;
break;
case 7200: gpu_loops = ROUNDS_GRUB;
- gpu_accel = 2;
+ gpu_accel = 16;
break;
case 7400: gpu_loops = ROUNDS_SHA256CRYPT;
- gpu_accel = 4;
+ gpu_accel = 8;
break;
case 7900: gpu_loops = ROUNDS_DRUPAL7;
gpu_accel = 8;
break;
case 8200: gpu_loops = ROUNDS_CLOUDKEY;
- gpu_accel = 2;
+ gpu_accel = 8;
break;
case 8800: gpu_loops = ROUNDS_ANDROIDFDE;
gpu_accel = 32;
gpu_accel = 8;
break;
case 9300: gpu_loops = 1;
- gpu_accel = 4;
+ gpu_accel = 8;
break;
case 9400: gpu_loops = ROUNDS_OFFICE2007;
gpu_accel = 32;
gpu_accel = 32;
break;
case 9600: gpu_loops = ROUNDS_OFFICE2013;
- gpu_accel = 4;
+ gpu_accel = 8;
break;
case 10000: gpu_loops = ROUNDS_DJANGOPBKDF2;
gpu_accel = 8;
gpu_accel = 8;
break;
case 11300: gpu_loops = ROUNDS_BITCOIN_WALLET;
- gpu_accel = 2;
+ gpu_accel = 8;
break;
case 11600: gpu_loops = ROUNDS_SEVEN_ZIP;
- gpu_accel = 4;
+ gpu_accel = 8;
break;
case 11900: gpu_loops = ROUNDS_PBKDF2_MD5;
gpu_accel = 8;