From 7be2c2fd8dff462f0ab8d3cb1993317815679714 Mon Sep 17 00:00:00 2001
From: jsteube
Date: Mon, 21 Dec 2015 14:29:04 +0100
Subject: [PATCH] Fixed some speeds: 1800 3200 7100 7200 7400 8200 9300 9000 9600 11300 11600

---
 OpenCL/m01800.cl           |  8 +++++
 OpenCL/m09000.cl           | 15 +++++++++
 OpenCL/m09600.cl           |  4 +--
 OpenCL/m11300.cl           | 34 ++++++++++----------
 OpenCL/types_ocl.c         | 64 ++++----------------------------------
 include/kernel_functions.c |  4 +--
 src/ext_OpenCL.c           |  3 +-
 src/oclHashcat.c           | 20 ++++++------
 8 files changed, 62 insertions(+), 90 deletions(-)

diff --git a/OpenCL/m01800.cl b/OpenCL/m01800.cl
index 1226052..7cc2567 100644
--- a/OpenCL/m01800.cl
+++ b/OpenCL/m01800.cl
@@ -141,6 +141,7 @@ static void sha512_transform (const u64 w[16], u64 digest[8])
   digest[7] += h;
 }
 
+#ifdef IS_AMD
 static void sha512_transform_workaround (const u64 w[16], u64 digest[8])
 {
   u64 w0_t = w[ 0];
@@ -205,6 +206,7 @@ static void sha512_transform_workaround (const u64 w[16], u64 digest[8])
   digest[6] += g;
   digest[7] += h;
 }
+#endif
 
 static void sha512_init (sha512_ctx_t *sha512_ctx)
 {
@@ -243,7 +245,13 @@ static void sha512_update (sha512_ctx_t *sha512_ctx, const u64 *buf, int len)
       PUTCHAR64_BE (sha512_ctx->buf, pos++, GETCHAR64_BE (buf, i));
     }
 
+    #ifdef IS_AMD
     sha512_transform_workaround (sha512_ctx->buf, sha512_ctx->state);
+    #endif
+
+    #ifdef IS_NV
+    sha512_transform (sha512_ctx->buf, sha512_ctx->state);
+    #endif
 
     len -= cnt;
 
diff --git a/OpenCL/m09000.cl b/OpenCL/m09000.cl
index 4f4cd32..cd40b1b 100644
--- a/OpenCL/m09000.cl
+++ b/OpenCL/m09000.cl
@@ -303,6 +303,7 @@ __constant u32 c_pbox[18] =
   0x9216d5d9, 0x8979fb1b
 };
 
+/*
 #define BF_ROUND(L,R,N) \
 { \
   uchar4 c = as_uchar4 ((L)); \
@@ -316,6 +317,20 @@ __constant u32 c_pbox[18] =
   \
   (R) ^= tmp ^ P[(N)]; \
 }
+*/
+
+
+#define BF_ROUND(L,R,N) \
+{ \
+  u32 tmp; \
+  \
+  tmp = S0[((L) >> 24) & 0xff]; \
+  tmp += S1[((L) >> 16) & 0xff]; \
+  tmp ^= S2[((L) >> 8) & 0xff]; \
+  tmp += S3[((L) >> 0) & 0xff]; \
+  \
+  (R) ^= tmp ^ P[(N)]; \
+}
 
 #define BF_ENCRYPT(L,R) \
 { \
diff --git a/OpenCL/m09600.cl b/OpenCL/m09600.cl
index 9bf4b37..3e61eff 100644
--- a/OpenCL/m09600.cl
+++ b/OpenCL/m09600.cl
@@ -1001,7 +1001,7 @@ __constant u64 k_sha512[80] =
   SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f,
 };
 
-static void sha512_transform (volatile const u64 w0[4], volatile const u64 w1[4], volatile const u64 w2[4], volatile const u64 w3[4], volatile u64 dgst[8])
+static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 dgst[8])
 {
   u64 a = dgst[0];
   u64 b = dgst[1];
@@ -1071,7 +1071,7 @@ static void sha512_transform (volatile const u64 w0[4], volatile const u64 w1[4]
 
   ROUND_STEP (0);
 
-  #pragma unroll
+  //#pragma unroll
   for (int i = 16; i < 80; i += 16)
   {
     ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/m11300.cl b/OpenCL/m11300.cl
index cbf2363..1a52405 100644
--- a/OpenCL/m11300.cl
+++ b/OpenCL/m11300.cl
@@ -925,22 +925,22 @@ static void sha512_transform (const u64 w[16], u64 dgst[8])
   u64 g = dgst[6];
   u64 h = dgst[7];
 
-  volatile u64 w0_t = w[ 0];
-  volatile u64 w1_t = w[ 1];
-  volatile u64 w2_t = w[ 2];
-  volatile u64 w3_t = w[ 3];
-  volatile u64 w4_t = w[ 4];
-  volatile u64 w5_t = w[ 5];
-  volatile u64 w6_t = w[ 6];
-  volatile u64 w7_t = w[ 7];
-  volatile u64 w8_t = w[ 8];
-  volatile u64 w9_t = w[ 9];
-  volatile u64 wa_t = w[10];
-  volatile u64 wb_t = w[11];
-  volatile u64 wc_t = w[12];
-  volatile u64 wd_t = w[13];
-  volatile u64 we_t = w[14];
-  volatile u64 wf_t = w[15];
+  u64 w0_t = w[ 0];
+  u64 w1_t = w[ 1];
+  u64 w2_t = w[ 2];
+  u64 w3_t = w[ 3];
+  u64 w4_t = w[ 4];
+  u64 w5_t = w[ 5];
+  u64 w6_t = w[ 6];
+  u64 w7_t = w[ 7];
+  u64 w8_t = w[ 8];
+  u64 w9_t = w[ 9];
+  u64 wa_t = w[10];
+  u64 wb_t = w[11];
+  u64 wc_t = w[12];
+  u64 wd_t = w[13];
+  u64 we_t = w[14];
+  u64 wf_t = w[15];
 
   #define ROUND_EXPAND() \
   { \
@@ -984,7 +984,7 @@ static void sha512_transform (const u64 w[16], u64 dgst[8])
 
   ROUND_STEP (0);
 
-  #pragma unroll
+// #pragma unroll
   for (int i = 16; i < 80; i += 16)
   {
     ROUND_EXPAND (); ROUND_STEP (i);
diff --git a/OpenCL/types_ocl.c b/OpenCL/types_ocl.c
index 884036b..2787fae 100644
--- a/OpenCL/types_ocl.c
+++ b/OpenCL/types_ocl.c
@@ -181,78 +181,26 @@ static u64 rotl64 (const u64 a, const u32 n)
 
 #ifdef IS_NV
 
-#if CUDA_ARCH >= 350
-
 static u32 rotr32 (const u32 a, const u32 n)
 {
-  u32 r;
-
-  asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(a), "r"(n));
-
-  return r;
-}
-
-static u32 rotl32 (const u32 a, const u32 n)
-{
-  return rotr32 (a, 32 - n);
-}
-
-static u64 rotr64 (const u64 a, const u32 n)
-{
-  u32 il;
-  u32 ir;
-
-  asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a));
-
-  u32 tl;
-  u32 tr;
-
-  if (n >= 32)
-  {
-    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32));
-    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32));
-  }
-  else
-  {
-    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n));
-    asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n));
-  }
-
-  u64 r;
-
-  asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr));
-
-  return r;
-}
-
-static u64 rotl64 (const u64 a, const u32 n)
-{
-  return rotr64 (a, 64 - n);
-}
-
-#else
-
-static u32 rotr32 (const u32 a, const u32 n)
-{
-  return (((a) >> (n)) | ((a) << (32 - (n))));
+  return rotate (a, 32 - n);
 }
 
 static u32 rotl32 (const u32 a, const u32 n)
 {
-  return rotr32 (a, 32 - n);
+  return rotate (a, n);
 }
 
-static u64 rotr64 (const u64 a, const u32 n)
+static u64 rotr64 (const u64 a, const u64 n)
 {
-  return (((a) >> (n)) | ((a) << (64 - (n))));
+  return rotate (a, 64 - n);
 }
 
-static u64 rotl64 (const u64 a, const u32 n)
+static u64 rotl64 (const u64 a, const u64 n)
 {
-  return rotr64 (a, 64 - n);
+  return rotate (a, n);
 }
 
-#endif
 #endif
 
 typedef struct
diff --git a/include/kernel_functions.c b/include/kernel_functions.c
index 274e8db..6a4d3ee 100644
--- a/include/kernel_functions.c
+++ b/include/kernel_functions.c
@@ -284,8 +284,8 @@
 
 #define SHA512_EXPAND(x,y,z,w) (SHA512_S3 (x) + y + SHA512_S2 (z) + w)
 
-#define SHA512_S2_WO(x) (rotr64 ((x), 1) ^ rotr64 ((x), 8) ^ SHIFT_RIGHT_64 ((x), 7))
-#define SHA512_S3_WO(x) (rotr64 ((x), 19) ^ rotr64 ((x), 61) ^ SHIFT_RIGHT_64 ((x), 6))
+#define SHA512_S2_WO(x) (rotate ((x), 64- 1ull) ^ rotate ((x), 64- 8ull) ^ SHIFT_RIGHT_64 ((x), 7))
+#define SHA512_S3_WO(x) (rotate ((x), 64-19ull) ^ rotate ((x), 64-61ull) ^ SHIFT_RIGHT_64 ((x), 6))
 #define SHA512_EXPAND_WO(x,y,z,w) (SHA512_S3_WO (x) + y + SHA512_S2_WO (z) + w)
 
 #endif
diff --git a/src/ext_OpenCL.c b/src/ext_OpenCL.c
index a632545..79a090f 100644
--- a/src/ext_OpenCL.c
+++ b/src/ext_OpenCL.c
@@ -256,7 +256,8 @@ void hc_clBuildProgram (cl_program program, cl_uint num_devices, const cl_device
   {
     log_error ("ERROR: %s %d\n", "clBuildProgram()", CL_err);
 
-    exit (-1);
+    // If we exit here we can't see the error message
+    // exit (-1);
   }
 }
 
diff --git a/src/oclHashcat.c b/src/oclHashcat.c
index 98398cd..88a751e 100644
--- a/src/oclHashcat.c
+++ b/src/oclHashcat.c
@@ -11130,7 +11130,7 @@ int main (int argc, char **argv)
                   gpu_accel = 32;
                   break;
       case 1800:  gpu_loops = ROUNDS_SHA512CRYPT;
-                  gpu_accel = 8;
+                  gpu_accel = 16;
                   break;
       case 2100:  gpu_loops = ROUNDS_DCC2;
                   gpu_accel = 16;
@@ -11139,7 +11139,7 @@
                   gpu_accel = 32;
                   break;
       case 3200:  gpu_loops = ROUNDS_BCRYPT;
-                  gpu_accel = 2;
+                  gpu_accel = 8;
                   break;
       case 5200:  gpu_loops = ROUNDS_PSAFE3;
                   gpu_accel = 16;
@@ -11202,19 +11202,19 @@
                   gpu_accel = 64;
                   break;
       case 7100:  gpu_loops = ROUNDS_SHA512OSX;
-                  gpu_accel = 2;
+                  gpu_accel = 8;
                   break;
       case 7200:  gpu_loops = ROUNDS_GRUB;
-                  gpu_accel = 2;
+                  gpu_accel = 16;
                   break;
       case 7400:  gpu_loops = ROUNDS_SHA256CRYPT;
-                  gpu_accel = 4;
+                  gpu_accel = 8;
                   break;
       case 7900:  gpu_loops = ROUNDS_DRUPAL7;
                   gpu_accel = 8;
                   break;
       case 8200:  gpu_loops = ROUNDS_CLOUDKEY;
-                  gpu_accel = 2;
+                  gpu_accel = 8;
                   break;
       case 8800:  gpu_loops = ROUNDS_ANDROIDFDE;
                   gpu_accel = 32;
@@ -11232,7 +11232,7 @@
                   gpu_accel = 8;
                   break;
       case 9300:  gpu_loops = 1;
-                  gpu_accel = 4;
+                  gpu_accel = 8;
                   break;
       case 9400:  gpu_loops = ROUNDS_OFFICE2007;
                   gpu_accel = 32;
@@ -11241,7 +11241,7 @@
                   gpu_accel = 32;
                   break;
       case 9600:  gpu_loops = ROUNDS_OFFICE2013;
-                  gpu_accel = 4;
+                  gpu_accel = 8;
                   break;
       case 10000: gpu_loops = ROUNDS_DJANGOPBKDF2;
                   gpu_accel = 8;
@@ -11259,10 +11259,10 @@
                   gpu_accel = 8;
                   break;
       case 11300: gpu_loops = ROUNDS_BITCOIN_WALLET;
-                  gpu_accel = 2;
+                  gpu_accel = 8;
                   break;
       case 11600: gpu_loops = ROUNDS_SEVEN_ZIP;
-                  gpu_accel = 4;
+                  gpu_accel = 8;
                   break;
       case 11900: gpu_loops = ROUNDS_PBKDF2_MD5;
                   gpu_accel = 8;
-- 
2.43.0