From 76cc1631bee0eb0de1d091cb9ca5e1cd4b0361e4 Mon Sep 17 00:00:00 2001 From: jsteube Date: Tue, 15 Dec 2015 17:46:00 +0100 Subject: [PATCH] More kernel fixes for function calls and vector datatypes --- OpenCL/m01500_a0.cl | 12 +- OpenCL/m01500_a1.cl | 12 +- OpenCL/m01500_a3.cl | 16 +-- OpenCL/m01800.cl | 17 ++- OpenCL/m02100.cl | 9 +- OpenCL/m02610_a0.cl | 10 -- OpenCL/m02610_a1.cl | 10 -- OpenCL/m02610_a3.cl | 10 -- OpenCL/m02710_a0.cl | 10 -- OpenCL/m02710_a1.cl | 14 --- OpenCL/m02710_a3.cl | 10 -- OpenCL/m02810_a0.cl | 10 -- OpenCL/m02810_a1.cl | 10 -- OpenCL/m02810_a3.cl | 10 -- OpenCL/m03000_a0.cl | 90 +------------- OpenCL/m03000_a1.cl | 90 +------------- OpenCL/m03000_a3.cl | 16 +-- OpenCL/m03100_a0.cl | 12 +- OpenCL/m03100_a1.cl | 12 +- OpenCL/m03100_a3.cl | 12 +- OpenCL/m03200.cl | 5 +- OpenCL/m03710_a0.cl | 10 -- OpenCL/m03710_a1.cl | 10 -- OpenCL/m03710_a3.cl | 10 -- OpenCL/m04310_a0.cl | 10 -- OpenCL/m04310_a1.cl | 10 -- OpenCL/m04310_a3.cl | 10 -- OpenCL/m04400_a0.cl | 10 -- OpenCL/m04400_a1.cl | 10 -- OpenCL/m04400_a3.cl | 10 -- OpenCL/m04500_a0.cl | 10 -- OpenCL/m04500_a1.cl | 10 -- OpenCL/m04500_a3.cl | 10 -- OpenCL/m04700_a0.cl | 10 -- OpenCL/m04700_a1.cl | 10 -- OpenCL/m04700_a3.cl | 10 -- OpenCL/m05000_a0.cl | 40 ++---- OpenCL/m05000_a1.cl | 36 ++---- OpenCL/m05000_a3.cl | 60 +++------ OpenCL/m05500_a0.cl | 90 +------------- OpenCL/m05500_a1.cl | 90 +------------- OpenCL/m05500_a3.cl | 90 +------------- OpenCL/m06100_a0.cl | 8 +- OpenCL/m06100_a1.cl | 8 +- OpenCL/m06100_a3.cl | 8 +- OpenCL/m06231.cl | 4 +- OpenCL/m06232.cl | 4 +- OpenCL/m06233.cl | 4 +- OpenCL/m06500.cl | 32 ++--- OpenCL/m06600.cl | 9 +- OpenCL/m06800.cl | 9 +- OpenCL/m06900_a0.cl | 14 +-- OpenCL/m06900_a1.cl | 14 +-- OpenCL/m06900_a3.cl | 14 +-- OpenCL/m07500_a0.cl | 8 -- OpenCL/m07500_a1.cl | 8 -- OpenCL/m07500_a3.cl | 4 - OpenCL/m07600_a0.cl | 10 -- OpenCL/m07600_a1.cl | 10 -- OpenCL/m07600_a3.cl | 10 -- OpenCL/m07700_a0.cl | 2 - OpenCL/m07700_a1.cl | 2 - OpenCL/m07700_a3.cl | 2 - OpenCL/m07900.cl | 5 +- OpenCL/m08200.cl | 5 +- OpenCL/m08400_a0.cl | 10 -- OpenCL/m08400_a1.cl | 10 -- OpenCL/m08400_a3.cl | 10 -- OpenCL/m08500_a0.cl | 294 +++++--------------------------------------- OpenCL/m08500_a1.cl | 294 +++++--------------------------------------- OpenCL/m08500_a3.cl | 294 +++++--------------------------------------- OpenCL/m08600_a0.cl | 12 +- OpenCL/m08600_a1.cl | 12 +- OpenCL/m08600_a3.cl | 12 +- OpenCL/m08700_a0.cl | 24 +--- OpenCL/m08700_a1.cl | 24 +--- OpenCL/m08700_a3.cl | 24 +--- OpenCL/m09000.cl | 5 +- OpenCL/m09100.cl | 170 +------------------------ OpenCL/m10100_a0.cl | 18 --- OpenCL/m10100_a1.cl | 18 --- OpenCL/m10100_a3.cl | 18 --- OpenCL/m10900.cl | 5 +- OpenCL/m11100_a0.cl | 10 -- OpenCL/m11100_a1.cl | 10 -- OpenCL/m11100_a3.cl | 10 -- OpenCL/m11500_a0.cl | 16 +-- OpenCL/m11500_a1.cl | 16 +-- OpenCL/m11500_a3.cl | 16 +-- OpenCL/m11600.cl | 16 +-- OpenCL/m11900.cl | 5 +- OpenCL/m12000.cl | 5 +- OpenCL/m12200.cl | 5 +- OpenCL/m12300.cl | 5 +- OpenCL/m12400.cl | 12 +- OpenCL/m12500.cl | 5 +- OpenCL/m12600_a0.cl | 10 -- OpenCL/m12600_a1.cl | 10 -- OpenCL/m12600_a3.cl | 10 -- OpenCL/m12700.cl | 9 +- OpenCL/m12800.cl | 7 +- 101 files changed, 235 insertions(+), 2343 deletions(-) diff --git a/OpenCL/m01500_a0.cl b/OpenCL/m01500_a0.cl index dba48ba..29dea1f 100644 --- a/OpenCL/m01500_a0.cl +++ b/OpenCL/m01500_a0.cl @@ -325,17 +325,7 @@ __constant u32 c_skb[8][64] = __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; -#ifdef VECT_SIZE1 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) -#endif +#define BOX(i,n,S) (S)[(n)][(i)] static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64]) { diff --git a/OpenCL/m01500_a1.cl b/OpenCL/m01500_a1.cl index d265a41..e87a2f1 100644 --- a/OpenCL/m01500_a1.cl +++ b/OpenCL/m01500_a1.cl @@ -323,17 +323,7 @@ __constant u32 c_skb[8][64] = __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; -#ifdef VECT_SIZE1 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) -#endif +#define BOX(i,n,S) (S)[(n)][(i)] static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64]) { diff --git a/OpenCL/m01500_a3.cl b/OpenCL/m01500_a3.cl index 8e3a5f7..4a3707d 100644 --- a/OpenCL/m01500_a3.cl +++ b/OpenCL/m01500_a3.cl @@ -18,20 +18,8 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_S "check_single_vect1_comp4_bs.c" -#define COMPARE_M "check_multi_vect1_comp4_bs.c" -#endif - -#ifdef VECT_SIZE2 -#define COMPARE_S "check_single_vect2_comp4_bs.c" -#define COMPARE_M "check_multi_vect2_comp4_bs.c" -#endif - -#ifdef VECT_SIZE4 -#define COMPARE_S "check_single_vect4_comp4_bs.c" -#define COMPARE_M "check_multi_vect4_comp4_bs.c" -#endif +#define COMPARE_S "check_single_comp4_bs.c" +#define COMPARE_M "check_multi_comp4_bs.c" #define KXX_DECL volatile #define sXXX_DECL volatile diff --git a/OpenCL/m01800.cl b/OpenCL/m01800.cl index 3089975..d00d203 100644 --- a/OpenCL/m01800.cl +++ b/OpenCL/m01800.cl @@ -17,9 +17,8 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define PUTCHAR64_BE(a,p,c) ((u8 *)(a))[(p) ^ 7] = (u8) (c) #define GETCHAR64_BE(a,p) ((u8 *)(a))[(p) ^ 7] @@ -330,13 +329,13 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01800_init (__gl u64 pw[2]; - pw[0] = swap32 (hl32_to_64 (w0[1], w0[0])); - pw[1] = swap32 (hl32_to_64 (w0[3], w0[2])); + pw[0] = swap64 (hl32_to_64 (w0[1], w0[0])); + pw[1] = swap64 (hl32_to_64 (w0[3], w0[2])); u64 salt[2]; - salt[0] = swap32 (hl32_to_64 (salt_buf[1], salt_buf[0])); - salt[1] = swap32 (hl32_to_64 (salt_buf[3], salt_buf[2])); + salt[0] = swap64 (hl32_to_64 (salt_buf[1], salt_buf[0])); + salt[1] = swap64 (hl32_to_64 (salt_buf[3], salt_buf[2])); /** * begin @@ -609,8 +608,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01800_comp (__gl const u32 lid = get_local_id (0); - const u64 a = swap32 (tmps[gid].l_alt_result[0]); - const u64 b = swap32 (tmps[gid].l_alt_result[1]); + const u64 a = swap64 (tmps[gid].l_alt_result[0]); + const u64 b = swap64 (tmps[gid].l_alt_result[1]); const u32 r0 = l32_from_64 (a); const u32 r1 = h32_from_64 (a); diff --git a/OpenCL/m02100.cl b/OpenCL/m02100.cl index 182d40c..4279e85 100644 --- a/OpenCL/m02100.cl +++ b/OpenCL/m02100.cl @@ -17,13 +17,8 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define COMPARE_M "check_multi_vect2_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" static void md4_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { diff --git a/OpenCL/m02610_a0.cl b/OpenCL/m02610_a0.cl index 3381260..216add4 100644 --- a/OpenCL/m02610_a0.cl +++ b/OpenCL/m02610_a0.cl @@ -22,17 +22,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m02610_a1.cl b/OpenCL/m02610_a1.cl index cad9768..2beab4b 100644 --- a/OpenCL/m02610_a1.cl +++ b/OpenCL/m02610_a1.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m02610_a3.cl b/OpenCL/m02610_a3.cl index 68d012d..2cbe9ad 100644 --- a/OpenCL/m02610_a3.cl +++ b/OpenCL/m02610_a3.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif static void m02610m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { diff --git a/OpenCL/m02710_a0.cl b/OpenCL/m02710_a0.cl index 5d1a731..37f7999 100644 --- a/OpenCL/m02710_a0.cl +++ b/OpenCL/m02710_a0.cl @@ -22,17 +22,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m02710_a1.cl b/OpenCL/m02710_a1.cl index 8a5b0af..c98a0b6 100644 --- a/OpenCL/m02710_a1.cl +++ b/OpenCL/m02710_a1.cl @@ -20,21 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m02710_a3.cl b/OpenCL/m02710_a3.cl index c2badb5..ba2e957 100644 --- a/OpenCL/m02710_a3.cl +++ b/OpenCL/m02710_a3.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif static void m02710m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { diff --git a/OpenCL/m02810_a0.cl b/OpenCL/m02810_a0.cl index b13f1be..1f8f5d1 100644 --- a/OpenCL/m02810_a0.cl +++ b/OpenCL/m02810_a0.cl @@ -22,17 +22,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m02810_a1.cl b/OpenCL/m02810_a1.cl index c396fca..f9f1317 100644 --- a/OpenCL/m02810_a1.cl +++ b/OpenCL/m02810_a1.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m02810_a3.cl b/OpenCL/m02810_a3.cl index 8450648..2792a2b 100644 --- a/OpenCL/m02810_a3.cl +++ b/OpenCL/m02810_a3.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif static void m02810m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { diff --git a/OpenCL/m03000_a0.cl b/OpenCL/m03000_a0.cl index 16789f8..7fe973a 100644 --- a/OpenCL/m03000_a0.cl +++ b/OpenCL/m03000_a0.cl @@ -328,17 +328,7 @@ __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, #define LM_IV_0_IP_RR3 0x2400b807 #define LM_IV_1_IP_RR3 0xaa190747 -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32 ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) -#endif +#define BOX(i,n,S) (S)[(n)][(i)] static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { @@ -443,7 +433,6 @@ static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u static void transform_netntlmv1_key (const u32 w0, const u32 w1, u32 out[2]) { - #ifdef VECT_SIZE1 const uchar4 t0 = as_uchar4 (w0); const uchar4 t1 = as_uchar4 (w1); @@ -461,83 +450,6 @@ static void transform_netntlmv1_key (const u32 w0, const u32 w1, u32 out[2]) out[0] = as_uint (k0); out[1] = as_uint (k1); - #endif - - #ifdef VECT_SIZE2 - const uchar8 t0 = as_uchar8 (w0); - const uchar8 t1 = as_uchar8 (w1); - - uchar8 k0; - uchar8 k1; - - k0.s0 = (t0.s0 >> 0); - k0.s1 = (t0.s0 << 7) | (t0.s1 >> 1); - k0.s2 = (t0.s1 << 6) | (t0.s2 >> 2); - k0.s3 = (t0.s2 << 5) | (t0.s3 >> 3); - k1.s0 = (t0.s3 << 4) | (t1.s0 >> 4); - k1.s1 = (t1.s0 << 3) | (t1.s1 >> 5); - k1.s2 = (t1.s1 << 2) | (t1.s2 >> 6); - k1.s3 = (t1.s2 << 1); - - k0.s4 = (t0.s4 >> 0); - k0.s5 = (t0.s4 << 7) | (t0.s5 >> 1); - k0.s6 = (t0.s5 << 6) | (t0.s6 >> 2); - k0.s7 = (t0.s6 << 5) | (t0.s7 >> 3); - k1.s4 = (t0.s7 << 4) | (t1.s4 >> 4); - k1.s5 = (t1.s4 << 3) | (t1.s5 >> 5); - k1.s6 = (t1.s5 << 2) | (t1.s6 >> 6); - k1.s7 = (t1.s6 << 1); - - out[0] = as_uint2 (k0); - out[1] = as_uint2 (k1); - #endif - - #ifdef VECT_SIZE4 - const uchar16 t0 = as_uchar16 (w0); - const uchar16 t1 = as_uchar16 (w1); - - uchar16 k0; - uchar16 k1; - - k0.s0 = (t0.s0 >> 0); - k0.s1 = (t0.s0 << 7) | (t0.s1 >> 1); - k0.s2 = (t0.s1 << 6) | (t0.s2 >> 2); - k0.s3 = (t0.s2 << 5) | (t0.s3 >> 3); - k1.s0 = (t0.s3 << 4) | (t1.s0 >> 4); - k1.s1 = (t1.s0 << 3) | (t1.s1 >> 5); - k1.s2 = (t1.s1 << 2) | (t1.s2 >> 6); - k1.s3 = (t1.s2 << 1); - - k0.s4 = (t0.s4 >> 0); - k0.s5 = (t0.s4 << 7) | (t0.s5 >> 1); - k0.s6 = (t0.s5 << 6) | (t0.s6 >> 2); - k0.s7 = (t0.s6 << 5) | (t0.s7 >> 3); - k1.s4 = (t0.s7 << 4) | (t1.s4 >> 4); - k1.s5 = (t1.s4 << 3) | (t1.s5 >> 5); - k1.s6 = (t1.s5 << 2) | (t1.s6 >> 6); - k1.s7 = (t1.s6 << 1); - - k0.s8 = (t0.s8 >> 0); - k0.s9 = (t0.s8 << 7) | (t0.s9 >> 1); - k0.sa = (t0.s9 << 6) | (t0.sa >> 2); - k0.sb = (t0.sa << 5) | (t0.sb >> 3); - k1.s8 = (t0.sb << 4) | (t1.s8 >> 4); - k1.s9 = (t1.s8 << 3) | (t1.s9 >> 5); - k1.sa = (t1.s9 << 2) | (t1.sa >> 6); - k1.sb = (t1.sa << 1); - - k0.sc = (t0.sc >> 0); - k0.sd = (t0.sc << 7) | (t0.sd >> 1); - k0.se = (t0.sd << 6) | (t0.se >> 2); - k0.sf = (t0.se << 5) | (t0.sf >> 3); - k1.sc = (t0.sf << 4) | (t1.sc >> 4); - k1.sd = (t1.sc << 3) | (t1.sd >> 5); - k1.se = (t1.sd << 2) | (t1.se >> 6); - k1.sf = (t1.se << 1); - - out[0] = as_uint4 (k0); - out[1] = as_uint4 (k1); - #endif } __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03000_m04 (__global pw_t *pws, __global gpu_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) diff --git a/OpenCL/m03000_a1.cl b/OpenCL/m03000_a1.cl index 8ad0cd8..c4da6ed 100644 --- a/OpenCL/m03000_a1.cl +++ b/OpenCL/m03000_a1.cl @@ -326,17 +326,7 @@ __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, #define LM_IV_0_IP_RR3 0x2400b807 #define LM_IV_1_IP_RR3 0xaa190747 -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32 ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) -#endif +#define BOX(i,n,S) (S)[(n)][(i)] static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { @@ -441,7 +431,6 @@ static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u static void transform_netntlmv1_key (const u32 w0, const u32 w1, u32 out[2]) { - #ifdef VECT_SIZE1 const uchar4 t0 = as_uchar4 (w0); const uchar4 t1 = as_uchar4 (w1); @@ -459,83 +448,6 @@ static void transform_netntlmv1_key (const u32 w0, const u32 w1, u32 out[2]) out[0] = as_uint (k0); out[1] = as_uint (k1); - #endif - - #ifdef VECT_SIZE2 - const uchar8 t0 = as_uchar8 (w0); - const uchar8 t1 = as_uchar8 (w1); - - uchar8 k0; - uchar8 k1; - - k0.s0 = (t0.s0 >> 0); - k0.s1 = (t0.s0 << 7) | (t0.s1 >> 1); - k0.s2 = (t0.s1 << 6) | (t0.s2 >> 2); - k0.s3 = (t0.s2 << 5) | (t0.s3 >> 3); - k1.s0 = (t0.s3 << 4) | (t1.s0 >> 4); - k1.s1 = (t1.s0 << 3) | (t1.s1 >> 5); - k1.s2 = (t1.s1 << 2) | (t1.s2 >> 6); - k1.s3 = (t1.s2 << 1); - - k0.s4 = (t0.s4 >> 0); - k0.s5 = (t0.s4 << 7) | (t0.s5 >> 1); - k0.s6 = (t0.s5 << 6) | (t0.s6 >> 2); - k0.s7 = (t0.s6 << 5) | (t0.s7 >> 3); - k1.s4 = (t0.s7 << 4) | (t1.s4 >> 4); - k1.s5 = (t1.s4 << 3) | (t1.s5 >> 5); - k1.s6 = (t1.s5 << 2) | (t1.s6 >> 6); - k1.s7 = (t1.s6 << 1); - - out[0] = as_uint2 (k0); - out[1] = as_uint2 (k1); - #endif - - #ifdef VECT_SIZE4 - const uchar16 t0 = as_uchar16 (w0); - const uchar16 t1 = as_uchar16 (w1); - - uchar16 k0; - uchar16 k1; - - k0.s0 = (t0.s0 >> 0); - k0.s1 = (t0.s0 << 7) | (t0.s1 >> 1); - k0.s2 = (t0.s1 << 6) | (t0.s2 >> 2); - k0.s3 = (t0.s2 << 5) | (t0.s3 >> 3); - k1.s0 = (t0.s3 << 4) | (t1.s0 >> 4); - k1.s1 = (t1.s0 << 3) | (t1.s1 >> 5); - k1.s2 = (t1.s1 << 2) | (t1.s2 >> 6); - k1.s3 = (t1.s2 << 1); - - k0.s4 = (t0.s4 >> 0); - k0.s5 = (t0.s4 << 7) | (t0.s5 >> 1); - k0.s6 = (t0.s5 << 6) | (t0.s6 >> 2); - k0.s7 = (t0.s6 << 5) | (t0.s7 >> 3); - k1.s4 = (t0.s7 << 4) | (t1.s4 >> 4); - k1.s5 = (t1.s4 << 3) | (t1.s5 >> 5); - k1.s6 = (t1.s5 << 2) | (t1.s6 >> 6); - k1.s7 = (t1.s6 << 1); - - k0.s8 = (t0.s8 >> 0); - k0.s9 = (t0.s8 << 7) | (t0.s9 >> 1); - k0.sa = (t0.s9 << 6) | (t0.sa >> 2); - k0.sb = (t0.sa << 5) | (t0.sb >> 3); - k1.s8 = (t0.sb << 4) | (t1.s8 >> 4); - k1.s9 = (t1.s8 << 3) | (t1.s9 >> 5); - k1.sa = (t1.s9 << 2) | (t1.sa >> 6); - k1.sb = (t1.sa << 1); - - k0.sc = (t0.sc >> 0); - k0.sd = (t0.sc << 7) | (t0.sd >> 1); - k0.se = (t0.sd << 6) | (t0.se >> 2); - k0.sf = (t0.se << 5) | (t0.sf >> 3); - k1.sc = (t0.sf << 4) | (t1.sc >> 4); - k1.sd = (t1.sc << 3) | (t1.sd >> 5); - k1.se = (t1.sd << 2) | (t1.se >> 6); - k1.sf = (t1.se << 1); - - out[0] = as_uint4 (k0); - out[1] = as_uint4 (k1); - #endif } __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03000_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) diff --git a/OpenCL/m03000_a3.cl b/OpenCL/m03000_a3.cl index 392d612..5556a6c 100644 --- a/OpenCL/m03000_a3.cl +++ b/OpenCL/m03000_a3.cl @@ -18,20 +18,8 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_S "check_single_vect1_comp4_bs.c" -#define COMPARE_M "check_multi_vect1_comp4_bs.c" -#endif - -#ifdef VECT_SIZE2 -#define COMPARE_S "check_single_vect2_comp4_bs.c" -#define COMPARE_M "check_multi_vect2_comp4_bs.c" -#endif - -#ifdef VECT_SIZE4 -#define COMPARE_S "check_single_vect4_comp4_bs.c" -#define COMPARE_M "check_multi_vect4_comp4_bs.c" -#endif +#define COMPARE_S "check_single_comp4_bs.c" +#define COMPARE_M "check_multi_comp4_bs.c" #define KXX_DECL diff --git a/OpenCL/m03100_a0.cl b/OpenCL/m03100_a0.cl index d04183e..55685cf 100644 --- a/OpenCL/m03100_a0.cl +++ b/OpenCL/m03100_a0.cl @@ -343,17 +343,7 @@ __constant u32 c_skb[8][64] = __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32 ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) -#endif +#define BOX(i,n,S) (S)[(n)][(i)] static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { diff --git a/OpenCL/m03100_a1.cl b/OpenCL/m03100_a1.cl index 63f8e98..5b6d398 100644 --- a/OpenCL/m03100_a1.cl +++ b/OpenCL/m03100_a1.cl @@ -341,17 +341,7 @@ __constant u32 c_skb[8][64] = __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32 ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) -#endif +#define BOX(i,n,S) (S)[(n)][(i)] static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { diff --git a/OpenCL/m03100_a3.cl b/OpenCL/m03100_a3.cl index 3941078..89a9173 100644 --- a/OpenCL/m03100_a3.cl +++ b/OpenCL/m03100_a3.cl @@ -341,17 +341,7 @@ __constant u32 c_skb[8][64] = __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32 ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) -#endif +#define BOX(i,n,S) (S)[(n)][(i)] static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { diff --git a/OpenCL/m03200.cl b/OpenCL/m03200.cl index d6b8e0b..3b76dec 100644 --- a/OpenCL/m03200.cl +++ b/OpenCL/m03200.cl @@ -17,9 +17,8 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" // http://www.schneier.com/code/constants.txt diff --git a/OpenCL/m03710_a0.cl b/OpenCL/m03710_a0.cl index 61615f4..132a8fb 100644 --- a/OpenCL/m03710_a0.cl +++ b/OpenCL/m03710_a0.cl @@ -22,17 +22,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m03710_a1.cl b/OpenCL/m03710_a1.cl index e3b1508..601e3c1 100644 --- a/OpenCL/m03710_a1.cl +++ b/OpenCL/m03710_a1.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m03710_a3.cl b/OpenCL/m03710_a3.cl index 054a591..91daea7 100644 --- a/OpenCL/m03710_a3.cl +++ b/OpenCL/m03710_a3.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif static void m03710m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { diff --git a/OpenCL/m04310_a0.cl b/OpenCL/m04310_a0.cl index 84b7652..9009507 100644 --- a/OpenCL/m04310_a0.cl +++ b/OpenCL/m04310_a0.cl @@ -22,17 +22,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m04310_a1.cl b/OpenCL/m04310_a1.cl index c6b298d..b11b391 100644 --- a/OpenCL/m04310_a1.cl +++ b/OpenCL/m04310_a1.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m04310_a3.cl b/OpenCL/m04310_a3.cl index e98b9f2..70e0dd5 100644 --- a/OpenCL/m04310_a3.cl +++ b/OpenCL/m04310_a3.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif static void m04310m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { diff --git a/OpenCL/m04400_a0.cl b/OpenCL/m04400_a0.cl index b6045af..281bd11 100644 --- a/OpenCL/m04400_a0.cl +++ b/OpenCL/m04400_a0.cl @@ -22,17 +22,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m04400_a1.cl b/OpenCL/m04400_a1.cl index c5289c0..f9305b6 100644 --- a/OpenCL/m04400_a1.cl +++ b/OpenCL/m04400_a1.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m04400_a3.cl b/OpenCL/m04400_a3.cl index 483fd34..48eb12f 100644 --- a/OpenCL/m04400_a3.cl +++ b/OpenCL/m04400_a3.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif static void m04400m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { diff --git a/OpenCL/m04500_a0.cl b/OpenCL/m04500_a0.cl index 51c7a4e..b561de4 100644 --- a/OpenCL/m04500_a0.cl +++ b/OpenCL/m04500_a0.cl @@ -22,17 +22,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m04500_a1.cl b/OpenCL/m04500_a1.cl index fb275ce..f15b819 100644 --- a/OpenCL/m04500_a1.cl +++ b/OpenCL/m04500_a1.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m04500_a3.cl b/OpenCL/m04500_a3.cl index 7a0d1c6..679110d 100644 --- a/OpenCL/m04500_a3.cl +++ b/OpenCL/m04500_a3.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif static void m04500m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { diff --git a/OpenCL/m04700_a0.cl b/OpenCL/m04700_a0.cl index bb04ca7..1323d31 100644 --- a/OpenCL/m04700_a0.cl +++ b/OpenCL/m04700_a0.cl @@ -23,17 +23,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m04700_a1.cl b/OpenCL/m04700_a1.cl index 407e7ef..643f759 100644 --- a/OpenCL/m04700_a1.cl +++ b/OpenCL/m04700_a1.cl @@ -21,17 +21,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m04700_a3.cl b/OpenCL/m04700_a3.cl index e011d2e..88eab76 100644 --- a/OpenCL/m04700_a3.cl +++ b/OpenCL/m04700_a3.cl @@ -21,17 +21,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif static void m04700m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { diff --git a/OpenCL/m05000_a0.cl b/OpenCL/m05000_a0.cl index 7351ecc..a6aa03a 100644 --- a/OpenCL/m05000_a0.cl +++ b/OpenCL/m05000_a0.cl @@ -63,8 +63,8 @@ __constant u32 keccakf_piln[24] = #define Rho_Pi(s) \ { \ - u32 j = keccakf_piln[s]; \ - u32 k = keccakf_rotc[s]; \ + u32 j = keccakf_piln[s]; \ + u32 k = keccakf_rotc[s]; \ bc0 = st[j]; \ st[j] = rotl64 (t, k); \ t = bc0; \ @@ -166,20 +166,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_m04 (__glo u64 st[25]; - #ifdef VECT_SIZE1 - st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; - st[ 1] = (u64x) (w0[2]) | (u64x) (w0[3]) << 32; - st[ 2] = (u64x) (w1[0]) | (u64x) (w1[1]) << 32; - st[ 3] = (u64x) (w1[2]) | (u64x) (w1[3]) << 32; - #endif - - #ifdef VECT_SIZE2 - st[ 0] = (u64x) (w0[0].s0, w0[0].s1) | (u64x) (w0[1].s0, w0[1].s1) << 32; - st[ 1] = (u64x) (w0[2].s0, w0[2].s1) | (u64x) (w0[3].s0, w0[3].s1) << 32; - st[ 2] = (u64x) (w1[0].s0, w1[0].s1) | (u64x) (w1[1].s0, w1[1].s1) << 32; - st[ 3] = (u64x) (w1[2].s0, w1[2].s1) | (u64x) (w1[3].s0, w1[3].s1) << 32; - #endif - + st[ 0] = (u64) (w0[0]) | (u64) (w0[1]) << 32; + st[ 1] = (u64) (w0[2]) | (u64) (w0[3]) << 32; + st[ 2] = (u64) (w1[0]) | (u64) (w1[1]) << 32; + st[ 3] = (u64) (w1[2]) | (u64) (w1[3]) << 32; st[ 4] = 0; st[ 5] = 0; st[ 6] = 0; @@ -377,20 +367,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_s04 (__glo u64 st[25]; - #ifdef VECT_SIZE1 - st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; - st[ 1] = (u64x) (w0[2]) | (u64x) (w0[3]) << 32; - st[ 2] = (u64x) (w1[0]) | (u64x) (w1[1]) << 32; - st[ 3] = (u64x) (w1[2]) | (u64x) (w1[3]) << 32; - #endif - - #ifdef VECT_SIZE2 - st[ 0] = (u64x) (w0[0].s0, w0[0].s1) | (u64x) (w0[1].s0, w0[1].s1) << 32; - st[ 1] = (u64x) (w0[2].s0, w0[2].s1) | (u64x) (w0[3].s0, w0[3].s1) << 32; - st[ 2] = (u64x) (w1[0].s0, w1[0].s1) | (u64x) (w1[1].s0, w1[1].s1) << 32; - st[ 3] = (u64x) (w1[2].s0, w1[2].s1) | (u64x) (w1[3].s0, w1[3].s1) << 32; - #endif - + st[ 0] = (u64) (w0[0]) | (u64) (w0[1]) << 32; + st[ 1] = (u64) (w0[2]) | (u64) (w0[3]) << 32; + st[ 2] = (u64) (w1[0]) | (u64) (w1[1]) << 32; + st[ 3] = (u64) (w1[2]) | (u64) (w1[3]) << 32; st[ 4] = 0; st[ 5] = 0; st[ 6] = 0; diff --git a/OpenCL/m05000_a1.cl b/OpenCL/m05000_a1.cl index df47782..b986d15 100644 --- a/OpenCL/m05000_a1.cl +++ b/OpenCL/m05000_a1.cl @@ -220,20 +220,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_m04 (__glo u64 st[25]; - #ifdef VECT_SIZE1 - st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; - st[ 1] = (u64x) (w0[2]) | (u64x) (w0[3]) << 32; - st[ 2] = (u64x) (w1[0]) | (u64x) (w1[1]) << 32; - st[ 3] = (u64x) (w1[2]) | (u64x) (w1[3]) << 32; - #endif - - #ifdef VECT_SIZE2 - st[ 0] = (u64x) (w0[0].s0, w0[0].s1) | (u64x) (w0[1].s0, w0[1].s1) << 32; - st[ 1] = (u64x) (w0[2].s0, w0[2].s1) | (u64x) (w0[3].s0, w0[3].s1) << 32; - st[ 2] = (u64x) (w1[0].s0, w1[0].s1) | (u64x) (w1[1].s0, w1[1].s1) << 32; - st[ 3] = (u64x) (w1[2].s0, w1[2].s1) | (u64x) (w1[3].s0, w1[3].s1) << 32; - #endif - + st[ 0] = (u64) (w0[0]) | (u64) (w0[1]) << 32; + st[ 1] = (u64) (w0[2]) | (u64) (w0[3]) << 32; + st[ 2] = (u64) (w1[0]) | (u64) (w1[1]) << 32; + st[ 3] = (u64) (w1[2]) | (u64) (w1[3]) << 32; st[ 4] = 0; st[ 5] = 0; st[ 6] = 0; @@ -487,20 +477,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_s04 (__glo u64 st[25]; - #ifdef VECT_SIZE1 - st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; - st[ 1] = (u64x) (w0[2]) | (u64x) (w0[3]) << 32; - st[ 2] = (u64x) (w1[0]) | (u64x) (w1[1]) << 32; - st[ 3] = (u64x) (w1[2]) | (u64x) (w1[3]) << 32; - #endif - - #ifdef VECT_SIZE2 - st[ 0] = (u64x) (w0[0].s0, w0[0].s1) | (u64x) (w0[1].s0, w0[1].s1) << 32; - st[ 1] = (u64x) (w0[2].s0, w0[2].s1) | (u64x) (w0[3].s0, w0[3].s1) << 32; - st[ 2] = (u64x) (w1[0].s0, w1[0].s1) | (u64x) (w1[1].s0, w1[1].s1) << 32; - st[ 3] = (u64x) (w1[2].s0, w1[2].s1) | (u64x) (w1[3].s0, w1[3].s1) << 32; - #endif - + st[ 0] = (u64) (w0[0]) | (u64) (w0[1]) << 32; + st[ 1] = (u64) (w0[2]) | (u64) (w0[3]) << 32; + st[ 2] = (u64) (w1[0]) | (u64) (w1[1]) << 32; + st[ 3] = (u64) (w1[2]) | (u64) (w1[3]) << 32; st[ 4] = 0; st[ 5] = 0; st[ 6] = 0; diff --git a/OpenCL/m05000_a3.cl b/OpenCL/m05000_a3.cl index 516aaf2..e273678 100644 --- a/OpenCL/m05000_a3.cl +++ b/OpenCL/m05000_a3.cl @@ -115,28 +115,14 @@ static void m05000m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le u64 st[25]; - #ifdef VECT_SIZE1 - st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; - st[ 1] = (u64x) (w0[2]) | (u64x) (w0[3]) << 32; - st[ 2] = (u64x) (w1[0]) | (u64x) (w1[1]) << 32; - st[ 3] = (u64x) (w1[2]) | (u64x) (w1[3]) << 32; - st[ 4] = (u64x) (w2[0]) | (u64x) (w2[1]) << 32; - st[ 5] = (u64x) (w2[2]) | (u64x) (w2[3]) << 32; - st[ 6] = (u64x) (w3[0]) | (u64x) (w3[1]) << 32; - st[ 7] = (u64x) (w3[2]) | (u64x) (w3[3]) << 32; - #endif - - #ifdef VECT_SIZE2 - st[ 0] = (u64x) (w0[0].s0, w0[0].s1) | (u64x) (w0[1].s0, w0[1].s1) << 32; - st[ 1] = (u64x) (w0[2].s0, w0[2].s1) | (u64x) (w0[3].s0, w0[3].s1) << 32; - st[ 2] = (u64x) (w1[0].s0, w1[0].s1) | (u64x) (w1[1].s0, w1[1].s1) << 32; - st[ 3] = (u64x) (w1[2].s0, w1[2].s1) | (u64x) (w1[3].s0, w1[3].s1) << 32; - st[ 4] = (u64x) (w2[0].s0, w2[0].s1) | (u64x) (w2[1].s0, w2[1].s1) << 32; - st[ 5] = (u64x) (w2[2].s0, w2[2].s1) | (u64x) (w2[3].s0, w2[3].s1) << 32; - st[ 6] = (u64x) (w3[0].s0, w3[0].s1) | (u64x) (w3[1].s0, w3[1].s1) << 32; - st[ 7] = (u64x) (w3[2].s0, w3[2].s1) | (u64x) (w3[3].s0, w3[3].s1) << 32; - #endif - + st[ 0] = (u64) (w0[0]) | (u64) (w0[1]) << 32; + st[ 1] = (u64) (w0[2]) | (u64) (w0[3]) << 32; + st[ 2] = (u64) (w1[0]) | (u64) (w1[1]) << 32; + st[ 3] = (u64) (w1[2]) | (u64) (w1[3]) << 32; + st[ 4] = (u64) (w2[0]) | (u64) (w2[1]) << 32; + st[ 5] = (u64) (w2[2]) | (u64) (w2[3]) << 32; + st[ 6] = (u64) (w3[0]) | (u64) (w3[1]) << 32; + st[ 7] = (u64) (w3[2]) | (u64) (w3[3]) << 32; st[ 8] = 0; st[ 9] = 0; st[10] = 0; @@ -273,28 +259,14 @@ static void m05000s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le u64 st[25]; - #ifdef VECT_SIZE1 - st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; - st[ 1] = (u64x) (w0[2]) | (u64x) (w0[3]) << 32; - st[ 2] = (u64x) (w1[0]) | (u64x) (w1[1]) << 32; - st[ 3] = (u64x) (w1[2]) | (u64x) (w1[3]) << 32; - st[ 4] = (u64x) (w2[0]) | (u64x) (w2[1]) << 32; - st[ 5] = (u64x) (w2[2]) | (u64x) (w2[3]) << 32; - st[ 6] = (u64x) (w3[0]) | (u64x) (w3[1]) << 32; - st[ 7] = (u64x) (w3[2]) | (u64x) (w3[3]) << 32; - #endif - - #ifdef VECT_SIZE2 - st[ 0] = (u64x) (w0[0].s0, w0[0].s1) | (u64x) (w0[1].s0, w0[1].s1) << 32; - st[ 1] = (u64x) (w0[2].s0, w0[2].s1) | (u64x) (w0[3].s0, w0[3].s1) << 32; - st[ 2] = (u64x) (w1[0].s0, w1[0].s1) | (u64x) (w1[1].s0, w1[1].s1) << 32; - st[ 3] = (u64x) (w1[2].s0, w1[2].s1) | (u64x) (w1[3].s0, w1[3].s1) << 32; - st[ 4] = (u64x) (w2[0].s0, w2[0].s1) | (u64x) (w2[1].s0, w2[1].s1) << 32; - st[ 5] = (u64x) (w2[2].s0, w2[2].s1) | (u64x) (w2[3].s0, w2[3].s1) << 32; - st[ 6] = (u64x) (w3[0].s0, w3[0].s1) | (u64x) (w3[1].s0, w3[1].s1) << 32; - st[ 7] = (u64x) (w3[2].s0, w3[2].s1) | (u64x) (w3[3].s0, w3[3].s1) << 32; - #endif - + st[ 0] = (u64) (w0[0]) | (u64) (w0[1]) << 32; + st[ 1] = (u64) (w0[2]) | (u64) (w0[3]) << 32; + st[ 2] = (u64) (w1[0]) | (u64) (w1[1]) << 32; + st[ 3] = (u64) (w1[2]) | (u64) (w1[3]) << 32; + st[ 4] = (u64) (w2[0]) | (u64) (w2[1]) << 32; + st[ 5] = (u64) (w2[2]) | (u64) (w2[3]) << 32; + st[ 6] = (u64) (w3[0]) | (u64) (w3[1]) << 32; + st[ 7] = (u64) (w3[2]) | (u64) (w3[3]) << 32; st[ 8] = 0; st[ 9] = 0; st[10] = 0; diff --git a/OpenCL/m05500_a0.cl b/OpenCL/m05500_a0.cl index 572cbe3..d807ebf 100644 --- a/OpenCL/m05500_a0.cl +++ b/OpenCL/m05500_a0.cl @@ -325,17 +325,7 @@ __constant u32 c_skb[8][64] = __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; -#ifdef VECT_SIZE1 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) -#endif +#define BOX(i,n,S) (S)[(n)][(i)] static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { @@ -440,7 +430,6 @@ static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u static void transform_netntlmv1_key (const u32 w0, const u32 w1, u32 out[2]) { - #ifdef VECT_SIZE1 const uchar4 t0 = as_uchar4 (w0); const uchar4 t1 = as_uchar4 (w1); @@ -458,83 +447,6 @@ static void transform_netntlmv1_key (const u32 w0, const u32 w1, u32 out[2]) out[0] = as_uint (k0); out[1] = as_uint (k1); - #endif - - #ifdef VECT_SIZE2 - const uchar8 t0 = as_uchar8 (w0); - const uchar8 t1 = as_uchar8 (w1); - - uchar8 k0; - uchar8 k1; - - k0.s0 = (t0.s0 >> 0); - k0.s1 = (t0.s0 << 7) | (t0.s1 >> 1); - k0.s2 = (t0.s1 << 6) | (t0.s2 >> 2); - k0.s3 = (t0.s2 << 5) | (t0.s3 >> 3); - k1.s0 = (t0.s3 << 4) | (t1.s0 >> 4); - k1.s1 = (t1.s0 << 3) | (t1.s1 >> 5); - k1.s2 = (t1.s1 << 2) | (t1.s2 >> 6); - k1.s3 = (t1.s2 << 1); - - k0.s4 = (t0.s4 >> 0); - k0.s5 = (t0.s4 << 7) | (t0.s5 >> 1); - k0.s6 = (t0.s5 << 6) | (t0.s6 >> 2); - k0.s7 = (t0.s6 << 5) | (t0.s7 >> 3); - k1.s4 = (t0.s7 << 4) | (t1.s4 >> 4); - k1.s5 = (t1.s4 << 3) | (t1.s5 >> 5); - k1.s6 = (t1.s5 << 2) | (t1.s6 >> 6); - k1.s7 = (t1.s6 << 1); - - out[0] = as_uint2 (k0); - out[1] = as_uint2 (k1); - #endif - - #ifdef VECT_SIZE4 - const uchar16 t0 = as_uchar16 (w0); - const uchar16 t1 = as_uchar16 (w1); - - uchar16 k0; - uchar16 k1; - - k0.s0 = (t0.s0 >> 0); - k0.s1 = (t0.s0 << 7) | (t0.s1 >> 1); - k0.s2 = (t0.s1 << 6) | (t0.s2 >> 2); - k0.s3 = (t0.s2 << 5) | (t0.s3 >> 3); - k1.s0 = (t0.s3 << 4) | (t1.s0 >> 4); - k1.s1 = (t1.s0 << 3) | (t1.s1 >> 5); - k1.s2 = (t1.s1 << 2) | (t1.s2 >> 6); - k1.s3 = (t1.s2 << 1); - - k0.s4 = (t0.s4 >> 0); - k0.s5 = (t0.s4 << 7) | (t0.s5 >> 1); - k0.s6 = (t0.s5 << 6) | (t0.s6 >> 2); - k0.s7 = (t0.s6 << 5) | (t0.s7 >> 3); - k1.s4 = (t0.s7 << 4) | (t1.s4 >> 4); - k1.s5 = (t1.s4 << 3) | (t1.s5 >> 5); - k1.s6 = (t1.s5 << 2) | (t1.s6 >> 6); - k1.s7 = (t1.s6 << 1); - - k0.s8 = (t0.s8 >> 0); - k0.s9 = (t0.s8 << 7) | (t0.s9 >> 1); - k0.sa = (t0.s9 << 6) | (t0.sa >> 2); - k0.sb = (t0.sa << 5) | (t0.sb >> 3); - k1.s8 = (t0.sb << 4) | (t1.s8 >> 4); - k1.s9 = (t1.s8 << 3) | (t1.s9 >> 5); - k1.sa = (t1.s9 << 2) | (t1.sa >> 6); - k1.sb = (t1.sa << 1); - - k0.sc = (t0.sc >> 0); - k0.sd = (t0.sc << 7) | (t0.sd >> 1); - k0.se = (t0.sd << 6) | (t0.se >> 2); - k0.sf = (t0.se << 5) | (t0.sf >> 3); - k1.sc = (t0.sf << 4) | (t1.sc >> 4); - k1.sd = (t1.sc << 3) | (t1.sd >> 5); - k1.se = (t1.sd << 2) | (t1.se >> 6); - k1.sf = (t1.se << 1); - - out[0] = as_uint4 (k0); - out[1] = as_uint4 (k1); - #endif } __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__global pw_t *pws, __global gpu_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) diff --git a/OpenCL/m05500_a1.cl b/OpenCL/m05500_a1.cl index 729e778..f0645f7 100644 --- a/OpenCL/m05500_a1.cl +++ b/OpenCL/m05500_a1.cl @@ -323,17 +323,7 @@ __constant u32 c_skb[8][64] = __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; -#ifdef VECT_SIZE1 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) -#endif +#define BOX(i,n,S) (S)[(n)][(i)] static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { @@ -438,7 +428,6 @@ static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u static void transform_netntlmv1_key (const u32 w0, const u32 w1, u32 out[2]) { - #ifdef VECT_SIZE1 const uchar4 t0 = as_uchar4 (w0); const uchar4 t1 = as_uchar4 (w1); @@ -456,83 +445,6 @@ static void transform_netntlmv1_key (const u32 w0, const u32 w1, u32 out[2]) out[0] = as_uint (k0); out[1] = as_uint (k1); - #endif - - #ifdef VECT_SIZE2 - const uchar8 t0 = as_uchar8 (w0); - const uchar8 t1 = as_uchar8 (w1); - - uchar8 k0; - uchar8 k1; - - k0.s0 = (t0.s0 >> 0); - k0.s1 = (t0.s0 << 7) | (t0.s1 >> 1); - k0.s2 = (t0.s1 << 6) | (t0.s2 >> 2); - k0.s3 = (t0.s2 << 5) | (t0.s3 >> 3); - k1.s0 = (t0.s3 << 4) | (t1.s0 >> 4); - k1.s1 = (t1.s0 << 3) | (t1.s1 >> 5); - k1.s2 = (t1.s1 << 2) | (t1.s2 >> 6); - k1.s3 = (t1.s2 << 1); - - k0.s4 = (t0.s4 >> 0); - k0.s5 = (t0.s4 << 7) | (t0.s5 >> 1); - k0.s6 = (t0.s5 << 6) | (t0.s6 >> 2); - k0.s7 = (t0.s6 << 5) | (t0.s7 >> 3); - k1.s4 = (t0.s7 << 4) | (t1.s4 >> 4); - k1.s5 = (t1.s4 << 3) | (t1.s5 >> 5); - k1.s6 = (t1.s5 << 2) | (t1.s6 >> 6); - k1.s7 = (t1.s6 << 1); - - out[0] = as_uint2 (k0); - out[1] = as_uint2 (k1); - #endif - - #ifdef VECT_SIZE4 - const uchar16 t0 = as_uchar16 (w0); - const uchar16 t1 = as_uchar16 (w1); - - uchar16 k0; - uchar16 k1; - - k0.s0 = (t0.s0 >> 0); - k0.s1 = (t0.s0 << 7) | (t0.s1 >> 1); - k0.s2 = (t0.s1 << 6) | (t0.s2 >> 2); - k0.s3 = (t0.s2 << 5) | (t0.s3 >> 3); - k1.s0 = (t0.s3 << 4) | (t1.s0 >> 4); - k1.s1 = (t1.s0 << 3) | (t1.s1 >> 5); - k1.s2 = (t1.s1 << 2) | (t1.s2 >> 6); - k1.s3 = (t1.s2 << 1); - - k0.s4 = (t0.s4 >> 0); - k0.s5 = (t0.s4 << 7) | (t0.s5 >> 1); - k0.s6 = (t0.s5 << 6) | (t0.s6 >> 2); - k0.s7 = (t0.s6 << 5) | (t0.s7 >> 3); - k1.s4 = (t0.s7 << 4) | (t1.s4 >> 4); - k1.s5 = (t1.s4 << 3) | (t1.s5 >> 5); - k1.s6 = (t1.s5 << 2) | (t1.s6 >> 6); - k1.s7 = (t1.s6 << 1); - - k0.s8 = (t0.s8 >> 0); - k0.s9 = (t0.s8 << 7) | (t0.s9 >> 1); - k0.sa = (t0.s9 << 6) | (t0.sa >> 2); - k0.sb = (t0.sa << 5) | (t0.sb >> 3); - k1.s8 = (t0.sb << 4) | (t1.s8 >> 4); - k1.s9 = (t1.s8 << 3) | (t1.s9 >> 5); - k1.sa = (t1.s9 << 2) | (t1.sa >> 6); - k1.sb = (t1.sa << 1); - - k0.sc = (t0.sc >> 0); - k0.sd = (t0.sc << 7) | (t0.sd >> 1); - k0.se = (t0.sd << 6) | (t0.se >> 2); - k0.sf = (t0.se << 5) | (t0.sf >> 3); - k1.sc = (t0.sf << 4) | (t1.sc >> 4); - k1.sd = (t1.sc << 3) | (t1.sd >> 5); - k1.se = (t1.sd << 2) | (t1.se >> 6); - k1.sf = (t1.se << 1); - - out[0] = as_uint4 (k0); - out[1] = as_uint4 (k1); - #endif } __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) diff --git a/OpenCL/m05500_a3.cl b/OpenCL/m05500_a3.cl index dcbb017..248f226 100644 --- a/OpenCL/m05500_a3.cl +++ b/OpenCL/m05500_a3.cl @@ -323,17 +323,7 @@ __constant u32 c_skb[8][64] = __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; -#ifdef VECT_SIZE1 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) -#endif +#define BOX(i,n,S) (S)[(n)][(i)] static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { @@ -438,7 +428,6 @@ static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u static void transform_netntlmv1_key (const u32 w0, const u32 w1, u32 out[2]) { - #ifdef VECT_SIZE1 const uchar4 t0 = as_uchar4 (w0); const uchar4 t1 = as_uchar4 (w1); @@ -456,83 +445,6 @@ static void transform_netntlmv1_key (const u32 w0, const u32 w1, u32 out[2]) out[0] = as_uint (k0); out[1] = as_uint (k1); - #endif - - #ifdef VECT_SIZE2 - const uchar8 t0 = as_uchar8 (w0); - const uchar8 t1 = as_uchar8 (w1); - - uchar8 k0; - uchar8 k1; - - k0.s0 = (t0.s0 >> 0); - k0.s1 = (t0.s0 << 7) | (t0.s1 >> 1); - k0.s2 = (t0.s1 << 6) | (t0.s2 >> 2); - k0.s3 = (t0.s2 << 5) | (t0.s3 >> 3); - k1.s0 = (t0.s3 << 4) | (t1.s0 >> 4); - k1.s1 = (t1.s0 << 3) | (t1.s1 >> 5); - k1.s2 = (t1.s1 << 2) | (t1.s2 >> 6); - k1.s3 = (t1.s2 << 1); - - k0.s4 = (t0.s4 >> 0); - k0.s5 = (t0.s4 << 7) | (t0.s5 >> 1); - k0.s6 = (t0.s5 << 6) | (t0.s6 >> 2); - k0.s7 = (t0.s6 << 5) | (t0.s7 >> 3); - k1.s4 = (t0.s7 << 4) | (t1.s4 >> 4); - k1.s5 = (t1.s4 << 3) | (t1.s5 >> 5); - k1.s6 = (t1.s5 << 2) | (t1.s6 >> 6); - k1.s7 = (t1.s6 << 1); - - out[0] = as_uint2 (k0); - out[1] = as_uint2 (k1); - #endif - - #ifdef VECT_SIZE4 - const uchar16 t0 = as_uchar16 (w0); - const uchar16 t1 = as_uchar16 (w1); - - uchar16 k0; - uchar16 k1; - - k0.s0 = (t0.s0 >> 0); - k0.s1 = (t0.s0 << 7) | (t0.s1 >> 1); - k0.s2 = (t0.s1 << 6) | (t0.s2 >> 2); - k0.s3 = (t0.s2 << 5) | (t0.s3 >> 3); - k1.s0 = (t0.s3 << 4) | (t1.s0 >> 4); - k1.s1 = (t1.s0 << 3) | (t1.s1 >> 5); - k1.s2 = (t1.s1 << 2) | (t1.s2 >> 6); - k1.s3 = (t1.s2 << 1); - - k0.s4 = (t0.s4 >> 0); - k0.s5 = (t0.s4 << 7) | (t0.s5 >> 1); - k0.s6 = (t0.s5 << 6) | (t0.s6 >> 2); - k0.s7 = (t0.s6 << 5) | (t0.s7 >> 3); - k1.s4 = (t0.s7 << 4) | (t1.s4 >> 4); - k1.s5 = (t1.s4 << 3) | (t1.s5 >> 5); - k1.s6 = (t1.s5 << 2) | (t1.s6 >> 6); - k1.s7 = (t1.s6 << 1); - - k0.s8 = (t0.s8 >> 0); - k0.s9 = (t0.s8 << 7) | (t0.s9 >> 1); - k0.sa = (t0.s9 << 6) | (t0.sa >> 2); - k0.sb = (t0.sa << 5) | (t0.sb >> 3); - k1.s8 = (t0.sb << 4) | (t1.s8 >> 4); - k1.s9 = (t1.s8 << 3) | (t1.s9 >> 5); - k1.sa = (t1.s9 << 2) | (t1.sa >> 6); - k1.sb = (t1.sa << 1); - - k0.sc = (t0.sc >> 0); - k0.sd = (t0.sc << 7) | (t0.sd >> 1); - k0.se = (t0.sd << 6) | (t0.se >> 2); - k0.sf = (t0.se << 5) | (t0.sf >> 3); - k1.sc = (t0.sf << 4) | (t1.sc >> 4); - k1.sd = (t1.sc << 3) | (t1.sd >> 5); - k1.se = (t1.sd << 2) | (t1.se >> 6); - k1.sf = (t1.se << 1); - - out[0] = as_uint4 (k0); - out[1] = as_uint4 (k1); - #endif } static void m05500m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) diff --git a/OpenCL/m06100_a0.cl b/OpenCL/m06100_a0.cl index 0be3a10..9424575 100644 --- a/OpenCL/m06100_a0.cl +++ b/OpenCL/m06100_a0.cl @@ -24,13 +24,7 @@ #define R 10 -#ifdef VECT_SIZE1 -#define BOX(S,n,i) u32 ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(S,n,i) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif +#define BOX(S,n,i) (S)[(n)][(i)] __constant u32 Ch[8][256] = { diff --git a/OpenCL/m06100_a1.cl b/OpenCL/m06100_a1.cl index c62c4c2..1330ea8 100644 --- a/OpenCL/m06100_a1.cl +++ b/OpenCL/m06100_a1.cl @@ -22,13 +22,7 @@ #define R 10 -#ifdef VECT_SIZE1 -#define BOX(S,n,i) u32 ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(S,n,i) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif +#define BOX(S,n,i) (S)[(n)][(i)] __constant u32 Ch[8][256] = { diff --git a/OpenCL/m06100_a3.cl b/OpenCL/m06100_a3.cl index 9794fe9..1ae654d 100644 --- a/OpenCL/m06100_a3.cl +++ b/OpenCL/m06100_a3.cl @@ -22,13 +22,7 @@ #define R 10 -#ifdef VECT_SIZE1 -#define BOX(S,n,i) u32 ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(S,n,i) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif +#define BOX(S,n,i) (S)[(n)][(i)] __constant u32 Ch[8][256] = { diff --git a/OpenCL/m06231.cl b/OpenCL/m06231.cl index 5403f6a..e409e33 100644 --- a/OpenCL/m06231.cl +++ b/OpenCL/m06231.cl @@ -1089,9 +1089,7 @@ __constant u32 Cl[8][256] = }, }; -#ifdef VECT_SIZE1 -#define BOX(S,n,i) (u32) ((S)[(n)][(i)]) -#endif +#define BOX(S,n,i) (S)[(n)][(i)] static void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 s_Ch[8][256], __local u32 s_Cl[8][256]) { diff --git a/OpenCL/m06232.cl b/OpenCL/m06232.cl index 0fbe937..ac96de6 100644 --- a/OpenCL/m06232.cl +++ b/OpenCL/m06232.cl @@ -1089,9 +1089,7 @@ __constant u32 Cl[8][256] = }, }; -#ifdef VECT_SIZE1 -#define BOX(S,n,i) (u32) ((S)[(n)][(i)]) -#endif +#define BOX(S,n,i) (S)[(n)][(i)] static void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 s_Ch[8][256], __local u32 s_Cl[8][256]) { diff --git a/OpenCL/m06233.cl b/OpenCL/m06233.cl index 864dcda..a79446a 100644 --- a/OpenCL/m06233.cl +++ b/OpenCL/m06233.cl @@ -1089,9 +1089,7 @@ __constant u32 Cl[8][256] = }, }; -#ifdef VECT_SIZE1 -#define BOX(S,n,i) (u32) ((S)[(n)][(i)]) -#endif +#define BOX(S,n,i) (S)[(n)][(i)] static void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 s_Ch[8][256], __local u32 s_Cl[8][256]) { diff --git a/OpenCL/m06500.cl b/OpenCL/m06500.cl index 9793c1d..2c556cb 100644 --- a/OpenCL/m06500.cl +++ b/OpenCL/m06500.cl @@ -333,14 +333,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06500_init (__gl u64 w2l[4]; u64 w3l[4]; - w0l[0] = (u64x) (w0[0]) << 32 | (u64x) (w0[1]); - w0l[1] = (u64x) (w0[2]) << 32 | (u64x) (w0[3]); - w0l[2] = (u64x) (w1[0]) << 32 | (u64x) (w1[1]); - w0l[3] = (u64x) (w1[2]) << 32 | (u64x) (w1[3]); - w1l[0] = (u64x) (w2[0]) << 32 | (u64x) (w2[1]); - w1l[1] = (u64x) (w2[2]) << 32 | (u64x) (w2[3]); - w1l[2] = (u64x) (w3[0]) << 32 | (u64x) (w3[1]); - w1l[3] = (u64x) (w3[2]) << 32 | (u64x) (w3[3]); + w0l[0] = (u64) (w0[0]) << 32 | (u64) (w0[1]); + w0l[1] = (u64) (w0[2]) << 32 | (u64) (w0[3]); + w0l[2] = (u64) (w1[0]) << 32 | (u64) (w1[1]); + w0l[3] = (u64) (w1[2]) << 32 | (u64) (w1[3]); + w1l[0] = (u64) (w2[0]) << 32 | (u64) (w2[1]); + w1l[1] = (u64) (w2[2]) << 32 | (u64) (w2[3]); + w1l[2] = (u64) (w3[0]) << 32 | (u64) (w3[1]); + w1l[3] = (u64) (w3[2]) << 32 | (u64) (w3[3]); w2l[0] = 0; w2l[1] = 0; w2l[2] = 0; @@ -373,14 +373,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06500_init (__gl tmps[gid].opad[6] = opad[6]; tmps[gid].opad[7] = opad[7]; - w0l[0] = (u64x) salt_buf0[1] << 32 | (u64x) salt_buf0[0]; - w0l[1] = (u64x) salt_buf0[3] << 32 | (u64x) salt_buf0[2]; - w0l[2] = (u64x) salt_buf1[1] << 32 | (u64x) salt_buf1[0]; - w0l[3] = (u64x) salt_buf1[3] << 32 | (u64x) salt_buf1[2]; - w1l[0] = (u64x) salt_buf2[1] << 32 | (u64x) salt_buf2[0]; - w1l[1] = (u64x) salt_buf2[3] << 32 | (u64x) salt_buf2[2]; - w1l[2] = (u64x) salt_buf3[1] << 32 | (u64x) salt_buf3[0]; - w1l[3] = (u64x) salt_buf3[3] << 32 | (u64x) salt_buf3[2]; + w0l[0] = (u64) salt_buf0[1] << 32 | (u64) salt_buf0[0]; + w0l[1] = (u64) salt_buf0[3] << 32 | (u64) salt_buf0[2]; + w0l[2] = (u64) salt_buf1[1] << 32 | (u64) salt_buf1[0]; + w0l[3] = (u64) salt_buf1[3] << 32 | (u64) salt_buf1[2]; + w1l[0] = (u64) salt_buf2[1] << 32 | (u64) salt_buf2[0]; + w1l[1] = (u64) salt_buf2[3] << 32 | (u64) salt_buf2[2]; + w1l[2] = (u64) salt_buf3[1] << 32 | (u64) salt_buf3[0]; + w1l[3] = (u64) salt_buf3[3] << 32 | (u64) salt_buf3[2]; w2l[0] = 0; w2l[1] = 0; w2l[2] = 0; diff --git a/OpenCL/m06600.cl b/OpenCL/m06600.cl index 6cd6562..9b46654 100644 --- a/OpenCL/m06600.cl +++ b/OpenCL/m06600.cl @@ -17,13 +17,8 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define COMPARE_M "check_multi_vect2_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 te0[256] = { diff --git a/OpenCL/m06800.cl b/OpenCL/m06800.cl index e94c5c5..a14c1d6 100644 --- a/OpenCL/m06800.cl +++ b/OpenCL/m06800.cl @@ -17,13 +17,8 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define COMPARE_M "check_multi_vect2_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 te0[256] = { diff --git a/OpenCL/m06900_a0.cl b/OpenCL/m06900_a0.cl index 1dd627d..2d9c16e 100644 --- a/OpenCL/m06900_a0.cl +++ b/OpenCL/m06900_a0.cl @@ -290,17 +290,11 @@ __constant u32 c_tables[4][256] = } }; -#ifdef VECT_SIZE1 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif +#define BOX(i,n,S) (S)[(n)][(i)] #define round(k1,k2,tbl) \ { \ - u32 t; \ + u32 t; \ t = (k1) + r; \ l ^= BOX (amd_bfe (t, 0, 8), 0, tbl) ^ \ BOX (amd_bfe (t, 8, 8), 1, tbl) ^ \ @@ -315,8 +309,8 @@ __constant u32 c_tables[4][256] = #define R(k,h,s,i,t) \ { \ - u32 r; \ - u32 l; \ + u32 r; \ + u32 l; \ r = h[i + 0]; \ l = h[i + 1]; \ round (k[0], k[1], t); \ diff --git a/OpenCL/m06900_a1.cl b/OpenCL/m06900_a1.cl index 6d252bb..26b0f23 100644 --- a/OpenCL/m06900_a1.cl +++ b/OpenCL/m06900_a1.cl @@ -288,17 +288,11 @@ __constant u32 c_tables[4][256] = } }; -#ifdef VECT_SIZE1 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif +#define BOX(i,n,S) (S)[(n)][(i)] #define round(k1,k2,tbl) \ { \ - u32 t; \ + u32 t; \ t = (k1) + r; \ l ^= BOX (amd_bfe (t, 0, 8), 0, tbl) ^ \ BOX (amd_bfe (t, 8, 8), 1, tbl) ^ \ @@ -313,8 +307,8 @@ __constant u32 c_tables[4][256] = #define R(k,h,s,i,t) \ { \ - u32 r; \ - u32 l; \ + u32 r; \ + u32 l; \ r = h[i + 0]; \ l = h[i + 1]; \ round (k[0], k[1], t); \ diff --git a/OpenCL/m06900_a3.cl b/OpenCL/m06900_a3.cl index 2d6bfbc..bf2bbe1 100644 --- a/OpenCL/m06900_a3.cl +++ b/OpenCL/m06900_a3.cl @@ -288,17 +288,11 @@ __constant u32 c_tables[4][256] = } }; -#ifdef VECT_SIZE1 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif +#define BOX(i,n,S) (S)[(n)][(i)] #define round(k1,k2,tbl) \ { \ - u32 t; \ + u32 t; \ t = (k1) + r; \ l ^= BOX (amd_bfe (t, 0, 8), 0, tbl) ^ \ BOX (amd_bfe (t, 8, 8), 1, tbl) ^ \ @@ -313,8 +307,8 @@ __constant u32 c_tables[4][256] = #define R(k,h,s,i,t) \ { \ - u32 r; \ - u32 l; \ + u32 r; \ + u32 l; \ r = h[i + 0]; \ l = h[i + 1]; \ round (k[0], k[1], t); \ diff --git a/OpenCL/m07500_a0.cl b/OpenCL/m07500_a0.cl index 6e20bab..4f9bf26 100644 --- a/OpenCL/m07500_a0.cl +++ b/OpenCL/m07500_a0.cl @@ -651,8 +651,6 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_m04 (__glo u32 tmp[4]; - #ifdef VECT_SIZE1 - tmp[0] = digest[0]; tmp[1] = digest[1]; tmp[2] = digest[2]; @@ -664,8 +662,6 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_m04 (__glo d_return_buf[lid] = 1; } - - #endif } } @@ -778,8 +774,6 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_s04 (__glo u32 tmp[4]; - #ifdef VECT_SIZE1 - tmp[0] = digest[0]; tmp[1] = digest[1]; tmp[2] = digest[2]; @@ -791,8 +785,6 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_s04 (__glo d_return_buf[lid] = 1; } - - #endif } } diff --git a/OpenCL/m07500_a1.cl b/OpenCL/m07500_a1.cl index 5bbd454..9de9fa2 100644 --- a/OpenCL/m07500_a1.cl +++ b/OpenCL/m07500_a1.cl @@ -703,8 +703,6 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_m04 (__glo u32 tmp[4]; - #ifdef VECT_SIZE1 - tmp[0] = digest[0]; tmp[1] = digest[1]; tmp[2] = digest[2]; @@ -716,8 +714,6 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_m04 (__glo d_return_buf[lid] = 1; } - - #endif } } @@ -884,8 +880,6 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_s04 (__glo u32 tmp[4]; - #ifdef VECT_SIZE1 - tmp[0] = digest[0]; tmp[1] = digest[1]; tmp[2] = digest[2]; @@ -897,8 +891,6 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_s04 (__glo d_return_buf[lid] = 1; } - - #endif } } diff --git a/OpenCL/m07500_a3.cl b/OpenCL/m07500_a3.cl index 9bdbfaf..f34c7a1 100644 --- a/OpenCL/m07500_a3.cl +++ b/OpenCL/m07500_a3.cl @@ -597,8 +597,6 @@ static void m07500 (__local RC4_KEY rc4_keys[64], u32 w0[4], u32 w1[4], u32 w2[4 u32 tmp[4]; - #ifdef VECT_SIZE1 - tmp[0] = digest[0]; tmp[1] = digest[1]; tmp[2] = digest[2]; @@ -610,8 +608,6 @@ static void m07500 (__local RC4_KEY rc4_keys[64], u32 w0[4], u32 w1[4], u32 w2[4 d_return_buf[lid] = 1; } - - #endif } } diff --git a/OpenCL/m07600_a0.cl b/OpenCL/m07600_a0.cl index e0592d3..1e23baf 100644 --- a/OpenCL/m07600_a0.cl +++ b/OpenCL/m07600_a0.cl @@ -22,17 +22,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m07600_a1.cl b/OpenCL/m07600_a1.cl index 0de7114..2ceda7b 100644 --- a/OpenCL/m07600_a1.cl +++ b/OpenCL/m07600_a1.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m07600_a3.cl b/OpenCL/m07600_a3.cl index 9652cd4..a71184b 100644 --- a/OpenCL/m07600_a3.cl +++ b/OpenCL/m07600_a3.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif static void m07600m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { diff --git a/OpenCL/m07700_a0.cl b/OpenCL/m07700_a0.cl index 9d00474..b2b12ba 100644 --- a/OpenCL/m07700_a0.cl +++ b/OpenCL/m07700_a0.cl @@ -66,12 +66,10 @@ static u32 sapb_trans (const u32 in) { u32 out = 0; - #ifdef VECT_SIZE1 out |= (sapb_trans_tbl[(in >> 0) & 0xff]) << 0; out |= (sapb_trans_tbl[(in >> 8) & 0xff]) << 8; out |= (sapb_trans_tbl[(in >> 16) & 0xff]) << 16; out |= (sapb_trans_tbl[(in >> 24) & 0xff]) << 24; - #endif return out; } diff --git a/OpenCL/m07700_a1.cl b/OpenCL/m07700_a1.cl index 399409b..4b4f197 100644 --- a/OpenCL/m07700_a1.cl +++ b/OpenCL/m07700_a1.cl @@ -64,12 +64,10 @@ static u32 sapb_trans (const u32 in) { u32 out = 0; - #ifdef VECT_SIZE1 out |= (sapb_trans_tbl[(in >> 0) & 0xff]) << 0; out |= (sapb_trans_tbl[(in >> 8) & 0xff]) << 8; out |= (sapb_trans_tbl[(in >> 16) & 0xff]) << 16; out |= (sapb_trans_tbl[(in >> 24) & 0xff]) << 24; - #endif return out; } diff --git a/OpenCL/m07700_a3.cl b/OpenCL/m07700_a3.cl index 3dc8268..5f845f0 100644 --- a/OpenCL/m07700_a3.cl +++ b/OpenCL/m07700_a3.cl @@ -64,12 +64,10 @@ static u32 sapb_trans (const u32 in) { u32 out = 0; - #ifdef VECT_SIZE1 out |= (sapb_trans_tbl[(in >> 0) & 0xff]) << 0; out |= (sapb_trans_tbl[(in >> 8) & 0xff]) << 8; out |= (sapb_trans_tbl[(in >> 16) & 0xff]) << 16; out |= (sapb_trans_tbl[(in >> 24) & 0xff]) << 24; - #endif return out; } diff --git a/OpenCL/m07900.cl b/OpenCL/m07900.cl index c6812a7..bb2b995 100644 --- a/OpenCL/m07900.cl +++ b/OpenCL/m07900.cl @@ -17,9 +17,8 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { diff --git a/OpenCL/m08200.cl b/OpenCL/m08200.cl index 9d3b843..0ca1a09 100644 --- a/OpenCL/m08200.cl +++ b/OpenCL/m08200.cl @@ -17,9 +17,8 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 k_sha256[64] = { diff --git a/OpenCL/m08400_a0.cl b/OpenCL/m08400_a0.cl index 00d0971..31343f9 100644 --- a/OpenCL/m08400_a0.cl +++ b/OpenCL/m08400_a0.cl @@ -22,17 +22,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { diff --git a/OpenCL/m08400_a1.cl b/OpenCL/m08400_a1.cl index 36b86af..c5bef54 100644 --- a/OpenCL/m08400_a1.cl +++ b/OpenCL/m08400_a1.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { diff --git a/OpenCL/m08400_a3.cl b/OpenCL/m08400_a3.cl index 59a2c72..07782d7 100644 --- a/OpenCL/m08400_a3.cl +++ b/OpenCL/m08400_a3.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { diff --git a/OpenCL/m08500_a0.cl b/OpenCL/m08500_a0.cl index 02deea4..4e1d8ee 100644 --- a/OpenCL/m08500_a0.cl +++ b/OpenCL/m08500_a0.cl @@ -377,74 +377,14 @@ static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], u32 u = Kc[i] ^ r; u32 t = Kd[i] ^ rotl32 (r, 28u); - #ifdef VECT_SIZE1 - l ^= NBOX (((u >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t >> 26) & 0x3f), 7, s_SPtrans); - #endif - - #ifdef VECT_SIZE2 - l.s0 ^= NBOX (((u.s0 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s0 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s0 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s0 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s0 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s0 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s0 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s0 >> 26) & 0x3f), 7, s_SPtrans); - - l.s1 ^= NBOX (((u.s1 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s1 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s1 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s1 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s1 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s1 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s1 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s1 >> 26) & 0x3f), 7, s_SPtrans); - #endif - - #ifdef VECT_SIZE4 - l.s0 ^= NBOX (((u.s0 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s0 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s0 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s0 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s0 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s0 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s0 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s0 >> 26) & 0x3f), 7, s_SPtrans); - - l.s1 ^= NBOX (((u.s1 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s1 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s1 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s1 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s1 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s1 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s1 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s1 >> 26) & 0x3f), 7, s_SPtrans); - - l.s2 ^= NBOX (((u.s2 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s2 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s2 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s2 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s2 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s2 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s2 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s2 >> 26) & 0x3f), 7, s_SPtrans); - - l.s3 ^= NBOX (((u.s3 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s3 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s3 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s3 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s3 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s3 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s3 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s3 >> 26) & 0x3f), 7, s_SPtrans); - #endif + l ^= NBOX (((u >> 2) & 0x3f), 0, s_SPtrans) + | NBOX (((u >> 10) & 0x3f), 2, s_SPtrans) + | NBOX (((u >> 18) & 0x3f), 4, s_SPtrans) + | NBOX (((u >> 26) & 0x3f), 6, s_SPtrans) + | NBOX (((t >> 2) & 0x3f), 1, s_SPtrans) + | NBOX (((t >> 10) & 0x3f), 3, s_SPtrans) + | NBOX (((t >> 18) & 0x3f), 5, s_SPtrans) + | NBOX (((t >> 26) & 0x3f), 7, s_SPtrans); tt = l; l = r; @@ -488,123 +428,21 @@ static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u u32 s; u32 t; - #ifdef VECT_SIZE1 - s = NBOX ((( c >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c >> 6) & 0x03) - | ((c >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c >> 13) & 0x0f) - | ((c >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c >> 20) & 0x01) - | ((c >> 21) & 0x06) - | ((c >> 22) & 0x38)), 3, s_skb); - - t = NBOX ((( d >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d >> 7) & 0x03) - | ((d >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d >> 21) & 0x0f) - | ((d >> 22) & 0x30)), 7, s_skb); - #endif - - #ifdef VECT_SIZE2 - s.s0 = NBOX ((( c.s0 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s0 >> 6) & 0x03) - | ((c.s0 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s0 >> 13) & 0x0f) - | ((c.s0 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s0 >> 20) & 0x01) - | ((c.s0 >> 21) & 0x06) - | ((c.s0 >> 22) & 0x38)), 3, s_skb); - - t.s0 = NBOX ((( d.s0 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s0 >> 7) & 0x03) - | ((d.s0 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s0 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s0 >> 21) & 0x0f) - | ((d.s0 >> 22) & 0x30)), 7, s_skb); - - s.s1 = NBOX ((( c.s1 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s1 >> 6) & 0x03) - | ((c.s1 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s1 >> 13) & 0x0f) - | ((c.s1 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s1 >> 20) & 0x01) - | ((c.s1 >> 21) & 0x06) - | ((c.s1 >> 22) & 0x38)), 3, s_skb); - - t.s1 = NBOX ((( d.s1 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s1 >> 7) & 0x03) - | ((d.s1 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s1 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s1 >> 21) & 0x0f) - | ((d.s1 >> 22) & 0x30)), 7, s_skb); - #endif - - #ifdef VECT_SIZE4 - s.s0 = NBOX ((( c.s0 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s0 >> 6) & 0x03) - | ((c.s0 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s0 >> 13) & 0x0f) - | ((c.s0 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s0 >> 20) & 0x01) - | ((c.s0 >> 21) & 0x06) - | ((c.s0 >> 22) & 0x38)), 3, s_skb); - - t.s0 = NBOX ((( d.s0 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s0 >> 7) & 0x03) - | ((d.s0 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s0 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s0 >> 21) & 0x0f) - | ((d.s0 >> 22) & 0x30)), 7, s_skb); - - s.s1 = NBOX ((( c.s1 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s1 >> 6) & 0x03) - | ((c.s1 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s1 >> 13) & 0x0f) - | ((c.s1 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s1 >> 20) & 0x01) - | ((c.s1 >> 21) & 0x06) - | ((c.s1 >> 22) & 0x38)), 3, s_skb); - - t.s1 = NBOX ((( d.s1 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s1 >> 7) & 0x03) - | ((d.s1 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s1 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s1 >> 21) & 0x0f) - | ((d.s1 >> 22) & 0x30)), 7, s_skb); - - s.s2 = NBOX ((( c.s2 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s2 >> 6) & 0x03) - | ((c.s2 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s2 >> 13) & 0x0f) - | ((c.s2 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s2 >> 20) & 0x01) - | ((c.s2 >> 21) & 0x06) - | ((c.s2 >> 22) & 0x38)), 3, s_skb); - - t.s2 = NBOX ((( d.s2 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s2 >> 7) & 0x03) - | ((d.s2 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s2 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s2 >> 21) & 0x0f) - | ((d.s2 >> 22) & 0x30)), 7, s_skb); - - s.s3 = NBOX ((( c.s3 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s3 >> 6) & 0x03) - | ((c.s3 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s3 >> 13) & 0x0f) - | ((c.s3 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s3 >> 20) & 0x01) - | ((c.s3 >> 21) & 0x06) - | ((c.s3 >> 22) & 0x38)), 3, s_skb); - - t.s3 = NBOX ((( d.s3 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s3 >> 7) & 0x03) - | ((d.s3 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s3 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s3 >> 21) & 0x0f) - | ((d.s3 >> 22) & 0x30)), 7, s_skb); - #endif + s = NBOX ((( c >> 0) & 0x3f), 0, s_skb) + | NBOX ((((c >> 6) & 0x03) + | ((c >> 7) & 0x3c)), 1, s_skb) + | NBOX ((((c >> 13) & 0x0f) + | ((c >> 14) & 0x30)), 2, s_skb) + | NBOX ((((c >> 20) & 0x01) + | ((c >> 21) & 0x06) + | ((c >> 22) & 0x38)), 3, s_skb); + + t = NBOX ((( d >> 0) & 0x3f), 4, s_skb) + | NBOX ((((d >> 7) & 0x03) + | ((d >> 8) & 0x3c)), 5, s_skb) + | NBOX ((((d >> 15) & 0x3f)), 6, s_skb) + | NBOX ((((d >> 21) & 0x0f) + | ((d >> 22) & 0x30)), 7, s_skb); #if defined cl_amd_media_ops Kc[i] = amd_bytealign (t, s << 16, 2); @@ -621,83 +459,15 @@ static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u static void transform_racf_key (const u32 w0, const u32 w1, u32 key[2]) { - #ifdef VECT_SIZE1 - - key[0] = (ascii_to_ebcdic_pc[(w0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0 >> 24) & 0xff]) << 24; - - key[1] = (ascii_to_ebcdic_pc[(w1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1 >> 24) & 0xff]) << 24; - #endif - - #ifdef VECT_SIZE2 - - key[0].s0 = (ascii_to_ebcdic_pc[(w0.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s0 >> 24) & 0xff]) << 24; - - key[0].s1 = (ascii_to_ebcdic_pc[(w0.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s1 >> 24) & 0xff]) << 24; - - key[1].s0 = (ascii_to_ebcdic_pc[(w1.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s0 >> 24) & 0xff]) << 24; - - key[1].s1 = (ascii_to_ebcdic_pc[(w1.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s1 >> 24) & 0xff]) << 24; - #endif - - #ifdef VECT_SIZE4 - key[0].s0 = (ascii_to_ebcdic_pc[(w0.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s0 >> 24) & 0xff]) << 24; - - key[0].s1 = (ascii_to_ebcdic_pc[(w0.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s1 >> 24) & 0xff]) << 24; - - key[0].s2 = (ascii_to_ebcdic_pc[(w0.s2 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s2 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s2 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s2 >> 24) & 0xff]) << 24; - - key[0].s3 = (ascii_to_ebcdic_pc[(w0.s3 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s3 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s3 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s3 >> 24) & 0xff]) << 24; - - key[1].s0 = (ascii_to_ebcdic_pc[(w1.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s0 >> 24) & 0xff]) << 24; - - key[1].s1 = (ascii_to_ebcdic_pc[(w1.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s1 >> 24) & 0xff]) << 24; - - key[1].s2 = (ascii_to_ebcdic_pc[(w1.s2 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s2 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s2 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s2 >> 24) & 0xff]) << 24; - - key[1].s3 = (ascii_to_ebcdic_pc[(w1.s3 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s3 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s3 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s3 >> 24) & 0xff]) << 24; - #endif + key[0] = (ascii_to_ebcdic_pc[(w0 >> 0) & 0xff]) << 0 + | (ascii_to_ebcdic_pc[(w0 >> 8) & 0xff]) << 8 + | (ascii_to_ebcdic_pc[(w0 >> 16) & 0xff]) << 16 + | (ascii_to_ebcdic_pc[(w0 >> 24) & 0xff]) << 24; + + key[1] = (ascii_to_ebcdic_pc[(w1 >> 0) & 0xff]) << 0 + | (ascii_to_ebcdic_pc[(w1 >> 8) & 0xff]) << 8 + | (ascii_to_ebcdic_pc[(w1 >> 16) & 0xff]) << 16 + | (ascii_to_ebcdic_pc[(w1 >> 24) & 0xff]) << 24; } __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_m04 (__global pw_t *pws, __global gpu_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) diff --git a/OpenCL/m08500_a1.cl b/OpenCL/m08500_a1.cl index 3f58df8..abb38d3 100644 --- a/OpenCL/m08500_a1.cl +++ b/OpenCL/m08500_a1.cl @@ -375,74 +375,14 @@ static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], u32 u = Kc[i] ^ r; u32 t = Kd[i] ^ rotl32 (r, 28u); - #ifdef VECT_SIZE1 - l ^= NBOX (((u >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t >> 26) & 0x3f), 7, s_SPtrans); - #endif - - #ifdef VECT_SIZE2 - l.s0 ^= NBOX (((u.s0 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s0 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s0 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s0 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s0 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s0 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s0 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s0 >> 26) & 0x3f), 7, s_SPtrans); - - l.s1 ^= NBOX (((u.s1 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s1 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s1 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s1 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s1 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s1 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s1 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s1 >> 26) & 0x3f), 7, s_SPtrans); - #endif - - #ifdef VECT_SIZE4 - l.s0 ^= NBOX (((u.s0 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s0 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s0 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s0 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s0 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s0 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s0 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s0 >> 26) & 0x3f), 7, s_SPtrans); - - l.s1 ^= NBOX (((u.s1 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s1 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s1 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s1 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s1 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s1 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s1 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s1 >> 26) & 0x3f), 7, s_SPtrans); - - l.s2 ^= NBOX (((u.s2 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s2 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s2 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s2 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s2 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s2 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s2 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s2 >> 26) & 0x3f), 7, s_SPtrans); - - l.s3 ^= NBOX (((u.s3 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s3 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s3 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s3 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s3 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s3 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s3 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s3 >> 26) & 0x3f), 7, s_SPtrans); - #endif + l ^= NBOX (((u >> 2) & 0x3f), 0, s_SPtrans) + | NBOX (((u >> 10) & 0x3f), 2, s_SPtrans) + | NBOX (((u >> 18) & 0x3f), 4, s_SPtrans) + | NBOX (((u >> 26) & 0x3f), 6, s_SPtrans) + | NBOX (((t >> 2) & 0x3f), 1, s_SPtrans) + | NBOX (((t >> 10) & 0x3f), 3, s_SPtrans) + | NBOX (((t >> 18) & 0x3f), 5, s_SPtrans) + | NBOX (((t >> 26) & 0x3f), 7, s_SPtrans); tt = l; l = r; @@ -486,123 +426,21 @@ static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u u32 s; u32 t; - #ifdef VECT_SIZE1 - s = NBOX ((( c >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c >> 6) & 0x03) - | ((c >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c >> 13) & 0x0f) - | ((c >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c >> 20) & 0x01) - | ((c >> 21) & 0x06) - | ((c >> 22) & 0x38)), 3, s_skb); - - t = NBOX ((( d >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d >> 7) & 0x03) - | ((d >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d >> 21) & 0x0f) - | ((d >> 22) & 0x30)), 7, s_skb); - #endif - - #ifdef VECT_SIZE2 - s.s0 = NBOX ((( c.s0 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s0 >> 6) & 0x03) - | ((c.s0 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s0 >> 13) & 0x0f) - | ((c.s0 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s0 >> 20) & 0x01) - | ((c.s0 >> 21) & 0x06) - | ((c.s0 >> 22) & 0x38)), 3, s_skb); - - t.s0 = NBOX ((( d.s0 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s0 >> 7) & 0x03) - | ((d.s0 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s0 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s0 >> 21) & 0x0f) - | ((d.s0 >> 22) & 0x30)), 7, s_skb); - - s.s1 = NBOX ((( c.s1 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s1 >> 6) & 0x03) - | ((c.s1 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s1 >> 13) & 0x0f) - | ((c.s1 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s1 >> 20) & 0x01) - | ((c.s1 >> 21) & 0x06) - | ((c.s1 >> 22) & 0x38)), 3, s_skb); - - t.s1 = NBOX ((( d.s1 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s1 >> 7) & 0x03) - | ((d.s1 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s1 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s1 >> 21) & 0x0f) - | ((d.s1 >> 22) & 0x30)), 7, s_skb); - #endif - - #ifdef VECT_SIZE4 - s.s0 = NBOX ((( c.s0 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s0 >> 6) & 0x03) - | ((c.s0 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s0 >> 13) & 0x0f) - | ((c.s0 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s0 >> 20) & 0x01) - | ((c.s0 >> 21) & 0x06) - | ((c.s0 >> 22) & 0x38)), 3, s_skb); - - t.s0 = NBOX ((( d.s0 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s0 >> 7) & 0x03) - | ((d.s0 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s0 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s0 >> 21) & 0x0f) - | ((d.s0 >> 22) & 0x30)), 7, s_skb); - - s.s1 = NBOX ((( c.s1 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s1 >> 6) & 0x03) - | ((c.s1 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s1 >> 13) & 0x0f) - | ((c.s1 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s1 >> 20) & 0x01) - | ((c.s1 >> 21) & 0x06) - | ((c.s1 >> 22) & 0x38)), 3, s_skb); - - t.s1 = NBOX ((( d.s1 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s1 >> 7) & 0x03) - | ((d.s1 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s1 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s1 >> 21) & 0x0f) - | ((d.s1 >> 22) & 0x30)), 7, s_skb); - - s.s2 = NBOX ((( c.s2 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s2 >> 6) & 0x03) - | ((c.s2 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s2 >> 13) & 0x0f) - | ((c.s2 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s2 >> 20) & 0x01) - | ((c.s2 >> 21) & 0x06) - | ((c.s2 >> 22) & 0x38)), 3, s_skb); - - t.s2 = NBOX ((( d.s2 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s2 >> 7) & 0x03) - | ((d.s2 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s2 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s2 >> 21) & 0x0f) - | ((d.s2 >> 22) & 0x30)), 7, s_skb); - - s.s3 = NBOX ((( c.s3 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s3 >> 6) & 0x03) - | ((c.s3 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s3 >> 13) & 0x0f) - | ((c.s3 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s3 >> 20) & 0x01) - | ((c.s3 >> 21) & 0x06) - | ((c.s3 >> 22) & 0x38)), 3, s_skb); - - t.s3 = NBOX ((( d.s3 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s3 >> 7) & 0x03) - | ((d.s3 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s3 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s3 >> 21) & 0x0f) - | ((d.s3 >> 22) & 0x30)), 7, s_skb); - #endif + s = NBOX ((( c >> 0) & 0x3f), 0, s_skb) + | NBOX ((((c >> 6) & 0x03) + | ((c >> 7) & 0x3c)), 1, s_skb) + | NBOX ((((c >> 13) & 0x0f) + | ((c >> 14) & 0x30)), 2, s_skb) + | NBOX ((((c >> 20) & 0x01) + | ((c >> 21) & 0x06) + | ((c >> 22) & 0x38)), 3, s_skb); + + t = NBOX ((( d >> 0) & 0x3f), 4, s_skb) + | NBOX ((((d >> 7) & 0x03) + | ((d >> 8) & 0x3c)), 5, s_skb) + | NBOX ((((d >> 15) & 0x3f)), 6, s_skb) + | NBOX ((((d >> 21) & 0x0f) + | ((d >> 22) & 0x30)), 7, s_skb); #if defined cl_amd_media_ops Kc[i] = amd_bytealign (t, s << 16, 2); @@ -619,83 +457,15 @@ static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u static void transform_racf_key (const u32 w0, const u32 w1, u32 key[2]) { - #ifdef VECT_SIZE1 - - key[0] = (ascii_to_ebcdic_pc[(w0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0 >> 24) & 0xff]) << 24; - - key[1] = (ascii_to_ebcdic_pc[(w1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1 >> 24) & 0xff]) << 24; - #endif - - #ifdef VECT_SIZE2 - - key[0].s0 = (ascii_to_ebcdic_pc[(w0.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s0 >> 24) & 0xff]) << 24; - - key[0].s1 = (ascii_to_ebcdic_pc[(w0.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s1 >> 24) & 0xff]) << 24; - - key[1].s0 = (ascii_to_ebcdic_pc[(w1.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s0 >> 24) & 0xff]) << 24; - - key[1].s1 = (ascii_to_ebcdic_pc[(w1.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s1 >> 24) & 0xff]) << 24; - #endif - - #ifdef VECT_SIZE4 - key[0].s0 = (ascii_to_ebcdic_pc[(w0.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s0 >> 24) & 0xff]) << 24; - - key[0].s1 = (ascii_to_ebcdic_pc[(w0.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s1 >> 24) & 0xff]) << 24; - - key[0].s2 = (ascii_to_ebcdic_pc[(w0.s2 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s2 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s2 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s2 >> 24) & 0xff]) << 24; - - key[0].s3 = (ascii_to_ebcdic_pc[(w0.s3 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s3 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s3 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s3 >> 24) & 0xff]) << 24; - - key[1].s0 = (ascii_to_ebcdic_pc[(w1.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s0 >> 24) & 0xff]) << 24; - - key[1].s1 = (ascii_to_ebcdic_pc[(w1.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s1 >> 24) & 0xff]) << 24; - - key[1].s2 = (ascii_to_ebcdic_pc[(w1.s2 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s2 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s2 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s2 >> 24) & 0xff]) << 24; - - key[1].s3 = (ascii_to_ebcdic_pc[(w1.s3 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s3 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s3 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s3 >> 24) & 0xff]) << 24; - #endif + key[0] = (ascii_to_ebcdic_pc[(w0 >> 0) & 0xff]) << 0 + | (ascii_to_ebcdic_pc[(w0 >> 8) & 0xff]) << 8 + | (ascii_to_ebcdic_pc[(w0 >> 16) & 0xff]) << 16 + | (ascii_to_ebcdic_pc[(w0 >> 24) & 0xff]) << 24; + + key[1] = (ascii_to_ebcdic_pc[(w1 >> 0) & 0xff]) << 0 + | (ascii_to_ebcdic_pc[(w1 >> 8) & 0xff]) << 8 + | (ascii_to_ebcdic_pc[(w1 >> 16) & 0xff]) << 16 + | (ascii_to_ebcdic_pc[(w1 >> 24) & 0xff]) << 24; } __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) diff --git a/OpenCL/m08500_a3.cl b/OpenCL/m08500_a3.cl index e9bb6dc..6cccf08 100644 --- a/OpenCL/m08500_a3.cl +++ b/OpenCL/m08500_a3.cl @@ -375,74 +375,14 @@ static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], u32 u = Kc[i] ^ r; u32 t = Kd[i] ^ rotl32 (r, 28u); - #ifdef VECT_SIZE1 - l ^= NBOX (((u >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t >> 26) & 0x3f), 7, s_SPtrans); - #endif - - #ifdef VECT_SIZE2 - l.s0 ^= NBOX (((u.s0 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s0 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s0 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s0 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s0 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s0 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s0 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s0 >> 26) & 0x3f), 7, s_SPtrans); - - l.s1 ^= NBOX (((u.s1 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s1 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s1 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s1 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s1 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s1 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s1 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s1 >> 26) & 0x3f), 7, s_SPtrans); - #endif - - #ifdef VECT_SIZE4 - l.s0 ^= NBOX (((u.s0 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s0 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s0 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s0 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s0 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s0 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s0 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s0 >> 26) & 0x3f), 7, s_SPtrans); - - l.s1 ^= NBOX (((u.s1 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s1 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s1 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s1 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s1 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s1 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s1 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s1 >> 26) & 0x3f), 7, s_SPtrans); - - l.s2 ^= NBOX (((u.s2 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s2 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s2 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s2 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s2 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s2 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s2 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s2 >> 26) & 0x3f), 7, s_SPtrans); - - l.s3 ^= NBOX (((u.s3 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s3 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s3 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s3 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s3 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s3 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s3 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s3 >> 26) & 0x3f), 7, s_SPtrans); - #endif + l ^= NBOX (((u >> 2) & 0x3f), 0, s_SPtrans) + | NBOX (((u >> 10) & 0x3f), 2, s_SPtrans) + | NBOX (((u >> 18) & 0x3f), 4, s_SPtrans) + | NBOX (((u >> 26) & 0x3f), 6, s_SPtrans) + | NBOX (((t >> 2) & 0x3f), 1, s_SPtrans) + | NBOX (((t >> 10) & 0x3f), 3, s_SPtrans) + | NBOX (((t >> 18) & 0x3f), 5, s_SPtrans) + | NBOX (((t >> 26) & 0x3f), 7, s_SPtrans); tt = l; l = r; @@ -486,123 +426,21 @@ static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u u32 s; u32 t; - #ifdef VECT_SIZE1 - s = NBOX ((( c >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c >> 6) & 0x03) - | ((c >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c >> 13) & 0x0f) - | ((c >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c >> 20) & 0x01) - | ((c >> 21) & 0x06) - | ((c >> 22) & 0x38)), 3, s_skb); - - t = NBOX ((( d >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d >> 7) & 0x03) - | ((d >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d >> 21) & 0x0f) - | ((d >> 22) & 0x30)), 7, s_skb); - #endif - - #ifdef VECT_SIZE2 - s.s0 = NBOX ((( c.s0 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s0 >> 6) & 0x03) - | ((c.s0 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s0 >> 13) & 0x0f) - | ((c.s0 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s0 >> 20) & 0x01) - | ((c.s0 >> 21) & 0x06) - | ((c.s0 >> 22) & 0x38)), 3, s_skb); - - t.s0 = NBOX ((( d.s0 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s0 >> 7) & 0x03) - | ((d.s0 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s0 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s0 >> 21) & 0x0f) - | ((d.s0 >> 22) & 0x30)), 7, s_skb); - - s.s1 = NBOX ((( c.s1 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s1 >> 6) & 0x03) - | ((c.s1 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s1 >> 13) & 0x0f) - | ((c.s1 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s1 >> 20) & 0x01) - | ((c.s1 >> 21) & 0x06) - | ((c.s1 >> 22) & 0x38)), 3, s_skb); - - t.s1 = NBOX ((( d.s1 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s1 >> 7) & 0x03) - | ((d.s1 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s1 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s1 >> 21) & 0x0f) - | ((d.s1 >> 22) & 0x30)), 7, s_skb); - #endif - - #ifdef VECT_SIZE4 - s.s0 = NBOX ((( c.s0 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s0 >> 6) & 0x03) - | ((c.s0 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s0 >> 13) & 0x0f) - | ((c.s0 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s0 >> 20) & 0x01) - | ((c.s0 >> 21) & 0x06) - | ((c.s0 >> 22) & 0x38)), 3, s_skb); - - t.s0 = NBOX ((( d.s0 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s0 >> 7) & 0x03) - | ((d.s0 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s0 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s0 >> 21) & 0x0f) - | ((d.s0 >> 22) & 0x30)), 7, s_skb); - - s.s1 = NBOX ((( c.s1 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s1 >> 6) & 0x03) - | ((c.s1 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s1 >> 13) & 0x0f) - | ((c.s1 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s1 >> 20) & 0x01) - | ((c.s1 >> 21) & 0x06) - | ((c.s1 >> 22) & 0x38)), 3, s_skb); - - t.s1 = NBOX ((( d.s1 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s1 >> 7) & 0x03) - | ((d.s1 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s1 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s1 >> 21) & 0x0f) - | ((d.s1 >> 22) & 0x30)), 7, s_skb); - - s.s2 = NBOX ((( c.s2 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s2 >> 6) & 0x03) - | ((c.s2 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s2 >> 13) & 0x0f) - | ((c.s2 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s2 >> 20) & 0x01) - | ((c.s2 >> 21) & 0x06) - | ((c.s2 >> 22) & 0x38)), 3, s_skb); - - t.s2 = NBOX ((( d.s2 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s2 >> 7) & 0x03) - | ((d.s2 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s2 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s2 >> 21) & 0x0f) - | ((d.s2 >> 22) & 0x30)), 7, s_skb); - - s.s3 = NBOX ((( c.s3 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s3 >> 6) & 0x03) - | ((c.s3 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s3 >> 13) & 0x0f) - | ((c.s3 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s3 >> 20) & 0x01) - | ((c.s3 >> 21) & 0x06) - | ((c.s3 >> 22) & 0x38)), 3, s_skb); - - t.s3 = NBOX ((( d.s3 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s3 >> 7) & 0x03) - | ((d.s3 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s3 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s3 >> 21) & 0x0f) - | ((d.s3 >> 22) & 0x30)), 7, s_skb); - #endif + s = NBOX ((( c >> 0) & 0x3f), 0, s_skb) + | NBOX ((((c >> 6) & 0x03) + | ((c >> 7) & 0x3c)), 1, s_skb) + | NBOX ((((c >> 13) & 0x0f) + | ((c >> 14) & 0x30)), 2, s_skb) + | NBOX ((((c >> 20) & 0x01) + | ((c >> 21) & 0x06) + | ((c >> 22) & 0x38)), 3, s_skb); + + t = NBOX ((( d >> 0) & 0x3f), 4, s_skb) + | NBOX ((((d >> 7) & 0x03) + | ((d >> 8) & 0x3c)), 5, s_skb) + | NBOX ((((d >> 15) & 0x3f)), 6, s_skb) + | NBOX ((((d >> 21) & 0x0f) + | ((d >> 22) & 0x30)), 7, s_skb); #if defined cl_amd_media_ops Kc[i] = amd_bytealign (t, s << 16, 2); @@ -619,83 +457,15 @@ static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u static void transform_racf_key (const u32 w0, const u32 w1, u32 key[2]) { - #ifdef VECT_SIZE1 - - key[0] = (ascii_to_ebcdic_pc[(w0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0 >> 24) & 0xff]) << 24; - - key[1] = (ascii_to_ebcdic_pc[(w1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1 >> 24) & 0xff]) << 24; - #endif - - #ifdef VECT_SIZE2 - - key[0].s0 = (ascii_to_ebcdic_pc[(w0.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s0 >> 24) & 0xff]) << 24; - - key[0].s1 = (ascii_to_ebcdic_pc[(w0.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s1 >> 24) & 0xff]) << 24; - - key[1].s0 = (ascii_to_ebcdic_pc[(w1.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s0 >> 24) & 0xff]) << 24; - - key[1].s1 = (ascii_to_ebcdic_pc[(w1.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s1 >> 24) & 0xff]) << 24; - #endif - - #ifdef VECT_SIZE4 - key[0].s0 = (ascii_to_ebcdic_pc[(w0.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s0 >> 24) & 0xff]) << 24; - - key[0].s1 = (ascii_to_ebcdic_pc[(w0.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s1 >> 24) & 0xff]) << 24; - - key[0].s2 = (ascii_to_ebcdic_pc[(w0.s2 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s2 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s2 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s2 >> 24) & 0xff]) << 24; - - key[0].s3 = (ascii_to_ebcdic_pc[(w0.s3 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s3 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s3 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s3 >> 24) & 0xff]) << 24; - - key[1].s0 = (ascii_to_ebcdic_pc[(w1.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s0 >> 24) & 0xff]) << 24; - - key[1].s1 = (ascii_to_ebcdic_pc[(w1.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s1 >> 24) & 0xff]) << 24; - - key[1].s2 = (ascii_to_ebcdic_pc[(w1.s2 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s2 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s2 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s2 >> 24) & 0xff]) << 24; - - key[1].s3 = (ascii_to_ebcdic_pc[(w1.s3 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s3 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s3 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s3 >> 24) & 0xff]) << 24; - #endif + key[0] = (ascii_to_ebcdic_pc[(w0 >> 0) & 0xff]) << 0 + | (ascii_to_ebcdic_pc[(w0 >> 8) & 0xff]) << 8 + | (ascii_to_ebcdic_pc[(w0 >> 16) & 0xff]) << 16 + | (ascii_to_ebcdic_pc[(w0 >> 24) & 0xff]) << 24; + + key[1] = (ascii_to_ebcdic_pc[(w1 >> 0) & 0xff]) << 0 + | (ascii_to_ebcdic_pc[(w1 >> 8) & 0xff]) << 8 + | (ascii_to_ebcdic_pc[(w1 >> 16) & 0xff]) << 16 + | (ascii_to_ebcdic_pc[(w1 >> 24) & 0xff]) << 24; } static void m08500m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) diff --git a/OpenCL/m08600_a0.cl b/OpenCL/m08600_a0.cl index 959fffc..f4b3a50 100644 --- a/OpenCL/m08600_a0.cl +++ b/OpenCL/m08600_a0.cl @@ -58,17 +58,7 @@ __constant u32 lotus_magic_table[256] = 0x29, 0x39, 0xb9, 0xe9, 0x4c, 0xff, 0x43, 0xab, }; -#ifdef VECT_SIZE1 -#define BOX(S,i) u32 ((S)[(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) -#endif +#define BOX(S,i) (S)[(i)] static void lotus_mix (u32 *in, __local u32 s_lotus_magic_table[256]) { diff --git a/OpenCL/m08600_a1.cl b/OpenCL/m08600_a1.cl index da89b02..a202f09 100644 --- a/OpenCL/m08600_a1.cl +++ b/OpenCL/m08600_a1.cl @@ -56,17 +56,7 @@ __constant u32 lotus_magic_table[256] = 0x29, 0x39, 0xb9, 0xe9, 0x4c, 0xff, 0x43, 0xab, }; -#ifdef VECT_SIZE1 -#define BOX(S,i) u32 ((S)[(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) -#endif +#define BOX(S,i) (S)[(i)] static void lotus_mix (u32 *in, __local u32 s_lotus_magic_table[256]) { diff --git a/OpenCL/m08600_a3.cl b/OpenCL/m08600_a3.cl index dfc2d36..d700497 100644 --- a/OpenCL/m08600_a3.cl +++ b/OpenCL/m08600_a3.cl @@ -56,17 +56,7 @@ __constant u32 lotus_magic_table[256] = 0x29, 0x39, 0xb9, 0xe9, 0x4c, 0xff, 0x43, 0xab, }; -#ifdef VECT_SIZE1 -#define BOX(S,i) u32 ((S)[(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) -#endif +#define BOX(S,i) (S)[(i)] static void lotus_mix (u32 *in, __local u32 s_lotus_magic_table[256]) { diff --git a/OpenCL/m08700_a0.cl b/OpenCL/m08700_a0.cl index 753b9a2..88521e9 100644 --- a/OpenCL/m08700_a0.cl +++ b/OpenCL/m08700_a0.cl @@ -58,29 +58,9 @@ __constant u32 lotus_magic_table[256] = 0x29, 0x39, 0xb9, 0xe9, 0x4c, 0xff, 0x43, 0xab, }; -#ifdef VECT_SIZE1 -#define BOX(S,i) (u32x) ((S)[(i)]) -#endif +#define BOX(S,i) (S)[(i)] -#ifdef VECT_SIZE2 -#define BOX(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i)]) -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif +#define uint_to_hex_upper8(i) l_bin2asc[(i)] static void lotus_mix (u32 *in, __local u32 s_lotus_magic_table[256]) { diff --git a/OpenCL/m08700_a1.cl b/OpenCL/m08700_a1.cl index 7515e91..2e0f043 100644 --- a/OpenCL/m08700_a1.cl +++ b/OpenCL/m08700_a1.cl @@ -56,29 +56,9 @@ __constant u32 lotus_magic_table[256] = 0x29, 0x39, 0xb9, 0xe9, 0x4c, 0xff, 0x43, 0xab, }; -#ifdef VECT_SIZE1 -#define BOX(S,i) (u32x) ((S)[(i)]) -#endif +#define BOX(S,i) (S)[(i)] -#ifdef VECT_SIZE2 -#define BOX(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i)]) -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif +#define uint_to_hex_upper8(i) l_bin2asc[(i)] static void lotus_mix (u32 *in, __local u32 s_lotus_magic_table[256]) { diff --git a/OpenCL/m08700_a3.cl b/OpenCL/m08700_a3.cl index 84609cc..2f307de 100644 --- a/OpenCL/m08700_a3.cl +++ b/OpenCL/m08700_a3.cl @@ -56,29 +56,9 @@ __constant u32 lotus_magic_table[256] = 0x29, 0x39, 0xb9, 0xe9, 0x4c, 0xff, 0x43, 0xab, }; -#ifdef VECT_SIZE1 -#define BOX(S,i) (u32x) ((S)[(i)]) -#endif +#define BOX(S,i) (S)[(i)] -#ifdef VECT_SIZE2 -#define BOX(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i)]) -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif +#define uint_to_hex_upper8(i) l_bin2asc[(i)] static void lotus_mix (u32 *in, __local u32 s_lotus_magic_table[256]) { diff --git a/OpenCL/m09000.cl b/OpenCL/m09000.cl index b6a5e92..4f4cd32 100644 --- a/OpenCL/m09000.cl +++ b/OpenCL/m09000.cl @@ -17,9 +17,8 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" // http://www.schneier.com/code/constants.txt diff --git a/OpenCL/m09100.cl b/OpenCL/m09100.cl index f50dcfb..92a86c0 100644 --- a/OpenCL/m09100.cl +++ b/OpenCL/m09100.cl @@ -61,29 +61,9 @@ __constant u32 lotus_magic_table[256] = 0x29, 0x39, 0xb9, 0xe9, 0x4c, 0xff, 0x43, 0xab, }; -#ifdef VECT_SIZE1 -#define BOX(S,i) u32 ((S)[(i)]) -#endif +#define BOX(S,i) (S)[(i)] -#ifdef VECT_SIZE2 -#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_upper8(i) u32 (l_bin2asc[(i)]) -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_upper8(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_upper8(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif +#define uint_to_hex_upper8(i) l_bin2asc[(i)] static void lotus_mix (u32 *in, __local u32 s_lotus_magic_table[256]) { @@ -523,7 +503,6 @@ static void lotus6_base64_encode (u8 base64_hash[24], const u32 salt0, const u32 uchar4 salt0c = as_uchar4 (salt0); uchar4 salt1c = as_uchar4 (salt1); - #ifdef VECT_SIZE1 uchar4 ac; uchar4 bc; uchar4 cc; @@ -531,21 +510,6 @@ static void lotus6_base64_encode (u8 base64_hash[24], const u32 salt0, const u32 ac = as_uchar4 (a); bc = as_uchar4 (b); cc = as_uchar4 (c); - #endif - - #ifdef VECT_SIZE2 - uchar4 ac[2]; - uchar4 bc[2]; - uchar4 cc[2]; - - ac[0] = as_uchar4 (a.s0); - bc[0] = as_uchar4 (b.s0); - cc[0] = as_uchar4 (c.s0); - - ac[1] = as_uchar4 (a.s1); - bc[1] = as_uchar4 (b.s1); - cc[1] = as_uchar4 (c.s1); - #endif u8 tmp[24]; // size 22 (=pw_len) is needed but base64 needs size divisible by 4 @@ -561,8 +525,6 @@ static void lotus6_base64_encode (u8 base64_hash[24], const u32 salt0, const u32 base64_plain[ 3] = salt0c.s3; base64_plain[ 3] -= -4; // dont ask! base64_plain[ 4] = salt1c.s0; - - #ifdef VECT_SIZE1 base64_plain[ 5] = ac.s0; base64_plain[ 6] = ac.s1; base64_plain[ 7] = ac.s2; @@ -603,95 +565,8 @@ static void lotus6_base64_encode (u8 base64_hash[24], const u32 salt0, const u32 base64_hash[19] = tmp[19]; base64_hash[20] = tmp[20]; base64_hash[21] = ')'; - #endif - - #ifdef VECT_SIZE2 - base64_plain[ 5] = ac[0].s0; - base64_plain[ 6] = ac[0].s1; - base64_plain[ 7] = ac[0].s2; - base64_plain[ 8] = ac[0].s3; - base64_plain[ 9] = bc[0].s0; - base64_plain[10] = bc[0].s1; - base64_plain[11] = bc[0].s2; - base64_plain[12] = bc[0].s3; - base64_plain[13] = cc[0].s0; - base64_plain[14] = cc[0].s1; - base64_plain[15] = cc[0].s2; - - /* - * base64 encode the $salt.$digest string - */ - - base64_encode (tmp + 2, 14, base64_plain); - - base64_hash[ 0].s0 = '('; - base64_hash[ 1].s0 = 'G'; - base64_hash[ 2].s0 = tmp[ 2]; - base64_hash[ 3].s0 = tmp[ 3]; - base64_hash[ 4].s0 = tmp[ 4]; - base64_hash[ 5].s0 = tmp[ 5]; - base64_hash[ 6].s0 = tmp[ 6]; - base64_hash[ 7].s0 = tmp[ 7]; - base64_hash[ 8].s0 = tmp[ 8]; - base64_hash[ 9].s0 = tmp[ 9]; - base64_hash[10].s0 = tmp[10]; - base64_hash[11].s0 = tmp[11]; - base64_hash[12].s0 = tmp[12]; - base64_hash[13].s0 = tmp[13]; - base64_hash[14].s0 = tmp[14]; - base64_hash[15].s0 = tmp[15]; - base64_hash[16].s0 = tmp[16]; - base64_hash[17].s0 = tmp[17]; - base64_hash[18].s0 = tmp[18]; - base64_hash[19].s0 = tmp[19]; - base64_hash[20].s0 = tmp[20]; - base64_hash[21].s0 = ')'; - - base64_plain[ 5] = ac[1].s0; - base64_plain[ 6] = ac[1].s1; - base64_plain[ 7] = ac[1].s2; - base64_plain[ 8] = ac[1].s3; - base64_plain[ 9] = bc[1].s0; - base64_plain[10] = bc[1].s1; - base64_plain[11] = bc[1].s2; - base64_plain[12] = bc[1].s3; - base64_plain[13] = cc[1].s0; - base64_plain[14] = cc[1].s1; - base64_plain[15] = cc[1].s2; - - /* - * base64 encode the $salt.$digest string - */ - - base64_encode (tmp + 2, 14, base64_plain); - - base64_hash[ 0].s1 = '('; - base64_hash[ 1].s1 = 'G'; - base64_hash[ 2].s1 = tmp[ 2]; - base64_hash[ 3].s1 = tmp[ 3]; - base64_hash[ 4].s1 = tmp[ 4]; - base64_hash[ 5].s1 = tmp[ 5]; - base64_hash[ 6].s1 = tmp[ 6]; - base64_hash[ 7].s1 = tmp[ 7]; - base64_hash[ 8].s1 = tmp[ 8]; - base64_hash[ 9].s1 = tmp[ 9]; - base64_hash[10].s1 = tmp[10]; - base64_hash[11].s1 = tmp[11]; - base64_hash[12].s1 = tmp[12]; - base64_hash[13].s1 = tmp[13]; - base64_hash[14].s1 = tmp[14]; - base64_hash[15].s1 = tmp[15]; - base64_hash[16].s1 = tmp[16]; - base64_hash[17].s1 = tmp[17]; - base64_hash[18].s1 = tmp[18]; - base64_hash[19].s1 = tmp[19]; - base64_hash[20].s1 = tmp[20]; - base64_hash[21].s1 = ')'; - #endif - } - __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global lotus8_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global wpa_t *wpa_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** @@ -919,62 +794,27 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__gl lotus6_base64_encode (base64_hash, salt_buf0[0], salt_buf0[1], a, b, c); - /** * PBKDF2 - HMACSHA1 - 1st iteration */ - #ifdef VECT_SIZE1 u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = (base64_hash[ 0] << 24) | (base64_hash[ 1] << 16) | (base64_hash[ 2] << 8) | base64_hash[ 3]; w0[1] = (base64_hash[ 4] << 24) | (base64_hash[ 5] << 16) | (base64_hash[ 6] << 8) | base64_hash[ 7]; w0[2] = (base64_hash[ 8] << 24) | (base64_hash[ 9] << 16) | (base64_hash[10] << 8) | base64_hash[11]; w0[3] = (base64_hash[12] << 24) | (base64_hash[13] << 16) | (base64_hash[14] << 8) | base64_hash[15]; - - u32 w1[4]; - w1[0] = (base64_hash[16] << 24) | (base64_hash[17] << 16) | (base64_hash[18] << 8) | base64_hash[19]; w1[1] = (base64_hash[20] << 24) | (base64_hash[21] << 16); w1[2] = 0; w1[3] = 0; - #endif - - #ifdef VECT_SIZE2 - u32 w0[4]; - - w0[0].s0 = (base64_hash[ 0].s0 << 24) | (base64_hash[ 1].s0 << 16) | (base64_hash[ 2].s0 << 8) | base64_hash[ 3].s0; - w0[1].s0 = (base64_hash[ 4].s0 << 24) | (base64_hash[ 5].s0 << 16) | (base64_hash[ 6].s0 << 8) | base64_hash[ 7].s0; - w0[2].s0 = (base64_hash[ 8].s0 << 24) | (base64_hash[ 9].s0 << 16) | (base64_hash[10].s0 << 8) | base64_hash[11].s0; - w0[3].s0 = (base64_hash[12].s0 << 24) | (base64_hash[13].s0 << 16) | (base64_hash[14].s0 << 8) | base64_hash[15].s0; - - w0[0].s1 = (base64_hash[ 0].s1 << 24) | (base64_hash[ 1].s1 << 16) | (base64_hash[ 2].s1 << 8) | base64_hash[ 3].s1; - w0[1].s1 = (base64_hash[ 4].s1 << 24) | (base64_hash[ 5].s1 << 16) | (base64_hash[ 6].s1 << 8) | base64_hash[ 7].s1; - w0[2].s1 = (base64_hash[ 8].s1 << 24) | (base64_hash[ 9].s1 << 16) | (base64_hash[10].s1 << 8) | base64_hash[11].s1; - w0[3].s1 = (base64_hash[12].s1 << 24) | (base64_hash[13].s1 << 16) | (base64_hash[14].s1 << 8) | base64_hash[15].s1; - - u32 w1[4]; - - w1[0].s0 = (base64_hash[16].s0 << 24) | (base64_hash[17].s0 << 16) | (base64_hash[18].s0 << 8) | base64_hash[19].s0; - w1[1].s0 = (base64_hash[20].s0 << 24) | (base64_hash[21].s0 << 16); - w1[2].s0 = 0; - w1[3].s0 = 0; - - w1[0].s1 = (base64_hash[16].s1 << 24) | (base64_hash[17].s1 << 16) | (base64_hash[18].s1 << 8) | base64_hash[19].s1; - w1[1].s1 = (base64_hash[20].s1 << 24) | (base64_hash[21].s1 << 16); - w1[2].s1 = 0; - w1[3].s1 = 0; - #endif - - u32 w2[4]; - w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - - u32 w3[4]; - w3[0] = 0; w3[1] = 0; w3[2] = 0; diff --git a/OpenCL/m10100_a0.cl b/OpenCL/m10100_a0.cl index 1be7d61..902099e 100644 --- a/OpenCL/m10100_a0.cl +++ b/OpenCL/m10100_a0.cl @@ -22,7 +22,6 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define SIPROUND(v0,v1,v2,v3) \ (v0) += (v1); \ (v1) = rotl64 ((v1), 13); \ @@ -38,23 +37,6 @@ (v1) = rotl64 ((v1), 17); \ (v1) ^= (v2); \ (v2) = as_ulong (as_uint2 ((v2)).s10); -#else -#define SIPROUND(v0,v1,v2,v3) \ - (v0) += (v1); \ - (v1) = rotl64 ((v1), 13); \ - (v1) ^= (v0); \ - (v0) = rotl64 ((v0), 32); \ - (v2) += (v3); \ - (v3) = rotl64 ((v3), 16); \ - (v3) ^= (v2); \ - (v0) += (v3); \ - (v3) = rotl64 ((v3), 21); \ - (v3) ^= (v0); \ - (v2) += (v1); \ - (v1) = rotl64 ((v1), 17); \ - (v1) ^= (v2); \ - (v2) = rotl64 ((v2), 32); -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m04 (__global pw_t *pws, __global gpu_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m10100_a1.cl b/OpenCL/m10100_a1.cl index 9461036..d35224f 100644 --- a/OpenCL/m10100_a1.cl +++ b/OpenCL/m10100_a1.cl @@ -20,7 +20,6 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define SIPROUND(v0,v1,v2,v3) \ (v0) += (v1); \ (v1) = rotl64 ((v1), 13); \ @@ -36,23 +35,6 @@ (v1) = rotl64 ((v1), 17); \ (v1) ^= (v2); \ (v2) = as_ulong (as_uint2 ((v2)).s10); -#else -#define SIPROUND(v0,v1,v2,v3) \ - (v0) += (v1); \ - (v1) = rotl64 ((v1), 13); \ - (v1) ^= (v0); \ - (v0) = rotl64 ((v0), 32); \ - (v2) += (v3); \ - (v3) = rotl64 ((v3), 16); \ - (v3) ^= (v2); \ - (v0) += (v3); \ - (v3) = rotl64 ((v3), 21); \ - (v3) ^= (v0); \ - (v2) += (v1); \ - (v1) = rotl64 ((v1), 17); \ - (v1) ^= (v2); \ - (v2) = rotl64 ((v2), 32); -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m10100_a3.cl b/OpenCL/m10100_a3.cl index 10f4907..46be97b 100644 --- a/OpenCL/m10100_a3.cl +++ b/OpenCL/m10100_a3.cl @@ -20,7 +20,6 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define SIPROUND(v0,v1,v2,v3) \ (v0) += (v1); \ (v1) = rotl64 ((v1), 13); \ @@ -36,23 +35,6 @@ (v1) = rotl64 ((v1), 17); \ (v1) ^= (v2); \ (v2) = as_ulong (as_uint2 ((v2)).s10); -#else -#define SIPROUND(v0,v1,v2,v3) \ - (v0) += (v1); \ - (v1) = rotl64 ((v1), 13); \ - (v1) ^= (v0); \ - (v0) = rotl64 ((v0), 32); \ - (v2) += (v3); \ - (v3) = rotl64 ((v3), 16); \ - (v3) ^= (v2); \ - (v0) += (v3); \ - (v3) = rotl64 ((v3), 21); \ - (v3) ^= (v0); \ - (v2) += (v1); \ - (v1) = rotl64 ((v1), 17); \ - (v1) ^= (v2); \ - (v2) = rotl64 ((v2), 32); -#endif static void m10100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { diff --git a/OpenCL/m10900.cl b/OpenCL/m10900.cl index 96c4195..d30290a 100644 --- a/OpenCL/m10900.cl +++ b/OpenCL/m10900.cl @@ -18,9 +18,8 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 k_sha256[64] = { diff --git a/OpenCL/m11100_a0.cl b/OpenCL/m11100_a0.cl index 0b6e8cf..ca2f33f 100644 --- a/OpenCL/m11100_a0.cl +++ b/OpenCL/m11100_a0.cl @@ -22,17 +22,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m11100_a1.cl b/OpenCL/m11100_a1.cl index 2b7d89b..cc2a412 100644 --- a/OpenCL/m11100_a1.cl +++ b/OpenCL/m11100_a1.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m11100_a3.cl b/OpenCL/m11100_a3.cl index dfaa843..ed0fa92 100644 --- a/OpenCL/m11100_a3.cl +++ b/OpenCL/m11100_a3.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif static void m11100m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { diff --git a/OpenCL/m11500_a0.cl b/OpenCL/m11500_a0.cl index 23550ec..51359c0 100644 --- a/OpenCL/m11500_a0.cl +++ b/OpenCL/m11500_a0.cl @@ -96,21 +96,7 @@ static u32 round_crc32 (u32 a, const u32 v) const u32 s = a >> 8; - #ifdef VECT_SIZE1 - a = crc32tab[k]; - #endif - - #ifdef VECT_SIZE2 - a.s0 = crc32tab[k.s0]; - a.s1 = crc32tab[k.s1]; - #endif - - #ifdef VECT_SIZE4 - a.s0 = crc32tab[k.s0]; - a.s1 = crc32tab[k.s1]; - a.s2 = crc32tab[k.s2]; - a.s3 = crc32tab[k.s3]; - #endif + a = crc32tab[k]; a ^= s; diff --git a/OpenCL/m11500_a1.cl b/OpenCL/m11500_a1.cl index b297b98..1d74528 100644 --- a/OpenCL/m11500_a1.cl +++ b/OpenCL/m11500_a1.cl @@ -94,21 +94,7 @@ static u32 round_crc32 (u32 a, const u32 v) const u32 s = a >> 8; - #ifdef VECT_SIZE1 - a = crc32tab[k]; - #endif - - #ifdef VECT_SIZE2 - a.s0 = crc32tab[k.s0]; - a.s1 = crc32tab[k.s1]; - #endif - - #ifdef VECT_SIZE4 - a.s0 = crc32tab[k.s0]; - a.s1 = crc32tab[k.s1]; - a.s2 = crc32tab[k.s2]; - a.s3 = crc32tab[k.s3]; - #endif + a = crc32tab[k]; a ^= s; diff --git a/OpenCL/m11500_a3.cl b/OpenCL/m11500_a3.cl index b29b184..bb09028 100644 --- a/OpenCL/m11500_a3.cl +++ b/OpenCL/m11500_a3.cl @@ -94,21 +94,7 @@ static u32 round_crc32 (u32 a, const u32 v) const u32 s = a >> 8; - #ifdef VECT_SIZE1 - a = crc32tab[k]; - #endif - - #ifdef VECT_SIZE2 - a.s0 = crc32tab[k.s0]; - a.s1 = crc32tab[k.s1]; - #endif - - #ifdef VECT_SIZE4 - a.s0 = crc32tab[k.s0]; - a.s1 = crc32tab[k.s1]; - a.s2 = crc32tab[k.s2]; - a.s3 = crc32tab[k.s3]; - #endif + a = crc32tab[k]; a ^= s; diff --git a/OpenCL/m11600.cl b/OpenCL/m11600.cl index 1ba1fbe..14a7a49 100644 --- a/OpenCL/m11600.cl +++ b/OpenCL/m11600.cl @@ -1073,21 +1073,7 @@ static u32 round_crc32 (u32 a, const u32 v) const u32 s = a >> 8; - #ifdef VECT_SIZE1 - a = crc32tab[k]; - #endif - - #ifdef VECT_SIZE2 - a.s0 = crc32tab[k.s0]; - a.s1 = crc32tab[k.s1]; - #endif - - #ifdef VECT_SIZE4 - a.s0 = crc32tab[k.s0]; - a.s1 = crc32tab[k.s1]; - a.s2 = crc32tab[k.s2]; - a.s3 = crc32tab[k.s3]; - #endif + a = crc32tab[k]; a ^= s; diff --git a/OpenCL/m11900.cl b/OpenCL/m11900.cl index b3fede6..fb09dbb 100644 --- a/OpenCL/m11900.cl +++ b/OpenCL/m11900.cl @@ -18,9 +18,8 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { diff --git a/OpenCL/m12000.cl b/OpenCL/m12000.cl index eeb17ae..4b07762 100644 --- a/OpenCL/m12000.cl +++ b/OpenCL/m12000.cl @@ -18,9 +18,8 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { diff --git a/OpenCL/m12200.cl b/OpenCL/m12200.cl index 298bbc7..b682656 100644 --- a/OpenCL/m12200.cl +++ b/OpenCL/m12200.cl @@ -17,9 +17,8 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { diff --git a/OpenCL/m12300.cl b/OpenCL/m12300.cl index 8d4df81..5f6e0f4 100644 --- a/OpenCL/m12300.cl +++ b/OpenCL/m12300.cl @@ -17,9 +17,8 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { diff --git a/OpenCL/m12400.cl b/OpenCL/m12400.cl index 79cfaae..9ebde0f 100644 --- a/OpenCL/m12400.cl +++ b/OpenCL/m12400.cl @@ -338,17 +338,7 @@ __constant u32 c_skb[8][64] = 0x00002822, 0x04002822, 0x00042822, 0x04042822 }; -#ifdef VECT_SIZE1 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) -#endif +#define BOX(i,n,S) (S)[(n)][(i)] static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64]) { diff --git a/OpenCL/m12500.cl b/OpenCL/m12500.cl index e726518..15fe60a 100644 --- a/OpenCL/m12500.cl +++ b/OpenCL/m12500.cl @@ -18,9 +18,8 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define ROUNDS 0x40000 diff --git a/OpenCL/m12600_a0.cl b/OpenCL/m12600_a0.cl index 33e7cd1..02edce1 100644 --- a/OpenCL/m12600_a0.cl +++ b/OpenCL/m12600_a0.cl @@ -22,17 +22,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_upper8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m12600_a1.cl b/OpenCL/m12600_a1.cl index 978dfbb..0b6ff1c 100644 --- a/OpenCL/m12600_a1.cl +++ b/OpenCL/m12600_a1.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_upper8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { diff --git a/OpenCL/m12600_a3.cl b/OpenCL/m12600_a3.cl index 14addfa..12c3942 100644 --- a/OpenCL/m12600_a3.cl +++ b/OpenCL/m12600_a3.cl @@ -20,17 +20,7 @@ #define COMPARE_S "check_single_comp4.c" #define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_upper8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) -#endif static void m12600m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { diff --git a/OpenCL/m12700.cl b/OpenCL/m12700.cl index 214ceb0..7aeeb42 100644 --- a/OpenCL/m12700.cl +++ b/OpenCL/m12700.cl @@ -17,13 +17,8 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define COMPARE_M "check_multi_vect2_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 te0[256] = { diff --git a/OpenCL/m12800.cl b/OpenCL/m12800.cl index 54375f4..ba58495 100644 --- a/OpenCL/m12800.cl +++ b/OpenCL/m12800.cl @@ -18,13 +18,10 @@ #include "types_ocl.c" #include "common.c" -#ifdef VECT_SIZE1 -#define COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif __constant u32 k_sha256[64] = { -- 2.43.0