#define DGST_R3 3
#include "include/kernel_functions.c"
-#include "types_ocl.c"
-#include "common.c"
+#include "OpenCL/types_ocl.c"
+#include "OpenCL/common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
+#define COMPARE_S "OpenCL/check_single_comp4.c"
+#define COMPARE_M "OpenCL/check_multi_comp4.c"
__constant u32 k_sha256[64] =
{
u32 g = digest[6];
u32 h = digest[7];
- u32 w0_t = swap_workaround (w[ 0]);
- u32 w1_t = swap_workaround (w[ 1]);
- u32 w2_t = swap_workaround (w[ 2]);
- u32 w3_t = swap_workaround (w[ 3]);
- u32 w4_t = swap_workaround (w[ 4]);
- u32 w5_t = swap_workaround (w[ 5]);
- u32 w6_t = swap_workaround (w[ 6]);
- u32 w7_t = swap_workaround (w[ 7]);
- u32 w8_t = swap_workaround (w[ 8]);
- u32 w9_t = swap_workaround (w[ 9]);
- u32 wa_t = swap_workaround (w[10]);
- u32 wb_t = swap_workaround (w[11]);
- u32 wc_t = swap_workaround (w[12]);
- u32 wd_t = swap_workaround (w[13]);
- u32 we_t = swap_workaround (w[14]);
- u32 wf_t = swap_workaround (w[15]);
+ u32 w0_t = swap32 (w[ 0]);
+ u32 w1_t = swap32 (w[ 1]);
+ u32 w2_t = swap32 (w[ 2]);
+ u32 w3_t = swap32 (w[ 3]);
+ u32 w4_t = swap32 (w[ 4]);
+ u32 w5_t = swap32 (w[ 5]);
+ u32 w6_t = swap32 (w[ 6]);
+ u32 w7_t = swap32 (w[ 7]);
+ u32 w8_t = swap32 (w[ 8]);
+ u32 w9_t = swap32 (w[ 9]);
+ u32 wa_t = swap32 (w[10]);
+ u32 wb_t = swap32 (w[11]);
+ u32 wc_t = swap32 (w[12]);
+ u32 wd_t = swap32 (w[13]);
+ u32 we_t = swap32 (w[14]);
+ u32 wf_t = swap32 (w[15]);
#define ROUND_EXPAND() \
{ \
/**
 * bswap8: rewrite block[0] .. block[7] in place, replacing each word with
 * the result of the 32-bit swap helper applied to it.
 *
 * The "-" lines are the old calls to swap_workaround (); the "+" lines are
 * the replacement calls to swap32 (). Indices and assignment targets are
 * identical on both sides, so only the helper name changes.
 *
 * NOTE(review): only the first 8 words are touched even though the
 * parameter is declared as u32[16]; block[8] .. block[15] are left as-is.
 * Presumably callers only need a 32-byte digest converted - confirm.
 * NOTE(review): swap32 () is defined elsewhere; presumably it reverses the
 * byte order of a 32-bit word - confirm against the included common code.
 */
static void bswap8 (u32 block[16])
{
- block[ 0] = swap_workaround (block[ 0]);
- block[ 1] = swap_workaround (block[ 1]);
- block[ 2] = swap_workaround (block[ 2]);
- block[ 3] = swap_workaround (block[ 3]);
- block[ 4] = swap_workaround (block[ 4]);
- block[ 5] = swap_workaround (block[ 5]);
- block[ 6] = swap_workaround (block[ 6]);
- block[ 7] = swap_workaround (block[ 7]);
+ block[ 0] = swap32 (block[ 0]);
+ block[ 1] = swap32 (block[ 1]);
+ block[ 2] = swap32 (block[ 2]);
+ block[ 3] = swap32 (block[ 3]);
+ block[ 4] = swap32 (block[ 4]);
+ block[ 5] = swap32 (block[ 5]);
+ block[ 6] = swap32 (block[ 6]);
+ block[ 7] = swap32 (block[ 7]);
}
static u32 memcat16 (u32 block[16], const u32 block_len, const u32 append[4], const u32 append_len)
u32 tmp3;
u32 tmp4;
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - block_len;
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp3 = tmp4;
tmp4 = 0;
}
+ #endif
+
+ #ifdef IS_NV
+ const int offset_minus_4 = 4 - (block_len & 3);
+
+ const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+
+ tmp0 = __byte_perm ( 0, append[0], selector);
+ tmp1 = __byte_perm (append[0], append[1], selector);
+ tmp2 = __byte_perm (append[1], append[2], selector);
+ tmp3 = __byte_perm (append[2], append[3], selector);
+ tmp4 = __byte_perm (append[3], 0, selector);
+ #endif
switch (div)
{
u32 tmp3;
u32 tmp4;
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - block_len;
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp3 = tmp4;
tmp4 = 0;
}
+ #endif
+
+ #ifdef IS_NV
+ const int offset_minus_4 = 4 - (block_len & 3);
+
+ const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+
+ tmp0 = __byte_perm ( 0, append[0], selector);
+ tmp1 = __byte_perm (append[0], append[1], selector);
+ tmp2 = __byte_perm (append[1], append[2], selector);
+ tmp3 = __byte_perm (append[2], append[3], selector);
+ tmp4 = __byte_perm (append[3], 0, selector);
+ #endif
u32 carry[4] = { 0, 0, 0, 0 };
u32 tmp3;
u32 tmp4;
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - block_len;
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp3 = tmp4;
tmp4 = 0;
}
+ #endif
+
+ #ifdef IS_NV
+ const int offset_minus_4 = 4 - (block_len & 3);
+
+ const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+
+ tmp0 = __byte_perm ( 0, append[0], selector);
+ tmp1 = __byte_perm (append[0], append[1], selector);
+ tmp2 = __byte_perm (append[1], append[2], selector);
+ tmp3 = __byte_perm (append[2], append[3], selector);
+ tmp4 = __byte_perm (append[3], 0, selector);
+ #endif
switch (div)
{
u32 tmp3;
u32 tmp4;
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - block_len;
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp3 = tmp4;
tmp4 = 0x80;
}
+ #endif
+
+ #ifdef IS_NV
+ const int offset_minus_4 = 4 - (block_len & 3);
+
+ const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+
+ tmp0 = __byte_perm ( 0, append[0], selector);
+ tmp1 = __byte_perm (append[0], append[1], selector);
+ tmp2 = __byte_perm (append[1], append[2], selector);
+ tmp3 = __byte_perm (append[2], append[3], selector);
+ tmp4 = __byte_perm (append[3], 0x80, selector);
+ #endif
switch (div)
{
return block_len + append_len;
}
-__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07400_init (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
+__kernel void m07400_init (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
block_len = memcat16 (block, block_len, w0, pw_len);
- append_0x80_4 (block, block_len);
+ append_0x80_1x16 (block, block_len);
- block[15] = swap_workaround (block_len * 8);
+ block[15] = swap32 (block_len * 8);
init_ctx (alt_result);
}
}
- append_0x80_4 (block, block_len);
+ append_0x80_1x16 (block, block_len);
if (block_len >= 56)
{
bzero16 (block);
}
- block[15] = swap_workaround (transform_len * 8);
+ block[15] = swap32 (transform_len * 8);
sha256_transform (block, alt_result);
/* Finish the digest. */
- append_0x80_4 (block, block_len);
+ append_0x80_1x16 (block, block_len);
if (block_len >= 56)
{
bzero16 (block);
}
- block[15] = swap_workaround (transform_len * 8);
+ block[15] = swap32 (transform_len * 8);
sha256_transform (block, p_bytes);
/* Finish the digest. */
- append_0x80_4 (block, block_len);
+ append_0x80_1x16 (block, block_len);
if (block_len >= 56)
{
bzero16 (block);
}
- block[15] = swap_workaround (transform_len * 8);
+ block[15] = swap32 (transform_len * 8);
sha256_transform (block, s_bytes);
tmps[gid].s_bytes[3] = s_bytes[3];
}
-__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07400_loop (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
+__kernel void m07400_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
p_bytes_x80[2] = tmps[gid].p_bytes[2];
p_bytes_x80[3] = tmps[gid].p_bytes[3];
- append_0x80_1 (p_bytes_x80, pw_len);
+ append_0x80_1x4 (p_bytes_x80, pw_len);
u32 s_bytes[4];
block[15] = 0;
}
- block[15] = swap_workaround (block_len * 8);
+ block[15] = swap32 (block_len * 8);
sha256_transform_no14 (block, tmp);
tmps[gid].alt_result[7] = alt_result[7];
}
-__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07400_comp (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
+__kernel void m07400_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base