// this is a highly optimized variant that assumes dgst[16] = { 0 }; only reuse it if no 2nd transform is needed
-static void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
+void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
{
u32x Kh[8];
u32x Kl[8];
dgst[15] = statel[7] ^ w[15];
}
-static void m06100m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 (*s_Cl)[256], __local u32 (*s_Ch)[256])
+void m06100m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 (*s_Cl)[256], __local u32 (*s_Ch)[256])
{
/**
* modifier
const u32x w0lr = w0l | w0r;
- u32x wl[16];
-
- wl[ 0] = w0lr;
- wl[ 1] = w0[1];
- wl[ 2] = w0[2];
- wl[ 3] = w0[3];
- wl[ 4] = w1[0];
- wl[ 5] = w1[1];
- wl[ 6] = w1[2];
- wl[ 7] = w1[3];
- wl[ 8] = w2[0];
- wl[ 9] = w2[1];
- wl[10] = w2[2];
- wl[11] = w2[3];
- wl[12] = w3[0];
- wl[13] = w3[1];
- wl[14] = 0;
- wl[15] = pw_len * 8;
+ u32x w[16];
+
+ w[ 0] = w0lr;
+ w[ 1] = w0[1];
+ w[ 2] = w0[2];
+ w[ 3] = w0[3];
+ w[ 4] = w1[0];
+ w[ 5] = w1[1];
+ w[ 6] = w1[2];
+ w[ 7] = w1[3];
+ w[ 8] = w2[0];
+ w[ 9] = w2[1];
+ w[10] = w2[2];
+ w[11] = w2[3];
+ w[12] = w3[0];
+ w[13] = w3[1];
+ w[14] = 0;
+ w[15] = pw_len * 8;
+
+ /**
+ * Whirlpool
+ */
u32x dgst[16];
- whirlpool_transform (wl, dgst, s_Ch, s_Cl);
+ dgst[ 0] = 0;
+ dgst[ 1] = 0;
+ dgst[ 2] = 0;
+ dgst[ 3] = 0;
+ dgst[ 4] = 0;
+ dgst[ 5] = 0;
+ dgst[ 6] = 0;
+ dgst[ 7] = 0;
+ dgst[ 8] = 0;
+ dgst[ 9] = 0;
+ dgst[10] = 0;
+ dgst[11] = 0;
+ dgst[12] = 0;
+ dgst[13] = 0;
+ dgst[14] = 0;
+ dgst[15] = 0;
+
+ whirlpool_transform (w, dgst, s_Ch, s_Cl);
COMPARE_M_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]);
}
}
-static void m06100s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 (*s_Cl)[256], __local u32 (*s_Ch)[256])
+void m06100s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 (*s_Cl)[256], __local u32 (*s_Ch)[256])
{
/**
* modifier
const u32x w0lr = w0l | w0r;
- u32x wl[16];
-
- wl[ 0] = w0lr;
- wl[ 1] = w0[1];
- wl[ 2] = w0[2];
- wl[ 3] = w0[3];
- wl[ 4] = w1[0];
- wl[ 5] = w1[1];
- wl[ 6] = w1[2];
- wl[ 7] = w1[3];
- wl[ 8] = w2[0];
- wl[ 9] = w2[1];
- wl[10] = w2[2];
- wl[11] = w2[3];
- wl[12] = w3[0];
- wl[13] = w3[1];
- wl[14] = 0;
- wl[15] = pw_len * 8;
+ u32x w[16];
+
+ w[ 0] = w0lr;
+ w[ 1] = w0[1];
+ w[ 2] = w0[2];
+ w[ 3] = w0[3];
+ w[ 4] = w1[0];
+ w[ 5] = w1[1];
+ w[ 6] = w1[2];
+ w[ 7] = w1[3];
+ w[ 8] = w2[0];
+ w[ 9] = w2[1];
+ w[10] = w2[2];
+ w[11] = w2[3];
+ w[12] = w3[0];
+ w[13] = w3[1];
+ w[14] = 0;
+ w[15] = pw_len * 8;
+
+ /**
+ * Whirlpool
+ */
u32x dgst[16];
- whirlpool_transform (wl, dgst, s_Ch, s_Cl);
+ dgst[ 0] = 0;
+ dgst[ 1] = 0;
+ dgst[ 2] = 0;
+ dgst[ 3] = 0;
+ dgst[ 4] = 0;
+ dgst[ 5] = 0;
+ dgst[ 6] = 0;
+ dgst[ 7] = 0;
+ dgst[ 8] = 0;
+ dgst[ 9] = 0;
+ dgst[10] = 0;
+ dgst[11] = 0;
+ dgst[12] = 0;
+ dgst[13] = 0;
+ dgst[14] = 0;
+ dgst[15] = 0;
+
+ whirlpool_transform (w, dgst, s_Ch, s_Cl);
COMPARE_S_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]);
}