SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f,
};
-static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8])
+void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8])
{
u32x a = digest[0];
u32x b = digest[1];
ROUND_STEP (0);
+ #ifdef _unroll
#pragma unroll
+ #endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
digest[7] += h;
}
-static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8])
+void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8])
{
w0[0] = w0[0] ^ 0x36363636;
w0[1] = w0[1] ^ 0x36363636;
sha256_transform (w0, w1, w2, w3, opad);
}
-static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8])
+void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8])
{
digest[0] = ipad[0];
digest[1] = ipad[1];
if (gid >= gid_max) return;
u32 pw_buf0[4];
-
- pw_buf0[0] = pws[gid].i[ 0];
- pw_buf0[1] = pws[gid].i[ 1];
- pw_buf0[2] = pws[gid].i[ 2];
- pw_buf0[3] = pws[gid].i[ 3];
-
u32 pw_buf1[4];
- pw_buf1[0] = pws[gid].i[ 4];
- pw_buf1[1] = pws[gid].i[ 5];
- pw_buf1[2] = pws[gid].i[ 6];
- pw_buf1[3] = pws[gid].i[ 7];
+ pw_buf0[0] = pws[gid].i[0];
+ pw_buf0[1] = pws[gid].i[1];
+ pw_buf0[2] = pws[gid].i[2];
+ pw_buf0[3] = pws[gid].i[3];
+ pw_buf1[0] = pws[gid].i[4];
+ pw_buf1[1] = pws[gid].i[5];
+ pw_buf1[2] = pws[gid].i[6];
+ pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
*/
u32 salt_buf0[4];
-
- salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
- salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
- salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
- salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
-
u32 salt_buf1[4];
-
- salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
- salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
- salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
- salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
+ u32 salt_buf2[4];
+ u32 salt_buf3[4];
+
+ salt_buf0[0] = swap32_S (salt_bufs[salt_pos].salt_buf[ 0]);
+ salt_buf0[1] = swap32_S (salt_bufs[salt_pos].salt_buf[ 1]);
+ salt_buf0[2] = swap32_S (salt_bufs[salt_pos].salt_buf[ 2]);
+ salt_buf0[3] = swap32_S (salt_bufs[salt_pos].salt_buf[ 3]);
+ salt_buf1[0] = swap32_S (salt_bufs[salt_pos].salt_buf[ 4]);
+ salt_buf1[1] = swap32_S (salt_bufs[salt_pos].salt_buf[ 5]);
+ salt_buf1[2] = swap32_S (salt_bufs[salt_pos].salt_buf[ 6]);
+ salt_buf1[3] = swap32_S (salt_bufs[salt_pos].salt_buf[ 7]);
+ salt_buf2[0] = swap32_S (salt_bufs[salt_pos].salt_buf[ 8]);
+ salt_buf2[1] = swap32_S (salt_bufs[salt_pos].salt_buf[ 9]);
+ salt_buf2[2] = swap32_S (salt_bufs[salt_pos].salt_buf[10]);
+ salt_buf2[3] = swap32_S (salt_bufs[salt_pos].salt_buf[11]);
+ salt_buf3[0] = swap32_S (salt_bufs[salt_pos].salt_buf[12]);
+ salt_buf3[1] = swap32_S (salt_bufs[salt_pos].salt_buf[13]);
+ salt_buf3[2] = swap32_S (salt_bufs[salt_pos].salt_buf[14]);
+ salt_buf3[3] = swap32_S (salt_bufs[salt_pos].salt_buf[15]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+
/**
* pads
*/
- u32x w0_t[4];
-
- w0_t[0] = swap32 (w0[0]);
- w0_t[1] = swap32 (w0[1]);
- w0_t[2] = swap32 (w0[2]);
- w0_t[3] = swap32 (w0[3]);
-
- u32x w1_t[4];
-
- w1_t[0] = swap32 (w1[0]);
- w1_t[1] = swap32 (w1[1]);
- w1_t[2] = swap32 (w1[2]);
- w1_t[3] = swap32 (w1[3]);
-
- u32x w2_t[4];
-
- w2_t[0] = 0;
- w2_t[1] = 0;
- w2_t[2] = 0;
- w2_t[3] = 0;
-
- u32x w3_t[4];
-
- w3_t[0] = 0;
- w3_t[1] = 0;
- w3_t[2] = 0;
- w3_t[3] = 0;
-
u32x ipad[8];
u32x opad[8];
- hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
-
- w0_t[0] = swap32 (salt_buf0[0]);
- w0_t[1] = swap32 (salt_buf0[1]);
- w0_t[2] = swap32 (salt_buf0[2]);
- w0_t[3] = swap32 (salt_buf0[3]);
- w1_t[0] = swap32 (salt_buf1[0]);
- w1_t[1] = swap32 (salt_buf1[1]);
- w1_t[2] = swap32 (salt_buf1[2]);
- w1_t[3] = swap32 (salt_buf1[3]);
- w2_t[0] = 0;
- w2_t[1] = 0;
- w2_t[2] = 0;
- w2_t[3] = 0;
- w3_t[0] = 0;
- w3_t[1] = 0;
- w3_t[2] = 0;
- w3_t[3] = (64 + salt_len) * 8;
+ hmac_sha256_pad (w0, w1, w2, w3, ipad, opad);
+
+ w0[0] = salt_buf0[0];
+ w0[1] = salt_buf0[1];
+ w0[2] = salt_buf0[2];
+ w0[3] = salt_buf0[3];
+ w1[0] = salt_buf1[0];
+ w1[1] = salt_buf1[1];
+ w1[2] = salt_buf1[2];
+ w1[3] = salt_buf1[3];
+ w2[0] = salt_buf2[0];
+ w2[1] = salt_buf2[1];
+ w2[2] = salt_buf2[2];
+ w2[3] = salt_buf2[3];
+ w3[0] = salt_buf3[0];
+ w3[1] = salt_buf3[1];
+ w3[2] = 0;
+ w3[3] = (64 + salt_len) * 8;
u32x digest[8];
- hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
+ hmac_sha256_run (w0, w1, w2, w3, ipad, opad, digest);
COMPARE_M_SIMD (digest[3], digest[7], digest[2], digest[6]);
}
if (gid >= gid_max) return;
u32 pw_buf0[4];
-
- pw_buf0[0] = pws[gid].i[ 0];
- pw_buf0[1] = pws[gid].i[ 1];
- pw_buf0[2] = pws[gid].i[ 2];
- pw_buf0[3] = pws[gid].i[ 3];
-
u32 pw_buf1[4];
- pw_buf1[0] = pws[gid].i[ 4];
- pw_buf1[1] = pws[gid].i[ 5];
- pw_buf1[2] = pws[gid].i[ 6];
- pw_buf1[3] = pws[gid].i[ 7];
+ pw_buf0[0] = pws[gid].i[0];
+ pw_buf0[1] = pws[gid].i[1];
+ pw_buf0[2] = pws[gid].i[2];
+ pw_buf0[3] = pws[gid].i[3];
+ pw_buf1[0] = pws[gid].i[4];
+ pw_buf1[1] = pws[gid].i[5];
+ pw_buf1[2] = pws[gid].i[6];
+ pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
*/
u32 salt_buf0[4];
-
- salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
- salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
- salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
- salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
-
u32 salt_buf1[4];
-
- salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
- salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
- salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
- salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
+ u32 salt_buf2[4];
+ u32 salt_buf3[4];
+
+ salt_buf0[0] = swap32_S (salt_bufs[salt_pos].salt_buf[ 0]);
+ salt_buf0[1] = swap32_S (salt_bufs[salt_pos].salt_buf[ 1]);
+ salt_buf0[2] = swap32_S (salt_bufs[salt_pos].salt_buf[ 2]);
+ salt_buf0[3] = swap32_S (salt_bufs[salt_pos].salt_buf[ 3]);
+ salt_buf1[0] = swap32_S (salt_bufs[salt_pos].salt_buf[ 4]);
+ salt_buf1[1] = swap32_S (salt_bufs[salt_pos].salt_buf[ 5]);
+ salt_buf1[2] = swap32_S (salt_bufs[salt_pos].salt_buf[ 6]);
+ salt_buf1[3] = swap32_S (salt_bufs[salt_pos].salt_buf[ 7]);
+ salt_buf2[0] = swap32_S (salt_bufs[salt_pos].salt_buf[ 8]);
+ salt_buf2[1] = swap32_S (salt_bufs[salt_pos].salt_buf[ 9]);
+ salt_buf2[2] = swap32_S (salt_bufs[salt_pos].salt_buf[10]);
+ salt_buf2[3] = swap32_S (salt_bufs[salt_pos].salt_buf[11]);
+ salt_buf3[0] = swap32_S (salt_bufs[salt_pos].salt_buf[12]);
+ salt_buf3[1] = swap32_S (salt_bufs[salt_pos].salt_buf[13]);
+ salt_buf3[2] = swap32_S (salt_bufs[salt_pos].salt_buf[14]);
+ salt_buf3[3] = swap32_S (salt_bufs[salt_pos].salt_buf[15]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+
/**
* pads
*/
- u32x w0_t[4];
-
- w0_t[0] = swap32 (w0[0]);
- w0_t[1] = swap32 (w0[1]);
- w0_t[2] = swap32 (w0[2]);
- w0_t[3] = swap32 (w0[3]);
-
- u32x w1_t[4];
-
- w1_t[0] = swap32 (w1[0]);
- w1_t[1] = swap32 (w1[1]);
- w1_t[2] = swap32 (w1[2]);
- w1_t[3] = swap32 (w1[3]);
-
- u32x w2_t[4];
-
- w2_t[0] = 0;
- w2_t[1] = 0;
- w2_t[2] = 0;
- w2_t[3] = 0;
-
- u32x w3_t[4];
-
- w3_t[0] = 0;
- w3_t[1] = 0;
- w3_t[2] = 0;
- w3_t[3] = 0;
-
u32x ipad[8];
u32x opad[8];
- hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
-
- w0_t[0] = swap32 (salt_buf0[0]);
- w0_t[1] = swap32 (salt_buf0[1]);
- w0_t[2] = swap32 (salt_buf0[2]);
- w0_t[3] = swap32 (salt_buf0[3]);
- w1_t[0] = swap32 (salt_buf1[0]);
- w1_t[1] = swap32 (salt_buf1[1]);
- w1_t[2] = swap32 (salt_buf1[2]);
- w1_t[3] = swap32 (salt_buf1[3]);
- w2_t[0] = 0;
- w2_t[1] = 0;
- w2_t[2] = 0;
- w2_t[3] = 0;
- w3_t[0] = 0;
- w3_t[1] = 0;
- w3_t[2] = 0;
- w3_t[3] = (64 + salt_len) * 8;
+ hmac_sha256_pad (w0, w1, w2, w3, ipad, opad);
+
+ w0[0] = salt_buf0[0];
+ w0[1] = salt_buf0[1];
+ w0[2] = salt_buf0[2];
+ w0[3] = salt_buf0[3];
+ w1[0] = salt_buf1[0];
+ w1[1] = salt_buf1[1];
+ w1[2] = salt_buf1[2];
+ w1[3] = salt_buf1[3];
+ w2[0] = salt_buf2[0];
+ w2[1] = salt_buf2[1];
+ w2[2] = salt_buf2[2];
+ w2[3] = salt_buf2[3];
+ w3[0] = salt_buf3[0];
+ w3[1] = salt_buf3[1];
+ w3[2] = 0;
+ w3[3] = (64 + salt_len) * 8;
u32x digest[8];
- hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
+ hmac_sha256_run (w0, w1, w2, w3, ipad, opad, digest);
COMPARE_S_SIMD (digest[3], digest[7], digest[2], digest[6]);
}