/**
- * Author......: Jens Steube <jens.steube@gmail.com>
+ * Authors.....: Jens Steube <jens.steube@gmail.com>
+ * magnum <john.magnum@hushmail.com>
+ *
* License.....: MIT
*/
/*
 * Forward declarations of the rule-application entry points.
 * This change adds the `inline` specifier to all three declarations and
 * changes the return type of apply_rules_vect from u32 to u32x; the
 * parameter lists are unchanged.
 */
-u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len);
-u32 apply_rules (const __global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len);
-u32 apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, const __global kernel_rule_t *rules_buf, const u32 il_pos, u32x w0[4], u32x w1[4]);
+inline u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len);
+inline u32 apply_rules (const __global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len);
+inline u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, const __global kernel_rule_t *rules_buf, const u32 il_pos, u32x w0[4], u32x w1[4]);
-static u32 generate_cmask (u32 buf)
+inline u32 generate_cmask (u32 buf)
{
const u32 rmask = ((buf & 0x40404040) >> 1)
& ~((buf & 0x80808080) >> 2);
return rmask & ~hmask & lmask;
}
-static void truncate_right (u32 w0[4], u32 w1[4], const u32 len)
+inline void truncate_right (u32 w0[4], u32 w1[4], const u32 len)
{
const u32 tmp = (1 << ((len % 4) * 8)) - 1;
}
}
-static void truncate_left (u32 w0[4], u32 w1[4], const u32 len)
+inline void truncate_left (u32 w0[4], u32 w1[4], const u32 len)
{
const u32 tmp = ~((1 << ((len % 4) * 8)) - 1);
}
}
/*
 * lshift_block: fills out0[0..3] and out1[0..3] from in0[0..3] and in1[0..3].
 * Each output word is built from the input word at the same position and the
 * following input word (in0[3] is paired with in1[0]; in1[3] is paired with 0).
 *
 * Under IS_NV the pair is combined with __byte_perm_S and selector 0x4321;
 * under IS_AMD or IS_GENERIC with amd_bytealign_S and a last argument of 1.
 *
 * This change replaces `static` with `inline` and swaps __byte_perm /
 * amd_bytealign for their _S-suffixed counterparts, arguments unchanged.
 *
 * NOTE(review): presumably the _S helpers are the scalar (u32) forms and both
 * paths drop the lowest-addressed byte of the 32-byte buffer, moving every
 * other byte down by one -- confirm against the helper definitions.
 */
-static void lshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
+inline void lshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
{
/* Words are written in ascending order; each statement reads only the word at its own index and the next one. */
#ifdef IS_NV
- out0[0] = __byte_perm (in0[0], in0[1], 0x4321);
- out0[1] = __byte_perm (in0[1], in0[2], 0x4321);
- out0[2] = __byte_perm (in0[2], in0[3], 0x4321);
- out0[3] = __byte_perm (in0[3], in1[0], 0x4321);
- out1[0] = __byte_perm (in1[0], in1[1], 0x4321);
- out1[1] = __byte_perm (in1[1], in1[2], 0x4321);
- out1[2] = __byte_perm (in1[2], in1[3], 0x4321);
- out1[3] = __byte_perm (in1[3], 0, 0x4321);
+ out0[0] = __byte_perm_S (in0[0], in0[1], 0x4321);
+ out0[1] = __byte_perm_S (in0[1], in0[2], 0x4321);
+ out0[2] = __byte_perm_S (in0[2], in0[3], 0x4321);
+ out0[3] = __byte_perm_S (in0[3], in1[0], 0x4321);
+ out1[0] = __byte_perm_S (in1[0], in1[1], 0x4321);
+ out1[1] = __byte_perm_S (in1[1], in1[2], 0x4321);
+ out1[2] = __byte_perm_S (in1[2], in1[3], 0x4321);
+ out1[3] = __byte_perm_S (in1[3], 0, 0x4321);
#endif
/* Same word pairing as the IS_NV path, but the following word is passed as the first argument. */
#if defined IS_AMD || defined IS_GENERIC
- out0[0] = amd_bytealign (in0[1], in0[0], 1);
- out0[1] = amd_bytealign (in0[2], in0[1], 1);
- out0[2] = amd_bytealign (in0[3], in0[2], 1);
- out0[3] = amd_bytealign (in1[0], in0[3], 1);
- out1[0] = amd_bytealign (in1[1], in1[0], 1);
- out1[1] = amd_bytealign (in1[2], in1[1], 1);
- out1[2] = amd_bytealign (in1[3], in1[2], 1);
- out1[3] = amd_bytealign ( 0, in1[3], 1);
+ out0[0] = amd_bytealign_S (in0[1], in0[0], 1);
+ out0[1] = amd_bytealign_S (in0[2], in0[1], 1);
+ out0[2] = amd_bytealign_S (in0[3], in0[2], 1);
+ out0[3] = amd_bytealign_S (in1[0], in0[3], 1);
+ out1[0] = amd_bytealign_S (in1[1], in1[0], 1);
+ out1[1] = amd_bytealign_S (in1[2], in1[1], 1);
+ out1[2] = amd_bytealign_S (in1[3], in1[2], 1);
+ out1[3] = amd_bytealign_S ( 0, in1[3], 1);
#endif
}
/*
 * rshift_block: fills out1[3..0] and out0[3..0] from in0[0..3] and in1[0..3].
 * Each output word is built from the input word at the same position and the
 * preceding input word (in1[0] is paired with in0[3]; in0[0] is paired with 0).
 *
 * Under IS_NV the pair is combined with __byte_perm_S and selector 0x6543;
 * under IS_AMD or IS_GENERIC with amd_bytealign_S and a last argument of 3.
 *
 * This change replaces `static` with `inline` and swaps __byte_perm /
 * amd_bytealign for their _S-suffixed counterparts, arguments unchanged.
 *
 * NOTE(review): presumably the _S helpers are the scalar (u32) forms and both
 * paths insert a zero byte at the lowest address of the 32-byte buffer, moving
 * every other byte up by one -- confirm against the helper definitions.
 */
-static void rshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
+inline void rshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
{
/* Words are written in descending order; each statement reads only the word at its own index and the previous one. */
#ifdef IS_NV
- out1[3] = __byte_perm (in1[2], in1[3], 0x6543);
- out1[2] = __byte_perm (in1[1], in1[2], 0x6543);
- out1[1] = __byte_perm (in1[0], in1[1], 0x6543);
- out1[0] = __byte_perm (in0[3], in1[0], 0x6543);
- out0[3] = __byte_perm (in0[2], in0[3], 0x6543);
- out0[2] = __byte_perm (in0[1], in0[2], 0x6543);
- out0[1] = __byte_perm (in0[0], in0[1], 0x6543);
- out0[0] = __byte_perm ( 0, in0[0], 0x6543);
+ out1[3] = __byte_perm_S (in1[2], in1[3], 0x6543);
+ out1[2] = __byte_perm_S (in1[1], in1[2], 0x6543);
+ out1[1] = __byte_perm_S (in1[0], in1[1], 0x6543);
+ out1[0] = __byte_perm_S (in0[3], in1[0], 0x6543);
+ out0[3] = __byte_perm_S (in0[2], in0[3], 0x6543);
+ out0[2] = __byte_perm_S (in0[1], in0[2], 0x6543);
+ out0[1] = __byte_perm_S (in0[0], in0[1], 0x6543);
+ out0[0] = __byte_perm_S ( 0, in0[0], 0x6543);
#endif
/* Same word pairing as the IS_NV path, but the word at the output index is passed as the first argument. */
#if defined IS_AMD || defined IS_GENERIC
- out1[3] = amd_bytealign (in1[3], in1[2], 3);
- out1[2] = amd_bytealign (in1[2], in1[1], 3);
- out1[1] = amd_bytealign (in1[1], in1[0], 3);
- out1[0] = amd_bytealign (in1[0], in0[3], 3);
- out0[3] = amd_bytealign (in0[3], in0[2], 3);
- out0[2] = amd_bytealign (in0[2], in0[1], 3);
- out0[1] = amd_bytealign (in0[1], in0[0], 3);
- out0[0] = amd_bytealign (in0[0], 0, 3);
+ out1[3] = amd_bytealign_S (in1[3], in1[2], 3);
+ out1[2] = amd_bytealign_S (in1[2], in1[1], 3);
+ out1[1] = amd_bytealign_S (in1[1], in1[0], 3);
+ out1[0] = amd_bytealign_S (in1[0], in0[3], 3);
+ out0[3] = amd_bytealign_S (in0[3], in0[2], 3);
+ out0[2] = amd_bytealign_S (in0[2], in0[1], 3);
+ out0[1] = amd_bytealign_S (in0[1], in0[0], 3);
+ out0[0] = amd_bytealign_S (in0[0], 0, 3);
#endif
}
-static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
+inline void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
{
#ifdef IS_NV
switch (num)
out1[2] = in1[2];
out1[3] = in1[3];
break;
- case 1: out0[0] = __byte_perm (in0[0], in0[1], 0x4321);
- out0[1] = __byte_perm (in0[1], in0[2], 0x4321);
- out0[2] = __byte_perm (in0[2], in0[3], 0x4321);
- out0[3] = __byte_perm (in0[3], in1[0], 0x4321);
- out1[0] = __byte_perm (in1[0], in1[1], 0x4321);
- out1[1] = __byte_perm (in1[1], in1[2], 0x4321);
- out1[2] = __byte_perm (in1[2], in1[3], 0x4321);
- out1[3] = __byte_perm (in1[3], 0, 0x4321);
- break;
- case 2: out0[0] = __byte_perm (in0[0], in0[1], 0x5432);
- out0[1] = __byte_perm (in0[1], in0[2], 0x5432);
- out0[2] = __byte_perm (in0[2], in0[3], 0x5432);
- out0[3] = __byte_perm (in0[3], in1[0], 0x5432);
- out1[0] = __byte_perm (in1[0], in1[1], 0x5432);
- out1[1] = __byte_perm (in1[1], in1[2], 0x5432);
- out1[2] = __byte_perm (in1[2], in1[3], 0x5432);
- out1[3] = __byte_perm (in1[3], 0, 0x5432);
- break;
- case 3: out0[0] = __byte_perm (in0[0], in0[1], 0x6543);
- out0[1] = __byte_perm (in0[1], in0[2], 0x6543);
- out0[2] = __byte_perm (in0[2], in0[3], 0x6543);
- out0[3] = __byte_perm (in0[3], in1[0], 0x6543);
- out1[0] = __byte_perm (in1[0], in1[1], 0x6543);
- out1[1] = __byte_perm (in1[1], in1[2], 0x6543);
- out1[2] = __byte_perm (in1[2], in1[3], 0x6543);
- out1[3] = __byte_perm (in1[3], 0, 0x6543);
+ case 1: out0[0] = __byte_perm_S (in0[0], in0[1], 0x4321);
+ out0[1] = __byte_perm_S (in0[1], in0[2], 0x4321);
+ out0[2] = __byte_perm_S (in0[2], in0[3], 0x4321);
+ out0[3] = __byte_perm_S (in0[3], in1[0], 0x4321);
+ out1[0] = __byte_perm_S (in1[0], in1[1], 0x4321);
+ out1[1] = __byte_perm_S (in1[1], in1[2], 0x4321);
+ out1[2] = __byte_perm_S (in1[2], in1[3], 0x4321);
+ out1[3] = __byte_perm_S (in1[3], 0, 0x4321);
+ break;
+ case 2: out0[0] = __byte_perm_S (in0[0], in0[1], 0x5432);
+ out0[1] = __byte_perm_S (in0[1], in0[2], 0x5432);
+ out0[2] = __byte_perm_S (in0[2], in0[3], 0x5432);
+ out0[3] = __byte_perm_S (in0[3], in1[0], 0x5432);
+ out1[0] = __byte_perm_S (in1[0], in1[1], 0x5432);
+ out1[1] = __byte_perm_S (in1[1], in1[2], 0x5432);
+ out1[2] = __byte_perm_S (in1[2], in1[3], 0x5432);
+ out1[3] = __byte_perm_S (in1[3], 0, 0x5432);
+ break;
+ case 3: out0[0] = __byte_perm_S (in0[0], in0[1], 0x6543);
+ out0[1] = __byte_perm_S (in0[1], in0[2], 0x6543);
+ out0[2] = __byte_perm_S (in0[2], in0[3], 0x6543);
+ out0[3] = __byte_perm_S (in0[3], in1[0], 0x6543);
+ out1[0] = __byte_perm_S (in1[0], in1[1], 0x6543);
+ out1[1] = __byte_perm_S (in1[1], in1[2], 0x6543);
+ out1[2] = __byte_perm_S (in1[2], in1[3], 0x6543);
+ out1[3] = __byte_perm_S (in1[3], 0, 0x6543);
break;
case 4: out0[0] = in0[1];
out0[1] = in0[2];
out1[2] = in1[3];
out1[3] = 0;
break;
- case 5: out0[0] = __byte_perm (in0[1], in0[2], 0x4321);
- out0[1] = __byte_perm (in0[2], in0[3], 0x4321);
- out0[2] = __byte_perm (in0[3], in1[0], 0x4321);
- out0[3] = __byte_perm (in1[0], in1[1], 0x4321);
- out1[0] = __byte_perm (in1[1], in1[2], 0x4321);
- out1[1] = __byte_perm (in1[2], in1[3], 0x4321);
- out1[2] = __byte_perm (in1[3], 0, 0x4321);
+ case 5: out0[0] = __byte_perm_S (in0[1], in0[2], 0x4321);
+ out0[1] = __byte_perm_S (in0[2], in0[3], 0x4321);
+ out0[2] = __byte_perm_S (in0[3], in1[0], 0x4321);
+ out0[3] = __byte_perm_S (in1[0], in1[1], 0x4321);
+ out1[0] = __byte_perm_S (in1[1], in1[2], 0x4321);
+ out1[1] = __byte_perm_S (in1[2], in1[3], 0x4321);
+ out1[2] = __byte_perm_S (in1[3], 0, 0x4321);
out1[3] = 0;
break;
- case 6: out0[0] = __byte_perm (in0[1], in0[2], 0x5432);
- out0[1] = __byte_perm (in0[2], in0[3], 0x5432);
- out0[2] = __byte_perm (in0[3], in1[0], 0x5432);
- out0[3] = __byte_perm (in1[0], in1[1], 0x5432);
- out1[0] = __byte_perm (in1[1], in1[2], 0x5432);
- out1[1] = __byte_perm (in1[2], in1[3], 0x5432);
- out1[2] = __byte_perm (in1[3], 0, 0x5432);
+ case 6: out0[0] = __byte_perm_S (in0[1], in0[2], 0x5432);
+ out0[1] = __byte_perm_S (in0[2], in0[3], 0x5432);
+ out0[2] = __byte_perm_S (in0[3], in1[0], 0x5432);
+ out0[3] = __byte_perm_S (in1[0], in1[1], 0x5432);
+ out1[0] = __byte_perm_S (in1[1], in1[2], 0x5432);
+ out1[1] = __byte_perm_S (in1[2], in1[3], 0x5432);
+ out1[2] = __byte_perm_S (in1[3], 0, 0x5432);
out1[3] = 0;
break;
- case 7: out0[0] = __byte_perm (in0[1], in0[2], 0x6543);
- out0[1] = __byte_perm (in0[2], in0[3], 0x6543);
- out0[2] = __byte_perm (in0[3], in1[0], 0x6543);
- out0[3] = __byte_perm (in1[0], in1[1], 0x6543);
- out1[0] = __byte_perm (in1[1], in1[2], 0x6543);
- out1[1] = __byte_perm (in1[2], in1[3], 0x6543);
- out1[2] = __byte_perm (in1[3], 0, 0x6543);
+ case 7: out0[0] = __byte_perm_S (in0[1], in0[2], 0x6543);
+ out0[1] = __byte_perm_S (in0[2], in0[3], 0x6543);
+ out0[2] = __byte_perm_S (in0[3], in1[0], 0x6543);
+ out0[3] = __byte_perm_S (in1[0], in1[1], 0x6543);
+ out1[0] = __byte_perm_S (in1[1], in1[2], 0x6543);
+ out1[1] = __byte_perm_S (in1[2], in1[3], 0x6543);
+ out1[2] = __byte_perm_S (in1[3], 0, 0x6543);
out1[3] = 0;
break;
case 8: out0[0] = in0[2];
out1[2] = 0;
out1[3] = 0;
break;
- case 9: out0[0] = __byte_perm (in0[2], in0[3], 0x4321);
- out0[1] = __byte_perm (in0[3], in1[0], 0x4321);
- out0[2] = __byte_perm (in1[0], in1[1], 0x4321);
- out0[3] = __byte_perm (in1[1], in1[2], 0x4321);
- out1[0] = __byte_perm (in1[2], in1[3], 0x4321);
- out1[1] = __byte_perm (in1[3], 0, 0x4321);
+ case 9: out0[0] = __byte_perm_S (in0[2], in0[3], 0x4321);
+ out0[1] = __byte_perm_S (in0[3], in1[0], 0x4321);
+ out0[2] = __byte_perm_S (in1[0], in1[1], 0x4321);
+ out0[3] = __byte_perm_S (in1[1], in1[2], 0x4321);
+ out1[0] = __byte_perm_S (in1[2], in1[3], 0x4321);
+ out1[1] = __byte_perm_S (in1[3], 0, 0x4321);
out1[2] = 0;
out1[3] = 0;
break;
- case 10: out0[0] = __byte_perm (in0[2], in0[3], 0x5432);
- out0[1] = __byte_perm (in0[3], in1[0], 0x5432);
- out0[2] = __byte_perm (in1[0], in1[1], 0x5432);
- out0[3] = __byte_perm (in1[1], in1[2], 0x5432);
- out1[0] = __byte_perm (in1[2], in1[3], 0x5432);
- out1[1] = __byte_perm (in1[3], 0, 0x5432);
+ case 10: out0[0] = __byte_perm_S (in0[2], in0[3], 0x5432);
+ out0[1] = __byte_perm_S (in0[3], in1[0], 0x5432);
+ out0[2] = __byte_perm_S (in1[0], in1[1], 0x5432);
+ out0[3] = __byte_perm_S (in1[1], in1[2], 0x5432);
+ out1[0] = __byte_perm_S (in1[2], in1[3], 0x5432);
+ out1[1] = __byte_perm_S (in1[3], 0, 0x5432);
out1[2] = 0;
out1[3] = 0;
break;
- case 11: out0[0] = __byte_perm (in0[2], in0[3], 0x6543);
- out0[1] = __byte_perm (in0[3], in1[0], 0x6543);
- out0[2] = __byte_perm (in1[0], in1[1], 0x6543);
- out0[3] = __byte_perm (in1[1], in1[2], 0x6543);
- out1[0] = __byte_perm (in1[2], in1[3], 0x6543);
- out1[1] = __byte_perm (in1[3], 0, 0x6543);
+ case 11: out0[0] = __byte_perm_S (in0[2], in0[3], 0x6543);
+ out0[1] = __byte_perm_S (in0[3], in1[0], 0x6543);
+ out0[2] = __byte_perm_S (in1[0], in1[1], 0x6543);
+ out0[3] = __byte_perm_S (in1[1], in1[2], 0x6543);
+ out1[0] = __byte_perm_S (in1[2], in1[3], 0x6543);
+ out1[1] = __byte_perm_S (in1[3], 0, 0x6543);
out1[2] = 0;
out1[3] = 0;
break;
out1[3] = 0;
break;
case 13:
- out0[0] = __byte_perm (in0[3], in1[0], 0x4321);
- out0[1] = __byte_perm (in1[0], in1[1], 0x4321);
- out0[2] = __byte_perm (in1[1], in1[2], 0x4321);
- out0[3] = __byte_perm (in1[2], in1[3], 0x4321);
- out1[0] = __byte_perm (in1[3], 0, 0x4321);
+ out0[0] = __byte_perm_S (in0[3], in1[0], 0x4321);
+ out0[1] = __byte_perm_S (in1[0], in1[1], 0x4321);
+ out0[2] = __byte_perm_S (in1[1], in1[2], 0x4321);
+ out0[3] = __byte_perm_S (in1[2], in1[3], 0x4321);
+ out1[0] = __byte_perm_S (in1[3], 0, 0x4321);
out1[1] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 14: out0[0] = __byte_perm (in0[3], in1[0], 0x5432);
- out0[1] = __byte_perm (in1[0], in1[1], 0x5432);
- out0[2] = __byte_perm (in1[1], in1[2], 0x5432);
- out0[3] = __byte_perm (in1[2], in1[3], 0x5432);
- out1[0] = __byte_perm (in1[3], 0, 0x5432);
+ case 14: out0[0] = __byte_perm_S (in0[3], in1[0], 0x5432);
+ out0[1] = __byte_perm_S (in1[0], in1[1], 0x5432);
+ out0[2] = __byte_perm_S (in1[1], in1[2], 0x5432);
+ out0[3] = __byte_perm_S (in1[2], in1[3], 0x5432);
+ out1[0] = __byte_perm_S (in1[3], 0, 0x5432);
out1[1] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 15: out0[0] = __byte_perm (in0[3], in1[0], 0x6543);
- out0[1] = __byte_perm (in1[0], in1[1], 0x6543);
- out0[2] = __byte_perm (in1[1], in1[2], 0x6543);
- out0[3] = __byte_perm (in1[2], in1[3], 0x6543);
- out1[0] = __byte_perm (in1[3], 0, 0x6543);
+ case 15: out0[0] = __byte_perm_S (in0[3], in1[0], 0x6543);
+ out0[1] = __byte_perm_S (in1[0], in1[1], 0x6543);
+ out0[2] = __byte_perm_S (in1[1], in1[2], 0x6543);
+ out0[3] = __byte_perm_S (in1[2], in1[3], 0x6543);
+ out1[0] = __byte_perm_S (in1[3], 0, 0x6543);
out1[1] = 0;
out1[2] = 0;
out1[3] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 17: out0[0] = __byte_perm (in1[0], in1[1], 0x4321);
- out0[1] = __byte_perm (in1[1], in1[2], 0x4321);
- out0[2] = __byte_perm (in1[2], in1[3], 0x4321);
- out0[3] = __byte_perm (in1[3], 0, 0x4321);
+ case 17: out0[0] = __byte_perm_S (in1[0], in1[1], 0x4321);
+ out0[1] = __byte_perm_S (in1[1], in1[2], 0x4321);
+ out0[2] = __byte_perm_S (in1[2], in1[3], 0x4321);
+ out0[3] = __byte_perm_S (in1[3], 0, 0x4321);
out1[0] = 0;
out1[1] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 18: out0[0] = __byte_perm (in1[0], in1[1], 0x5432);
- out0[1] = __byte_perm (in1[1], in1[2], 0x5432);
- out0[2] = __byte_perm (in1[2], in1[3], 0x5432);
- out0[3] = __byte_perm (in1[3], 0, 0x5432);
+ case 18: out0[0] = __byte_perm_S (in1[0], in1[1], 0x5432);
+ out0[1] = __byte_perm_S (in1[1], in1[2], 0x5432);
+ out0[2] = __byte_perm_S (in1[2], in1[3], 0x5432);
+ out0[3] = __byte_perm_S (in1[3], 0, 0x5432);
out1[0] = 0;
out1[1] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 19: out0[0] = __byte_perm (in1[0], in1[1], 0x6543);
- out0[1] = __byte_perm (in1[1], in1[2], 0x6543);
- out0[2] = __byte_perm (in1[2], in1[3], 0x6543);
- out0[3] = __byte_perm (in1[3], 0, 0x6543);
+ case 19: out0[0] = __byte_perm_S (in1[0], in1[1], 0x6543);
+ out0[1] = __byte_perm_S (in1[1], in1[2], 0x6543);
+ out0[2] = __byte_perm_S (in1[2], in1[3], 0x6543);
+ out0[3] = __byte_perm_S (in1[3], 0, 0x6543);
out1[0] = 0;
out1[1] = 0;
out1[2] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 21: out0[0] = __byte_perm (in1[1], in1[2], 0x4321);
- out0[1] = __byte_perm (in1[2], in1[3], 0x4321);
- out0[2] = __byte_perm (in1[3], 0, 0x4321);
+ case 21: out0[0] = __byte_perm_S (in1[1], in1[2], 0x4321);
+ out0[1] = __byte_perm_S (in1[2], in1[3], 0x4321);
+ out0[2] = __byte_perm_S (in1[3], 0, 0x4321);
out0[3] = 0;
out1[0] = 0;
out1[1] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 22: out0[0] = __byte_perm (in1[1], in1[2], 0x5432);
- out0[1] = __byte_perm (in1[2], in1[3], 0x5432);
- out0[2] = __byte_perm (in1[3], 0, 0x5432);
+ case 22: out0[0] = __byte_perm_S (in1[1], in1[2], 0x5432);
+ out0[1] = __byte_perm_S (in1[2], in1[3], 0x5432);
+ out0[2] = __byte_perm_S (in1[3], 0, 0x5432);
out0[3] = 0;
out1[0] = 0;
out1[1] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 23: out0[0] = __byte_perm (in1[1], in1[2], 0x6543);
- out0[1] = __byte_perm (in1[2], in1[3], 0x6543);
- out0[2] = __byte_perm (in1[3], 0, 0x6543);
+ case 23: out0[0] = __byte_perm_S (in1[1], in1[2], 0x6543);
+ out0[1] = __byte_perm_S (in1[2], in1[3], 0x6543);
+ out0[2] = __byte_perm_S (in1[3], 0, 0x6543);
out0[3] = 0;
out1[0] = 0;
out1[1] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 25: out0[0] = __byte_perm (in1[2], in1[3], 0x4321);
- out0[1] = __byte_perm (in1[3], 0, 0x4321);
+ case 25: out0[0] = __byte_perm_S (in1[2], in1[3], 0x4321);
+ out0[1] = __byte_perm_S (in1[3], 0, 0x4321);
out0[2] = 0;
out0[3] = 0;
out1[0] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 26: out0[0] = __byte_perm (in1[2], in1[3], 0x5432);
- out0[1] = __byte_perm (in1[3], 0, 0x5432);
+ case 26: out0[0] = __byte_perm_S (in1[2], in1[3], 0x5432);
+ out0[1] = __byte_perm_S (in1[3], 0, 0x5432);
out0[2] = 0;
out0[3] = 0;
out1[0] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 27: out0[0] = __byte_perm (in1[2], in1[3], 0x6543);
- out0[1] = __byte_perm (in1[3], 0, 0x6543);
+ case 27: out0[0] = __byte_perm_S (in1[2], in1[3], 0x6543);
+ out0[1] = __byte_perm_S (in1[3], 0, 0x6543);
out0[2] = 0;
out0[3] = 0;
out1[0] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 29: out0[0] = __byte_perm (in1[3], 0, 0x4321);
+ case 29: out0[0] = __byte_perm_S (in1[3], 0, 0x4321);
out0[1] = 0;
out0[2] = 0;
out0[3] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 30: out0[0] = __byte_perm (in1[3], 0, 0x5432);
+ case 30: out0[0] = __byte_perm_S (in1[3], 0, 0x5432);
out0[1] = 0;
out0[2] = 0;
out0[3] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 31: out0[0] = __byte_perm (in1[3], 0, 0x6543);
+ case 31: out0[0] = __byte_perm_S (in1[3], 0, 0x6543);
out0[1] = 0;
out0[2] = 0;
out0[3] = 0;
out1[2] = in1[2];
out1[3] = in1[3];
break;
- case 1: out0[0] = amd_bytealign (in0[1], in0[0], 1);
- out0[1] = amd_bytealign (in0[2], in0[1], 1);
- out0[2] = amd_bytealign (in0[3], in0[2], 1);
- out0[3] = amd_bytealign (in1[0], in0[3], 1);
- out1[0] = amd_bytealign (in1[1], in1[0], 1);
- out1[1] = amd_bytealign (in1[2], in1[1], 1);
- out1[2] = amd_bytealign (in1[3], in1[2], 1);
- out1[3] = amd_bytealign ( 0, in1[3], 1);
- break;
- case 2: out0[0] = amd_bytealign (in0[1], in0[0], 2);
- out0[1] = amd_bytealign (in0[2], in0[1], 2);
- out0[2] = amd_bytealign (in0[3], in0[2], 2);
- out0[3] = amd_bytealign (in1[0], in0[3], 2);
- out1[0] = amd_bytealign (in1[1], in1[0], 2);
- out1[1] = amd_bytealign (in1[2], in1[1], 2);
- out1[2] = amd_bytealign (in1[3], in1[2], 2);
- out1[3] = amd_bytealign ( 0, in1[3], 2);
- break;
- case 3: out0[0] = amd_bytealign (in0[1], in0[0], 3);
- out0[1] = amd_bytealign (in0[2], in0[1], 3);
- out0[2] = amd_bytealign (in0[3], in0[2], 3);
- out0[3] = amd_bytealign (in1[0], in0[3], 3);
- out1[0] = amd_bytealign (in1[1], in1[0], 3);
- out1[1] = amd_bytealign (in1[2], in1[1], 3);
- out1[2] = amd_bytealign (in1[3], in1[2], 3);
- out1[3] = amd_bytealign ( 0, in1[3], 3);
+ case 1: out0[0] = amd_bytealign_S (in0[1], in0[0], 1);
+ out0[1] = amd_bytealign_S (in0[2], in0[1], 1);
+ out0[2] = amd_bytealign_S (in0[3], in0[2], 1);
+ out0[3] = amd_bytealign_S (in1[0], in0[3], 1);
+ out1[0] = amd_bytealign_S (in1[1], in1[0], 1);
+ out1[1] = amd_bytealign_S (in1[2], in1[1], 1);
+ out1[2] = amd_bytealign_S (in1[3], in1[2], 1);
+ out1[3] = amd_bytealign_S ( 0, in1[3], 1);
+ break;
+ case 2: out0[0] = amd_bytealign_S (in0[1], in0[0], 2);
+ out0[1] = amd_bytealign_S (in0[2], in0[1], 2);
+ out0[2] = amd_bytealign_S (in0[3], in0[2], 2);
+ out0[3] = amd_bytealign_S (in1[0], in0[3], 2);
+ out1[0] = amd_bytealign_S (in1[1], in1[0], 2);
+ out1[1] = amd_bytealign_S (in1[2], in1[1], 2);
+ out1[2] = amd_bytealign_S (in1[3], in1[2], 2);
+ out1[3] = amd_bytealign_S ( 0, in1[3], 2);
+ break;
+ case 3: out0[0] = amd_bytealign_S (in0[1], in0[0], 3);
+ out0[1] = amd_bytealign_S (in0[2], in0[1], 3);
+ out0[2] = amd_bytealign_S (in0[3], in0[2], 3);
+ out0[3] = amd_bytealign_S (in1[0], in0[3], 3);
+ out1[0] = amd_bytealign_S (in1[1], in1[0], 3);
+ out1[1] = amd_bytealign_S (in1[2], in1[1], 3);
+ out1[2] = amd_bytealign_S (in1[3], in1[2], 3);
+ out1[3] = amd_bytealign_S ( 0, in1[3], 3);
break;
case 4: out0[0] = in0[1];
out0[1] = in0[2];
out1[2] = in1[3];
out1[3] = 0;
break;
- case 5: out0[0] = amd_bytealign (in0[2], in0[1], 1);
- out0[1] = amd_bytealign (in0[3], in0[2], 1);
- out0[2] = amd_bytealign (in1[0], in0[3], 1);
- out0[3] = amd_bytealign (in1[1], in1[0], 1);
- out1[0] = amd_bytealign (in1[2], in1[1], 1);
- out1[1] = amd_bytealign (in1[3], in1[2], 1);
- out1[2] = amd_bytealign ( 0, in1[3], 1);
+ case 5: out0[0] = amd_bytealign_S (in0[2], in0[1], 1);
+ out0[1] = amd_bytealign_S (in0[3], in0[2], 1);
+ out0[2] = amd_bytealign_S (in1[0], in0[3], 1);
+ out0[3] = amd_bytealign_S (in1[1], in1[0], 1);
+ out1[0] = amd_bytealign_S (in1[2], in1[1], 1);
+ out1[1] = amd_bytealign_S (in1[3], in1[2], 1);
+ out1[2] = amd_bytealign_S ( 0, in1[3], 1);
out1[3] = 0;
break;
- case 6: out0[0] = amd_bytealign (in0[2], in0[1], 2);
- out0[1] = amd_bytealign (in0[3], in0[2], 2);
- out0[2] = amd_bytealign (in1[0], in0[3], 2);
- out0[3] = amd_bytealign (in1[1], in1[0], 2);
- out1[0] = amd_bytealign (in1[2], in1[1], 2);
- out1[1] = amd_bytealign (in1[3], in1[2], 2);
- out1[2] = amd_bytealign ( 0, in1[3], 2);
+ case 6: out0[0] = amd_bytealign_S (in0[2], in0[1], 2);
+ out0[1] = amd_bytealign_S (in0[3], in0[2], 2);
+ out0[2] = amd_bytealign_S (in1[0], in0[3], 2);
+ out0[3] = amd_bytealign_S (in1[1], in1[0], 2);
+ out1[0] = amd_bytealign_S (in1[2], in1[1], 2);
+ out1[1] = amd_bytealign_S (in1[3], in1[2], 2);
+ out1[2] = amd_bytealign_S ( 0, in1[3], 2);
out1[3] = 0;
break;
- case 7: out0[0] = amd_bytealign (in0[2], in0[1], 3);
- out0[1] = amd_bytealign (in0[3], in0[2], 3);
- out0[2] = amd_bytealign (in1[0], in0[3], 3);
- out0[3] = amd_bytealign (in1[1], in1[0], 3);
- out1[0] = amd_bytealign (in1[2], in1[1], 3);
- out1[1] = amd_bytealign (in1[3], in1[2], 3);
- out1[2] = amd_bytealign ( 0, in1[3], 3);
+ case 7: out0[0] = amd_bytealign_S (in0[2], in0[1], 3);
+ out0[1] = amd_bytealign_S (in0[3], in0[2], 3);
+ out0[2] = amd_bytealign_S (in1[0], in0[3], 3);
+ out0[3] = amd_bytealign_S (in1[1], in1[0], 3);
+ out1[0] = amd_bytealign_S (in1[2], in1[1], 3);
+ out1[1] = amd_bytealign_S (in1[3], in1[2], 3);
+ out1[2] = amd_bytealign_S ( 0, in1[3], 3);
out1[3] = 0;
break;
case 8: out0[0] = in0[2];
out1[2] = 0;
out1[3] = 0;
break;
- case 9: out0[0] = amd_bytealign (in0[3], in0[2], 1);
- out0[1] = amd_bytealign (in1[0], in0[3], 1);
- out0[2] = amd_bytealign (in1[1], in1[0], 1);
- out0[3] = amd_bytealign (in1[2], in1[1], 1);
- out1[0] = amd_bytealign (in1[3], in1[2], 1);
- out1[1] = amd_bytealign ( 0, in1[3], 1);
+ case 9: out0[0] = amd_bytealign_S (in0[3], in0[2], 1);
+ out0[1] = amd_bytealign_S (in1[0], in0[3], 1);
+ out0[2] = amd_bytealign_S (in1[1], in1[0], 1);
+ out0[3] = amd_bytealign_S (in1[2], in1[1], 1);
+ out1[0] = amd_bytealign_S (in1[3], in1[2], 1);
+ out1[1] = amd_bytealign_S ( 0, in1[3], 1);
out1[2] = 0;
out1[3] = 0;
break;
- case 10: out0[0] = amd_bytealign (in0[3], in0[2], 2);
- out0[1] = amd_bytealign (in1[0], in0[3], 2);
- out0[2] = amd_bytealign (in1[1], in1[0], 2);
- out0[3] = amd_bytealign (in1[2], in1[1], 2);
- out1[0] = amd_bytealign (in1[3], in1[2], 2);
- out1[1] = amd_bytealign ( 0, in1[3], 2);
+ case 10: out0[0] = amd_bytealign_S (in0[3], in0[2], 2);
+ out0[1] = amd_bytealign_S (in1[0], in0[3], 2);
+ out0[2] = amd_bytealign_S (in1[1], in1[0], 2);
+ out0[3] = amd_bytealign_S (in1[2], in1[1], 2);
+ out1[0] = amd_bytealign_S (in1[3], in1[2], 2);
+ out1[1] = amd_bytealign_S ( 0, in1[3], 2);
out1[2] = 0;
out1[3] = 0;
break;
- case 11: out0[0] = amd_bytealign (in0[3], in0[2], 3);
- out0[1] = amd_bytealign (in1[0], in0[3], 3);
- out0[2] = amd_bytealign (in1[1], in1[0], 3);
- out0[3] = amd_bytealign (in1[2], in1[1], 3);
- out1[0] = amd_bytealign (in1[3], in1[2], 3);
- out1[1] = amd_bytealign ( 0, in1[3], 3);
+ case 11: out0[0] = amd_bytealign_S (in0[3], in0[2], 3);
+ out0[1] = amd_bytealign_S (in1[0], in0[3], 3);
+ out0[2] = amd_bytealign_S (in1[1], in1[0], 3);
+ out0[3] = amd_bytealign_S (in1[2], in1[1], 3);
+ out1[0] = amd_bytealign_S (in1[3], in1[2], 3);
+ out1[1] = amd_bytealign_S ( 0, in1[3], 3);
out1[2] = 0;
out1[3] = 0;
break;
out1[2] = 0;
out1[3] = 0;
break;
- case 13: out0[0] = amd_bytealign (in1[0], in0[3], 1);
- out0[1] = amd_bytealign (in1[1], in1[0], 1);
- out0[2] = amd_bytealign (in1[2], in1[1], 1);
- out0[3] = amd_bytealign (in1[3], in1[2], 1);
- out1[0] = amd_bytealign ( 0, in1[3], 1);
+ case 13: out0[0] = amd_bytealign_S (in1[0], in0[3], 1);
+ out0[1] = amd_bytealign_S (in1[1], in1[0], 1);
+ out0[2] = amd_bytealign_S (in1[2], in1[1], 1);
+ out0[3] = amd_bytealign_S (in1[3], in1[2], 1);
+ out1[0] = amd_bytealign_S ( 0, in1[3], 1);
out1[1] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 14: out0[0] = amd_bytealign (in1[0], in0[3], 2);
- out0[1] = amd_bytealign (in1[1], in1[0], 2);
- out0[2] = amd_bytealign (in1[2], in1[1], 2);
- out0[3] = amd_bytealign (in1[3], in1[2], 2);
- out1[0] = amd_bytealign ( 0, in1[3], 2);
+ case 14: out0[0] = amd_bytealign_S (in1[0], in0[3], 2);
+ out0[1] = amd_bytealign_S (in1[1], in1[0], 2);
+ out0[2] = amd_bytealign_S (in1[2], in1[1], 2);
+ out0[3] = amd_bytealign_S (in1[3], in1[2], 2);
+ out1[0] = amd_bytealign_S ( 0, in1[3], 2);
out1[1] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 15: out0[0] = amd_bytealign (in1[0], in0[3], 3);
- out0[1] = amd_bytealign (in1[1], in1[0], 3);
- out0[2] = amd_bytealign (in1[2], in1[1], 3);
- out0[3] = amd_bytealign (in1[3], in1[2], 3);
- out1[0] = amd_bytealign ( 0, in1[3], 3);
+ case 15: out0[0] = amd_bytealign_S (in1[0], in0[3], 3);
+ out0[1] = amd_bytealign_S (in1[1], in1[0], 3);
+ out0[2] = amd_bytealign_S (in1[2], in1[1], 3);
+ out0[3] = amd_bytealign_S (in1[3], in1[2], 3);
+ out1[0] = amd_bytealign_S ( 0, in1[3], 3);
out1[1] = 0;
out1[2] = 0;
out1[3] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 17: out0[0] = amd_bytealign (in1[1], in1[0], 1);
- out0[1] = amd_bytealign (in1[2], in1[1], 1);
- out0[2] = amd_bytealign (in1[3], in1[2], 1);
- out0[3] = amd_bytealign ( 0, in1[3], 1);
+ case 17: out0[0] = amd_bytealign_S (in1[1], in1[0], 1);
+ out0[1] = amd_bytealign_S (in1[2], in1[1], 1);
+ out0[2] = amd_bytealign_S (in1[3], in1[2], 1);
+ out0[3] = amd_bytealign_S ( 0, in1[3], 1);
out1[0] = 0;
out1[1] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 18: out0[0] = amd_bytealign (in1[1], in1[0], 2);
- out0[1] = amd_bytealign (in1[2], in1[1], 2);
- out0[2] = amd_bytealign (in1[3], in1[2], 2);
- out0[3] = amd_bytealign ( 0, in1[3], 2);
+ case 18: out0[0] = amd_bytealign_S (in1[1], in1[0], 2);
+ out0[1] = amd_bytealign_S (in1[2], in1[1], 2);
+ out0[2] = amd_bytealign_S (in1[3], in1[2], 2);
+ out0[3] = amd_bytealign_S ( 0, in1[3], 2);
out1[0] = 0;
out1[1] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 19: out0[0] = amd_bytealign (in1[1], in1[0], 3);
- out0[1] = amd_bytealign (in1[2], in1[1], 3);
- out0[2] = amd_bytealign (in1[3], in1[2], 3);
- out0[3] = amd_bytealign ( 0, in1[3], 3);
+ case 19: out0[0] = amd_bytealign_S (in1[1], in1[0], 3);
+ out0[1] = amd_bytealign_S (in1[2], in1[1], 3);
+ out0[2] = amd_bytealign_S (in1[3], in1[2], 3);
+ out0[3] = amd_bytealign_S ( 0, in1[3], 3);
out1[0] = 0;
out1[1] = 0;
out1[2] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 21: out0[0] = amd_bytealign (in1[2], in1[1], 1);
- out0[1] = amd_bytealign (in1[3], in1[2], 1);
- out0[2] = amd_bytealign ( 0, in1[3], 1);
+ case 21: out0[0] = amd_bytealign_S (in1[2], in1[1], 1);
+ out0[1] = amd_bytealign_S (in1[3], in1[2], 1);
+ out0[2] = amd_bytealign_S ( 0, in1[3], 1);
out0[3] = 0;
out1[0] = 0;
out1[1] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 22: out0[0] = amd_bytealign (in1[2], in1[1], 2);
- out0[1] = amd_bytealign (in1[3], in1[2], 2);
- out0[2] = amd_bytealign ( 0, in1[3], 2);
+ case 22: out0[0] = amd_bytealign_S (in1[2], in1[1], 2);
+ out0[1] = amd_bytealign_S (in1[3], in1[2], 2);
+ out0[2] = amd_bytealign_S ( 0, in1[3], 2);
out0[3] = 0;
out1[0] = 0;
out1[1] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 23: out0[0] = amd_bytealign (in1[2], in1[1], 3);
- out0[1] = amd_bytealign (in1[3], in1[2], 3);
- out0[2] = amd_bytealign ( 0, in1[3], 3);
+ case 23: out0[0] = amd_bytealign_S (in1[2], in1[1], 3);
+ out0[1] = amd_bytealign_S (in1[3], in1[2], 3);
+ out0[2] = amd_bytealign_S ( 0, in1[3], 3);
out0[3] = 0;
out1[0] = 0;
out1[1] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 25: out0[0] = amd_bytealign (in1[3], in1[2], 1);
- out0[1] = amd_bytealign ( 0, in1[3], 1);
+ case 25: out0[0] = amd_bytealign_S (in1[3], in1[2], 1);
+ out0[1] = amd_bytealign_S ( 0, in1[3], 1);
out0[2] = 0;
out0[3] = 0;
out1[0] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 26: out0[0] = amd_bytealign (in1[3], in1[2], 2);
- out0[1] = amd_bytealign ( 0, in1[3], 2);
+ case 26: out0[0] = amd_bytealign_S (in1[3], in1[2], 2);
+ out0[1] = amd_bytealign_S ( 0, in1[3], 2);
out0[2] = 0;
out0[3] = 0;
out1[0] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 27: out0[0] = amd_bytealign (in1[3], in1[2], 3);
- out0[1] = amd_bytealign ( 0, in1[3], 3);
+ case 27: out0[0] = amd_bytealign_S (in1[3], in1[2], 3);
+ out0[1] = amd_bytealign_S ( 0, in1[3], 3);
out0[2] = 0;
out0[3] = 0;
out1[0] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 29: out0[0] = amd_bytealign ( 0, in1[3], 1);
+ case 29: out0[0] = amd_bytealign_S ( 0, in1[3], 1);
out0[1] = 0;
out0[2] = 0;
out0[3] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 30: out0[0] = amd_bytealign ( 0, in1[3], 2);
+ case 30: out0[0] = amd_bytealign_S ( 0, in1[3], 2);
out0[1] = 0;
out0[2] = 0;
out0[3] = 0;
out1[2] = 0;
out1[3] = 0;
break;
- case 31: out0[0] = amd_bytealign ( 0, in1[3], 3);
+ case 31: out0[0] = amd_bytealign_S ( 0, in1[3], 3);
out0[1] = 0;
out0[2] = 0;
out0[3] = 0;
#endif
}
-static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
+inline void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
{
#ifdef IS_NV
switch (num)
out0[1] = in0[1];
out0[0] = in0[0];
break;
- case 1: out1[3] = __byte_perm (in1[2], in1[3], 0x6543);
- out1[2] = __byte_perm (in1[1], in1[2], 0x6543);
- out1[1] = __byte_perm (in1[0], in1[1], 0x6543);
- out1[0] = __byte_perm (in0[3], in1[0], 0x6543);
- out0[3] = __byte_perm (in0[2], in0[3], 0x6543);
- out0[2] = __byte_perm (in0[1], in0[2], 0x6543);
- out0[1] = __byte_perm (in0[0], in0[1], 0x6543);
- out0[0] = __byte_perm ( 0, in0[0], 0x6543);
- break;
- case 2: out1[3] = __byte_perm (in1[2], in1[3], 0x5432);
- out1[2] = __byte_perm (in1[1], in1[2], 0x5432);
- out1[1] = __byte_perm (in1[0], in1[1], 0x5432);
- out1[0] = __byte_perm (in0[3], in1[0], 0x5432);
- out0[3] = __byte_perm (in0[2], in0[3], 0x5432);
- out0[2] = __byte_perm (in0[1], in0[2], 0x5432);
- out0[1] = __byte_perm (in0[0], in0[1], 0x5432);
- out0[0] = __byte_perm ( 0, in0[0], 0x5432);
- break;
- case 3: out1[3] = __byte_perm (in1[2], in1[3], 0x4321);
- out1[2] = __byte_perm (in1[1], in1[2], 0x4321);
- out1[1] = __byte_perm (in1[0], in1[1], 0x4321);
- out1[0] = __byte_perm (in0[3], in1[0], 0x4321);
- out0[3] = __byte_perm (in0[2], in0[3], 0x4321);
- out0[2] = __byte_perm (in0[1], in0[2], 0x4321);
- out0[1] = __byte_perm (in0[0], in0[1], 0x4321);
- out0[0] = __byte_perm ( 0, in0[0], 0x4321);
+ case 1: out1[3] = __byte_perm_S (in1[2], in1[3], 0x6543);
+ out1[2] = __byte_perm_S (in1[1], in1[2], 0x6543);
+ out1[1] = __byte_perm_S (in1[0], in1[1], 0x6543);
+ out1[0] = __byte_perm_S (in0[3], in1[0], 0x6543);
+ out0[3] = __byte_perm_S (in0[2], in0[3], 0x6543);
+ out0[2] = __byte_perm_S (in0[1], in0[2], 0x6543);
+ out0[1] = __byte_perm_S (in0[0], in0[1], 0x6543);
+ out0[0] = __byte_perm_S ( 0, in0[0], 0x6543);
+ break;
+ case 2: out1[3] = __byte_perm_S (in1[2], in1[3], 0x5432);
+ out1[2] = __byte_perm_S (in1[1], in1[2], 0x5432);
+ out1[1] = __byte_perm_S (in1[0], in1[1], 0x5432);
+ out1[0] = __byte_perm_S (in0[3], in1[0], 0x5432);
+ out0[3] = __byte_perm_S (in0[2], in0[3], 0x5432);
+ out0[2] = __byte_perm_S (in0[1], in0[2], 0x5432);
+ out0[1] = __byte_perm_S (in0[0], in0[1], 0x5432);
+ out0[0] = __byte_perm_S ( 0, in0[0], 0x5432);
+ break;
+ case 3: out1[3] = __byte_perm_S (in1[2], in1[3], 0x4321);
+ out1[2] = __byte_perm_S (in1[1], in1[2], 0x4321);
+ out1[1] = __byte_perm_S (in1[0], in1[1], 0x4321);
+ out1[0] = __byte_perm_S (in0[3], in1[0], 0x4321);
+ out0[3] = __byte_perm_S (in0[2], in0[3], 0x4321);
+ out0[2] = __byte_perm_S (in0[1], in0[2], 0x4321);
+ out0[1] = __byte_perm_S (in0[0], in0[1], 0x4321);
+ out0[0] = __byte_perm_S ( 0, in0[0], 0x4321);
break;
case 4: out1[3] = in1[2];
out1[2] = in1[1];
out0[1] = in0[0];
out0[0] = 0;
break;
- case 5: out1[3] = __byte_perm (in1[1], in1[2], 0x6543);
- out1[2] = __byte_perm (in1[0], in1[1], 0x6543);
- out1[1] = __byte_perm (in0[3], in1[0], 0x6543);
- out1[0] = __byte_perm (in0[2], in0[3], 0x6543);
- out0[3] = __byte_perm (in0[1], in0[2], 0x6543);
- out0[2] = __byte_perm (in0[0], in0[1], 0x6543);
- out0[1] = __byte_perm ( 0, in0[0], 0x6543);
+ case 5: out1[3] = __byte_perm_S (in1[1], in1[2], 0x6543);
+ out1[2] = __byte_perm_S (in1[0], in1[1], 0x6543);
+ out1[1] = __byte_perm_S (in0[3], in1[0], 0x6543);
+ out1[0] = __byte_perm_S (in0[2], in0[3], 0x6543);
+ out0[3] = __byte_perm_S (in0[1], in0[2], 0x6543);
+ out0[2] = __byte_perm_S (in0[0], in0[1], 0x6543);
+ out0[1] = __byte_perm_S ( 0, in0[0], 0x6543);
out0[0] = 0;
break;
- case 6: out1[3] = __byte_perm (in1[1], in1[2], 0x5432);
- out1[2] = __byte_perm (in1[0], in1[1], 0x5432);
- out1[1] = __byte_perm (in0[3], in1[0], 0x5432);
- out1[0] = __byte_perm (in0[2], in0[3], 0x5432);
- out0[3] = __byte_perm (in0[1], in0[2], 0x5432);
- out0[2] = __byte_perm (in0[0], in0[1], 0x5432);
- out0[1] = __byte_perm ( 0, in0[0], 0x5432);
+ case 6: out1[3] = __byte_perm_S (in1[1], in1[2], 0x5432);
+ out1[2] = __byte_perm_S (in1[0], in1[1], 0x5432);
+ out1[1] = __byte_perm_S (in0[3], in1[0], 0x5432);
+ out1[0] = __byte_perm_S (in0[2], in0[3], 0x5432);
+ out0[3] = __byte_perm_S (in0[1], in0[2], 0x5432);
+ out0[2] = __byte_perm_S (in0[0], in0[1], 0x5432);
+ out0[1] = __byte_perm_S ( 0, in0[0], 0x5432);
out0[0] = 0;
break;
- case 7: out1[3] = __byte_perm (in1[1], in1[2], 0x4321);
- out1[2] = __byte_perm (in1[0], in1[1], 0x4321);
- out1[1] = __byte_perm (in0[3], in1[0], 0x4321);
- out1[0] = __byte_perm (in0[2], in0[3], 0x4321);
- out0[3] = __byte_perm (in0[1], in0[2], 0x4321);
- out0[2] = __byte_perm (in0[0], in0[1], 0x4321);
- out0[1] = __byte_perm ( 0, in0[0], 0x4321);
+ case 7: out1[3] = __byte_perm_S (in1[1], in1[2], 0x4321);
+ out1[2] = __byte_perm_S (in1[0], in1[1], 0x4321);
+ out1[1] = __byte_perm_S (in0[3], in1[0], 0x4321);
+ out1[0] = __byte_perm_S (in0[2], in0[3], 0x4321);
+ out0[3] = __byte_perm_S (in0[1], in0[2], 0x4321);
+ out0[2] = __byte_perm_S (in0[0], in0[1], 0x4321);
+ out0[1] = __byte_perm_S ( 0, in0[0], 0x4321);
out0[0] = 0;
break;
case 8: out1[3] = in1[1];
out0[1] = 0;
out0[0] = 0;
break;
- case 9: out1[3] = __byte_perm (in1[0], in1[1], 0x6543);
- out1[2] = __byte_perm (in0[3], in1[0], 0x6543);
- out1[1] = __byte_perm (in0[2], in0[3], 0x6543);
- out1[0] = __byte_perm (in0[1], in0[2], 0x6543);
- out0[3] = __byte_perm (in0[0], in0[1], 0x6543);
- out0[2] = __byte_perm ( 0, in0[0], 0x6543);
+ case 9: out1[3] = __byte_perm_S (in1[0], in1[1], 0x6543);
+ out1[2] = __byte_perm_S (in0[3], in1[0], 0x6543);
+ out1[1] = __byte_perm_S (in0[2], in0[3], 0x6543);
+ out1[0] = __byte_perm_S (in0[1], in0[2], 0x6543);
+ out0[3] = __byte_perm_S (in0[0], in0[1], 0x6543);
+ out0[2] = __byte_perm_S ( 0, in0[0], 0x6543);
out0[1] = 0;
out0[0] = 0;
break;
- case 10: out1[3] = __byte_perm (in1[0], in1[1], 0x5432);
- out1[2] = __byte_perm (in0[3], in1[0], 0x5432);
- out1[1] = __byte_perm (in0[2], in0[3], 0x5432);
- out1[0] = __byte_perm (in0[1], in0[2], 0x5432);
- out0[3] = __byte_perm (in0[0], in0[1], 0x5432);
- out0[2] = __byte_perm ( 0, in0[0], 0x5432);
+ case 10: out1[3] = __byte_perm_S (in1[0], in1[1], 0x5432);
+ out1[2] = __byte_perm_S (in0[3], in1[0], 0x5432);
+ out1[1] = __byte_perm_S (in0[2], in0[3], 0x5432);
+ out1[0] = __byte_perm_S (in0[1], in0[2], 0x5432);
+ out0[3] = __byte_perm_S (in0[0], in0[1], 0x5432);
+ out0[2] = __byte_perm_S ( 0, in0[0], 0x5432);
out0[1] = 0;
out0[0] = 0;
break;
- case 11: out1[3] = __byte_perm (in1[0], in1[1], 0x4321);
- out1[2] = __byte_perm (in0[3], in1[0], 0x4321);
- out1[1] = __byte_perm (in0[2], in0[3], 0x4321);
- out1[0] = __byte_perm (in0[1], in0[2], 0x4321);
- out0[3] = __byte_perm (in0[0], in0[1], 0x4321);
- out0[2] = __byte_perm ( 0, in0[0], 0x4321);
+ case 11: out1[3] = __byte_perm_S (in1[0], in1[1], 0x4321);
+ out1[2] = __byte_perm_S (in0[3], in1[0], 0x4321);
+ out1[1] = __byte_perm_S (in0[2], in0[3], 0x4321);
+ out1[0] = __byte_perm_S (in0[1], in0[2], 0x4321);
+ out0[3] = __byte_perm_S (in0[0], in0[1], 0x4321);
+ out0[2] = __byte_perm_S ( 0, in0[0], 0x4321);
out0[1] = 0;
out0[0] = 0;
break;
out0[1] = 0;
out0[0] = 0;
break;
- case 13: out1[3] = __byte_perm (in0[3], in1[0], 0x6543);
- out1[2] = __byte_perm (in0[2], in0[3], 0x6543);
- out1[1] = __byte_perm (in0[1], in0[2], 0x6543);
- out1[0] = __byte_perm (in0[0], in0[1], 0x6543);
- out0[3] = __byte_perm ( 0, in0[0], 0x6543);
+ case 13: out1[3] = __byte_perm_S (in0[3], in1[0], 0x6543);
+ out1[2] = __byte_perm_S (in0[2], in0[3], 0x6543);
+ out1[1] = __byte_perm_S (in0[1], in0[2], 0x6543);
+ out1[0] = __byte_perm_S (in0[0], in0[1], 0x6543);
+ out0[3] = __byte_perm_S ( 0, in0[0], 0x6543);
out0[2] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 14: out1[3] = __byte_perm (in0[3], in1[0], 0x5432);
- out1[2] = __byte_perm (in0[2], in0[3], 0x5432);
- out1[1] = __byte_perm (in0[1], in0[2], 0x5432);
- out1[0] = __byte_perm (in0[0], in0[1], 0x5432);
- out0[3] = __byte_perm ( 0, in0[0], 0x5432);
+ case 14: out1[3] = __byte_perm_S (in0[3], in1[0], 0x5432);
+ out1[2] = __byte_perm_S (in0[2], in0[3], 0x5432);
+ out1[1] = __byte_perm_S (in0[1], in0[2], 0x5432);
+ out1[0] = __byte_perm_S (in0[0], in0[1], 0x5432);
+ out0[3] = __byte_perm_S ( 0, in0[0], 0x5432);
out0[2] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 15: out1[3] = __byte_perm (in0[3], in1[0], 0x4321);
- out1[2] = __byte_perm (in0[2], in0[3], 0x4321);
- out1[1] = __byte_perm (in0[1], in0[2], 0x4321);
- out1[0] = __byte_perm (in0[0], in0[1], 0x4321);
- out0[3] = __byte_perm ( 0, in0[0], 0x4321);
+ case 15: out1[3] = __byte_perm_S (in0[3], in1[0], 0x4321);
+ out1[2] = __byte_perm_S (in0[2], in0[3], 0x4321);
+ out1[1] = __byte_perm_S (in0[1], in0[2], 0x4321);
+ out1[0] = __byte_perm_S (in0[0], in0[1], 0x4321);
+ out0[3] = __byte_perm_S ( 0, in0[0], 0x4321);
out0[2] = 0;
out0[1] = 0;
out0[0] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 17: out1[3] = __byte_perm (in0[2], in0[3], 0x6543);
- out1[2] = __byte_perm (in0[1], in0[2], 0x6543);
- out1[1] = __byte_perm (in0[0], in0[1], 0x6543);
- out1[0] = __byte_perm ( 0, in0[0], 0x6543);
+ case 17: out1[3] = __byte_perm_S (in0[2], in0[3], 0x6543);
+ out1[2] = __byte_perm_S (in0[1], in0[2], 0x6543);
+ out1[1] = __byte_perm_S (in0[0], in0[1], 0x6543);
+ out1[0] = __byte_perm_S ( 0, in0[0], 0x6543);
out0[3] = 0;
out0[2] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 18: out1[3] = __byte_perm (in0[2], in0[3], 0x5432);
- out1[2] = __byte_perm (in0[1], in0[2], 0x5432);
- out1[1] = __byte_perm (in0[0], in0[1], 0x5432);
- out1[0] = __byte_perm ( 0, in0[0], 0x5432);
+ case 18: out1[3] = __byte_perm_S (in0[2], in0[3], 0x5432);
+ out1[2] = __byte_perm_S (in0[1], in0[2], 0x5432);
+ out1[1] = __byte_perm_S (in0[0], in0[1], 0x5432);
+ out1[0] = __byte_perm_S ( 0, in0[0], 0x5432);
out0[3] = 0;
out0[2] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 19: out1[3] = __byte_perm (in0[2], in0[3], 0x4321);
- out1[2] = __byte_perm (in0[1], in0[2], 0x4321);
- out1[1] = __byte_perm (in0[0], in0[1], 0x4321);
- out1[0] = __byte_perm ( 0, in0[0], 0x4321);
+ case 19: out1[3] = __byte_perm_S (in0[2], in0[3], 0x4321);
+ out1[2] = __byte_perm_S (in0[1], in0[2], 0x4321);
+ out1[1] = __byte_perm_S (in0[0], in0[1], 0x4321);
+ out1[0] = __byte_perm_S ( 0, in0[0], 0x4321);
out0[3] = 0;
out0[2] = 0;
out0[1] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 21: out1[3] = __byte_perm (in0[1], in0[2], 0x6543);
- out1[2] = __byte_perm (in0[0], in0[1], 0x6543);
- out1[1] = __byte_perm ( 0, in0[0], 0x6543);
+ case 21: out1[3] = __byte_perm_S (in0[1], in0[2], 0x6543);
+ out1[2] = __byte_perm_S (in0[0], in0[1], 0x6543);
+ out1[1] = __byte_perm_S ( 0, in0[0], 0x6543);
out1[0] = 0;
out0[3] = 0;
out0[2] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 22: out1[3] = __byte_perm (in0[1], in0[2], 0x5432);
- out1[2] = __byte_perm (in0[0], in0[1], 0x5432);
- out1[1] = __byte_perm ( 0, in0[0], 0x5432);
+ case 22: out1[3] = __byte_perm_S (in0[1], in0[2], 0x5432);
+ out1[2] = __byte_perm_S (in0[0], in0[1], 0x5432);
+ out1[1] = __byte_perm_S ( 0, in0[0], 0x5432);
out1[0] = 0;
out0[3] = 0;
out0[2] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 23: out1[3] = __byte_perm (in0[1], in0[2], 0x4321);
- out1[2] = __byte_perm (in0[0], in0[1], 0x4321);
- out1[1] = __byte_perm ( 0, in0[0], 0x4321);
+ case 23: out1[3] = __byte_perm_S (in0[1], in0[2], 0x4321);
+ out1[2] = __byte_perm_S (in0[0], in0[1], 0x4321);
+ out1[1] = __byte_perm_S ( 0, in0[0], 0x4321);
out1[0] = 0;
out0[3] = 0;
out0[2] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 25: out1[3] = __byte_perm (in0[0], in0[1], 0x6543);
- out1[2] = __byte_perm ( 0, in0[0], 0x6543);
+ case 25: out1[3] = __byte_perm_S (in0[0], in0[1], 0x6543);
+ out1[2] = __byte_perm_S ( 0, in0[0], 0x6543);
out1[1] = 0;
out1[0] = 0;
out0[3] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 26: out1[3] = __byte_perm (in0[0], in0[1], 0x5432);
- out1[2] = __byte_perm ( 0, in0[0], 0x5432);
+ case 26: out1[3] = __byte_perm_S (in0[0], in0[1], 0x5432);
+ out1[2] = __byte_perm_S ( 0, in0[0], 0x5432);
out1[1] = 0;
out1[0] = 0;
out0[3] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 27: out1[3] = __byte_perm (in0[0], in0[1], 0x4321);
- out1[2] = __byte_perm ( 0, in0[0], 0x4321);
+ case 27: out1[3] = __byte_perm_S (in0[0], in0[1], 0x4321);
+ out1[2] = __byte_perm_S ( 0, in0[0], 0x4321);
out1[1] = 0;
out1[0] = 0;
out0[3] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 29: out1[3] = __byte_perm ( 0, in0[0], 0x6543);
+ case 29: out1[3] = __byte_perm_S ( 0, in0[0], 0x6543);
out1[2] = 0;
out1[1] = 0;
out1[0] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 30: out1[3] = __byte_perm ( 0, in0[0], 0x5432);
+ case 30: out1[3] = __byte_perm_S ( 0, in0[0], 0x5432);
out1[2] = 0;
out1[1] = 0;
out1[0] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 31: out1[3] = __byte_perm ( 0, in0[0], 0x4321);
+ case 31: out1[3] = __byte_perm_S ( 0, in0[0], 0x4321);
out1[2] = 0;
out1[1] = 0;
out1[0] = 0;
out0[1] = in0[1];
out0[0] = in0[0];
break;
- case 1: out1[3] = amd_bytealign (in1[3], in1[2], 3);
- out1[2] = amd_bytealign (in1[2], in1[1], 3);
- out1[1] = amd_bytealign (in1[1], in1[0], 3);
- out1[0] = amd_bytealign (in1[0], in0[3], 3);
- out0[3] = amd_bytealign (in0[3], in0[2], 3);
- out0[2] = amd_bytealign (in0[2], in0[1], 3);
- out0[1] = amd_bytealign (in0[1], in0[0], 3);
- out0[0] = amd_bytealign (in0[0], 0, 3);
- break;
- case 2: out1[3] = amd_bytealign (in1[3], in1[2], 2);
- out1[2] = amd_bytealign (in1[2], in1[1], 2);
- out1[1] = amd_bytealign (in1[1], in1[0], 2);
- out1[0] = amd_bytealign (in1[0], in0[3], 2);
- out0[3] = amd_bytealign (in0[3], in0[2], 2);
- out0[2] = amd_bytealign (in0[2], in0[1], 2);
- out0[1] = amd_bytealign (in0[1], in0[0], 2);
- out0[0] = amd_bytealign (in0[0], 0, 2);
- break;
- case 3: out1[3] = amd_bytealign (in1[3], in1[2], 1);
- out1[2] = amd_bytealign (in1[2], in1[1], 1);
- out1[1] = amd_bytealign (in1[1], in1[0], 1);
- out1[0] = amd_bytealign (in1[0], in0[3], 1);
- out0[3] = amd_bytealign (in0[3], in0[2], 1);
- out0[2] = amd_bytealign (in0[2], in0[1], 1);
- out0[1] = amd_bytealign (in0[1], in0[0], 1);
- out0[0] = amd_bytealign (in0[0], 0, 1);
+ case 1: out1[3] = amd_bytealign_S (in1[3], in1[2], 3);
+ out1[2] = amd_bytealign_S (in1[2], in1[1], 3);
+ out1[1] = amd_bytealign_S (in1[1], in1[0], 3);
+ out1[0] = amd_bytealign_S (in1[0], in0[3], 3);
+ out0[3] = amd_bytealign_S (in0[3], in0[2], 3);
+ out0[2] = amd_bytealign_S (in0[2], in0[1], 3);
+ out0[1] = amd_bytealign_S (in0[1], in0[0], 3);
+ out0[0] = amd_bytealign_S (in0[0], 0, 3);
+ break;
+ case 2: out1[3] = amd_bytealign_S (in1[3], in1[2], 2);
+ out1[2] = amd_bytealign_S (in1[2], in1[1], 2);
+ out1[1] = amd_bytealign_S (in1[1], in1[0], 2);
+ out1[0] = amd_bytealign_S (in1[0], in0[3], 2);
+ out0[3] = amd_bytealign_S (in0[3], in0[2], 2);
+ out0[2] = amd_bytealign_S (in0[2], in0[1], 2);
+ out0[1] = amd_bytealign_S (in0[1], in0[0], 2);
+ out0[0] = amd_bytealign_S (in0[0], 0, 2);
+ break;
+ case 3: out1[3] = amd_bytealign_S (in1[3], in1[2], 1);
+ out1[2] = amd_bytealign_S (in1[2], in1[1], 1);
+ out1[1] = amd_bytealign_S (in1[1], in1[0], 1);
+ out1[0] = amd_bytealign_S (in1[0], in0[3], 1);
+ out0[3] = amd_bytealign_S (in0[3], in0[2], 1);
+ out0[2] = amd_bytealign_S (in0[2], in0[1], 1);
+ out0[1] = amd_bytealign_S (in0[1], in0[0], 1);
+ out0[0] = amd_bytealign_S (in0[0], 0, 1);
break;
case 4: out1[3] = in1[2];
out1[2] = in1[1];
out0[1] = in0[0];
out0[0] = 0;
break;
- case 5: out1[3] = amd_bytealign (in1[2], in1[1], 3);
- out1[2] = amd_bytealign (in1[1], in1[0], 3);
- out1[1] = amd_bytealign (in1[0], in0[3], 3);
- out1[0] = amd_bytealign (in0[3], in0[2], 3);
- out0[3] = amd_bytealign (in0[2], in0[1], 3);
- out0[2] = amd_bytealign (in0[1], in0[0], 3);
- out0[1] = amd_bytealign (in0[0], 0, 3);
+ case 5: out1[3] = amd_bytealign_S (in1[2], in1[1], 3);
+ out1[2] = amd_bytealign_S (in1[1], in1[0], 3);
+ out1[1] = amd_bytealign_S (in1[0], in0[3], 3);
+ out1[0] = amd_bytealign_S (in0[3], in0[2], 3);
+ out0[3] = amd_bytealign_S (in0[2], in0[1], 3);
+ out0[2] = amd_bytealign_S (in0[1], in0[0], 3);
+ out0[1] = amd_bytealign_S (in0[0], 0, 3);
out0[0] = 0;
break;
- case 6: out1[3] = amd_bytealign (in1[2], in1[1], 2);
- out1[2] = amd_bytealign (in1[1], in1[0], 2);
- out1[1] = amd_bytealign (in1[0], in0[3], 2);
- out1[0] = amd_bytealign (in0[3], in0[2], 2);
- out0[3] = amd_bytealign (in0[2], in0[1], 2);
- out0[2] = amd_bytealign (in0[1], in0[0], 2);
- out0[1] = amd_bytealign (in0[0], 0, 2);
+ case 6: out1[3] = amd_bytealign_S (in1[2], in1[1], 2);
+ out1[2] = amd_bytealign_S (in1[1], in1[0], 2);
+ out1[1] = amd_bytealign_S (in1[0], in0[3], 2);
+ out1[0] = amd_bytealign_S (in0[3], in0[2], 2);
+ out0[3] = amd_bytealign_S (in0[2], in0[1], 2);
+ out0[2] = amd_bytealign_S (in0[1], in0[0], 2);
+ out0[1] = amd_bytealign_S (in0[0], 0, 2);
out0[0] = 0;
break;
- case 7: out1[3] = amd_bytealign (in1[2], in1[1], 1);
- out1[2] = amd_bytealign (in1[1], in1[0], 1);
- out1[1] = amd_bytealign (in1[0], in0[3], 1);
- out1[0] = amd_bytealign (in0[3], in0[2], 1);
- out0[3] = amd_bytealign (in0[2], in0[1], 1);
- out0[2] = amd_bytealign (in0[1], in0[0], 1);
- out0[1] = amd_bytealign (in0[0], 0, 1);
+ case 7: out1[3] = amd_bytealign_S (in1[2], in1[1], 1);
+ out1[2] = amd_bytealign_S (in1[1], in1[0], 1);
+ out1[1] = amd_bytealign_S (in1[0], in0[3], 1);
+ out1[0] = amd_bytealign_S (in0[3], in0[2], 1);
+ out0[3] = amd_bytealign_S (in0[2], in0[1], 1);
+ out0[2] = amd_bytealign_S (in0[1], in0[0], 1);
+ out0[1] = amd_bytealign_S (in0[0], 0, 1);
out0[0] = 0;
break;
case 8: out1[3] = in1[1];
out0[1] = 0;
out0[0] = 0;
break;
- case 9: out1[3] = amd_bytealign (in1[1], in1[0], 3);
- out1[2] = amd_bytealign (in1[0], in0[3], 3);
- out1[1] = amd_bytealign (in0[3], in0[2], 3);
- out1[0] = amd_bytealign (in0[2], in0[1], 3);
- out0[3] = amd_bytealign (in0[1], in0[0], 3);
- out0[2] = amd_bytealign (in0[0], 0, 3);
+ case 9: out1[3] = amd_bytealign_S (in1[1], in1[0], 3);
+ out1[2] = amd_bytealign_S (in1[0], in0[3], 3);
+ out1[1] = amd_bytealign_S (in0[3], in0[2], 3);
+ out1[0] = amd_bytealign_S (in0[2], in0[1], 3);
+ out0[3] = amd_bytealign_S (in0[1], in0[0], 3);
+ out0[2] = amd_bytealign_S (in0[0], 0, 3);
out0[1] = 0;
out0[0] = 0;
break;
- case 10: out1[3] = amd_bytealign (in1[1], in1[0], 2);
- out1[2] = amd_bytealign (in1[0], in0[3], 2);
- out1[1] = amd_bytealign (in0[3], in0[2], 2);
- out1[0] = amd_bytealign (in0[2], in0[1], 2);
- out0[3] = amd_bytealign (in0[1], in0[0], 2);
- out0[2] = amd_bytealign (in0[0], 0, 2);
+ case 10: out1[3] = amd_bytealign_S (in1[1], in1[0], 2);
+ out1[2] = amd_bytealign_S (in1[0], in0[3], 2);
+ out1[1] = amd_bytealign_S (in0[3], in0[2], 2);
+ out1[0] = amd_bytealign_S (in0[2], in0[1], 2);
+ out0[3] = amd_bytealign_S (in0[1], in0[0], 2);
+ out0[2] = amd_bytealign_S (in0[0], 0, 2);
out0[1] = 0;
out0[0] = 0;
break;
- case 11: out1[3] = amd_bytealign (in1[1], in1[0], 1);
- out1[2] = amd_bytealign (in1[0], in0[3], 1);
- out1[1] = amd_bytealign (in0[3], in0[2], 1);
- out1[0] = amd_bytealign (in0[2], in0[1], 1);
- out0[3] = amd_bytealign (in0[1], in0[0], 1);
- out0[2] = amd_bytealign (in0[0], 0, 1);
+ case 11: out1[3] = amd_bytealign_S (in1[1], in1[0], 1);
+ out1[2] = amd_bytealign_S (in1[0], in0[3], 1);
+ out1[1] = amd_bytealign_S (in0[3], in0[2], 1);
+ out1[0] = amd_bytealign_S (in0[2], in0[1], 1);
+ out0[3] = amd_bytealign_S (in0[1], in0[0], 1);
+ out0[2] = amd_bytealign_S (in0[0], 0, 1);
out0[1] = 0;
out0[0] = 0;
break;
out0[1] = 0;
out0[0] = 0;
break;
- case 13: out1[3] = amd_bytealign (in1[0], in0[3], 3);
- out1[2] = amd_bytealign (in0[3], in0[2], 3);
- out1[1] = amd_bytealign (in0[2], in0[1], 3);
- out1[0] = amd_bytealign (in0[1], in0[0], 3);
- out0[3] = amd_bytealign (in0[0], 0, 3);
+ case 13: out1[3] = amd_bytealign_S (in1[0], in0[3], 3);
+ out1[2] = amd_bytealign_S (in0[3], in0[2], 3);
+ out1[1] = amd_bytealign_S (in0[2], in0[1], 3);
+ out1[0] = amd_bytealign_S (in0[1], in0[0], 3);
+ out0[3] = amd_bytealign_S (in0[0], 0, 3);
out0[2] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 14: out1[3] = amd_bytealign (in1[0], in0[3], 2);
- out1[2] = amd_bytealign (in0[3], in0[2], 2);
- out1[1] = amd_bytealign (in0[2], in0[1], 2);
- out1[0] = amd_bytealign (in0[1], in0[0], 2);
- out0[3] = amd_bytealign (in0[0], 0, 2);
+ case 14: out1[3] = amd_bytealign_S (in1[0], in0[3], 2);
+ out1[2] = amd_bytealign_S (in0[3], in0[2], 2);
+ out1[1] = amd_bytealign_S (in0[2], in0[1], 2);
+ out1[0] = amd_bytealign_S (in0[1], in0[0], 2);
+ out0[3] = amd_bytealign_S (in0[0], 0, 2);
out0[2] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 15: out1[3] = amd_bytealign (in1[0], in0[3], 1);
- out1[2] = amd_bytealign (in0[3], in0[2], 1);
- out1[1] = amd_bytealign (in0[2], in0[1], 1);
- out1[0] = amd_bytealign (in0[1], in0[0], 1);
- out0[3] = amd_bytealign (in0[0], 0, 1);
+ case 15: out1[3] = amd_bytealign_S (in1[0], in0[3], 1);
+ out1[2] = amd_bytealign_S (in0[3], in0[2], 1);
+ out1[1] = amd_bytealign_S (in0[2], in0[1], 1);
+ out1[0] = amd_bytealign_S (in0[1], in0[0], 1);
+ out0[3] = amd_bytealign_S (in0[0], 0, 1);
out0[2] = 0;
out0[1] = 0;
out0[0] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 17: out1[3] = amd_bytealign (in0[3], in0[2], 3);
- out1[2] = amd_bytealign (in0[2], in0[1], 3);
- out1[1] = amd_bytealign (in0[1], in0[0], 3);
- out1[0] = amd_bytealign (in0[0], 0, 3);
+ case 17: out1[3] = amd_bytealign_S (in0[3], in0[2], 3);
+ out1[2] = amd_bytealign_S (in0[2], in0[1], 3);
+ out1[1] = amd_bytealign_S (in0[1], in0[0], 3);
+ out1[0] = amd_bytealign_S (in0[0], 0, 3);
out0[3] = 0;
out0[2] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 18: out1[3] = amd_bytealign (in0[3], in0[2], 2);
- out1[2] = amd_bytealign (in0[2], in0[1], 2);
- out1[1] = amd_bytealign (in0[1], in0[0], 2);
- out1[0] = amd_bytealign (in0[0], 0, 2);
+ case 18: out1[3] = amd_bytealign_S (in0[3], in0[2], 2);
+ out1[2] = amd_bytealign_S (in0[2], in0[1], 2);
+ out1[1] = amd_bytealign_S (in0[1], in0[0], 2);
+ out1[0] = amd_bytealign_S (in0[0], 0, 2);
out0[3] = 0;
out0[2] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 19: out1[3] = amd_bytealign (in0[3], in0[2], 1);
- out1[2] = amd_bytealign (in0[2], in0[1], 1);
- out1[1] = amd_bytealign (in0[1], in0[0], 1);
- out1[0] = amd_bytealign (in0[0], 0, 1);
+ case 19: out1[3] = amd_bytealign_S (in0[3], in0[2], 1);
+ out1[2] = amd_bytealign_S (in0[2], in0[1], 1);
+ out1[1] = amd_bytealign_S (in0[1], in0[0], 1);
+ out1[0] = amd_bytealign_S (in0[0], 0, 1);
out0[3] = 0;
out0[2] = 0;
out0[1] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 21: out1[3] = amd_bytealign (in0[2], in0[1], 3);
- out1[2] = amd_bytealign (in0[1], in0[0], 3);
- out1[1] = amd_bytealign (in0[0], 0, 3);
+ case 21: out1[3] = amd_bytealign_S (in0[2], in0[1], 3);
+ out1[2] = amd_bytealign_S (in0[1], in0[0], 3);
+ out1[1] = amd_bytealign_S (in0[0], 0, 3);
out1[0] = 0;
out0[3] = 0;
out0[2] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 22: out1[3] = amd_bytealign (in0[2], in0[1], 2);
- out1[2] = amd_bytealign (in0[1], in0[0], 2);
- out1[1] = amd_bytealign (in0[0], 0, 2);
+ case 22: out1[3] = amd_bytealign_S (in0[2], in0[1], 2);
+ out1[2] = amd_bytealign_S (in0[1], in0[0], 2);
+ out1[1] = amd_bytealign_S (in0[0], 0, 2);
out1[0] = 0;
out0[3] = 0;
out0[2] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 23: out1[3] = amd_bytealign (in0[2], in0[1], 1);
- out1[2] = amd_bytealign (in0[1], in0[0], 1);
- out1[1] = amd_bytealign (in0[0], 0, 1);
+ case 23: out1[3] = amd_bytealign_S (in0[2], in0[1], 1);
+ out1[2] = amd_bytealign_S (in0[1], in0[0], 1);
+ out1[1] = amd_bytealign_S (in0[0], 0, 1);
out1[0] = 0;
out0[3] = 0;
out0[2] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 25: out1[3] = amd_bytealign (in0[1], in0[0], 3);
- out1[2] = amd_bytealign (in0[0], 0, 3);
+ case 25: out1[3] = amd_bytealign_S (in0[1], in0[0], 3);
+ out1[2] = amd_bytealign_S (in0[0], 0, 3);
out1[1] = 0;
out1[0] = 0;
out0[3] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 26: out1[3] = amd_bytealign (in0[1], in0[0], 2);
- out1[2] = amd_bytealign (in0[0], 0, 2);
+ case 26: out1[3] = amd_bytealign_S (in0[1], in0[0], 2);
+ out1[2] = amd_bytealign_S (in0[0], 0, 2);
out1[1] = 0;
out1[0] = 0;
out0[3] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 27: out1[3] = amd_bytealign (in0[1], in0[0], 1);
- out1[2] = amd_bytealign (in0[0], 0, 1);
+ case 27: out1[3] = amd_bytealign_S (in0[1], in0[0], 1);
+ out1[2] = amd_bytealign_S (in0[0], 0, 1);
out1[1] = 0;
out1[0] = 0;
out0[3] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 29: out1[3] = amd_bytealign (in0[0], 0, 3);
+ case 29: out1[3] = amd_bytealign_S (in0[0], 0, 3);
out1[2] = 0;
out1[1] = 0;
out1[0] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 30: out1[3] = amd_bytealign (in0[0], 0, 2);
+ case 30: out1[3] = amd_bytealign_S (in0[0], 0, 2);
out1[2] = 0;
out1[1] = 0;
out1[0] = 0;
out0[1] = 0;
out0[0] = 0;
break;
- case 31: out1[3] = amd_bytealign (in0[0], 0, 1);
+ case 31: out1[3] = amd_bytealign_S (in0[0], 0, 1);
out1[2] = 0;
out1[1] = 0;
out1[0] = 0;
#endif
}
-static void append_block1 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_r0)
+inline void append_block1 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_r0)
{
u32 tmp[2];
}
}
-static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4])
+inline void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4])
{
+/*
#ifdef IS_NV
switch (offset)
{
break;
case 1:
- dst0[0] = __byte_perm (src_l0[0], src_r0[0], 0x6540);
- dst0[1] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
- dst0[2] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
- dst0[3] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
- dst1[0] = __byte_perm (src_r0[3], src_r1[0], 0x6543);
- dst1[1] = __byte_perm (src_r1[0], src_r1[1], 0x6543);
- dst1[2] = __byte_perm (src_r1[1], src_r1[2], 0x6543);
- dst1[3] = __byte_perm (src_r1[2], src_r1[3], 0x6543);
+ dst0[0] = __byte_perm_S (src_l0[0], src_r0[0], 0x6540);
+ dst0[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
+ dst0[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
+ dst0[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543);
+ dst1[0] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543);
+ dst1[1] = __byte_perm_S (src_r1[0], src_r1[1], 0x6543);
+ dst1[2] = __byte_perm_S (src_r1[1], src_r1[2], 0x6543);
+ dst1[3] = __byte_perm_S (src_r1[2], src_r1[3], 0x6543);
break;
case 2:
- dst0[0] = __byte_perm (src_l0[0], src_r0[0], 0x5410);
- dst0[1] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
- dst0[2] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
- dst0[3] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
- dst1[0] = __byte_perm (src_r0[3], src_r1[0], 0x5432);
- dst1[1] = __byte_perm (src_r1[0], src_r1[1], 0x5432);
- dst1[2] = __byte_perm (src_r1[1], src_r1[2], 0x5432);
- dst1[3] = __byte_perm (src_r1[2], src_r1[3], 0x5432);
+ dst0[0] = __byte_perm_S (src_l0[0], src_r0[0], 0x5410);
+ dst0[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
+ dst0[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
+ dst0[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432);
+ dst1[0] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432);
+ dst1[1] = __byte_perm_S (src_r1[0], src_r1[1], 0x5432);
+ dst1[2] = __byte_perm_S (src_r1[1], src_r1[2], 0x5432);
+ dst1[3] = __byte_perm_S (src_r1[2], src_r1[3], 0x5432);
break;
case 3:
- dst0[0] = __byte_perm (src_l0[0], src_r0[0], 0x4210);
- dst0[1] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
- dst0[2] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
- dst0[3] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
- dst1[0] = __byte_perm (src_r0[3], src_r1[0], 0x4321);
- dst1[1] = __byte_perm (src_r1[0], src_r1[1], 0x4321);
- dst1[2] = __byte_perm (src_r1[1], src_r1[2], 0x4321);
- dst1[3] = __byte_perm (src_r1[2], src_r1[3], 0x4321);
+ dst0[0] = __byte_perm_S (src_l0[0], src_r0[0], 0x4210);
+ dst0[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
+ dst0[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
+ dst0[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321);
+ dst1[0] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321);
+ dst1[1] = __byte_perm_S (src_r1[0], src_r1[1], 0x4321);
+ dst1[2] = __byte_perm_S (src_r1[1], src_r1[2], 0x4321);
+ dst1[3] = __byte_perm_S (src_r1[2], src_r1[3], 0x4321);
break;
case 4:
break;
case 5:
- dst0[1] = __byte_perm (src_l0[1], src_r0[0], 0x6540);
- dst0[2] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
- dst0[3] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
- dst1[0] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
- dst1[1] = __byte_perm (src_r0[3], src_r1[0], 0x6543);
- dst1[2] = __byte_perm (src_r1[0], src_r1[1], 0x6543);
- dst1[3] = __byte_perm (src_r1[1], src_r1[2], 0x6543);
+ dst0[1] = __byte_perm_S (src_l0[1], src_r0[0], 0x6540);
+ dst0[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
+ dst0[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
+ dst1[0] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543);
+ dst1[1] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543);
+ dst1[2] = __byte_perm_S (src_r1[0], src_r1[1], 0x6543);
+ dst1[3] = __byte_perm_S (src_r1[1], src_r1[2], 0x6543);
break;
case 6:
- dst0[1] = __byte_perm (src_l0[1], src_r0[0], 0x5410);
- dst0[2] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
- dst0[3] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
- dst1[0] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
- dst1[1] = __byte_perm (src_r0[3], src_r1[0], 0x5432);
- dst1[2] = __byte_perm (src_r1[0], src_r1[1], 0x5432);
- dst1[3] = __byte_perm (src_r1[1], src_r1[2], 0x5432);
+ dst0[1] = __byte_perm_S (src_l0[1], src_r0[0], 0x5410);
+ dst0[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
+ dst0[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
+ dst1[0] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432);
+ dst1[1] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432);
+ dst1[2] = __byte_perm_S (src_r1[0], src_r1[1], 0x5432);
+ dst1[3] = __byte_perm_S (src_r1[1], src_r1[2], 0x5432);
break;
case 7:
- dst0[1] = __byte_perm (src_l0[1], src_r0[0], 0x4210);
- dst0[2] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
- dst0[3] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
- dst1[0] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
- dst1[1] = __byte_perm (src_r0[3], src_r1[0], 0x4321);
- dst1[2] = __byte_perm (src_r1[0], src_r1[1], 0x4321);
- dst1[3] = __byte_perm (src_r1[1], src_r1[2], 0x4321);
+ dst0[1] = __byte_perm_S (src_l0[1], src_r0[0], 0x4210);
+ dst0[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
+ dst0[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
+ dst1[0] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321);
+ dst1[1] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321);
+ dst1[2] = __byte_perm_S (src_r1[0], src_r1[1], 0x4321);
+ dst1[3] = __byte_perm_S (src_r1[1], src_r1[2], 0x4321);
break;
case 8:
break;
case 9:
- dst0[2] = __byte_perm (src_l0[2], src_r0[0], 0x6540);
- dst0[3] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
- dst1[0] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
- dst1[1] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
- dst1[2] = __byte_perm (src_r0[3], src_r1[0], 0x6543);
- dst1[3] = __byte_perm (src_r1[0], src_r1[1], 0x6543);
+ dst0[2] = __byte_perm_S (src_l0[2], src_r0[0], 0x6540);
+ dst0[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
+ dst1[0] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
+ dst1[1] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543);
+ dst1[2] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543);
+ dst1[3] = __byte_perm_S (src_r1[0], src_r1[1], 0x6543);
break;
case 10:
- dst0[2] = __byte_perm (src_l0[2], src_r0[0], 0x5410);
- dst0[3] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
- dst1[0] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
- dst1[1] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
- dst1[2] = __byte_perm (src_r0[3], src_r1[0], 0x5432);
- dst1[3] = __byte_perm (src_r1[0], src_r1[1], 0x5432);
+ dst0[2] = __byte_perm_S (src_l0[2], src_r0[0], 0x5410);
+ dst0[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
+ dst1[0] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
+ dst1[1] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432);
+ dst1[2] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432);
+ dst1[3] = __byte_perm_S (src_r1[0], src_r1[1], 0x5432);
break;
case 11:
- dst0[2] = __byte_perm (src_l0[2], src_r0[0], 0x4210);
- dst0[3] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
- dst1[0] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
- dst1[1] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
- dst1[2] = __byte_perm (src_r0[3], src_r1[0], 0x4321);
- dst1[3] = __byte_perm (src_r1[0], src_r1[1], 0x4321);
+ dst0[2] = __byte_perm_S (src_l0[2], src_r0[0], 0x4210);
+ dst0[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
+ dst1[0] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
+ dst1[1] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321);
+ dst1[2] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321);
+ dst1[3] = __byte_perm_S (src_r1[0], src_r1[1], 0x4321);
break;
case 12:
break;
case 13:
- dst0[3] = __byte_perm (src_l0[3], src_r0[0], 0x6540);
- dst1[0] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
- dst1[1] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
- dst1[2] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
- dst1[3] = __byte_perm (src_r0[3], src_r1[0], 0x6543);
+ dst0[3] = __byte_perm_S (src_l0[3], src_r0[0], 0x6540);
+ dst1[0] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
+ dst1[1] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
+ dst1[2] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543);
+ dst1[3] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543);
break;
case 14:
- dst0[3] = __byte_perm (src_l0[3], src_r0[0], 0x5410);
- dst1[0] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
- dst1[1] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
- dst1[2] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
- dst1[3] = __byte_perm (src_r0[3], src_r1[0], 0x5432);
+ dst0[3] = __byte_perm_S (src_l0[3], src_r0[0], 0x5410);
+ dst1[0] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
+ dst1[1] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
+ dst1[2] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432);
+ dst1[3] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432);
break;
case 15:
- dst0[3] = __byte_perm (src_l0[3], src_r0[0], 0x4210);
- dst1[0] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
- dst1[1] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
- dst1[2] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
- dst1[3] = __byte_perm (src_r0[3], src_r1[0], 0x4321);
+ dst0[3] = __byte_perm_S (src_l0[3], src_r0[0], 0x4210);
+ dst1[0] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
+ dst1[1] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
+ dst1[2] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321);
+ dst1[3] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321);
break;
case 16:
break;
case 17:
- dst1[0] = __byte_perm (src_l1[0], src_r0[0], 0x6540);
- dst1[1] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
- dst1[2] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
- dst1[3] = __byte_perm (src_r0[2], src_r0[3], 0x6543);
+ dst1[0] = __byte_perm_S (src_l1[0], src_r0[0], 0x6540);
+ dst1[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
+ dst1[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
+ dst1[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543);
break;
case 18:
- dst1[0] = __byte_perm (src_l1[0], src_r0[0], 0x5410);
- dst1[1] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
- dst1[2] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
- dst1[3] = __byte_perm (src_r0[2], src_r0[3], 0x5432);
+ dst1[0] = __byte_perm_S (src_l1[0], src_r0[0], 0x5410);
+ dst1[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
+ dst1[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
+ dst1[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432);
break;
case 19:
- dst1[0] = __byte_perm (src_l1[0], src_r0[0], 0x4210);
- dst1[1] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
- dst1[2] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
- dst1[3] = __byte_perm (src_r0[2], src_r0[3], 0x4321);
+ dst1[0] = __byte_perm_S (src_l1[0], src_r0[0], 0x4210);
+ dst1[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
+ dst1[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
+ dst1[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321);
break;
case 20:
break;
case 21:
- dst1[1] = __byte_perm (src_l1[1], src_r0[0], 0x6540);
- dst1[2] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
- dst1[3] = __byte_perm (src_r0[1], src_r0[2], 0x6543);
+ dst1[1] = __byte_perm_S (src_l1[1], src_r0[0], 0x6540);
+ dst1[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
+ dst1[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543);
break;
case 22:
- dst1[1] = __byte_perm (src_l1[1], src_r0[0], 0x5410);
- dst1[2] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
- dst1[3] = __byte_perm (src_r0[1], src_r0[2], 0x5432);
+ dst1[1] = __byte_perm_S (src_l1[1], src_r0[0], 0x5410);
+ dst1[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
+ dst1[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432);
break;
case 23:
- dst1[1] = __byte_perm (src_l1[1], src_r0[0], 0x4210);
- dst1[2] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
- dst1[3] = __byte_perm (src_r0[1], src_r0[2], 0x4321);
+ dst1[1] = __byte_perm_S (src_l1[1], src_r0[0], 0x4210);
+ dst1[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
+ dst1[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321);
break;
case 24:
break;
case 25:
- dst1[2] = __byte_perm (src_l1[2], src_r0[0], 0x6540);
- dst1[3] = __byte_perm (src_r0[0], src_r0[1], 0x6543);
+ dst1[2] = __byte_perm_S (src_l1[2], src_r0[0], 0x6540);
+ dst1[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543);
break;
case 26:
- dst1[2] = __byte_perm (src_l1[2], src_r0[0], 0x5410);
- dst1[3] = __byte_perm (src_r0[0], src_r0[1], 0x5432);
+ dst1[2] = __byte_perm_S (src_l1[2], src_r0[0], 0x5410);
+ dst1[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432);
break;
case 27:
- dst1[2] = __byte_perm (src_l1[2], src_r0[0], 0x4210);
- dst1[3] = __byte_perm (src_r0[0], src_r0[1], 0x4321);
+ dst1[2] = __byte_perm_S (src_l1[2], src_r0[0], 0x4210);
+ dst1[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321);
break;
case 28:
break;
case 29:
- dst1[3] = __byte_perm (src_l1[3], src_r0[0], 0x6540);
+ dst1[3] = __byte_perm_S (src_l1[3], src_r0[0], 0x6540);
break;
case 30:
- dst1[3] = __byte_perm (src_l1[3], src_r0[0], 0x5410);
+ dst1[3] = __byte_perm_S (src_l1[3], src_r0[0], 0x5410);
break;
case 31:
- dst1[3] = __byte_perm (src_l1[3], src_r0[0], 0x4210);
+ dst1[3] = __byte_perm_S (src_l1[3], src_r0[0], 0x4210);
break;
}
#endif
#if defined IS_AMD || defined IS_GENERIC
+*/
switch (offset)
{
case 31:
dst1[3] = src_r0[0];
break;
case 27:
- dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 1);
+ dst1[3] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
dst1[2] = src_l1[2] | src_r0[0] << 24;
break;
case 26:
- dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 2);
+ dst1[3] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
dst1[2] = src_l1[2] | src_r0[0] << 16;
break;
case 25:
- dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 3);
+ dst1[3] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
dst1[2] = src_l1[2] | src_r0[0] << 8;
break;
case 24:
dst1[2] = src_r0[0];
break;
case 23:
- dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 1);
- dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 1);
+ dst1[3] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
+ dst1[2] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
dst1[1] = src_l1[1] | src_r0[0] << 24;
break;
case 22:
- dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 2);
- dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 2);
+ dst1[3] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
+ dst1[2] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
dst1[1] = src_l1[1] | src_r0[0] << 16;
break;
case 21:
- dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 3);
- dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 3);
+ dst1[3] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
+ dst1[2] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
dst1[1] = src_l1[1] | src_r0[0] << 8;
break;
case 20:
dst1[1] = src_r0[0];
break;
case 19:
- dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 1);
- dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 1);
- dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 1);
+ dst1[3] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
+ dst1[2] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
+ dst1[1] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
dst1[0] = src_l1[0] | src_r0[0] << 24;
break;
case 18:
- dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 2);
- dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 2);
- dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 2);
+ dst1[3] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
+ dst1[2] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
+ dst1[1] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
dst1[0] = src_l1[0] | src_r0[0] << 16;
break;
case 17:
- dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 3);
- dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 3);
- dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 3);
+ dst1[3] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
+ dst1[2] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
+ dst1[1] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
dst1[0] = src_l1[0] | src_r0[0] << 8;
break;
case 16:
dst1[0] = src_r0[0];
break;
case 15:
- dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 1);
- dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 1);
- dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 1);
- dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 1);
+ dst1[3] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
+ dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
+ dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
+ dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
dst0[3] = src_l0[3] | src_r0[0] << 24;
break;
case 14:
- dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 2);
- dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 2);
- dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 2);
- dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 2);
+ dst1[3] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
+ dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
+ dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
+ dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
dst0[3] = src_l0[3] | src_r0[0] << 16;
break;
case 13:
- dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 3);
- dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 3);
- dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 3);
- dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 3);
+ dst1[3] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
+ dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
+ dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
+ dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
dst0[3] = src_l0[3] | src_r0[0] << 8;
break;
case 12:
dst0[3] = src_r0[0];
break;
case 11:
- dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 1);
- dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 1);
- dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 1);
- dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 1);
- dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 1);
+ dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 1);
+ dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
+ dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
+ dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
+ dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
dst0[2] = src_l0[2] | src_r0[0] << 24;
break;
case 10:
- dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 2);
- dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 2);
- dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 2);
- dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 2);
- dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 2);
+ dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 2);
+ dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
+ dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
+ dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
+ dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
dst0[2] = src_l0[2] | src_r0[0] << 16;
break;
case 9:
- dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 3);
- dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 3);
- dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 3);
- dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 3);
- dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 3);
+ dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 3);
+ dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
+ dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
+ dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
+ dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
dst0[2] = src_l0[2] | src_r0[0] << 8;
break;
case 8:
dst0[2] = src_r0[0];
break;
case 7:
- dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 1);
- dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 1);
- dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 1);
- dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 1);
- dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 1);
- dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 1);
+ dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 1);
+ dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 1);
+ dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
+ dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
+ dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
+ dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
dst0[1] = src_l0[1] | src_r0[0] << 24;
break;
case 6:
- dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 2);
- dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 2);
- dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 2);
- dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 2);
- dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 2);
- dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 2);
+ dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 2);
+ dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 2);
+ dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
+ dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
+ dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
+ dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
dst0[1] = src_l0[1] | src_r0[0] << 16;
break;
case 5:
- dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 3);
- dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 3);
- dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 3);
- dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 3);
- dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 3);
- dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 3);
+ dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 3);
+ dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 3);
+ dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
+ dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
+ dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
+ dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
dst0[1] = src_l0[1] | src_r0[0] << 8;
break;
case 4:
dst0[1] = src_r0[0];
break;
case 3:
- dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 1);
- dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 1);
- dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 1);
- dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 1);
- dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 1);
- dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 1);
- dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 1);
+ dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 1);
+ dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 1);
+ dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 1);
+ dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
+ dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
+ dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
+ dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
dst0[0] = src_l0[0] | src_r0[0] << 24;
break;
case 2:
- dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 2);
- dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 2);
- dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 2);
- dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 2);
- dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 2);
- dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 2);
- dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 2);
+ dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 2);
+ dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 2);
+ dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 2);
+ dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
+ dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
+ dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
+ dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
dst0[0] = src_l0[0] | src_r0[0] << 16;
break;
case 1:
- dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 3);
- dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 3);
- dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 3);
- dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 3);
- dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 3);
- dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 3);
- dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 3);
+ dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 3);
+ dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 3);
+ dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 3);
+ dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
+ dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
+ dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
+ dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
dst0[0] = src_l0[0] | src_r0[0] << 8;
break;
case 0:
dst0[0] = src_r0[0];
break;
}
- #endif
+// #endif
}
// Patch hunk for reverse_block: the qualifier changes from `static` to
// `inline`, and the final byte swaps change from swap32 to swap32_S.
// Only part of the body is visible in this hunk; the tib40/tib41
// declarations and most of their assignments are elided.
-static void reverse_block (u32 in0[4], u32 in1[4], u32 out0[4], u32 out1[4], const u32 len)
+inline void reverse_block (u32 in0[4], u32 in1[4], u32 out0[4], u32 out1[4], const u32 len)
{
// Calls rshift_block_N on the input with (32 - len) as its count argument,
// writing the result into out0/out1.
rshift_block_N (in0, in1, out0, out1, 32 - len);
// The shifted words are copied into the temporaries in reversed word order
// (the two visible assignments map out0[1] -> tib41[2], out0[0] -> tib41[3]).
tib41[2] = out0[1];
tib41[3] = out0[0];
// Each temporary word is passed through a 32-bit swap and stored back.
// NOTE(review): swap32_S is presumably the scalar (u32) counterpart of
// swap32 - confirm against its definition.
- out0[0] = swap32 (tib40[0]);
- out0[1] = swap32 (tib40[1]);
- out0[2] = swap32 (tib40[2]);
- out0[3] = swap32 (tib40[3]);
- out1[0] = swap32 (tib41[0]);
- out1[1] = swap32 (tib41[1]);
- out1[2] = swap32 (tib41[2]);
- out1[3] = swap32 (tib41[3]);
+ out0[0] = swap32_S (tib40[0]);
+ out0[1] = swap32_S (tib40[1]);
+ out0[2] = swap32_S (tib40[2]);
+ out0[3] = swap32_S (tib40[3]);
+ out1[0] = swap32_S (tib41[0]);
+ out1[1] = swap32_S (tib41[1]);
+ out1[2] = swap32_S (tib41[2]);
+ out1[3] = swap32_S (tib41[3]);
}
// Patch hunk for rule_op_mangle_lrest: `static` becomes `inline`; the
// signature is otherwise unchanged. Returns in_len unchanged.
// Only the statements for buf0[0] and buf0[1] are visible in this hunk.
-static u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// ORs each word with the mask generate_cmask derives from that same word.
// NOTE(review): presumably this sets the ASCII case bit to force lower
// case - confirm against generate_cmask.
buf0[0] |= (generate_cmask (buf0[0]));
buf0[1] |= (generate_cmask (buf0[1]));
return in_len;
}
// Patch hunk for rule_op_mangle_urest: `static` becomes `inline`; the
// signature is otherwise unchanged. Returns in_len unchanged.
// Only the statements for buf0[0] and buf0[1] are visible in this hunk.
-static u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// Clears, in each word, the bits of the mask generate_cmask derives from it.
// NOTE(review): presumably this clears the ASCII case bit to force upper
// case - confirm against generate_cmask.
buf0[0] &= ~(generate_cmask (buf0[0]));
buf0[1] &= ~(generate_cmask (buf0[1]));
return in_len;
}
// Patch hunk for rule_op_mangle_lrest_ufirst: `static` becomes `inline`.
// Visible behaviour: applies rule_op_mangle_lrest to the buffers, then
// returns in_len unchanged.
// NOTE(review): the name suggests the first character is handled separately
// afterwards; that code is not visible in this hunk - confirm.
-static u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
rule_op_mangle_lrest (p0, p1, buf0, buf1, in_len);
return in_len;
}
// Patch hunk for rule_op_mangle_urest_lfirst: `static` becomes `inline`.
// Visible behaviour: applies rule_op_mangle_urest to the buffers, then
// returns in_len unchanged.
// NOTE(review): the name suggests the first character is handled separately
// afterwards; that code is not visible in this hunk - confirm.
-static u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
rule_op_mangle_urest (p0, p1, buf0, buf1, in_len);
return in_len;
}
// Patch hunk for rule_op_mangle_trest: `static` becomes `inline`; the
// signature is otherwise unchanged. Returns in_len unchanged.
// Only the statements for buf0[0] and buf0[1] are visible in this hunk.
-static u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// XORs each word with the mask generate_cmask derives from that same word.
// NOTE(review): presumably this toggles the ASCII case bit - confirm
// against generate_cmask.
buf0[0] ^= (generate_cmask (buf0[0]));
buf0[1] ^= (generate_cmask (buf0[1]));
return in_len;
}
// Patch hunk for rule_op_mangle_toggle_at: `static` becomes `inline`.
// The code that modifies the buffers is elided from this hunk; both visible
// exits return in_len.
-static u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// Early-out when p0 is not below in_len.
if (p0 >= in_len) return (in_len);
return in_len;
}
// Patch hunk for rule_op_mangle_reverse: `static` becomes `inline`.
// Returns in_len unchanged.
-static u32 rule_op_mangle_reverse (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_reverse (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// In-place call: the same arrays are passed as both input and output.
reverse_block (buf0, buf1, buf0, buf1, in_len);
return in_len;
}
// Patch hunk for rule_op_mangle_dupeword: `static` becomes `inline`.
// The buffer manipulation and the definition of out_len are elided from
// this hunk.
-static u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// Early-out, returning in_len, when twice the length would reach 32.
if ((in_len + in_len) >= 32) return (in_len);
return out_len;
}
// Patch hunk for rule_op_mangle_dupeword_times: `static` becomes `inline`.
// The buffer manipulation and the definition of out_len are elided from
// this hunk.
-static u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// Early-out, returning in_len, when (p0 + 1) copies of the length would
// reach 32.
if (((in_len * p0) + in_len) >= 32) return (in_len);
return out_len;
}
// Patch hunk for rule_op_mangle_reflect: `static` becomes `inline`.
// The buffer manipulation and the definition of out_len are elided from
// this hunk.
-static u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// Early-out, returning in_len, when twice the length would reach 32.
if ((in_len + in_len) >= 32) return (in_len);
return out_len;
}
// Patch hunk for rule_op_mangle_append: `static` becomes `inline`.
// The buffer manipulation and the definition of out_len are elided from
// this hunk.
-static u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// Early-out, returning in_len, when one more unit of length would reach 32.
if ((in_len + 1) >= 32) return (in_len);
return out_len;
}
// Patch hunk for rule_op_mangle_prepend: `static` becomes `inline`.
// The buffer manipulation and the definition of out_len are elided from
// this hunk.
-static u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// Early-out, returning in_len, when one more unit of length would reach 32.
if ((in_len + 1) >= 32) return (in_len);
return out_len;
}
// Patch hunk for rule_op_mangle_rotate_left: `static` becomes `inline`.
// The rotation itself is elided from this hunk; both visible exits return
// in_len.
-static u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// Nothing to do for a zero length.
if (in_len == 0) return (in_len);
return in_len;
}
// Patch hunk for rule_op_mangle_rotate_right: `static` becomes `inline`.
// The rotation itself is elided from this hunk; both visible exits return
// in_len.
-static u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// Nothing to do for a zero length.
if (in_len == 0) return (in_len);
return in_len;
}
// Patch hunk for rule_op_mangle_delete_first: `static` becomes `inline`.
// The buffer manipulation and the definition of in_len1 are elided from
// this hunk.
-static u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// Nothing to delete for a zero length.
if (in_len == 0) return (in_len);
// NOTE(review): in_len1 is presumably in_len - 1 - confirm in the full file.
return in_len1;
}
// Patch hunk for rule_op_mangle_delete_last: `static` becomes `inline`.
// The buffer manipulation and the definition of in_len1 are elided from
// this hunk.
-static u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// Nothing to delete for a zero length.
if (in_len == 0) return (in_len);
// NOTE(review): in_len1 is presumably in_len - 1 - confirm in the full file.
return in_len1;
}
// Patch hunk for rule_op_mangle_delete_at: `static` becomes `inline`.
// The buffer manipulation and the definition of out_len are elided from
// this hunk.
-static u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// Early-out, returning in_len, when p0 is not below in_len.
if (p0 >= in_len) return (in_len);
return out_len;
}
// Patch hunk for rule_op_mangle_extract: `static` becomes `inline`.
// The buffer manipulation, any use of p1, and the definition of out_len are
// elided from this hunk.
-static u32 rule_op_mangle_extract (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_extract (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// Early-out, returning in_len, when p0 is not below in_len.
if (p0 >= in_len) return (in_len);
return out_len;
}
// Patch hunk for rule_op_mangle_omit: `static` becomes `inline`.
// The buffer manipulation, any use of p1, and the definition of out_len are
// elided from this hunk.
-static u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// Early-out, returning in_len, when p0 is not below in_len.
if (p0 >= in_len) return (in_len);
return out_len;
}
// Patch hunk for rule_op_mangle_insert: `static` becomes `inline`.
// The buffer manipulation and the definition of out_len are elided from
// this hunk.
-static u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// Early-out, returning in_len, only when p0 is strictly beyond in_len;
// p0 == in_len passes this guard.
if (p0 > in_len) return (in_len);
return out_len;
}
// Patch hunk for rule_op_mangle_overstrike: `static` becomes `inline`.
// The buffer manipulation is elided from this hunk; both visible exits
// return in_len.
-static u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// Early-out when p0 is not below in_len.
if (p0 >= in_len) return (in_len);
return in_len;
}
// Patch hunk for rule_op_mangle_truncate_at: `static` becomes `inline`.
// The buffer manipulation is elided from this hunk.
-static u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// Early-out, returning in_len, when p0 is not below in_len.
if (p0 >= in_len) return (in_len);
// Otherwise p0 is returned as the new length.
return p0;
}
// Patch hunk for rule_op_mangle_replace: `static` becomes `inline`, and every
// __byte_perm call in the IS_NV path becomes __byte_perm_S.
// Visible logic: for each index i below in_len, a value derived from the
// matching buf0/buf1 word is compared with p0; on a match the word is rebuilt
// from p1 and its old contents. Returns in_len unchanged.
// Index-to-word mapping: i 0..15 -> buf0[i / 4], i 16..31 -> buf1[(i - 16) / 4].
// NOTE(review): the byte-level reading assumes __byte_perm_S follows CUDA
// __byte_perm selector semantics, i.e. 0x6540..0x6543 extract byte 0..3
// zero-extended and 0x7650/0x7604/0x7054/0x0654 place p1's low byte at byte
// 0..3 while keeping the other bytes - confirm against its definition.
// The matching #endif is not visible in this hunk.
-static u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
#ifdef IS_NV
for (u32 i = 0; i < in_len; i++)
{
switch (i)
{
- case 0: if ((__byte_perm (buf0[0], 0, 0x6540)) == p0) buf0[0] = __byte_perm (p1, buf0[0], 0x7650);
+ case 0: if ((__byte_perm_S (buf0[0], 0, 0x6540)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x7650);
break;
- case 1: if ((__byte_perm (buf0[0], 0, 0x6541)) == p0) buf0[0] = __byte_perm (p1, buf0[0], 0x7604);
+ case 1: if ((__byte_perm_S (buf0[0], 0, 0x6541)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x7604);
break;
- case 2: if ((__byte_perm (buf0[0], 0, 0x6542)) == p0) buf0[0] = __byte_perm (p1, buf0[0], 0x7054);
+ case 2: if ((__byte_perm_S (buf0[0], 0, 0x6542)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x7054);
break;
- case 3: if ((__byte_perm (buf0[0], 0, 0x6543)) == p0) buf0[0] = __byte_perm (p1, buf0[0], 0x0654);
+ case 3: if ((__byte_perm_S (buf0[0], 0, 0x6543)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x0654);
break;
- case 4: if ((__byte_perm (buf0[1], 0, 0x6540)) == p0) buf0[1] = __byte_perm (p1, buf0[1], 0x7650);
+ case 4: if ((__byte_perm_S (buf0[1], 0, 0x6540)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x7650);
break;
- case 5: if ((__byte_perm (buf0[1], 0, 0x6541)) == p0) buf0[1] = __byte_perm (p1, buf0[1], 0x7604);
+ case 5: if ((__byte_perm_S (buf0[1], 0, 0x6541)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x7604);
break;
- case 6: if ((__byte_perm (buf0[1], 0, 0x6542)) == p0) buf0[1] = __byte_perm (p1, buf0[1], 0x7054);
+ case 6: if ((__byte_perm_S (buf0[1], 0, 0x6542)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x7054);
break;
- case 7: if ((__byte_perm (buf0[1], 0, 0x6543)) == p0) buf0[1] = __byte_perm (p1, buf0[1], 0x0654);
+ case 7: if ((__byte_perm_S (buf0[1], 0, 0x6543)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x0654);
break;
- case 8: if ((__byte_perm (buf0[2], 0, 0x6540)) == p0) buf0[2] = __byte_perm (p1, buf0[2], 0x7650);
+ case 8: if ((__byte_perm_S (buf0[2], 0, 0x6540)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x7650);
break;
- case 9: if ((__byte_perm (buf0[2], 0, 0x6541)) == p0) buf0[2] = __byte_perm (p1, buf0[2], 0x7604);
+ case 9: if ((__byte_perm_S (buf0[2], 0, 0x6541)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x7604);
break;
- case 10: if ((__byte_perm (buf0[2], 0, 0x6542)) == p0) buf0[2] = __byte_perm (p1, buf0[2], 0x7054);
+ case 10: if ((__byte_perm_S (buf0[2], 0, 0x6542)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x7054);
break;
- case 11: if ((__byte_perm (buf0[2], 0, 0x6543)) == p0) buf0[2] = __byte_perm (p1, buf0[2], 0x0654);
+ case 11: if ((__byte_perm_S (buf0[2], 0, 0x6543)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x0654);
break;
- case 12: if ((__byte_perm (buf0[3], 0, 0x6540)) == p0) buf0[3] = __byte_perm (p1, buf0[3], 0x7650);
+ case 12: if ((__byte_perm_S (buf0[3], 0, 0x6540)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x7650);
break;
- case 13: if ((__byte_perm (buf0[3], 0, 0x6541)) == p0) buf0[3] = __byte_perm (p1, buf0[3], 0x7604);
+ case 13: if ((__byte_perm_S (buf0[3], 0, 0x6541)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x7604);
break;
- case 14: if ((__byte_perm (buf0[3], 0, 0x6542)) == p0) buf0[3] = __byte_perm (p1, buf0[3], 0x7054);
+ case 14: if ((__byte_perm_S (buf0[3], 0, 0x6542)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x7054);
break;
- case 15: if ((__byte_perm (buf0[3], 0, 0x6543)) == p0) buf0[3] = __byte_perm (p1, buf0[3], 0x0654);
+ case 15: if ((__byte_perm_S (buf0[3], 0, 0x6543)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x0654);
break;
- case 16: if ((__byte_perm (buf1[0], 0, 0x6540)) == p0) buf1[0] = __byte_perm (p1, buf1[0], 0x7650);
+ case 16: if ((__byte_perm_S (buf1[0], 0, 0x6540)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x7650);
break;
- case 17: if ((__byte_perm (buf1[0], 0, 0x6541)) == p0) buf1[0] = __byte_perm (p1, buf1[0], 0x7604);
+ case 17: if ((__byte_perm_S (buf1[0], 0, 0x6541)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x7604);
break;
- case 18: if ((__byte_perm (buf1[0], 0, 0x6542)) == p0) buf1[0] = __byte_perm (p1, buf1[0], 0x7054);
+ case 18: if ((__byte_perm_S (buf1[0], 0, 0x6542)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x7054);
break;
- case 19: if ((__byte_perm (buf1[0], 0, 0x6543)) == p0) buf1[0] = __byte_perm (p1, buf1[0], 0x0654);
+ case 19: if ((__byte_perm_S (buf1[0], 0, 0x6543)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x0654);
break;
- case 20: if ((__byte_perm (buf1[1], 0, 0x6540)) == p0) buf1[1] = __byte_perm (p1, buf1[1], 0x7650);
+ case 20: if ((__byte_perm_S (buf1[1], 0, 0x6540)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x7650);
break;
- case 21: if ((__byte_perm (buf1[1], 0, 0x6541)) == p0) buf1[1] = __byte_perm (p1, buf1[1], 0x7604);
+ case 21: if ((__byte_perm_S (buf1[1], 0, 0x6541)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x7604);
break;
- case 22: if ((__byte_perm (buf1[1], 0, 0x6542)) == p0) buf1[1] = __byte_perm (p1, buf1[1], 0x7054);
+ case 22: if ((__byte_perm_S (buf1[1], 0, 0x6542)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x7054);
break;
- case 23: if ((__byte_perm (buf1[1], 0, 0x6543)) == p0) buf1[1] = __byte_perm (p1, buf1[1], 0x0654);
+ case 23: if ((__byte_perm_S (buf1[1], 0, 0x6543)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x0654);
break;
- case 24: if ((__byte_perm (buf1[2], 0, 0x6540)) == p0) buf1[2] = __byte_perm (p1, buf1[2], 0x7650);
+ case 24: if ((__byte_perm_S (buf1[2], 0, 0x6540)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x7650);
break;
- case 25: if ((__byte_perm (buf1[2], 0, 0x6541)) == p0) buf1[2] = __byte_perm (p1, buf1[2], 0x7604);
+ case 25: if ((__byte_perm_S (buf1[2], 0, 0x6541)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x7604);
break;
- case 26: if ((__byte_perm (buf1[2], 0, 0x6542)) == p0) buf1[2] = __byte_perm (p1, buf1[2], 0x7054);
+ case 26: if ((__byte_perm_S (buf1[2], 0, 0x6542)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x7054);
break;
- case 27: if ((__byte_perm (buf1[2], 0, 0x6543)) == p0) buf1[2] = __byte_perm (p1, buf1[2], 0x0654);
+ case 27: if ((__byte_perm_S (buf1[2], 0, 0x6543)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x0654);
break;
- case 28: if ((__byte_perm (buf1[3], 0, 0x6540)) == p0) buf1[3] = __byte_perm (p1, buf1[3], 0x7650);
+ case 28: if ((__byte_perm_S (buf1[3], 0, 0x6540)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x7650);
break;
- case 29: if ((__byte_perm (buf1[3], 0, 0x6541)) == p0) buf1[3] = __byte_perm (p1, buf1[3], 0x7604);
+ case 29: if ((__byte_perm_S (buf1[3], 0, 0x6541)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x7604);
break;
- case 30: if ((__byte_perm (buf1[3], 0, 0x6542)) == p0) buf1[3] = __byte_perm (p1, buf1[3], 0x7054);
+ case 30: if ((__byte_perm_S (buf1[3], 0, 0x6542)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x7054);
break;
- case 31: if ((__byte_perm (buf1[3], 0, 0x6543)) == p0) buf1[3] = __byte_perm (p1, buf1[3], 0x0654);
+ case 31: if ((__byte_perm_S (buf1[3], 0, 0x6543)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x0654);
break;
}
}
return in_len;
}
// Patch hunk for rule_op_mangle_purgechar: `static` becomes `inline`.
// As visible here the function is a placeholder: it does not touch the
// buffers and returns in_len unchanged.
-static u32 rule_op_mangle_purgechar (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_purgechar (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// TODO
return in_len;
}
// Patch hunk for rule_op_mangle_togglecase_rec: `static` becomes `inline`.
// As visible here the function is a placeholder: it does not touch the
// buffers and returns in_len unchanged.
-static u32 rule_op_mangle_togglecase_rec (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_togglecase_rec (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// TODO
return in_len;
}
-static u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if ( in_len == 0) return (in_len);
if ((in_len + p0) >= 32) return (in_len);
{
case 1: buf0[0] |= tmp;
break;
- case 2: buf0[0] |= __byte_perm (tmp, 0, 0x5400);
+ case 2: buf0[0] |= __byte_perm_S (tmp, 0, 0x5400);
break;
- case 3: buf0[0] |= __byte_perm (tmp, 0, 0x4000);
+ case 3: buf0[0] |= __byte_perm_S (tmp, 0, 0x4000);
break;
- case 4: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
+ case 4: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
break;
- case 5: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
+ case 5: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
buf0[1] |= tmp;
break;
- case 6: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x5400);
+ case 6: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x5400);
break;
- case 7: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x4000);
+ case 7: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x4000);
break;
- case 8: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
+ case 8: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
break;
- case 9: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
+ case 9: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
buf0[2] |= tmp;
break;
- case 10: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x5400);
+ case 10: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x5400);
break;
- case 11: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x4000);
+ case 11: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x4000);
break;
- case 12: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
+ case 12: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
break;
- case 13: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
+ case 13: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
buf0[3] |= tmp;
break;
- case 14: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x5400);
- break;
- case 15: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x4000);
- break;
- case 16: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x0000);
- break;
- case 17: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x0000);
+ case 14: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x5400);
+ break;
+ case 15: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x4000);
+ break;
+ case 16: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
+ break;
+ case 17: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
buf1[0] |= tmp;
break;
- case 18: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x0000);
- buf1[0] |= __byte_perm (tmp, 0, 0x5400);
- break;
- case 19: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x0000);
- buf1[0] |= __byte_perm (tmp, 0, 0x4000);
- break;
- case 20: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x0000);
- buf1[0] |= __byte_perm (tmp, 0, 0x0000);
- break;
- case 21: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x0000);
- buf1[0] |= __byte_perm (tmp, 0, 0x0000);
+ case 18: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[0] |= __byte_perm_S (tmp, 0, 0x5400);
+ break;
+ case 19: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[0] |= __byte_perm_S (tmp, 0, 0x4000);
+ break;
+ case 20: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ break;
+ case 21: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
buf1[1] |= tmp;
break;
- case 22: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x0000);
- buf1[0] |= __byte_perm (tmp, 0, 0x0000);
- buf1[1] |= __byte_perm (tmp, 0, 0x5400);
- break;
- case 23: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x0000);
- buf1[0] |= __byte_perm (tmp, 0, 0x0000);
- buf1[1] |= __byte_perm (tmp, 0, 0x4000);
- break;
- case 24: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x0000);
- buf1[0] |= __byte_perm (tmp, 0, 0x0000);
- buf1[1] |= __byte_perm (tmp, 0, 0x0000);
- break;
- case 25: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x0000);
- buf1[0] |= __byte_perm (tmp, 0, 0x0000);
- buf1[1] |= __byte_perm (tmp, 0, 0x0000);
+ case 22: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[1] |= __byte_perm_S (tmp, 0, 0x5400);
+ break;
+ case 23: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[1] |= __byte_perm_S (tmp, 0, 0x4000);
+ break;
+ case 24: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ break;
+ case 25: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
buf1[2] |= tmp;
break;
- case 26: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x0000);
- buf1[0] |= __byte_perm (tmp, 0, 0x0000);
- buf1[1] |= __byte_perm (tmp, 0, 0x0000);
- buf1[2] |= __byte_perm (tmp, 0, 0x5400);
- break;
- case 27: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x0000);
- buf1[0] |= __byte_perm (tmp, 0, 0x0000);
- buf1[1] |= __byte_perm (tmp, 0, 0x0000);
- buf1[2] |= __byte_perm (tmp, 0, 0x4000);
- break;
- case 28: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x0000);
- buf1[0] |= __byte_perm (tmp, 0, 0x0000);
- buf1[1] |= __byte_perm (tmp, 0, 0x0000);
- buf1[2] |= __byte_perm (tmp, 0, 0x0000);
- break;
- case 29: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x0000);
- buf1[0] |= __byte_perm (tmp, 0, 0x0000);
- buf1[1] |= __byte_perm (tmp, 0, 0x0000);
- buf1[2] |= __byte_perm (tmp, 0, 0x0000);
+ case 26: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[2] |= __byte_perm_S (tmp, 0, 0x5400);
+ break;
+ case 27: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[2] |= __byte_perm_S (tmp, 0, 0x4000);
+ break;
+ case 28: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ break;
+ case 29: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[2] |= __byte_perm_S (tmp, 0, 0x0000);
buf1[3] |= tmp;
break;
- case 30: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x0000);
- buf1[0] |= __byte_perm (tmp, 0, 0x0000);
- buf1[1] |= __byte_perm (tmp, 0, 0x0000);
- buf1[2] |= __byte_perm (tmp, 0, 0x0000);
- buf1[3] |= __byte_perm (tmp, 0, 0x5400);
- break;
- case 31: buf0[0] |= __byte_perm (tmp, 0, 0x0000);
- buf0[1] |= __byte_perm (tmp, 0, 0x0000);
- buf0[2] |= __byte_perm (tmp, 0, 0x0000);
- buf0[3] |= __byte_perm (tmp, 0, 0x0000);
- buf1[0] |= __byte_perm (tmp, 0, 0x0000);
- buf1[1] |= __byte_perm (tmp, 0, 0x0000);
- buf1[2] |= __byte_perm (tmp, 0, 0x0000);
- buf1[3] |= __byte_perm (tmp, 0, 0x4000);
+ case 30: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[3] |= __byte_perm_S (tmp, 0, 0x5400);
+ break;
+ case 31: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf0[3] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[0] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[1] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[2] |= __byte_perm_S (tmp, 0, 0x0000);
+ buf1[3] |= __byte_perm_S (tmp, 0, 0x4000);
break;
}
#endif
return out_len;
}
-static u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if ( in_len == 0) return (in_len);
if ((in_len + p0) >= 32) return (in_len);
return out_len;
}
-static u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if ( in_len == 0) return (in_len);
if ((in_len + in_len) >= 32) return (in_len);
u32 tib41[4];
#ifdef IS_NV
- tib40[0] = __byte_perm (buf0[0], 0, 0x1100);
- tib40[1] = __byte_perm (buf0[0], 0, 0x3322);
- tib40[2] = __byte_perm (buf0[1], 0, 0x1100);
- tib40[3] = __byte_perm (buf0[1], 0, 0x3322);
- tib41[0] = __byte_perm (buf0[2], 0, 0x1100);
- tib41[1] = __byte_perm (buf0[2], 0, 0x3322);
- tib41[2] = __byte_perm (buf0[3], 0, 0x1100);
- tib41[3] = __byte_perm (buf0[3], 0, 0x3322);
+ tib40[0] = __byte_perm_S (buf0[0], 0, 0x1100);
+ tib40[1] = __byte_perm_S (buf0[0], 0, 0x3322);
+ tib40[2] = __byte_perm_S (buf0[1], 0, 0x1100);
+ tib40[3] = __byte_perm_S (buf0[1], 0, 0x3322);
+ tib41[0] = __byte_perm_S (buf0[2], 0, 0x1100);
+ tib41[1] = __byte_perm_S (buf0[2], 0, 0x3322);
+ tib41[2] = __byte_perm_S (buf0[3], 0, 0x1100);
+ tib41[3] = __byte_perm_S (buf0[3], 0, 0x3322);
buf0[0] = tib40[0];
buf0[1] = tib40[1];
return out_len;
}
-static u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (in_len < 2) return (in_len);
#ifdef IS_NV
- buf0[0] = __byte_perm (buf0[0], 0, 0x3201);
+ buf0[0] = __byte_perm_S (buf0[0], 0, 0x3201);
#endif
#if defined IS_AMD || defined IS_GENERIC
return in_len;
}
-static u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (in_len < 2) return (in_len);
#ifdef IS_NV
switch (in_len)
{
- case 2: buf0[0] = __byte_perm (buf0[0], 0, 0x5401);
+ case 2: buf0[0] = __byte_perm_S (buf0[0], 0, 0x5401);
break;
- case 3: buf0[0] = __byte_perm (buf0[0], 0, 0x4120);
+ case 3: buf0[0] = __byte_perm_S (buf0[0], 0, 0x4120);
break;
- case 4: buf0[0] = __byte_perm (buf0[0], 0, 0x2310);
+ case 4: buf0[0] = __byte_perm_S (buf0[0], 0, 0x2310);
break;
- case 5: buf0[1] = __byte_perm (buf0[1], buf0[0], 0x7210);
- buf0[0] = __byte_perm (buf0[0], buf0[1], 0x4210);
- buf0[1] = __byte_perm (buf0[1], 0, 0x6543);
+ case 5: buf0[1] = __byte_perm_S (buf0[1], buf0[0], 0x7210);
+ buf0[0] = __byte_perm_S (buf0[0], buf0[1], 0x4210);
+ buf0[1] = __byte_perm_S (buf0[1], 0, 0x6543);
break;
- case 6: buf0[1] = __byte_perm (buf0[1], 0, 0x5401);
+ case 6: buf0[1] = __byte_perm_S (buf0[1], 0, 0x5401);
break;
- case 7: buf0[1] = __byte_perm (buf0[1], 0, 0x4120);
+ case 7: buf0[1] = __byte_perm_S (buf0[1], 0, 0x4120);
break;
- case 8: buf0[1] = __byte_perm (buf0[1], 0, 0x2310);
+ case 8: buf0[1] = __byte_perm_S (buf0[1], 0, 0x2310);
break;
- case 9: buf0[2] = __byte_perm (buf0[2], buf0[1], 0x7210);
- buf0[1] = __byte_perm (buf0[1], buf0[2], 0x4210);
- buf0[2] = __byte_perm (buf0[2], 0, 0x6543);
+ case 9: buf0[2] = __byte_perm_S (buf0[2], buf0[1], 0x7210);
+ buf0[1] = __byte_perm_S (buf0[1], buf0[2], 0x4210);
+ buf0[2] = __byte_perm_S (buf0[2], 0, 0x6543);
break;
- case 10: buf0[2] = __byte_perm (buf0[2], 0, 0x5401);
+ case 10: buf0[2] = __byte_perm_S (buf0[2], 0, 0x5401);
break;
- case 11: buf0[2] = __byte_perm (buf0[2], 0, 0x4120);
+ case 11: buf0[2] = __byte_perm_S (buf0[2], 0, 0x4120);
break;
- case 12: buf0[2] = __byte_perm (buf0[2], 0, 0x2310);
+ case 12: buf0[2] = __byte_perm_S (buf0[2], 0, 0x2310);
break;
- case 13: buf0[3] = __byte_perm (buf0[3], buf0[2], 0x7210);
- buf0[2] = __byte_perm (buf0[2], buf0[3], 0x4210);
- buf0[3] = __byte_perm (buf0[3], 0, 0x6543);
+ case 13: buf0[3] = __byte_perm_S (buf0[3], buf0[2], 0x7210);
+ buf0[2] = __byte_perm_S (buf0[2], buf0[3], 0x4210);
+ buf0[3] = __byte_perm_S (buf0[3], 0, 0x6543);
break;
- case 14: buf0[3] = __byte_perm (buf0[3], 0, 0x5401);
+ case 14: buf0[3] = __byte_perm_S (buf0[3], 0, 0x5401);
break;
- case 15: buf0[3] = __byte_perm (buf0[3], 0, 0x4120);
+ case 15: buf0[3] = __byte_perm_S (buf0[3], 0, 0x4120);
break;
- case 16: buf0[3] = __byte_perm (buf0[3], 0, 0x2310);
+ case 16: buf0[3] = __byte_perm_S (buf0[3], 0, 0x2310);
break;
- case 17: buf1[0] = __byte_perm (buf1[0], buf0[3], 0x7210);
- buf0[3] = __byte_perm (buf0[3], buf1[0], 0x4210);
- buf1[0] = __byte_perm (buf1[0], 0, 0x6543);
+ case 17: buf1[0] = __byte_perm_S (buf1[0], buf0[3], 0x7210);
+ buf0[3] = __byte_perm_S (buf0[3], buf1[0], 0x4210);
+ buf1[0] = __byte_perm_S (buf1[0], 0, 0x6543);
break;
- case 18: buf1[0] = __byte_perm (buf1[0], 0, 0x5401);
+ case 18: buf1[0] = __byte_perm_S (buf1[0], 0, 0x5401);
break;
- case 19: buf1[0] = __byte_perm (buf1[0], 0, 0x4120);
+ case 19: buf1[0] = __byte_perm_S (buf1[0], 0, 0x4120);
break;
- case 20: buf1[0] = __byte_perm (buf1[0], 0, 0x2310);
+ case 20: buf1[0] = __byte_perm_S (buf1[0], 0, 0x2310);
break;
- case 21: buf1[1] = __byte_perm (buf1[1], buf1[0], 0x7210);
- buf1[0] = __byte_perm (buf1[0], buf1[1], 0x4210);
- buf1[1] = __byte_perm (buf1[1], 0, 0x6543);
+ case 21: buf1[1] = __byte_perm_S (buf1[1], buf1[0], 0x7210);
+ buf1[0] = __byte_perm_S (buf1[0], buf1[1], 0x4210);
+ buf1[1] = __byte_perm_S (buf1[1], 0, 0x6543);
break;
- case 22: buf1[1] = __byte_perm (buf1[1], 0, 0x5401);
+ case 22: buf1[1] = __byte_perm_S (buf1[1], 0, 0x5401);
break;
- case 23: buf1[1] = __byte_perm (buf1[1], 0, 0x4120);
+ case 23: buf1[1] = __byte_perm_S (buf1[1], 0, 0x4120);
break;
- case 24: buf1[1] = __byte_perm (buf1[1], 0, 0x2310);
+ case 24: buf1[1] = __byte_perm_S (buf1[1], 0, 0x2310);
break;
- case 25: buf1[2] = __byte_perm (buf1[2], buf1[1], 0x7210);
- buf1[1] = __byte_perm (buf1[1], buf1[2], 0x4210);
- buf1[2] = __byte_perm (buf1[2], 0, 0x6543);
+ case 25: buf1[2] = __byte_perm_S (buf1[2], buf1[1], 0x7210);
+ buf1[1] = __byte_perm_S (buf1[1], buf1[2], 0x4210);
+ buf1[2] = __byte_perm_S (buf1[2], 0, 0x6543);
break;
- case 26: buf1[2] = __byte_perm (buf1[2], 0, 0x5401);
+ case 26: buf1[2] = __byte_perm_S (buf1[2], 0, 0x5401);
break;
- case 27: buf1[2] = __byte_perm (buf1[2], 0, 0x4120);
+ case 27: buf1[2] = __byte_perm_S (buf1[2], 0, 0x4120);
break;
- case 28: buf1[2] = __byte_perm (buf1[2], 0, 0x2310);
+ case 28: buf1[2] = __byte_perm_S (buf1[2], 0, 0x2310);
break;
- case 29: buf1[3] = __byte_perm (buf1[3], buf1[2], 0x7210);
- buf1[2] = __byte_perm (buf1[2], buf1[3], 0x4210);
- buf1[3] = __byte_perm (buf1[3], 0, 0x6543);
+ case 29: buf1[3] = __byte_perm_S (buf1[3], buf1[2], 0x7210);
+ buf1[2] = __byte_perm_S (buf1[2], buf1[3], 0x4210);
+ buf1[3] = __byte_perm_S (buf1[3], 0, 0x6543);
break;
- case 30: buf1[3] = __byte_perm (buf1[3], 0, 0x5401);
+ case 30: buf1[3] = __byte_perm_S (buf1[3], 0, 0x5401);
break;
- case 31: buf1[3] = __byte_perm (buf1[3], 0, 0x4120);
+ case 31: buf1[3] = __byte_perm_S (buf1[3], 0, 0x4120);
break;
}
#endif
return in_len;
}
-static u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 >= in_len) return (in_len);
if (p1 >= in_len) return (in_len);
#ifdef IS_NV
switch (p0)
{
- case 0: tmp0 = __byte_perm (buf0[0], 0, 0x6540);
+ case 0: tmp0 = __byte_perm_S (buf0[0], 0, 0x6540);
break;
- case 1: tmp0 = __byte_perm (buf0[0], 0, 0x6541);
+ case 1: tmp0 = __byte_perm_S (buf0[0], 0, 0x6541);
break;
- case 2: tmp0 = __byte_perm (buf0[0], 0, 0x6542);
+ case 2: tmp0 = __byte_perm_S (buf0[0], 0, 0x6542);
break;
- case 3: tmp0 = __byte_perm (buf0[0], 0, 0x6543);
+ case 3: tmp0 = __byte_perm_S (buf0[0], 0, 0x6543);
break;
- case 4: tmp0 = __byte_perm (buf0[1], 0, 0x6540);
+ case 4: tmp0 = __byte_perm_S (buf0[1], 0, 0x6540);
break;
- case 5: tmp0 = __byte_perm (buf0[1], 0, 0x6541);
+ case 5: tmp0 = __byte_perm_S (buf0[1], 0, 0x6541);
break;
- case 6: tmp0 = __byte_perm (buf0[1], 0, 0x6542);
+ case 6: tmp0 = __byte_perm_S (buf0[1], 0, 0x6542);
break;
- case 7: tmp0 = __byte_perm (buf0[1], 0, 0x6543);
+ case 7: tmp0 = __byte_perm_S (buf0[1], 0, 0x6543);
break;
- case 8: tmp0 = __byte_perm (buf0[2], 0, 0x6540);
+ case 8: tmp0 = __byte_perm_S (buf0[2], 0, 0x6540);
break;
- case 9: tmp0 = __byte_perm (buf0[2], 0, 0x6541);
+ case 9: tmp0 = __byte_perm_S (buf0[2], 0, 0x6541);
break;
- case 10: tmp0 = __byte_perm (buf0[2], 0, 0x6542);
+ case 10: tmp0 = __byte_perm_S (buf0[2], 0, 0x6542);
break;
- case 11: tmp0 = __byte_perm (buf0[2], 0, 0x6543);
+ case 11: tmp0 = __byte_perm_S (buf0[2], 0, 0x6543);
break;
- case 12: tmp0 = __byte_perm (buf0[3], 0, 0x6540);
+ case 12: tmp0 = __byte_perm_S (buf0[3], 0, 0x6540);
break;
- case 13: tmp0 = __byte_perm (buf0[3], 0, 0x6541);
+ case 13: tmp0 = __byte_perm_S (buf0[3], 0, 0x6541);
break;
- case 14: tmp0 = __byte_perm (buf0[3], 0, 0x6542);
+ case 14: tmp0 = __byte_perm_S (buf0[3], 0, 0x6542);
break;
- case 15: tmp0 = __byte_perm (buf0[3], 0, 0x6543);
+ case 15: tmp0 = __byte_perm_S (buf0[3], 0, 0x6543);
break;
- case 16: tmp0 = __byte_perm (buf1[0], 0, 0x6540);
+ case 16: tmp0 = __byte_perm_S (buf1[0], 0, 0x6540);
break;
- case 17: tmp0 = __byte_perm (buf1[0], 0, 0x6541);
+ case 17: tmp0 = __byte_perm_S (buf1[0], 0, 0x6541);
break;
- case 18: tmp0 = __byte_perm (buf1[0], 0, 0x6542);
+ case 18: tmp0 = __byte_perm_S (buf1[0], 0, 0x6542);
break;
- case 19: tmp0 = __byte_perm (buf1[0], 0, 0x6543);
+ case 19: tmp0 = __byte_perm_S (buf1[0], 0, 0x6543);
break;
- case 20: tmp0 = __byte_perm (buf1[1], 0, 0x6540);
+ case 20: tmp0 = __byte_perm_S (buf1[1], 0, 0x6540);
break;
- case 21: tmp0 = __byte_perm (buf1[1], 0, 0x6541);
+ case 21: tmp0 = __byte_perm_S (buf1[1], 0, 0x6541);
break;
- case 22: tmp0 = __byte_perm (buf1[1], 0, 0x6542);
+ case 22: tmp0 = __byte_perm_S (buf1[1], 0, 0x6542);
break;
- case 23: tmp0 = __byte_perm (buf1[1], 0, 0x6543);
+ case 23: tmp0 = __byte_perm_S (buf1[1], 0, 0x6543);
break;
- case 24: tmp0 = __byte_perm (buf1[2], 0, 0x6540);
+ case 24: tmp0 = __byte_perm_S (buf1[2], 0, 0x6540);
break;
- case 25: tmp0 = __byte_perm (buf1[2], 0, 0x6541);
+ case 25: tmp0 = __byte_perm_S (buf1[2], 0, 0x6541);
break;
- case 26: tmp0 = __byte_perm (buf1[2], 0, 0x6542);
+ case 26: tmp0 = __byte_perm_S (buf1[2], 0, 0x6542);
break;
- case 27: tmp0 = __byte_perm (buf1[2], 0, 0x6543);
+ case 27: tmp0 = __byte_perm_S (buf1[2], 0, 0x6543);
break;
- case 28: tmp0 = __byte_perm (buf1[3], 0, 0x6540);
+ case 28: tmp0 = __byte_perm_S (buf1[3], 0, 0x6540);
break;
- case 29: tmp0 = __byte_perm (buf1[3], 0, 0x6541);
+ case 29: tmp0 = __byte_perm_S (buf1[3], 0, 0x6541);
break;
- case 30: tmp0 = __byte_perm (buf1[3], 0, 0x6542);
+ case 30: tmp0 = __byte_perm_S (buf1[3], 0, 0x6542);
break;
- case 31: tmp0 = __byte_perm (buf1[3], 0, 0x6543);
+ case 31: tmp0 = __byte_perm_S (buf1[3], 0, 0x6543);
break;
}
switch (p1)
{
- case 0: tmp1 = __byte_perm (buf0[0], 0, 0x6540);
- buf0[0] = __byte_perm (tmp0, buf0[0], 0x7650);
+ case 0: tmp1 = __byte_perm_S (buf0[0], 0, 0x6540);
+ buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x7650);
break;
- case 1: tmp1 = __byte_perm (buf0[0], 0, 0x6541);
- buf0[0] = __byte_perm (tmp0, buf0[0], 0x7604);
+ case 1: tmp1 = __byte_perm_S (buf0[0], 0, 0x6541);
+ buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x7604);
break;
- case 2: tmp1 = __byte_perm (buf0[0], 0, 0x6542);
- buf0[0] = __byte_perm (tmp0, buf0[0], 0x7054);
+ case 2: tmp1 = __byte_perm_S (buf0[0], 0, 0x6542);
+ buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x7054);
break;
- case 3: tmp1 = __byte_perm (buf0[0], 0, 0x6543);
- buf0[0] = __byte_perm (tmp0, buf0[0], 0x0654);
+ case 3: tmp1 = __byte_perm_S (buf0[0], 0, 0x6543);
+ buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x0654);
break;
- case 4: tmp1 = __byte_perm (buf0[1], 0, 0x6540);
- buf0[1] = __byte_perm (tmp0, buf0[1], 0x7650);
+ case 4: tmp1 = __byte_perm_S (buf0[1], 0, 0x6540);
+ buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7650);
break;
- case 5: tmp1 = __byte_perm (buf0[1], 0, 0x6541);
- buf0[1] = __byte_perm (tmp0, buf0[1], 0x7604);
+ case 5: tmp1 = __byte_perm_S (buf0[1], 0, 0x6541);
+ buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7604);
break;
- case 6: tmp1 = __byte_perm (buf0[1], 0, 0x6542);
- buf0[1] = __byte_perm (tmp0, buf0[1], 0x7054);
+ case 6: tmp1 = __byte_perm_S (buf0[1], 0, 0x6542);
+ buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7054);
break;
- case 7: tmp1 = __byte_perm (buf0[1], 0, 0x6543);
- buf0[1] = __byte_perm (tmp0, buf0[1], 0x0654);
+ case 7: tmp1 = __byte_perm_S (buf0[1], 0, 0x6543);
+ buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x0654);
break;
- case 8: tmp1 = __byte_perm (buf0[2], 0, 0x6540);
- buf0[2] = __byte_perm (tmp0, buf0[2], 0x7650);
+ case 8: tmp1 = __byte_perm_S (buf0[2], 0, 0x6540);
+ buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7650);
break;
- case 9: tmp1 = __byte_perm (buf0[2], 0, 0x6541);
- buf0[2] = __byte_perm (tmp0, buf0[2], 0x7604);
+ case 9: tmp1 = __byte_perm_S (buf0[2], 0, 0x6541);
+ buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7604);
break;
- case 10: tmp1 = __byte_perm (buf0[2], 0, 0x6542);
- buf0[2] = __byte_perm (tmp0, buf0[2], 0x7054);
+ case 10: tmp1 = __byte_perm_S (buf0[2], 0, 0x6542);
+ buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7054);
break;
- case 11: tmp1 = __byte_perm (buf0[2], 0, 0x6543);
- buf0[2] = __byte_perm (tmp0, buf0[2], 0x0654);
+ case 11: tmp1 = __byte_perm_S (buf0[2], 0, 0x6543);
+ buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x0654);
break;
- case 12: tmp1 = __byte_perm (buf0[3], 0, 0x6540);
- buf0[3] = __byte_perm (tmp0, buf0[3], 0x7650);
+ case 12: tmp1 = __byte_perm_S (buf0[3], 0, 0x6540);
+ buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7650);
break;
- case 13: tmp1 = __byte_perm (buf0[3], 0, 0x6541);
- buf0[3] = __byte_perm (tmp0, buf0[3], 0x7604);
+ case 13: tmp1 = __byte_perm_S (buf0[3], 0, 0x6541);
+ buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7604);
break;
- case 14: tmp1 = __byte_perm (buf0[3], 0, 0x6542);
- buf0[3] = __byte_perm (tmp0, buf0[3], 0x7054);
+ case 14: tmp1 = __byte_perm_S (buf0[3], 0, 0x6542);
+ buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7054);
break;
- case 15: tmp1 = __byte_perm (buf0[3], 0, 0x6543);
- buf0[3] = __byte_perm (tmp0, buf0[3], 0x0654);
+ case 15: tmp1 = __byte_perm_S (buf0[3], 0, 0x6543);
+ buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x0654);
break;
- case 16: tmp1 = __byte_perm (buf1[0], 0, 0x6540);
- buf1[0] = __byte_perm (tmp0, buf1[0], 0x7650);
+ case 16: tmp1 = __byte_perm_S (buf1[0], 0, 0x6540);
+ buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7650);
break;
- case 17: tmp1 = __byte_perm (buf1[0], 0, 0x6541);
- buf1[0] = __byte_perm (tmp0, buf1[0], 0x7604);
+ case 17: tmp1 = __byte_perm_S (buf1[0], 0, 0x6541);
+ buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7604);
break;
- case 18: tmp1 = __byte_perm (buf1[0], 0, 0x6542);
- buf1[0] = __byte_perm (tmp0, buf1[0], 0x7054);
+ case 18: tmp1 = __byte_perm_S (buf1[0], 0, 0x6542);
+ buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7054);
break;
- case 19: tmp1 = __byte_perm (buf1[0], 0, 0x6543);
- buf1[0] = __byte_perm (tmp0, buf1[0], 0x0654);
+ case 19: tmp1 = __byte_perm_S (buf1[0], 0, 0x6543);
+ buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x0654);
break;
- case 20: tmp1 = __byte_perm (buf1[1], 0, 0x6540);
- buf1[1] = __byte_perm (tmp0, buf1[1], 0x7650);
+ case 20: tmp1 = __byte_perm_S (buf1[1], 0, 0x6540);
+ buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7650);
break;
- case 21: tmp1 = __byte_perm (buf1[1], 0, 0x6541);
- buf1[1] = __byte_perm (tmp0, buf1[1], 0x7604);
+ case 21: tmp1 = __byte_perm_S (buf1[1], 0, 0x6541);
+ buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7604);
break;
- case 22: tmp1 = __byte_perm (buf1[1], 0, 0x6542);
- buf1[1] = __byte_perm (tmp0, buf1[1], 0x7054);
+ case 22: tmp1 = __byte_perm_S (buf1[1], 0, 0x6542);
+ buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7054);
break;
- case 23: tmp1 = __byte_perm (buf1[1], 0, 0x6543);
- buf1[1] = __byte_perm (tmp0, buf1[1], 0x0654);
+ case 23: tmp1 = __byte_perm_S (buf1[1], 0, 0x6543);
+ buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x0654);
break;
- case 24: tmp1 = __byte_perm (buf1[2], 0, 0x6540);
- buf1[2] = __byte_perm (tmp0, buf1[2], 0x7650);
+ case 24: tmp1 = __byte_perm_S (buf1[2], 0, 0x6540);
+ buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7650);
break;
- case 25: tmp1 = __byte_perm (buf1[2], 0, 0x6541);
- buf1[2] = __byte_perm (tmp0, buf1[2], 0x7604);
+ case 25: tmp1 = __byte_perm_S (buf1[2], 0, 0x6541);
+ buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7604);
break;
- case 26: tmp1 = __byte_perm (buf1[2], 0, 0x6542);
- buf1[2] = __byte_perm (tmp0, buf1[2], 0x7054);
+ case 26: tmp1 = __byte_perm_S (buf1[2], 0, 0x6542);
+ buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7054);
break;
- case 27: tmp1 = __byte_perm (buf1[2], 0, 0x6543);
- buf1[2] = __byte_perm (tmp0, buf1[2], 0x0654);
+ case 27: tmp1 = __byte_perm_S (buf1[2], 0, 0x6543);
+ buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x0654);
break;
- case 28: tmp1 = __byte_perm (buf1[3], 0, 0x6540);
- buf1[3] = __byte_perm (tmp0, buf1[3], 0x7650);
+ case 28: tmp1 = __byte_perm_S (buf1[3], 0, 0x6540);
+ buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7650);
break;
- case 29: tmp1 = __byte_perm (buf1[3], 0, 0x6541);
- buf1[3] = __byte_perm (tmp0, buf1[3], 0x7604);
+ case 29: tmp1 = __byte_perm_S (buf1[3], 0, 0x6541);
+ buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7604);
break;
- case 30: tmp1 = __byte_perm (buf1[3], 0, 0x6542);
- buf1[3] = __byte_perm (tmp0, buf1[3], 0x7054);
+ case 30: tmp1 = __byte_perm_S (buf1[3], 0, 0x6542);
+ buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7054);
break;
- case 31: tmp1 = __byte_perm (buf1[3], 0, 0x6543);
- buf1[3] = __byte_perm (tmp0, buf1[3], 0x0654);
+ case 31: tmp1 = __byte_perm_S (buf1[3], 0, 0x6543);
+ buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x0654);
break;
}
switch (p0)
{
- case 0: buf0[0] = __byte_perm (tmp1, buf0[0], 0x7650);
+ case 0: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7650);
break;
- case 1: buf0[0] = __byte_perm (tmp1, buf0[0], 0x7604);
+ case 1: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7604);
break;
- case 2: buf0[0] = __byte_perm (tmp1, buf0[0], 0x7054);
+ case 2: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7054);
break;
- case 3: buf0[0] = __byte_perm (tmp1, buf0[0], 0x0654);
+ case 3: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x0654);
break;
- case 4: buf0[1] = __byte_perm (tmp1, buf0[1], 0x7650);
+ case 4: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x7650);
break;
- case 5: buf0[1] = __byte_perm (tmp1, buf0[1], 0x7604);
+ case 5: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x7604);
break;
- case 6: buf0[1] = __byte_perm (tmp1, buf0[1], 0x7054);
+ case 6: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x7054);
break;
- case 7: buf0[1] = __byte_perm (tmp1, buf0[1], 0x0654);
+ case 7: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x0654);
break;
- case 8: buf0[2] = __byte_perm (tmp1, buf0[2], 0x7650);
+ case 8: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7650);
break;
- case 9: buf0[2] = __byte_perm (tmp1, buf0[2], 0x7604);
+ case 9: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7604);
break;
- case 10: buf0[2] = __byte_perm (tmp1, buf0[2], 0x7054);
+ case 10: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7054);
break;
- case 11: buf0[2] = __byte_perm (tmp1, buf0[2], 0x0654);
+ case 11: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x0654);
break;
- case 12: buf0[3] = __byte_perm (tmp1, buf0[3], 0x7650);
+ case 12: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7650);
break;
- case 13: buf0[3] = __byte_perm (tmp1, buf0[3], 0x7604);
+ case 13: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7604);
break;
- case 14: buf0[3] = __byte_perm (tmp1, buf0[3], 0x7054);
+ case 14: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7054);
break;
- case 15: buf0[3] = __byte_perm (tmp1, buf0[3], 0x0654);
+ case 15: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x0654);
break;
- case 16: buf1[0] = __byte_perm (tmp1, buf1[0], 0x7650);
+ case 16: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7650);
break;
- case 17: buf1[0] = __byte_perm (tmp1, buf1[0], 0x7604);
+ case 17: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7604);
break;
- case 18: buf1[0] = __byte_perm (tmp1, buf1[0], 0x7054);
+ case 18: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7054);
break;
- case 19: buf1[0] = __byte_perm (tmp1, buf1[0], 0x0654);
+ case 19: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x0654);
break;
- case 20: buf1[1] = __byte_perm (tmp1, buf1[1], 0x7650);
+ case 20: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7650);
break;
- case 21: buf1[1] = __byte_perm (tmp1, buf1[1], 0x7604);
+ case 21: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7604);
break;
- case 22: buf1[1] = __byte_perm (tmp1, buf1[1], 0x7054);
+ case 22: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7054);
break;
- case 23: buf1[1] = __byte_perm (tmp1, buf1[1], 0x0654);
+ case 23: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x0654);
break;
- case 24: buf1[2] = __byte_perm (tmp1, buf1[2], 0x7650);
+ case 24: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7650);
break;
- case 25: buf1[2] = __byte_perm (tmp1, buf1[2], 0x7604);
+ case 25: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7604);
break;
- case 26: buf1[2] = __byte_perm (tmp1, buf1[2], 0x7054);
+ case 26: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7054);
break;
- case 27: buf1[2] = __byte_perm (tmp1, buf1[2], 0x0654);
+ case 27: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x0654);
break;
- case 28: buf1[3] = __byte_perm (tmp1, buf1[3], 0x7650);
+ case 28: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7650);
break;
- case 29: buf1[3] = __byte_perm (tmp1, buf1[3], 0x7604);
+ case 29: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7604);
break;
- case 30: buf1[3] = __byte_perm (tmp1, buf1[3], 0x7054);
+ case 30: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7054);
break;
- case 31: buf1[3] = __byte_perm (tmp1, buf1[3], 0x0654);
+ case 31: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x0654);
break;
}
#endif
return in_len;
}
-static u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 >= in_len) return (in_len);
return in_len;
}
-static u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 >= in_len) return (in_len);
return in_len;
}
-static u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 >= in_len) return (in_len);
return in_len;
}
-static u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 >= in_len) return (in_len);
return in_len;
}
-static u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if ((p0 + 1) >= in_len) return (in_len);
return in_len;
}
-static u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 == 0) return (in_len);
return in_len;
}
-static u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 > in_len) return (in_len);
return out_len;
}
-static u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 > in_len) return (in_len);
return out_len;
}
-static u32 rule_op_mangle_title (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 rule_op_mangle_title (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
buf0[0] |= (generate_cmask (buf0[0]));
buf0[1] |= (generate_cmask (buf0[1]));
switch (i)
{
- case 0: tmp0 = __byte_perm (buf0[0], 0, 0x6540);
+ case 0: tmp0 = __byte_perm_S (buf0[0], 0, 0x6540);
tmp1 = ~(0x00002000 & generate_cmask (buf0[0])); break;
- case 1: tmp0 = __byte_perm (buf0[0], 0, 0x6541);
+ case 1: tmp0 = __byte_perm_S (buf0[0], 0, 0x6541);
tmp1 = ~(0x00200000 & generate_cmask (buf0[0])); break;
- case 2: tmp0 = __byte_perm (buf0[0], 0, 0x6542);
+ case 2: tmp0 = __byte_perm_S (buf0[0], 0, 0x6542);
tmp1 = ~(0x20000000 & generate_cmask (buf0[0])); break;
- case 3: tmp0 = __byte_perm (buf0[0], 0, 0x6543);
+ case 3: tmp0 = __byte_perm_S (buf0[0], 0, 0x6543);
tmp1 = ~(0x00000020 & generate_cmask (buf0[1])); break;
- case 4: tmp0 = __byte_perm (buf0[1], 0, 0x6540);
+ case 4: tmp0 = __byte_perm_S (buf0[1], 0, 0x6540);
tmp1 = ~(0x00002000 & generate_cmask (buf0[1])); break;
- case 5: tmp0 = __byte_perm (buf0[1], 0, 0x6541);
+ case 5: tmp0 = __byte_perm_S (buf0[1], 0, 0x6541);
tmp1 = ~(0x00200000 & generate_cmask (buf0[1])); break;
- case 6: tmp0 = __byte_perm (buf0[1], 0, 0x6542);
+ case 6: tmp0 = __byte_perm_S (buf0[1], 0, 0x6542);
tmp1 = ~(0x20000000 & generate_cmask (buf0[1])); break;
- case 7: tmp0 = __byte_perm (buf0[1], 0, 0x6543);
+ case 7: tmp0 = __byte_perm_S (buf0[1], 0, 0x6543);
tmp1 = ~(0x00000020 & generate_cmask (buf0[2])); break;
- case 8: tmp0 = __byte_perm (buf0[2], 0, 0x6540);
+ case 8: tmp0 = __byte_perm_S (buf0[2], 0, 0x6540);
tmp1 = ~(0x00002000 & generate_cmask (buf0[2])); break;
- case 9: tmp0 = __byte_perm (buf0[2], 0, 0x6541);
+ case 9: tmp0 = __byte_perm_S (buf0[2], 0, 0x6541);
tmp1 = ~(0x00200000 & generate_cmask (buf0[2])); break;
- case 10: tmp0 = __byte_perm (buf0[2], 0, 0x6542);
+ case 10: tmp0 = __byte_perm_S (buf0[2], 0, 0x6542);
tmp1 = ~(0x20000000 & generate_cmask (buf0[2])); break;
- case 11: tmp0 = __byte_perm (buf0[2], 0, 0x6543);
+ case 11: tmp0 = __byte_perm_S (buf0[2], 0, 0x6543);
tmp1 = ~(0x00000020 & generate_cmask (buf0[3])); break;
- case 12: tmp0 = __byte_perm (buf0[3], 0, 0x6540);
+ case 12: tmp0 = __byte_perm_S (buf0[3], 0, 0x6540);
tmp1 = ~(0x00002000 & generate_cmask (buf0[3])); break;
- case 13: tmp0 = __byte_perm (buf0[3], 0, 0x6541);
+ case 13: tmp0 = __byte_perm_S (buf0[3], 0, 0x6541);
tmp1 = ~(0x00200000 & generate_cmask (buf0[3])); break;
- case 14: tmp0 = __byte_perm (buf0[3], 0, 0x6542);
+ case 14: tmp0 = __byte_perm_S (buf0[3], 0, 0x6542);
tmp1 = ~(0x20000000 & generate_cmask (buf0[3])); break;
- case 15: tmp0 = __byte_perm (buf0[3], 0, 0x6543);
+ case 15: tmp0 = __byte_perm_S (buf0[3], 0, 0x6543);
tmp1 = ~(0x00000020 & generate_cmask (buf1[0])); break;
- case 16: tmp0 = __byte_perm (buf1[0], 0, 0x6540);
+ case 16: tmp0 = __byte_perm_S (buf1[0], 0, 0x6540);
tmp1 = ~(0x00002000 & generate_cmask (buf1[0])); break;
- case 17: tmp0 = __byte_perm (buf1[0], 0, 0x6541);
+ case 17: tmp0 = __byte_perm_S (buf1[0], 0, 0x6541);
tmp1 = ~(0x00200000 & generate_cmask (buf1[0])); break;
- case 18: tmp0 = __byte_perm (buf1[0], 0, 0x6542);
+ case 18: tmp0 = __byte_perm_S (buf1[0], 0, 0x6542);
tmp1 = ~(0x20000000 & generate_cmask (buf1[0])); break;
- case 19: tmp0 = __byte_perm (buf1[0], 0, 0x6543);
+ case 19: tmp0 = __byte_perm_S (buf1[0], 0, 0x6543);
tmp1 = ~(0x00000020 & generate_cmask (buf1[1])); break;
- case 20: tmp0 = __byte_perm (buf1[1], 0, 0x6540);
+ case 20: tmp0 = __byte_perm_S (buf1[1], 0, 0x6540);
tmp1 = ~(0x00002000 & generate_cmask (buf1[1])); break;
- case 21: tmp0 = __byte_perm (buf1[1], 0, 0x6541);
+ case 21: tmp0 = __byte_perm_S (buf1[1], 0, 0x6541);
tmp1 = ~(0x00200000 & generate_cmask (buf1[1])); break;
- case 22: tmp0 = __byte_perm (buf1[1], 0, 0x6542);
+ case 22: tmp0 = __byte_perm_S (buf1[1], 0, 0x6542);
tmp1 = ~(0x20000000 & generate_cmask (buf1[1])); break;
- case 23: tmp0 = __byte_perm (buf1[1], 0, 0x6543);
+ case 23: tmp0 = __byte_perm_S (buf1[1], 0, 0x6543);
tmp1 = ~(0x00000020 & generate_cmask (buf1[2])); break;
- case 24: tmp0 = __byte_perm (buf1[2], 0, 0x6540);
+ case 24: tmp0 = __byte_perm_S (buf1[2], 0, 0x6540);
tmp1 = ~(0x00002000 & generate_cmask (buf1[2])); break;
- case 25: tmp0 = __byte_perm (buf1[2], 0, 0x6541);
+ case 25: tmp0 = __byte_perm_S (buf1[2], 0, 0x6541);
tmp1 = ~(0x00200000 & generate_cmask (buf1[2])); break;
- case 26: tmp0 = __byte_perm (buf1[2], 0, 0x6542);
+ case 26: tmp0 = __byte_perm_S (buf1[2], 0, 0x6542);
tmp1 = ~(0x20000000 & generate_cmask (buf1[2])); break;
- case 27: tmp0 = __byte_perm (buf1[2], 0, 0x6543);
+ case 27: tmp0 = __byte_perm_S (buf1[2], 0, 0x6543);
tmp1 = ~(0x00000020 & generate_cmask (buf1[3])); break;
- case 28: tmp0 = __byte_perm (buf1[3], 0, 0x6540);
+ case 28: tmp0 = __byte_perm_S (buf1[3], 0, 0x6540);
tmp1 = ~(0x00002000 & generate_cmask (buf1[3])); break;
- case 29: tmp0 = __byte_perm (buf1[3], 0, 0x6541);
+ case 29: tmp0 = __byte_perm_S (buf1[3], 0, 0x6541);
tmp1 = ~(0x00200000 & generate_cmask (buf1[3])); break;
- case 30: tmp0 = __byte_perm (buf1[3], 0, 0x6542);
+ case 30: tmp0 = __byte_perm_S (buf1[3], 0, 0x6542);
tmp1 = ~(0x20000000 & generate_cmask (buf1[3])); break;
}
return in_len;
}
-u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
+inline u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
u32 out_len = in_len;
return out_len;
}
-u32 apply_rules (const __global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len)
+inline u32 apply_rules (const __global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len)
{
u32 out_len = len;
return out_len;
}
-u32 apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, const __global kernel_rule_t *rules_buf, const u32 il_pos, u32x w0[4], u32x w1[4])
+inline u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, const __global kernel_rule_t *rules_buf, const u32 il_pos, u32x w0[4], u32x w1[4])
{
#if VECT_SIZE == 1
#else
- u32 out_len = 0;
+ u32x out_len = 0;
+ #ifdef _unroll
#pragma unroll
+ #endif
for (int i = 0; i < VECT_SIZE; i++)
{
u32 tmp0[4];
tmp1[2] = pw_buf1[2];
tmp1[3] = pw_buf1[3];
- out_len = apply_rules (rules_buf[il_pos + i].cmds, tmp0, tmp1, pw_len);
-
- // it's guaranteed to have out_len always the same for each call in the loop
+ const u32 tmp_len = apply_rules (rules_buf[il_pos + i].cmds, tmp0, tmp1, pw_len);
switch (i)
{
w1[1].s0 = tmp1[1];
w1[2].s0 = tmp1[2];
w1[3].s0 = tmp1[3];
+ out_len.s0 = tmp_len;
break;
case 1:
w1[1].s1 = tmp1[1];
w1[2].s1 = tmp1[2];
w1[3].s1 = tmp1[3];
+ out_len.s1 = tmp_len;
break;
#endif
w1[1].s2 = tmp1[1];
w1[2].s2 = tmp1[2];
w1[3].s2 = tmp1[3];
+ out_len.s2 = tmp_len;
break;
case 3:
w1[1].s3 = tmp1[1];
w1[2].s3 = tmp1[2];
w1[3].s3 = tmp1[3];
+ out_len.s3 = tmp_len;
break;
#endif
w1[1].s4 = tmp1[1];
w1[2].s4 = tmp1[2];
w1[3].s4 = tmp1[3];
+ out_len.s4 = tmp_len;
break;
case 5:
w1[1].s5 = tmp1[1];
w1[2].s5 = tmp1[2];
w1[3].s5 = tmp1[3];
+ out_len.s5 = tmp_len;
break;
case 6:
w1[1].s6 = tmp1[1];
w1[2].s6 = tmp1[2];
w1[3].s6 = tmp1[3];
+ out_len.s6 = tmp_len;
break;
case 7:
w1[1].s7 = tmp1[1];
w1[2].s7 = tmp1[2];
w1[3].s7 = tmp1[3];
+ out_len.s7 = tmp_len;
+ break;
+ #endif
+
+ #if VECT_SIZE >= 16
+ case 8:
+ w0[0].s8 = tmp0[0];
+ w0[1].s8 = tmp0[1];
+ w0[2].s8 = tmp0[2];
+ w0[3].s8 = tmp0[3];
+ w1[0].s8 = tmp1[0];
+ w1[1].s8 = tmp1[1];
+ w1[2].s8 = tmp1[2];
+ w1[3].s8 = tmp1[3];
+ out_len.s8 = tmp_len;
+ break;
+
+ case 9:
+ w0[0].s9 = tmp0[0];
+ w0[1].s9 = tmp0[1];
+ w0[2].s9 = tmp0[2];
+ w0[3].s9 = tmp0[3];
+ w1[0].s9 = tmp1[0];
+ w1[1].s9 = tmp1[1];
+ w1[2].s9 = tmp1[2];
+ w1[3].s9 = tmp1[3];
+ out_len.s9 = tmp_len;
+ break;
+
+ case 10:
+ w0[0].sa = tmp0[0];
+ w0[1].sa = tmp0[1];
+ w0[2].sa = tmp0[2];
+ w0[3].sa = tmp0[3];
+ w1[0].sa = tmp1[0];
+ w1[1].sa = tmp1[1];
+ w1[2].sa = tmp1[2];
+ w1[3].sa = tmp1[3];
+ out_len.sa = tmp_len;
+ break;
+
+ case 11:
+ w0[0].sb = tmp0[0];
+ w0[1].sb = tmp0[1];
+ w0[2].sb = tmp0[2];
+ w0[3].sb = tmp0[3];
+ w1[0].sb = tmp1[0];
+ w1[1].sb = tmp1[1];
+ w1[2].sb = tmp1[2];
+ w1[3].sb = tmp1[3];
+ out_len.sb = tmp_len;
+ break;
+
+ case 12:
+ w0[0].sc = tmp0[0];
+ w0[1].sc = tmp0[1];
+ w0[2].sc = tmp0[2];
+ w0[3].sc = tmp0[3];
+ w1[0].sc = tmp1[0];
+ w1[1].sc = tmp1[1];
+ w1[2].sc = tmp1[2];
+ w1[3].sc = tmp1[3];
+ out_len.sc = tmp_len;
+ break;
+
+ case 13:
+ w0[0].sd = tmp0[0];
+ w0[1].sd = tmp0[1];
+ w0[2].sd = tmp0[2];
+ w0[3].sd = tmp0[3];
+ w1[0].sd = tmp1[0];
+ w1[1].sd = tmp1[1];
+ w1[2].sd = tmp1[2];
+ w1[3].sd = tmp1[3];
+ out_len.sd = tmp_len;
+ break;
+
+ case 14:
+ w0[0].se = tmp0[0];
+ w0[1].se = tmp0[1];
+ w0[2].se = tmp0[2];
+ w0[3].se = tmp0[3];
+ w1[0].se = tmp1[0];
+ w1[1].se = tmp1[1];
+ w1[2].se = tmp1[2];
+ w1[3].se = tmp1[3];
+ out_len.se = tmp_len;
+ break;
+
+ case 15:
+ w0[0].sf = tmp0[0];
+ w0[1].sf = tmp0[1];
+ w0[2].sf = tmp0[2];
+ w0[3].sf = tmp0[3];
+ w1[0].sf = tmp1[0];
+ w1[1].sf = tmp1[1];
+ w1[2].sf = tmp1[2];
+ w1[3].sf = tmp1[3];
+ out_len.sf = tmp_len;
break;
#endif
}