* License.....: MIT
*/
-static int device_memcmp (const u32 d1[4], __global u32 *d2)
+static int hash_comp (const u32 d1[4], __global u32 *d2)
{
if (d1[3] > d2[DGST_R3]) return ( 1);
if (d1[3] < d2[DGST_R3]) return (-1);
const u32 c = l + m;
- const int cmp = device_memcmp (digest, digests_buf[c].digest_buf);
+ const int cmp = hash_comp (digest, digests_buf[c].digest_buf);
if (cmp > 0)
{
}
}
-// before: append_0x80_2_be
-static void append_0x80_2x4_be (u32 w0[4], u32 w1[4], const u32 offset)
-{
- switch (offset)
- {
- case 0:
- w0[0] |= 0x80000000;
- break;
-
- case 1:
- w0[0] |= 0x800000;
- break;
-
- case 2:
- w0[0] |= 0x8000;
- break;
-
- case 3:
- w0[0] |= 0x80;
- break;
-
- case 4:
- w0[1] |= 0x80000000;
- break;
-
- case 5:
- w0[1] |= 0x800000;
- break;
-
- case 6:
- w0[1] |= 0x8000;
- break;
-
- case 7:
- w0[1] |= 0x80;
- break;
-
- case 8:
- w0[2] |= 0x80000000;
- break;
-
- case 9:
- w0[2] |= 0x800000;
- break;
-
- case 10:
- w0[2] |= 0x8000;
- break;
-
- case 11:
- w0[2] |= 0x80;
- break;
-
- case 12:
- w0[3] |= 0x80000000;
- break;
-
- case 13:
- w0[3] |= 0x800000;
- break;
-
- case 14:
- w0[3] |= 0x8000;
- break;
-
- case 15:
- w0[3] |= 0x80;
- break;
-
- case 16:
- w1[0] |= 0x80000000;
- break;
-
- case 17:
- w1[0] |= 0x800000;
- break;
-
- case 18:
- w1[0] |= 0x8000;
- break;
-
- case 19:
- w1[0] |= 0x80;
- break;
-
- case 20:
- w1[1] |= 0x80000000;
- break;
-
- case 21:
- w1[1] |= 0x800000;
- break;
-
- case 22:
- w1[1] |= 0x8000;
- break;
-
- case 23:
- w1[1] |= 0x80;
- break;
-
- case 24:
- w1[2] |= 0x80000000;
- break;
-
- case 25:
- w1[2] |= 0x800000;
- break;
-
- case 26:
- w1[2] |= 0x8000;
- break;
-
- case 27:
- w1[2] |= 0x80;
- break;
-
- case 28:
- w1[3] |= 0x80000000;
- break;
-
- case 29:
- w1[3] |= 0x800000;
- break;
-
- case 30:
- w1[3] |= 0x8000;
- break;
-
- case 31:
- w1[3] |= 0x80;
- break;
- }
-}
-
// before: append_0x80_3
static void append_0x80_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
{
}
}
-// before: append_0x80_4
-static void append_0x80_1x16 (u32 w[16], const u32 offset)
+/* before: device_memcat2L
+ *
+ * Splices the two words of src_r0 into the two-word buffer dst0, starting
+ * at byte position `offset`. Handled values are 1..7; the switch has no
+ * case for 0 and no default, so any other value leaves dst0 untouched.
+ *
+ * With w = offset / 4 and r = offset % 4:
+ *   - r != 0: dst0[w] = src_l0[w] | src_r0[0] << (8 * r); a following dst0
+ *     word receives the bits shifted out of the previous src_r0 word,
+ *     or'ed with the next src_r0 word shifted left by 8 * r.
+ *   - r == 0: dst0[w] is overwritten with src_r0[0]; src_l0 is not read.
+ * dst0 words below w are never written here, and src_r0 data that would
+ * land past dst0[1] is discarded.
+ *
+ * NOTE(review): the shifts imply that byte k of a word occupies bits
+ * 8k..8k+7, and the plain `|` presumably relies on src_l0[w] being zero
+ * from byte r upwards -- confirm against the callers.
+ */
+static void memcat_c7_d1x2_sl1x2_sr1x2 (const u32 offset, u32 dst0[2], u32 src_l0[2], u32 src_r0[2])
{
switch (offset)
{
- case 0:
- w[ 0] = 0x80;
- break;
-
case 1:
- w[ 0] = w[ 0] | 0x8000;
+ dst0[0] = src_l0[0] | src_r0[0] << 8;
+ dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
break;
case 2:
- w[ 0] = w[ 0] | 0x800000;
+ dst0[0] = src_l0[0] | src_r0[0] << 16;
+ dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
break;
case 3:
- w[ 0] = w[ 0] | 0x80000000;
+ dst0[0] = src_l0[0] | src_r0[0] << 24;
+ dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
break;
case 4:
- w[ 1] = 0x80;
+ dst0[1] = src_r0[0];
break;
case 5:
- w[ 1] = w[ 1] | 0x8000;
+ dst0[1] = src_l0[1] | src_r0[0] << 8;
break;
case 6:
- w[ 1] = w[ 1] | 0x800000;
+ dst0[1] = src_l0[1] | src_r0[0] << 16;
break;
case 7:
- w[ 1] = w[ 1] | 0x80000000;
- break;
-
- case 8:
- w[ 2] = 0x80;
+ dst0[1] = src_l0[1] | src_r0[0] << 24;
break;
+ }
+}
- case 9:
- w[ 2] = w[ 2] | 0x8000;
+/* before: device_memcat4L
+ *
+ * Splices the four words of src_r0 into the four-word buffer dst0,
+ * starting at byte position `offset`. Handled values are 1..15; the switch
+ * has no case for 0 and no default, so any other value leaves dst0
+ * untouched.
+ *
+ * With w = offset / 4 and r = offset % 4:
+ *   - r != 0: dst0[w] = src_l0[w] | src_r0[0] << (8 * r); each following
+ *     dst0 word receives the bits shifted out of the previous src_r0 word,
+ *     or'ed with the next src_r0 word shifted left by 8 * r.
+ *   - r == 0: dst0[w..3] are overwritten with src_r0[0..]; src_l0 is not
+ *     read.
+ * dst0 words below w are never written here, and src_r0 data that would
+ * land past dst0[3] is discarded.
+ *
+ * NOTE(review): the shifts imply that byte k of a word occupies bits
+ * 8k..8k+7, and the plain `|` presumably relies on src_l0[w] being zero
+ * from byte r upwards -- confirm against the callers.
+ */
+static void memcat_c15_d1x4_sl1x4_sr1x4 (const u32 offset, u32 dst0[4], u32 src_l0[4], u32 src_r0[4])
+{
+ switch (offset)
+ {
+ case 1:
+ dst0[0] = src_l0[0] | src_r0[0] << 8;
+ dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
break;
- case 10:
- w[ 2] = w[ 2] | 0x800000;
+ case 2:
+ dst0[0] = src_l0[0] | src_r0[0] << 16;
+ dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
break;
- case 11:
- w[ 2] = w[ 2] | 0x80000000;
+ case 3:
+ dst0[0] = src_l0[0] | src_r0[0] << 24;
+ dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
break;
- case 12:
- w[ 3] = 0x80;
+ case 4:
+ dst0[1] = src_r0[0];
+ dst0[2] = src_r0[1];
+ dst0[3] = src_r0[2];
break;
- case 13:
- w[ 3] = w[ 3] | 0x8000;
+ case 5:
+ dst0[1] = src_l0[1] | src_r0[0] << 8;
+ dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
break;
- case 14:
- w[ 3] = w[ 3] | 0x800000;
+ case 6:
+ dst0[1] = src_l0[1] | src_r0[0] << 16;
+ dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
break;
- case 15:
- w[ 3] = w[ 3] | 0x80000000;
+ case 7:
+ dst0[1] = src_l0[1] | src_r0[0] << 24;
+ dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
break;
- case 16:
- w[ 4] = 0x80;
+ case 8:
+ dst0[2] = src_r0[0];
+ dst0[3] = src_r0[1];
break;
- case 17:
- w[ 4] = w[ 4] | 0x8000;
+ case 9:
+ dst0[2] = src_l0[2] | src_r0[0] << 8;
+ dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
break;
- case 18:
- w[ 4] = w[ 4] | 0x800000;
+ case 10:
+ dst0[2] = src_l0[2] | src_r0[0] << 16;
+ dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
break;
- case 19:
- w[ 4] = w[ 4] | 0x80000000;
+ case 11:
+ dst0[2] = src_l0[2] | src_r0[0] << 24;
+ dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
break;
- case 20:
- w[ 5] = 0x80;
+ case 12:
+ dst0[3] = src_r0[0];
break;
- case 21:
- w[ 5] = w[ 5] | 0x8000;
+ case 13:
+ dst0[3] = src_l0[3] | src_r0[0] << 8;
break;
- case 22:
- w[ 5] = w[ 5] | 0x800000;
+ case 14:
+ dst0[3] = src_l0[3] | src_r0[0] << 16;
break;
- case 23:
- w[ 5] = w[ 5] | 0x80000000;
+ case 15:
+ dst0[3] = src_l0[3] | src_r0[0] << 24;
break;
+ }
+}
- case 24:
- w[ 6] = 0x80;
+/* before: device_memcat8L
+ *
+ * Splices the four words of src_r0 into an eight-word destination made of
+ * dst0 (words 0..3) followed by dst1 (words 4..7), starting at byte
+ * position `offset`. src_l0 / src_l1 supply the word that is already
+ * partly filled at the splice point. Handled values are 1..31; the switch
+ * has no case for 0 and no default, so any other value leaves both
+ * destination arrays untouched.
+ *
+ * With w = offset / 4 and r = offset % 4:
+ *   - r != 0: destination word w = (src_l word w) | src_r0[0] << (8 * r);
+ *     each following destination word receives the bits shifted out of
+ *     the previous src_r0 word, or'ed with the next src_r0 word shifted
+ *     left by 8 * r.
+ *   - r == 0: destination words from w onwards are overwritten with
+ *     src_r0[0..]; the src_l arrays are not read.
+ * Destination words below w are never written here. For offsets 1..16 all
+ * of src_r0 fits; for 17..31 whatever would land past dst1[3] is
+ * discarded.
+ *
+ * NOTE(review): the shifts imply that byte k of a word occupies bits
+ * 8k..8k+7, and the plain `|` presumably relies on the src_l word being
+ * zero from byte r upwards -- confirm against the callers.
+ */
+static void memcat_c31_d2x4_sl2x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 src_l0[4], u32 src_l1[4], u32 src_r0[4])
+{
+ switch (offset)
+ {
+ case 1:
+ dst0[0] = src_l0[0] | src_r0[0] << 8;
+ dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[0] = src_r0[3] >> 24;
break;
- case 25:
- w[ 6] = w[ 6] | 0x8000;
+ case 2:
+ dst0[0] = src_l0[0] | src_r0[0] << 16;
+ dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[0] = src_r0[3] >> 16;
break;
- case 26:
- w[ 6] = w[ 6] | 0x800000;
+ case 3:
+ dst0[0] = src_l0[0] | src_r0[0] << 24;
+ dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[0] = src_r0[3] >> 8;
break;
- case 27:
- w[ 6] = w[ 6] | 0x80000000;
+ case 4:
+ dst0[1] = src_r0[0];
+ dst0[2] = src_r0[1];
+ dst0[3] = src_r0[2];
+ dst1[0] = src_r0[3];
break;
- case 28:
- w[ 7] = 0x80;
+ case 5:
+ dst0[1] = src_l0[1] | src_r0[0] << 8;
+ dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[1] = src_r0[3] >> 24;
break;
- case 29:
- w[ 7] = w[ 7] | 0x8000;
+ case 6:
+ dst0[1] = src_l0[1] | src_r0[0] << 16;
+ dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[1] = src_r0[3] >> 16;
break;
- case 30:
- w[ 7] = w[ 7] | 0x800000;
+ case 7:
+ dst0[1] = src_l0[1] | src_r0[0] << 24;
+ dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[1] = src_r0[3] >> 8;
break;
- case 31:
- w[ 7] = w[ 7] | 0x80000000;
+ case 8:
+ dst0[2] = src_r0[0];
+ dst0[3] = src_r0[1];
+ dst1[0] = src_r0[2];
+ dst1[1] = src_r0[3];
break;
- case 32:
- w[ 8] = 0x80;
+ case 9:
+ dst0[2] = src_l0[2] | src_r0[0] << 8;
+ dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[2] = src_r0[3] >> 24;
break;
- case 33:
- w[ 8] = w[ 8] | 0x8000;
+ case 10:
+ dst0[2] = src_l0[2] | src_r0[0] << 16;
+ dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[2] = src_r0[3] >> 16;
break;
- case 34:
- w[ 8] = w[ 8] | 0x800000;
+ case 11:
+ dst0[2] = src_l0[2] | src_r0[0] << 24;
+ dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[2] = src_r0[3] >> 8;
break;
- case 35:
- w[ 8] = w[ 8] | 0x80000000;
+ case 12:
+ dst0[3] = src_r0[0];
+ dst1[0] = src_r0[1];
+ dst1[1] = src_r0[2];
+ dst1[2] = src_r0[3];
break;
- case 36:
- w[ 9] = 0x80;
+ case 13:
+ dst0[3] = src_l0[3] | src_r0[0] << 8;
+ dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[3] = src_r0[3] >> 24;
break;
- case 37:
- w[ 9] = w[ 9] | 0x8000;
+ case 14:
+ dst0[3] = src_l0[3] | src_r0[0] << 16;
+ dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[3] = src_r0[3] >> 16;
break;
- case 38:
- w[ 9] = w[ 9] | 0x800000;
+ case 15:
+ dst0[3] = src_l0[3] | src_r0[0] << 24;
+ dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[3] = src_r0[3] >> 8;
break;
- case 39:
- w[ 9] = w[ 9] | 0x80000000;
+ case 16:
+ dst1[0] = src_r0[0];
+ dst1[1] = src_r0[1];
+ dst1[2] = src_r0[2];
+ dst1[3] = src_r0[3];
break;
- case 40:
- w[10] = 0x80;
+ case 17:
+ dst1[0] = src_l1[0] | src_r0[0] << 8;
+ dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8;
break;
- case 41:
- w[10] = w[10] | 0x8000;
+ case 18:
+ dst1[0] = src_l1[0] | src_r0[0] << 16;
+ dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16;
break;
- case 42:
- w[10] = w[10] | 0x800000;
+ case 19:
+ dst1[0] = src_l1[0] | src_r0[0] << 24;
+ dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24;
break;
- case 43:
- w[10] = w[10] | 0x80000000;
+ case 20:
+ dst1[1] = src_r0[0];
+ dst1[2] = src_r0[1];
+ dst1[3] = src_r0[2];
break;
- case 44:
- w[11] = 0x80;
+ case 21:
+ dst1[1] = src_l1[1] | src_r0[0] << 8;
+ dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8;
break;
- case 45:
- w[11] = w[11] | 0x8000;
+ case 22:
+ dst1[1] = src_l1[1] | src_r0[0] << 16;
+ dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16;
break;
- case 46:
- w[11] = w[11] | 0x800000;
+ case 23:
+ dst1[1] = src_l1[1] | src_r0[0] << 24;
+ dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24;
break;
- case 47:
- w[11] = w[11] | 0x80000000;
+ case 24:
+ dst1[2] = src_r0[0];
+ dst1[3] = src_r0[1];
break;
- case 48:
- w[12] = 0x80;
+ case 25:
+ dst1[2] = src_l1[2] | src_r0[0] << 8;
+ dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8;
break;
- case 49:
- w[12] = w[12] | 0x8000;
+ case 26:
+ dst1[2] = src_l1[2] | src_r0[0] << 16;
+ dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16;
break;
- case 50:
- w[12] = w[12] | 0x800000;
+ case 27:
+ dst1[2] = src_l1[2] | src_r0[0] << 24;
+ dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24;
break;
- case 51:
- w[12] = w[12] | 0x80000000;
+ case 28:
+ dst1[3] = src_r0[0];
break;
- case 52:
- w[13] = 0x80;
+ case 29:
+ dst1[3] = src_l1[3] | src_r0[0] << 8;
break;
- case 53:
- w[13] = w[13] | 0x8000;
+ case 30:
+ dst1[3] = src_l1[3] | src_r0[0] << 16;
break;
- case 54:
- w[13] = w[13] | 0x800000;
+ case 31:
+ dst1[3] = src_l1[3] | src_r0[0] << 24;
break;
+ }
+}
- case 55:
- w[13] = w[13] | 0x80000000;
+/* before: device_memcat12L
+ *
+ * Splices the four words of src_r0 into a twelve-word destination made of
+ * dst0 (words 0..3), dst1 (words 4..7) and dst2 (words 8..11), starting at
+ * byte position `offset`. src_l0 / src_l1 / src_l2 supply the word that is
+ * already partly filled at the splice point. Handled values are 1..47; the
+ * switch has no case for 0 and no default, so any other value leaves all
+ * destination arrays untouched.
+ *
+ * With w = offset / 4 and r = offset % 4:
+ *   - r != 0: destination word w = (src_l word w) | src_r0[0] << (8 * r);
+ *     each following destination word receives the bits shifted out of
+ *     the previous src_r0 word, or'ed with the next src_r0 word shifted
+ *     left by 8 * r.
+ *   - r == 0: destination words from w onwards are overwritten with
+ *     src_r0[0..]; the src_l arrays are not read.
+ * Destination words below w are never written here. For offsets 1..32 all
+ * of src_r0 fits; for 33..47 whatever would land past dst2[3] is
+ * discarded.
+ *
+ * NOTE(review): the shifts imply that byte k of a word occupies bits
+ * 8k..8k+7, and the plain `|` presumably relies on the src_l word being
+ * zero from byte r upwards -- confirm against the callers.
+ */
+static void memcat_c47_d3x4_sl3x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4])
+{
+ switch (offset)
+ {
+ case 1:
+ dst0[0] = src_l0[0] | src_r0[0] << 8;
+ dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[0] = src_r0[3] >> 24;
break;
- case 56:
- w[14] = 0x80;
+ case 2:
+ dst0[0] = src_l0[0] | src_r0[0] << 16;
+ dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[0] = src_r0[3] >> 16;
break;
- case 57:
- w[14] = w[14] | 0x8000;
+ case 3:
+ dst0[0] = src_l0[0] | src_r0[0] << 24;
+ dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[0] = src_r0[3] >> 8;
break;
- case 58:
- w[14] = w[14] | 0x800000;
+ case 4:
+ dst0[1] = src_r0[0];
+ dst0[2] = src_r0[1];
+ dst0[3] = src_r0[2];
+ dst1[0] = src_r0[3];
break;
- case 59:
- w[14] = w[14] | 0x80000000;
- break;
-
- case 60:
- w[15] = 0x80;
- break;
-
- case 61:
- w[15] = w[15] | 0x8000;
- break;
-
- case 62:
- w[15] = w[15] | 0x800000;
- break;
-
- case 63:
- w[15] = w[15] | 0x80000000;
- break;
- }
-}
-
-// before: append_0x80_8
-static void append_0x80_1x32 (u32 w[32], const u32 offset)
-{
- switch (offset)
- {
- case 0:
- w[ 0] = 0x80;
- break;
-
- case 1:
- w[ 0] = w[ 0] | 0x8000;
- break;
-
- case 2:
- w[ 0] = w[ 0] | 0x800000;
- break;
-
- case 3:
- w[ 0] = w[ 0] | 0x80000000;
- break;
-
- case 4:
- w[ 1] = 0x80;
- break;
-
- case 5:
- w[ 1] = w[ 1] | 0x8000;
+ case 5:
+ dst0[1] = src_l0[1] | src_r0[0] << 8;
+ dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[1] = src_r0[3] >> 24;
break;
case 6:
- w[ 1] = w[ 1] | 0x800000;
+ dst0[1] = src_l0[1] | src_r0[0] << 16;
+ dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[1] = src_r0[3] >> 16;
break;
case 7:
- w[ 1] = w[ 1] | 0x80000000;
+ dst0[1] = src_l0[1] | src_r0[0] << 24;
+ dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[1] = src_r0[3] >> 8;
break;
case 8:
- w[ 2] = 0x80;
+ dst0[2] = src_r0[0];
+ dst0[3] = src_r0[1];
+ dst1[0] = src_r0[2];
+ dst1[1] = src_r0[3];
break;
case 9:
- w[ 2] = w[ 2] | 0x8000;
+ dst0[2] = src_l0[2] | src_r0[0] << 8;
+ dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[2] = src_r0[3] >> 24;
break;
case 10:
- w[ 2] = w[ 2] | 0x800000;
+ dst0[2] = src_l0[2] | src_r0[0] << 16;
+ dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[2] = src_r0[3] >> 16;
break;
case 11:
- w[ 2] = w[ 2] | 0x80000000;
+ dst0[2] = src_l0[2] | src_r0[0] << 24;
+ dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[2] = src_r0[3] >> 8;
break;
case 12:
- w[ 3] = 0x80;
+ dst0[3] = src_r0[0];
+ dst1[0] = src_r0[1];
+ dst1[1] = src_r0[2];
+ dst1[2] = src_r0[3];
break;
case 13:
- w[ 3] = w[ 3] | 0x8000;
+ dst0[3] = src_l0[3] | src_r0[0] << 8;
+ dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[3] = src_r0[3] >> 24;
break;
case 14:
- w[ 3] = w[ 3] | 0x800000;
+ dst0[3] = src_l0[3] | src_r0[0] << 16;
+ dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[3] = src_r0[3] >> 16;
break;
case 15:
- w[ 3] = w[ 3] | 0x80000000;
+ dst0[3] = src_l0[3] | src_r0[0] << 24;
+ dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[3] = src_r0[3] >> 8;
break;
case 16:
- w[ 4] = 0x80;
+ dst1[0] = src_r0[0];
+ dst1[1] = src_r0[1];
+ dst1[2] = src_r0[2];
+ dst1[3] = src_r0[3];
break;
case 17:
- w[ 4] = w[ 4] | 0x8000;
+ dst1[0] = src_l1[0] | src_r0[0] << 8;
+ dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst2[0] = src_r0[3] >> 24;
break;
case 18:
- w[ 4] = w[ 4] | 0x800000;
+ dst1[0] = src_l1[0] | src_r0[0] << 16;
+ dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst2[0] = src_r0[3] >> 16;
break;
case 19:
- w[ 4] = w[ 4] | 0x80000000;
+ dst1[0] = src_l1[0] | src_r0[0] << 24;
+ dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst2[0] = src_r0[3] >> 8;
break;
case 20:
- w[ 5] = 0x80;
+ dst1[1] = src_r0[0];
+ dst1[2] = src_r0[1];
+ dst1[3] = src_r0[2];
+ dst2[0] = src_r0[3];
break;
case 21:
- w[ 5] = w[ 5] | 0x8000;
+ dst1[1] = src_l1[1] | src_r0[0] << 8;
+ dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst2[1] = src_r0[3] >> 24;
break;
case 22:
- w[ 5] = w[ 5] | 0x800000;
+ dst1[1] = src_l1[1] | src_r0[0] << 16;
+ dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst2[1] = src_r0[3] >> 16;
break;
case 23:
- w[ 5] = w[ 5] | 0x80000000;
+ dst1[1] = src_l1[1] | src_r0[0] << 24;
+ dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst2[1] = src_r0[3] >> 8;
break;
case 24:
- w[ 6] = 0x80;
+ dst1[2] = src_r0[0];
+ dst1[3] = src_r0[1];
+ dst2[0] = src_r0[2];
+ dst2[1] = src_r0[3];
break;
case 25:
- w[ 6] = w[ 6] | 0x8000;
+ dst1[2] = src_l1[2] | src_r0[0] << 8;
+ dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst2[2] = src_r0[3] >> 24;
break;
case 26:
- w[ 6] = w[ 6] | 0x800000;
+ dst1[2] = src_l1[2] | src_r0[0] << 16;
+ dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst2[2] = src_r0[3] >> 16;
break;
case 27:
- w[ 6] = w[ 6] | 0x80000000;
+ dst1[2] = src_l1[2] | src_r0[0] << 24;
+ dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst2[2] = src_r0[3] >> 8;
break;
case 28:
- w[ 7] = 0x80;
+ dst1[3] = src_r0[0];
+ dst2[0] = src_r0[1];
+ dst2[1] = src_r0[2];
+ dst2[2] = src_r0[3];
break;
case 29:
- w[ 7] = w[ 7] | 0x8000;
+ dst1[3] = src_l1[3] | src_r0[0] << 8;
+ dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst2[3] = src_r0[3] >> 24;
break;
case 30:
- w[ 7] = w[ 7] | 0x800000;
+ dst1[3] = src_l1[3] | src_r0[0] << 16;
+ dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst2[3] = src_r0[3] >> 16;
break;
case 31:
- w[ 7] = w[ 7] | 0x80000000;
+ dst1[3] = src_l1[3] | src_r0[0] << 24;
+ dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst2[3] = src_r0[3] >> 8;
break;
case 32:
- w[ 8] = 0x80;
+ dst2[0] = src_r0[0];
+ dst2[1] = src_r0[1];
+ dst2[2] = src_r0[2];
+ dst2[3] = src_r0[3];
break;
case 33:
- w[ 8] = w[ 8] | 0x8000;
+ dst2[0] = src_l2[0] | src_r0[0] << 8;
+ dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8;
break;
case 34:
- w[ 8] = w[ 8] | 0x800000;
+ dst2[0] = src_l2[0] | src_r0[0] << 16;
+ dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16;
break;
case 35:
- w[ 8] = w[ 8] | 0x80000000;
+ dst2[0] = src_l2[0] | src_r0[0] << 24;
+ dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24;
break;
case 36:
- w[ 9] = 0x80;
+ dst2[1] = src_r0[0];
+ dst2[2] = src_r0[1];
+ dst2[3] = src_r0[2];
break;
case 37:
- w[ 9] = w[ 9] | 0x8000;
+ dst2[1] = src_l2[1] | src_r0[0] << 8;
+ dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8;
break;
case 38:
- w[ 9] = w[ 9] | 0x800000;
+ dst2[1] = src_l2[1] | src_r0[0] << 16;
+ dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16;
break;
case 39:
- w[ 9] = w[ 9] | 0x80000000;
+ dst2[1] = src_l2[1] | src_r0[0] << 24;
+ dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24;
break;
case 40:
- w[10] = 0x80;
+ dst2[2] = src_r0[0];
+ dst2[3] = src_r0[1];
break;
case 41:
- w[10] = w[10] | 0x8000;
+ dst2[2] = src_l2[2] | src_r0[0] << 8;
+ dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8;
break;
case 42:
- w[10] = w[10] | 0x800000;
+ dst2[2] = src_l2[2] | src_r0[0] << 16;
+ dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16;
break;
case 43:
- w[10] = w[10] | 0x80000000;
+ dst2[2] = src_l2[2] | src_r0[0] << 24;
+ dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24;
break;
case 44:
- w[11] = 0x80;
+ dst2[3] = src_r0[0];
break;
case 45:
- w[11] = w[11] | 0x8000;
+ dst2[3] = src_l2[3] | src_r0[0] << 8;
break;
case 46:
- w[11] = w[11] | 0x800000;
+ dst2[3] = src_l2[3] | src_r0[0] << 16;
break;
case 47:
- w[11] = w[11] | 0x80000000;
+ dst2[3] = src_l2[3] | src_r0[0] << 24;
break;
+ }
+}
- case 48:
- w[12] = 0x80;
- break;
+// before: device_memcat12L
+static void memcat_c47_d3x4_sl3x4_sr2x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4], u32 src_r1[4])
+{
+ switch (offset)
+ {
+ case 0:
+ dst0[0] = src_r0[0];
+ dst0[1] = src_r0[1];
+ dst0[2] = src_r0[2];
+ dst0[3] = src_r0[3];
+ dst1[0] = src_r1[0];
+ dst1[1] = src_r1[1];
+ dst1[2] = src_r1[2];
+ dst1[3] = src_r1[3];
+ break;
- case 49:
- w[12] = w[12] | 0x8000;
+ case 1:
+ dst0[0] = src_l0[0] | src_r0[0] << 8;
+ dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[0] = src_r0[3] >> 24 | src_r1[0] << 8;
+ dst1[1] = src_r1[0] >> 24 | src_r1[1] << 8;
+ dst1[2] = src_r1[1] >> 24 | src_r1[2] << 8;
+ dst1[3] = src_r1[2] >> 24 | src_r1[3] << 8;
+ dst2[0] = src_r1[3] >> 24;
break;
- case 50:
- w[12] = w[12] | 0x800000;
+ case 2:
+ dst0[0] = src_l0[0] | src_r0[0] << 16;
+ dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[0] = src_r0[3] >> 16 | src_r1[0] << 16;
+ dst1[1] = src_r1[0] >> 16 | src_r1[1] << 16;
+ dst1[2] = src_r1[1] >> 16 | src_r1[2] << 16;
+ dst1[3] = src_r1[2] >> 16 | src_r1[3] << 16;
+ dst2[0] = src_r1[3] >> 16;
break;
- case 51:
- w[12] = w[12] | 0x80000000;
+ case 3:
+ dst0[0] = src_l0[0] | src_r0[0] << 24;
+ dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[0] = src_r0[3] >> 8 | src_r1[0] << 24;
+ dst1[1] = src_r1[0] >> 8 | src_r1[1] << 24;
+ dst1[2] = src_r1[1] >> 8 | src_r1[2] << 24;
+ dst1[3] = src_r1[2] >> 8 | src_r1[3] << 24;
+ dst2[0] = src_r1[3] >> 8;
break;
- case 52:
- w[13] = 0x80;
+ case 4:
+ dst0[1] = src_r0[0];
+ dst0[2] = src_r0[1];
+ dst0[3] = src_r0[2];
+ dst1[0] = src_r0[3];
+ dst1[1] = src_r1[0];
+ dst1[2] = src_r1[1];
+ dst1[3] = src_r1[2];
+ dst2[0] = src_r1[3];
break;
- case 53:
- w[13] = w[13] | 0x8000;
+ case 5:
+ dst0[1] = src_l0[1] | src_r0[0] << 8;
+ dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[1] = src_r0[3] >> 24 | src_r1[0] << 8;
+ dst1[2] = src_r1[0] >> 24 | src_r1[1] << 8;
+ dst1[3] = src_r1[1] >> 24 | src_r1[2] << 8;
+ dst2[0] = src_r1[2] >> 24 | src_r1[3] << 8;
+ dst2[1] = src_r1[3] >> 24;
break;
- case 54:
- w[13] = w[13] | 0x800000;
+ case 6:
+ dst0[1] = src_l0[1] | src_r0[0] << 16;
+ dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[1] = src_r0[3] >> 16 | src_r1[0] << 16;
+ dst1[2] = src_r1[0] >> 16 | src_r1[1] << 16;
+ dst1[3] = src_r1[1] >> 16 | src_r1[2] << 16;
+ dst2[0] = src_r1[2] >> 16 | src_r1[3] << 16;
+ dst2[1] = src_r1[3] >> 16;
break;
- case 55:
- w[13] = w[13] | 0x80000000;
+ case 7:
+ dst0[1] = src_l0[1] | src_r0[0] << 24;
+ dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[1] = src_r0[3] >> 8 | src_r1[0] << 24;
+ dst1[2] = src_r1[0] >> 8 | src_r1[1] << 24;
+ dst1[3] = src_r1[1] >> 8 | src_r1[2] << 24;
+ dst2[0] = src_r1[2] >> 8 | src_r1[3] << 24;
+ dst2[1] = src_r1[3] >> 8;
break;
- case 56:
- w[14] = 0x80;
+ case 8:
+ dst0[2] = src_r0[0];
+ dst0[3] = src_r0[1];
+ dst1[0] = src_r0[2];
+ dst1[1] = src_r0[3];
+ dst1[2] = src_r1[0];
+ dst1[3] = src_r1[1];
+ dst2[0] = src_r1[2];
+ dst2[1] = src_r1[3];
break;
- case 57:
- w[14] = w[14] | 0x8000;
+ case 9:
+ dst0[2] = src_l0[2] | src_r0[0] << 8;
+ dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[2] = src_r0[3] >> 24 | src_r1[0] << 8;
+ dst1[3] = src_r1[0] >> 24 | src_r1[1] << 8;
+ dst2[0] = src_r1[1] >> 24 | src_r1[2] << 8;
+ dst2[1] = src_r1[2] >> 24 | src_r1[3] << 8;
+ dst2[2] = src_r1[3] >> 24;
break;
- case 58:
- w[14] = w[14] | 0x800000;
+ case 10:
+ dst0[2] = src_l0[2] | src_r0[0] << 16;
+ dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[2] = src_r0[3] >> 16 | src_r1[0] << 16;
+ dst1[3] = src_r1[0] >> 16 | src_r1[1] << 16;
+ dst2[0] = src_r1[1] >> 16 | src_r1[2] << 16;
+ dst2[1] = src_r1[2] >> 16 | src_r1[3] << 16;
+ dst2[2] = src_r1[3] >> 16;
break;
- case 59:
- w[14] = w[14] | 0x80000000;
+ case 11:
+ dst0[2] = src_l0[2] | src_r0[0] << 24;
+ dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[2] = src_r0[3] >> 8 | src_r1[0] << 24;
+ dst1[3] = src_r1[0] >> 8 | src_r1[1] << 24;
+ dst2[0] = src_r1[1] >> 8 | src_r1[2] << 24;
+ dst2[1] = src_r1[2] >> 8 | src_r1[3] << 24;
+ dst2[2] = src_r1[3] >> 8;
break;
- case 60:
- w[15] = 0x80;
+ case 12:
+ dst0[3] = src_r0[0];
+ dst1[0] = src_r0[1];
+ dst1[1] = src_r0[2];
+ dst1[2] = src_r0[3];
+ dst1[3] = src_r1[0];
+ dst2[0] = src_r1[1];
+ dst2[1] = src_r1[2];
+ dst2[2] = src_r1[3];
break;
- case 61:
- w[15] = w[15] | 0x8000;
+ case 13:
+ dst0[3] = src_l0[3] | src_r0[0] << 8;
+ dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[3] = src_r0[3] >> 24 | src_r1[0] << 8;
+ dst2[0] = src_r1[0] >> 24 | src_r1[1] << 8;
+ dst2[1] = src_r1[1] >> 24 | src_r1[2] << 8;
+ dst2[2] = src_r1[2] >> 24 | src_r1[3] << 8;
+ dst2[3] = src_r1[3] >> 24;
break;
- case 62:
- w[15] = w[15] | 0x800000;
+ case 14:
+ dst0[3] = src_l0[3] | src_r0[0] << 16;
+ dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[3] = src_r0[3] >> 16 | src_r1[0] << 16;
+ dst2[0] = src_r1[0] >> 16 | src_r1[1] << 16;
+ dst2[1] = src_r1[1] >> 16 | src_r1[2] << 16;
+ dst2[2] = src_r1[2] >> 16 | src_r1[3] << 16;
+ dst2[3] = src_r1[3] >> 16;
break;
- case 63:
- w[15] = w[15] | 0x80000000;
+ case 15:
+ dst0[3] = src_l0[3] | src_r0[0] << 24;
+ dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[3] = src_r0[3] >> 8 | src_r1[0] << 24;
+ dst2[0] = src_r1[0] >> 8 | src_r1[1] << 24;
+ dst2[1] = src_r1[1] >> 8 | src_r1[2] << 24;
+ dst2[2] = src_r1[2] >> 8 | src_r1[3] << 24;
+ dst2[3] = src_r1[3] >> 8;
break;
- case 64:
- w[16] = 0x80;
+ case 16:
+ dst1[0] = src_r0[0];
+ dst1[1] = src_r0[1];
+ dst1[2] = src_r0[2];
+ dst1[3] = src_r0[3];
+ dst2[0] = src_r1[0];
+ dst2[1] = src_r1[1];
+ dst2[2] = src_r1[2];
+ dst2[3] = src_r1[3];
break;
- case 65:
- w[16] = w[16] | 0x8000;
+ case 17:
+ dst1[0] = src_l1[0] | src_r0[0] << 8;
+ dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst2[0] = src_r0[3] >> 24 | src_r1[0] << 8;
+ dst2[1] = src_r1[0] >> 24 | src_r1[1] << 8;
+ dst2[2] = src_r1[1] >> 24 | src_r1[2] << 8;
+ dst2[3] = src_r1[2] >> 24 | src_r1[3] << 8;
break;
- case 66:
- w[16] = w[16] | 0x800000;
+ case 18:
+ dst1[0] = src_l1[0] | src_r0[0] << 16;
+ dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst2[0] = src_r0[3] >> 16 | src_r1[0] << 16;
+ dst2[1] = src_r1[0] >> 16 | src_r1[1] << 16;
+ dst2[2] = src_r1[1] >> 16 | src_r1[2] << 16;
+ dst2[3] = src_r1[2] >> 16 | src_r1[3] << 16;
break;
- case 67:
- w[16] = w[16] | 0x80000000;
+ case 19:
+ dst1[0] = src_l1[0] | src_r0[0] << 24;
+ dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst2[0] = src_r0[3] >> 8 | src_r1[0] << 24;
+ dst2[1] = src_r1[0] >> 8 | src_r1[1] << 24;
+ dst2[2] = src_r1[1] >> 8 | src_r1[2] << 24;
+ dst2[3] = src_r1[2] >> 8 | src_r1[3] << 24;
break;
- case 68:
- w[17] = 0x80;
+ case 20:
+ dst1[1] = src_r1[0];
+ dst1[2] = src_r0[1];
+ dst1[3] = src_r0[2];
+ dst2[0] = src_r0[3];
+ dst2[1] = src_r1[0];
+ dst2[2] = src_r1[1];
+ dst2[3] = src_r1[2];
break;
- case 69:
- w[17] = w[17] | 0x8000;
+ case 21:
+ dst1[1] = src_l1[1] | src_r0[0] << 8;
+ dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst2[1] = src_r0[3] >> 24 | src_r1[0] << 8;
+ dst2[2] = src_r1[0] >> 24 | src_r1[1] << 8;
+ dst2[3] = src_r1[1] >> 24 | src_r1[2] << 8;
break;
- case 70:
- w[17] = w[17] | 0x800000;
- break;
-
- case 71:
- w[17] = w[17] | 0x80000000;
- break;
-
- case 72:
- w[18] = 0x80;
+ case 22:
+ dst1[1] = src_l1[1] | src_r0[0] << 16;
+ dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst2[1] = src_r0[3] >> 16 | src_r1[0] << 16;
+ dst2[2] = src_r1[0] >> 16 | src_r1[1] << 16;
+ dst2[3] = src_r1[1] >> 16 | src_r1[2] << 16;
break;
- case 73:
- w[18] = w[18] | 0x8000;
+ case 23:
+ dst1[1] = src_l1[1] | src_r0[0] << 24;
+ dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst2[1] = src_r0[3] >> 8 | src_r1[0] << 24;
+ dst2[2] = src_r1[0] >> 8 | src_r1[1] << 24;
+ dst2[3] = src_r1[1] >> 8 | src_r1[2] << 24;
break;
- case 74:
- w[18] = w[18] | 0x800000;
+ case 24:
+ dst1[2] = src_r1[0];
+ dst1[3] = src_r0[1];
+ dst2[0] = src_r0[2];
+ dst2[1] = src_r0[3];
+ dst2[2] = src_r1[0];
+ dst2[3] = src_r1[1];
break;
- case 75:
- w[18] = w[18] | 0x80000000;
+ case 25:
+ dst1[2] = src_l1[2] | src_r0[0] << 8;
+ dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst2[2] = src_r0[3] >> 24 | src_r1[0] << 8;
+ dst2[3] = src_r1[0] >> 24 | src_r1[1] << 8;
break;
- case 76:
- w[19] = 0x80;
+ case 26:
+ dst1[2] = src_l1[2] | src_r0[0] << 16;
+ dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst2[2] = src_r0[3] >> 16 | src_r1[0] << 16;
+ dst2[3] = src_r1[0] >> 16 | src_r1[1] << 16;
break;
- case 77:
- w[19] = w[19] | 0x8000;
+ case 27:
+ dst1[2] = src_l1[2] | src_r0[0] << 24;
+ dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst2[2] = src_r0[3] >> 8 | src_r1[0] << 24;
+ dst2[3] = src_r1[0] >> 8 | src_r1[1] << 24;
break;
- case 78:
- w[19] = w[19] | 0x800000;
+ case 28:
+ dst1[3] = src_r1[0];
+ dst2[0] = src_r0[1];
+ dst2[1] = src_r0[2];
+ dst2[2] = src_r0[3];
+ dst2[3] = src_r1[0];
break;
- case 79:
- w[19] = w[19] | 0x80000000;
+ case 29:
+ dst1[3] = src_l1[3] | src_r0[0] << 8;
+ dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst2[3] = src_r0[3] >> 24 | src_r1[0] << 8;
break;
- case 80:
- w[20] = 0x80;
+ case 30:
+ dst1[3] = src_l1[3] | src_r0[0] << 16;
+ dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst2[3] = src_r0[3] >> 16 | src_r1[0] << 16;
break;
- case 81:
- w[20] = w[20] | 0x8000;
+ case 31:
+ dst1[3] = src_l1[3] | src_r0[0] << 24;
+ dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst2[3] = src_r0[3] >> 8 | src_r1[0] << 24;
break;
- case 82:
- w[20] = w[20] | 0x800000;
+ case 32:
+ dst2[0] = src_r0[0];
+ dst2[1] = src_r0[1];
+ dst2[2] = src_r0[2];
+ dst2[3] = src_r0[3];
break;
- case 83:
- w[20] = w[20] | 0x80000000;
+ case 33:
+ dst2[0] = src_l2[0] | src_r0[0] << 8;
+ dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8;
break;
- case 84:
- w[21] = 0x80;
+ case 34:
+ dst2[0] = src_l2[0] | src_r0[0] << 16;
+ dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16;
break;
- case 85:
- w[21] = w[21] | 0x8000;
+ case 35:
+ dst2[0] = src_l2[0] | src_r0[0] << 24;
+ dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24;
break;
- case 86:
- w[21] = w[21] | 0x800000;
+ case 36:
+ dst2[1] = src_r0[0];
+ dst2[2] = src_r0[1];
+ dst2[3] = src_r0[2];
break;
- case 87:
- w[21] = w[21] | 0x80000000;
+ case 37:
+ dst2[1] = src_l2[1] | src_r0[0] << 8;
+ dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8;
break;
- case 88:
- w[22] = 0x80;
+ case 38:
+ dst2[1] = src_l2[1] | src_r0[0] << 16;
+ dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16;
break;
- case 89:
- w[22] = w[22] | 0x8000;
+ case 39:
+ dst2[1] = src_l2[1] | src_r0[0] << 24;
+ dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24;
break;
- case 90:
- w[22] = w[22] | 0x800000;
+ case 40:
+ dst2[2] = src_r0[0];
+ dst2[3] = src_r0[1];
break;
- case 91:
- w[22] = w[22] | 0x80000000;
+ case 41:
+ dst2[2] = src_l2[2] | src_r0[0] << 8;
+ dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8;
break;
- case 92:
- w[23] = 0x80;
+ case 42:
+ dst2[2] = src_l2[2] | src_r0[0] << 16;
+ dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16;
break;
- case 93:
- w[23] = w[23] | 0x8000;
+ case 43:
+ dst2[2] = src_l2[2] | src_r0[0] << 24;
+ dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24;
break;
- case 94:
- w[23] = w[23] | 0x800000;
+ case 44:
+ dst2[3] = src_r0[0];
break;
- case 95:
- w[23] = w[23] | 0x80000000;
+ case 45:
+ dst2[3] = src_l2[3] | src_r0[0] << 8;
break;
- case 96:
- w[24] = 0x80;
+ case 46:
+ dst2[3] = src_l2[3] | src_r0[0] << 16;
break;
- case 97:
- w[24] = w[24] | 0x8000;
+ case 47:
+ dst2[3] = src_l2[3] | src_r0[0] << 24;
break;
+ }
+}
- case 98:
- w[24] = w[24] | 0x800000;
+// before: memcat16_9 -- copies 9 words (append0[0..3], append1[0..3], append2[0]) into w0..w3, starting at byte position 'offset'
+static void memcat_c15_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset) // append2[1..3] are never read; an offset outside 0..15 matches no case and leaves w0..w3 untouched
+{
+ switch (offset)
+ {
+ case 0: // word-aligned start at w0[0]: plain word copy
+ w0[0] = append0[0];
+ w0[1] = append0[1];
+ w0[2] = append0[2];
+ w0[3] = append0[3];
+ w1[0] = append1[0];
+ w1[1] = append1[1];
+ w1[2] = append1[2];
+ w1[3] = append1[3];
+ w2[0] = append2[0];
break;
- case 99:
- w[24] = w[24] | 0x80000000;
+ case 1: // offset % 4 == 1: everything is shifted by 8 bits (offset % 4 == 2 -> 16, == 3 -> 24)
+ w0[0] = w0[0] | append0[0] << 8; // OR into the partly used word; presumably its bytes from 'offset' upward are still zero -- TODO confirm with callers
+ w0[1] = append0[0] >> 24 | append0[1] << 8; // following words are overwritten: leftover high byte of the previous source word | low 3 bytes of the next
+ w0[2] = append0[1] >> 24 | append0[2] << 8;
+ w0[3] = append0[2] >> 24 | append0[3] << 8;
+ w1[0] = append0[3] >> 24 | append1[0] << 8;
+ w1[1] = append1[0] >> 24 | append1[1] << 8;
+ w1[2] = append1[1] >> 24 | append1[2] << 8;
+ w1[3] = append1[2] >> 24 | append1[3] << 8;
+ w2[0] = append1[3] >> 24 | append2[0] << 8;
+ w2[1] = append2[0] >> 24; // tail: the last source byte spills into one extra word
break;
- case 100:
- w[25] = 0x80;
+ case 2:
+ w0[0] = w0[0] | append0[0] << 16;
+ w0[1] = append0[0] >> 16 | append0[1] << 16;
+ w0[2] = append0[1] >> 16 | append0[2] << 16;
+ w0[3] = append0[2] >> 16 | append0[3] << 16;
+ w1[0] = append0[3] >> 16 | append1[0] << 16;
+ w1[1] = append1[0] >> 16 | append1[1] << 16;
+ w1[2] = append1[1] >> 16 | append1[2] << 16;
+ w1[3] = append1[2] >> 16 | append1[3] << 16;
+ w2[0] = append1[3] >> 16 | append2[0] << 16;
+ w2[1] = append2[0] >> 16;
break;
- case 101:
- w[25] = w[25] | 0x8000;
+ case 3:
+ w0[0] = w0[0] | append0[0] << 24;
+ w0[1] = append0[0] >> 8 | append0[1] << 24;
+ w0[2] = append0[1] >> 8 | append0[2] << 24;
+ w0[3] = append0[2] >> 8 | append0[3] << 24;
+ w1[0] = append0[3] >> 8 | append1[0] << 24;
+ w1[1] = append1[0] >> 8 | append1[1] << 24;
+ w1[2] = append1[1] >> 8 | append1[2] << 24;
+ w1[3] = append1[2] >> 8 | append1[3] << 24;
+ w2[0] = append1[3] >> 8 | append2[0] << 24;
+ w2[1] = append2[0] >> 8;
break;
- case 102:
- w[25] = w[25] | 0x800000;
+ case 4: // offset % 4 == 0: plain copy again, one word further in
+ w0[1] = append0[0];
+ w0[2] = append0[1];
+ w0[3] = append0[2];
+ w1[0] = append0[3];
+ w1[1] = append1[0];
+ w1[2] = append1[1];
+ w1[3] = append1[2];
+ w2[0] = append1[3];
+ w2[1] = append2[0];
break;
- case 103:
- w[25] = w[25] | 0x80000000;
- break;
-
- case 104:
- w[26] = 0x80;
- break;
-
- case 105:
- w[26] = w[26] | 0x8000;
- break;
-
- case 106:
- w[26] = w[26] | 0x800000;
- break;
-
- case 107:
- w[26] = w[26] | 0x80000000;
- break;
-
- case 108:
- w[27] = 0x80;
- break;
-
- case 109:
- w[27] = w[27] | 0x8000;
- break;
-
- case 110:
- w[27] = w[27] | 0x800000;
- break;
-
- case 111:
- w[27] = w[27] | 0x80000000;
- break;
-
- case 112:
- w[28] = 0x80;
- break;
-
- case 113:
- w[28] = w[28] | 0x8000;
- break;
-
- case 114:
- w[28] = w[28] | 0x800000;
- break;
-
- case 115:
- w[28] = w[28] | 0x80000000;
- break;
-
- case 116:
- w[29] = 0x80;
- break;
-
- case 117:
- w[29] = w[29] | 0x8000;
+ case 5:
+ w0[1] = w0[1] | append0[0] << 8;
+ w0[2] = append0[0] >> 24 | append0[1] << 8;
+ w0[3] = append0[1] >> 24 | append0[2] << 8;
+ w1[0] = append0[2] >> 24 | append0[3] << 8;
+ w1[1] = append0[3] >> 24 | append1[0] << 8;
+ w1[2] = append1[0] >> 24 | append1[1] << 8;
+ w1[3] = append1[1] >> 24 | append1[2] << 8;
+ w2[0] = append1[2] >> 24 | append1[3] << 8;
+ w2[1] = append1[3] >> 24 | append2[0] << 8;
+ w2[2] = append2[0] >> 24;
break;
- case 118:
- w[29] = w[29] | 0x800000;
+ case 6:
+ w0[1] = w0[1] | append0[0] << 16;
+ w0[2] = append0[0] >> 16 | append0[1] << 16;
+ w0[3] = append0[1] >> 16 | append0[2] << 16;
+ w1[0] = append0[2] >> 16 | append0[3] << 16;
+ w1[1] = append0[3] >> 16 | append1[0] << 16;
+ w1[2] = append1[0] >> 16 | append1[1] << 16;
+ w1[3] = append1[1] >> 16 | append1[2] << 16;
+ w2[0] = append1[2] >> 16 | append1[3] << 16;
+ w2[1] = append1[3] >> 16 | append2[0] << 16;
+ w2[2] = append2[0] >> 16;
break;
- case 119:
- w[29] = w[29] | 0x80000000;
+ case 7:
+ w0[1] = w0[1] | append0[0] << 24;
+ w0[2] = append0[0] >> 8 | append0[1] << 24;
+ w0[3] = append0[1] >> 8 | append0[2] << 24;
+ w1[0] = append0[2] >> 8 | append0[3] << 24;
+ w1[1] = append0[3] >> 8 | append1[0] << 24;
+ w1[2] = append1[0] >> 8 | append1[1] << 24;
+ w1[3] = append1[1] >> 8 | append1[2] << 24;
+ w2[0] = append1[2] >> 8 | append1[3] << 24;
+ w2[1] = append1[3] >> 8 | append2[0] << 24;
+ w2[2] = append2[0] >> 8;
break;
- case 120:
- w[30] = 0x80;
+ case 8:
+ w0[2] = append0[0];
+ w0[3] = append0[1];
+ w1[0] = append0[2];
+ w1[1] = append0[3];
+ w1[2] = append1[0];
+ w1[3] = append1[1];
+ w2[0] = append1[2];
+ w2[1] = append1[3];
+ w2[2] = append2[0];
break;
- case 121:
- w[30] = w[30] | 0x8000;
+ case 9:
+ w0[2] = w0[2] | append0[0] << 8;
+ w0[3] = append0[0] >> 24 | append0[1] << 8;
+ w1[0] = append0[1] >> 24 | append0[2] << 8;
+ w1[1] = append0[2] >> 24 | append0[3] << 8;
+ w1[2] = append0[3] >> 24 | append1[0] << 8;
+ w1[3] = append1[0] >> 24 | append1[1] << 8;
+ w2[0] = append1[1] >> 24 | append1[2] << 8;
+ w2[1] = append1[2] >> 24 | append1[3] << 8;
+ w2[2] = append1[3] >> 24 | append2[0] << 8;
+ w2[3] = append2[0] >> 24;
break;
- case 122:
- w[30] = w[30] | 0x800000;
+ case 10:
+ w0[2] = w0[2] | append0[0] << 16;
+ w0[3] = append0[0] >> 16 | append0[1] << 16;
+ w1[0] = append0[1] >> 16 | append0[2] << 16;
+ w1[1] = append0[2] >> 16 | append0[3] << 16;
+ w1[2] = append0[3] >> 16 | append1[0] << 16;
+ w1[3] = append1[0] >> 16 | append1[1] << 16;
+ w2[0] = append1[1] >> 16 | append1[2] << 16;
+ w2[1] = append1[2] >> 16 | append1[3] << 16;
+ w2[2] = append1[3] >> 16 | append2[0] << 16;
+ w2[3] = append2[0] >> 16;
break;
- case 123:
- w[30] = w[30] | 0x80000000;
+ case 11:
+ w0[2] = w0[2] | append0[0] << 24;
+ w0[3] = append0[0] >> 8 | append0[1] << 24;
+ w1[0] = append0[1] >> 8 | append0[2] << 24;
+ w1[1] = append0[2] >> 8 | append0[3] << 24;
+ w1[2] = append0[3] >> 8 | append1[0] << 24;
+ w1[3] = append1[0] >> 8 | append1[1] << 24;
+ w2[0] = append1[1] >> 8 | append1[2] << 24;
+ w2[1] = append1[2] >> 8 | append1[3] << 24;
+ w2[2] = append1[3] >> 8 | append2[0] << 24;
+ w2[3] = append2[0] >> 8;
break;
- case 124:
- w[31] = 0x80;
+ case 12:
+ w0[3] = append0[0];
+ w1[0] = append0[1];
+ w1[1] = append0[2];
+ w1[2] = append0[3];
+ w1[3] = append1[0];
+ w2[0] = append1[1];
+ w2[1] = append1[2];
+ w2[2] = append1[3];
+ w2[3] = append2[0];
break;
- case 125:
- w[31] = w[31] | 0x8000;
+ case 13:
+ w0[3] = w0[3] | append0[0] << 8;
+ w1[0] = append0[0] >> 24 | append0[1] << 8;
+ w1[1] = append0[1] >> 24 | append0[2] << 8;
+ w1[2] = append0[2] >> 24 | append0[3] << 8;
+ w1[3] = append0[3] >> 24 | append1[0] << 8;
+ w2[0] = append1[0] >> 24 | append1[1] << 8;
+ w2[1] = append1[1] >> 24 | append1[2] << 8;
+ w2[2] = append1[2] >> 24 | append1[3] << 8;
+ w2[3] = append1[3] >> 24 | append2[0] << 8;
+ w3[0] = append2[0] >> 24; // offsets 13..15 are the only ones that write to w3
break;
- case 126:
- w[31] = w[31] | 0x800000;
+ case 14:
+ w0[3] = w0[3] | append0[0] << 16;
+ w1[0] = append0[0] >> 16 | append0[1] << 16;
+ w1[1] = append0[1] >> 16 | append0[2] << 16;
+ w1[2] = append0[2] >> 16 | append0[3] << 16;
+ w1[3] = append0[3] >> 16 | append1[0] << 16;
+ w2[0] = append1[0] >> 16 | append1[1] << 16;
+ w2[1] = append1[1] >> 16 | append1[2] << 16;
+ w2[2] = append1[2] >> 16 | append1[3] << 16;
+ w2[3] = append1[3] >> 16 | append2[0] << 16;
+ w3[0] = append2[0] >> 16;
break;
- case 127:
- w[31] = w[31] | 0x80000000;
+ case 15:
+ w0[3] = w0[3] | append0[0] << 24;
+ w1[0] = append0[0] >> 8 | append0[1] << 24;
+ w1[1] = append0[1] >> 8 | append0[2] << 24;
+ w1[2] = append0[2] >> 8 | append0[3] << 24;
+ w1[3] = append0[3] >> 8 | append1[0] << 24;
+ w2[0] = append1[0] >> 8 | append1[1] << 24;
+ w2[1] = append1[1] >> 8 | append1[2] << 24;
+ w2[2] = append1[2] >> 8 | append1[3] << 24;
+ w2[3] = append1[3] >> 8 | append2[0] << 24;
+ w3[0] = append2[0] >> 8;
break;
}
}
-// before: device_memcat2L
-static void memcat_c7_d1x2_sl1x2_sr1x2 (const u32 offset, u32 dst0[2], u32 src_l0[2], u32 src_r0[2])
+// before: memcat32_8 -- copies 8 words (append0[0..3], append1[0..3]) into w0..w3, starting at byte position 'offset'
+static void memcat_c32_w4x4_a2x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 offset) // an offset outside 0..32 matches no case and leaves w0..w3 untouched; w3[2] and w3[3] are never written (see cases 25..32)
{
switch (offset)
{
+ case 0: // word-aligned start at w0[0]: plain word copy
+ w0[0] = append0[0];
+ w0[1] = append0[1];
+ w0[2] = append0[2];
+ w0[3] = append0[3];
+ w1[0] = append1[0];
+ w1[1] = append1[1];
+ w1[2] = append1[2];
+ w1[3] = append1[3];
+ break;
+
case 1:
- dst0[0] = src_l0[0] | src_r0[0] << 8;
- dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ w0[0] = w0[0] | append0[0] << 8; // offset % 4 == 1 -> shift by 8 bits; OR into the partly used word (presumably its bytes from 'offset' upward are still zero -- TODO confirm with callers)
+ w0[1] = append0[0] >> 24 | append0[1] << 8; // following words are overwritten: leftover high byte of the previous source word | low 3 bytes of the next
+ w0[2] = append0[1] >> 24 | append0[2] << 8;
+ w0[3] = append0[2] >> 24 | append0[3] << 8;
+ w1[0] = append0[3] >> 24 | append1[0] << 8;
+ w1[1] = append1[0] >> 24 | append1[1] << 8;
+ w1[2] = append1[1] >> 24 | append1[2] << 8;
+ w1[3] = append1[2] >> 24 | append1[3] << 8;
+ w2[0] = append1[3] >> 24; // tail: the last source byte spills into one extra word
break;
case 2:
- dst0[0] = src_l0[0] | src_r0[0] << 16;
- dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ w0[0] = w0[0] | append0[0] << 16;
+ w0[1] = append0[0] >> 16 | append0[1] << 16;
+ w0[2] = append0[1] >> 16 | append0[2] << 16;
+ w0[3] = append0[2] >> 16 | append0[3] << 16;
+ w1[0] = append0[3] >> 16 | append1[0] << 16;
+ w1[1] = append1[0] >> 16 | append1[1] << 16;
+ w1[2] = append1[1] >> 16 | append1[2] << 16;
+ w1[3] = append1[2] >> 16 | append1[3] << 16;
+ w2[0] = append1[3] >> 16;
break;
case 3:
- dst0[0] = src_l0[0] | src_r0[0] << 24;
- dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ w0[0] = w0[0] | append0[0] << 24;
+ w0[1] = append0[0] >> 8 | append0[1] << 24;
+ w0[2] = append0[1] >> 8 | append0[2] << 24;
+ w0[3] = append0[2] >> 8 | append0[3] << 24;
+ w1[0] = append0[3] >> 8 | append1[0] << 24;
+ w1[1] = append1[0] >> 8 | append1[1] << 24;
+ w1[2] = append1[1] >> 8 | append1[2] << 24;
+ w1[3] = append1[2] >> 8 | append1[3] << 24;
+ w2[0] = append1[3] >> 8;
break;
case 4:
- dst0[1] = src_r0[0];
+ w0[1] = append0[0];
+ w0[2] = append0[1];
+ w0[3] = append0[2];
+ w1[0] = append0[3];
+ w1[1] = append1[0];
+ w1[2] = append1[1];
+ w1[3] = append1[2];
+ w2[0] = append1[3];
break;
case 5:
- dst0[1] = src_l0[1] | src_r0[0] << 8;
- break;
-
- case 6:
- dst0[1] = src_l0[1] | src_r0[0] << 16;
- break;
-
- case 7:
- dst0[1] = src_l0[1] | src_r0[0] << 24;
- break;
- }
-}
-
-// before: device_memcat4L
-static void memcat_c15_d1x4_sl1x4_sr1x4 (const u32 offset, u32 dst0[4], u32 src_l0[4], u32 src_r0[4])
-{
- switch (offset)
- {
- case 1:
- dst0[0] = src_l0[0] | src_r0[0] << 8;
- dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
- break;
-
- case 2:
- dst0[0] = src_l0[0] | src_r0[0] << 16;
- dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
- break;
-
- case 3:
- dst0[0] = src_l0[0] | src_r0[0] << 24;
- dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
- break;
-
- case 4:
- dst0[1] = src_r0[0];
- dst0[2] = src_r0[1];
- dst0[3] = src_r0[2];
- break;
-
- case 5:
- dst0[1] = src_l0[1] | src_r0[0] << 8;
- dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
+ w0[1] = w0[1] | append0[0] << 8;
+ w0[2] = append0[0] >> 24 | append0[1] << 8;
+ w0[3] = append0[1] >> 24 | append0[2] << 8;
+ w1[0] = append0[2] >> 24 | append0[3] << 8;
+ w1[1] = append0[3] >> 24 | append1[0] << 8;
+ w1[2] = append1[0] >> 24 | append1[1] << 8;
+ w1[3] = append1[1] >> 24 | append1[2] << 8;
+ w2[0] = append1[2] >> 24 | append1[3] << 8;
+ w2[1] = append1[3] >> 24;
break;
case 6:
- dst0[1] = src_l0[1] | src_r0[0] << 16;
- dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
+ w0[1] = w0[1] | append0[0] << 16;
+ w0[2] = append0[0] >> 16 | append0[1] << 16;
+ w0[3] = append0[1] >> 16 | append0[2] << 16;
+ w1[0] = append0[2] >> 16 | append0[3] << 16;
+ w1[1] = append0[3] >> 16 | append1[0] << 16;
+ w1[2] = append1[0] >> 16 | append1[1] << 16;
+ w1[3] = append1[1] >> 16 | append1[2] << 16;
+ w2[0] = append1[2] >> 16 | append1[3] << 16;
+ w2[1] = append1[3] >> 16;
break;
case 7:
- dst0[1] = src_l0[1] | src_r0[0] << 24;
- dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
+ w0[1] = w0[1] | append0[0] << 24;
+ w0[2] = append0[0] >> 8 | append0[1] << 24;
+ w0[3] = append0[1] >> 8 | append0[2] << 24;
+ w1[0] = append0[2] >> 8 | append0[3] << 24;
+ w1[1] = append0[3] >> 8 | append1[0] << 24;
+ w1[2] = append1[0] >> 8 | append1[1] << 24;
+ w1[3] = append1[1] >> 8 | append1[2] << 24;
+ w2[0] = append1[2] >> 8 | append1[3] << 24;
+ w2[1] = append1[3] >> 8;
break;
case 8:
- dst0[2] = src_r0[0];
- dst0[3] = src_r0[1];
+ w0[2] = append0[0];
+ w0[3] = append0[1];
+ w1[0] = append0[2];
+ w1[1] = append0[3];
+ w1[2] = append1[0];
+ w1[3] = append1[1];
+ w2[0] = append1[2];
+ w2[1] = append1[3];
break;
case 9:
- dst0[2] = src_l0[2] | src_r0[0] << 8;
- dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
+ w0[2] = w0[2] | append0[0] << 8;
+ w0[3] = append0[0] >> 24 | append0[1] << 8;
+ w1[0] = append0[1] >> 24 | append0[2] << 8;
+ w1[1] = append0[2] >> 24 | append0[3] << 8;
+ w1[2] = append0[3] >> 24 | append1[0] << 8;
+ w1[3] = append1[0] >> 24 | append1[1] << 8;
+ w2[0] = append1[1] >> 24 | append1[2] << 8;
+ w2[1] = append1[2] >> 24 | append1[3] << 8;
+ w2[2] = append1[3] >> 24;
break;
case 10:
- dst0[2] = src_l0[2] | src_r0[0] << 16;
- dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
+ w0[2] = w0[2] | append0[0] << 16;
+ w0[3] = append0[0] >> 16 | append0[1] << 16;
+ w1[0] = append0[1] >> 16 | append0[2] << 16;
+ w1[1] = append0[2] >> 16 | append0[3] << 16;
+ w1[2] = append0[3] >> 16 | append1[0] << 16;
+ w1[3] = append1[0] >> 16 | append1[1] << 16;
+ w2[0] = append1[1] >> 16 | append1[2] << 16;
+ w2[1] = append1[2] >> 16 | append1[3] << 16;
+ w2[2] = append1[3] >> 16;
break;
case 11:
- dst0[2] = src_l0[2] | src_r0[0] << 24;
- dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ w0[2] = w0[2] | append0[0] << 24;
+ w0[3] = append0[0] >> 8 | append0[1] << 24;
+ w1[0] = append0[1] >> 8 | append0[2] << 24;
+ w1[1] = append0[2] >> 8 | append0[3] << 24;
+ w1[2] = append0[3] >> 8 | append1[0] << 24;
+ w1[3] = append1[0] >> 8 | append1[1] << 24;
+ w2[0] = append1[1] >> 8 | append1[2] << 24;
+ w2[1] = append1[2] >> 8 | append1[3] << 24;
+ w2[2] = append1[3] >> 8;
break;
case 12:
- dst0[3] = src_r0[0];
+ w0[3] = append0[0];
+ w1[0] = append0[1];
+ w1[1] = append0[2];
+ w1[2] = append0[3];
+ w1[3] = append1[0];
+ w2[0] = append1[1];
+ w2[1] = append1[2];
+ w2[2] = append1[3];
break;
case 13:
- dst0[3] = src_l0[3] | src_r0[0] << 8;
+ w0[3] = w0[3] | append0[0] << 8;
+ w1[0] = append0[0] >> 24 | append0[1] << 8;
+ w1[1] = append0[1] >> 24 | append0[2] << 8;
+ w1[2] = append0[2] >> 24 | append0[3] << 8;
+ w1[3] = append0[3] >> 24 | append1[0] << 8;
+ w2[0] = append1[0] >> 24 | append1[1] << 8;
+ w2[1] = append1[1] >> 24 | append1[2] << 8;
+ w2[2] = append1[2] >> 24 | append1[3] << 8;
+ w2[3] = append1[3] >> 24;
break;
case 14:
- dst0[3] = src_l0[3] | src_r0[0] << 16;
+ w0[3] = w0[3] | append0[0] << 16;
+ w1[0] = append0[0] >> 16 | append0[1] << 16;
+ w1[1] = append0[1] >> 16 | append0[2] << 16;
+ w1[2] = append0[2] >> 16 | append0[3] << 16;
+ w1[3] = append0[3] >> 16 | append1[0] << 16;
+ w2[0] = append1[0] >> 16 | append1[1] << 16;
+ w2[1] = append1[1] >> 16 | append1[2] << 16;
+ w2[2] = append1[2] >> 16 | append1[3] << 16;
+ w2[3] = append1[3] >> 16;
break;
case 15:
- dst0[3] = src_l0[3] | src_r0[0] << 24;
- break;
- }
-}
-
-// before: device_memcat8L
-static void memcat_c31_d2x4_sl2x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 src_l0[4], u32 src_l1[4], u32 src_r0[4])
-{
- switch (offset)
- {
- case 1:
- dst0[0] = src_l0[0] | src_r0[0] << 8;
- dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[0] = src_r0[3] >> 24;
+ w0[3] = w0[3] | append0[0] << 24;
+ w1[0] = append0[0] >> 8 | append0[1] << 24;
+ w1[1] = append0[1] >> 8 | append0[2] << 24;
+ w1[2] = append0[2] >> 8 | append0[3] << 24;
+ w1[3] = append0[3] >> 8 | append1[0] << 24;
+ w2[0] = append1[0] >> 8 | append1[1] << 24;
+ w2[1] = append1[1] >> 8 | append1[2] << 24;
+ w2[2] = append1[2] >> 8 | append1[3] << 24;
+ w2[3] = append1[3] >> 8;
break;
- case 2:
- dst0[0] = src_l0[0] | src_r0[0] << 16;
- dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[0] = src_r0[3] >> 16;
+ case 16:
+ w1[0] = append0[0];
+ w1[1] = append0[1];
+ w1[2] = append0[2];
+ w1[3] = append0[3];
+ w2[0] = append1[0];
+ w2[1] = append1[1];
+ w2[2] = append1[2];
+ w2[3] = append1[3];
break;
- case 3:
- dst0[0] = src_l0[0] | src_r0[0] << 24;
- dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[0] = src_r0[3] >> 8;
+ case 17:
+ w1[0] = w1[0] | append0[0] << 8;
+ w1[1] = append0[0] >> 24 | append0[1] << 8;
+ w1[2] = append0[1] >> 24 | append0[2] << 8;
+ w1[3] = append0[2] >> 24 | append0[3] << 8;
+ w2[0] = append0[3] >> 24 | append1[0] << 8;
+ w2[1] = append1[0] >> 24 | append1[1] << 8;
+ w2[2] = append1[1] >> 24 | append1[2] << 8;
+ w2[3] = append1[2] >> 24 | append1[3] << 8;
+ w3[0] = append1[3] >> 24;
break;
- case 4:
- dst0[1] = src_r0[0];
- dst0[2] = src_r0[1];
- dst0[3] = src_r0[2];
- dst1[0] = src_r0[3];
+ case 18:
+ w1[0] = w1[0] | append0[0] << 16;
+ w1[1] = append0[0] >> 16 | append0[1] << 16;
+ w1[2] = append0[1] >> 16 | append0[2] << 16;
+ w1[3] = append0[2] >> 16 | append0[3] << 16;
+ w2[0] = append0[3] >> 16 | append1[0] << 16;
+ w2[1] = append1[0] >> 16 | append1[1] << 16;
+ w2[2] = append1[1] >> 16 | append1[2] << 16;
+ w2[3] = append1[2] >> 16 | append1[3] << 16;
+ w3[0] = append1[3] >> 16;
break;
- case 5:
- dst0[1] = src_l0[1] | src_r0[0] << 8;
- dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[1] = src_r0[3] >> 24;
+ case 19:
+ w1[0] = w1[0] | append0[0] << 24;
+ w1[1] = append0[0] >> 8 | append0[1] << 24;
+ w1[2] = append0[1] >> 8 | append0[2] << 24;
+ w1[3] = append0[2] >> 8 | append0[3] << 24;
+ w2[0] = append0[3] >> 8 | append1[0] << 24;
+ w2[1] = append1[0] >> 8 | append1[1] << 24;
+ w2[2] = append1[1] >> 8 | append1[2] << 24;
+ w2[3] = append1[2] >> 8 | append1[3] << 24;
+ w3[0] = append1[3] >> 8;
break;
- case 6:
- dst0[1] = src_l0[1] | src_r0[0] << 16;
- dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[1] = src_r0[3] >> 16;
- break;
-
- case 7:
- dst0[1] = src_l0[1] | src_r0[0] << 24;
- dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[1] = src_r0[3] >> 8;
- break;
-
- case 8:
- dst0[2] = src_r0[0];
- dst0[3] = src_r0[1];
- dst1[0] = src_r0[2];
- dst1[1] = src_r0[3];
- break;
-
- case 9:
- dst0[2] = src_l0[2] | src_r0[0] << 8;
- dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[2] = src_r0[3] >> 24;
- break;
-
- case 10:
- dst0[2] = src_l0[2] | src_r0[0] << 16;
- dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[2] = src_r0[3] >> 16;
- break;
-
- case 11:
- dst0[2] = src_l0[2] | src_r0[0] << 24;
- dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[2] = src_r0[3] >> 8;
- break;
-
- case 12:
- dst0[3] = src_r0[0];
- dst1[0] = src_r0[1];
- dst1[1] = src_r0[2];
- dst1[2] = src_r0[3];
- break;
-
- case 13:
- dst0[3] = src_l0[3] | src_r0[0] << 8;
- dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[3] = src_r0[3] >> 24;
- break;
-
- case 14:
- dst0[3] = src_l0[3] | src_r0[0] << 16;
- dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[3] = src_r0[3] >> 16;
- break;
-
- case 15:
- dst0[3] = src_l0[3] | src_r0[0] << 24;
- dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[3] = src_r0[3] >> 8;
- break;
-
- case 16:
- dst1[0] = src_r0[0];
- dst1[1] = src_r0[1];
- dst1[2] = src_r0[2];
- dst1[3] = src_r0[3];
- break;
-
- case 17:
- dst1[0] = src_l1[0] | src_r0[0] << 8;
- dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8;
- break;
-
- case 18:
- dst1[0] = src_l1[0] | src_r0[0] << 16;
- dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16;
- break;
-
- case 19:
- dst1[0] = src_l1[0] | src_r0[0] << 24;
- dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24;
- break;
-
- case 20:
- dst1[1] = src_r0[0];
- dst1[2] = src_r0[1];
- dst1[3] = src_r0[2];
+ case 20:
+ w1[1] = append0[0];
+ w1[2] = append0[1];
+ w1[3] = append0[2];
+ w2[0] = append0[3];
+ w2[1] = append1[0];
+ w2[2] = append1[1];
+ w2[3] = append1[2];
+ w3[0] = append1[3];
break;
case 21:
- dst1[1] = src_l1[1] | src_r0[0] << 8;
- dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8;
+ w1[1] = w1[1] | append0[0] << 8;
+ w1[2] = append0[0] >> 24 | append0[1] << 8;
+ w1[3] = append0[1] >> 24 | append0[2] << 8;
+ w2[0] = append0[2] >> 24 | append0[3] << 8;
+ w2[1] = append0[3] >> 24 | append1[0] << 8;
+ w2[2] = append1[0] >> 24 | append1[1] << 8;
+ w2[3] = append1[1] >> 24 | append1[2] << 8;
+ w3[0] = append1[2] >> 24 | append1[3] << 8;
+ w3[1] = append1[3] >> 24;
break;
case 22:
- dst1[1] = src_l1[1] | src_r0[0] << 16;
- dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16;
+ w1[1] = w1[1] | append0[0] << 16;
+ w1[2] = append0[0] >> 16 | append0[1] << 16;
+ w1[3] = append0[1] >> 16 | append0[2] << 16;
+ w2[0] = append0[2] >> 16 | append0[3] << 16;
+ w2[1] = append0[3] >> 16 | append1[0] << 16;
+ w2[2] = append1[0] >> 16 | append1[1] << 16;
+ w2[3] = append1[1] >> 16 | append1[2] << 16;
+ w3[0] = append1[2] >> 16 | append1[3] << 16;
+ w3[1] = append1[3] >> 16;
break;
case 23:
- dst1[1] = src_l1[1] | src_r0[0] << 24;
- dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24;
+ w1[1] = w1[1] | append0[0] << 24;
+ w1[2] = append0[0] >> 8 | append0[1] << 24;
+ w1[3] = append0[1] >> 8 | append0[2] << 24;
+ w2[0] = append0[2] >> 8 | append0[3] << 24;
+ w2[1] = append0[3] >> 8 | append1[0] << 24;
+ w2[2] = append1[0] >> 8 | append1[1] << 24;
+ w2[3] = append1[1] >> 8 | append1[2] << 24;
+ w3[0] = append1[2] >> 8 | append1[3] << 24;
+ w3[1] = append1[3] >> 8;
break;
case 24:
- dst1[2] = src_r0[0];
- dst1[3] = src_r0[1];
+ w1[2] = append0[0];
+ w1[3] = append0[1];
+ w2[0] = append0[2];
+ w2[1] = append0[3];
+ w2[2] = append1[0];
+ w2[3] = append1[1];
+ w3[0] = append1[2];
+ w3[1] = append1[3];
break;
case 25:
- dst1[2] = src_l1[2] | src_r0[0] << 8;
- dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8;
+ w1[2] = w1[2] | append0[0] << 8;
+ w1[3] = append0[0] >> 24 | append0[1] << 8;
+ w2[0] = append0[1] >> 24 | append0[2] << 8;
+ w2[1] = append0[2] >> 24 | append0[3] << 8;
+ w2[2] = append0[3] >> 24 | append1[0] << 8;
+ w2[3] = append1[0] >> 24 | append1[1] << 8;
+ w3[0] = append1[1] >> 24 | append1[2] << 8;
+ w3[1] = append1[2] >> 24 | append1[3] << 8; // output stops at w3[1]: for offsets 25..32 the last (offset - 24) bytes of the data are not stored
break;
case 26:
- dst1[2] = src_l1[2] | src_r0[0] << 16;
- dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16;
+ w1[2] = w1[2] | append0[0] << 16;
+ w1[3] = append0[0] >> 16 | append0[1] << 16;
+ w2[0] = append0[1] >> 16 | append0[2] << 16;
+ w2[1] = append0[2] >> 16 | append0[3] << 16;
+ w2[2] = append0[3] >> 16 | append1[0] << 16;
+ w2[3] = append1[0] >> 16 | append1[1] << 16;
+ w3[0] = append1[1] >> 16 | append1[2] << 16;
+ w3[1] = append1[2] >> 16 | append1[3] << 16;
break;
case 27:
- dst1[2] = src_l1[2] | src_r0[0] << 24;
- dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ w1[2] = w1[2] | append0[0] << 24;
+ w1[3] = append0[0] >> 8 | append0[1] << 24;
+ w2[0] = append0[1] >> 8 | append0[2] << 24;
+ w2[1] = append0[2] >> 8 | append0[3] << 24;
+ w2[2] = append0[3] >> 8 | append1[0] << 24;
+ w2[3] = append1[0] >> 8 | append1[1] << 24;
+ w3[0] = append1[1] >> 8 | append1[2] << 24;
+ w3[1] = append1[2] >> 8 | append1[3] << 24;
break;
case 28:
- dst1[3] = src_r0[0];
+ w1[3] = append0[0];
+ w2[0] = append0[1];
+ w2[1] = append0[2];
+ w2[2] = append0[3];
+ w2[3] = append1[0];
+ w3[0] = append1[1];
+ w3[1] = append1[2]; // append1[3] is not stored
break;
case 29:
- dst1[3] = src_l1[3] | src_r0[0] << 8;
+ w1[3] = w1[3] | append0[0] << 8;
+ w2[0] = append0[0] >> 24 | append0[1] << 8;
+ w2[1] = append0[1] >> 24 | append0[2] << 8;
+ w2[2] = append0[2] >> 24 | append0[3] << 8;
+ w2[3] = append0[3] >> 24 | append1[0] << 8;
+ w3[0] = append1[0] >> 24 | append1[1] << 8;
+ w3[1] = append1[1] >> 24 | append1[2] << 8;
break;
case 30:
- dst1[3] = src_l1[3] | src_r0[0] << 16;
+ w1[3] = w1[3] | append0[0] << 16;
+ w2[0] = append0[0] >> 16 | append0[1] << 16;
+ w2[1] = append0[1] >> 16 | append0[2] << 16;
+ w2[2] = append0[2] >> 16 | append0[3] << 16;
+ w2[3] = append0[3] >> 16 | append1[0] << 16;
+ w3[0] = append1[0] >> 16 | append1[1] << 16;
+ w3[1] = append1[1] >> 16 | append1[2] << 16;
break;
case 31:
- dst1[3] = src_l1[3] | src_r0[0] << 24;
+ w1[3] = w1[3] | append0[0] << 24;
+ w2[0] = append0[0] >> 8 | append0[1] << 24;
+ w2[1] = append0[1] >> 8 | append0[2] << 24;
+ w2[2] = append0[2] >> 8 | append0[3] << 24;
+ w2[3] = append0[3] >> 8 | append1[0] << 24;
+ w3[0] = append1[0] >> 8 | append1[1] << 24;
+ w3[1] = append1[1] >> 8 | append1[2] << 24;
+ break;
+
+ case 32:
+ w2[0] = append0[0];
+ w2[1] = append0[1];
+ w2[2] = append0[2];
+ w2[3] = append0[3];
+ w3[0] = append1[0];
+ w3[1] = append1[1]; // append1[2] and append1[3] are not stored
break;
}
}
-// before: device_memcat12L
-static void memcat_c47_d3x4_sl3x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4])
+// before: memcat32_9 -- stores the 9 words append0[0..3], append1[0..3], append2[0] into w0..w3 starting at byte position `offset`
+static void memcat_c32_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset) // only offset 0..32 has a case (no default); nothing past w3[1] is ever written
{
switch (offset)
{
- case 1:
- dst0[0] = src_l0[0] | src_r0[0] << 8;
- dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[0] = src_r0[3] >> 24;
+ case 0: // offset is a multiple of 4: plain word copy, destination words are overwritten
+ w0[0] = append0[0];
+ w0[1] = append0[1];
+ w0[2] = append0[2];
+ w0[3] = append0[3];
+ w1[0] = append1[0];
+ w1[1] = append1[1];
+ w1[2] = append1[2];
+ w1[3] = append1[3];
+ w2[0] = append2[0];
break;
- case 2:
- dst0[0] = src_l0[0] | src_r0[0] << 16;
- dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[0] = src_r0[3] >> 16;
+ case 1: // offset not a multiple of 4: each output word is stitched from two neighbouring input words
+ w0[0] = w0[0] | append0[0] << 8; // existing bits of the first word are kept (OR); data is shifted up by 8 * (offset % 4) bits
+ w0[1] = append0[0] >> 24 | append0[1] << 8;
+ w0[2] = append0[1] >> 24 | append0[2] << 8;
+ w0[3] = append0[2] >> 24 | append0[3] << 8;
+ w1[0] = append0[3] >> 24 | append1[0] << 8;
+ w1[1] = append1[0] >> 24 | append1[1] << 8;
+ w1[2] = append1[1] >> 24 | append1[2] << 8;
+ w1[3] = append1[2] >> 24 | append1[3] << 8;
+ w2[0] = append1[3] >> 24 | append2[0] << 8;
+ w2[1] = append2[0] >> 24; // bits of append2[0] shifted out above spill into a 10th destination word
break;
- case 3:
- dst0[0] = src_l0[0] | src_r0[0] << 24;
- dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[0] = src_r0[3] >> 8;
+ case 2:
+ w0[0] = w0[0] | append0[0] << 16;
+ w0[1] = append0[0] >> 16 | append0[1] << 16;
+ w0[2] = append0[1] >> 16 | append0[2] << 16;
+ w0[3] = append0[2] >> 16 | append0[3] << 16;
+ w1[0] = append0[3] >> 16 | append1[0] << 16;
+ w1[1] = append1[0] >> 16 | append1[1] << 16;
+ w1[2] = append1[1] >> 16 | append1[2] << 16;
+ w1[3] = append1[2] >> 16 | append1[3] << 16;
+ w2[0] = append1[3] >> 16 | append2[0] << 16;
+ w2[1] = append2[0] >> 16;
+ break;
+
+ case 3:
+ w0[0] = w0[0] | append0[0] << 24;
+ w0[1] = append0[0] >> 8 | append0[1] << 24;
+ w0[2] = append0[1] >> 8 | append0[2] << 24;
+ w0[3] = append0[2] >> 8 | append0[3] << 24;
+ w1[0] = append0[3] >> 8 | append1[0] << 24;
+ w1[1] = append1[0] >> 8 | append1[1] << 24;
+ w1[2] = append1[1] >> 8 | append1[2] << 24;
+ w1[3] = append1[2] >> 8 | append1[3] << 24;
+ w2[0] = append1[3] >> 8 | append2[0] << 24;
+ w2[1] = append2[0] >> 8;
break;
case 4:
- dst0[1] = src_r0[0];
- dst0[2] = src_r0[1];
- dst0[3] = src_r0[2];
- dst1[0] = src_r0[3];
+ w0[1] = append0[0];
+ w0[2] = append0[1];
+ w0[3] = append0[2];
+ w1[0] = append0[3];
+ w1[1] = append1[0];
+ w1[2] = append1[1];
+ w1[3] = append1[2];
+ w2[0] = append1[3];
+ w2[1] = append2[0];
break;
case 5:
- dst0[1] = src_l0[1] | src_r0[0] << 8;
- dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[1] = src_r0[3] >> 24;
+ w0[1] = w0[1] | append0[0] << 8;
+ w0[2] = append0[0] >> 24 | append0[1] << 8;
+ w0[3] = append0[1] >> 24 | append0[2] << 8;
+ w1[0] = append0[2] >> 24 | append0[3] << 8;
+ w1[1] = append0[3] >> 24 | append1[0] << 8;
+ w1[2] = append1[0] >> 24 | append1[1] << 8;
+ w1[3] = append1[1] >> 24 | append1[2] << 8;
+ w2[0] = append1[2] >> 24 | append1[3] << 8;
+ w2[1] = append1[3] >> 24 | append2[0] << 8;
+ w2[2] = append2[0] >> 24;
break;
case 6:
- dst0[1] = src_l0[1] | src_r0[0] << 16;
- dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[1] = src_r0[3] >> 16;
+ w0[1] = w0[1] | append0[0] << 16;
+ w0[2] = append0[0] >> 16 | append0[1] << 16;
+ w0[3] = append0[1] >> 16 | append0[2] << 16;
+ w1[0] = append0[2] >> 16 | append0[3] << 16;
+ w1[1] = append0[3] >> 16 | append1[0] << 16;
+ w1[2] = append1[0] >> 16 | append1[1] << 16;
+ w1[3] = append1[1] >> 16 | append1[2] << 16;
+ w2[0] = append1[2] >> 16 | append1[3] << 16;
+ w2[1] = append1[3] >> 16 | append2[0] << 16;
+ w2[2] = append2[0] >> 16;
break;
case 7:
- dst0[1] = src_l0[1] | src_r0[0] << 24;
- dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[1] = src_r0[3] >> 8;
+ w0[1] = w0[1] | append0[0] << 24;
+ w0[2] = append0[0] >> 8 | append0[1] << 24;
+ w0[3] = append0[1] >> 8 | append0[2] << 24;
+ w1[0] = append0[2] >> 8 | append0[3] << 24;
+ w1[1] = append0[3] >> 8 | append1[0] << 24;
+ w1[2] = append1[0] >> 8 | append1[1] << 24;
+ w1[3] = append1[1] >> 8 | append1[2] << 24;
+ w2[0] = append1[2] >> 8 | append1[3] << 24;
+ w2[1] = append1[3] >> 8 | append2[0] << 24;
+ w2[2] = append2[0] >> 8;
break;
case 8:
- dst0[2] = src_r0[0];
- dst0[3] = src_r0[1];
- dst1[0] = src_r0[2];
- dst1[1] = src_r0[3];
+ w0[2] = append0[0];
+ w0[3] = append0[1];
+ w1[0] = append0[2];
+ w1[1] = append0[3];
+ w1[2] = append1[0];
+ w1[3] = append1[1];
+ w2[0] = append1[2];
+ w2[1] = append1[3];
+ w2[2] = append2[0];
break;
case 9:
- dst0[2] = src_l0[2] | src_r0[0] << 8;
- dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[2] = src_r0[3] >> 24;
+ w0[2] = w0[2] | append0[0] << 8;
+ w0[3] = append0[0] >> 24 | append0[1] << 8;
+ w1[0] = append0[1] >> 24 | append0[2] << 8;
+ w1[1] = append0[2] >> 24 | append0[3] << 8;
+ w1[2] = append0[3] >> 24 | append1[0] << 8;
+ w1[3] = append1[0] >> 24 | append1[1] << 8;
+ w2[0] = append1[1] >> 24 | append1[2] << 8;
+ w2[1] = append1[2] >> 24 | append1[3] << 8;
+ w2[2] = append1[3] >> 24 | append2[0] << 8;
+ w2[3] = append2[0] >> 24;
break;
case 10:
- dst0[2] = src_l0[2] | src_r0[0] << 16;
- dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[2] = src_r0[3] >> 16;
+ w0[2] = w0[2] | append0[0] << 16;
+ w0[3] = append0[0] >> 16 | append0[1] << 16;
+ w1[0] = append0[1] >> 16 | append0[2] << 16;
+ w1[1] = append0[2] >> 16 | append0[3] << 16;
+ w1[2] = append0[3] >> 16 | append1[0] << 16;
+ w1[3] = append1[0] >> 16 | append1[1] << 16;
+ w2[0] = append1[1] >> 16 | append1[2] << 16;
+ w2[1] = append1[2] >> 16 | append1[3] << 16;
+ w2[2] = append1[3] >> 16 | append2[0] << 16;
+ w2[3] = append2[0] >> 16;
break;
case 11:
- dst0[2] = src_l0[2] | src_r0[0] << 24;
- dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[2] = src_r0[3] >> 8;
+ w0[2] = w0[2] | append0[0] << 24;
+ w0[3] = append0[0] >> 8 | append0[1] << 24;
+ w1[0] = append0[1] >> 8 | append0[2] << 24;
+ w1[1] = append0[2] >> 8 | append0[3] << 24;
+ w1[2] = append0[3] >> 8 | append1[0] << 24;
+ w1[3] = append1[0] >> 8 | append1[1] << 24;
+ w2[0] = append1[1] >> 8 | append1[2] << 24;
+ w2[1] = append1[2] >> 8 | append1[3] << 24;
+ w2[2] = append1[3] >> 8 | append2[0] << 24;
+ w2[3] = append2[0] >> 8;
break;
case 12:
- dst0[3] = src_r0[0];
- dst1[0] = src_r0[1];
- dst1[1] = src_r0[2];
- dst1[2] = src_r0[3];
+ w0[3] = append0[0];
+ w1[0] = append0[1];
+ w1[1] = append0[2];
+ w1[2] = append0[3];
+ w1[3] = append1[0];
+ w2[0] = append1[1];
+ w2[1] = append1[2];
+ w2[2] = append1[3];
+ w2[3] = append2[0];
break;
case 13:
- dst0[3] = src_l0[3] | src_r0[0] << 8;
- dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[3] = src_r0[3] >> 24;
+ w0[3] = w0[3] | append0[0] << 8;
+ w1[0] = append0[0] >> 24 | append0[1] << 8;
+ w1[1] = append0[1] >> 24 | append0[2] << 8;
+ w1[2] = append0[2] >> 24 | append0[3] << 8;
+ w1[3] = append0[3] >> 24 | append1[0] << 8;
+ w2[0] = append1[0] >> 24 | append1[1] << 8;
+ w2[1] = append1[1] >> 24 | append1[2] << 8;
+ w2[2] = append1[2] >> 24 | append1[3] << 8;
+ w2[3] = append1[3] >> 24 | append2[0] << 8;
+ w3[0] = append2[0] >> 24;
break;
case 14:
- dst0[3] = src_l0[3] | src_r0[0] << 16;
- dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[3] = src_r0[3] >> 16;
+ w0[3] = w0[3] | append0[0] << 16;
+ w1[0] = append0[0] >> 16 | append0[1] << 16;
+ w1[1] = append0[1] >> 16 | append0[2] << 16;
+ w1[2] = append0[2] >> 16 | append0[3] << 16;
+ w1[3] = append0[3] >> 16 | append1[0] << 16;
+ w2[0] = append1[0] >> 16 | append1[1] << 16;
+ w2[1] = append1[1] >> 16 | append1[2] << 16;
+ w2[2] = append1[2] >> 16 | append1[3] << 16;
+ w2[3] = append1[3] >> 16 | append2[0] << 16;
+ w3[0] = append2[0] >> 16;
break;
case 15:
- dst0[3] = src_l0[3] | src_r0[0] << 24;
- dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[3] = src_r0[3] >> 8;
+ w0[3] = w0[3] | append0[0] << 24;
+ w1[0] = append0[0] >> 8 | append0[1] << 24;
+ w1[1] = append0[1] >> 8 | append0[2] << 24;
+ w1[2] = append0[2] >> 8 | append0[3] << 24;
+ w1[3] = append0[3] >> 8 | append1[0] << 24;
+ w2[0] = append1[0] >> 8 | append1[1] << 24;
+ w2[1] = append1[1] >> 8 | append1[2] << 24;
+ w2[2] = append1[2] >> 8 | append1[3] << 24;
+ w2[3] = append1[3] >> 8 | append2[0] << 24;
+ w3[0] = append2[0] >> 8;
break;
case 16:
- dst1[0] = src_r0[0];
- dst1[1] = src_r0[1];
- dst1[2] = src_r0[2];
- dst1[3] = src_r0[3];
+ w1[0] = append0[0];
+ w1[1] = append0[1];
+ w1[2] = append0[2];
+ w1[3] = append0[3];
+ w2[0] = append1[0];
+ w2[1] = append1[1];
+ w2[2] = append1[2];
+ w2[3] = append1[3];
+ w3[0] = append2[0];
break;
case 17:
- dst1[0] = src_l1[0] | src_r0[0] << 8;
- dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst2[0] = src_r0[3] >> 24;
+ w1[0] = w1[0] | append0[0] << 8;
+ w1[1] = append0[0] >> 24 | append0[1] << 8;
+ w1[2] = append0[1] >> 24 | append0[2] << 8;
+ w1[3] = append0[2] >> 24 | append0[3] << 8;
+ w2[0] = append0[3] >> 24 | append1[0] << 8;
+ w2[1] = append1[0] >> 24 | append1[1] << 8;
+ w2[2] = append1[1] >> 24 | append1[2] << 8;
+ w2[3] = append1[2] >> 24 | append1[3] << 8;
+ w3[0] = append1[3] >> 24 | append2[0] << 8;
+ w3[1] = append2[0] >> 24;
break;
case 18:
- dst1[0] = src_l1[0] | src_r0[0] << 16;
- dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst2[0] = src_r0[3] >> 16;
+ w1[0] = w1[0] | append0[0] << 16;
+ w1[1] = append0[0] >> 16 | append0[1] << 16;
+ w1[2] = append0[1] >> 16 | append0[2] << 16;
+ w1[3] = append0[2] >> 16 | append0[3] << 16;
+ w2[0] = append0[3] >> 16 | append1[0] << 16;
+ w2[1] = append1[0] >> 16 | append1[1] << 16;
+ w2[2] = append1[1] >> 16 | append1[2] << 16;
+ w2[3] = append1[2] >> 16 | append1[3] << 16;
+ w3[0] = append1[3] >> 16 | append2[0] << 16;
+ w3[1] = append2[0] >> 16;
break;
case 19:
- dst1[0] = src_l1[0] | src_r0[0] << 24;
- dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst2[0] = src_r0[3] >> 8;
- break;
-
- case 20:
- dst1[1] = src_r0[0];
- dst1[2] = src_r0[1];
- dst1[3] = src_r0[2];
- dst2[0] = src_r0[3];
+ w1[0] = w1[0] | append0[0] << 24;
+ w1[1] = append0[0] >> 8 | append0[1] << 24;
+ w1[2] = append0[1] >> 8 | append0[2] << 24;
+ w1[3] = append0[2] >> 8 | append0[3] << 24;
+ w2[0] = append0[3] >> 8 | append1[0] << 24;
+ w2[1] = append1[0] >> 8 | append1[1] << 24;
+ w2[2] = append1[1] >> 8 | append1[2] << 24;
+ w2[3] = append1[2] >> 8 | append1[3] << 24;
+ w3[0] = append1[3] >> 8 | append2[0] << 24;
+ w3[1] = append2[0] >> 8;
+ break;
+
+ case 20: // highest offset at which all 9 appended words are still stored (last one lands in w3[1])
+ w1[1] = append0[0];
+ w1[2] = append0[1];
+ w1[3] = append0[2];
+ w2[0] = append0[3];
+ w2[1] = append1[0];
+ w2[2] = append1[1];
+ w2[3] = append1[2];
+ w3[0] = append1[3];
+ w3[1] = append2[0];
break;
case 21:
- dst1[1] = src_l1[1] | src_r0[0] << 8;
- dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst2[1] = src_r0[3] >> 24;
+ w1[1] = w1[1] | append0[0] << 8;
+ w1[2] = append0[0] >> 24 | append0[1] << 8;
+ w1[3] = append0[1] >> 24 | append0[2] << 8;
+ w2[0] = append0[2] >> 24 | append0[3] << 8;
+ w2[1] = append0[3] >> 24 | append1[0] << 8;
+ w2[2] = append1[0] >> 24 | append1[1] << 8;
+ w2[3] = append1[1] >> 24 | append1[2] << 8;
+ w3[0] = append1[2] >> 24 | append1[3] << 8;
+ w3[1] = append1[3] >> 24 | append2[0] << 8; // output stops here: the remaining append2[0] >> 24 is not stored
break;
case 22:
- dst1[1] = src_l1[1] | src_r0[0] << 16;
- dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst2[1] = src_r0[3] >> 16;
+ w1[1] = w1[1] | append0[0] << 16;
+ w1[2] = append0[0] >> 16 | append0[1] << 16;
+ w1[3] = append0[1] >> 16 | append0[2] << 16;
+ w2[0] = append0[2] >> 16 | append0[3] << 16;
+ w2[1] = append0[3] >> 16 | append1[0] << 16;
+ w2[2] = append1[0] >> 16 | append1[1] << 16;
+ w2[3] = append1[1] >> 16 | append1[2] << 16;
+ w3[0] = append1[2] >> 16 | append1[3] << 16;
+ w3[1] = append1[3] >> 16 | append2[0] << 16;
break;
case 23:
- dst1[1] = src_l1[1] | src_r0[0] << 24;
- dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst2[1] = src_r0[3] >> 8;
+ w1[1] = w1[1] | append0[0] << 24;
+ w1[2] = append0[0] >> 8 | append0[1] << 24;
+ w1[3] = append0[1] >> 8 | append0[2] << 24;
+ w2[0] = append0[2] >> 8 | append0[3] << 24;
+ w2[1] = append0[3] >> 8 | append1[0] << 24;
+ w2[2] = append1[0] >> 8 | append1[1] << 24;
+ w2[3] = append1[1] >> 8 | append1[2] << 24;
+ w3[0] = append1[2] >> 8 | append1[3] << 24;
+ w3[1] = append1[3] >> 8 | append2[0] << 24;
break;
case 24:
- dst1[2] = src_r0[0];
- dst1[3] = src_r0[1];
- dst2[0] = src_r0[2];
- dst2[1] = src_r0[3];
+ w1[2] = append0[0];
+ w1[3] = append0[1];
+ w2[0] = append0[2];
+ w2[1] = append0[3];
+ w2[2] = append1[0];
+ w2[3] = append1[1];
+ w3[0] = append1[2];
+ w3[1] = append1[3]; // append2[0] is not stored at this offset
break;
case 25:
- dst1[2] = src_l1[2] | src_r0[0] << 8;
- dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst2[2] = src_r0[3] >> 24;
+ w1[2] = w1[2] | append0[0] << 8;
+ w1[3] = append0[0] >> 24 | append0[1] << 8;
+ w2[0] = append0[1] >> 24 | append0[2] << 8;
+ w2[1] = append0[2] >> 24 | append0[3] << 8;
+ w2[2] = append0[3] >> 24 | append1[0] << 8;
+ w2[3] = append1[0] >> 24 | append1[1] << 8;
+ w3[0] = append1[1] >> 24 | append1[2] << 8;
+ w3[1] = append1[2] >> 24 | append1[3] << 8;
break;
case 26:
- dst1[2] = src_l1[2] | src_r0[0] << 16;
- dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst2[2] = src_r0[3] >> 16;
+ w1[2] = w1[2] | append0[0] << 16;
+ w1[3] = append0[0] >> 16 | append0[1] << 16;
+ w2[0] = append0[1] >> 16 | append0[2] << 16;
+ w2[1] = append0[2] >> 16 | append0[3] << 16;
+ w2[2] = append0[3] >> 16 | append1[0] << 16;
+ w2[3] = append1[0] >> 16 | append1[1] << 16;
+ w3[0] = append1[1] >> 16 | append1[2] << 16;
+ w3[1] = append1[2] >> 16 | append1[3] << 16;
break;
case 27:
- dst1[2] = src_l1[2] | src_r0[0] << 24;
- dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst2[2] = src_r0[3] >> 8;
+ w1[2] = w1[2] | append0[0] << 24;
+ w1[3] = append0[0] >> 8 | append0[1] << 24;
+ w2[0] = append0[1] >> 8 | append0[2] << 24;
+ w2[1] = append0[2] >> 8 | append0[3] << 24;
+ w2[2] = append0[3] >> 8 | append1[0] << 24;
+ w2[3] = append1[0] >> 8 | append1[1] << 24;
+ w3[0] = append1[1] >> 8 | append1[2] << 24;
+ w3[1] = append1[2] >> 8 | append1[3] << 24;
break;
case 28:
- dst1[3] = src_r0[0];
- dst2[0] = src_r0[1];
- dst2[1] = src_r0[2];
- dst2[2] = src_r0[3];
+ w1[3] = append0[0];
+ w2[0] = append0[1];
+ w2[1] = append0[2];
+ w2[2] = append0[3];
+ w2[3] = append1[0];
+ w3[0] = append1[1];
+ w3[1] = append1[2]; // append1[3] and append2[0] are not stored at this offset
break;
case 29:
- dst1[3] = src_l1[3] | src_r0[0] << 8;
- dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst2[3] = src_r0[3] >> 24;
+ w1[3] = w1[3] | append0[0] << 8;
+ w2[0] = append0[0] >> 24 | append0[1] << 8;
+ w2[1] = append0[1] >> 24 | append0[2] << 8;
+ w2[2] = append0[2] >> 24 | append0[3] << 8;
+ w2[3] = append0[3] >> 24 | append1[0] << 8;
+ w3[0] = append1[0] >> 24 | append1[1] << 8;
+ w3[1] = append1[1] >> 24 | append1[2] << 8;
break;
case 30:
- dst1[3] = src_l1[3] | src_r0[0] << 16;
- dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst2[3] = src_r0[3] >> 16;
+ w1[3] = w1[3] | append0[0] << 16;
+ w2[0] = append0[0] >> 16 | append0[1] << 16;
+ w2[1] = append0[1] >> 16 | append0[2] << 16;
+ w2[2] = append0[2] >> 16 | append0[3] << 16;
+ w2[3] = append0[3] >> 16 | append1[0] << 16;
+ w3[0] = append1[0] >> 16 | append1[1] << 16;
+ w3[1] = append1[1] >> 16 | append1[2] << 16;
break;
case 31:
- dst1[3] = src_l1[3] | src_r0[0] << 24;
- dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst2[3] = src_r0[3] >> 8;
+ w1[3] = w1[3] | append0[0] << 24;
+ w2[0] = append0[0] >> 8 | append0[1] << 24;
+ w2[1] = append0[1] >> 8 | append0[2] << 24;
+ w2[2] = append0[2] >> 8 | append0[3] << 24;
+ w2[3] = append0[3] >> 8 | append1[0] << 24;
+ w3[0] = append1[0] >> 8 | append1[1] << 24;
+ w3[1] = append1[1] >> 8 | append1[2] << 24;
break;
case 32:
- dst2[0] = src_r0[0];
- dst2[1] = src_r0[1];
- dst2[2] = src_r0[2];
- dst2[3] = src_r0[3];
- break;
-
- case 33:
- dst2[0] = src_l2[0] | src_r0[0] << 8;
- dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8;
- break;
-
- case 34:
- dst2[0] = src_l2[0] | src_r0[0] << 16;
- dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16;
+ w2[0] = append0[0];
+ w2[1] = append0[1];
+ w2[2] = append0[2];
+ w2[3] = append0[3];
+ w3[0] = append1[0];
+ w3[1] = append1[1]; // append1[2], append1[3] and append2[0] are not stored at this offset
break;
+ }
+}
- case 35:
- dst2[0] = src_l2[0] | src_r0[0] << 24;
- dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24;
- break;
+static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
+{
+ #ifdef IS_AMD
+ const int offset_mod_4 = offset & 3;
- case 36:
- dst2[1] = src_r0[0];
- dst2[2] = src_r0[1];
- dst2[3] = src_r0[2];
- break;
+ const int offset_minus_4 = 4 - offset;
- case 37:
- dst2[1] = src_l2[1] | src_r0[0] << 8;
- dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8;
- break;
+ switch (offset / 4)
+ {
+ case 0:
+ w3[2] = amd_bytealign ( 0, w3[1], offset_minus_4);
+ w3[1] = amd_bytealign (w3[1], w3[0], offset_minus_4);
+ w3[0] = amd_bytealign (w3[0], w2[3], offset_minus_4);
+ w2[3] = amd_bytealign (w2[3], w2[2], offset_minus_4);
+ w2[2] = amd_bytealign (w2[2], w2[1], offset_minus_4);
+ w2[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
+ w2[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
+ w1[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
+ w1[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
+ w1[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w1[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w0[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w0[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w0[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w0[0] = amd_bytealign (w0[0], 0, offset_minus_4);
- case 38:
- dst2[1] = src_l2[1] | src_r0[0] << 16;
- dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16;
- break;
+ if (offset_mod_4 == 0)
+ {
+ w0[0] = w0[1];
+ w0[1] = w0[2];
+ w0[2] = w0[3];
+ w0[3] = w1[0];
+ w1[0] = w1[1];
+ w1[1] = w1[2];
+ w1[2] = w1[3];
+ w1[3] = w2[0];
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
- case 39:
- dst2[1] = src_l2[1] | src_r0[0] << 24;
- dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24;
break;
- case 40:
- dst2[2] = src_r0[0];
- dst2[3] = src_r0[1];
+ case 1:
+ w3[2] = amd_bytealign ( 0, w3[0], offset_minus_4);
+ w3[1] = amd_bytealign (w3[0], w2[3], offset_minus_4);
+ w3[0] = amd_bytealign (w2[3], w2[2], offset_minus_4);
+ w2[3] = amd_bytealign (w2[2], w2[1], offset_minus_4);
+ w2[2] = amd_bytealign (w2[1], w2[0], offset_minus_4);
+ w2[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
+ w2[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
+ w1[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
+ w1[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w1[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w1[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w0[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w0[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w0[1] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w0[1] = w0[2];
+ w0[2] = w0[3];
+ w0[3] = w1[0];
+ w1[0] = w1[1];
+ w1[1] = w1[2];
+ w1[2] = w1[3];
+ w1[3] = w2[0];
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
+
+ break;
+
+ case 2:
+ w3[2] = amd_bytealign ( 0, w2[3], offset_minus_4);
+ w3[1] = amd_bytealign (w2[3], w2[2], offset_minus_4);
+ w3[0] = amd_bytealign (w2[2], w2[1], offset_minus_4);
+ w2[3] = amd_bytealign (w2[1], w2[0], offset_minus_4);
+ w2[2] = amd_bytealign (w2[0], w1[3], offset_minus_4);
+ w2[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
+ w2[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
+ w1[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w1[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w1[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w1[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w0[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w0[2] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w0[1] = 0;
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w0[2] = w0[3];
+ w0[3] = w1[0];
+ w1[0] = w1[1];
+ w1[1] = w1[2];
+ w1[2] = w1[3];
+ w1[3] = w2[0];
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
+
+ break;
+
+ case 3:
+ w3[2] = amd_bytealign ( 0, w2[2], offset_minus_4);
+ w3[1] = amd_bytealign (w2[2], w2[1], offset_minus_4);
+ w3[0] = amd_bytealign (w2[1], w2[0], offset_minus_4);
+ w2[3] = amd_bytealign (w2[0], w1[3], offset_minus_4);
+ w2[2] = amd_bytealign (w1[3], w1[2], offset_minus_4);
+ w2[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
+ w2[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w1[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w1[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w1[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w1[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w0[3] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w0[3] = w1[0];
+ w1[0] = w1[1];
+ w1[1] = w1[2];
+ w1[2] = w1[3];
+ w1[3] = w2[0];
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
+
+ break;
+
+ case 4:
+ w3[2] = amd_bytealign ( 0, w2[1], offset_minus_4);
+ w3[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
+ w3[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
+ w2[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
+ w2[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
+ w2[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w2[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w1[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w1[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w1[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w1[0] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w1[0] = w1[1];
+ w1[1] = w1[2];
+ w1[2] = w1[3];
+ w1[3] = w2[0];
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
+
+ break;
+
+ case 5:
+ w3[2] = amd_bytealign ( 0, w2[0], offset_minus_4);
+ w3[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
+ w3[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
+ w2[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
+ w2[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w2[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w2[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w1[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w1[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w1[1] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w1[1] = w1[2];
+ w1[2] = w1[3];
+ w1[3] = w2[0];
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
+
+ break;
+
+ case 6:
+ w3[2] = amd_bytealign ( 0, w1[3], offset_minus_4);
+ w3[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
+ w3[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
+ w2[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w2[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w2[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w2[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w1[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w1[2] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w1[2] = w1[3];
+ w1[3] = w2[0];
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
+
+ break;
+
+ case 7:
+ w3[2] = amd_bytealign ( 0, w1[2], offset_minus_4);
+ w3[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
+ w3[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w2[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w2[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w2[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w2[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w1[3] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w1[3] = w2[0];
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
+
+ break;
+
+ case 8:
+ w3[2] = amd_bytealign ( 0, w1[1], offset_minus_4);
+ w3[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w3[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w2[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w2[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w2[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w2[0] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
+
+ break;
+
+ case 9:
+ w3[2] = amd_bytealign ( 0, w1[0], offset_minus_4);
+ w3[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w3[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w2[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w2[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w2[1] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
+
+ break;
+
+ case 10:
+ w3[2] = amd_bytealign ( 0, w0[3], offset_minus_4);
+ w3[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w3[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w2[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w2[2] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
+
+ break;
+
+ case 11:
+ w3[2] = amd_bytealign ( 0, w0[2], offset_minus_4);
+ w3[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w3[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w2[3] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
+
+ break;
+
+ case 12:
+ w3[2] = amd_bytealign ( 0, w0[1], offset_minus_4);
+ w3[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w3[0] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w2[3] = 0;
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
+
+ break;
+
+ case 13:
+ w3[2] = amd_bytealign ( 0, w0[0], offset_minus_4);
+ w3[1] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w3[0] = 0;
+ w2[3] = 0;
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
+
+ break;
+ }
+ #endif
+
+ #ifdef IS_NV
+ const int offset_minus_4 = 4 - (offset % 4);
+
+ const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+
+ switch (offset / 4)
+ {
+ case 0:
+ w3[1] = __byte_perm (w3[0], w3[1], selector);
+ w3[0] = __byte_perm (w2[3], w3[0], selector);
+ w2[3] = __byte_perm (w2[2], w2[3], selector);
+ w2[2] = __byte_perm (w2[1], w2[2], selector);
+ w2[1] = __byte_perm (w2[0], w2[1], selector);
+ w2[0] = __byte_perm (w1[3], w2[0], selector);
+ w1[3] = __byte_perm (w1[2], w1[3], selector);
+ w1[2] = __byte_perm (w1[1], w1[2], selector);
+ w1[1] = __byte_perm (w1[0], w1[1], selector);
+ w1[0] = __byte_perm (w0[3], w1[0], selector);
+ w0[3] = __byte_perm (w0[2], w0[3], selector);
+ w0[2] = __byte_perm (w0[1], w0[2], selector);
+ w0[1] = __byte_perm (w0[0], w0[1], selector);
+ w0[0] = __byte_perm ( 0, w0[0], selector);
+
+ break;
+
+ case 1:
+ w3[1] = __byte_perm (w2[3], w3[0], selector);
+ w3[0] = __byte_perm (w2[2], w2[3], selector);
+ w2[3] = __byte_perm (w2[1], w2[2], selector);
+ w2[2] = __byte_perm (w2[0], w2[1], selector);
+ w2[1] = __byte_perm (w1[3], w2[0], selector);
+ w2[0] = __byte_perm (w1[2], w1[3], selector);
+ w1[3] = __byte_perm (w1[1], w1[2], selector);
+ w1[2] = __byte_perm (w1[0], w1[1], selector);
+ w1[1] = __byte_perm (w0[3], w1[0], selector);
+ w1[0] = __byte_perm (w0[2], w0[3], selector);
+ w0[3] = __byte_perm (w0[1], w0[2], selector);
+ w0[2] = __byte_perm (w0[0], w0[1], selector);
+ w0[1] = __byte_perm ( 0, w0[0], selector);
+ w0[0] = 0;
+
+ break;
+
+ case 2:
+ w3[1] = __byte_perm (w2[2], w2[3], selector);
+ w3[0] = __byte_perm (w2[1], w2[2], selector);
+ w2[3] = __byte_perm (w2[0], w2[1], selector);
+ w2[2] = __byte_perm (w1[3], w2[0], selector);
+ w2[1] = __byte_perm (w1[2], w1[3], selector);
+ w2[0] = __byte_perm (w1[1], w1[2], selector);
+ w1[3] = __byte_perm (w1[0], w1[1], selector);
+ w1[2] = __byte_perm (w0[3], w1[0], selector);
+ w1[1] = __byte_perm (w0[2], w0[3], selector);
+ w1[0] = __byte_perm (w0[1], w0[2], selector);
+ w0[3] = __byte_perm (w0[0], w0[1], selector);
+ w0[2] = __byte_perm ( 0, w0[0], selector);
+ w0[1] = 0;
+ w0[0] = 0;
+
+ break;
+
+ case 3:
+ w3[1] = __byte_perm (w2[1], w2[2], selector);
+ w3[0] = __byte_perm (w2[0], w2[1], selector);
+ w2[3] = __byte_perm (w1[3], w2[0], selector);
+ w2[2] = __byte_perm (w1[2], w1[3], selector);
+ w2[1] = __byte_perm (w1[1], w1[2], selector);
+ w2[0] = __byte_perm (w1[0], w1[1], selector);
+ w1[3] = __byte_perm (w0[3], w1[0], selector);
+ w1[2] = __byte_perm (w0[2], w0[3], selector);
+ w1[1] = __byte_perm (w0[1], w0[2], selector);
+ w1[0] = __byte_perm (w0[0], w0[1], selector);
+ w0[3] = __byte_perm ( 0, w0[0], selector);
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ break;
+
+ case 4:
+ w3[1] = __byte_perm (w2[0], w2[1], selector);
+ w3[0] = __byte_perm (w1[3], w2[0], selector);
+ w2[3] = __byte_perm (w1[2], w1[3], selector);
+ w2[2] = __byte_perm (w1[1], w1[2], selector);
+ w2[1] = __byte_perm (w1[0], w1[1], selector);
+ w2[0] = __byte_perm (w0[3], w1[0], selector);
+ w1[3] = __byte_perm (w0[2], w0[3], selector);
+ w1[2] = __byte_perm (w0[1], w0[2], selector);
+ w1[1] = __byte_perm (w0[0], w0[1], selector);
+ w1[0] = __byte_perm ( 0, w0[0], selector);
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
break;
- case 41:
- dst2[2] = src_l2[2] | src_r0[0] << 8;
- dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8;
+ case 5:
+ w3[1] = __byte_perm (w1[3], w2[0], selector);
+ w3[0] = __byte_perm (w1[2], w1[3], selector);
+ w2[3] = __byte_perm (w1[1], w1[2], selector);
+ w2[2] = __byte_perm (w1[0], w1[1], selector);
+ w2[1] = __byte_perm (w0[3], w1[0], selector);
+ w2[0] = __byte_perm (w0[2], w0[3], selector);
+ w1[3] = __byte_perm (w0[1], w0[2], selector);
+ w1[2] = __byte_perm (w0[0], w0[1], selector);
+ w1[1] = __byte_perm ( 0, w0[0], selector);
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
break;
- case 42:
- dst2[2] = src_l2[2] | src_r0[0] << 16;
- dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16;
+ case 6:
+ w3[1] = __byte_perm (w1[2], w1[3], selector);
+ w3[0] = __byte_perm (w1[1], w1[2], selector);
+ w2[3] = __byte_perm (w1[0], w1[1], selector);
+ w2[2] = __byte_perm (w0[3], w1[0], selector);
+ w2[1] = __byte_perm (w0[2], w0[3], selector);
+ w2[0] = __byte_perm (w0[1], w0[2], selector);
+ w1[3] = __byte_perm (w0[0], w0[1], selector);
+ w1[2] = __byte_perm ( 0, w0[0], selector);
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
break;
- case 43:
- dst2[2] = src_l2[2] | src_r0[0] << 24;
- dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ case 7:
+ w3[1] = __byte_perm (w1[1], w1[2], selector);
+ w3[0] = __byte_perm (w1[0], w1[1], selector);
+ w2[3] = __byte_perm (w0[3], w1[0], selector);
+ w2[2] = __byte_perm (w0[2], w0[3], selector);
+ w2[1] = __byte_perm (w0[1], w0[2], selector);
+ w2[0] = __byte_perm (w0[0], w0[1], selector);
+ w1[3] = __byte_perm ( 0, w0[0], selector);
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
break;
- case 44:
- dst2[3] = src_r0[0];
+ case 8:
+ w3[1] = __byte_perm (w1[0], w1[1], selector);
+ w3[0] = __byte_perm (w0[3], w1[0], selector);
+ w2[3] = __byte_perm (w0[2], w0[3], selector);
+ w2[2] = __byte_perm (w0[1], w0[2], selector);
+ w2[1] = __byte_perm (w0[0], w0[1], selector);
+ w2[0] = __byte_perm ( 0, w0[0], selector);
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
break;
- case 45:
- dst2[3] = src_l2[3] | src_r0[0] << 8;
+ case 9:
+ w3[1] = __byte_perm (w0[3], w1[0], selector);
+ w3[0] = __byte_perm (w0[2], w0[3], selector);
+ w2[3] = __byte_perm (w0[1], w0[2], selector);
+ w2[2] = __byte_perm (w0[0], w0[1], selector);
+ w2[1] = __byte_perm ( 0, w0[0], selector);
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
break;
- case 46:
- dst2[3] = src_l2[3] | src_r0[0] << 16;
+ case 10:
+ w3[1] = __byte_perm (w0[2], w0[3], selector);
+ w3[0] = __byte_perm (w0[1], w0[2], selector);
+ w2[3] = __byte_perm (w0[0], w0[1], selector);
+ w2[2] = __byte_perm ( 0, w0[0], selector);
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
break;
- case 47:
- dst2[3] = src_l2[3] | src_r0[0] << 24;
+ case 11:
+ w3[1] = __byte_perm (w0[1], w0[2], selector);
+ w3[0] = __byte_perm (w0[0], w0[1], selector);
+ w2[3] = __byte_perm ( 0, w0[0], selector);
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ break;
+
+ case 12:
+ w3[1] = __byte_perm (w0[0], w0[1], selector);
+ w3[0] = __byte_perm ( 0, w0[0], selector);
+ w2[3] = 0;
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ break;
+
+ case 13:
+ w3[1] = __byte_perm ( 0, w0[0], selector);
+ w3[0] = 0;
+ w2[3] = 0;
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
break;
}
+ #endif
}
-// before: device_memcat12L
-static void memcat_c47_d3x4_sl3x4_sr2x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4], u32 src_r1[4])
+static void switch_buffer_by_offset_be (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
- switch (offset)
+ #ifdef IS_AMD
+ switch (offset / 4)
{
case 0:
- dst0[0] = src_r0[0];
- dst0[1] = src_r0[1];
- dst0[2] = src_r0[2];
- dst0[3] = src_r0[3];
- dst1[0] = src_r1[0];
- dst1[1] = src_r1[1];
- dst1[2] = src_r1[2];
- dst1[3] = src_r1[3];
+ w3[2] = amd_bytealign (w3[1], 0, offset);
+ w3[1] = amd_bytealign (w3[0], w3[1], offset);
+ w3[0] = amd_bytealign (w2[3], w3[0], offset);
+ w2[3] = amd_bytealign (w2[2], w2[3], offset);
+ w2[2] = amd_bytealign (w2[1], w2[2], offset);
+ w2[1] = amd_bytealign (w2[0], w2[1], offset);
+ w2[0] = amd_bytealign (w1[3], w2[0], offset);
+ w1[3] = amd_bytealign (w1[2], w1[3], offset);
+ w1[2] = amd_bytealign (w1[1], w1[2], offset);
+ w1[1] = amd_bytealign (w1[0], w1[1], offset);
+ w1[0] = amd_bytealign (w0[3], w1[0], offset);
+ w0[3] = amd_bytealign (w0[2], w0[3], offset);
+ w0[2] = amd_bytealign (w0[1], w0[2], offset);
+ w0[1] = amd_bytealign (w0[0], w0[1], offset);
+ w0[0] = amd_bytealign ( 0, w0[0], offset);
break;
case 1:
- dst0[0] = src_l0[0] | src_r0[0] << 8;
- dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[0] = src_r0[3] >> 24 | src_r1[0] << 8;
- dst1[1] = src_r1[0] >> 24 | src_r1[1] << 8;
- dst1[2] = src_r1[1] >> 24 | src_r1[2] << 8;
- dst1[3] = src_r1[2] >> 24 | src_r1[3] << 8;
- dst2[0] = src_r1[3] >> 24;
+ w3[2] = amd_bytealign (w3[0], 0, offset);
+ w3[1] = amd_bytealign (w2[3], w3[0], offset);
+ w3[0] = amd_bytealign (w2[2], w2[3], offset);
+ w2[3] = amd_bytealign (w2[1], w2[2], offset);
+ w2[2] = amd_bytealign (w2[0], w2[1], offset);
+ w2[1] = amd_bytealign (w1[3], w2[0], offset);
+ w2[0] = amd_bytealign (w1[2], w1[3], offset);
+ w1[3] = amd_bytealign (w1[1], w1[2], offset);
+ w1[2] = amd_bytealign (w1[0], w1[1], offset);
+ w1[1] = amd_bytealign (w0[3], w1[0], offset);
+ w1[0] = amd_bytealign (w0[2], w0[3], offset);
+ w0[3] = amd_bytealign (w0[1], w0[2], offset);
+ w0[2] = amd_bytealign (w0[0], w0[1], offset);
+ w0[1] = amd_bytealign ( 0, w0[0], offset);
+ w0[0] = 0;
break;
case 2:
- dst0[0] = src_l0[0] | src_r0[0] << 16;
- dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[0] = src_r0[3] >> 16 | src_r1[0] << 16;
- dst1[1] = src_r1[0] >> 16 | src_r1[1] << 16;
- dst1[2] = src_r1[1] >> 16 | src_r1[2] << 16;
- dst1[3] = src_r1[2] >> 16 | src_r1[3] << 16;
- dst2[0] = src_r1[3] >> 16;
+ w3[2] = amd_bytealign (w2[3], 0, offset);
+ w3[1] = amd_bytealign (w2[2], w2[3], offset);
+ w3[0] = amd_bytealign (w2[1], w2[2], offset);
+ w2[3] = amd_bytealign (w2[0], w2[1], offset);
+ w2[2] = amd_bytealign (w1[3], w2[0], offset);
+ w2[1] = amd_bytealign (w1[2], w1[3], offset);
+ w2[0] = amd_bytealign (w1[1], w1[2], offset);
+ w1[3] = amd_bytealign (w1[0], w1[1], offset);
+ w1[2] = amd_bytealign (w0[3], w1[0], offset);
+ w1[1] = amd_bytealign (w0[2], w0[3], offset);
+ w1[0] = amd_bytealign (w0[1], w0[2], offset);
+ w0[3] = amd_bytealign (w0[0], w0[1], offset);
+ w0[2] = amd_bytealign ( 0, w0[0], offset);
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 3:
- dst0[0] = src_l0[0] | src_r0[0] << 24;
- dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[0] = src_r0[3] >> 8 | src_r1[0] << 24;
- dst1[1] = src_r1[0] >> 8 | src_r1[1] << 24;
- dst1[2] = src_r1[1] >> 8 | src_r1[2] << 24;
- dst1[3] = src_r1[2] >> 8 | src_r1[3] << 24;
- dst2[0] = src_r1[3] >> 8;
+ w3[2] = amd_bytealign (w2[2], 0, offset);
+ w3[1] = amd_bytealign (w2[1], w2[2], offset);
+ w3[0] = amd_bytealign (w2[0], w2[1], offset);
+ w2[3] = amd_bytealign (w1[3], w2[0], offset);
+ w2[2] = amd_bytealign (w1[2], w1[3], offset);
+ w2[1] = amd_bytealign (w1[1], w1[2], offset);
+ w2[0] = amd_bytealign (w1[0], w1[1], offset);
+ w1[3] = amd_bytealign (w0[3], w1[0], offset);
+ w1[2] = amd_bytealign (w0[2], w0[3], offset);
+ w1[1] = amd_bytealign (w0[1], w0[2], offset);
+ w1[0] = amd_bytealign (w0[0], w0[1], offset);
+ w0[3] = amd_bytealign ( 0, w0[0], offset);
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 4:
- dst0[1] = src_r0[0];
- dst0[2] = src_r0[1];
- dst0[3] = src_r0[2];
- dst1[0] = src_r0[3];
- dst1[1] = src_r1[0];
- dst1[2] = src_r1[1];
- dst1[3] = src_r1[2];
- dst2[0] = src_r1[3];
+ w3[2] = amd_bytealign (w2[1], 0, offset);
+ w3[1] = amd_bytealign (w2[0], w2[1], offset);
+ w3[0] = amd_bytealign (w1[3], w2[0], offset);
+ w2[3] = amd_bytealign (w1[2], w1[3], offset);
+ w2[2] = amd_bytealign (w1[1], w1[2], offset);
+ w2[1] = amd_bytealign (w1[0], w1[1], offset);
+ w2[0] = amd_bytealign (w0[3], w1[0], offset);
+ w1[3] = amd_bytealign (w0[2], w0[3], offset);
+ w1[2] = amd_bytealign (w0[1], w0[2], offset);
+ w1[1] = amd_bytealign (w0[0], w0[1], offset);
+ w1[0] = amd_bytealign ( 0, w0[0], offset);
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 5:
- dst0[1] = src_l0[1] | src_r0[0] << 8;
- dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[1] = src_r0[3] >> 24 | src_r1[0] << 8;
- dst1[2] = src_r1[0] >> 24 | src_r1[1] << 8;
- dst1[3] = src_r1[1] >> 24 | src_r1[2] << 8;
- dst2[0] = src_r1[2] >> 24 | src_r1[3] << 8;
- dst2[1] = src_r1[3] >> 24;
+ w3[2] = amd_bytealign (w2[0], 0, offset);
+ w3[1] = amd_bytealign (w1[3], w2[0], offset);
+ w3[0] = amd_bytealign (w1[2], w1[3], offset);
+ w2[3] = amd_bytealign (w1[1], w1[2], offset);
+ w2[2] = amd_bytealign (w1[0], w1[1], offset);
+ w2[1] = amd_bytealign (w0[3], w1[0], offset);
+ w2[0] = amd_bytealign (w0[2], w0[3], offset);
+ w1[3] = amd_bytealign (w0[1], w0[2], offset);
+ w1[2] = amd_bytealign (w0[0], w0[1], offset);
+ w1[1] = amd_bytealign ( 0, w0[0], offset);
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 6:
- dst0[1] = src_l0[1] | src_r0[0] << 16;
- dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[1] = src_r0[3] >> 16 | src_r1[0] << 16;
- dst1[2] = src_r1[0] >> 16 | src_r1[1] << 16;
- dst1[3] = src_r1[1] >> 16 | src_r1[2] << 16;
- dst2[0] = src_r1[2] >> 16 | src_r1[3] << 16;
- dst2[1] = src_r1[3] >> 16;
+ w3[2] = amd_bytealign (w1[3], 0, offset);
+ w3[1] = amd_bytealign (w1[2], w1[3], offset);
+ w3[0] = amd_bytealign (w1[1], w1[2], offset);
+ w2[3] = amd_bytealign (w1[0], w1[1], offset);
+ w2[2] = amd_bytealign (w0[3], w1[0], offset);
+ w2[1] = amd_bytealign (w0[2], w0[3], offset);
+ w2[0] = amd_bytealign (w0[1], w0[2], offset);
+ w1[3] = amd_bytealign (w0[0], w0[1], offset);
+ w1[2] = amd_bytealign ( 0, w0[0], offset);
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 7:
- dst0[1] = src_l0[1] | src_r0[0] << 24;
- dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[1] = src_r0[3] >> 8 | src_r1[0] << 24;
- dst1[2] = src_r1[0] >> 8 | src_r1[1] << 24;
- dst1[3] = src_r1[1] >> 8 | src_r1[2] << 24;
- dst2[0] = src_r1[2] >> 8 | src_r1[3] << 24;
- dst2[1] = src_r1[3] >> 8;
+ w3[2] = amd_bytealign (w1[2], 0, offset);
+ w3[1] = amd_bytealign (w1[1], w1[2], offset);
+ w3[0] = amd_bytealign (w1[0], w1[1], offset);
+ w2[3] = amd_bytealign (w0[3], w1[0], offset);
+ w2[2] = amd_bytealign (w0[2], w0[3], offset);
+ w2[1] = amd_bytealign (w0[1], w0[2], offset);
+ w2[0] = amd_bytealign (w0[0], w0[1], offset);
+ w1[3] = amd_bytealign ( 0, w0[0], offset);
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 8:
- dst0[2] = src_r0[0];
- dst0[3] = src_r0[1];
- dst1[0] = src_r0[2];
- dst1[1] = src_r0[3];
- dst1[2] = src_r1[0];
- dst1[3] = src_r1[1];
- dst2[0] = src_r1[2];
- dst2[1] = src_r1[3];
+ w3[2] = amd_bytealign (w1[1], 0, offset);
+ w3[1] = amd_bytealign (w1[0], w1[1], offset);
+ w3[0] = amd_bytealign (w0[3], w1[0], offset);
+ w2[3] = amd_bytealign (w0[2], w0[3], offset);
+ w2[2] = amd_bytealign (w0[1], w0[2], offset);
+ w2[1] = amd_bytealign (w0[0], w0[1], offset);
+ w2[0] = amd_bytealign ( 0, w0[0], offset);
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 9:
- dst0[2] = src_l0[2] | src_r0[0] << 8;
- dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[2] = src_r0[3] >> 24 | src_r1[0] << 8;
- dst1[3] = src_r1[0] >> 24 | src_r1[1] << 8;
- dst2[0] = src_r1[1] >> 24 | src_r1[2] << 8;
- dst2[1] = src_r1[2] >> 24 | src_r1[3] << 8;
- dst2[2] = src_r1[3] >> 24;
+ w3[2] = amd_bytealign (w1[0], 0, offset);
+ w3[1] = amd_bytealign (w0[3], w1[0], offset);
+ w3[0] = amd_bytealign (w0[2], w0[3], offset);
+ w2[3] = amd_bytealign (w0[1], w0[2], offset);
+ w2[2] = amd_bytealign (w0[0], w0[1], offset);
+ w2[1] = amd_bytealign ( 0, w0[0], offset);
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 10:
- dst0[2] = src_l0[2] | src_r0[0] << 16;
- dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[2] = src_r0[3] >> 16 | src_r1[0] << 16;
- dst1[3] = src_r1[0] >> 16 | src_r1[1] << 16;
- dst2[0] = src_r1[1] >> 16 | src_r1[2] << 16;
- dst2[1] = src_r1[2] >> 16 | src_r1[3] << 16;
- dst2[2] = src_r1[3] >> 16;
+ w3[2] = amd_bytealign (w0[3], 0, offset);
+ w3[1] = amd_bytealign (w0[2], w0[3], offset);
+ w3[0] = amd_bytealign (w0[1], w0[2], offset);
+ w2[3] = amd_bytealign (w0[0], w0[1], offset);
+ w2[2] = amd_bytealign ( 0, w0[0], offset);
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 11:
- dst0[2] = src_l0[2] | src_r0[0] << 24;
- dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[2] = src_r0[3] >> 8 | src_r1[0] << 24;
- dst1[3] = src_r1[0] >> 8 | src_r1[1] << 24;
- dst2[0] = src_r1[1] >> 8 | src_r1[2] << 24;
- dst2[1] = src_r1[2] >> 8 | src_r1[3] << 24;
- dst2[2] = src_r1[3] >> 8;
- break;
-
- case 12:
- dst0[3] = src_r0[0];
- dst1[0] = src_r0[1];
- dst1[1] = src_r0[2];
- dst1[2] = src_r0[3];
- dst1[3] = src_r1[0];
- dst2[0] = src_r1[1];
- dst2[1] = src_r1[2];
- dst2[2] = src_r1[3];
- break;
-
- case 13:
- dst0[3] = src_l0[3] | src_r0[0] << 8;
- dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[3] = src_r0[3] >> 24 | src_r1[0] << 8;
- dst2[0] = src_r1[0] >> 24 | src_r1[1] << 8;
- dst2[1] = src_r1[1] >> 24 | src_r1[2] << 8;
- dst2[2] = src_r1[2] >> 24 | src_r1[3] << 8;
- dst2[3] = src_r1[3] >> 24;
- break;
-
- case 14:
- dst0[3] = src_l0[3] | src_r0[0] << 16;
- dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[3] = src_r0[3] >> 16 | src_r1[0] << 16;
- dst2[0] = src_r1[0] >> 16 | src_r1[1] << 16;
- dst2[1] = src_r1[1] >> 16 | src_r1[2] << 16;
- dst2[2] = src_r1[2] >> 16 | src_r1[3] << 16;
- dst2[3] = src_r1[3] >> 16;
- break;
-
- case 15:
- dst0[3] = src_l0[3] | src_r0[0] << 24;
- dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[3] = src_r0[3] >> 8 | src_r1[0] << 24;
- dst2[0] = src_r1[0] >> 8 | src_r1[1] << 24;
- dst2[1] = src_r1[1] >> 8 | src_r1[2] << 24;
- dst2[2] = src_r1[2] >> 8 | src_r1[3] << 24;
- dst2[3] = src_r1[3] >> 8;
- break;
-
- case 16:
- dst1[0] = src_r0[0];
- dst1[1] = src_r0[1];
- dst1[2] = src_r0[2];
- dst1[3] = src_r0[3];
- dst2[0] = src_r1[0];
- dst2[1] = src_r1[1];
- dst2[2] = src_r1[2];
- dst2[3] = src_r1[3];
+ w3[2] = amd_bytealign (w0[2], 0, offset);
+ w3[1] = amd_bytealign (w0[1], w0[2], offset);
+ w3[0] = amd_bytealign (w0[0], w0[1], offset);
+ w2[3] = amd_bytealign ( 0, w0[0], offset);
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 17:
- dst1[0] = src_l1[0] | src_r0[0] << 8;
- dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst2[0] = src_r0[3] >> 24 | src_r1[0] << 8;
- dst2[1] = src_r1[0] >> 24 | src_r1[1] << 8;
- dst2[2] = src_r1[1] >> 24 | src_r1[2] << 8;
- dst2[3] = src_r1[2] >> 24 | src_r1[3] << 8;
+ case 12:
+ w3[2] = amd_bytealign (w0[1], 0, offset);
+ w3[1] = amd_bytealign (w0[0], w0[1], offset);
+ w3[0] = amd_bytealign ( 0, w0[0], offset);
+ w2[3] = 0;
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 18:
- dst1[0] = src_l1[0] | src_r0[0] << 16;
- dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst2[0] = src_r0[3] >> 16 | src_r1[0] << 16;
- dst2[1] = src_r1[0] >> 16 | src_r1[1] << 16;
- dst2[2] = src_r1[1] >> 16 | src_r1[2] << 16;
- dst2[3] = src_r1[2] >> 16 | src_r1[3] << 16;
+ case 13:
+ w3[2] = amd_bytealign (w0[0], 0, offset);
+ w3[1] = amd_bytealign ( 0, w0[0], offset);
+ w3[0] = 0;
+ w2[3] = 0;
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
+ }
+ #endif
- case 19:
- dst1[0] = src_l1[0] | src_r0[0] << 24;
- dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst2[0] = src_r0[3] >> 8 | src_r1[0] << 24;
- dst2[1] = src_r1[0] >> 8 | src_r1[1] << 24;
- dst2[2] = src_r1[1] >> 8 | src_r1[2] << 24;
- dst2[3] = src_r1[2] >> 8 | src_r1[3] << 24;
- break;
+ #ifdef IS_NV
+ const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
- case 20:
- dst1[1] = src_r1[0];
- dst1[2] = src_r0[1];
- dst1[3] = src_r0[2];
- dst2[0] = src_r0[3];
- dst2[1] = src_r1[0];
- dst2[2] = src_r1[1];
- dst2[3] = src_r1[2];
+ switch (offset / 4)
+ {
+ case 0:
+ w3[1] = __byte_perm (w3[1], w3[0], selector);
+ w3[0] = __byte_perm (w3[0], w2[3], selector);
+ w2[3] = __byte_perm (w2[3], w2[2], selector);
+ w2[2] = __byte_perm (w2[2], w2[1], selector);
+ w2[1] = __byte_perm (w2[1], w2[0], selector);
+ w2[0] = __byte_perm (w2[0], w1[3], selector);
+ w1[3] = __byte_perm (w1[3], w1[2], selector);
+ w1[2] = __byte_perm (w1[2], w1[1], selector);
+ w1[1] = __byte_perm (w1[1], w1[0], selector);
+ w1[0] = __byte_perm (w1[0], w0[3], selector);
+ w0[3] = __byte_perm (w0[3], w0[2], selector);
+ w0[2] = __byte_perm (w0[2], w0[1], selector);
+ w0[1] = __byte_perm (w0[1], w0[0], selector);
+ w0[0] = __byte_perm (w0[0], 0, selector);
break;
- case 21:
- dst1[1] = src_l1[1] | src_r0[0] << 8;
- dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst2[1] = src_r0[3] >> 24 | src_r1[0] << 8;
- dst2[2] = src_r1[0] >> 24 | src_r1[1] << 8;
- dst2[3] = src_r1[1] >> 24 | src_r1[2] << 8;
+ case 1:
+ w3[1] = __byte_perm (w3[0], w2[3], selector);
+ w3[0] = __byte_perm (w2[3], w2[2], selector);
+ w2[3] = __byte_perm (w2[2], w2[1], selector);
+ w2[2] = __byte_perm (w2[1], w2[0], selector);
+ w2[1] = __byte_perm (w2[0], w1[3], selector);
+ w2[0] = __byte_perm (w1[3], w1[2], selector);
+ w1[3] = __byte_perm (w1[2], w1[1], selector);
+ w1[2] = __byte_perm (w1[1], w1[0], selector);
+ w1[1] = __byte_perm (w1[0], w0[3], selector);
+ w1[0] = __byte_perm (w0[3], w0[2], selector);
+ w0[3] = __byte_perm (w0[2], w0[1], selector);
+ w0[2] = __byte_perm (w0[1], w0[0], selector);
+ w0[1] = __byte_perm (w0[0], 0, selector);
+ w0[0] = 0;
break;
- case 22:
- dst1[1] = src_l1[1] | src_r0[0] << 16;
- dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst2[1] = src_r0[3] >> 16 | src_r1[0] << 16;
- dst2[2] = src_r1[0] >> 16 | src_r1[1] << 16;
- dst2[3] = src_r1[1] >> 16 | src_r1[2] << 16;
+ case 2:
+ w3[1] = __byte_perm (w2[3], w2[2], selector);
+ w3[0] = __byte_perm (w2[2], w2[1], selector);
+ w2[3] = __byte_perm (w2[1], w2[0], selector);
+ w2[2] = __byte_perm (w2[0], w1[3], selector);
+ w2[1] = __byte_perm (w1[3], w1[2], selector);
+ w2[0] = __byte_perm (w1[2], w1[1], selector);
+ w1[3] = __byte_perm (w1[1], w1[0], selector);
+ w1[2] = __byte_perm (w1[0], w0[3], selector);
+ w1[1] = __byte_perm (w0[3], w0[2], selector);
+ w1[0] = __byte_perm (w0[2], w0[1], selector);
+ w0[3] = __byte_perm (w0[1], w0[0], selector);
+ w0[2] = __byte_perm (w0[0], 0, selector);
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 23:
- dst1[1] = src_l1[1] | src_r0[0] << 24;
- dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst2[1] = src_r0[3] >> 8 | src_r1[0] << 24;
- dst2[2] = src_r1[0] >> 8 | src_r1[1] << 24;
- dst2[3] = src_r1[1] >> 8 | src_r1[2] << 24;
+ case 3:
+ w3[1] = __byte_perm (w2[2], w2[1], selector);
+ w3[0] = __byte_perm (w2[1], w2[0], selector);
+ w2[3] = __byte_perm (w2[0], w1[3], selector);
+ w2[2] = __byte_perm (w1[3], w1[2], selector);
+ w2[1] = __byte_perm (w1[2], w1[1], selector);
+ w2[0] = __byte_perm (w1[1], w1[0], selector);
+ w1[3] = __byte_perm (w1[0], w0[3], selector);
+ w1[2] = __byte_perm (w0[3], w0[2], selector);
+ w1[1] = __byte_perm (w0[2], w0[1], selector);
+ w1[0] = __byte_perm (w0[1], w0[0], selector);
+ w0[3] = __byte_perm (w0[0], 0, selector);
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 24:
- dst1[2] = src_r1[0];
- dst1[3] = src_r0[1];
- dst2[0] = src_r0[2];
- dst2[1] = src_r0[3];
- dst2[2] = src_r1[0];
- dst2[3] = src_r1[1];
+ case 4:
+ w3[1] = __byte_perm (w2[1], w2[0], selector);
+ w3[0] = __byte_perm (w2[0], w1[3], selector);
+ w2[3] = __byte_perm (w1[3], w1[2], selector);
+ w2[2] = __byte_perm (w1[2], w1[1], selector);
+ w2[1] = __byte_perm (w1[1], w1[0], selector);
+ w2[0] = __byte_perm (w1[0], w0[3], selector);
+ w1[3] = __byte_perm (w0[3], w0[2], selector);
+ w1[2] = __byte_perm (w0[2], w0[1], selector);
+ w1[1] = __byte_perm (w0[1], w0[0], selector);
+ w1[0] = __byte_perm (w0[0], 0, selector);
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 25:
- dst1[2] = src_l1[2] | src_r0[0] << 8;
- dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst2[2] = src_r0[3] >> 24 | src_r1[0] << 8;
- dst2[3] = src_r1[0] >> 24 | src_r1[1] << 8;
+ case 5:
+ w3[1] = __byte_perm (w2[0], w1[3], selector);
+ w3[0] = __byte_perm (w1[3], w1[2], selector);
+ w2[3] = __byte_perm (w1[2], w1[1], selector);
+ w2[2] = __byte_perm (w1[1], w1[0], selector);
+ w2[1] = __byte_perm (w1[0], w0[3], selector);
+ w2[0] = __byte_perm (w0[3], w0[2], selector);
+ w1[3] = __byte_perm (w0[2], w0[1], selector);
+ w1[2] = __byte_perm (w0[1], w0[0], selector);
+ w1[1] = __byte_perm (w0[0], 0, selector);
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 26:
- dst1[2] = src_l1[2] | src_r0[0] << 16;
- dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst2[2] = src_r0[3] >> 16 | src_r1[0] << 16;
- dst2[3] = src_r1[0] >> 16 | src_r1[1] << 16;
+ case 6:
+ w3[1] = __byte_perm (w1[3], w1[2], selector);
+ w3[0] = __byte_perm (w1[2], w1[1], selector);
+ w2[3] = __byte_perm (w1[1], w1[0], selector);
+ w2[2] = __byte_perm (w1[0], w0[3], selector);
+ w2[1] = __byte_perm (w0[3], w0[2], selector);
+ w2[0] = __byte_perm (w0[2], w0[1], selector);
+ w1[3] = __byte_perm (w0[1], w0[0], selector);
+ w1[2] = __byte_perm (w0[0], 0, selector);
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 27:
- dst1[2] = src_l1[2] | src_r0[0] << 24;
- dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst2[2] = src_r0[3] >> 8 | src_r1[0] << 24;
- dst2[3] = src_r1[0] >> 8 | src_r1[1] << 24;
+ case 7:
+ w3[1] = __byte_perm (w1[2], w1[1], selector);
+ w3[0] = __byte_perm (w1[1], w1[0], selector);
+ w2[3] = __byte_perm (w1[0], w0[3], selector);
+ w2[2] = __byte_perm (w0[3], w0[2], selector);
+ w2[1] = __byte_perm (w0[2], w0[1], selector);
+ w2[0] = __byte_perm (w0[1], w0[0], selector);
+ w1[3] = __byte_perm (w0[0], 0, selector);
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 28:
- dst1[3] = src_r1[0];
- dst2[0] = src_r0[1];
- dst2[1] = src_r0[2];
- dst2[2] = src_r0[3];
- dst2[3] = src_r1[0];
+ case 8:
+ w3[1] = __byte_perm (w1[1], w1[0], selector);
+ w3[0] = __byte_perm (w1[0], w0[3], selector);
+ w2[3] = __byte_perm (w0[3], w0[2], selector);
+ w2[2] = __byte_perm (w0[2], w0[1], selector);
+ w2[1] = __byte_perm (w0[1], w0[0], selector);
+ w2[0] = __byte_perm (w0[0], 0, selector);
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 29:
- dst1[3] = src_l1[3] | src_r0[0] << 8;
- dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst2[3] = src_r0[3] >> 24 | src_r1[0] << 8;
+ case 9:
+ w3[1] = __byte_perm (w1[0], w0[3], selector);
+ w3[0] = __byte_perm (w0[3], w0[2], selector);
+ w2[3] = __byte_perm (w0[2], w0[1], selector);
+ w2[2] = __byte_perm (w0[1], w0[0], selector);
+ w2[1] = __byte_perm (w0[0], 0, selector);
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 30:
- dst1[3] = src_l1[3] | src_r0[0] << 16;
- dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst2[3] = src_r0[3] >> 16 | src_r1[0] << 16;
+ case 10:
+ w3[1] = __byte_perm (w0[3], w0[2], selector);
+ w3[0] = __byte_perm (w0[2], w0[1], selector);
+ w2[3] = __byte_perm (w0[1], w0[0], selector);
+ w2[2] = __byte_perm (w0[0], 0, selector);
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 31:
- dst1[3] = src_l1[3] | src_r0[0] << 24;
- dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst2[3] = src_r0[3] >> 8 | src_r1[0] << 24;
+ case 11:
+ w3[1] = __byte_perm (w0[2], w0[1], selector);
+ w3[0] = __byte_perm (w0[1], w0[0], selector);
+ w2[3] = __byte_perm (w0[0], 0, selector);
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 32:
- dst2[0] = src_r0[0];
- dst2[1] = src_r0[1];
- dst2[2] = src_r0[2];
- dst2[3] = src_r0[3];
+ case 12:
+ w3[1] = __byte_perm (w0[1], w0[0], selector);
+ w3[0] = __byte_perm (w0[0], 0, selector);
+ w2[3] = 0;
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 33:
- dst2[0] = src_l2[0] | src_r0[0] << 8;
- dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8;
+ case 13:
+ w3[1] = __byte_perm (w0[0], 0, selector);
+ w3[0] = 0;
+ w2[3] = 0;
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
+ }
+ #endif
+}
- case 34:
- dst2[0] = src_l2[0] | src_r0[0] << 16;
- dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16;
+/* not needed anymore?
+// before: append_0x80_2_be
+static void append_0x80_2x4_be (u32 w0[4], u32 w1[4], const u32 offset)
+{
+ switch (offset)
+ {
+ case 0:
+ w0[0] |= 0x80000000;
break;
- case 35:
- dst2[0] = src_l2[0] | src_r0[0] << 24;
- dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24;
+ case 1:
+ w0[0] |= 0x800000;
break;
- case 36:
- dst2[1] = src_r0[0];
- dst2[2] = src_r0[1];
- dst2[3] = src_r0[2];
+ case 2:
+ w0[0] |= 0x8000;
break;
- case 37:
- dst2[1] = src_l2[1] | src_r0[0] << 8;
- dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8;
+ case 3:
+ w0[0] |= 0x80;
break;
- case 38:
- dst2[1] = src_l2[1] | src_r0[0] << 16;
- dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16;
+ case 4:
+ w0[1] |= 0x80000000;
break;
- case 39:
- dst2[1] = src_l2[1] | src_r0[0] << 24;
- dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24;
+ case 5:
+ w0[1] |= 0x800000;
break;
- case 40:
- dst2[2] = src_r0[0];
- dst2[3] = src_r0[1];
+ case 6:
+ w0[1] |= 0x8000;
break;
- case 41:
- dst2[2] = src_l2[2] | src_r0[0] << 8;
- dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8;
+ case 7:
+ w0[1] |= 0x80;
break;
- case 42:
- dst2[2] = src_l2[2] | src_r0[0] << 16;
- dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16;
+ case 8:
+ w0[2] |= 0x80000000;
break;
- case 43:
- dst2[2] = src_l2[2] | src_r0[0] << 24;
- dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ case 9:
+ w0[2] |= 0x800000;
break;
- case 44:
- dst2[3] = src_r0[0];
+ case 10:
+ w0[2] |= 0x8000;
break;
- case 45:
- dst2[3] = src_l2[3] | src_r0[0] << 8;
+ case 11:
+ w0[2] |= 0x80;
break;
- case 46:
- dst2[3] = src_l2[3] | src_r0[0] << 16;
+ case 12:
+ w0[3] |= 0x80000000;
break;
- case 47:
- dst2[3] = src_l2[3] | src_r0[0] << 24;
+ case 13:
+ w0[3] |= 0x800000;
break;
- }
-}
-// before: memcat16_9
-static void memcat_c15_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset)
-{
- switch (offset)
- {
- case 0:
- w0[0] = append0[0];
- w0[1] = append0[1];
- w0[2] = append0[2];
- w0[3] = append0[3];
- w1[0] = append1[0];
- w1[1] = append1[1];
- w1[2] = append1[2];
- w1[3] = append1[3];
- w2[0] = append2[0];
+ case 14:
+ w0[3] |= 0x8000;
break;
- case 1:
- w0[0] = w0[0] | append0[0] << 8;
- w0[1] = append0[0] >> 24 | append0[1] << 8;
- w0[2] = append0[1] >> 24 | append0[2] << 8;
- w0[3] = append0[2] >> 24 | append0[3] << 8;
- w1[0] = append0[3] >> 24 | append1[0] << 8;
- w1[1] = append1[0] >> 24 | append1[1] << 8;
- w1[2] = append1[1] >> 24 | append1[2] << 8;
- w1[3] = append1[2] >> 24 | append1[3] << 8;
- w2[0] = append1[3] >> 24 | append2[0] << 8;
- w2[1] = append2[0] >> 24;
+ case 15:
+ w0[3] |= 0x80;
break;
- case 2:
- w0[0] = w0[0] | append0[0] << 16;
- w0[1] = append0[0] >> 16 | append0[1] << 16;
- w0[2] = append0[1] >> 16 | append0[2] << 16;
- w0[3] = append0[2] >> 16 | append0[3] << 16;
- w1[0] = append0[3] >> 16 | append1[0] << 16;
- w1[1] = append1[0] >> 16 | append1[1] << 16;
- w1[2] = append1[1] >> 16 | append1[2] << 16;
- w1[3] = append1[2] >> 16 | append1[3] << 16;
- w2[0] = append1[3] >> 16 | append2[0] << 16;
- w2[1] = append2[0] >> 16;
+ case 16:
+ w1[0] |= 0x80000000;
break;
-
- case 3:
- w0[0] = w0[0] | append0[0] << 24;
- w0[1] = append0[0] >> 8 | append0[1] << 24;
- w0[2] = append0[1] >> 8 | append0[2] << 24;
- w0[3] = append0[2] >> 8 | append0[3] << 24;
- w1[0] = append0[3] >> 8 | append1[0] << 24;
- w1[1] = append1[0] >> 8 | append1[1] << 24;
- w1[2] = append1[1] >> 8 | append1[2] << 24;
- w1[3] = append1[2] >> 8 | append1[3] << 24;
- w2[0] = append1[3] >> 8 | append2[0] << 24;
- w2[1] = append2[0] >> 8;
+
+ case 17:
+ w1[0] |= 0x800000;
break;
- case 4:
- w0[1] = append0[0];
- w0[2] = append0[1];
- w0[3] = append0[2];
- w1[0] = append0[3];
- w1[1] = append1[0];
- w1[2] = append1[1];
- w1[3] = append1[2];
- w2[0] = append1[3];
- w2[1] = append2[0];
+ case 18:
+ w1[0] |= 0x8000;
break;
- case 5:
- w0[1] = w0[1] | append0[0] << 8;
- w0[2] = append0[0] >> 24 | append0[1] << 8;
- w0[3] = append0[1] >> 24 | append0[2] << 8;
- w1[0] = append0[2] >> 24 | append0[3] << 8;
- w1[1] = append0[3] >> 24 | append1[0] << 8;
- w1[2] = append1[0] >> 24 | append1[1] << 8;
- w1[3] = append1[1] >> 24 | append1[2] << 8;
- w2[0] = append1[2] >> 24 | append1[3] << 8;
- w2[1] = append1[3] >> 24 | append2[0] << 8;
- w2[2] = append2[0] >> 24;
+ case 19:
+ w1[0] |= 0x80;
break;
- case 6:
- w0[1] = w0[1] | append0[0] << 16;
- w0[2] = append0[0] >> 16 | append0[1] << 16;
- w0[3] = append0[1] >> 16 | append0[2] << 16;
- w1[0] = append0[2] >> 16 | append0[3] << 16;
- w1[1] = append0[3] >> 16 | append1[0] << 16;
- w1[2] = append1[0] >> 16 | append1[1] << 16;
- w1[3] = append1[1] >> 16 | append1[2] << 16;
- w2[0] = append1[2] >> 16 | append1[3] << 16;
- w2[1] = append1[3] >> 16 | append2[0] << 16;
- w2[2] = append2[0] >> 16;
+ case 20:
+ w1[1] |= 0x80000000;
break;
- case 7:
- w0[1] = w0[1] | append0[0] << 24;
- w0[2] = append0[0] >> 8 | append0[1] << 24;
- w0[3] = append0[1] >> 8 | append0[2] << 24;
- w1[0] = append0[2] >> 8 | append0[3] << 24;
- w1[1] = append0[3] >> 8 | append1[0] << 24;
- w1[2] = append1[0] >> 8 | append1[1] << 24;
- w1[3] = append1[1] >> 8 | append1[2] << 24;
- w2[0] = append1[2] >> 8 | append1[3] << 24;
- w2[1] = append1[3] >> 8 | append2[0] << 24;
- w2[2] = append2[0] >> 8;
+ case 21:
+ w1[1] |= 0x800000;
break;
- case 8:
- w0[2] = append0[0];
- w0[3] = append0[1];
- w1[0] = append0[2];
- w1[1] = append0[3];
- w1[2] = append1[0];
- w1[3] = append1[1];
- w2[0] = append1[2];
- w2[1] = append1[3];
- w2[2] = append2[0];
+ case 22:
+ w1[1] |= 0x8000;
break;
- case 9:
- w0[2] = w0[2] | append0[0] << 8;
- w0[3] = append0[0] >> 24 | append0[1] << 8;
- w1[0] = append0[1] >> 24 | append0[2] << 8;
- w1[1] = append0[2] >> 24 | append0[3] << 8;
- w1[2] = append0[3] >> 24 | append1[0] << 8;
- w1[3] = append1[0] >> 24 | append1[1] << 8;
- w2[0] = append1[1] >> 24 | append1[2] << 8;
- w2[1] = append1[2] >> 24 | append1[3] << 8;
- w2[2] = append1[3] >> 24 | append2[0] << 8;
- w2[3] = append2[0] >> 24;
+ case 23:
+ w1[1] |= 0x80;
break;
- case 10:
- w0[2] = w0[2] | append0[0] << 16;
- w0[3] = append0[0] >> 16 | append0[1] << 16;
- w1[0] = append0[1] >> 16 | append0[2] << 16;
- w1[1] = append0[2] >> 16 | append0[3] << 16;
- w1[2] = append0[3] >> 16 | append1[0] << 16;
- w1[3] = append1[0] >> 16 | append1[1] << 16;
- w2[0] = append1[1] >> 16 | append1[2] << 16;
- w2[1] = append1[2] >> 16 | append1[3] << 16;
- w2[2] = append1[3] >> 16 | append2[0] << 16;
- w2[3] = append2[0] >> 16;
+ case 24:
+ w1[2] |= 0x80000000;
break;
- case 11:
- w0[2] = w0[2] | append0[0] << 24;
- w0[3] = append0[0] >> 8 | append0[1] << 24;
- w1[0] = append0[1] >> 8 | append0[2] << 24;
- w1[1] = append0[2] >> 8 | append0[3] << 24;
- w1[2] = append0[3] >> 8 | append1[0] << 24;
- w1[3] = append1[0] >> 8 | append1[1] << 24;
- w2[0] = append1[1] >> 8 | append1[2] << 24;
- w2[1] = append1[2] >> 8 | append1[3] << 24;
- w2[2] = append1[3] >> 8 | append2[0] << 24;
- w2[3] = append2[0] >> 8;
+ case 25:
+ w1[2] |= 0x800000;
break;
- case 12:
- w0[3] = append0[0];
- w1[0] = append0[1];
- w1[1] = append0[2];
- w1[2] = append0[3];
- w1[3] = append1[0];
- w2[0] = append1[1];
- w2[1] = append1[2];
- w2[2] = append1[3];
- w2[3] = append2[0];
+ case 26:
+ w1[2] |= 0x8000;
break;
- case 13:
- w0[3] = w0[3] | append0[0] << 8;
- w1[0] = append0[0] >> 24 | append0[1] << 8;
- w1[1] = append0[1] >> 24 | append0[2] << 8;
- w1[2] = append0[2] >> 24 | append0[3] << 8;
- w1[3] = append0[3] >> 24 | append1[0] << 8;
- w2[0] = append1[0] >> 24 | append1[1] << 8;
- w2[1] = append1[1] >> 24 | append1[2] << 8;
- w2[2] = append1[2] >> 24 | append1[3] << 8;
- w2[3] = append1[3] >> 24 | append2[0] << 8;
- w3[0] = append2[0] >> 24;
+ case 27:
+ w1[2] |= 0x80;
break;
- case 14:
- w0[3] = w0[3] | append0[0] << 16;
- w1[0] = append0[0] >> 16 | append0[1] << 16;
- w1[1] = append0[1] >> 16 | append0[2] << 16;
- w1[2] = append0[2] >> 16 | append0[3] << 16;
- w1[3] = append0[3] >> 16 | append1[0] << 16;
- w2[0] = append1[0] >> 16 | append1[1] << 16;
- w2[1] = append1[1] >> 16 | append1[2] << 16;
- w2[2] = append1[2] >> 16 | append1[3] << 16;
- w2[3] = append1[3] >> 16 | append2[0] << 16;
- w3[0] = append2[0] >> 16;
+ case 28:
+ w1[3] |= 0x80000000;
break;
- case 15:
- w0[3] = w0[3] | append0[0] << 24;
- w1[0] = append0[0] >> 8 | append0[1] << 24;
- w1[1] = append0[1] >> 8 | append0[2] << 24;
- w1[2] = append0[2] >> 8 | append0[3] << 24;
- w1[3] = append0[3] >> 8 | append1[0] << 24;
- w2[0] = append1[0] >> 8 | append1[1] << 24;
- w2[1] = append1[1] >> 8 | append1[2] << 24;
- w2[2] = append1[2] >> 8 | append1[3] << 24;
- w2[3] = append1[3] >> 8 | append2[0] << 24;
- w3[0] = append2[0] >> 8;
+ case 29:
+ w1[3] |= 0x800000;
+ break;
+
+ case 30:
+ w1[3] |= 0x8000;
+ break;
+
+ case 31:
+ w1[3] |= 0x80;
break;
}
}
-// before: memcat32_8
-static void memcat_c32_w4x4_a2x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 offset)
+// before: append_0x80_4
+static void append_0x80_1x16 (u32 w[16], const u32 offset)
{
switch (offset)
{
case 0:
- w0[0] = append0[0];
- w0[1] = append0[1];
- w0[2] = append0[2];
- w0[3] = append0[3];
- w1[0] = append1[0];
- w1[1] = append1[1];
- w1[2] = append1[2];
- w1[3] = append1[3];
+ w[ 0] = 0x80;
break;
case 1:
- w0[0] = w0[0] | append0[0] << 8;
- w0[1] = append0[0] >> 24 | append0[1] << 8;
- w0[2] = append0[1] >> 24 | append0[2] << 8;
- w0[3] = append0[2] >> 24 | append0[3] << 8;
- w1[0] = append0[3] >> 24 | append1[0] << 8;
- w1[1] = append1[0] >> 24 | append1[1] << 8;
- w1[2] = append1[1] >> 24 | append1[2] << 8;
- w1[3] = append1[2] >> 24 | append1[3] << 8;
- w2[0] = append1[3] >> 24;
+ w[ 0] = w[ 0] | 0x8000;
break;
case 2:
- w0[0] = w0[0] | append0[0] << 16;
- w0[1] = append0[0] >> 16 | append0[1] << 16;
- w0[2] = append0[1] >> 16 | append0[2] << 16;
- w0[3] = append0[2] >> 16 | append0[3] << 16;
- w1[0] = append0[3] >> 16 | append1[0] << 16;
- w1[1] = append1[0] >> 16 | append1[1] << 16;
- w1[2] = append1[1] >> 16 | append1[2] << 16;
- w1[3] = append1[2] >> 16 | append1[3] << 16;
- w2[0] = append1[3] >> 16;
+ w[ 0] = w[ 0] | 0x800000;
break;
case 3:
- w0[0] = w0[0] | append0[0] << 24;
- w0[1] = append0[0] >> 8 | append0[1] << 24;
- w0[2] = append0[1] >> 8 | append0[2] << 24;
- w0[3] = append0[2] >> 8 | append0[3] << 24;
- w1[0] = append0[3] >> 8 | append1[0] << 24;
- w1[1] = append1[0] >> 8 | append1[1] << 24;
- w1[2] = append1[1] >> 8 | append1[2] << 24;
- w1[3] = append1[2] >> 8 | append1[3] << 24;
- w2[0] = append1[3] >> 8;
+ w[ 0] = w[ 0] | 0x80000000;
break;
case 4:
- w0[1] = append0[0];
- w0[2] = append0[1];
- w0[3] = append0[2];
- w1[0] = append0[3];
- w1[1] = append1[0];
- w1[2] = append1[1];
- w1[3] = append1[2];
- w2[0] = append1[3];
+ w[ 1] = 0x80;
break;
case 5:
- w0[1] = w0[1] | append0[0] << 8;
- w0[2] = append0[0] >> 24 | append0[1] << 8;
- w0[3] = append0[1] >> 24 | append0[2] << 8;
- w1[0] = append0[2] >> 24 | append0[3] << 8;
- w1[1] = append0[3] >> 24 | append1[0] << 8;
- w1[2] = append1[0] >> 24 | append1[1] << 8;
- w1[3] = append1[1] >> 24 | append1[2] << 8;
- w2[0] = append1[2] >> 24 | append1[3] << 8;
- w2[1] = append1[3] >> 24;
+ w[ 1] = w[ 1] | 0x8000;
break;
case 6:
- w0[1] = w0[1] | append0[0] << 16;
- w0[2] = append0[0] >> 16 | append0[1] << 16;
- w0[3] = append0[1] >> 16 | append0[2] << 16;
- w1[0] = append0[2] >> 16 | append0[3] << 16;
- w1[1] = append0[3] >> 16 | append1[0] << 16;
- w1[2] = append1[0] >> 16 | append1[1] << 16;
- w1[3] = append1[1] >> 16 | append1[2] << 16;
- w2[0] = append1[2] >> 16 | append1[3] << 16;
- w2[1] = append1[3] >> 16;
+ w[ 1] = w[ 1] | 0x800000;
break;
case 7:
- w0[1] = w0[1] | append0[0] << 24;
- w0[2] = append0[0] >> 8 | append0[1] << 24;
- w0[3] = append0[1] >> 8 | append0[2] << 24;
- w1[0] = append0[2] >> 8 | append0[3] << 24;
- w1[1] = append0[3] >> 8 | append1[0] << 24;
- w1[2] = append1[0] >> 8 | append1[1] << 24;
- w1[3] = append1[1] >> 8 | append1[2] << 24;
- w2[0] = append1[2] >> 8 | append1[3] << 24;
- w2[1] = append1[3] >> 8;
- break;
-
- case 8:
- w0[2] = append0[0];
- w0[3] = append0[1];
- w1[0] = append0[2];
- w1[1] = append0[3];
- w1[2] = append1[0];
- w1[3] = append1[1];
- w2[0] = append1[2];
- w2[1] = append1[3];
+ w[ 1] = w[ 1] | 0x80000000;
+ break;
+
+ case 8:
+ w[ 2] = 0x80;
break;
case 9:
- w0[2] = w0[2] | append0[0] << 8;
- w0[3] = append0[0] >> 24 | append0[1] << 8;
- w1[0] = append0[1] >> 24 | append0[2] << 8;
- w1[1] = append0[2] >> 24 | append0[3] << 8;
- w1[2] = append0[3] >> 24 | append1[0] << 8;
- w1[3] = append1[0] >> 24 | append1[1] << 8;
- w2[0] = append1[1] >> 24 | append1[2] << 8;
- w2[1] = append1[2] >> 24 | append1[3] << 8;
- w2[2] = append1[3] >> 24;
+ w[ 2] = w[ 2] | 0x8000;
break;
case 10:
- w0[2] = w0[2] | append0[0] << 16;
- w0[3] = append0[0] >> 16 | append0[1] << 16;
- w1[0] = append0[1] >> 16 | append0[2] << 16;
- w1[1] = append0[2] >> 16 | append0[3] << 16;
- w1[2] = append0[3] >> 16 | append1[0] << 16;
- w1[3] = append1[0] >> 16 | append1[1] << 16;
- w2[0] = append1[1] >> 16 | append1[2] << 16;
- w2[1] = append1[2] >> 16 | append1[3] << 16;
- w2[2] = append1[3] >> 16;
+ w[ 2] = w[ 2] | 0x800000;
break;
case 11:
- w0[2] = w0[2] | append0[0] << 24;
- w0[3] = append0[0] >> 8 | append0[1] << 24;
- w1[0] = append0[1] >> 8 | append0[2] << 24;
- w1[1] = append0[2] >> 8 | append0[3] << 24;
- w1[2] = append0[3] >> 8 | append1[0] << 24;
- w1[3] = append1[0] >> 8 | append1[1] << 24;
- w2[0] = append1[1] >> 8 | append1[2] << 24;
- w2[1] = append1[2] >> 8 | append1[3] << 24;
- w2[2] = append1[3] >> 8;
+ w[ 2] = w[ 2] | 0x80000000;
break;
case 12:
- w0[3] = append0[0];
- w1[0] = append0[1];
- w1[1] = append0[2];
- w1[2] = append0[3];
- w1[3] = append1[0];
- w2[0] = append1[1];
- w2[1] = append1[2];
- w2[2] = append1[3];
+ w[ 3] = 0x80;
break;
case 13:
- w0[3] = w0[3] | append0[0] << 8;
- w1[0] = append0[0] >> 24 | append0[1] << 8;
- w1[1] = append0[1] >> 24 | append0[2] << 8;
- w1[2] = append0[2] >> 24 | append0[3] << 8;
- w1[3] = append0[3] >> 24 | append1[0] << 8;
- w2[0] = append1[0] >> 24 | append1[1] << 8;
- w2[1] = append1[1] >> 24 | append1[2] << 8;
- w2[2] = append1[2] >> 24 | append1[3] << 8;
- w2[3] = append1[3] >> 24;
+ w[ 3] = w[ 3] | 0x8000;
break;
case 14:
- w0[3] = w0[3] | append0[0] << 16;
- w1[0] = append0[0] >> 16 | append0[1] << 16;
- w1[1] = append0[1] >> 16 | append0[2] << 16;
- w1[2] = append0[2] >> 16 | append0[3] << 16;
- w1[3] = append0[3] >> 16 | append1[0] << 16;
- w2[0] = append1[0] >> 16 | append1[1] << 16;
- w2[1] = append1[1] >> 16 | append1[2] << 16;
- w2[2] = append1[2] >> 16 | append1[3] << 16;
- w2[3] = append1[3] >> 16;
+ w[ 3] = w[ 3] | 0x800000;
break;
case 15:
- w0[3] = w0[3] | append0[0] << 24;
- w1[0] = append0[0] >> 8 | append0[1] << 24;
- w1[1] = append0[1] >> 8 | append0[2] << 24;
- w1[2] = append0[2] >> 8 | append0[3] << 24;
- w1[3] = append0[3] >> 8 | append1[0] << 24;
- w2[0] = append1[0] >> 8 | append1[1] << 24;
- w2[1] = append1[1] >> 8 | append1[2] << 24;
- w2[2] = append1[2] >> 8 | append1[3] << 24;
- w2[3] = append1[3] >> 8;
+ w[ 3] = w[ 3] | 0x80000000;
break;
case 16:
- w1[0] = append0[0];
- w1[1] = append0[1];
- w1[2] = append0[2];
- w1[3] = append0[3];
- w2[0] = append1[0];
- w2[1] = append1[1];
- w2[2] = append1[2];
- w2[3] = append1[3];
+ w[ 4] = 0x80;
break;
case 17:
- w1[0] = w1[0] | append0[0] << 8;
- w1[1] = append0[0] >> 24 | append0[1] << 8;
- w1[2] = append0[1] >> 24 | append0[2] << 8;
- w1[3] = append0[2] >> 24 | append0[3] << 8;
- w2[0] = append0[3] >> 24 | append1[0] << 8;
- w2[1] = append1[0] >> 24 | append1[1] << 8;
- w2[2] = append1[1] >> 24 | append1[2] << 8;
- w2[3] = append1[2] >> 24 | append1[3] << 8;
- w3[0] = append1[3] >> 24;
+ w[ 4] = w[ 4] | 0x8000;
break;
case 18:
- w1[0] = w1[0] | append0[0] << 16;
- w1[1] = append0[0] >> 16 | append0[1] << 16;
- w1[2] = append0[1] >> 16 | append0[2] << 16;
- w1[3] = append0[2] >> 16 | append0[3] << 16;
- w2[0] = append0[3] >> 16 | append1[0] << 16;
- w2[1] = append1[0] >> 16 | append1[1] << 16;
- w2[2] = append1[1] >> 16 | append1[2] << 16;
- w2[3] = append1[2] >> 16 | append1[3] << 16;
- w3[0] = append1[3] >> 16;
+ w[ 4] = w[ 4] | 0x800000;
break;
case 19:
- w1[0] = w1[0] | append0[0] << 24;
- w1[1] = append0[0] >> 8 | append0[1] << 24;
- w1[2] = append0[1] >> 8 | append0[2] << 24;
- w1[3] = append0[2] >> 8 | append0[3] << 24;
- w2[0] = append0[3] >> 8 | append1[0] << 24;
- w2[1] = append1[0] >> 8 | append1[1] << 24;
- w2[2] = append1[1] >> 8 | append1[2] << 24;
- w2[3] = append1[2] >> 8 | append1[3] << 24;
- w3[0] = append1[3] >> 8;
+ w[ 4] = w[ 4] | 0x80000000;
break;
case 20:
- w1[1] = append0[0];
- w1[2] = append0[1];
- w1[3] = append0[2];
- w2[0] = append0[3];
- w2[1] = append1[0];
- w2[2] = append1[1];
- w2[3] = append1[2];
- w3[0] = append1[3];
+ w[ 5] = 0x80;
break;
case 21:
- w1[1] = w1[1] | append0[0] << 8;
- w1[2] = append0[0] >> 24 | append0[1] << 8;
- w1[3] = append0[1] >> 24 | append0[2] << 8;
- w2[0] = append0[2] >> 24 | append0[3] << 8;
- w2[1] = append0[3] >> 24 | append1[0] << 8;
- w2[2] = append1[0] >> 24 | append1[1] << 8;
- w2[3] = append1[1] >> 24 | append1[2] << 8;
- w3[0] = append1[2] >> 24 | append1[3] << 8;
- w3[1] = append1[3] >> 24;
+ w[ 5] = w[ 5] | 0x8000;
break;
case 22:
- w1[1] = w1[1] | append0[0] << 16;
- w1[2] = append0[0] >> 16 | append0[1] << 16;
- w1[3] = append0[1] >> 16 | append0[2] << 16;
- w2[0] = append0[2] >> 16 | append0[3] << 16;
- w2[1] = append0[3] >> 16 | append1[0] << 16;
- w2[2] = append1[0] >> 16 | append1[1] << 16;
- w2[3] = append1[1] >> 16 | append1[2] << 16;
- w3[0] = append1[2] >> 16 | append1[3] << 16;
- w3[1] = append1[3] >> 16;
+ w[ 5] = w[ 5] | 0x800000;
break;
case 23:
- w1[1] = w1[1] | append0[0] << 24;
- w1[2] = append0[0] >> 8 | append0[1] << 24;
- w1[3] = append0[1] >> 8 | append0[2] << 24;
- w2[0] = append0[2] >> 8 | append0[3] << 24;
- w2[1] = append0[3] >> 8 | append1[0] << 24;
- w2[2] = append1[0] >> 8 | append1[1] << 24;
- w2[3] = append1[1] >> 8 | append1[2] << 24;
- w3[0] = append1[2] >> 8 | append1[3] << 24;
- w3[1] = append1[3] >> 8;
+ w[ 5] = w[ 5] | 0x80000000;
break;
case 24:
- w1[2] = append0[0];
- w1[3] = append0[1];
- w2[0] = append0[2];
- w2[1] = append0[3];
- w2[2] = append1[0];
- w2[3] = append1[1];
- w3[0] = append1[2];
- w3[1] = append1[3];
+ w[ 6] = 0x80;
break;
case 25:
- w1[2] = w1[2] | append0[0] << 8;
- w1[3] = append0[0] >> 24 | append0[1] << 8;
- w2[0] = append0[1] >> 24 | append0[2] << 8;
- w2[1] = append0[2] >> 24 | append0[3] << 8;
- w2[2] = append0[3] >> 24 | append1[0] << 8;
- w2[3] = append1[0] >> 24 | append1[1] << 8;
- w3[0] = append1[1] >> 24 | append1[2] << 8;
- w3[1] = append1[2] >> 24 | append1[3] << 8;
+ w[ 6] = w[ 6] | 0x8000;
break;
case 26:
- w1[2] = w1[2] | append0[0] << 16;
- w1[3] = append0[0] >> 16 | append0[1] << 16;
- w2[0] = append0[1] >> 16 | append0[2] << 16;
- w2[1] = append0[2] >> 16 | append0[3] << 16;
- w2[2] = append0[3] >> 16 | append1[0] << 16;
- w2[3] = append1[0] >> 16 | append1[1] << 16;
- w3[0] = append1[1] >> 16 | append1[2] << 16;
- w3[1] = append1[2] >> 16 | append1[3] << 16;
+ w[ 6] = w[ 6] | 0x800000;
break;
case 27:
- w1[2] = w1[2] | append0[0] << 24;
- w1[3] = append0[0] >> 8 | append0[1] << 24;
- w2[0] = append0[1] >> 8 | append0[2] << 24;
- w2[1] = append0[2] >> 8 | append0[3] << 24;
- w2[2] = append0[3] >> 8 | append1[0] << 24;
- w2[3] = append1[0] >> 8 | append1[1] << 24;
- w3[0] = append1[1] >> 8 | append1[2] << 24;
- w3[1] = append1[2] >> 8 | append1[3] << 24;
+ w[ 6] = w[ 6] | 0x80000000;
break;
case 28:
- w1[3] = append0[0];
- w2[0] = append0[1];
- w2[1] = append0[2];
- w2[2] = append0[3];
- w2[3] = append1[0];
- w3[0] = append1[1];
- w3[1] = append1[2];
+ w[ 7] = 0x80;
break;
case 29:
- w1[3] = w1[3] | append0[0] << 8;
- w2[0] = append0[0] >> 24 | append0[1] << 8;
- w2[1] = append0[1] >> 24 | append0[2] << 8;
- w2[2] = append0[2] >> 24 | append0[3] << 8;
- w2[3] = append0[3] >> 24 | append1[0] << 8;
- w3[0] = append1[0] >> 24 | append1[1] << 8;
- w3[1] = append1[1] >> 24 | append1[2] << 8;
+ w[ 7] = w[ 7] | 0x8000;
break;
case 30:
- w1[3] = w1[3] | append0[0] << 16;
- w2[0] = append0[0] >> 16 | append0[1] << 16;
- w2[1] = append0[1] >> 16 | append0[2] << 16;
- w2[2] = append0[2] >> 16 | append0[3] << 16;
- w2[3] = append0[3] >> 16 | append1[0] << 16;
- w3[0] = append1[0] >> 16 | append1[1] << 16;
- w3[1] = append1[1] >> 16 | append1[2] << 16;
+ w[ 7] = w[ 7] | 0x800000;
+ break;
+
+ case 31:
+ w[ 7] = w[ 7] | 0x80000000;
+ break;
+
+ case 32:
+ w[ 8] = 0x80;
+ break;
+
+ case 33:
+ w[ 8] = w[ 8] | 0x8000;
+ break;
+
+ case 34:
+ w[ 8] = w[ 8] | 0x800000;
+ break;
+
+ case 35:
+ w[ 8] = w[ 8] | 0x80000000;
+ break;
+
+ case 36:
+ w[ 9] = 0x80;
+ break;
+
+ case 37:
+ w[ 9] = w[ 9] | 0x8000;
+ break;
+
+ case 38:
+ w[ 9] = w[ 9] | 0x800000;
+ break;
+
+ case 39:
+ w[ 9] = w[ 9] | 0x80000000;
+ break;
+
+ case 40:
+ w[10] = 0x80;
+ break;
+
+ case 41:
+ w[10] = w[10] | 0x8000;
break;
- case 31:
- w1[3] = w1[3] | append0[0] << 24;
- w2[0] = append0[0] >> 8 | append0[1] << 24;
- w2[1] = append0[1] >> 8 | append0[2] << 24;
- w2[2] = append0[2] >> 8 | append0[3] << 24;
- w2[3] = append0[3] >> 8 | append1[0] << 24;
- w3[0] = append1[0] >> 8 | append1[1] << 24;
- w3[1] = append1[1] >> 8 | append1[2] << 24;
+ case 42:
+ w[10] = w[10] | 0x800000;
break;
- case 32:
- w2[0] = append0[0];
- w2[1] = append0[1];
- w2[2] = append0[2];
- w2[3] = append0[3];
- w3[0] = append1[0];
- w3[1] = append1[1];
+ case 43:
+ w[10] = w[10] | 0x80000000;
break;
- }
-}
-// before: memcat32_9
-static void memcat_c32_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset)
-{
- switch (offset)
- {
- case 0:
- w0[0] = append0[0];
- w0[1] = append0[1];
- w0[2] = append0[2];
- w0[3] = append0[3];
- w1[0] = append1[0];
- w1[1] = append1[1];
- w1[2] = append1[2];
- w1[3] = append1[3];
- w2[0] = append2[0];
+ case 44:
+ w[11] = 0x80;
break;
- case 1:
- w0[0] = w0[0] | append0[0] << 8;
- w0[1] = append0[0] >> 24 | append0[1] << 8;
- w0[2] = append0[1] >> 24 | append0[2] << 8;
- w0[3] = append0[2] >> 24 | append0[3] << 8;
- w1[0] = append0[3] >> 24 | append1[0] << 8;
- w1[1] = append1[0] >> 24 | append1[1] << 8;
- w1[2] = append1[1] >> 24 | append1[2] << 8;
- w1[3] = append1[2] >> 24 | append1[3] << 8;
- w2[0] = append1[3] >> 24 | append2[0] << 8;
- w2[1] = append2[0] >> 24;
+ case 45:
+ w[11] = w[11] | 0x8000;
break;
- case 2:
- w0[0] = w0[0] | append0[0] << 16;
- w0[1] = append0[0] >> 16 | append0[1] << 16;
- w0[2] = append0[1] >> 16 | append0[2] << 16;
- w0[3] = append0[2] >> 16 | append0[3] << 16;
- w1[0] = append0[3] >> 16 | append1[0] << 16;
- w1[1] = append1[0] >> 16 | append1[1] << 16;
- w1[2] = append1[1] >> 16 | append1[2] << 16;
- w1[3] = append1[2] >> 16 | append1[3] << 16;
- w2[0] = append1[3] >> 16 | append2[0] << 16;
- w2[1] = append2[0] >> 16;
+ case 46:
+ w[11] = w[11] | 0x800000;
break;
- case 3:
- w0[0] = w0[0] | append0[0] << 24;
- w0[1] = append0[0] >> 8 | append0[1] << 24;
- w0[2] = append0[1] >> 8 | append0[2] << 24;
- w0[3] = append0[2] >> 8 | append0[3] << 24;
- w1[0] = append0[3] >> 8 | append1[0] << 24;
- w1[1] = append1[0] >> 8 | append1[1] << 24;
- w1[2] = append1[1] >> 8 | append1[2] << 24;
- w1[3] = append1[2] >> 8 | append1[3] << 24;
- w2[0] = append1[3] >> 8 | append2[0] << 24;
- w2[1] = append2[0] >> 8;
+ case 47:
+ w[11] = w[11] | 0x80000000;
break;
- case 4:
- w0[1] = append0[0];
- w0[2] = append0[1];
- w0[3] = append0[2];
- w1[0] = append0[3];
- w1[1] = append1[0];
- w1[2] = append1[1];
- w1[3] = append1[2];
- w2[0] = append1[3];
- w2[1] = append2[0];
+ case 48:
+ w[12] = 0x80;
break;
- case 5:
- w0[1] = w0[1] | append0[0] << 8;
- w0[2] = append0[0] >> 24 | append0[1] << 8;
- w0[3] = append0[1] >> 24 | append0[2] << 8;
- w1[0] = append0[2] >> 24 | append0[3] << 8;
- w1[1] = append0[3] >> 24 | append1[0] << 8;
- w1[2] = append1[0] >> 24 | append1[1] << 8;
- w1[3] = append1[1] >> 24 | append1[2] << 8;
- w2[0] = append1[2] >> 24 | append1[3] << 8;
- w2[1] = append1[3] >> 24 | append2[0] << 8;
- w2[2] = append2[0] >> 24;
+ case 49:
+ w[12] = w[12] | 0x8000;
break;
- case 6:
- w0[1] = w0[1] | append0[0] << 16;
- w0[2] = append0[0] >> 16 | append0[1] << 16;
- w0[3] = append0[1] >> 16 | append0[2] << 16;
- w1[0] = append0[2] >> 16 | append0[3] << 16;
- w1[1] = append0[3] >> 16 | append1[0] << 16;
- w1[2] = append1[0] >> 16 | append1[1] << 16;
- w1[3] = append1[1] >> 16 | append1[2] << 16;
- w2[0] = append1[2] >> 16 | append1[3] << 16;
- w2[1] = append1[3] >> 16 | append2[0] << 16;
- w2[2] = append2[0] >> 16;
+ case 50:
+ w[12] = w[12] | 0x800000;
break;
- case 7:
- w0[1] = w0[1] | append0[0] << 24;
- w0[2] = append0[0] >> 8 | append0[1] << 24;
- w0[3] = append0[1] >> 8 | append0[2] << 24;
- w1[0] = append0[2] >> 8 | append0[3] << 24;
- w1[1] = append0[3] >> 8 | append1[0] << 24;
- w1[2] = append1[0] >> 8 | append1[1] << 24;
- w1[3] = append1[1] >> 8 | append1[2] << 24;
- w2[0] = append1[2] >> 8 | append1[3] << 24;
- w2[1] = append1[3] >> 8 | append2[0] << 24;
- w2[2] = append2[0] >> 8;
+ case 51:
+ w[12] = w[12] | 0x80000000;
break;
- case 8:
- w0[2] = append0[0];
- w0[3] = append0[1];
- w1[0] = append0[2];
- w1[1] = append0[3];
- w1[2] = append1[0];
- w1[3] = append1[1];
- w2[0] = append1[2];
- w2[1] = append1[3];
- w2[2] = append2[0];
+ case 52:
+ w[13] = 0x80;
break;
- case 9:
- w0[2] = w0[2] | append0[0] << 8;
- w0[3] = append0[0] >> 24 | append0[1] << 8;
- w1[0] = append0[1] >> 24 | append0[2] << 8;
- w1[1] = append0[2] >> 24 | append0[3] << 8;
- w1[2] = append0[3] >> 24 | append1[0] << 8;
- w1[3] = append1[0] >> 24 | append1[1] << 8;
- w2[0] = append1[1] >> 24 | append1[2] << 8;
- w2[1] = append1[2] >> 24 | append1[3] << 8;
- w2[2] = append1[3] >> 24 | append2[0] << 8;
- w2[3] = append2[0] >> 24;
+ case 53:
+ w[13] = w[13] | 0x8000;
break;
- case 10:
- w0[2] = w0[2] | append0[0] << 16;
- w0[3] = append0[0] >> 16 | append0[1] << 16;
- w1[0] = append0[1] >> 16 | append0[2] << 16;
- w1[1] = append0[2] >> 16 | append0[3] << 16;
- w1[2] = append0[3] >> 16 | append1[0] << 16;
- w1[3] = append1[0] >> 16 | append1[1] << 16;
- w2[0] = append1[1] >> 16 | append1[2] << 16;
- w2[1] = append1[2] >> 16 | append1[3] << 16;
- w2[2] = append1[3] >> 16 | append2[0] << 16;
- w2[3] = append2[0] >> 16;
+ case 54:
+ w[13] = w[13] | 0x800000;
break;
- case 11:
- w0[2] = w0[2] | append0[0] << 24;
- w0[3] = append0[0] >> 8 | append0[1] << 24;
- w1[0] = append0[1] >> 8 | append0[2] << 24;
- w1[1] = append0[2] >> 8 | append0[3] << 24;
- w1[2] = append0[3] >> 8 | append1[0] << 24;
- w1[3] = append1[0] >> 8 | append1[1] << 24;
- w2[0] = append1[1] >> 8 | append1[2] << 24;
- w2[1] = append1[2] >> 8 | append1[3] << 24;
- w2[2] = append1[3] >> 8 | append2[0] << 24;
- w2[3] = append2[0] >> 8;
+ case 55:
+ w[13] = w[13] | 0x80000000;
break;
- case 12:
- w0[3] = append0[0];
- w1[0] = append0[1];
- w1[1] = append0[2];
- w1[2] = append0[3];
- w1[3] = append1[0];
- w2[0] = append1[1];
- w2[1] = append1[2];
- w2[2] = append1[3];
- w2[3] = append2[0];
+ case 56:
+ w[14] = 0x80;
break;
- case 13:
- w0[3] = w0[3] | append0[0] << 8;
- w1[0] = append0[0] >> 24 | append0[1] << 8;
- w1[1] = append0[1] >> 24 | append0[2] << 8;
- w1[2] = append0[2] >> 24 | append0[3] << 8;
- w1[3] = append0[3] >> 24 | append1[0] << 8;
- w2[0] = append1[0] >> 24 | append1[1] << 8;
- w2[1] = append1[1] >> 24 | append1[2] << 8;
- w2[2] = append1[2] >> 24 | append1[3] << 8;
- w2[3] = append1[3] >> 24 | append2[0] << 8;
- w3[0] = append2[0] >> 24;
+ case 57:
+ w[14] = w[14] | 0x8000;
break;
- case 14:
- w0[3] = w0[3] | append0[0] << 16;
- w1[0] = append0[0] >> 16 | append0[1] << 16;
- w1[1] = append0[1] >> 16 | append0[2] << 16;
- w1[2] = append0[2] >> 16 | append0[3] << 16;
- w1[3] = append0[3] >> 16 | append1[0] << 16;
- w2[0] = append1[0] >> 16 | append1[1] << 16;
- w2[1] = append1[1] >> 16 | append1[2] << 16;
- w2[2] = append1[2] >> 16 | append1[3] << 16;
- w2[3] = append1[3] >> 16 | append2[0] << 16;
- w3[0] = append2[0] >> 16;
+ case 58:
+ w[14] = w[14] | 0x800000;
break;
- case 15:
- w0[3] = w0[3] | append0[0] << 24;
- w1[0] = append0[0] >> 8 | append0[1] << 24;
- w1[1] = append0[1] >> 8 | append0[2] << 24;
- w1[2] = append0[2] >> 8 | append0[3] << 24;
- w1[3] = append0[3] >> 8 | append1[0] << 24;
- w2[0] = append1[0] >> 8 | append1[1] << 24;
- w2[1] = append1[1] >> 8 | append1[2] << 24;
- w2[2] = append1[2] >> 8 | append1[3] << 24;
- w2[3] = append1[3] >> 8 | append2[0] << 24;
- w3[0] = append2[0] >> 8;
+ case 59:
+ w[14] = w[14] | 0x80000000;
break;
- case 16:
- w1[0] = append0[0];
- w1[1] = append0[1];
- w1[2] = append0[2];
- w1[3] = append0[3];
- w2[0] = append1[0];
- w2[1] = append1[1];
- w2[2] = append1[2];
- w2[3] = append1[3];
- w3[0] = append2[0];
+ case 60:
+ w[15] = 0x80;
break;
- case 17:
- w1[0] = w1[0] | append0[0] << 8;
- w1[1] = append0[0] >> 24 | append0[1] << 8;
- w1[2] = append0[1] >> 24 | append0[2] << 8;
- w1[3] = append0[2] >> 24 | append0[3] << 8;
- w2[0] = append0[3] >> 24 | append1[0] << 8;
- w2[1] = append1[0] >> 24 | append1[1] << 8;
- w2[2] = append1[1] >> 24 | append1[2] << 8;
- w2[3] = append1[2] >> 24 | append1[3] << 8;
- w3[0] = append1[3] >> 24 | append2[0] << 8;
- w3[1] = append2[0] >> 24;
+ case 61:
+ w[15] = w[15] | 0x8000;
+ break;
+
+ case 62:
+ w[15] = w[15] | 0x800000;
break;
- case 18:
- w1[0] = w1[0] | append0[0] << 16;
- w1[1] = append0[0] >> 16 | append0[1] << 16;
- w1[2] = append0[1] >> 16 | append0[2] << 16;
- w1[3] = append0[2] >> 16 | append0[3] << 16;
- w2[0] = append0[3] >> 16 | append1[0] << 16;
- w2[1] = append1[0] >> 16 | append1[1] << 16;
- w2[2] = append1[1] >> 16 | append1[2] << 16;
- w2[3] = append1[2] >> 16 | append1[3] << 16;
- w3[0] = append1[3] >> 16 | append2[0] << 16;
- w3[1] = append2[0] >> 16;
+ case 63:
+ w[15] = w[15] | 0x80000000;
break;
+ }
+}
- case 19:
- w1[0] = w1[0] | append0[0] << 24;
- w1[1] = append0[0] >> 8 | append0[1] << 24;
- w1[2] = append0[1] >> 8 | append0[2] << 24;
- w1[3] = append0[2] >> 8 | append0[3] << 24;
- w2[0] = append0[3] >> 8 | append1[0] << 24;
- w2[1] = append1[0] >> 8 | append1[1] << 24;
- w2[2] = append1[1] >> 8 | append1[2] << 24;
- w2[3] = append1[2] >> 8 | append1[3] << 24;
- w3[0] = append1[3] >> 8 | append2[0] << 24;
- w3[1] = append2[0] >> 8;
+// before: append_0x80_8
+static void append_0x80_1x32 (u32 w[32], const u32 offset)
+{
+ switch (offset)
+ {
+ case 0:
+ w[ 0] = 0x80;
break;
- case 20:
- w1[1] = append0[0];
- w1[2] = append0[1];
- w1[3] = append0[2];
- w2[0] = append0[3];
- w2[1] = append1[0];
- w2[2] = append1[1];
- w2[3] = append1[2];
- w3[0] = append1[3];
- w3[1] = append2[0];
+ case 1:
+ w[ 0] = w[ 0] | 0x8000;
break;
- case 21:
- w1[1] = w1[1] | append0[0] << 8;
- w1[2] = append0[0] >> 24 | append0[1] << 8;
- w1[3] = append0[1] >> 24 | append0[2] << 8;
- w2[0] = append0[2] >> 24 | append0[3] << 8;
- w2[1] = append0[3] >> 24 | append1[0] << 8;
- w2[2] = append1[0] >> 24 | append1[1] << 8;
- w2[3] = append1[1] >> 24 | append1[2] << 8;
- w3[0] = append1[2] >> 24 | append1[3] << 8;
- w3[1] = append1[3] >> 24 | append2[0] << 8;
+ case 2:
+ w[ 0] = w[ 0] | 0x800000;
break;
- case 22:
- w1[1] = w1[1] | append0[0] << 16;
- w1[2] = append0[0] >> 16 | append0[1] << 16;
- w1[3] = append0[1] >> 16 | append0[2] << 16;
- w2[0] = append0[2] >> 16 | append0[3] << 16;
- w2[1] = append0[3] >> 16 | append1[0] << 16;
- w2[2] = append1[0] >> 16 | append1[1] << 16;
- w2[3] = append1[1] >> 16 | append1[2] << 16;
- w3[0] = append1[2] >> 16 | append1[3] << 16;
- w3[1] = append1[3] >> 16 | append2[0] << 16;
+ case 3:
+ w[ 0] = w[ 0] | 0x80000000;
break;
- case 23:
- w1[1] = w1[1] | append0[0] << 24;
- w1[2] = append0[0] >> 8 | append0[1] << 24;
- w1[3] = append0[1] >> 8 | append0[2] << 24;
- w2[0] = append0[2] >> 8 | append0[3] << 24;
- w2[1] = append0[3] >> 8 | append1[0] << 24;
- w2[2] = append1[0] >> 8 | append1[1] << 24;
- w2[3] = append1[1] >> 8 | append1[2] << 24;
- w3[0] = append1[2] >> 8 | append1[3] << 24;
- w3[1] = append1[3] >> 8 | append2[0] << 24;
+ case 4:
+ w[ 1] = 0x80;
break;
- case 24:
- w1[2] = append0[0];
- w1[3] = append0[1];
- w2[0] = append0[2];
- w2[1] = append0[3];
- w2[2] = append1[0];
- w2[3] = append1[1];
- w3[0] = append1[2];
- w3[1] = append1[3];
+ case 5:
+ w[ 1] = w[ 1] | 0x8000;
break;
- case 25:
- w1[2] = w1[2] | append0[0] << 8;
- w1[3] = append0[0] >> 24 | append0[1] << 8;
- w2[0] = append0[1] >> 24 | append0[2] << 8;
- w2[1] = append0[2] >> 24 | append0[3] << 8;
- w2[2] = append0[3] >> 24 | append1[0] << 8;
- w2[3] = append1[0] >> 24 | append1[1] << 8;
- w3[0] = append1[1] >> 24 | append1[2] << 8;
- w3[1] = append1[2] >> 24 | append1[3] << 8;
+ case 6:
+ w[ 1] = w[ 1] | 0x800000;
break;
- case 26:
- w1[2] = w1[2] | append0[0] << 16;
- w1[3] = append0[0] >> 16 | append0[1] << 16;
- w2[0] = append0[1] >> 16 | append0[2] << 16;
- w2[1] = append0[2] >> 16 | append0[3] << 16;
- w2[2] = append0[3] >> 16 | append1[0] << 16;
- w2[3] = append1[0] >> 16 | append1[1] << 16;
- w3[0] = append1[1] >> 16 | append1[2] << 16;
- w3[1] = append1[2] >> 16 | append1[3] << 16;
+ case 7:
+ w[ 1] = w[ 1] | 0x80000000;
break;
- case 27:
- w1[2] = w1[2] | append0[0] << 24;
- w1[3] = append0[0] >> 8 | append0[1] << 24;
- w2[0] = append0[1] >> 8 | append0[2] << 24;
- w2[1] = append0[2] >> 8 | append0[3] << 24;
- w2[2] = append0[3] >> 8 | append1[0] << 24;
- w2[3] = append1[0] >> 8 | append1[1] << 24;
- w3[0] = append1[1] >> 8 | append1[2] << 24;
- w3[1] = append1[2] >> 8 | append1[3] << 24;
+ case 8:
+ w[ 2] = 0x80;
break;
- case 28:
- w1[3] = append0[0];
- w2[0] = append0[1];
- w2[1] = append0[2];
- w2[2] = append0[3];
- w2[3] = append1[0];
- w3[0] = append1[1];
- w3[1] = append1[2];
+ case 9:
+ w[ 2] = w[ 2] | 0x8000;
break;
- case 29:
- w1[3] = w1[3] | append0[0] << 8;
- w2[0] = append0[0] >> 24 | append0[1] << 8;
- w2[1] = append0[1] >> 24 | append0[2] << 8;
- w2[2] = append0[2] >> 24 | append0[3] << 8;
- w2[3] = append0[3] >> 24 | append1[0] << 8;
- w3[0] = append1[0] >> 24 | append1[1] << 8;
- w3[1] = append1[1] >> 24 | append1[2] << 8;
+ case 10:
+ w[ 2] = w[ 2] | 0x800000;
break;
- case 30:
- w1[3] = w1[3] | append0[0] << 16;
- w2[0] = append0[0] >> 16 | append0[1] << 16;
- w2[1] = append0[1] >> 16 | append0[2] << 16;
- w2[2] = append0[2] >> 16 | append0[3] << 16;
- w2[3] = append0[3] >> 16 | append1[0] << 16;
- w3[0] = append1[0] >> 16 | append1[1] << 16;
- w3[1] = append1[1] >> 16 | append1[2] << 16;
+ case 11:
+ w[ 2] = w[ 2] | 0x80000000;
break;
- case 31:
- w1[3] = w1[3] | append0[0] << 24;
- w2[0] = append0[0] >> 8 | append0[1] << 24;
- w2[1] = append0[1] >> 8 | append0[2] << 24;
- w2[2] = append0[2] >> 8 | append0[3] << 24;
- w2[3] = append0[3] >> 8 | append1[0] << 24;
- w3[0] = append1[0] >> 8 | append1[1] << 24;
- w3[1] = append1[1] >> 8 | append1[2] << 24;
+ case 12:
+ w[ 3] = 0x80;
break;
- case 32:
- w2[0] = append0[0];
- w2[1] = append0[1];
- w2[2] = append0[2];
- w2[3] = append0[3];
- w3[0] = append1[0];
- w3[1] = append1[1];
+ case 13:
+ w[ 3] = w[ 3] | 0x8000;
break;
- }
-}
-static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
-{
- #ifdef IS_AMD
- const int offset_mod_4 = offset & 3;
+ case 14:
+ w[ 3] = w[ 3] | 0x800000;
+ break;
- const int offset_minus_4 = 4 - offset;
+ case 15:
+ w[ 3] = w[ 3] | 0x80000000;
+ break;
- switch (offset / 4)
- {
- case 0:
- w3[2] = amd_bytealign ( 0, w3[1], offset_minus_4);
- w3[1] = amd_bytealign (w3[1], w3[0], offset_minus_4);
- w3[0] = amd_bytealign (w3[0], w2[3], offset_minus_4);
- w2[3] = amd_bytealign (w2[3], w2[2], offset_minus_4);
- w2[2] = amd_bytealign (w2[2], w2[1], offset_minus_4);
- w2[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
- w2[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
- w1[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
- w1[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
- w1[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w1[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w0[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w0[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w0[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w0[0] = amd_bytealign (w0[0], 0, offset_minus_4);
+ case 16:
+ w[ 4] = 0x80;
+ break;
- if (offset_mod_4 == 0)
- {
- w0[0] = w0[1];
- w0[1] = w0[2];
- w0[2] = w0[3];
- w0[3] = w1[0];
- w1[0] = w1[1];
- w1[1] = w1[2];
- w1[2] = w1[3];
- w1[3] = w2[0];
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 17:
+ w[ 4] = w[ 4] | 0x8000;
+ break;
+ case 18:
+ w[ 4] = w[ 4] | 0x800000;
break;
- case 1:
- w3[2] = amd_bytealign ( 0, w3[0], offset_minus_4);
- w3[1] = amd_bytealign (w3[0], w2[3], offset_minus_4);
- w3[0] = amd_bytealign (w2[3], w2[2], offset_minus_4);
- w2[3] = amd_bytealign (w2[2], w2[1], offset_minus_4);
- w2[2] = amd_bytealign (w2[1], w2[0], offset_minus_4);
- w2[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
- w2[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
- w1[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
- w1[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w1[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w1[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w0[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w0[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w0[1] = amd_bytealign (w0[0], 0, offset_minus_4);
- w0[0] = 0;
+ case 19:
+ w[ 4] = w[ 4] | 0x80000000;
+ break;
+
+ case 20:
+ w[ 5] = 0x80;
+ break;
- if (offset_mod_4 == 0)
- {
- w0[1] = w0[2];
- w0[2] = w0[3];
- w0[3] = w1[0];
- w1[0] = w1[1];
- w1[1] = w1[2];
- w1[2] = w1[3];
- w1[3] = w2[0];
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 21:
+ w[ 5] = w[ 5] | 0x8000;
+ break;
+ case 22:
+ w[ 5] = w[ 5] | 0x800000;
break;
- case 2:
- w3[2] = amd_bytealign ( 0, w2[3], offset_minus_4);
- w3[1] = amd_bytealign (w2[3], w2[2], offset_minus_4);
- w3[0] = amd_bytealign (w2[2], w2[1], offset_minus_4);
- w2[3] = amd_bytealign (w2[1], w2[0], offset_minus_4);
- w2[2] = amd_bytealign (w2[0], w1[3], offset_minus_4);
- w2[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
- w2[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
- w1[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w1[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w1[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w1[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w0[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w0[2] = amd_bytealign (w0[0], 0, offset_minus_4);
- w0[1] = 0;
- w0[0] = 0;
+ case 23:
+ w[ 5] = w[ 5] | 0x80000000;
+ break;
- if (offset_mod_4 == 0)
- {
- w0[2] = w0[3];
- w0[3] = w1[0];
- w1[0] = w1[1];
- w1[1] = w1[2];
- w1[2] = w1[3];
- w1[3] = w2[0];
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 24:
+ w[ 6] = 0x80;
+ break;
+ case 25:
+ w[ 6] = w[ 6] | 0x8000;
break;
- case 3:
- w3[2] = amd_bytealign ( 0, w2[2], offset_minus_4);
- w3[1] = amd_bytealign (w2[2], w2[1], offset_minus_4);
- w3[0] = amd_bytealign (w2[1], w2[0], offset_minus_4);
- w2[3] = amd_bytealign (w2[0], w1[3], offset_minus_4);
- w2[2] = amd_bytealign (w1[3], w1[2], offset_minus_4);
- w2[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
- w2[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w1[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w1[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w1[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w1[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w0[3] = amd_bytealign (w0[0], 0, offset_minus_4);
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 26:
+ w[ 6] = w[ 6] | 0x800000;
+ break;
- if (offset_mod_4 == 0)
- {
- w0[3] = w1[0];
- w1[0] = w1[1];
- w1[1] = w1[2];
- w1[2] = w1[3];
- w1[3] = w2[0];
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 27:
+ w[ 6] = w[ 6] | 0x80000000;
+ break;
+ case 28:
+ w[ 7] = 0x80;
break;
- case 4:
- w3[2] = amd_bytealign ( 0, w2[1], offset_minus_4);
- w3[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
- w3[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
- w2[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
- w2[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
- w2[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w2[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w1[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w1[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w1[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w1[0] = amd_bytealign (w0[0], 0, offset_minus_4);
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 29:
+ w[ 7] = w[ 7] | 0x8000;
+ break;
- if (offset_mod_4 == 0)
- {
- w1[0] = w1[1];
- w1[1] = w1[2];
- w1[2] = w1[3];
- w1[3] = w2[0];
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 30:
+ w[ 7] = w[ 7] | 0x800000;
+ break;
+ case 31:
+ w[ 7] = w[ 7] | 0x80000000;
break;
- case 5:
- w3[2] = amd_bytealign ( 0, w2[0], offset_minus_4);
- w3[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
- w3[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
- w2[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
- w2[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w2[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w2[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w1[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w1[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w1[1] = amd_bytealign (w0[0], 0, offset_minus_4);
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 32:
+ w[ 8] = 0x80;
+ break;
- if (offset_mod_4 == 0)
- {
- w1[1] = w1[2];
- w1[2] = w1[3];
- w1[3] = w2[0];
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 33:
+ w[ 8] = w[ 8] | 0x8000;
+ break;
+ case 34:
+ w[ 8] = w[ 8] | 0x800000;
break;
- case 6:
- w3[2] = amd_bytealign ( 0, w1[3], offset_minus_4);
- w3[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
- w3[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
- w2[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w2[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w2[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w2[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w1[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w1[2] = amd_bytealign (w0[0], 0, offset_minus_4);
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 35:
+ w[ 8] = w[ 8] | 0x80000000;
+ break;
- if (offset_mod_4 == 0)
- {
- w1[2] = w1[3];
- w1[3] = w2[0];
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 36:
+ w[ 9] = 0x80;
+ break;
+ case 37:
+ w[ 9] = w[ 9] | 0x8000;
break;
- case 7:
- w3[2] = amd_bytealign ( 0, w1[2], offset_minus_4);
- w3[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
- w3[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w2[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w2[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w2[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w2[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w1[3] = amd_bytealign (w0[0], 0, offset_minus_4);
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 38:
+ w[ 9] = w[ 9] | 0x800000;
+ break;
- if (offset_mod_4 == 0)
- {
- w1[3] = w2[0];
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 39:
+ w[ 9] = w[ 9] | 0x80000000;
+ break;
+
+ case 40:
+ w[10] = 0x80;
+ break;
+
+ case 41:
+ w[10] = w[10] | 0x8000;
+ break;
+ case 42:
+ w[10] = w[10] | 0x800000;
break;
- case 8:
- w3[2] = amd_bytealign ( 0, w1[1], offset_minus_4);
- w3[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w3[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w2[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w2[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w2[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w2[0] = amd_bytealign (w0[0], 0, offset_minus_4);
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
-
- if (offset_mod_4 == 0)
- {
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 43:
+ w[10] = w[10] | 0x80000000;
+ break;
+ case 44:
+ w[11] = 0x80;
break;
- case 9:
- w3[2] = amd_bytealign ( 0, w1[0], offset_minus_4);
- w3[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w3[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w2[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w2[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w2[1] = amd_bytealign (w0[0], 0, offset_minus_4);
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 45:
+ w[11] = w[11] | 0x8000;
+ break;
- if (offset_mod_4 == 0)
- {
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 46:
+ w[11] = w[11] | 0x800000;
+ break;
+ case 47:
+ w[11] = w[11] | 0x80000000;
break;
- case 10:
- w3[2] = amd_bytealign ( 0, w0[3], offset_minus_4);
- w3[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w3[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w2[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w2[2] = amd_bytealign (w0[0], 0, offset_minus_4);
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 48:
+ w[12] = 0x80;
+ break;
- if (offset_mod_4 == 0)
- {
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 49:
+ w[12] = w[12] | 0x8000;
+ break;
+ case 50:
+ w[12] = w[12] | 0x800000;
break;
- case 11:
- w3[2] = amd_bytealign ( 0, w0[2], offset_minus_4);
- w3[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w3[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w2[3] = amd_bytealign (w0[0], 0, offset_minus_4);
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 51:
+ w[12] = w[12] | 0x80000000;
+ break;
- if (offset_mod_4 == 0)
- {
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 52:
+ w[13] = 0x80;
+ break;
+ case 53:
+ w[13] = w[13] | 0x8000;
break;
- case 12:
- w3[2] = amd_bytealign ( 0, w0[1], offset_minus_4);
- w3[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w3[0] = amd_bytealign (w0[0], 0, offset_minus_4);
- w2[3] = 0;
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 54:
+ w[13] = w[13] | 0x800000;
+ break;
- if (offset_mod_4 == 0)
- {
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 55:
+ w[13] = w[13] | 0x80000000;
+ break;
+ case 56:
+ w[14] = 0x80;
break;
- case 13:
- w3[2] = amd_bytealign ( 0, w0[0], offset_minus_4);
- w3[1] = amd_bytealign (w0[0], 0, offset_minus_4);
- w3[0] = 0;
- w2[3] = 0;
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 57:
+ w[14] = w[14] | 0x8000;
+ break;
- if (offset_mod_4 == 0)
- {
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 58:
+ w[14] = w[14] | 0x800000;
+ break;
+ case 59:
+ w[14] = w[14] | 0x80000000;
break;
- }
- #endif
- #ifdef IS_NV
- const int offset_minus_4 = 4 - (offset % 4);
+ case 60:
+ w[15] = 0x80;
+ break;
- const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+ case 61:
+ w[15] = w[15] | 0x8000;
+ break;
- switch (offset / 4)
- {
- case 0:
- w3[1] = __byte_perm (w3[0], w3[1], selector);
- w3[0] = __byte_perm (w2[3], w3[0], selector);
- w2[3] = __byte_perm (w2[2], w2[3], selector);
- w2[2] = __byte_perm (w2[1], w2[2], selector);
- w2[1] = __byte_perm (w2[0], w2[1], selector);
- w2[0] = __byte_perm (w1[3], w2[0], selector);
- w1[3] = __byte_perm (w1[2], w1[3], selector);
- w1[2] = __byte_perm (w1[1], w1[2], selector);
- w1[1] = __byte_perm (w1[0], w1[1], selector);
- w1[0] = __byte_perm (w0[3], w1[0], selector);
- w0[3] = __byte_perm (w0[2], w0[3], selector);
- w0[2] = __byte_perm (w0[1], w0[2], selector);
- w0[1] = __byte_perm (w0[0], w0[1], selector);
- w0[0] = __byte_perm ( 0, w0[0], selector);
+ case 62:
+ w[15] = w[15] | 0x800000;
+ break;
+ case 63:
+ w[15] = w[15] | 0x80000000;
break;
- case 1:
- w3[1] = __byte_perm (w2[3], w3[0], selector);
- w3[0] = __byte_perm (w2[2], w2[3], selector);
- w2[3] = __byte_perm (w2[1], w2[2], selector);
- w2[2] = __byte_perm (w2[0], w2[1], selector);
- w2[1] = __byte_perm (w1[3], w2[0], selector);
- w2[0] = __byte_perm (w1[2], w1[3], selector);
- w1[3] = __byte_perm (w1[1], w1[2], selector);
- w1[2] = __byte_perm (w1[0], w1[1], selector);
- w1[1] = __byte_perm (w0[3], w1[0], selector);
- w1[0] = __byte_perm (w0[2], w0[3], selector);
- w0[3] = __byte_perm (w0[1], w0[2], selector);
- w0[2] = __byte_perm (w0[0], w0[1], selector);
- w0[1] = __byte_perm ( 0, w0[0], selector);
- w0[0] = 0;
+ case 64:
+ w[16] = 0x80;
+ break;
+ case 65:
+ w[16] = w[16] | 0x8000;
break;
- case 2:
- w3[1] = __byte_perm (w2[2], w2[3], selector);
- w3[0] = __byte_perm (w2[1], w2[2], selector);
- w2[3] = __byte_perm (w2[0], w2[1], selector);
- w2[2] = __byte_perm (w1[3], w2[0], selector);
- w2[1] = __byte_perm (w1[2], w1[3], selector);
- w2[0] = __byte_perm (w1[1], w1[2], selector);
- w1[3] = __byte_perm (w1[0], w1[1], selector);
- w1[2] = __byte_perm (w0[3], w1[0], selector);
- w1[1] = __byte_perm (w0[2], w0[3], selector);
- w1[0] = __byte_perm (w0[1], w0[2], selector);
- w0[3] = __byte_perm (w0[0], w0[1], selector);
- w0[2] = __byte_perm ( 0, w0[0], selector);
- w0[1] = 0;
- w0[0] = 0;
+ case 66:
+ w[16] = w[16] | 0x800000;
+ break;
+ case 67:
+ w[16] = w[16] | 0x80000000;
break;
- case 3:
- w3[1] = __byte_perm (w2[1], w2[2], selector);
- w3[0] = __byte_perm (w2[0], w2[1], selector);
- w2[3] = __byte_perm (w1[3], w2[0], selector);
- w2[2] = __byte_perm (w1[2], w1[3], selector);
- w2[1] = __byte_perm (w1[1], w1[2], selector);
- w2[0] = __byte_perm (w1[0], w1[1], selector);
- w1[3] = __byte_perm (w0[3], w1[0], selector);
- w1[2] = __byte_perm (w0[2], w0[3], selector);
- w1[1] = __byte_perm (w0[1], w0[2], selector);
- w1[0] = __byte_perm (w0[0], w0[1], selector);
- w0[3] = __byte_perm ( 0, w0[0], selector);
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 68:
+ w[17] = 0x80;
+ break;
+ case 69:
+ w[17] = w[17] | 0x8000;
break;
- case 4:
- w3[1] = __byte_perm (w2[0], w2[1], selector);
- w3[0] = __byte_perm (w1[3], w2[0], selector);
- w2[3] = __byte_perm (w1[2], w1[3], selector);
- w2[2] = __byte_perm (w1[1], w1[2], selector);
- w2[1] = __byte_perm (w1[0], w1[1], selector);
- w2[0] = __byte_perm (w0[3], w1[0], selector);
- w1[3] = __byte_perm (w0[2], w0[3], selector);
- w1[2] = __byte_perm (w0[1], w0[2], selector);
- w1[1] = __byte_perm (w0[0], w0[1], selector);
- w1[0] = __byte_perm ( 0, w0[0], selector);
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 70:
+ w[17] = w[17] | 0x800000;
+ break;
+ case 71:
+ w[17] = w[17] | 0x80000000;
break;
- case 5:
- w3[1] = __byte_perm (w1[3], w2[0], selector);
- w3[0] = __byte_perm (w1[2], w1[3], selector);
- w2[3] = __byte_perm (w1[1], w1[2], selector);
- w2[2] = __byte_perm (w1[0], w1[1], selector);
- w2[1] = __byte_perm (w0[3], w1[0], selector);
- w2[0] = __byte_perm (w0[2], w0[3], selector);
- w1[3] = __byte_perm (w0[1], w0[2], selector);
- w1[2] = __byte_perm (w0[0], w0[1], selector);
- w1[1] = __byte_perm ( 0, w0[0], selector);
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 72:
+ w[18] = 0x80;
+ break;
+ case 73:
+ w[18] = w[18] | 0x8000;
break;
- case 6:
- w3[1] = __byte_perm (w1[2], w1[3], selector);
- w3[0] = __byte_perm (w1[1], w1[2], selector);
- w2[3] = __byte_perm (w1[0], w1[1], selector);
- w2[2] = __byte_perm (w0[3], w1[0], selector);
- w2[1] = __byte_perm (w0[2], w0[3], selector);
- w2[0] = __byte_perm (w0[1], w0[2], selector);
- w1[3] = __byte_perm (w0[0], w0[1], selector);
- w1[2] = __byte_perm ( 0, w0[0], selector);
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 74:
+ w[18] = w[18] | 0x800000;
+ break;
+ case 75:
+ w[18] = w[18] | 0x80000000;
break;
- case 7:
- w3[1] = __byte_perm (w1[1], w1[2], selector);
- w3[0] = __byte_perm (w1[0], w1[1], selector);
- w2[3] = __byte_perm (w0[3], w1[0], selector);
- w2[2] = __byte_perm (w0[2], w0[3], selector);
- w2[1] = __byte_perm (w0[1], w0[2], selector);
- w2[0] = __byte_perm (w0[0], w0[1], selector);
- w1[3] = __byte_perm ( 0, w0[0], selector);
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 76:
+ w[19] = 0x80;
+ break;
+ case 77:
+ w[19] = w[19] | 0x8000;
break;
- case 8:
- w3[1] = __byte_perm (w1[0], w1[1], selector);
- w3[0] = __byte_perm (w0[3], w1[0], selector);
- w2[3] = __byte_perm (w0[2], w0[3], selector);
- w2[2] = __byte_perm (w0[1], w0[2], selector);
- w2[1] = __byte_perm (w0[0], w0[1], selector);
- w2[0] = __byte_perm ( 0, w0[0], selector);
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 78:
+ w[19] = w[19] | 0x800000;
+ break;
+ case 79:
+ w[19] = w[19] | 0x80000000;
break;
- case 9:
- w3[1] = __byte_perm (w0[3], w1[0], selector);
- w3[0] = __byte_perm (w0[2], w0[3], selector);
- w2[3] = __byte_perm (w0[1], w0[2], selector);
- w2[2] = __byte_perm (w0[0], w0[1], selector);
- w2[1] = __byte_perm ( 0, w0[0], selector);
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 80:
+ w[20] = 0x80;
+ break;
+ case 81:
+ w[20] = w[20] | 0x8000;
break;
- case 10:
- w3[1] = __byte_perm (w0[2], w0[3], selector);
- w3[0] = __byte_perm (w0[1], w0[2], selector);
- w2[3] = __byte_perm (w0[0], w0[1], selector);
- w2[2] = __byte_perm ( 0, w0[0], selector);
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 82:
+ w[20] = w[20] | 0x800000;
+ break;
+ case 83:
+ w[20] = w[20] | 0x80000000;
break;
- case 11:
- w3[1] = __byte_perm (w0[1], w0[2], selector);
- w3[0] = __byte_perm (w0[0], w0[1], selector);
- w2[3] = __byte_perm ( 0, w0[0], selector);
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 84:
+ w[21] = 0x80;
+ break;
+ case 85:
+ w[21] = w[21] | 0x8000;
break;
- case 12:
- w3[1] = __byte_perm (w0[0], w0[1], selector);
- w3[0] = __byte_perm ( 0, w0[0], selector);
- w2[3] = 0;
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 86:
+ w[21] = w[21] | 0x800000;
+ break;
+ case 87:
+ w[21] = w[21] | 0x80000000;
break;
- case 13:
- w3[1] = __byte_perm ( 0, w0[0], selector);
- w3[0] = 0;
- w2[3] = 0;
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 88:
+ w[22] = 0x80;
+ break;
+
+ case 89:
+ w[22] = w[22] | 0x8000;
+ break;
+ case 90:
+ w[22] = w[22] | 0x800000;
break;
- }
- #endif
-}
-static void switch_buffer_by_offset_be (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
-{
- #ifdef IS_AMD
- switch (offset / 4)
- {
- case 0:
- w3[2] = amd_bytealign (w3[1], 0, offset);
- w3[1] = amd_bytealign (w3[0], w3[1], offset);
- w3[0] = amd_bytealign (w2[3], w3[0], offset);
- w2[3] = amd_bytealign (w2[2], w2[3], offset);
- w2[2] = amd_bytealign (w2[1], w2[2], offset);
- w2[1] = amd_bytealign (w2[0], w2[1], offset);
- w2[0] = amd_bytealign (w1[3], w2[0], offset);
- w1[3] = amd_bytealign (w1[2], w1[3], offset);
- w1[2] = amd_bytealign (w1[1], w1[2], offset);
- w1[1] = amd_bytealign (w1[0], w1[1], offset);
- w1[0] = amd_bytealign (w0[3], w1[0], offset);
- w0[3] = amd_bytealign (w0[2], w0[3], offset);
- w0[2] = amd_bytealign (w0[1], w0[2], offset);
- w0[1] = amd_bytealign (w0[0], w0[1], offset);
- w0[0] = amd_bytealign ( 0, w0[0], offset);
+ case 91:
+ w[22] = w[22] | 0x80000000;
+ break;
+
+ case 92:
+ w[23] = 0x80;
break;
- case 1:
- w3[2] = amd_bytealign (w3[0], 0, offset);
- w3[1] = amd_bytealign (w2[3], w3[0], offset);
- w3[0] = amd_bytealign (w2[2], w2[3], offset);
- w2[3] = amd_bytealign (w2[1], w2[2], offset);
- w2[2] = amd_bytealign (w2[0], w2[1], offset);
- w2[1] = amd_bytealign (w1[3], w2[0], offset);
- w2[0] = amd_bytealign (w1[2], w1[3], offset);
- w1[3] = amd_bytealign (w1[1], w1[2], offset);
- w1[2] = amd_bytealign (w1[0], w1[1], offset);
- w1[1] = amd_bytealign (w0[3], w1[0], offset);
- w1[0] = amd_bytealign (w0[2], w0[3], offset);
- w0[3] = amd_bytealign (w0[1], w0[2], offset);
- w0[2] = amd_bytealign (w0[0], w0[1], offset);
- w0[1] = amd_bytealign ( 0, w0[0], offset);
- w0[0] = 0;
+ case 93:
+ w[23] = w[23] | 0x8000;
break;
- case 2:
- w3[2] = amd_bytealign (w2[3], 0, offset);
- w3[1] = amd_bytealign (w2[2], w2[3], offset);
- w3[0] = amd_bytealign (w2[1], w2[2], offset);
- w2[3] = amd_bytealign (w2[0], w2[1], offset);
- w2[2] = amd_bytealign (w1[3], w2[0], offset);
- w2[1] = amd_bytealign (w1[2], w1[3], offset);
- w2[0] = amd_bytealign (w1[1], w1[2], offset);
- w1[3] = amd_bytealign (w1[0], w1[1], offset);
- w1[2] = amd_bytealign (w0[3], w1[0], offset);
- w1[1] = amd_bytealign (w0[2], w0[3], offset);
- w1[0] = amd_bytealign (w0[1], w0[2], offset);
- w0[3] = amd_bytealign (w0[0], w0[1], offset);
- w0[2] = amd_bytealign ( 0, w0[0], offset);
- w0[1] = 0;
- w0[0] = 0;
+ case 94:
+ w[23] = w[23] | 0x800000;
break;
- case 3:
- w3[2] = amd_bytealign (w2[2], 0, offset);
- w3[1] = amd_bytealign (w2[1], w2[2], offset);
- w3[0] = amd_bytealign (w2[0], w2[1], offset);
- w2[3] = amd_bytealign (w1[3], w2[0], offset);
- w2[2] = amd_bytealign (w1[2], w1[3], offset);
- w2[1] = amd_bytealign (w1[1], w1[2], offset);
- w2[0] = amd_bytealign (w1[0], w1[1], offset);
- w1[3] = amd_bytealign (w0[3], w1[0], offset);
- w1[2] = amd_bytealign (w0[2], w0[3], offset);
- w1[1] = amd_bytealign (w0[1], w0[2], offset);
- w1[0] = amd_bytealign (w0[0], w0[1], offset);
- w0[3] = amd_bytealign ( 0, w0[0], offset);
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 95:
+ w[23] = w[23] | 0x80000000;
break;
- case 4:
- w3[2] = amd_bytealign (w2[1], 0, offset);
- w3[1] = amd_bytealign (w2[0], w2[1], offset);
- w3[0] = amd_bytealign (w1[3], w2[0], offset);
- w2[3] = amd_bytealign (w1[2], w1[3], offset);
- w2[2] = amd_bytealign (w1[1], w1[2], offset);
- w2[1] = amd_bytealign (w1[0], w1[1], offset);
- w2[0] = amd_bytealign (w0[3], w1[0], offset);
- w1[3] = amd_bytealign (w0[2], w0[3], offset);
- w1[2] = amd_bytealign (w0[1], w0[2], offset);
- w1[1] = amd_bytealign (w0[0], w0[1], offset);
- w1[0] = amd_bytealign ( 0, w0[0], offset);
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 96:
+ w[24] = 0x80;
break;
- case 5:
- w3[2] = amd_bytealign (w2[0], 0, offset);
- w3[1] = amd_bytealign (w1[3], w2[0], offset);
- w3[0] = amd_bytealign (w1[2], w1[3], offset);
- w2[3] = amd_bytealign (w1[1], w1[2], offset);
- w2[2] = amd_bytealign (w1[0], w1[1], offset);
- w2[1] = amd_bytealign (w0[3], w1[0], offset);
- w2[0] = amd_bytealign (w0[2], w0[3], offset);
- w1[3] = amd_bytealign (w0[1], w0[2], offset);
- w1[2] = amd_bytealign (w0[0], w0[1], offset);
- w1[1] = amd_bytealign ( 0, w0[0], offset);
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 97:
+ w[24] = w[24] | 0x8000;
break;
- case 6:
- w3[2] = amd_bytealign (w1[3], 0, offset);
- w3[1] = amd_bytealign (w1[2], w1[3], offset);
- w3[0] = amd_bytealign (w1[1], w1[2], offset);
- w2[3] = amd_bytealign (w1[0], w1[1], offset);
- w2[2] = amd_bytealign (w0[3], w1[0], offset);
- w2[1] = amd_bytealign (w0[2], w0[3], offset);
- w2[0] = amd_bytealign (w0[1], w0[2], offset);
- w1[3] = amd_bytealign (w0[0], w0[1], offset);
- w1[2] = amd_bytealign ( 0, w0[0], offset);
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 98:
+ w[24] = w[24] | 0x800000;
break;
- case 7:
- w3[2] = amd_bytealign (w1[2], 0, offset);
- w3[1] = amd_bytealign (w1[1], w1[2], offset);
- w3[0] = amd_bytealign (w1[0], w1[1], offset);
- w2[3] = amd_bytealign (w0[3], w1[0], offset);
- w2[2] = amd_bytealign (w0[2], w0[3], offset);
- w2[1] = amd_bytealign (w0[1], w0[2], offset);
- w2[0] = amd_bytealign (w0[0], w0[1], offset);
- w1[3] = amd_bytealign ( 0, w0[0], offset);
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 99:
+ w[24] = w[24] | 0x80000000;
break;
- case 8:
- w3[2] = amd_bytealign (w1[1], 0, offset);
- w3[1] = amd_bytealign (w1[0], w1[1], offset);
- w3[0] = amd_bytealign (w0[3], w1[0], offset);
- w2[3] = amd_bytealign (w0[2], w0[3], offset);
- w2[2] = amd_bytealign (w0[1], w0[2], offset);
- w2[1] = amd_bytealign (w0[0], w0[1], offset);
- w2[0] = amd_bytealign ( 0, w0[0], offset);
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 100:
+ w[25] = 0x80;
break;
- case 9:
- w3[2] = amd_bytealign (w1[0], 0, offset);
- w3[1] = amd_bytealign (w0[3], w1[0], offset);
- w3[0] = amd_bytealign (w0[2], w0[3], offset);
- w2[3] = amd_bytealign (w0[1], w0[2], offset);
- w2[2] = amd_bytealign (w0[0], w0[1], offset);
- w2[1] = amd_bytealign ( 0, w0[0], offset);
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 101:
+ w[25] = w[25] | 0x8000;
break;
- case 10:
- w3[2] = amd_bytealign (w0[3], 0, offset);
- w3[1] = amd_bytealign (w0[2], w0[3], offset);
- w3[0] = amd_bytealign (w0[1], w0[2], offset);
- w2[3] = amd_bytealign (w0[0], w0[1], offset);
- w2[2] = amd_bytealign ( 0, w0[0], offset);
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 102:
+ w[25] = w[25] | 0x800000;
break;
- case 11:
- w3[2] = amd_bytealign (w0[2], 0, offset);
- w3[1] = amd_bytealign (w0[1], w0[2], offset);
- w3[0] = amd_bytealign (w0[0], w0[1], offset);
- w2[3] = amd_bytealign ( 0, w0[0], offset);
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 103:
+ w[25] = w[25] | 0x80000000;
break;
- case 12:
- w3[2] = amd_bytealign (w0[1], 0, offset);
- w3[1] = amd_bytealign (w0[0], w0[1], offset);
- w3[0] = amd_bytealign ( 0, w0[0], offset);
- w2[3] = 0;
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 104:
+ w[26] = 0x80;
break;
- case 13:
- w3[2] = amd_bytealign (w0[0], 0, offset);
- w3[1] = amd_bytealign ( 0, w0[0], offset);
- w3[0] = 0;
- w2[3] = 0;
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 105:
+ w[26] = w[26] | 0x8000;
+ break;
+
+ case 106:
+ w[26] = w[26] | 0x800000;
+ break;
+
+ case 107:
+ w[26] = w[26] | 0x80000000;
+ break;
+
+ case 108:
+ w[27] = 0x80;
+ break;
+
+ case 109:
+ w[27] = w[27] | 0x8000;
+ break;
+
+ case 110:
+ w[27] = w[27] | 0x800000;
break;
- }
- #endif
- #ifdef IS_NV
- const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
+ case 111:
+ w[27] = w[27] | 0x80000000;
+ break;
- switch (offset / 4)
- {
- case 0:
- w3[1] = __byte_perm (w3[1], w3[0], selector);
- w3[0] = __byte_perm (w3[0], w2[3], selector);
- w2[3] = __byte_perm (w2[3], w2[2], selector);
- w2[2] = __byte_perm (w2[2], w2[1], selector);
- w2[1] = __byte_perm (w2[1], w2[0], selector);
- w2[0] = __byte_perm (w2[0], w1[3], selector);
- w1[3] = __byte_perm (w1[3], w1[2], selector);
- w1[2] = __byte_perm (w1[2], w1[1], selector);
- w1[1] = __byte_perm (w1[1], w1[0], selector);
- w1[0] = __byte_perm (w1[0], w0[3], selector);
- w0[3] = __byte_perm (w0[3], w0[2], selector);
- w0[2] = __byte_perm (w0[2], w0[1], selector);
- w0[1] = __byte_perm (w0[1], w0[0], selector);
- w0[0] = __byte_perm (w0[0], 0, selector);
+ case 112:
+ w[28] = 0x80;
break;
- case 1:
- w3[1] = __byte_perm (w3[0], w2[3], selector);
- w3[0] = __byte_perm (w2[3], w2[2], selector);
- w2[3] = __byte_perm (w2[2], w2[1], selector);
- w2[2] = __byte_perm (w2[1], w2[0], selector);
- w2[1] = __byte_perm (w2[0], w1[3], selector);
- w2[0] = __byte_perm (w1[3], w1[2], selector);
- w1[3] = __byte_perm (w1[2], w1[1], selector);
- w1[2] = __byte_perm (w1[1], w1[0], selector);
- w1[1] = __byte_perm (w1[0], w0[3], selector);
- w1[0] = __byte_perm (w0[3], w0[2], selector);
- w0[3] = __byte_perm (w0[2], w0[1], selector);
- w0[2] = __byte_perm (w0[1], w0[0], selector);
- w0[1] = __byte_perm (w0[0], 0, selector);
- w0[0] = 0;
+ case 113:
+ w[28] = w[28] | 0x8000;
break;
- case 2:
- w3[1] = __byte_perm (w2[3], w2[2], selector);
- w3[0] = __byte_perm (w2[2], w2[1], selector);
- w2[3] = __byte_perm (w2[1], w2[0], selector);
- w2[2] = __byte_perm (w2[0], w1[3], selector);
- w2[1] = __byte_perm (w1[3], w1[2], selector);
- w2[0] = __byte_perm (w1[2], w1[1], selector);
- w1[3] = __byte_perm (w1[1], w1[0], selector);
- w1[2] = __byte_perm (w1[0], w0[3], selector);
- w1[1] = __byte_perm (w0[3], w0[2], selector);
- w1[0] = __byte_perm (w0[2], w0[1], selector);
- w0[3] = __byte_perm (w0[1], w0[0], selector);
- w0[2] = __byte_perm (w0[0], 0, selector);
- w0[1] = 0;
- w0[0] = 0;
+ case 114:
+ w[28] = w[28] | 0x800000;
break;
- case 3:
- w3[1] = __byte_perm (w2[2], w2[1], selector);
- w3[0] = __byte_perm (w2[1], w2[0], selector);
- w2[3] = __byte_perm (w2[0], w1[3], selector);
- w2[2] = __byte_perm (w1[3], w1[2], selector);
- w2[1] = __byte_perm (w1[2], w1[1], selector);
- w2[0] = __byte_perm (w1[1], w1[0], selector);
- w1[3] = __byte_perm (w1[0], w0[3], selector);
- w1[2] = __byte_perm (w0[3], w0[2], selector);
- w1[1] = __byte_perm (w0[2], w0[1], selector);
- w1[0] = __byte_perm (w0[1], w0[0], selector);
- w0[3] = __byte_perm (w0[0], 0, selector);
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 115:
+ w[28] = w[28] | 0x80000000;
break;
- case 4:
- w3[1] = __byte_perm (w2[1], w2[0], selector);
- w3[0] = __byte_perm (w2[0], w1[3], selector);
- w2[3] = __byte_perm (w1[3], w1[2], selector);
- w2[2] = __byte_perm (w1[2], w1[1], selector);
- w2[1] = __byte_perm (w1[1], w1[0], selector);
- w2[0] = __byte_perm (w1[0], w0[3], selector);
- w1[3] = __byte_perm (w0[3], w0[2], selector);
- w1[2] = __byte_perm (w0[2], w0[1], selector);
- w1[1] = __byte_perm (w0[1], w0[0], selector);
- w1[0] = __byte_perm (w0[0], 0, selector);
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 116:
+ w[29] = 0x80;
break;
- case 5:
- w3[1] = __byte_perm (w2[0], w1[3], selector);
- w3[0] = __byte_perm (w1[3], w1[2], selector);
- w2[3] = __byte_perm (w1[2], w1[1], selector);
- w2[2] = __byte_perm (w1[1], w1[0], selector);
- w2[1] = __byte_perm (w1[0], w0[3], selector);
- w2[0] = __byte_perm (w0[3], w0[2], selector);
- w1[3] = __byte_perm (w0[2], w0[1], selector);
- w1[2] = __byte_perm (w0[1], w0[0], selector);
- w1[1] = __byte_perm (w0[0], 0, selector);
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 117:
+ w[29] = w[29] | 0x8000;
break;
- case 6:
- w3[1] = __byte_perm (w1[3], w1[2], selector);
- w3[0] = __byte_perm (w1[2], w1[1], selector);
- w2[3] = __byte_perm (w1[1], w1[0], selector);
- w2[2] = __byte_perm (w1[0], w0[3], selector);
- w2[1] = __byte_perm (w0[3], w0[2], selector);
- w2[0] = __byte_perm (w0[2], w0[1], selector);
- w1[3] = __byte_perm (w0[1], w0[0], selector);
- w1[2] = __byte_perm (w0[0], 0, selector);
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 118:
+ w[29] = w[29] | 0x800000;
break;
- case 7:
- w3[1] = __byte_perm (w1[2], w1[1], selector);
- w3[0] = __byte_perm (w1[1], w1[0], selector);
- w2[3] = __byte_perm (w1[0], w0[3], selector);
- w2[2] = __byte_perm (w0[3], w0[2], selector);
- w2[1] = __byte_perm (w0[2], w0[1], selector);
- w2[0] = __byte_perm (w0[1], w0[0], selector);
- w1[3] = __byte_perm (w0[0], 0, selector);
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 119:
+ w[29] = w[29] | 0x80000000;
break;
- case 8:
- w3[1] = __byte_perm (w1[1], w1[0], selector);
- w3[0] = __byte_perm (w1[0], w0[3], selector);
- w2[3] = __byte_perm (w0[3], w0[2], selector);
- w2[2] = __byte_perm (w0[2], w0[1], selector);
- w2[1] = __byte_perm (w0[1], w0[0], selector);
- w2[0] = __byte_perm (w0[0], 0, selector);
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 120:
+ w[30] = 0x80;
break;
- case 9:
- w3[1] = __byte_perm (w1[0], w0[3], selector);
- w3[0] = __byte_perm (w0[3], w0[2], selector);
- w2[3] = __byte_perm (w0[2], w0[1], selector);
- w2[2] = __byte_perm (w0[1], w0[0], selector);
- w2[1] = __byte_perm (w0[0], 0, selector);
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 121:
+ w[30] = w[30] | 0x8000;
break;
- case 10:
- w3[1] = __byte_perm (w0[3], w0[2], selector);
- w3[0] = __byte_perm (w0[2], w0[1], selector);
- w2[3] = __byte_perm (w0[1], w0[0], selector);
- w2[2] = __byte_perm (w0[0], 0, selector);
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 122:
+ w[30] = w[30] | 0x800000;
break;
- case 11:
- w3[1] = __byte_perm (w0[2], w0[1], selector);
- w3[0] = __byte_perm (w0[1], w0[0], selector);
- w2[3] = __byte_perm (w0[0], 0, selector);
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 123:
+ w[30] = w[30] | 0x80000000;
break;
- case 12:
- w3[1] = __byte_perm (w0[1], w0[0], selector);
- w3[0] = __byte_perm (w0[0], 0, selector);
- w2[3] = 0;
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 124:
+ w[31] = 0x80;
break;
- case 13:
- w3[1] = __byte_perm (w0[0], 0, selector);
- w3[0] = 0;
- w2[3] = 0;
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 125:
+ w[31] = w[31] | 0x8000;
+ break;
+
+ case 126:
+ w[31] = w[31] | 0x800000;
+ break;
+
+ case 127:
+ w[31] = w[31] | 0x80000000;
break;
}
- #endif
}
+*/