- #ifdef IS_NV
- switch (p0)
- {
- case 0: tmp0 = __byte_perm_S (buf0[0], 0, 0x6540);
- break;
- case 1: tmp0 = __byte_perm_S (buf0[0], 0, 0x6541);
- break;
- case 2: tmp0 = __byte_perm_S (buf0[0], 0, 0x6542);
- break;
- case 3: tmp0 = __byte_perm_S (buf0[0], 0, 0x6543);
- break;
- case 4: tmp0 = __byte_perm_S (buf0[1], 0, 0x6540);
- break;
- case 5: tmp0 = __byte_perm_S (buf0[1], 0, 0x6541);
- break;
- case 6: tmp0 = __byte_perm_S (buf0[1], 0, 0x6542);
- break;
- case 7: tmp0 = __byte_perm_S (buf0[1], 0, 0x6543);
- break;
- case 8: tmp0 = __byte_perm_S (buf0[2], 0, 0x6540);
- break;
- case 9: tmp0 = __byte_perm_S (buf0[2], 0, 0x6541);
- break;
- case 10: tmp0 = __byte_perm_S (buf0[2], 0, 0x6542);
- break;
- case 11: tmp0 = __byte_perm_S (buf0[2], 0, 0x6543);
- break;
- case 12: tmp0 = __byte_perm_S (buf0[3], 0, 0x6540);
- break;
- case 13: tmp0 = __byte_perm_S (buf0[3], 0, 0x6541);
- break;
- case 14: tmp0 = __byte_perm_S (buf0[3], 0, 0x6542);
- break;
- case 15: tmp0 = __byte_perm_S (buf0[3], 0, 0x6543);
- break;
- case 16: tmp0 = __byte_perm_S (buf1[0], 0, 0x6540);
- break;
- case 17: tmp0 = __byte_perm_S (buf1[0], 0, 0x6541);
- break;
- case 18: tmp0 = __byte_perm_S (buf1[0], 0, 0x6542);
- break;
- case 19: tmp0 = __byte_perm_S (buf1[0], 0, 0x6543);
- break;
- case 20: tmp0 = __byte_perm_S (buf1[1], 0, 0x6540);
- break;
- case 21: tmp0 = __byte_perm_S (buf1[1], 0, 0x6541);
- break;
- case 22: tmp0 = __byte_perm_S (buf1[1], 0, 0x6542);
- break;
- case 23: tmp0 = __byte_perm_S (buf1[1], 0, 0x6543);
- break;
- case 24: tmp0 = __byte_perm_S (buf1[2], 0, 0x6540);
- break;
- case 25: tmp0 = __byte_perm_S (buf1[2], 0, 0x6541);
- break;
- case 26: tmp0 = __byte_perm_S (buf1[2], 0, 0x6542);
- break;
- case 27: tmp0 = __byte_perm_S (buf1[2], 0, 0x6543);
- break;
- case 28: tmp0 = __byte_perm_S (buf1[3], 0, 0x6540);
- break;
- case 29: tmp0 = __byte_perm_S (buf1[3], 0, 0x6541);
- break;
- case 30: tmp0 = __byte_perm_S (buf1[3], 0, 0x6542);
- break;
- case 31: tmp0 = __byte_perm_S (buf1[3], 0, 0x6543);
- break;
- }
-
- switch (p1)
- {
- case 0: tmp1 = __byte_perm_S (buf0[0], 0, 0x6540);
- buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x7650);
- break;
- case 1: tmp1 = __byte_perm_S (buf0[0], 0, 0x6541);
- buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x7604);
- break;
- case 2: tmp1 = __byte_perm_S (buf0[0], 0, 0x6542);
- buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x7054);
- break;
- case 3: tmp1 = __byte_perm_S (buf0[0], 0, 0x6543);
- buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x0654);
- break;
- case 4: tmp1 = __byte_perm_S (buf0[1], 0, 0x6540);
- buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7650);
- break;
- case 5: tmp1 = __byte_perm_S (buf0[1], 0, 0x6541);
- buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7604);
- break;
- case 6: tmp1 = __byte_perm_S (buf0[1], 0, 0x6542);
- buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7054);
- break;
- case 7: tmp1 = __byte_perm_S (buf0[1], 0, 0x6543);
- buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x0654);
- break;
- case 8: tmp1 = __byte_perm_S (buf0[2], 0, 0x6540);
- buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7650);
- break;
- case 9: tmp1 = __byte_perm_S (buf0[2], 0, 0x6541);
- buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7604);
- break;
- case 10: tmp1 = __byte_perm_S (buf0[2], 0, 0x6542);
- buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7054);
- break;
- case 11: tmp1 = __byte_perm_S (buf0[2], 0, 0x6543);
- buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x0654);
- break;
- case 12: tmp1 = __byte_perm_S (buf0[3], 0, 0x6540);
- buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7650);
- break;
- case 13: tmp1 = __byte_perm_S (buf0[3], 0, 0x6541);
- buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7604);
- break;
- case 14: tmp1 = __byte_perm_S (buf0[3], 0, 0x6542);
- buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7054);
- break;
- case 15: tmp1 = __byte_perm_S (buf0[3], 0, 0x6543);
- buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x0654);
- break;
- case 16: tmp1 = __byte_perm_S (buf1[0], 0, 0x6540);
- buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7650);
- break;
- case 17: tmp1 = __byte_perm_S (buf1[0], 0, 0x6541);
- buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7604);
- break;
- case 18: tmp1 = __byte_perm_S (buf1[0], 0, 0x6542);
- buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7054);
- break;
- case 19: tmp1 = __byte_perm_S (buf1[0], 0, 0x6543);
- buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x0654);
- break;
- case 20: tmp1 = __byte_perm_S (buf1[1], 0, 0x6540);
- buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7650);
- break;
- case 21: tmp1 = __byte_perm_S (buf1[1], 0, 0x6541);
- buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7604);
- break;
- case 22: tmp1 = __byte_perm_S (buf1[1], 0, 0x6542);
- buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7054);
- break;
- case 23: tmp1 = __byte_perm_S (buf1[1], 0, 0x6543);
- buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x0654);
- break;
- case 24: tmp1 = __byte_perm_S (buf1[2], 0, 0x6540);
- buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7650);
- break;
- case 25: tmp1 = __byte_perm_S (buf1[2], 0, 0x6541);
- buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7604);
- break;
- case 26: tmp1 = __byte_perm_S (buf1[2], 0, 0x6542);
- buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7054);
- break;
- case 27: tmp1 = __byte_perm_S (buf1[2], 0, 0x6543);
- buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x0654);
- break;
- case 28: tmp1 = __byte_perm_S (buf1[3], 0, 0x6540);
- buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7650);
- break;
- case 29: tmp1 = __byte_perm_S (buf1[3], 0, 0x6541);
- buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7604);
- break;
- case 30: tmp1 = __byte_perm_S (buf1[3], 0, 0x6542);
- buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7054);
- break;
- case 31: tmp1 = __byte_perm_S (buf1[3], 0, 0x6543);
- buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x0654);
- break;
- }
-
- switch (p0)
- {
- case 0: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7650);
- break;
- case 1: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7604);
- break;
- case 2: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7054);
- break;
- case 3: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x0654);
- break;
- case 4: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x7650);
- break;
- case 5: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x7604);
- break;
- case 6: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x7054);
- break;
- case 7: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x0654);
- break;
- case 8: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7650);
- break;
- case 9: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7604);
- break;
- case 10: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7054);
- break;
- case 11: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x0654);
- break;
- case 12: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7650);
- break;
- case 13: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7604);
- break;
- case 14: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7054);
- break;
- case 15: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x0654);
- break;
- case 16: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7650);
- break;
- case 17: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7604);
- break;
- case 18: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7054);
- break;
- case 19: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x0654);
- break;
- case 20: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7650);
- break;
- case 21: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7604);
- break;
- case 22: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7054);
- break;
- case 23: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x0654);
- break;
- case 24: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7650);
- break;
- case 25: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7604);
- break;
- case 26: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7054);
- break;
- case 27: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x0654);
- break;
- case 28: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7650);
- break;
- case 29: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7604);
- break;
- case 30: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7054);
- break;
- case 31: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x0654);
- break;
- }
- #endif
-
- #if defined IS_AMD || defined IS_GENERIC