#endif
}
-// before: append_0x01_1
static void append_0x01_1x4 (u32 w0[4], const u32 offset)
{
switch (offset)
}
}
-// before: append_0x01_2
static void append_0x01_2x4 (u32 w0[4], u32 w1[4], const u32 offset)
{
switch (offset)
}
}
-// before: append_0x01_3
static void append_0x01_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
{
switch (offset)
}
}
-// before: append_0x01_4
static void append_0x01_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
switch (offset)
}
}
-// before: append_0x01_8
static void append_0x01_8x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
{
switch (offset)
}
}
-// before: append_0x02_1
static void append_0x02_1x4 (u32 w0[4], const u32 offset)
{
switch (offset)
}
}
-// before: append_0x02_2
static void append_0x02_2x4 (u32 w0[4], u32 w1[4], const u32 offset)
{
switch (offset)
}
}
-// before: append_0x02_3
static void append_0x02_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
{
switch (offset)
}
}
-// before: append_0x02_4
static void append_0x02_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
switch (offset)
}
}
-// before: append_0x02_8
static void append_0x02_8x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
{
switch (offset)
}
}
-// before: append_0x80_1
static void append_0x80_1x4 (u32 w0[4], const u32 offset)
{
switch (offset)
}
}
-// before: append_0x80_2
static void append_0x80_2x4 (u32 w0[4], u32 w1[4], const u32 offset)
{
switch (offset)
}
}
-// before: append_0x80_3
static void append_0x80_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
{
switch (offset)
}
}
-// before: append_0x80_4
static void append_0x80_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
switch (offset)
}
}
-// before: append_0x80_8
static void append_0x80_8x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
{
switch (offset)
}
}
-// before: device_memcat2L
-static void memcat_c7_d1x2_sl1x2_sr1x2 (const u32 offset, u32 dst0[2], u32 src_l0[2], u32 src_r0[2])
+static void append_0x80_1x16 (u32 w[16], const u32 offset)
{
switch (offset)
{
- case 1:
- dst0[0] = src_l0[0] | src_r0[0] << 8;
- dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- break;
-
- case 2:
- dst0[0] = src_l0[0] | src_r0[0] << 16;
- dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- break;
-
- case 3:
- dst0[0] = src_l0[0] | src_r0[0] << 24;
- dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- break;
-
- case 4:
- dst0[1] = src_r0[0];
- break;
-
- case 5:
- dst0[1] = src_l0[1] | src_r0[0] << 8;
- break;
-
- case 6:
- dst0[1] = src_l0[1] | src_r0[0] << 16;
- break;
-
- case 7:
- dst0[1] = src_l0[1] | src_r0[0] << 24;
+ case 0:
+ w[ 0] = 0x80;
break;
- }
-}
-// before: device_memcat4L
-static void memcat_c15_d1x4_sl1x4_sr1x4 (const u32 offset, u32 dst0[4], u32 src_l0[4], u32 src_r0[4])
-{
- switch (offset)
- {
case 1:
- dst0[0] = src_l0[0] | src_r0[0] << 8;
- dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
+ w[ 0] = w[ 0] | 0x8000;
break;
case 2:
- dst0[0] = src_l0[0] | src_r0[0] << 16;
- dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
+ w[ 0] = w[ 0] | 0x800000;
break;
case 3:
- dst0[0] = src_l0[0] | src_r0[0] << 24;
- dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
+ w[ 0] = w[ 0] | 0x80000000;
break;
case 4:
- dst0[1] = src_r0[0];
- dst0[2] = src_r0[1];
- dst0[3] = src_r0[2];
+ w[ 1] = 0x80;
break;
case 5:
- dst0[1] = src_l0[1] | src_r0[0] << 8;
- dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
+ w[ 1] = w[ 1] | 0x8000;
break;
case 6:
- dst0[1] = src_l0[1] | src_r0[0] << 16;
- dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
+ w[ 1] = w[ 1] | 0x800000;
break;
case 7:
- dst0[1] = src_l0[1] | src_r0[0] << 24;
- dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
+ w[ 1] = w[ 1] | 0x80000000;
break;
case 8:
- dst0[2] = src_r0[0];
- dst0[3] = src_r0[1];
+ w[ 2] = 0x80;
break;
case 9:
- dst0[2] = src_l0[2] | src_r0[0] << 8;
- dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
+ w[ 2] = w[ 2] | 0x8000;
break;
case 10:
- dst0[2] = src_l0[2] | src_r0[0] << 16;
- dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
+ w[ 2] = w[ 2] | 0x800000;
break;
case 11:
- dst0[2] = src_l0[2] | src_r0[0] << 24;
- dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ w[ 2] = w[ 2] | 0x80000000;
break;
case 12:
- dst0[3] = src_r0[0];
+ w[ 3] = 0x80;
break;
case 13:
- dst0[3] = src_l0[3] | src_r0[0] << 8;
+ w[ 3] = w[ 3] | 0x8000;
break;
case 14:
- dst0[3] = src_l0[3] | src_r0[0] << 16;
+ w[ 3] = w[ 3] | 0x800000;
break;
case 15:
- dst0[3] = src_l0[3] | src_r0[0] << 24;
+ w[ 3] = w[ 3] | 0x80000000;
break;
- }
-}
-// before: device_memcat8L
-static void memcat_c31_d2x4_sl2x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 src_l0[4], u32 src_l1[4], u32 src_r0[4])
-{
- switch (offset)
- {
- case 1:
- dst0[0] = src_l0[0] | src_r0[0] << 8;
- dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[0] = src_r0[3] >> 24;
+ case 16:
+ w[ 4] = 0x80;
break;
- case 2:
- dst0[0] = src_l0[0] | src_r0[0] << 16;
- dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[0] = src_r0[3] >> 16;
+ case 17:
+ w[ 4] = w[ 4] | 0x8000;
break;
- case 3:
- dst0[0] = src_l0[0] | src_r0[0] << 24;
- dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[0] = src_r0[3] >> 8;
+ case 18:
+ w[ 4] = w[ 4] | 0x800000;
break;
- case 4:
- dst0[1] = src_r0[0];
- dst0[2] = src_r0[1];
- dst0[3] = src_r0[2];
- dst1[0] = src_r0[3];
+ case 19:
+ w[ 4] = w[ 4] | 0x80000000;
break;
- case 5:
- dst0[1] = src_l0[1] | src_r0[0] << 8;
- dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[1] = src_r0[3] >> 24;
+ case 20:
+ w[ 5] = 0x80;
break;
- case 6:
- dst0[1] = src_l0[1] | src_r0[0] << 16;
- dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[1] = src_r0[3] >> 16;
+ case 21:
+ w[ 5] = w[ 5] | 0x8000;
break;
- case 7:
- dst0[1] = src_l0[1] | src_r0[0] << 24;
- dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[1] = src_r0[3] >> 8;
+ case 22:
+ w[ 5] = w[ 5] | 0x800000;
break;
- case 8:
- dst0[2] = src_r0[0];
- dst0[3] = src_r0[1];
- dst1[0] = src_r0[2];
- dst1[1] = src_r0[3];
+ case 23:
+ w[ 5] = w[ 5] | 0x80000000;
break;
- case 9:
- dst0[2] = src_l0[2] | src_r0[0] << 8;
- dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[2] = src_r0[3] >> 24;
+ case 24:
+ w[ 6] = 0x80;
break;
- case 10:
- dst0[2] = src_l0[2] | src_r0[0] << 16;
- dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[2] = src_r0[3] >> 16;
+ case 25:
+ w[ 6] = w[ 6] | 0x8000;
break;
- case 11:
- dst0[2] = src_l0[2] | src_r0[0] << 24;
- dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[2] = src_r0[3] >> 8;
+ case 26:
+ w[ 6] = w[ 6] | 0x800000;
break;
- case 12:
- dst0[3] = src_r0[0];
- dst1[0] = src_r0[1];
- dst1[1] = src_r0[2];
- dst1[2] = src_r0[3];
+ case 27:
+ w[ 6] = w[ 6] | 0x80000000;
break;
- case 13:
- dst0[3] = src_l0[3] | src_r0[0] << 8;
- dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[3] = src_r0[3] >> 24;
+ case 28:
+ w[ 7] = 0x80;
break;
- case 14:
- dst0[3] = src_l0[3] | src_r0[0] << 16;
- dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[3] = src_r0[3] >> 16;
+ case 29:
+ w[ 7] = w[ 7] | 0x8000;
break;
- case 15:
- dst0[3] = src_l0[3] | src_r0[0] << 24;
- dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[3] = src_r0[3] >> 8;
+ case 30:
+ w[ 7] = w[ 7] | 0x800000;
break;
- case 16:
- dst1[0] = src_r0[0];
- dst1[1] = src_r0[1];
- dst1[2] = src_r0[2];
- dst1[3] = src_r0[3];
+ case 31:
+ w[ 7] = w[ 7] | 0x80000000;
break;
- case 17:
- dst1[0] = src_l1[0] | src_r0[0] << 8;
- dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8;
+ case 32:
+ w[ 8] = 0x80;
break;
- case 18:
- dst1[0] = src_l1[0] | src_r0[0] << 16;
- dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16;
+ case 33:
+ w[ 8] = w[ 8] | 0x8000;
break;
- case 19:
- dst1[0] = src_l1[0] | src_r0[0] << 24;
- dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24;
+ case 34:
+ w[ 8] = w[ 8] | 0x800000;
break;
- case 20:
- dst1[1] = src_r0[0];
- dst1[2] = src_r0[1];
- dst1[3] = src_r0[2];
+ case 35:
+ w[ 8] = w[ 8] | 0x80000000;
break;
- case 21:
- dst1[1] = src_l1[1] | src_r0[0] << 8;
- dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8;
- break;
-
- case 22:
- dst1[1] = src_l1[1] | src_r0[0] << 16;
- dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16;
- break;
-
- case 23:
- dst1[1] = src_l1[1] | src_r0[0] << 24;
- dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24;
+ case 36:
+ w[ 9] = 0x80;
break;
- case 24:
- dst1[2] = src_r0[0];
- dst1[3] = src_r0[1];
+ case 37:
+ w[ 9] = w[ 9] | 0x8000;
break;
- case 25:
- dst1[2] = src_l1[2] | src_r0[0] << 8;
- dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8;
+ case 38:
+ w[ 9] = w[ 9] | 0x800000;
break;
- case 26:
- dst1[2] = src_l1[2] | src_r0[0] << 16;
- dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16;
+ case 39:
+ w[ 9] = w[ 9] | 0x80000000;
break;
- case 27:
- dst1[2] = src_l1[2] | src_r0[0] << 24;
- dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ case 40:
+ w[10] = 0x80;
break;
- case 28:
- dst1[3] = src_r0[0];
+ case 41:
+ w[10] = w[10] | 0x8000;
break;
- case 29:
- dst1[3] = src_l1[3] | src_r0[0] << 8;
+ case 42:
+ w[10] = w[10] | 0x800000;
break;
- case 30:
- dst1[3] = src_l1[3] | src_r0[0] << 16;
+ case 43:
+ w[10] = w[10] | 0x80000000;
break;
- case 31:
- dst1[3] = src_l1[3] | src_r0[0] << 24;
+ case 44:
+ w[11] = 0x80;
break;
- }
-}
-// before: device_memcat12L
-static void memcat_c47_d3x4_sl3x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4])
-{
- switch (offset)
- {
- case 1:
- dst0[0] = src_l0[0] | src_r0[0] << 8;
- dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[0] = src_r0[3] >> 24;
+ case 45:
+ w[11] = w[11] | 0x8000;
break;
- case 2:
- dst0[0] = src_l0[0] | src_r0[0] << 16;
- dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[0] = src_r0[3] >> 16;
+ case 46:
+ w[11] = w[11] | 0x800000;
break;
- case 3:
- dst0[0] = src_l0[0] | src_r0[0] << 24;
- dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[0] = src_r0[3] >> 8;
+ case 47:
+ w[11] = w[11] | 0x80000000;
break;
- case 4:
- dst0[1] = src_r0[0];
- dst0[2] = src_r0[1];
- dst0[3] = src_r0[2];
- dst1[0] = src_r0[3];
+ case 48:
+ w[12] = 0x80;
break;
- case 5:
- dst0[1] = src_l0[1] | src_r0[0] << 8;
- dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[1] = src_r0[3] >> 24;
+ case 49:
+ w[12] = w[12] | 0x8000;
break;
- case 6:
- dst0[1] = src_l0[1] | src_r0[0] << 16;
- dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[1] = src_r0[3] >> 16;
+ case 50:
+ w[12] = w[12] | 0x800000;
break;
- case 7:
- dst0[1] = src_l0[1] | src_r0[0] << 24;
- dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[1] = src_r0[3] >> 8;
+ case 51:
+ w[12] = w[12] | 0x80000000;
break;
- case 8:
- dst0[2] = src_r0[0];
- dst0[3] = src_r0[1];
- dst1[0] = src_r0[2];
- dst1[1] = src_r0[3];
+ case 52:
+ w[13] = 0x80;
break;
- case 9:
- dst0[2] = src_l0[2] | src_r0[0] << 8;
- dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[2] = src_r0[3] >> 24;
+ case 53:
+ w[13] = w[13] | 0x8000;
break;
- case 10:
- dst0[2] = src_l0[2] | src_r0[0] << 16;
- dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[2] = src_r0[3] >> 16;
+ case 54:
+ w[13] = w[13] | 0x800000;
break;
- case 11:
- dst0[2] = src_l0[2] | src_r0[0] << 24;
- dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[2] = src_r0[3] >> 8;
+ case 55:
+ w[13] = w[13] | 0x80000000;
break;
- case 12:
- dst0[3] = src_r0[0];
- dst1[0] = src_r0[1];
- dst1[1] = src_r0[2];
- dst1[2] = src_r0[3];
+ case 56:
+ w[14] = 0x80;
break;
- case 13:
- dst0[3] = src_l0[3] | src_r0[0] << 8;
- dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[3] = src_r0[3] >> 24;
+ case 57:
+ w[14] = w[14] | 0x8000;
break;
- case 14:
- dst0[3] = src_l0[3] | src_r0[0] << 16;
- dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[3] = src_r0[3] >> 16;
+ case 58:
+ w[14] = w[14] | 0x800000;
break;
- case 15:
- dst0[3] = src_l0[3] | src_r0[0] << 24;
- dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[3] = src_r0[3] >> 8;
+ case 59:
+ w[14] = w[14] | 0x80000000;
break;
- case 16:
- dst1[0] = src_r0[0];
- dst1[1] = src_r0[1];
- dst1[2] = src_r0[2];
- dst1[3] = src_r0[3];
+ case 60:
+ w[15] = 0x80;
break;
- case 17:
- dst1[0] = src_l1[0] | src_r0[0] << 8;
- dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst2[0] = src_r0[3] >> 24;
+ case 61:
+ w[15] = w[15] | 0x8000;
break;
- case 18:
- dst1[0] = src_l1[0] | src_r0[0] << 16;
- dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst2[0] = src_r0[3] >> 16;
+ case 62:
+ w[15] = w[15] | 0x800000;
break;
- case 19:
- dst1[0] = src_l1[0] | src_r0[0] << 24;
- dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst2[0] = src_r0[3] >> 8;
+ case 63:
+ w[15] = w[15] | 0x80000000;
break;
+ }
+}
- case 20:
- dst1[1] = src_r0[0];
- dst1[2] = src_r0[1];
- dst1[3] = src_r0[2];
- dst2[0] = src_r0[3];
- break;
+static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
+{
+ #ifdef IS_AMD
+ const int offset_mod_4 = offset & 3;
- case 21:
- dst1[1] = src_l1[1] | src_r0[0] << 8;
- dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst2[1] = src_r0[3] >> 24;
- break;
+ const int offset_minus_4 = 4 - offset;
- case 22:
- dst1[1] = src_l1[1] | src_r0[0] << 16;
- dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst2[1] = src_r0[3] >> 16;
- break;
+ switch (offset / 4)
+ {
+ case 0:
+ w3[2] = amd_bytealign ( 0, w3[1], offset_minus_4);
+ w3[1] = amd_bytealign (w3[1], w3[0], offset_minus_4);
+ w3[0] = amd_bytealign (w3[0], w2[3], offset_minus_4);
+ w2[3] = amd_bytealign (w2[3], w2[2], offset_minus_4);
+ w2[2] = amd_bytealign (w2[2], w2[1], offset_minus_4);
+ w2[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
+ w2[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
+ w1[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
+ w1[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
+ w1[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w1[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w0[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w0[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w0[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w0[0] = amd_bytealign (w0[0], 0, offset_minus_4);
- case 23:
- dst1[1] = src_l1[1] | src_r0[0] << 24;
- dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst2[1] = src_r0[3] >> 8;
- break;
+ if (offset_mod_4 == 0)
+ {
+ w0[0] = w0[1];
+ w0[1] = w0[2];
+ w0[2] = w0[3];
+ w0[3] = w1[0];
+ w1[0] = w1[1];
+ w1[1] = w1[2];
+ w1[2] = w1[3];
+ w1[3] = w2[0];
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
- case 24:
- dst1[2] = src_r0[0];
- dst1[3] = src_r0[1];
- dst2[0] = src_r0[2];
- dst2[1] = src_r0[3];
break;
- case 25:
- dst1[2] = src_l1[2] | src_r0[0] << 8;
- dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst2[2] = src_r0[3] >> 24;
- break;
+ case 1:
+ w3[2] = amd_bytealign ( 0, w3[0], offset_minus_4);
+ w3[1] = amd_bytealign (w3[0], w2[3], offset_minus_4);
+ w3[0] = amd_bytealign (w2[3], w2[2], offset_minus_4);
+ w2[3] = amd_bytealign (w2[2], w2[1], offset_minus_4);
+ w2[2] = amd_bytealign (w2[1], w2[0], offset_minus_4);
+ w2[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
+ w2[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
+ w1[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
+ w1[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w1[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w1[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w0[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w0[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w0[1] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w0[0] = 0;
- case 26:
- dst1[2] = src_l1[2] | src_r0[0] << 16;
- dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst2[2] = src_r0[3] >> 16;
- break;
+ if (offset_mod_4 == 0)
+ {
+ w0[1] = w0[2];
+ w0[2] = w0[3];
+ w0[3] = w1[0];
+ w1[0] = w1[1];
+ w1[1] = w1[2];
+ w1[2] = w1[3];
+ w1[3] = w2[0];
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
- case 27:
- dst1[2] = src_l1[2] | src_r0[0] << 24;
- dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst2[2] = src_r0[3] >> 8;
break;
- case 28:
- dst1[3] = src_r0[0];
- dst2[0] = src_r0[1];
- dst2[1] = src_r0[2];
- dst2[2] = src_r0[3];
- break;
+ case 2:
+ w3[2] = amd_bytealign ( 0, w2[3], offset_minus_4);
+ w3[1] = amd_bytealign (w2[3], w2[2], offset_minus_4);
+ w3[0] = amd_bytealign (w2[2], w2[1], offset_minus_4);
+ w2[3] = amd_bytealign (w2[1], w2[0], offset_minus_4);
+ w2[2] = amd_bytealign (w2[0], w1[3], offset_minus_4);
+ w2[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
+ w2[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
+ w1[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w1[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w1[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w1[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w0[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w0[2] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w0[1] = 0;
+ w0[0] = 0;
- case 29:
- dst1[3] = src_l1[3] | src_r0[0] << 8;
- dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst2[3] = src_r0[3] >> 24;
- break;
+ if (offset_mod_4 == 0)
+ {
+ w0[2] = w0[3];
+ w0[3] = w1[0];
+ w1[0] = w1[1];
+ w1[1] = w1[2];
+ w1[2] = w1[3];
+ w1[3] = w2[0];
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
- case 30:
- dst1[3] = src_l1[3] | src_r0[0] << 16;
- dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst2[3] = src_r0[3] >> 16;
break;
- case 31:
- dst1[3] = src_l1[3] | src_r0[0] << 24;
- dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst2[3] = src_r0[3] >> 8;
- break;
+ case 3:
+ w3[2] = amd_bytealign ( 0, w2[2], offset_minus_4);
+ w3[1] = amd_bytealign (w2[2], w2[1], offset_minus_4);
+ w3[0] = amd_bytealign (w2[1], w2[0], offset_minus_4);
+ w2[3] = amd_bytealign (w2[0], w1[3], offset_minus_4);
+ w2[2] = amd_bytealign (w1[3], w1[2], offset_minus_4);
+ w2[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
+ w2[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w1[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w1[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w1[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w1[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w0[3] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
- case 32:
- dst2[0] = src_r0[0];
- dst2[1] = src_r0[1];
- dst2[2] = src_r0[2];
- dst2[3] = src_r0[3];
- break;
+ if (offset_mod_4 == 0)
+ {
+ w0[3] = w1[0];
+ w1[0] = w1[1];
+ w1[1] = w1[2];
+ w1[2] = w1[3];
+ w1[3] = w2[0];
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
- case 33:
- dst2[0] = src_l2[0] | src_r0[0] << 8;
- dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8;
break;
- case 34:
- dst2[0] = src_l2[0] | src_r0[0] << 16;
- dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16;
- break;
+ case 4:
+ w3[2] = amd_bytealign ( 0, w2[1], offset_minus_4);
+ w3[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
+ w3[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
+ w2[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
+ w2[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
+ w2[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w2[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w1[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w1[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w1[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w1[0] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
- case 35:
- dst2[0] = src_l2[0] | src_r0[0] << 24;
- dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24;
- break;
+ if (offset_mod_4 == 0)
+ {
+ w1[0] = w1[1];
+ w1[1] = w1[2];
+ w1[2] = w1[3];
+ w1[3] = w2[0];
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
- case 36:
- dst2[1] = src_r0[0];
- dst2[2] = src_r0[1];
- dst2[3] = src_r0[2];
break;
- case 37:
- dst2[1] = src_l2[1] | src_r0[0] << 8;
- dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8;
- break;
+ case 5:
+ w3[2] = amd_bytealign ( 0, w2[0], offset_minus_4);
+ w3[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
+ w3[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
+ w2[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
+ w2[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w2[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w2[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w1[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w1[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w1[1] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
- case 38:
- dst2[1] = src_l2[1] | src_r0[0] << 16;
- dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16;
- break;
+ if (offset_mod_4 == 0)
+ {
+ w1[1] = w1[2];
+ w1[2] = w1[3];
+ w1[3] = w2[0];
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
- case 39:
- dst2[1] = src_l2[1] | src_r0[0] << 24;
- dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24;
break;
- case 40:
- dst2[2] = src_r0[0];
- dst2[3] = src_r0[1];
- break;
+ case 6:
+ w3[2] = amd_bytealign ( 0, w1[3], offset_minus_4);
+ w3[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
+ w3[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
+ w2[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w2[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w2[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w2[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w1[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w1[2] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
- case 41:
- dst2[2] = src_l2[2] | src_r0[0] << 8;
- dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8;
- break;
+ if (offset_mod_4 == 0)
+ {
+ w1[2] = w1[3];
+ w1[3] = w2[0];
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
- case 42:
- dst2[2] = src_l2[2] | src_r0[0] << 16;
- dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16;
break;
- case 43:
- dst2[2] = src_l2[2] | src_r0[0] << 24;
- dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ case 7:
+ w3[2] = amd_bytealign ( 0, w1[2], offset_minus_4);
+ w3[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
+ w3[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w2[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w2[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w2[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w2[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w1[3] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w1[3] = w2[0];
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
+
break;
- case 44:
- dst2[3] = src_r0[0];
+ case 8:
+ w3[2] = amd_bytealign ( 0, w1[1], offset_minus_4);
+ w3[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
+ w3[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w2[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w2[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w2[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w2[0] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w2[0] = w2[1];
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
+
break;
- case 45:
- dst2[3] = src_l2[3] | src_r0[0] << 8;
+ case 9:
+ w3[2] = amd_bytealign ( 0, w1[0], offset_minus_4);
+ w3[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
+ w3[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w2[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w2[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w2[1] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w2[1] = w2[2];
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
+
break;
- case 46:
- dst2[3] = src_l2[3] | src_r0[0] << 16;
+ case 10:
+ w3[2] = amd_bytealign ( 0, w0[3], offset_minus_4);
+ w3[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
+ w3[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w2[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w2[2] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w2[2] = w2[3];
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
+
break;
- case 47:
- dst2[3] = src_l2[3] | src_r0[0] << 24;
- break;
- }
-}
+ case 11:
+ w3[2] = amd_bytealign ( 0, w0[2], offset_minus_4);
+ w3[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
+ w3[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w2[3] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
+ if (offset_mod_4 == 0)
+ {
+ w2[3] = w3[0];
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
-// before: device_memcat12L
-static void memcat_c47_d3x4_sl3x4_sr2x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4], u32 src_r1[4])
-{
- switch (offset)
- {
- case 0:
- dst0[0] = src_r0[0];
- dst0[1] = src_r0[1];
- dst0[2] = src_r0[2];
- dst0[3] = src_r0[3];
- dst1[0] = src_r1[0];
- dst1[1] = src_r1[1];
- dst1[2] = src_r1[2];
- dst1[3] = src_r1[3];
break;
- case 1:
- dst0[0] = src_l0[0] | src_r0[0] << 8;
- dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[0] = src_r0[3] >> 24 | src_r1[0] << 8;
- dst1[1] = src_r1[0] >> 24 | src_r1[1] << 8;
- dst1[2] = src_r1[1] >> 24 | src_r1[2] << 8;
- dst1[3] = src_r1[2] >> 24 | src_r1[3] << 8;
- dst2[0] = src_r1[3] >> 24;
- break;
+ case 12:
+ w3[2] = amd_bytealign ( 0, w0[1], offset_minus_4);
+ w3[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
+ w3[0] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w2[3] = 0;
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
- case 2:
- dst0[0] = src_l0[0] | src_r0[0] << 16;
- dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[0] = src_r0[3] >> 16 | src_r1[0] << 16;
- dst1[1] = src_r1[0] >> 16 | src_r1[1] << 16;
- dst1[2] = src_r1[1] >> 16 | src_r1[2] << 16;
- dst1[3] = src_r1[2] >> 16 | src_r1[3] << 16;
- dst2[0] = src_r1[3] >> 16;
- break;
+ if (offset_mod_4 == 0)
+ {
+ w3[0] = w3[1];
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
- case 3:
- dst0[0] = src_l0[0] | src_r0[0] << 24;
- dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[0] = src_r0[3] >> 8 | src_r1[0] << 24;
- dst1[1] = src_r1[0] >> 8 | src_r1[1] << 24;
- dst1[2] = src_r1[1] >> 8 | src_r1[2] << 24;
- dst1[3] = src_r1[2] >> 8 | src_r1[3] << 24;
- dst2[0] = src_r1[3] >> 8;
break;
- case 4:
- dst0[1] = src_r0[0];
- dst0[2] = src_r0[1];
- dst0[3] = src_r0[2];
- dst1[0] = src_r0[3];
- dst1[1] = src_r1[0];
- dst1[2] = src_r1[1];
- dst1[3] = src_r1[2];
- dst2[0] = src_r1[3];
- break;
+ case 13:
+ w3[2] = amd_bytealign ( 0, w0[0], offset_minus_4);
+ w3[1] = amd_bytealign (w0[0], 0, offset_minus_4);
+ w3[0] = 0;
+ w2[3] = 0;
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
- case 5:
- dst0[1] = src_l0[1] | src_r0[0] << 8;
- dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[1] = src_r0[3] >> 24 | src_r1[0] << 8;
- dst1[2] = src_r1[0] >> 24 | src_r1[1] << 8;
- dst1[3] = src_r1[1] >> 24 | src_r1[2] << 8;
- dst2[0] = src_r1[2] >> 24 | src_r1[3] << 8;
- dst2[1] = src_r1[3] >> 24;
- break;
+ if (offset_mod_4 == 0)
+ {
+ w3[1] = w3[2];
+ w3[2] = 0;
+ }
- case 6:
- dst0[1] = src_l0[1] | src_r0[0] << 16;
- dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[1] = src_r0[3] >> 16 | src_r1[0] << 16;
- dst1[2] = src_r1[0] >> 16 | src_r1[1] << 16;
- dst1[3] = src_r1[1] >> 16 | src_r1[2] << 16;
- dst2[0] = src_r1[2] >> 16 | src_r1[3] << 16;
- dst2[1] = src_r1[3] >> 16;
break;
+ }
+ #endif
- case 7:
- dst0[1] = src_l0[1] | src_r0[0] << 24;
- dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[1] = src_r0[3] >> 8 | src_r1[0] << 24;
- dst1[2] = src_r1[0] >> 8 | src_r1[1] << 24;
- dst1[3] = src_r1[1] >> 8 | src_r1[2] << 24;
- dst2[0] = src_r1[2] >> 8 | src_r1[3] << 24;
- dst2[1] = src_r1[3] >> 8;
- break;
+ #ifdef IS_NV
+ const int offset_minus_4 = 4 - (offset % 4);
- case 8:
- dst0[2] = src_r0[0];
- dst0[3] = src_r0[1];
- dst1[0] = src_r0[2];
- dst1[1] = src_r0[3];
- dst1[2] = src_r1[0];
- dst1[3] = src_r1[1];
- dst2[0] = src_r1[2];
- dst2[1] = src_r1[3];
- break;
+ const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
- case 9:
- dst0[2] = src_l0[2] | src_r0[0] << 8;
- dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[2] = src_r0[3] >> 24 | src_r1[0] << 8;
- dst1[3] = src_r1[0] >> 24 | src_r1[1] << 8;
- dst2[0] = src_r1[1] >> 24 | src_r1[2] << 8;
- dst2[1] = src_r1[2] >> 24 | src_r1[3] << 8;
- dst2[2] = src_r1[3] >> 24;
- break;
+ switch (offset / 4)
+ {
+ case 0:
+ w3[1] = __byte_perm (w3[0], w3[1], selector);
+ w3[0] = __byte_perm (w2[3], w3[0], selector);
+ w2[3] = __byte_perm (w2[2], w2[3], selector);
+ w2[2] = __byte_perm (w2[1], w2[2], selector);
+ w2[1] = __byte_perm (w2[0], w2[1], selector);
+ w2[0] = __byte_perm (w1[3], w2[0], selector);
+ w1[3] = __byte_perm (w1[2], w1[3], selector);
+ w1[2] = __byte_perm (w1[1], w1[2], selector);
+ w1[1] = __byte_perm (w1[0], w1[1], selector);
+ w1[0] = __byte_perm (w0[3], w1[0], selector);
+ w0[3] = __byte_perm (w0[2], w0[3], selector);
+ w0[2] = __byte_perm (w0[1], w0[2], selector);
+ w0[1] = __byte_perm (w0[0], w0[1], selector);
+ w0[0] = __byte_perm ( 0, w0[0], selector);
- case 10:
- dst0[2] = src_l0[2] | src_r0[0] << 16;
- dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[2] = src_r0[3] >> 16 | src_r1[0] << 16;
- dst1[3] = src_r1[0] >> 16 | src_r1[1] << 16;
- dst2[0] = src_r1[1] >> 16 | src_r1[2] << 16;
- dst2[1] = src_r1[2] >> 16 | src_r1[3] << 16;
- dst2[2] = src_r1[3] >> 16;
break;
- case 11:
- dst0[2] = src_l0[2] | src_r0[0] << 24;
- dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[2] = src_r0[3] >> 8 | src_r1[0] << 24;
- dst1[3] = src_r1[0] >> 8 | src_r1[1] << 24;
- dst2[0] = src_r1[1] >> 8 | src_r1[2] << 24;
- dst2[1] = src_r1[2] >> 8 | src_r1[3] << 24;
- dst2[2] = src_r1[3] >> 8;
- break;
+ case 1:
+ w3[1] = __byte_perm (w2[3], w3[0], selector);
+ w3[0] = __byte_perm (w2[2], w2[3], selector);
+ w2[3] = __byte_perm (w2[1], w2[2], selector);
+ w2[2] = __byte_perm (w2[0], w2[1], selector);
+ w2[1] = __byte_perm (w1[3], w2[0], selector);
+ w2[0] = __byte_perm (w1[2], w1[3], selector);
+ w1[3] = __byte_perm (w1[1], w1[2], selector);
+ w1[2] = __byte_perm (w1[0], w1[1], selector);
+ w1[1] = __byte_perm (w0[3], w1[0], selector);
+ w1[0] = __byte_perm (w0[2], w0[3], selector);
+ w0[3] = __byte_perm (w0[1], w0[2], selector);
+ w0[2] = __byte_perm (w0[0], w0[1], selector);
+ w0[1] = __byte_perm ( 0, w0[0], selector);
+ w0[0] = 0;
- case 12:
- dst0[3] = src_r0[0];
- dst1[0] = src_r0[1];
- dst1[1] = src_r0[2];
- dst1[2] = src_r0[3];
- dst1[3] = src_r1[0];
- dst2[0] = src_r1[1];
- dst2[1] = src_r1[2];
- dst2[2] = src_r1[3];
break;
- case 13:
- dst0[3] = src_l0[3] | src_r0[0] << 8;
- dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst1[3] = src_r0[3] >> 24 | src_r1[0] << 8;
- dst2[0] = src_r1[0] >> 24 | src_r1[1] << 8;
- dst2[1] = src_r1[1] >> 24 | src_r1[2] << 8;
- dst2[2] = src_r1[2] >> 24 | src_r1[3] << 8;
- dst2[3] = src_r1[3] >> 24;
- break;
+ case 2:
+ w3[1] = __byte_perm (w2[2], w2[3], selector);
+ w3[0] = __byte_perm (w2[1], w2[2], selector);
+ w2[3] = __byte_perm (w2[0], w2[1], selector);
+ w2[2] = __byte_perm (w1[3], w2[0], selector);
+ w2[1] = __byte_perm (w1[2], w1[3], selector);
+ w2[0] = __byte_perm (w1[1], w1[2], selector);
+ w1[3] = __byte_perm (w1[0], w1[1], selector);
+ w1[2] = __byte_perm (w0[3], w1[0], selector);
+ w1[1] = __byte_perm (w0[2], w0[3], selector);
+ w1[0] = __byte_perm (w0[1], w0[2], selector);
+ w0[3] = __byte_perm (w0[0], w0[1], selector);
+ w0[2] = __byte_perm ( 0, w0[0], selector);
+ w0[1] = 0;
+ w0[0] = 0;
- case 14:
- dst0[3] = src_l0[3] | src_r0[0] << 16;
- dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst1[3] = src_r0[3] >> 16 | src_r1[0] << 16;
- dst2[0] = src_r1[0] >> 16 | src_r1[1] << 16;
- dst2[1] = src_r1[1] >> 16 | src_r1[2] << 16;
- dst2[2] = src_r1[2] >> 16 | src_r1[3] << 16;
- dst2[3] = src_r1[3] >> 16;
break;
- case 15:
- dst0[3] = src_l0[3] | src_r0[0] << 24;
- dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst1[3] = src_r0[3] >> 8 | src_r1[0] << 24;
- dst2[0] = src_r1[0] >> 8 | src_r1[1] << 24;
- dst2[1] = src_r1[1] >> 8 | src_r1[2] << 24;
- dst2[2] = src_r1[2] >> 8 | src_r1[3] << 24;
- dst2[3] = src_r1[3] >> 8;
- break;
+ case 3:
+ w3[1] = __byte_perm (w2[1], w2[2], selector);
+ w3[0] = __byte_perm (w2[0], w2[1], selector);
+ w2[3] = __byte_perm (w1[3], w2[0], selector);
+ w2[2] = __byte_perm (w1[2], w1[3], selector);
+ w2[1] = __byte_perm (w1[1], w1[2], selector);
+ w2[0] = __byte_perm (w1[0], w1[1], selector);
+ w1[3] = __byte_perm (w0[3], w1[0], selector);
+ w1[2] = __byte_perm (w0[2], w0[3], selector);
+ w1[1] = __byte_perm (w0[1], w0[2], selector);
+ w1[0] = __byte_perm (w0[0], w0[1], selector);
+ w0[3] = __byte_perm ( 0, w0[0], selector);
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
- case 16:
- dst1[0] = src_r0[0];
- dst1[1] = src_r0[1];
- dst1[2] = src_r0[2];
- dst1[3] = src_r0[3];
- dst2[0] = src_r1[0];
- dst2[1] = src_r1[1];
- dst2[2] = src_r1[2];
- dst2[3] = src_r1[3];
break;
- case 17:
- dst1[0] = src_l1[0] | src_r0[0] << 8;
- dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst2[0] = src_r0[3] >> 24 | src_r1[0] << 8;
- dst2[1] = src_r1[0] >> 24 | src_r1[1] << 8;
- dst2[2] = src_r1[1] >> 24 | src_r1[2] << 8;
- dst2[3] = src_r1[2] >> 24 | src_r1[3] << 8;
- break;
+ case 4:
+ w3[1] = __byte_perm (w2[0], w2[1], selector);
+ w3[0] = __byte_perm (w1[3], w2[0], selector);
+ w2[3] = __byte_perm (w1[2], w1[3], selector);
+ w2[2] = __byte_perm (w1[1], w1[2], selector);
+ w2[1] = __byte_perm (w1[0], w1[1], selector);
+ w2[0] = __byte_perm (w0[3], w1[0], selector);
+ w1[3] = __byte_perm (w0[2], w0[3], selector);
+ w1[2] = __byte_perm (w0[1], w0[2], selector);
+ w1[1] = __byte_perm (w0[0], w0[1], selector);
+ w1[0] = __byte_perm ( 0, w0[0], selector);
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
- case 18:
- dst1[0] = src_l1[0] | src_r0[0] << 16;
- dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst2[0] = src_r0[3] >> 16 | src_r1[0] << 16;
- dst2[1] = src_r1[0] >> 16 | src_r1[1] << 16;
- dst2[2] = src_r1[1] >> 16 | src_r1[2] << 16;
- dst2[3] = src_r1[2] >> 16 | src_r1[3] << 16;
break;
- case 19:
- dst1[0] = src_l1[0] | src_r0[0] << 24;
- dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst2[0] = src_r0[3] >> 8 | src_r1[0] << 24;
- dst2[1] = src_r1[0] >> 8 | src_r1[1] << 24;
- dst2[2] = src_r1[1] >> 8 | src_r1[2] << 24;
- dst2[3] = src_r1[2] >> 8 | src_r1[3] << 24;
- break;
+ case 5:
+ w3[1] = __byte_perm (w1[3], w2[0], selector);
+ w3[0] = __byte_perm (w1[2], w1[3], selector);
+ w2[3] = __byte_perm (w1[1], w1[2], selector);
+ w2[2] = __byte_perm (w1[0], w1[1], selector);
+ w2[1] = __byte_perm (w0[3], w1[0], selector);
+ w2[0] = __byte_perm (w0[2], w0[3], selector);
+ w1[3] = __byte_perm (w0[1], w0[2], selector);
+ w1[2] = __byte_perm (w0[0], w0[1], selector);
+ w1[1] = __byte_perm ( 0, w0[0], selector);
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
- case 20:
- dst1[1] = src_r1[0];
- dst1[2] = src_r0[1];
- dst1[3] = src_r0[2];
- dst2[0] = src_r0[3];
- dst2[1] = src_r1[0];
- dst2[2] = src_r1[1];
- dst2[3] = src_r1[2];
break;
- case 21:
- dst1[1] = src_l1[1] | src_r0[0] << 8;
- dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst2[1] = src_r0[3] >> 24 | src_r1[0] << 8;
- dst2[2] = src_r1[0] >> 24 | src_r1[1] << 8;
- dst2[3] = src_r1[1] >> 24 | src_r1[2] << 8;
- break;
+ case 6:
+ w3[1] = __byte_perm (w1[2], w1[3], selector);
+ w3[0] = __byte_perm (w1[1], w1[2], selector);
+ w2[3] = __byte_perm (w1[0], w1[1], selector);
+ w2[2] = __byte_perm (w0[3], w1[0], selector);
+ w2[1] = __byte_perm (w0[2], w0[3], selector);
+ w2[0] = __byte_perm (w0[1], w0[2], selector);
+ w1[3] = __byte_perm (w0[0], w0[1], selector);
+ w1[2] = __byte_perm ( 0, w0[0], selector);
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
- case 22:
- dst1[1] = src_l1[1] | src_r0[0] << 16;
- dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst2[1] = src_r0[3] >> 16 | src_r1[0] << 16;
- dst2[2] = src_r1[0] >> 16 | src_r1[1] << 16;
- dst2[3] = src_r1[1] >> 16 | src_r1[2] << 16;
break;
- case 23:
- dst1[1] = src_l1[1] | src_r0[0] << 24;
- dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst2[1] = src_r0[3] >> 8 | src_r1[0] << 24;
- dst2[2] = src_r1[0] >> 8 | src_r1[1] << 24;
- dst2[3] = src_r1[1] >> 8 | src_r1[2] << 24;
- break;
+ case 7:
+ w3[1] = __byte_perm (w1[1], w1[2], selector);
+ w3[0] = __byte_perm (w1[0], w1[1], selector);
+ w2[3] = __byte_perm (w0[3], w1[0], selector);
+ w2[2] = __byte_perm (w0[2], w0[3], selector);
+ w2[1] = __byte_perm (w0[1], w0[2], selector);
+ w2[0] = __byte_perm (w0[0], w0[1], selector);
+ w1[3] = __byte_perm ( 0, w0[0], selector);
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
- case 24:
- dst1[2] = src_r1[0];
- dst1[3] = src_r0[1];
- dst2[0] = src_r0[2];
- dst2[1] = src_r0[3];
- dst2[2] = src_r1[0];
- dst2[3] = src_r1[1];
break;
- case 25:
- dst1[2] = src_l1[2] | src_r0[0] << 8;
- dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst2[2] = src_r0[3] >> 24 | src_r1[0] << 8;
- dst2[3] = src_r1[0] >> 24 | src_r1[1] << 8;
- break;
+ case 8:
+ w3[1] = __byte_perm (w1[0], w1[1], selector);
+ w3[0] = __byte_perm (w0[3], w1[0], selector);
+ w2[3] = __byte_perm (w0[2], w0[3], selector);
+ w2[2] = __byte_perm (w0[1], w0[2], selector);
+ w2[1] = __byte_perm (w0[0], w0[1], selector);
+ w2[0] = __byte_perm ( 0, w0[0], selector);
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
- case 26:
- dst1[2] = src_l1[2] | src_r0[0] << 16;
- dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst2[2] = src_r0[3] >> 16 | src_r1[0] << 16;
- dst2[3] = src_r1[0] >> 16 | src_r1[1] << 16;
break;
- case 27:
- dst1[2] = src_l1[2] | src_r0[0] << 24;
- dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst2[2] = src_r0[3] >> 8 | src_r1[0] << 24;
- dst2[3] = src_r1[0] >> 8 | src_r1[1] << 24;
- break;
+ case 9:
+ w3[1] = __byte_perm (w0[3], w1[0], selector);
+ w3[0] = __byte_perm (w0[2], w0[3], selector);
+ w2[3] = __byte_perm (w0[1], w0[2], selector);
+ w2[2] = __byte_perm (w0[0], w0[1], selector);
+ w2[1] = __byte_perm ( 0, w0[0], selector);
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
- case 28:
- dst1[3] = src_r1[0];
- dst2[0] = src_r0[1];
- dst2[1] = src_r0[2];
- dst2[2] = src_r0[3];
- dst2[3] = src_r1[0];
break;
- case 29:
- dst1[3] = src_l1[3] | src_r0[0] << 8;
- dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8;
- dst2[3] = src_r0[3] >> 24 | src_r1[0] << 8;
- break;
+ case 10:
+ w3[1] = __byte_perm (w0[2], w0[3], selector);
+ w3[0] = __byte_perm (w0[1], w0[2], selector);
+ w2[3] = __byte_perm (w0[0], w0[1], selector);
+ w2[2] = __byte_perm ( 0, w0[0], selector);
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
- case 30:
- dst1[3] = src_l1[3] | src_r0[0] << 16;
- dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16;
- dst2[3] = src_r0[3] >> 16 | src_r1[0] << 16;
break;
- case 31:
- dst1[3] = src_l1[3] | src_r0[0] << 24;
- dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24;
- dst2[3] = src_r0[3] >> 8 | src_r1[0] << 24;
+ case 11:
+ w3[1] = __byte_perm (w0[1], w0[2], selector);
+ w3[0] = __byte_perm (w0[0], w0[1], selector);
+ w2[3] = __byte_perm ( 0, w0[0], selector);
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
break;
- case 32:
- dst2[0] = src_r0[0];
- dst2[1] = src_r0[1];
- dst2[2] = src_r0[2];
- dst2[3] = src_r0[3];
+ case 12:
+ w3[1] = __byte_perm (w0[0], w0[1], selector);
+ w3[0] = __byte_perm ( 0, w0[0], selector);
+ w2[3] = 0;
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
break;
- case 33:
- dst2[0] = src_l2[0] | src_r0[0] << 8;
- dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8;
- dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8;
+ case 13:
+ w3[1] = __byte_perm ( 0, w0[0], selector);
+ w3[0] = 0;
+ w2[3] = 0;
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+
break;
+ }
+ #endif
+}
- case 34:
- dst2[0] = src_l2[0] | src_r0[0] << 16;
- dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16;
- dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16;
+static void switch_buffer_by_offset_be (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
+{
+ #ifdef IS_AMD
+ switch (offset / 4)
+ {
+ case 0:
+ w3[2] = amd_bytealign (w3[1], 0, offset);
+ w3[1] = amd_bytealign (w3[0], w3[1], offset);
+ w3[0] = amd_bytealign (w2[3], w3[0], offset);
+ w2[3] = amd_bytealign (w2[2], w2[3], offset);
+ w2[2] = amd_bytealign (w2[1], w2[2], offset);
+ w2[1] = amd_bytealign (w2[0], w2[1], offset);
+ w2[0] = amd_bytealign (w1[3], w2[0], offset);
+ w1[3] = amd_bytealign (w1[2], w1[3], offset);
+ w1[2] = amd_bytealign (w1[1], w1[2], offset);
+ w1[1] = amd_bytealign (w1[0], w1[1], offset);
+ w1[0] = amd_bytealign (w0[3], w1[0], offset);
+ w0[3] = amd_bytealign (w0[2], w0[3], offset);
+ w0[2] = amd_bytealign (w0[1], w0[2], offset);
+ w0[1] = amd_bytealign (w0[0], w0[1], offset);
+ w0[0] = amd_bytealign ( 0, w0[0], offset);
break;
- case 35:
- dst2[0] = src_l2[0] | src_r0[0] << 24;
- dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24;
- dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24;
+ case 1:
+ w3[2] = amd_bytealign (w3[0], 0, offset);
+ w3[1] = amd_bytealign (w2[3], w3[0], offset);
+ w3[0] = amd_bytealign (w2[2], w2[3], offset);
+ w2[3] = amd_bytealign (w2[1], w2[2], offset);
+ w2[2] = amd_bytealign (w2[0], w2[1], offset);
+ w2[1] = amd_bytealign (w1[3], w2[0], offset);
+ w2[0] = amd_bytealign (w1[2], w1[3], offset);
+ w1[3] = amd_bytealign (w1[1], w1[2], offset);
+ w1[2] = amd_bytealign (w1[0], w1[1], offset);
+ w1[1] = amd_bytealign (w0[3], w1[0], offset);
+ w1[0] = amd_bytealign (w0[2], w0[3], offset);
+ w0[3] = amd_bytealign (w0[1], w0[2], offset);
+ w0[2] = amd_bytealign (w0[0], w0[1], offset);
+ w0[1] = amd_bytealign ( 0, w0[0], offset);
+ w0[0] = 0;
break;
- case 36:
- dst2[1] = src_r0[0];
- dst2[2] = src_r0[1];
- dst2[3] = src_r0[2];
+ case 2:
+ w3[2] = amd_bytealign (w2[3], 0, offset);
+ w3[1] = amd_bytealign (w2[2], w2[3], offset);
+ w3[0] = amd_bytealign (w2[1], w2[2], offset);
+ w2[3] = amd_bytealign (w2[0], w2[1], offset);
+ w2[2] = amd_bytealign (w1[3], w2[0], offset);
+ w2[1] = amd_bytealign (w1[2], w1[3], offset);
+ w2[0] = amd_bytealign (w1[1], w1[2], offset);
+ w1[3] = amd_bytealign (w1[0], w1[1], offset);
+ w1[2] = amd_bytealign (w0[3], w1[0], offset);
+ w1[1] = amd_bytealign (w0[2], w0[3], offset);
+ w1[0] = amd_bytealign (w0[1], w0[2], offset);
+ w0[3] = amd_bytealign (w0[0], w0[1], offset);
+ w0[2] = amd_bytealign ( 0, w0[0], offset);
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 37:
- dst2[1] = src_l2[1] | src_r0[0] << 8;
- dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8;
- dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8;
+ case 3:
+ w3[2] = amd_bytealign (w2[2], 0, offset);
+ w3[1] = amd_bytealign (w2[1], w2[2], offset);
+ w3[0] = amd_bytealign (w2[0], w2[1], offset);
+ w2[3] = amd_bytealign (w1[3], w2[0], offset);
+ w2[2] = amd_bytealign (w1[2], w1[3], offset);
+ w2[1] = amd_bytealign (w1[1], w1[2], offset);
+ w2[0] = amd_bytealign (w1[0], w1[1], offset);
+ w1[3] = amd_bytealign (w0[3], w1[0], offset);
+ w1[2] = amd_bytealign (w0[2], w0[3], offset);
+ w1[1] = amd_bytealign (w0[1], w0[2], offset);
+ w1[0] = amd_bytealign (w0[0], w0[1], offset);
+ w0[3] = amd_bytealign ( 0, w0[0], offset);
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 38:
- dst2[1] = src_l2[1] | src_r0[0] << 16;
- dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16;
- dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16;
+ case 4:
+ w3[2] = amd_bytealign (w2[1], 0, offset);
+ w3[1] = amd_bytealign (w2[0], w2[1], offset);
+ w3[0] = amd_bytealign (w1[3], w2[0], offset);
+ w2[3] = amd_bytealign (w1[2], w1[3], offset);
+ w2[2] = amd_bytealign (w1[1], w1[2], offset);
+ w2[1] = amd_bytealign (w1[0], w1[1], offset);
+ w2[0] = amd_bytealign (w0[3], w1[0], offset);
+ w1[3] = amd_bytealign (w0[2], w0[3], offset);
+ w1[2] = amd_bytealign (w0[1], w0[2], offset);
+ w1[1] = amd_bytealign (w0[0], w0[1], offset);
+ w1[0] = amd_bytealign ( 0, w0[0], offset);
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 39:
- dst2[1] = src_l2[1] | src_r0[0] << 24;
- dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24;
- dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24;
+ case 5:
+ w3[2] = amd_bytealign (w2[0], 0, offset);
+ w3[1] = amd_bytealign (w1[3], w2[0], offset);
+ w3[0] = amd_bytealign (w1[2], w1[3], offset);
+ w2[3] = amd_bytealign (w1[1], w1[2], offset);
+ w2[2] = amd_bytealign (w1[0], w1[1], offset);
+ w2[1] = amd_bytealign (w0[3], w1[0], offset);
+ w2[0] = amd_bytealign (w0[2], w0[3], offset);
+ w1[3] = amd_bytealign (w0[1], w0[2], offset);
+ w1[2] = amd_bytealign (w0[0], w0[1], offset);
+ w1[1] = amd_bytealign ( 0, w0[0], offset);
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 40:
- dst2[2] = src_r0[0];
- dst2[3] = src_r0[1];
+ case 6:
+ w3[2] = amd_bytealign (w1[3], 0, offset);
+ w3[1] = amd_bytealign (w1[2], w1[3], offset);
+ w3[0] = amd_bytealign (w1[1], w1[2], offset);
+ w2[3] = amd_bytealign (w1[0], w1[1], offset);
+ w2[2] = amd_bytealign (w0[3], w1[0], offset);
+ w2[1] = amd_bytealign (w0[2], w0[3], offset);
+ w2[0] = amd_bytealign (w0[1], w0[2], offset);
+ w1[3] = amd_bytealign (w0[0], w0[1], offset);
+ w1[2] = amd_bytealign ( 0, w0[0], offset);
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 41:
- dst2[2] = src_l2[2] | src_r0[0] << 8;
- dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8;
+ case 7:
+ w3[2] = amd_bytealign (w1[2], 0, offset);
+ w3[1] = amd_bytealign (w1[1], w1[2], offset);
+ w3[0] = amd_bytealign (w1[0], w1[1], offset);
+ w2[3] = amd_bytealign (w0[3], w1[0], offset);
+ w2[2] = amd_bytealign (w0[2], w0[3], offset);
+ w2[1] = amd_bytealign (w0[1], w0[2], offset);
+ w2[0] = amd_bytealign (w0[0], w0[1], offset);
+ w1[3] = amd_bytealign ( 0, w0[0], offset);
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 42:
- dst2[2] = src_l2[2] | src_r0[0] << 16;
- dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16;
+ case 8:
+ w3[2] = amd_bytealign (w1[1], 0, offset);
+ w3[1] = amd_bytealign (w1[0], w1[1], offset);
+ w3[0] = amd_bytealign (w0[3], w1[0], offset);
+ w2[3] = amd_bytealign (w0[2], w0[3], offset);
+ w2[2] = amd_bytealign (w0[1], w0[2], offset);
+ w2[1] = amd_bytealign (w0[0], w0[1], offset);
+ w2[0] = amd_bytealign ( 0, w0[0], offset);
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 43:
- dst2[2] = src_l2[2] | src_r0[0] << 24;
- dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ case 9:
+ w3[2] = amd_bytealign (w1[0], 0, offset);
+ w3[1] = amd_bytealign (w0[3], w1[0], offset);
+ w3[0] = amd_bytealign (w0[2], w0[3], offset);
+ w2[3] = amd_bytealign (w0[1], w0[2], offset);
+ w2[2] = amd_bytealign (w0[0], w0[1], offset);
+ w2[1] = amd_bytealign ( 0, w0[0], offset);
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 44:
- dst2[3] = src_r0[0];
+ case 10:
+ w3[2] = amd_bytealign (w0[3], 0, offset);
+ w3[1] = amd_bytealign (w0[2], w0[3], offset);
+ w3[0] = amd_bytealign (w0[1], w0[2], offset);
+ w2[3] = amd_bytealign (w0[0], w0[1], offset);
+ w2[2] = amd_bytealign ( 0, w0[0], offset);
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 45:
- dst2[3] = src_l2[3] | src_r0[0] << 8;
+ case 11:
+ w3[2] = amd_bytealign (w0[2], 0, offset);
+ w3[1] = amd_bytealign (w0[1], w0[2], offset);
+ w3[0] = amd_bytealign (w0[0], w0[1], offset);
+ w2[3] = amd_bytealign ( 0, w0[0], offset);
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 46:
- dst2[3] = src_l2[3] | src_r0[0] << 16;
+ case 12:
+ w3[2] = amd_bytealign (w0[1], 0, offset);
+ w3[1] = amd_bytealign (w0[0], w0[1], offset);
+ w3[0] = amd_bytealign ( 0, w0[0], offset);
+ w2[3] = 0;
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
- case 47:
- dst2[3] = src_l2[3] | src_r0[0] << 24;
+ case 13:
+ w3[2] = amd_bytealign (w0[0], 0, offset);
+ w3[1] = amd_bytealign ( 0, w0[0], offset);
+ w3[0] = 0;
+ w2[3] = 0;
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
}
-}
+ #endif
-// before: memcat16_9
-static void memcat_c15_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset)
-{
- switch (offset)
+ #ifdef IS_NV
+ const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
+
+ switch (offset / 4)
{
case 0:
- w0[0] = append0[0];
- w0[1] = append0[1];
- w0[2] = append0[2];
- w0[3] = append0[3];
- w1[0] = append1[0];
- w1[1] = append1[1];
- w1[2] = append1[2];
- w1[3] = append1[3];
- w2[0] = append2[0];
+ w3[1] = __byte_perm (w3[1], w3[0], selector);
+ w3[0] = __byte_perm (w3[0], w2[3], selector);
+ w2[3] = __byte_perm (w2[3], w2[2], selector);
+ w2[2] = __byte_perm (w2[2], w2[1], selector);
+ w2[1] = __byte_perm (w2[1], w2[0], selector);
+ w2[0] = __byte_perm (w2[0], w1[3], selector);
+ w1[3] = __byte_perm (w1[3], w1[2], selector);
+ w1[2] = __byte_perm (w1[2], w1[1], selector);
+ w1[1] = __byte_perm (w1[1], w1[0], selector);
+ w1[0] = __byte_perm (w1[0], w0[3], selector);
+ w0[3] = __byte_perm (w0[3], w0[2], selector);
+ w0[2] = __byte_perm (w0[2], w0[1], selector);
+ w0[1] = __byte_perm (w0[1], w0[0], selector);
+ w0[0] = __byte_perm (w0[0], 0, selector);
break;
case 1:
- w0[0] = w0[0] | append0[0] << 8;
- w0[1] = append0[0] >> 24 | append0[1] << 8;
- w0[2] = append0[1] >> 24 | append0[2] << 8;
- w0[3] = append0[2] >> 24 | append0[3] << 8;
- w1[0] = append0[3] >> 24 | append1[0] << 8;
- w1[1] = append1[0] >> 24 | append1[1] << 8;
- w1[2] = append1[1] >> 24 | append1[2] << 8;
- w1[3] = append1[2] >> 24 | append1[3] << 8;
- w2[0] = append1[3] >> 24 | append2[0] << 8;
- w2[1] = append2[0] >> 24;
+ w3[1] = __byte_perm (w3[0], w2[3], selector);
+ w3[0] = __byte_perm (w2[3], w2[2], selector);
+ w2[3] = __byte_perm (w2[2], w2[1], selector);
+ w2[2] = __byte_perm (w2[1], w2[0], selector);
+ w2[1] = __byte_perm (w2[0], w1[3], selector);
+ w2[0] = __byte_perm (w1[3], w1[2], selector);
+ w1[3] = __byte_perm (w1[2], w1[1], selector);
+ w1[2] = __byte_perm (w1[1], w1[0], selector);
+ w1[1] = __byte_perm (w1[0], w0[3], selector);
+ w1[0] = __byte_perm (w0[3], w0[2], selector);
+ w0[3] = __byte_perm (w0[2], w0[1], selector);
+ w0[2] = __byte_perm (w0[1], w0[0], selector);
+ w0[1] = __byte_perm (w0[0], 0, selector);
+ w0[0] = 0;
break;
case 2:
- w0[0] = w0[0] | append0[0] << 16;
- w0[1] = append0[0] >> 16 | append0[1] << 16;
- w0[2] = append0[1] >> 16 | append0[2] << 16;
- w0[3] = append0[2] >> 16 | append0[3] << 16;
- w1[0] = append0[3] >> 16 | append1[0] << 16;
- w1[1] = append1[0] >> 16 | append1[1] << 16;
- w1[2] = append1[1] >> 16 | append1[2] << 16;
- w1[3] = append1[2] >> 16 | append1[3] << 16;
- w2[0] = append1[3] >> 16 | append2[0] << 16;
- w2[1] = append2[0] >> 16;
+ w3[1] = __byte_perm (w2[3], w2[2], selector);
+ w3[0] = __byte_perm (w2[2], w2[1], selector);
+ w2[3] = __byte_perm (w2[1], w2[0], selector);
+ w2[2] = __byte_perm (w2[0], w1[3], selector);
+ w2[1] = __byte_perm (w1[3], w1[2], selector);
+ w2[0] = __byte_perm (w1[2], w1[1], selector);
+ w1[3] = __byte_perm (w1[1], w1[0], selector);
+ w1[2] = __byte_perm (w1[0], w0[3], selector);
+ w1[1] = __byte_perm (w0[3], w0[2], selector);
+ w1[0] = __byte_perm (w0[2], w0[1], selector);
+ w0[3] = __byte_perm (w0[1], w0[0], selector);
+ w0[2] = __byte_perm (w0[0], 0, selector);
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 3:
- w0[0] = w0[0] | append0[0] << 24;
- w0[1] = append0[0] >> 8 | append0[1] << 24;
- w0[2] = append0[1] >> 8 | append0[2] << 24;
- w0[3] = append0[2] >> 8 | append0[3] << 24;
- w1[0] = append0[3] >> 8 | append1[0] << 24;
- w1[1] = append1[0] >> 8 | append1[1] << 24;
- w1[2] = append1[1] >> 8 | append1[2] << 24;
- w1[3] = append1[2] >> 8 | append1[3] << 24;
- w2[0] = append1[3] >> 8 | append2[0] << 24;
- w2[1] = append2[0] >> 8;
+ w3[1] = __byte_perm (w2[2], w2[1], selector);
+ w3[0] = __byte_perm (w2[1], w2[0], selector);
+ w2[3] = __byte_perm (w2[0], w1[3], selector);
+ w2[2] = __byte_perm (w1[3], w1[2], selector);
+ w2[1] = __byte_perm (w1[2], w1[1], selector);
+ w2[0] = __byte_perm (w1[1], w1[0], selector);
+ w1[3] = __byte_perm (w1[0], w0[3], selector);
+ w1[2] = __byte_perm (w0[3], w0[2], selector);
+ w1[1] = __byte_perm (w0[2], w0[1], selector);
+ w1[0] = __byte_perm (w0[1], w0[0], selector);
+ w0[3] = __byte_perm (w0[0], 0, selector);
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 4:
- w0[1] = append0[0];
- w0[2] = append0[1];
- w0[3] = append0[2];
- w1[0] = append0[3];
- w1[1] = append1[0];
- w1[2] = append1[1];
- w1[3] = append1[2];
- w2[0] = append1[3];
- w2[1] = append2[0];
+ w3[1] = __byte_perm (w2[1], w2[0], selector);
+ w3[0] = __byte_perm (w2[0], w1[3], selector);
+ w2[3] = __byte_perm (w1[3], w1[2], selector);
+ w2[2] = __byte_perm (w1[2], w1[1], selector);
+ w2[1] = __byte_perm (w1[1], w1[0], selector);
+ w2[0] = __byte_perm (w1[0], w0[3], selector);
+ w1[3] = __byte_perm (w0[3], w0[2], selector);
+ w1[2] = __byte_perm (w0[2], w0[1], selector);
+ w1[1] = __byte_perm (w0[1], w0[0], selector);
+ w1[0] = __byte_perm (w0[0], 0, selector);
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 5:
- w0[1] = w0[1] | append0[0] << 8;
- w0[2] = append0[0] >> 24 | append0[1] << 8;
- w0[3] = append0[1] >> 24 | append0[2] << 8;
- w1[0] = append0[2] >> 24 | append0[3] << 8;
- w1[1] = append0[3] >> 24 | append1[0] << 8;
- w1[2] = append1[0] >> 24 | append1[1] << 8;
- w1[3] = append1[1] >> 24 | append1[2] << 8;
- w2[0] = append1[2] >> 24 | append1[3] << 8;
- w2[1] = append1[3] >> 24 | append2[0] << 8;
- w2[2] = append2[0] >> 24;
- break;
-
- case 6:
- w0[1] = w0[1] | append0[0] << 16;
- w0[2] = append0[0] >> 16 | append0[1] << 16;
- w0[3] = append0[1] >> 16 | append0[2] << 16;
- w1[0] = append0[2] >> 16 | append0[3] << 16;
- w1[1] = append0[3] >> 16 | append1[0] << 16;
- w1[2] = append1[0] >> 16 | append1[1] << 16;
- w1[3] = append1[1] >> 16 | append1[2] << 16;
- w2[0] = append1[2] >> 16 | append1[3] << 16;
- w2[1] = append1[3] >> 16 | append2[0] << 16;
- w2[2] = append2[0] >> 16;
+ w3[1] = __byte_perm (w2[0], w1[3], selector);
+ w3[0] = __byte_perm (w1[3], w1[2], selector);
+ w2[3] = __byte_perm (w1[2], w1[1], selector);
+ w2[2] = __byte_perm (w1[1], w1[0], selector);
+ w2[1] = __byte_perm (w1[0], w0[3], selector);
+ w2[0] = __byte_perm (w0[3], w0[2], selector);
+ w1[3] = __byte_perm (w0[2], w0[1], selector);
+ w1[2] = __byte_perm (w0[1], w0[0], selector);
+ w1[1] = __byte_perm (w0[0], 0, selector);
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
+ break;
+
+ case 6:
+ w3[1] = __byte_perm (w1[3], w1[2], selector);
+ w3[0] = __byte_perm (w1[2], w1[1], selector);
+ w2[3] = __byte_perm (w1[1], w1[0], selector);
+ w2[2] = __byte_perm (w1[0], w0[3], selector);
+ w2[1] = __byte_perm (w0[3], w0[2], selector);
+ w2[0] = __byte_perm (w0[2], w0[1], selector);
+ w1[3] = __byte_perm (w0[1], w0[0], selector);
+ w1[2] = __byte_perm (w0[0], 0, selector);
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 7:
- w0[1] = w0[1] | append0[0] << 24;
- w0[2] = append0[0] >> 8 | append0[1] << 24;
- w0[3] = append0[1] >> 8 | append0[2] << 24;
- w1[0] = append0[2] >> 8 | append0[3] << 24;
- w1[1] = append0[3] >> 8 | append1[0] << 24;
- w1[2] = append1[0] >> 8 | append1[1] << 24;
- w1[3] = append1[1] >> 8 | append1[2] << 24;
- w2[0] = append1[2] >> 8 | append1[3] << 24;
- w2[1] = append1[3] >> 8 | append2[0] << 24;
- w2[2] = append2[0] >> 8;
+ w3[1] = __byte_perm (w1[2], w1[1], selector);
+ w3[0] = __byte_perm (w1[1], w1[0], selector);
+ w2[3] = __byte_perm (w1[0], w0[3], selector);
+ w2[2] = __byte_perm (w0[3], w0[2], selector);
+ w2[1] = __byte_perm (w0[2], w0[1], selector);
+ w2[0] = __byte_perm (w0[1], w0[0], selector);
+ w1[3] = __byte_perm (w0[0], 0, selector);
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 8:
- w0[2] = append0[0];
- w0[3] = append0[1];
- w1[0] = append0[2];
- w1[1] = append0[3];
- w1[2] = append1[0];
- w1[3] = append1[1];
- w2[0] = append1[2];
- w2[1] = append1[3];
- w2[2] = append2[0];
+ w3[1] = __byte_perm (w1[1], w1[0], selector);
+ w3[0] = __byte_perm (w1[0], w0[3], selector);
+ w2[3] = __byte_perm (w0[3], w0[2], selector);
+ w2[2] = __byte_perm (w0[2], w0[1], selector);
+ w2[1] = __byte_perm (w0[1], w0[0], selector);
+ w2[0] = __byte_perm (w0[0], 0, selector);
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 9:
- w0[2] = w0[2] | append0[0] << 8;
- w0[3] = append0[0] >> 24 | append0[1] << 8;
- w1[0] = append0[1] >> 24 | append0[2] << 8;
- w1[1] = append0[2] >> 24 | append0[3] << 8;
- w1[2] = append0[3] >> 24 | append1[0] << 8;
- w1[3] = append1[0] >> 24 | append1[1] << 8;
- w2[0] = append1[1] >> 24 | append1[2] << 8;
- w2[1] = append1[2] >> 24 | append1[3] << 8;
- w2[2] = append1[3] >> 24 | append2[0] << 8;
- w2[3] = append2[0] >> 24;
+ w3[1] = __byte_perm (w1[0], w0[3], selector);
+ w3[0] = __byte_perm (w0[3], w0[2], selector);
+ w2[3] = __byte_perm (w0[2], w0[1], selector);
+ w2[2] = __byte_perm (w0[1], w0[0], selector);
+ w2[1] = __byte_perm (w0[0], 0, selector);
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 10:
- w0[2] = w0[2] | append0[0] << 16;
- w0[3] = append0[0] >> 16 | append0[1] << 16;
- w1[0] = append0[1] >> 16 | append0[2] << 16;
- w1[1] = append0[2] >> 16 | append0[3] << 16;
- w1[2] = append0[3] >> 16 | append1[0] << 16;
- w1[3] = append1[0] >> 16 | append1[1] << 16;
- w2[0] = append1[1] >> 16 | append1[2] << 16;
- w2[1] = append1[2] >> 16 | append1[3] << 16;
- w2[2] = append1[3] >> 16 | append2[0] << 16;
- w2[3] = append2[0] >> 16;
+ w3[1] = __byte_perm (w0[3], w0[2], selector);
+ w3[0] = __byte_perm (w0[2], w0[1], selector);
+ w2[3] = __byte_perm (w0[1], w0[0], selector);
+ w2[2] = __byte_perm (w0[0], 0, selector);
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 11:
- w0[2] = w0[2] | append0[0] << 24;
- w0[3] = append0[0] >> 8 | append0[1] << 24;
- w1[0] = append0[1] >> 8 | append0[2] << 24;
- w1[1] = append0[2] >> 8 | append0[3] << 24;
- w1[2] = append0[3] >> 8 | append1[0] << 24;
- w1[3] = append1[0] >> 8 | append1[1] << 24;
- w2[0] = append1[1] >> 8 | append1[2] << 24;
- w2[1] = append1[2] >> 8 | append1[3] << 24;
- w2[2] = append1[3] >> 8 | append2[0] << 24;
- w2[3] = append2[0] >> 8;
+ w3[1] = __byte_perm (w0[2], w0[1], selector);
+ w3[0] = __byte_perm (w0[1], w0[0], selector);
+ w2[3] = __byte_perm (w0[0], 0, selector);
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 12:
- w0[3] = append0[0];
- w1[0] = append0[1];
- w1[1] = append0[2];
- w1[2] = append0[3];
- w1[3] = append1[0];
- w2[0] = append1[1];
- w2[1] = append1[2];
- w2[2] = append1[3];
- w2[3] = append2[0];
+ w3[1] = __byte_perm (w0[1], w0[0], selector);
+ w3[0] = __byte_perm (w0[0], 0, selector);
+ w2[3] = 0;
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
case 13:
- w0[3] = w0[3] | append0[0] << 8;
- w1[0] = append0[0] >> 24 | append0[1] << 8;
- w1[1] = append0[1] >> 24 | append0[2] << 8;
- w1[2] = append0[2] >> 24 | append0[3] << 8;
- w1[3] = append0[3] >> 24 | append1[0] << 8;
- w2[0] = append1[0] >> 24 | append1[1] << 8;
- w2[1] = append1[1] >> 24 | append1[2] << 8;
- w2[2] = append1[2] >> 24 | append1[3] << 8;
- w2[3] = append1[3] >> 24 | append2[0] << 8;
- w3[0] = append2[0] >> 24;
- break;
-
- case 14:
- w0[3] = w0[3] | append0[0] << 16;
- w1[0] = append0[0] >> 16 | append0[1] << 16;
- w1[1] = append0[1] >> 16 | append0[2] << 16;
- w1[2] = append0[2] >> 16 | append0[3] << 16;
- w1[3] = append0[3] >> 16 | append1[0] << 16;
- w2[0] = append1[0] >> 16 | append1[1] << 16;
- w2[1] = append1[1] >> 16 | append1[2] << 16;
- w2[2] = append1[2] >> 16 | append1[3] << 16;
- w2[3] = append1[3] >> 16 | append2[0] << 16;
- w3[0] = append2[0] >> 16;
- break;
-
- case 15:
- w0[3] = w0[3] | append0[0] << 24;
- w1[0] = append0[0] >> 8 | append0[1] << 24;
- w1[1] = append0[1] >> 8 | append0[2] << 24;
- w1[2] = append0[2] >> 8 | append0[3] << 24;
- w1[3] = append0[3] >> 8 | append1[0] << 24;
- w2[0] = append1[0] >> 8 | append1[1] << 24;
- w2[1] = append1[1] >> 8 | append1[2] << 24;
- w2[2] = append1[2] >> 8 | append1[3] << 24;
- w2[3] = append1[3] >> 8 | append2[0] << 24;
- w3[0] = append2[0] >> 8;
+ w3[1] = __byte_perm (w0[0], 0, selector);
+ w3[0] = 0;
+ w2[3] = 0;
+ w2[2] = 0;
+ w2[1] = 0;
+ w2[0] = 0;
+ w1[3] = 0;
+ w1[2] = 0;
+ w1[1] = 0;
+ w1[0] = 0;
+ w0[3] = 0;
+ w0[2] = 0;
+ w0[1] = 0;
+ w0[0] = 0;
break;
}
+ #endif
}
-// before: memcat32_8
// NOTE(review): this span reads as a diff hunk -- lines starting with '-' belong to the
// removed memcat_c32_w4x4_a2x4, lines starting with '+' belong to the added
// append_0x80_2x4_be, and unprefixed lines are shared by both versions.
//
// Removed function memcat_c32_w4x4_a2x4: copies the eight words append0[0..3], append1[0..3]
// into the buffer w0..w3 starting at byte position `offset` (cases 0..32; any other offset
// leaves the buffer untouched). When offset is a multiple of 4 the words are stored as-is.
// Otherwise the first appended word is shifted and OR-ed into the partially filled word and
// the following words are overwritten with shift-combined pairs of neighbouring append words.
// No case writes beyond w3[1], so from offset 25 upwards the tail of the appended data is
// not stored.
-static void memcat_c32_w4x4_a2x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 offset)
+/* not needed anymore?
+
+// before: append_0x80_2_be
// Added function append_0x80_2x4_be: ORs a single 0x80 byte into word offset / 4 of the
// eight-word buffer w0[0..3], w1[0..3] for offset 0..31; any other offset changes nothing.
// The mask depends on offset % 4: 0 -> 0x80000000, 1 -> 0x800000, 2 -> 0x8000, 3 -> 0x80,
// i.e. byte 0 of a word is its most significant byte, matching the _be suffix.
// NOTE(review): the block-comment opener on the "not needed anymore?" line is not closed
// inside this function, so the added version appears to be commented out -- confirm where
// that comment ends.
+static void append_0x80_2x4_be (u32 w0[4], u32 w1[4], const u32 offset)
{
switch (offset)
{
- case 0:
- w0[0] = append0[0];
- w0[1] = append0[1];
- w0[2] = append0[2];
- w0[3] = append0[3];
- w1[0] = append1[0];
- w1[1] = append1[1];
- w1[2] = append1[2];
- w1[3] = append1[3];
+ case 0:
+ w0[0] |= 0x80000000;
break;
- case 1:
- w0[0] = w0[0] | append0[0] << 8;
- w0[1] = append0[0] >> 24 | append0[1] << 8;
- w0[2] = append0[1] >> 24 | append0[2] << 8;
- w0[3] = append0[2] >> 24 | append0[3] << 8;
- w1[0] = append0[3] >> 24 | append1[0] << 8;
- w1[1] = append1[0] >> 24 | append1[1] << 8;
- w1[2] = append1[1] >> 24 | append1[2] << 8;
- w1[3] = append1[2] >> 24 | append1[3] << 8;
- w2[0] = append1[3] >> 24;
+ case 1:
+ w0[0] |= 0x800000;
break;
- case 2:
- w0[0] = w0[0] | append0[0] << 16;
- w0[1] = append0[0] >> 16 | append0[1] << 16;
- w0[2] = append0[1] >> 16 | append0[2] << 16;
- w0[3] = append0[2] >> 16 | append0[3] << 16;
- w1[0] = append0[3] >> 16 | append1[0] << 16;
- w1[1] = append1[0] >> 16 | append1[1] << 16;
- w1[2] = append1[1] >> 16 | append1[2] << 16;
- w1[3] = append1[2] >> 16 | append1[3] << 16;
- w2[0] = append1[3] >> 16;
+ case 2:
+ w0[0] |= 0x8000;
break;
- case 3:
- w0[0] = w0[0] | append0[0] << 24;
- w0[1] = append0[0] >> 8 | append0[1] << 24;
- w0[2] = append0[1] >> 8 | append0[2] << 24;
- w0[3] = append0[2] >> 8 | append0[3] << 24;
- w1[0] = append0[3] >> 8 | append1[0] << 24;
- w1[1] = append1[0] >> 8 | append1[1] << 24;
- w1[2] = append1[1] >> 8 | append1[2] << 24;
- w1[3] = append1[2] >> 8 | append1[3] << 24;
- w2[0] = append1[3] >> 8;
+ case 3:
+ w0[0] |= 0x80;
break;
- case 4:
- w0[1] = append0[0];
- w0[2] = append0[1];
- w0[3] = append0[2];
- w1[0] = append0[3];
- w1[1] = append1[0];
- w1[2] = append1[1];
- w1[3] = append1[2];
- w2[0] = append1[3];
+ case 4:
+ w0[1] |= 0x80000000;
break;
- case 5:
- w0[1] = w0[1] | append0[0] << 8;
- w0[2] = append0[0] >> 24 | append0[1] << 8;
- w0[3] = append0[1] >> 24 | append0[2] << 8;
- w1[0] = append0[2] >> 24 | append0[3] << 8;
- w1[1] = append0[3] >> 24 | append1[0] << 8;
- w1[2] = append1[0] >> 24 | append1[1] << 8;
- w1[3] = append1[1] >> 24 | append1[2] << 8;
- w2[0] = append1[2] >> 24 | append1[3] << 8;
- w2[1] = append1[3] >> 24;
+ case 5:
+ w0[1] |= 0x800000;
break;
- case 6:
- w0[1] = w0[1] | append0[0] << 16;
- w0[2] = append0[0] >> 16 | append0[1] << 16;
- w0[3] = append0[1] >> 16 | append0[2] << 16;
- w1[0] = append0[2] >> 16 | append0[3] << 16;
- w1[1] = append0[3] >> 16 | append1[0] << 16;
- w1[2] = append1[0] >> 16 | append1[1] << 16;
- w1[3] = append1[1] >> 16 | append1[2] << 16;
- w2[0] = append1[2] >> 16 | append1[3] << 16;
- w2[1] = append1[3] >> 16;
+ case 6:
+ w0[1] |= 0x8000;
break;
- case 7:
- w0[1] = w0[1] | append0[0] << 24;
- w0[2] = append0[0] >> 8 | append0[1] << 24;
- w0[3] = append0[1] >> 8 | append0[2] << 24;
- w1[0] = append0[2] >> 8 | append0[3] << 24;
- w1[1] = append0[3] >> 8 | append1[0] << 24;
- w1[2] = append1[0] >> 8 | append1[1] << 24;
- w1[3] = append1[1] >> 8 | append1[2] << 24;
- w2[0] = append1[2] >> 8 | append1[3] << 24;
- w2[1] = append1[3] >> 8;
+ case 7:
+ w0[1] |= 0x80;
break;
- case 8:
- w0[2] = append0[0];
- w0[3] = append0[1];
- w1[0] = append0[2];
- w1[1] = append0[3];
- w1[2] = append1[0];
- w1[3] = append1[1];
- w2[0] = append1[2];
- w2[1] = append1[3];
+ case 8:
+ w0[2] |= 0x80000000;
break;
- case 9:
- w0[2] = w0[2] | append0[0] << 8;
- w0[3] = append0[0] >> 24 | append0[1] << 8;
- w1[0] = append0[1] >> 24 | append0[2] << 8;
- w1[1] = append0[2] >> 24 | append0[3] << 8;
- w1[2] = append0[3] >> 24 | append1[0] << 8;
- w1[3] = append1[0] >> 24 | append1[1] << 8;
- w2[0] = append1[1] >> 24 | append1[2] << 8;
- w2[1] = append1[2] >> 24 | append1[3] << 8;
- w2[2] = append1[3] >> 24;
+ case 9:
+ w0[2] |= 0x800000;
break;
case 10:
- w0[2] = w0[2] | append0[0] << 16;
- w0[3] = append0[0] >> 16 | append0[1] << 16;
- w1[0] = append0[1] >> 16 | append0[2] << 16;
- w1[1] = append0[2] >> 16 | append0[3] << 16;
- w1[2] = append0[3] >> 16 | append1[0] << 16;
- w1[3] = append1[0] >> 16 | append1[1] << 16;
- w2[0] = append1[1] >> 16 | append1[2] << 16;
- w2[1] = append1[2] >> 16 | append1[3] << 16;
- w2[2] = append1[3] >> 16;
+ w0[2] |= 0x8000;
break;
case 11:
- w0[2] = w0[2] | append0[0] << 24;
- w0[3] = append0[0] >> 8 | append0[1] << 24;
- w1[0] = append0[1] >> 8 | append0[2] << 24;
- w1[1] = append0[2] >> 8 | append0[3] << 24;
- w1[2] = append0[3] >> 8 | append1[0] << 24;
- w1[3] = append1[0] >> 8 | append1[1] << 24;
- w2[0] = append1[1] >> 8 | append1[2] << 24;
- w2[1] = append1[2] >> 8 | append1[3] << 24;
- w2[2] = append1[3] >> 8;
+ w0[2] |= 0x80;
break;
case 12:
- w0[3] = append0[0];
- w1[0] = append0[1];
- w1[1] = append0[2];
- w1[2] = append0[3];
- w1[3] = append1[0];
- w2[0] = append1[1];
- w2[1] = append1[2];
- w2[2] = append1[3];
+ w0[3] |= 0x80000000;
break;
case 13:
- w0[3] = w0[3] | append0[0] << 8;
- w1[0] = append0[0] >> 24 | append0[1] << 8;
- w1[1] = append0[1] >> 24 | append0[2] << 8;
- w1[2] = append0[2] >> 24 | append0[3] << 8;
- w1[3] = append0[3] >> 24 | append1[0] << 8;
- w2[0] = append1[0] >> 24 | append1[1] << 8;
- w2[1] = append1[1] >> 24 | append1[2] << 8;
- w2[2] = append1[2] >> 24 | append1[3] << 8;
- w2[3] = append1[3] >> 24;
+ w0[3] |= 0x800000;
break;
case 14:
- w0[3] = w0[3] | append0[0] << 16;
- w1[0] = append0[0] >> 16 | append0[1] << 16;
- w1[1] = append0[1] >> 16 | append0[2] << 16;
- w1[2] = append0[2] >> 16 | append0[3] << 16;
- w1[3] = append0[3] >> 16 | append1[0] << 16;
- w2[0] = append1[0] >> 16 | append1[1] << 16;
- w2[1] = append1[1] >> 16 | append1[2] << 16;
- w2[2] = append1[2] >> 16 | append1[3] << 16;
- w2[3] = append1[3] >> 16;
+ w0[3] |= 0x8000;
break;
case 15:
- w0[3] = w0[3] | append0[0] << 24;
- w1[0] = append0[0] >> 8 | append0[1] << 24;
- w1[1] = append0[1] >> 8 | append0[2] << 24;
- w1[2] = append0[2] >> 8 | append0[3] << 24;
- w1[3] = append0[3] >> 8 | append1[0] << 24;
- w2[0] = append1[0] >> 8 | append1[1] << 24;
- w2[1] = append1[1] >> 8 | append1[2] << 24;
- w2[2] = append1[2] >> 8 | append1[3] << 24;
- w2[3] = append1[3] >> 8;
+ w0[3] |= 0x80;
break;
case 16:
- w1[0] = append0[0];
- w1[1] = append0[1];
- w1[2] = append0[2];
- w1[3] = append0[3];
- w2[0] = append1[0];
- w2[1] = append1[1];
- w2[2] = append1[2];
- w2[3] = append1[3];
+ w1[0] |= 0x80000000;
break;
case 17:
- w1[0] = w1[0] | append0[0] << 8;
- w1[1] = append0[0] >> 24 | append0[1] << 8;
- w1[2] = append0[1] >> 24 | append0[2] << 8;
- w1[3] = append0[2] >> 24 | append0[3] << 8;
- w2[0] = append0[3] >> 24 | append1[0] << 8;
- w2[1] = append1[0] >> 24 | append1[1] << 8;
- w2[2] = append1[1] >> 24 | append1[2] << 8;
- w2[3] = append1[2] >> 24 | append1[3] << 8;
- w3[0] = append1[3] >> 24;
+ w1[0] |= 0x800000;
break;
case 18:
- w1[0] = w1[0] | append0[0] << 16;
- w1[1] = append0[0] >> 16 | append0[1] << 16;
- w1[2] = append0[1] >> 16 | append0[2] << 16;
- w1[3] = append0[2] >> 16 | append0[3] << 16;
- w2[0] = append0[3] >> 16 | append1[0] << 16;
- w2[1] = append1[0] >> 16 | append1[1] << 16;
- w2[2] = append1[1] >> 16 | append1[2] << 16;
- w2[3] = append1[2] >> 16 | append1[3] << 16;
- w3[0] = append1[3] >> 16;
+ w1[0] |= 0x8000;
break;
case 19:
- w1[0] = w1[0] | append0[0] << 24;
- w1[1] = append0[0] >> 8 | append0[1] << 24;
- w1[2] = append0[1] >> 8 | append0[2] << 24;
- w1[3] = append0[2] >> 8 | append0[3] << 24;
- w2[0] = append0[3] >> 8 | append1[0] << 24;
- w2[1] = append1[0] >> 8 | append1[1] << 24;
- w2[2] = append1[1] >> 8 | append1[2] << 24;
- w2[3] = append1[2] >> 8 | append1[3] << 24;
- w3[0] = append1[3] >> 8;
+ w1[0] |= 0x80;
break;
case 20:
- w1[1] = append0[0];
- w1[2] = append0[1];
- w1[3] = append0[2];
- w2[0] = append0[3];
- w2[1] = append1[0];
- w2[2] = append1[1];
- w2[3] = append1[2];
- w3[0] = append1[3];
+ w1[1] |= 0x80000000;
break;
case 21:
- w1[1] = w1[1] | append0[0] << 8;
- w1[2] = append0[0] >> 24 | append0[1] << 8;
- w1[3] = append0[1] >> 24 | append0[2] << 8;
- w2[0] = append0[2] >> 24 | append0[3] << 8;
- w2[1] = append0[3] >> 24 | append1[0] << 8;
- w2[2] = append1[0] >> 24 | append1[1] << 8;
- w2[3] = append1[1] >> 24 | append1[2] << 8;
- w3[0] = append1[2] >> 24 | append1[3] << 8;
- w3[1] = append1[3] >> 24;
+ w1[1] |= 0x800000;
break;
case 22:
- w1[1] = w1[1] | append0[0] << 16;
- w1[2] = append0[0] >> 16 | append0[1] << 16;
- w1[3] = append0[1] >> 16 | append0[2] << 16;
- w2[0] = append0[2] >> 16 | append0[3] << 16;
- w2[1] = append0[3] >> 16 | append1[0] << 16;
- w2[2] = append1[0] >> 16 | append1[1] << 16;
- w2[3] = append1[1] >> 16 | append1[2] << 16;
- w3[0] = append1[2] >> 16 | append1[3] << 16;
- w3[1] = append1[3] >> 16;
+ w1[1] |= 0x8000;
break;
case 23:
- w1[1] = w1[1] | append0[0] << 24;
- w1[2] = append0[0] >> 8 | append0[1] << 24;
- w1[3] = append0[1] >> 8 | append0[2] << 24;
- w2[0] = append0[2] >> 8 | append0[3] << 24;
- w2[1] = append0[3] >> 8 | append1[0] << 24;
- w2[2] = append1[0] >> 8 | append1[1] << 24;
- w2[3] = append1[1] >> 8 | append1[2] << 24;
- w3[0] = append1[2] >> 8 | append1[3] << 24;
- w3[1] = append1[3] >> 8;
+ w1[1] |= 0x80;
break;
case 24:
- w1[2] = append0[0];
- w1[3] = append0[1];
- w2[0] = append0[2];
- w2[1] = append0[3];
- w2[2] = append1[0];
- w2[3] = append1[1];
- w3[0] = append1[2];
- w3[1] = append1[3];
+ w1[2] |= 0x80000000;
break;
case 25:
- w1[2] = w1[2] | append0[0] << 8;
- w1[3] = append0[0] >> 24 | append0[1] << 8;
- w2[0] = append0[1] >> 24 | append0[2] << 8;
- w2[1] = append0[2] >> 24 | append0[3] << 8;
- w2[2] = append0[3] >> 24 | append1[0] << 8;
- w2[3] = append1[0] >> 24 | append1[1] << 8;
- w3[0] = append1[1] >> 24 | append1[2] << 8;
- w3[1] = append1[2] >> 24 | append1[3] << 8;
+ w1[2] |= 0x800000;
break;
case 26:
- w1[2] = w1[2] | append0[0] << 16;
- w1[3] = append0[0] >> 16 | append0[1] << 16;
- w2[0] = append0[1] >> 16 | append0[2] << 16;
- w2[1] = append0[2] >> 16 | append0[3] << 16;
- w2[2] = append0[3] >> 16 | append1[0] << 16;
- w2[3] = append1[0] >> 16 | append1[1] << 16;
- w3[0] = append1[1] >> 16 | append1[2] << 16;
- w3[1] = append1[2] >> 16 | append1[3] << 16;
+ w1[2] |= 0x8000;
break;
-
- case 27:
- w1[2] = w1[2] | append0[0] << 24;
- w1[3] = append0[0] >> 8 | append0[1] << 24;
- w2[0] = append0[1] >> 8 | append0[2] << 24;
- w2[1] = append0[2] >> 8 | append0[3] << 24;
- w2[2] = append0[3] >> 8 | append1[0] << 24;
- w2[3] = append1[0] >> 8 | append1[1] << 24;
- w3[0] = append1[1] >> 8 | append1[2] << 24;
- w3[1] = append1[2] >> 8 | append1[3] << 24;
+
+ case 27:
+ w1[2] |= 0x80;
break;
case 28:
- w1[3] = append0[0];
- w2[0] = append0[1];
- w2[1] = append0[2];
- w2[2] = append0[3];
- w2[3] = append1[0];
- w3[0] = append1[1];
- w3[1] = append1[2];
+ w1[3] |= 0x80000000;
break;
case 29:
- w1[3] = w1[3] | append0[0] << 8;
- w2[0] = append0[0] >> 24 | append0[1] << 8;
- w2[1] = append0[1] >> 24 | append0[2] << 8;
- w2[2] = append0[2] >> 24 | append0[3] << 8;
- w2[3] = append0[3] >> 24 | append1[0] << 8;
- w3[0] = append1[0] >> 24 | append1[1] << 8;
- w3[1] = append1[1] >> 24 | append1[2] << 8;
+ w1[3] |= 0x800000;
break;
case 30:
- w1[3] = w1[3] | append0[0] << 16;
- w2[0] = append0[0] >> 16 | append0[1] << 16;
- w2[1] = append0[1] >> 16 | append0[2] << 16;
- w2[2] = append0[2] >> 16 | append0[3] << 16;
- w2[3] = append0[3] >> 16 | append1[0] << 16;
- w3[0] = append1[0] >> 16 | append1[1] << 16;
- w3[1] = append1[1] >> 16 | append1[2] << 16;
+ w1[3] |= 0x8000;
break;
case 31:
- w1[3] = w1[3] | append0[0] << 24;
- w2[0] = append0[0] >> 8 | append0[1] << 24;
- w2[1] = append0[1] >> 8 | append0[2] << 24;
- w2[2] = append0[2] >> 8 | append0[3] << 24;
- w2[3] = append0[3] >> 8 | append1[0] << 24;
- w3[0] = append1[0] >> 8 | append1[1] << 24;
- w3[1] = append1[1] >> 8 | append1[2] << 24;
- break;
-
- case 32:
- w2[0] = append0[0];
- w2[1] = append0[1];
- w2[2] = append0[2];
- w2[3] = append0[3];
- w3[0] = append1[0];
- w3[1] = append1[1];
+ w1[3] |= 0x80;
break;
}
}
-// before: memcat32_9
-static void memcat_c32_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset)
+// before: append_0x80_8
+static void append_0x80_1x32 (u32 w[32], const u32 offset)
{
switch (offset)
{
case 0:
- w0[0] = append0[0];
- w0[1] = append0[1];
- w0[2] = append0[2];
- w0[3] = append0[3];
- w1[0] = append1[0];
- w1[1] = append1[1];
- w1[2] = append1[2];
- w1[3] = append1[3];
- w2[0] = append2[0];
+ w[ 0] = 0x80;
break;
case 1:
- w0[0] = w0[0] | append0[0] << 8;
- w0[1] = append0[0] >> 24 | append0[1] << 8;
- w0[2] = append0[1] >> 24 | append0[2] << 8;
- w0[3] = append0[2] >> 24 | append0[3] << 8;
- w1[0] = append0[3] >> 24 | append1[0] << 8;
- w1[1] = append1[0] >> 24 | append1[1] << 8;
- w1[2] = append1[1] >> 24 | append1[2] << 8;
- w1[3] = append1[2] >> 24 | append1[3] << 8;
- w2[0] = append1[3] >> 24 | append2[0] << 8;
- w2[1] = append2[0] >> 24;
+ w[ 0] = w[ 0] | 0x8000;
break;
case 2:
- w0[0] = w0[0] | append0[0] << 16;
- w0[1] = append0[0] >> 16 | append0[1] << 16;
- w0[2] = append0[1] >> 16 | append0[2] << 16;
- w0[3] = append0[2] >> 16 | append0[3] << 16;
- w1[0] = append0[3] >> 16 | append1[0] << 16;
- w1[1] = append1[0] >> 16 | append1[1] << 16;
- w1[2] = append1[1] >> 16 | append1[2] << 16;
- w1[3] = append1[2] >> 16 | append1[3] << 16;
- w2[0] = append1[3] >> 16 | append2[0] << 16;
- w2[1] = append2[0] >> 16;
+ w[ 0] = w[ 0] | 0x800000;
break;
case 3:
- w0[0] = w0[0] | append0[0] << 24;
- w0[1] = append0[0] >> 8 | append0[1] << 24;
- w0[2] = append0[1] >> 8 | append0[2] << 24;
- w0[3] = append0[2] >> 8 | append0[3] << 24;
- w1[0] = append0[3] >> 8 | append1[0] << 24;
- w1[1] = append1[0] >> 8 | append1[1] << 24;
- w1[2] = append1[1] >> 8 | append1[2] << 24;
- w1[3] = append1[2] >> 8 | append1[3] << 24;
- w2[0] = append1[3] >> 8 | append2[0] << 24;
- w2[1] = append2[0] >> 8;
+ w[ 0] = w[ 0] | 0x80000000;
break;
case 4:
- w0[1] = append0[0];
- w0[2] = append0[1];
- w0[3] = append0[2];
- w1[0] = append0[3];
- w1[1] = append1[0];
- w1[2] = append1[1];
- w1[3] = append1[2];
- w2[0] = append1[3];
- w2[1] = append2[0];
+ w[ 1] = 0x80;
break;
case 5:
- w0[1] = w0[1] | append0[0] << 8;
- w0[2] = append0[0] >> 24 | append0[1] << 8;
- w0[3] = append0[1] >> 24 | append0[2] << 8;
- w1[0] = append0[2] >> 24 | append0[3] << 8;
- w1[1] = append0[3] >> 24 | append1[0] << 8;
- w1[2] = append1[0] >> 24 | append1[1] << 8;
- w1[3] = append1[1] >> 24 | append1[2] << 8;
- w2[0] = append1[2] >> 24 | append1[3] << 8;
- w2[1] = append1[3] >> 24 | append2[0] << 8;
- w2[2] = append2[0] >> 24;
+ w[ 1] = w[ 1] | 0x8000;
break;
case 6:
- w0[1] = w0[1] | append0[0] << 16;
- w0[2] = append0[0] >> 16 | append0[1] << 16;
- w0[3] = append0[1] >> 16 | append0[2] << 16;
- w1[0] = append0[2] >> 16 | append0[3] << 16;
- w1[1] = append0[3] >> 16 | append1[0] << 16;
- w1[2] = append1[0] >> 16 | append1[1] << 16;
- w1[3] = append1[1] >> 16 | append1[2] << 16;
- w2[0] = append1[2] >> 16 | append1[3] << 16;
- w2[1] = append1[3] >> 16 | append2[0] << 16;
- w2[2] = append2[0] >> 16;
+ w[ 1] = w[ 1] | 0x800000;
break;
case 7:
- w0[1] = w0[1] | append0[0] << 24;
- w0[2] = append0[0] >> 8 | append0[1] << 24;
- w0[3] = append0[1] >> 8 | append0[2] << 24;
- w1[0] = append0[2] >> 8 | append0[3] << 24;
- w1[1] = append0[3] >> 8 | append1[0] << 24;
- w1[2] = append1[0] >> 8 | append1[1] << 24;
- w1[3] = append1[1] >> 8 | append1[2] << 24;
- w2[0] = append1[2] >> 8 | append1[3] << 24;
- w2[1] = append1[3] >> 8 | append2[0] << 24;
- w2[2] = append2[0] >> 8;
+ w[ 1] = w[ 1] | 0x80000000;
break;
case 8:
- w0[2] = append0[0];
- w0[3] = append0[1];
- w1[0] = append0[2];
- w1[1] = append0[3];
- w1[2] = append1[0];
- w1[3] = append1[1];
- w2[0] = append1[2];
- w2[1] = append1[3];
- w2[2] = append2[0];
+ w[ 2] = 0x80;
break;
case 9:
- w0[2] = w0[2] | append0[0] << 8;
- w0[3] = append0[0] >> 24 | append0[1] << 8;
- w1[0] = append0[1] >> 24 | append0[2] << 8;
- w1[1] = append0[2] >> 24 | append0[3] << 8;
- w1[2] = append0[3] >> 24 | append1[0] << 8;
- w1[3] = append1[0] >> 24 | append1[1] << 8;
- w2[0] = append1[1] >> 24 | append1[2] << 8;
- w2[1] = append1[2] >> 24 | append1[3] << 8;
- w2[2] = append1[3] >> 24 | append2[0] << 8;
- w2[3] = append2[0] >> 24;
+ w[ 2] = w[ 2] | 0x8000;
break;
case 10:
- w0[2] = w0[2] | append0[0] << 16;
- w0[3] = append0[0] >> 16 | append0[1] << 16;
- w1[0] = append0[1] >> 16 | append0[2] << 16;
- w1[1] = append0[2] >> 16 | append0[3] << 16;
- w1[2] = append0[3] >> 16 | append1[0] << 16;
- w1[3] = append1[0] >> 16 | append1[1] << 16;
- w2[0] = append1[1] >> 16 | append1[2] << 16;
- w2[1] = append1[2] >> 16 | append1[3] << 16;
- w2[2] = append1[3] >> 16 | append2[0] << 16;
- w2[3] = append2[0] >> 16;
+ w[ 2] = w[ 2] | 0x800000;
break;
case 11:
- w0[2] = w0[2] | append0[0] << 24;
- w0[3] = append0[0] >> 8 | append0[1] << 24;
- w1[0] = append0[1] >> 8 | append0[2] << 24;
- w1[1] = append0[2] >> 8 | append0[3] << 24;
- w1[2] = append0[3] >> 8 | append1[0] << 24;
- w1[3] = append1[0] >> 8 | append1[1] << 24;
- w2[0] = append1[1] >> 8 | append1[2] << 24;
- w2[1] = append1[2] >> 8 | append1[3] << 24;
- w2[2] = append1[3] >> 8 | append2[0] << 24;
- w2[3] = append2[0] >> 8;
+ w[ 2] = w[ 2] | 0x80000000;
break;
case 12:
- w0[3] = append0[0];
- w1[0] = append0[1];
- w1[1] = append0[2];
- w1[2] = append0[3];
- w1[3] = append1[0];
- w2[0] = append1[1];
- w2[1] = append1[2];
- w2[2] = append1[3];
- w2[3] = append2[0];
+ w[ 3] = 0x80;
break;
case 13:
- w0[3] = w0[3] | append0[0] << 8;
- w1[0] = append0[0] >> 24 | append0[1] << 8;
- w1[1] = append0[1] >> 24 | append0[2] << 8;
- w1[2] = append0[2] >> 24 | append0[3] << 8;
- w1[3] = append0[3] >> 24 | append1[0] << 8;
- w2[0] = append1[0] >> 24 | append1[1] << 8;
- w2[1] = append1[1] >> 24 | append1[2] << 8;
- w2[2] = append1[2] >> 24 | append1[3] << 8;
- w2[3] = append1[3] >> 24 | append2[0] << 8;
- w3[0] = append2[0] >> 24;
+ w[ 3] = w[ 3] | 0x8000;
break;
case 14:
- w0[3] = w0[3] | append0[0] << 16;
- w1[0] = append0[0] >> 16 | append0[1] << 16;
- w1[1] = append0[1] >> 16 | append0[2] << 16;
- w1[2] = append0[2] >> 16 | append0[3] << 16;
- w1[3] = append0[3] >> 16 | append1[0] << 16;
- w2[0] = append1[0] >> 16 | append1[1] << 16;
- w2[1] = append1[1] >> 16 | append1[2] << 16;
- w2[2] = append1[2] >> 16 | append1[3] << 16;
- w2[3] = append1[3] >> 16 | append2[0] << 16;
- w3[0] = append2[0] >> 16;
+ w[ 3] = w[ 3] | 0x800000;
break;
case 15:
- w0[3] = w0[3] | append0[0] << 24;
- w1[0] = append0[0] >> 8 | append0[1] << 24;
- w1[1] = append0[1] >> 8 | append0[2] << 24;
- w1[2] = append0[2] >> 8 | append0[3] << 24;
- w1[3] = append0[3] >> 8 | append1[0] << 24;
- w2[0] = append1[0] >> 8 | append1[1] << 24;
- w2[1] = append1[1] >> 8 | append1[2] << 24;
- w2[2] = append1[2] >> 8 | append1[3] << 24;
- w2[3] = append1[3] >> 8 | append2[0] << 24;
- w3[0] = append2[0] >> 8;
- break;
-
- case 16:
- w1[0] = append0[0];
- w1[1] = append0[1];
- w1[2] = append0[2];
- w1[3] = append0[3];
- w2[0] = append1[0];
- w2[1] = append1[1];
- w2[2] = append1[2];
- w2[3] = append1[3];
- w3[0] = append2[0];
- break;
-
- case 17:
- w1[0] = w1[0] | append0[0] << 8;
- w1[1] = append0[0] >> 24 | append0[1] << 8;
- w1[2] = append0[1] >> 24 | append0[2] << 8;
- w1[3] = append0[2] >> 24 | append0[3] << 8;
- w2[0] = append0[3] >> 24 | append1[0] << 8;
- w2[1] = append1[0] >> 24 | append1[1] << 8;
- w2[2] = append1[1] >> 24 | append1[2] << 8;
- w2[3] = append1[2] >> 24 | append1[3] << 8;
- w3[0] = append1[3] >> 24 | append2[0] << 8;
- w3[1] = append2[0] >> 24;
+ w[ 3] = w[ 3] | 0x80000000;
break;
- case 18:
- w1[0] = w1[0] | append0[0] << 16;
- w1[1] = append0[0] >> 16 | append0[1] << 16;
- w1[2] = append0[1] >> 16 | append0[2] << 16;
- w1[3] = append0[2] >> 16 | append0[3] << 16;
- w2[0] = append0[3] >> 16 | append1[0] << 16;
- w2[1] = append1[0] >> 16 | append1[1] << 16;
- w2[2] = append1[1] >> 16 | append1[2] << 16;
- w2[3] = append1[2] >> 16 | append1[3] << 16;
- w3[0] = append1[3] >> 16 | append2[0] << 16;
- w3[1] = append2[0] >> 16;
+ case 16:
+ w[ 4] = 0x80;
+ break;
+
+ case 17:
+ w[ 4] = w[ 4] | 0x8000;
+ break;
+
+ case 18:
+ w[ 4] = w[ 4] | 0x800000;
break;
case 19:
- w1[0] = w1[0] | append0[0] << 24;
- w1[1] = append0[0] >> 8 | append0[1] << 24;
- w1[2] = append0[1] >> 8 | append0[2] << 24;
- w1[3] = append0[2] >> 8 | append0[3] << 24;
- w2[0] = append0[3] >> 8 | append1[0] << 24;
- w2[1] = append1[0] >> 8 | append1[1] << 24;
- w2[2] = append1[1] >> 8 | append1[2] << 24;
- w2[3] = append1[2] >> 8 | append1[3] << 24;
- w3[0] = append1[3] >> 8 | append2[0] << 24;
- w3[1] = append2[0] >> 8;
+ w[ 4] = w[ 4] | 0x80000000;
break;
case 20:
- w1[1] = append0[0];
- w1[2] = append0[1];
- w1[3] = append0[2];
- w2[0] = append0[3];
- w2[1] = append1[0];
- w2[2] = append1[1];
- w2[3] = append1[2];
- w3[0] = append1[3];
- w3[1] = append2[0];
+ w[ 5] = 0x80;
break;
case 21:
- w1[1] = w1[1] | append0[0] << 8;
- w1[2] = append0[0] >> 24 | append0[1] << 8;
- w1[3] = append0[1] >> 24 | append0[2] << 8;
- w2[0] = append0[2] >> 24 | append0[3] << 8;
- w2[1] = append0[3] >> 24 | append1[0] << 8;
- w2[2] = append1[0] >> 24 | append1[1] << 8;
- w2[3] = append1[1] >> 24 | append1[2] << 8;
- w3[0] = append1[2] >> 24 | append1[3] << 8;
- w3[1] = append1[3] >> 24 | append2[0] << 8;
+ w[ 5] = w[ 5] | 0x8000;
break;
case 22:
- w1[1] = w1[1] | append0[0] << 16;
- w1[2] = append0[0] >> 16 | append0[1] << 16;
- w1[3] = append0[1] >> 16 | append0[2] << 16;
- w2[0] = append0[2] >> 16 | append0[3] << 16;
- w2[1] = append0[3] >> 16 | append1[0] << 16;
- w2[2] = append1[0] >> 16 | append1[1] << 16;
- w2[3] = append1[1] >> 16 | append1[2] << 16;
- w3[0] = append1[2] >> 16 | append1[3] << 16;
- w3[1] = append1[3] >> 16 | append2[0] << 16;
+ w[ 5] = w[ 5] | 0x800000;
break;
case 23:
- w1[1] = w1[1] | append0[0] << 24;
- w1[2] = append0[0] >> 8 | append0[1] << 24;
- w1[3] = append0[1] >> 8 | append0[2] << 24;
- w2[0] = append0[2] >> 8 | append0[3] << 24;
- w2[1] = append0[3] >> 8 | append1[0] << 24;
- w2[2] = append1[0] >> 8 | append1[1] << 24;
- w2[3] = append1[1] >> 8 | append1[2] << 24;
- w3[0] = append1[2] >> 8 | append1[3] << 24;
- w3[1] = append1[3] >> 8 | append2[0] << 24;
+ w[ 5] = w[ 5] | 0x80000000;
break;
case 24:
- w1[2] = append0[0];
- w1[3] = append0[1];
- w2[0] = append0[2];
- w2[1] = append0[3];
- w2[2] = append1[0];
- w2[3] = append1[1];
- w3[0] = append1[2];
- w3[1] = append1[3];
+ w[ 6] = 0x80;
break;
case 25:
- w1[2] = w1[2] | append0[0] << 8;
- w1[3] = append0[0] >> 24 | append0[1] << 8;
- w2[0] = append0[1] >> 24 | append0[2] << 8;
- w2[1] = append0[2] >> 24 | append0[3] << 8;
- w2[2] = append0[3] >> 24 | append1[0] << 8;
- w2[3] = append1[0] >> 24 | append1[1] << 8;
- w3[0] = append1[1] >> 24 | append1[2] << 8;
- w3[1] = append1[2] >> 24 | append1[3] << 8;
+ w[ 6] = w[ 6] | 0x8000;
break;
case 26:
- w1[2] = w1[2] | append0[0] << 16;
- w1[3] = append0[0] >> 16 | append0[1] << 16;
- w2[0] = append0[1] >> 16 | append0[2] << 16;
- w2[1] = append0[2] >> 16 | append0[3] << 16;
- w2[2] = append0[3] >> 16 | append1[0] << 16;
- w2[3] = append1[0] >> 16 | append1[1] << 16;
- w3[0] = append1[1] >> 16 | append1[2] << 16;
- w3[1] = append1[2] >> 16 | append1[3] << 16;
+ w[ 6] = w[ 6] | 0x800000;
break;
case 27:
- w1[2] = w1[2] | append0[0] << 24;
- w1[3] = append0[0] >> 8 | append0[1] << 24;
- w2[0] = append0[1] >> 8 | append0[2] << 24;
- w2[1] = append0[2] >> 8 | append0[3] << 24;
- w2[2] = append0[3] >> 8 | append1[0] << 24;
- w2[3] = append1[0] >> 8 | append1[1] << 24;
- w3[0] = append1[1] >> 8 | append1[2] << 24;
- w3[1] = append1[2] >> 8 | append1[3] << 24;
+ w[ 6] = w[ 6] | 0x80000000;
break;
case 28:
- w1[3] = append0[0];
- w2[0] = append0[1];
- w2[1] = append0[2];
- w2[2] = append0[3];
- w2[3] = append1[0];
- w3[0] = append1[1];
- w3[1] = append1[2];
+ w[ 7] = 0x80;
break;
case 29:
- w1[3] = w1[3] | append0[0] << 8;
- w2[0] = append0[0] >> 24 | append0[1] << 8;
- w2[1] = append0[1] >> 24 | append0[2] << 8;
- w2[2] = append0[2] >> 24 | append0[3] << 8;
- w2[3] = append0[3] >> 24 | append1[0] << 8;
- w3[0] = append1[0] >> 24 | append1[1] << 8;
- w3[1] = append1[1] >> 24 | append1[2] << 8;
+ w[ 7] = w[ 7] | 0x8000;
break;
case 30:
- w1[3] = w1[3] | append0[0] << 16;
- w2[0] = append0[0] >> 16 | append0[1] << 16;
- w2[1] = append0[1] >> 16 | append0[2] << 16;
- w2[2] = append0[2] >> 16 | append0[3] << 16;
- w2[3] = append0[3] >> 16 | append1[0] << 16;
- w3[0] = append1[0] >> 16 | append1[1] << 16;
- w3[1] = append1[1] >> 16 | append1[2] << 16;
+ w[ 7] = w[ 7] | 0x800000;
break;
case 31:
- w1[3] = w1[3] | append0[0] << 24;
- w2[0] = append0[0] >> 8 | append0[1] << 24;
- w2[1] = append0[1] >> 8 | append0[2] << 24;
- w2[2] = append0[2] >> 8 | append0[3] << 24;
- w2[3] = append0[3] >> 8 | append1[0] << 24;
- w3[0] = append1[0] >> 8 | append1[1] << 24;
- w3[1] = append1[1] >> 8 | append1[2] << 24;
+ w[ 7] = w[ 7] | 0x80000000;
break;
case 32:
- w2[0] = append0[0];
- w2[1] = append0[1];
- w2[2] = append0[2];
- w2[3] = append0[3];
- w3[0] = append1[0];
- w3[1] = append1[1];
+ w[ 8] = 0x80;
break;
- }
-}
-static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
-{
- #ifdef IS_AMD
- const int offset_mod_4 = offset & 3;
+ case 33:
+ w[ 8] = w[ 8] | 0x8000;
+ break;
- const int offset_minus_4 = 4 - offset;
+ case 34:
+ w[ 8] = w[ 8] | 0x800000;
+ break;
- switch (offset / 4)
- {
- case 0:
- w3[2] = amd_bytealign ( 0, w3[1], offset_minus_4);
- w3[1] = amd_bytealign (w3[1], w3[0], offset_minus_4);
- w3[0] = amd_bytealign (w3[0], w2[3], offset_minus_4);
- w2[3] = amd_bytealign (w2[3], w2[2], offset_minus_4);
- w2[2] = amd_bytealign (w2[2], w2[1], offset_minus_4);
- w2[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
- w2[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
- w1[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
- w1[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
- w1[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w1[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w0[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w0[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w0[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w0[0] = amd_bytealign (w0[0], 0, offset_minus_4);
+ case 35:
+ w[ 8] = w[ 8] | 0x80000000;
+ break;
- if (offset_mod_4 == 0)
- {
- w0[0] = w0[1];
- w0[1] = w0[2];
- w0[2] = w0[3];
- w0[3] = w1[0];
- w1[0] = w1[1];
- w1[1] = w1[2];
- w1[2] = w1[3];
- w1[3] = w2[0];
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 36:
+ w[ 9] = 0x80;
+ break;
+
+ case 37:
+ w[ 9] = w[ 9] | 0x8000;
+ break;
+ case 38:
+ w[ 9] = w[ 9] | 0x800000;
break;
- case 1:
- w3[2] = amd_bytealign ( 0, w3[0], offset_minus_4);
- w3[1] = amd_bytealign (w3[0], w2[3], offset_minus_4);
- w3[0] = amd_bytealign (w2[3], w2[2], offset_minus_4);
- w2[3] = amd_bytealign (w2[2], w2[1], offset_minus_4);
- w2[2] = amd_bytealign (w2[1], w2[0], offset_minus_4);
- w2[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
- w2[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
- w1[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
- w1[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w1[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w1[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w0[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w0[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w0[1] = amd_bytealign (w0[0], 0, offset_minus_4);
- w0[0] = 0;
+ case 39:
+ w[ 9] = w[ 9] | 0x80000000;
+ break;
- if (offset_mod_4 == 0)
- {
- w0[1] = w0[2];
- w0[2] = w0[3];
- w0[3] = w1[0];
- w1[0] = w1[1];
- w1[1] = w1[2];
- w1[2] = w1[3];
- w1[3] = w2[0];
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 40:
+ w[10] = 0x80;
+ break;
+
+ case 41:
+ w[10] = w[10] | 0x8000;
+ break;
+
+ case 42:
+ w[10] = w[10] | 0x800000;
+ break;
+
+ case 43:
+ w[10] = w[10] | 0x80000000;
+ break;
+
+ case 44:
+ w[11] = 0x80;
+ break;
+
+ case 45:
+ w[11] = w[11] | 0x8000;
+ break;
+ case 46:
+ w[11] = w[11] | 0x800000;
break;
- case 2:
- w3[2] = amd_bytealign ( 0, w2[3], offset_minus_4);
- w3[1] = amd_bytealign (w2[3], w2[2], offset_minus_4);
- w3[0] = amd_bytealign (w2[2], w2[1], offset_minus_4);
- w2[3] = amd_bytealign (w2[1], w2[0], offset_minus_4);
- w2[2] = amd_bytealign (w2[0], w1[3], offset_minus_4);
- w2[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
- w2[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
- w1[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w1[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w1[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w1[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w0[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w0[2] = amd_bytealign (w0[0], 0, offset_minus_4);
- w0[1] = 0;
- w0[0] = 0;
+ case 47:
+ w[11] = w[11] | 0x80000000;
+ break;
- if (offset_mod_4 == 0)
- {
- w0[2] = w0[3];
- w0[3] = w1[0];
- w1[0] = w1[1];
- w1[1] = w1[2];
- w1[2] = w1[3];
- w1[3] = w2[0];
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 48:
+ w[12] = 0x80;
+ break;
+ case 49:
+ w[12] = w[12] | 0x8000;
break;
- case 3:
- w3[2] = amd_bytealign ( 0, w2[2], offset_minus_4);
- w3[1] = amd_bytealign (w2[2], w2[1], offset_minus_4);
- w3[0] = amd_bytealign (w2[1], w2[0], offset_minus_4);
- w2[3] = amd_bytealign (w2[0], w1[3], offset_minus_4);
- w2[2] = amd_bytealign (w1[3], w1[2], offset_minus_4);
- w2[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
- w2[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w1[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w1[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w1[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w1[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w0[3] = amd_bytealign (w0[0], 0, offset_minus_4);
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 50:
+ w[12] = w[12] | 0x800000;
+ break;
- if (offset_mod_4 == 0)
- {
- w0[3] = w1[0];
- w1[0] = w1[1];
- w1[1] = w1[2];
- w1[2] = w1[3];
- w1[3] = w2[0];
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 51:
+ w[12] = w[12] | 0x80000000;
+ break;
+ case 52:
+ w[13] = 0x80;
break;
- case 4:
- w3[2] = amd_bytealign ( 0, w2[1], offset_minus_4);
- w3[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
- w3[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
- w2[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
- w2[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
- w2[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w2[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w1[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w1[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w1[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w1[0] = amd_bytealign (w0[0], 0, offset_minus_4);
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 53:
+ w[13] = w[13] | 0x8000;
+ break;
- if (offset_mod_4 == 0)
- {
- w1[0] = w1[1];
- w1[1] = w1[2];
- w1[2] = w1[3];
- w1[3] = w2[0];
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 54:
+ w[13] = w[13] | 0x800000;
+ break;
+ case 55:
+ w[13] = w[13] | 0x80000000;
break;
- case 5:
- w3[2] = amd_bytealign ( 0, w2[0], offset_minus_4);
- w3[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
- w3[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
- w2[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
- w2[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w2[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w2[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w1[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w1[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w1[1] = amd_bytealign (w0[0], 0, offset_minus_4);
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 56:
+ w[14] = 0x80;
+ break;
- if (offset_mod_4 == 0)
- {
- w1[1] = w1[2];
- w1[2] = w1[3];
- w1[3] = w2[0];
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 57:
+ w[14] = w[14] | 0x8000;
+ break;
+ case 58:
+ w[14] = w[14] | 0x800000;
break;
- case 6:
- w3[2] = amd_bytealign ( 0, w1[3], offset_minus_4);
- w3[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
- w3[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
- w2[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w2[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w2[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w2[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w1[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w1[2] = amd_bytealign (w0[0], 0, offset_minus_4);
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 59:
+ w[14] = w[14] | 0x80000000;
+ break;
- if (offset_mod_4 == 0)
- {
- w1[2] = w1[3];
- w1[3] = w2[0];
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 60:
+ w[15] = 0x80;
+ break;
+ case 61:
+ w[15] = w[15] | 0x8000;
break;
- case 7:
- w3[2] = amd_bytealign ( 0, w1[2], offset_minus_4);
- w3[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
- w3[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w2[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w2[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w2[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w2[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w1[3] = amd_bytealign (w0[0], 0, offset_minus_4);
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 62:
+ w[15] = w[15] | 0x800000;
+ break;
- if (offset_mod_4 == 0)
- {
- w1[3] = w2[0];
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 63:
+ w[15] = w[15] | 0x80000000;
+ break;
+ case 64:
+ w[16] = 0x80;
break;
- case 8:
- w3[2] = amd_bytealign ( 0, w1[1], offset_minus_4);
- w3[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
- w3[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w2[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w2[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w2[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w2[0] = amd_bytealign (w0[0], 0, offset_minus_4);
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 65:
+ w[16] = w[16] | 0x8000;
+ break;
- if (offset_mod_4 == 0)
- {
- w2[0] = w2[1];
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 66:
+ w[16] = w[16] | 0x800000;
+ break;
+ case 67:
+ w[16] = w[16] | 0x80000000;
break;
- case 9:
- w3[2] = amd_bytealign ( 0, w1[0], offset_minus_4);
- w3[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
- w3[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w2[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w2[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w2[1] = amd_bytealign (w0[0], 0, offset_minus_4);
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 68:
+ w[17] = 0x80;
+ break;
- if (offset_mod_4 == 0)
- {
- w2[1] = w2[2];
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 69:
+ w[17] = w[17] | 0x8000;
+ break;
+ case 70:
+ w[17] = w[17] | 0x800000;
break;
- case 10:
- w3[2] = amd_bytealign ( 0, w0[3], offset_minus_4);
- w3[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
- w3[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w2[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w2[2] = amd_bytealign (w0[0], 0, offset_minus_4);
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 71:
+ w[17] = w[17] | 0x80000000;
+ break;
- if (offset_mod_4 == 0)
- {
- w2[2] = w2[3];
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 72:
+ w[18] = 0x80;
+ break;
+ case 73:
+ w[18] = w[18] | 0x8000;
break;
- case 11:
- w3[2] = amd_bytealign ( 0, w0[2], offset_minus_4);
- w3[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
- w3[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w2[3] = amd_bytealign (w0[0], 0, offset_minus_4);
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 74:
+ w[18] = w[18] | 0x800000;
+ break;
- if (offset_mod_4 == 0)
- {
- w2[3] = w3[0];
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 75:
+ w[18] = w[18] | 0x80000000;
+ break;
+ case 76:
+ w[19] = 0x80;
break;
- case 12:
- w3[2] = amd_bytealign ( 0, w0[1], offset_minus_4);
- w3[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
- w3[0] = amd_bytealign (w0[0], 0, offset_minus_4);
- w2[3] = 0;
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 77:
+ w[19] = w[19] | 0x8000;
+ break;
- if (offset_mod_4 == 0)
- {
- w3[0] = w3[1];
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 78:
+ w[19] = w[19] | 0x800000;
+ break;
+ case 79:
+ w[19] = w[19] | 0x80000000;
break;
- case 13:
- w3[2] = amd_bytealign ( 0, w0[0], offset_minus_4);
- w3[1] = amd_bytealign (w0[0], 0, offset_minus_4);
- w3[0] = 0;
- w2[3] = 0;
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 80:
+ w[20] = 0x80;
+ break;
- if (offset_mod_4 == 0)
- {
- w3[1] = w3[2];
- w3[2] = 0;
- }
+ case 81:
+ w[20] = w[20] | 0x8000;
+ break;
+
+ case 82:
+ w[20] = w[20] | 0x800000;
+ break;
+ case 83:
+ w[20] = w[20] | 0x80000000;
break;
- }
- #endif
- #ifdef IS_NV
- const int offset_minus_4 = 4 - (offset % 4);
+ case 84:
+ w[21] = 0x80;
+ break;
- const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+ case 85:
+ w[21] = w[21] | 0x8000;
+ break;
- switch (offset / 4)
- {
- case 0:
- w3[1] = __byte_perm (w3[0], w3[1], selector);
- w3[0] = __byte_perm (w2[3], w3[0], selector);
- w2[3] = __byte_perm (w2[2], w2[3], selector);
- w2[2] = __byte_perm (w2[1], w2[2], selector);
- w2[1] = __byte_perm (w2[0], w2[1], selector);
- w2[0] = __byte_perm (w1[3], w2[0], selector);
- w1[3] = __byte_perm (w1[2], w1[3], selector);
- w1[2] = __byte_perm (w1[1], w1[2], selector);
- w1[1] = __byte_perm (w1[0], w1[1], selector);
- w1[0] = __byte_perm (w0[3], w1[0], selector);
- w0[3] = __byte_perm (w0[2], w0[3], selector);
- w0[2] = __byte_perm (w0[1], w0[2], selector);
- w0[1] = __byte_perm (w0[0], w0[1], selector);
- w0[0] = __byte_perm ( 0, w0[0], selector);
+ case 86:
+ w[21] = w[21] | 0x800000;
+ break;
+ case 87:
+ w[21] = w[21] | 0x80000000;
break;
- case 1:
- w3[1] = __byte_perm (w2[3], w3[0], selector);
- w3[0] = __byte_perm (w2[2], w2[3], selector);
- w2[3] = __byte_perm (w2[1], w2[2], selector);
- w2[2] = __byte_perm (w2[0], w2[1], selector);
- w2[1] = __byte_perm (w1[3], w2[0], selector);
- w2[0] = __byte_perm (w1[2], w1[3], selector);
- w1[3] = __byte_perm (w1[1], w1[2], selector);
- w1[2] = __byte_perm (w1[0], w1[1], selector);
- w1[1] = __byte_perm (w0[3], w1[0], selector);
- w1[0] = __byte_perm (w0[2], w0[3], selector);
- w0[3] = __byte_perm (w0[1], w0[2], selector);
- w0[2] = __byte_perm (w0[0], w0[1], selector);
- w0[1] = __byte_perm ( 0, w0[0], selector);
- w0[0] = 0;
+ case 88:
+ w[22] = 0x80;
+ break;
+ case 89:
+ w[22] = w[22] | 0x8000;
break;
- case 2:
- w3[1] = __byte_perm (w2[2], w2[3], selector);
- w3[0] = __byte_perm (w2[1], w2[2], selector);
- w2[3] = __byte_perm (w2[0], w2[1], selector);
- w2[2] = __byte_perm (w1[3], w2[0], selector);
- w2[1] = __byte_perm (w1[2], w1[3], selector);
- w2[0] = __byte_perm (w1[1], w1[2], selector);
- w1[3] = __byte_perm (w1[0], w1[1], selector);
- w1[2] = __byte_perm (w0[3], w1[0], selector);
- w1[1] = __byte_perm (w0[2], w0[3], selector);
- w1[0] = __byte_perm (w0[1], w0[2], selector);
- w0[3] = __byte_perm (w0[0], w0[1], selector);
- w0[2] = __byte_perm ( 0, w0[0], selector);
- w0[1] = 0;
- w0[0] = 0;
+ case 90:
+ w[22] = w[22] | 0x800000;
+ break;
+ case 91:
+ w[22] = w[22] | 0x80000000;
break;
- case 3:
- w3[1] = __byte_perm (w2[1], w2[2], selector);
- w3[0] = __byte_perm (w2[0], w2[1], selector);
- w2[3] = __byte_perm (w1[3], w2[0], selector);
- w2[2] = __byte_perm (w1[2], w1[3], selector);
- w2[1] = __byte_perm (w1[1], w1[2], selector);
- w2[0] = __byte_perm (w1[0], w1[1], selector);
- w1[3] = __byte_perm (w0[3], w1[0], selector);
- w1[2] = __byte_perm (w0[2], w0[3], selector);
- w1[1] = __byte_perm (w0[1], w0[2], selector);
- w1[0] = __byte_perm (w0[0], w0[1], selector);
- w0[3] = __byte_perm ( 0, w0[0], selector);
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 92:
+ w[23] = 0x80;
+ break;
+
+ case 93:
+ w[23] = w[23] | 0x8000;
+ break;
+ case 94:
+ w[23] = w[23] | 0x800000;
break;
- case 4:
- w3[1] = __byte_perm (w2[0], w2[1], selector);
- w3[0] = __byte_perm (w1[3], w2[0], selector);
- w2[3] = __byte_perm (w1[2], w1[3], selector);
- w2[2] = __byte_perm (w1[1], w1[2], selector);
- w2[1] = __byte_perm (w1[0], w1[1], selector);
- w2[0] = __byte_perm (w0[3], w1[0], selector);
- w1[3] = __byte_perm (w0[2], w0[3], selector);
- w1[2] = __byte_perm (w0[1], w0[2], selector);
- w1[1] = __byte_perm (w0[0], w0[1], selector);
- w1[0] = __byte_perm ( 0, w0[0], selector);
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 95:
+ w[23] = w[23] | 0x80000000;
+ break;
+ case 96:
+ w[24] = 0x80;
break;
- case 5:
- w3[1] = __byte_perm (w1[3], w2[0], selector);
- w3[0] = __byte_perm (w1[2], w1[3], selector);
- w2[3] = __byte_perm (w1[1], w1[2], selector);
- w2[2] = __byte_perm (w1[0], w1[1], selector);
- w2[1] = __byte_perm (w0[3], w1[0], selector);
- w2[0] = __byte_perm (w0[2], w0[3], selector);
- w1[3] = __byte_perm (w0[1], w0[2], selector);
- w1[2] = __byte_perm (w0[0], w0[1], selector);
- w1[1] = __byte_perm ( 0, w0[0], selector);
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 97:
+ w[24] = w[24] | 0x8000;
+ break;
+ case 98:
+ w[24] = w[24] | 0x800000;
break;
- case 6:
- w3[1] = __byte_perm (w1[2], w1[3], selector);
- w3[0] = __byte_perm (w1[1], w1[2], selector);
- w2[3] = __byte_perm (w1[0], w1[1], selector);
- w2[2] = __byte_perm (w0[3], w1[0], selector);
- w2[1] = __byte_perm (w0[2], w0[3], selector);
- w2[0] = __byte_perm (w0[1], w0[2], selector);
- w1[3] = __byte_perm (w0[0], w0[1], selector);
- w1[2] = __byte_perm ( 0, w0[0], selector);
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 99:
+ w[24] = w[24] | 0x80000000;
+ break;
+ case 100:
+ w[25] = 0x80;
break;
- case 7:
- w3[1] = __byte_perm (w1[1], w1[2], selector);
- w3[0] = __byte_perm (w1[0], w1[1], selector);
- w2[3] = __byte_perm (w0[3], w1[0], selector);
- w2[2] = __byte_perm (w0[2], w0[3], selector);
- w2[1] = __byte_perm (w0[1], w0[2], selector);
- w2[0] = __byte_perm (w0[0], w0[1], selector);
- w1[3] = __byte_perm ( 0, w0[0], selector);
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 101:
+ w[25] = w[25] | 0x8000;
+ break;
+ case 102:
+ w[25] = w[25] | 0x800000;
break;
- case 8:
- w3[1] = __byte_perm (w1[0], w1[1], selector);
- w3[0] = __byte_perm (w0[3], w1[0], selector);
- w2[3] = __byte_perm (w0[2], w0[3], selector);
- w2[2] = __byte_perm (w0[1], w0[2], selector);
- w2[1] = __byte_perm (w0[0], w0[1], selector);
- w2[0] = __byte_perm ( 0, w0[0], selector);
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 103:
+ w[25] = w[25] | 0x80000000;
+ break;
+ case 104:
+ w[26] = 0x80;
break;
- case 9:
- w3[1] = __byte_perm (w0[3], w1[0], selector);
- w3[0] = __byte_perm (w0[2], w0[3], selector);
- w2[3] = __byte_perm (w0[1], w0[2], selector);
- w2[2] = __byte_perm (w0[0], w0[1], selector);
- w2[1] = __byte_perm ( 0, w0[0], selector);
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 105:
+ w[26] = w[26] | 0x8000;
+ break;
+ case 106:
+ w[26] = w[26] | 0x800000;
break;
- case 10:
- w3[1] = __byte_perm (w0[2], w0[3], selector);
- w3[0] = __byte_perm (w0[1], w0[2], selector);
- w2[3] = __byte_perm (w0[0], w0[1], selector);
- w2[2] = __byte_perm ( 0, w0[0], selector);
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 107:
+ w[26] = w[26] | 0x80000000;
+ break;
+ case 108:
+ w[27] = 0x80;
break;
- case 11:
- w3[1] = __byte_perm (w0[1], w0[2], selector);
- w3[0] = __byte_perm (w0[0], w0[1], selector);
- w2[3] = __byte_perm ( 0, w0[0], selector);
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 109:
+ w[27] = w[27] | 0x8000;
+ break;
+ case 110:
+ w[27] = w[27] | 0x800000;
break;
- case 12:
- w3[1] = __byte_perm (w0[0], w0[1], selector);
- w3[0] = __byte_perm ( 0, w0[0], selector);
- w2[3] = 0;
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 111:
+ w[27] = w[27] | 0x80000000;
+ break;
+ case 112:
+ w[28] = 0x80;
break;
- case 13:
- w3[1] = __byte_perm ( 0, w0[0], selector);
- w3[0] = 0;
- w2[3] = 0;
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 113:
+ w[28] = w[28] | 0x8000;
+ break;
+ case 114:
+ w[28] = w[28] | 0x800000;
break;
- }
- #endif
-}
-static void switch_buffer_by_offset_be (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
-{
- #ifdef IS_AMD
- switch (offset / 4)
- {
- case 0:
- w3[2] = amd_bytealign (w3[1], 0, offset);
- w3[1] = amd_bytealign (w3[0], w3[1], offset);
- w3[0] = amd_bytealign (w2[3], w3[0], offset);
- w2[3] = amd_bytealign (w2[2], w2[3], offset);
- w2[2] = amd_bytealign (w2[1], w2[2], offset);
- w2[1] = amd_bytealign (w2[0], w2[1], offset);
- w2[0] = amd_bytealign (w1[3], w2[0], offset);
- w1[3] = amd_bytealign (w1[2], w1[3], offset);
- w1[2] = amd_bytealign (w1[1], w1[2], offset);
- w1[1] = amd_bytealign (w1[0], w1[1], offset);
- w1[0] = amd_bytealign (w0[3], w1[0], offset);
- w0[3] = amd_bytealign (w0[2], w0[3], offset);
- w0[2] = amd_bytealign (w0[1], w0[2], offset);
- w0[1] = amd_bytealign (w0[0], w0[1], offset);
- w0[0] = amd_bytealign ( 0, w0[0], offset);
+ case 115:
+ w[28] = w[28] | 0x80000000;
break;
- case 1:
- w3[2] = amd_bytealign (w3[0], 0, offset);
- w3[1] = amd_bytealign (w2[3], w3[0], offset);
- w3[0] = amd_bytealign (w2[2], w2[3], offset);
- w2[3] = amd_bytealign (w2[1], w2[2], offset);
- w2[2] = amd_bytealign (w2[0], w2[1], offset);
- w2[1] = amd_bytealign (w1[3], w2[0], offset);
- w2[0] = amd_bytealign (w1[2], w1[3], offset);
- w1[3] = amd_bytealign (w1[1], w1[2], offset);
- w1[2] = amd_bytealign (w1[0], w1[1], offset);
- w1[1] = amd_bytealign (w0[3], w1[0], offset);
- w1[0] = amd_bytealign (w0[2], w0[3], offset);
- w0[3] = amd_bytealign (w0[1], w0[2], offset);
- w0[2] = amd_bytealign (w0[0], w0[1], offset);
- w0[1] = amd_bytealign ( 0, w0[0], offset);
- w0[0] = 0;
+ case 116:
+ w[29] = 0x80;
+ break;
+
+ case 117:
+ w[29] = w[29] | 0x8000;
+ break;
+
+ case 118:
+ w[29] = w[29] | 0x800000;
break;
- case 2:
- w3[2] = amd_bytealign (w2[3], 0, offset);
- w3[1] = amd_bytealign (w2[2], w2[3], offset);
- w3[0] = amd_bytealign (w2[1], w2[2], offset);
- w2[3] = amd_bytealign (w2[0], w2[1], offset);
- w2[2] = amd_bytealign (w1[3], w2[0], offset);
- w2[1] = amd_bytealign (w1[2], w1[3], offset);
- w2[0] = amd_bytealign (w1[1], w1[2], offset);
- w1[3] = amd_bytealign (w1[0], w1[1], offset);
- w1[2] = amd_bytealign (w0[3], w1[0], offset);
- w1[1] = amd_bytealign (w0[2], w0[3], offset);
- w1[0] = amd_bytealign (w0[1], w0[2], offset);
- w0[3] = amd_bytealign (w0[0], w0[1], offset);
- w0[2] = amd_bytealign ( 0, w0[0], offset);
- w0[1] = 0;
- w0[0] = 0;
+ case 119:
+ w[29] = w[29] | 0x80000000;
break;
- case 3:
- w3[2] = amd_bytealign (w2[2], 0, offset);
- w3[1] = amd_bytealign (w2[1], w2[2], offset);
- w3[0] = amd_bytealign (w2[0], w2[1], offset);
- w2[3] = amd_bytealign (w1[3], w2[0], offset);
- w2[2] = amd_bytealign (w1[2], w1[3], offset);
- w2[1] = amd_bytealign (w1[1], w1[2], offset);
- w2[0] = amd_bytealign (w1[0], w1[1], offset);
- w1[3] = amd_bytealign (w0[3], w1[0], offset);
- w1[2] = amd_bytealign (w0[2], w0[3], offset);
- w1[1] = amd_bytealign (w0[1], w0[2], offset);
- w1[0] = amd_bytealign (w0[0], w0[1], offset);
- w0[3] = amd_bytealign ( 0, w0[0], offset);
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 120:
+ w[30] = 0x80;
break;
- case 4:
- w3[2] = amd_bytealign (w2[1], 0, offset);
- w3[1] = amd_bytealign (w2[0], w2[1], offset);
- w3[0] = amd_bytealign (w1[3], w2[0], offset);
- w2[3] = amd_bytealign (w1[2], w1[3], offset);
- w2[2] = amd_bytealign (w1[1], w1[2], offset);
- w2[1] = amd_bytealign (w1[0], w1[1], offset);
- w2[0] = amd_bytealign (w0[3], w1[0], offset);
- w1[3] = amd_bytealign (w0[2], w0[3], offset);
- w1[2] = amd_bytealign (w0[1], w0[2], offset);
- w1[1] = amd_bytealign (w0[0], w0[1], offset);
- w1[0] = amd_bytealign ( 0, w0[0], offset);
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 121:
+ w[30] = w[30] | 0x8000;
break;
- case 5:
- w3[2] = amd_bytealign (w2[0], 0, offset);
- w3[1] = amd_bytealign (w1[3], w2[0], offset);
- w3[0] = amd_bytealign (w1[2], w1[3], offset);
- w2[3] = amd_bytealign (w1[1], w1[2], offset);
- w2[2] = amd_bytealign (w1[0], w1[1], offset);
- w2[1] = amd_bytealign (w0[3], w1[0], offset);
- w2[0] = amd_bytealign (w0[2], w0[3], offset);
- w1[3] = amd_bytealign (w0[1], w0[2], offset);
- w1[2] = amd_bytealign (w0[0], w0[1], offset);
- w1[1] = amd_bytealign ( 0, w0[0], offset);
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 122:
+ w[30] = w[30] | 0x800000;
break;
- case 6:
- w3[2] = amd_bytealign (w1[3], 0, offset);
- w3[1] = amd_bytealign (w1[2], w1[3], offset);
- w3[0] = amd_bytealign (w1[1], w1[2], offset);
- w2[3] = amd_bytealign (w1[0], w1[1], offset);
- w2[2] = amd_bytealign (w0[3], w1[0], offset);
- w2[1] = amd_bytealign (w0[2], w0[3], offset);
- w2[0] = amd_bytealign (w0[1], w0[2], offset);
- w1[3] = amd_bytealign (w0[0], w0[1], offset);
- w1[2] = amd_bytealign ( 0, w0[0], offset);
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 123:
+ w[30] = w[30] | 0x80000000;
break;
- case 7:
- w3[2] = amd_bytealign (w1[2], 0, offset);
- w3[1] = amd_bytealign (w1[1], w1[2], offset);
- w3[0] = amd_bytealign (w1[0], w1[1], offset);
- w2[3] = amd_bytealign (w0[3], w1[0], offset);
- w2[2] = amd_bytealign (w0[2], w0[3], offset);
- w2[1] = amd_bytealign (w0[1], w0[2], offset);
- w2[0] = amd_bytealign (w0[0], w0[1], offset);
- w1[3] = amd_bytealign ( 0, w0[0], offset);
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 124:
+ w[31] = 0x80;
break;
- case 8:
- w3[2] = amd_bytealign (w1[1], 0, offset);
- w3[1] = amd_bytealign (w1[0], w1[1], offset);
- w3[0] = amd_bytealign (w0[3], w1[0], offset);
- w2[3] = amd_bytealign (w0[2], w0[3], offset);
- w2[2] = amd_bytealign (w0[1], w0[2], offset);
- w2[1] = amd_bytealign (w0[0], w0[1], offset);
- w2[0] = amd_bytealign ( 0, w0[0], offset);
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 125:
+ w[31] = w[31] | 0x8000;
break;
- case 9:
- w3[2] = amd_bytealign (w1[0], 0, offset);
- w3[1] = amd_bytealign (w0[3], w1[0], offset);
- w3[0] = amd_bytealign (w0[2], w0[3], offset);
- w2[3] = amd_bytealign (w0[1], w0[2], offset);
- w2[2] = amd_bytealign (w0[0], w0[1], offset);
- w2[1] = amd_bytealign ( 0, w0[0], offset);
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 126:
+ w[31] = w[31] | 0x800000;
break;
- case 10:
- w3[2] = amd_bytealign (w0[3], 0, offset);
- w3[1] = amd_bytealign (w0[2], w0[3], offset);
- w3[0] = amd_bytealign (w0[1], w0[2], offset);
- w2[3] = amd_bytealign (w0[0], w0[1], offset);
- w2[2] = amd_bytealign ( 0, w0[0], offset);
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 127:
+ w[31] = w[31] | 0x80000000;
break;
+ }
+}
- case 11:
- w3[2] = amd_bytealign (w0[2], 0, offset);
- w3[1] = amd_bytealign (w0[1], w0[2], offset);
- w3[0] = amd_bytealign (w0[0], w0[1], offset);
- w2[3] = amd_bytealign ( 0, w0[0], offset);
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+// before: device_memcat2L -- merges src_r0 into dst0 at a position picked by offset (cases 1..7): src_r0 is moved up by 8 bits per offset step and the first word it lands in is OR'ed with the matching src_l0 word (case 4 is a plain copy without that OR)
+static void memcat_c7_d1x2_sl1x2_sr1x2 (const u32 offset, u32 dst0[2], u32 src_l0[2], u32 src_r0[2]) // dst0 is the only array written; src_l0 and src_r0 are only read. NOTE(review): the shift pairs (8/24, 16/16, 24/8) assume u32 is 32 bits wide -- confirm against the typedef
+{
+ switch (offset) // there is no case 0, no case above 7 and no default: for those values dst0 is left untouched
+ {
+ case 1: // src_r0 moved up by 8 bits
+ dst0[0] = src_l0[0] | src_r0[0] << 8; // << binds tighter than |, i.e. src_l0[0] | (src_r0[0] << 8)
+ dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; // the 8 bits pushed out of word 0, followed by src_r0[1]; whatever src_r0[1] pushes out is discarded
break;
- case 12:
- w3[2] = amd_bytealign (w0[1], 0, offset);
- w3[1] = amd_bytealign (w0[0], w0[1], offset);
- w3[0] = amd_bytealign ( 0, w0[0], offset);
- w2[3] = 0;
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 2: // src_r0 moved up by 16 bits
+ dst0[0] = src_l0[0] | src_r0[0] << 16; // src_l0[0] | (src_r0[0] << 16)
+ dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; // upper half of src_r0[0] carried into word 1, then the lower half of src_r0[1]
break;
- case 13:
- w3[2] = amd_bytealign (w0[0], 0, offset);
- w3[1] = amd_bytealign ( 0, w0[0], offset);
- w3[0] = 0;
- w2[3] = 0;
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ case 3: // src_r0 moved up by 24 bits
+ dst0[0] = src_l0[0] | src_r0[0] << 24; // only the low 8 bits of src_r0[0] still fit into word 0
+ dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; // remaining 24 bits of src_r0[0], then the low 8 bits of src_r0[1]
break;
- }
- #endif
- #ifdef IS_NV
- const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
+ case 4: // src_r0 moved up by exactly one word; dst0[0] is not written from here on (cases 4..7)
+ dst0[1] = src_r0[0]; // plain copy: src_l0[1] is not OR'ed in, unlike cases 5..7; src_r0[1] is not used
+ break;
- switch (offset / 4)
- {
- case 0:
- w3[1] = __byte_perm (w3[1], w3[0], selector);
- w3[0] = __byte_perm (w3[0], w2[3], selector);
- w2[3] = __byte_perm (w2[3], w2[2], selector);
- w2[2] = __byte_perm (w2[2], w2[1], selector);
- w2[1] = __byte_perm (w2[1], w2[0], selector);
- w2[0] = __byte_perm (w2[0], w1[3], selector);
- w1[3] = __byte_perm (w1[3], w1[2], selector);
- w1[2] = __byte_perm (w1[2], w1[1], selector);
- w1[1] = __byte_perm (w1[1], w1[0], selector);
- w1[0] = __byte_perm (w1[0], w0[3], selector);
- w0[3] = __byte_perm (w0[3], w0[2], selector);
- w0[2] = __byte_perm (w0[2], w0[1], selector);
- w0[1] = __byte_perm (w0[1], w0[0], selector);
- w0[0] = __byte_perm (w0[0], 0, selector);
+ case 5: // one word plus 8 bits
+ dst0[1] = src_l0[1] | src_r0[0] << 8; // src_l0[1] | (src_r0[0] << 8); src_r0[1] is not used
+ break;
+
+ case 6: // one word plus 16 bits
+ dst0[1] = src_l0[1] | src_r0[0] << 16; // src_l0[1] | (src_r0[0] << 16)
+ break;
+
+ case 7: // one word plus 24 bits
+ dst0[1] = src_l0[1] | src_r0[0] << 24; // only the low 8 bits of src_r0[0] end up in dst0[1]
break;
+ }
+}
+// before: device_memcat4L
+static void memcat_c15_d1x4_sl1x4_sr1x4 (const u32 offset, u32 dst0[4], u32 src_l0[4], u32 src_r0[4])
+{
+ switch (offset)
+ {
case 1:
- w3[1] = __byte_perm (w3[0], w2[3], selector);
- w3[0] = __byte_perm (w2[3], w2[2], selector);
- w2[3] = __byte_perm (w2[2], w2[1], selector);
- w2[2] = __byte_perm (w2[1], w2[0], selector);
- w2[1] = __byte_perm (w2[0], w1[3], selector);
- w2[0] = __byte_perm (w1[3], w1[2], selector);
- w1[3] = __byte_perm (w1[2], w1[1], selector);
- w1[2] = __byte_perm (w1[1], w1[0], selector);
- w1[1] = __byte_perm (w1[0], w0[3], selector);
- w1[0] = __byte_perm (w0[3], w0[2], selector);
- w0[3] = __byte_perm (w0[2], w0[1], selector);
- w0[2] = __byte_perm (w0[1], w0[0], selector);
- w0[1] = __byte_perm (w0[0], 0, selector);
- w0[0] = 0;
+ dst0[0] = src_l0[0] | src_r0[0] << 8;
+ dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
break;
case 2:
- w3[1] = __byte_perm (w2[3], w2[2], selector);
- w3[0] = __byte_perm (w2[2], w2[1], selector);
- w2[3] = __byte_perm (w2[1], w2[0], selector);
- w2[2] = __byte_perm (w2[0], w1[3], selector);
- w2[1] = __byte_perm (w1[3], w1[2], selector);
- w2[0] = __byte_perm (w1[2], w1[1], selector);
- w1[3] = __byte_perm (w1[1], w1[0], selector);
- w1[2] = __byte_perm (w1[0], w0[3], selector);
- w1[1] = __byte_perm (w0[3], w0[2], selector);
- w1[0] = __byte_perm (w0[2], w0[1], selector);
- w0[3] = __byte_perm (w0[1], w0[0], selector);
- w0[2] = __byte_perm (w0[0], 0, selector);
- w0[1] = 0;
- w0[0] = 0;
+ dst0[0] = src_l0[0] | src_r0[0] << 16;
+ dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
break;
case 3:
- w3[1] = __byte_perm (w2[2], w2[1], selector);
- w3[0] = __byte_perm (w2[1], w2[0], selector);
- w2[3] = __byte_perm (w2[0], w1[3], selector);
- w2[2] = __byte_perm (w1[3], w1[2], selector);
- w2[1] = __byte_perm (w1[2], w1[1], selector);
- w2[0] = __byte_perm (w1[1], w1[0], selector);
- w1[3] = __byte_perm (w1[0], w0[3], selector);
- w1[2] = __byte_perm (w0[3], w0[2], selector);
- w1[1] = __byte_perm (w0[2], w0[1], selector);
- w1[0] = __byte_perm (w0[1], w0[0], selector);
- w0[3] = __byte_perm (w0[0], 0, selector);
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ dst0[0] = src_l0[0] | src_r0[0] << 24;
+ dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
break;
case 4:
- w3[1] = __byte_perm (w2[1], w2[0], selector);
- w3[0] = __byte_perm (w2[0], w1[3], selector);
- w2[3] = __byte_perm (w1[3], w1[2], selector);
- w2[2] = __byte_perm (w1[2], w1[1], selector);
- w2[1] = __byte_perm (w1[1], w1[0], selector);
- w2[0] = __byte_perm (w1[0], w0[3], selector);
- w1[3] = __byte_perm (w0[3], w0[2], selector);
- w1[2] = __byte_perm (w0[2], w0[1], selector);
- w1[1] = __byte_perm (w0[1], w0[0], selector);
- w1[0] = __byte_perm (w0[0], 0, selector);
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ dst0[1] = src_r0[0];
+ dst0[2] = src_r0[1];
+ dst0[3] = src_r0[2];
break;
case 5:
- w3[1] = __byte_perm (w2[0], w1[3], selector);
- w3[0] = __byte_perm (w1[3], w1[2], selector);
- w2[3] = __byte_perm (w1[2], w1[1], selector);
- w2[2] = __byte_perm (w1[1], w1[0], selector);
- w2[1] = __byte_perm (w1[0], w0[3], selector);
- w2[0] = __byte_perm (w0[3], w0[2], selector);
- w1[3] = __byte_perm (w0[2], w0[1], selector);
- w1[2] = __byte_perm (w0[1], w0[0], selector);
- w1[1] = __byte_perm (w0[0], 0, selector);
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ dst0[1] = src_l0[1] | src_r0[0] << 8;
+ dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
break;
case 6:
- w3[1] = __byte_perm (w1[3], w1[2], selector);
- w3[0] = __byte_perm (w1[2], w1[1], selector);
- w2[3] = __byte_perm (w1[1], w1[0], selector);
- w2[2] = __byte_perm (w1[0], w0[3], selector);
- w2[1] = __byte_perm (w0[3], w0[2], selector);
- w2[0] = __byte_perm (w0[2], w0[1], selector);
- w1[3] = __byte_perm (w0[1], w0[0], selector);
- w1[2] = __byte_perm (w0[0], 0, selector);
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ dst0[1] = src_l0[1] | src_r0[0] << 16;
+ dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
break;
case 7:
- w3[1] = __byte_perm (w1[2], w1[1], selector);
- w3[0] = __byte_perm (w1[1], w1[0], selector);
- w2[3] = __byte_perm (w1[0], w0[3], selector);
- w2[2] = __byte_perm (w0[3], w0[2], selector);
- w2[1] = __byte_perm (w0[2], w0[1], selector);
- w2[0] = __byte_perm (w0[1], w0[0], selector);
- w1[3] = __byte_perm (w0[0], 0, selector);
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ dst0[1] = src_l0[1] | src_r0[0] << 24;
+ dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
break;
case 8:
- w3[1] = __byte_perm (w1[1], w1[0], selector);
- w3[0] = __byte_perm (w1[0], w0[3], selector);
- w2[3] = __byte_perm (w0[3], w0[2], selector);
- w2[2] = __byte_perm (w0[2], w0[1], selector);
- w2[1] = __byte_perm (w0[1], w0[0], selector);
- w2[0] = __byte_perm (w0[0], 0, selector);
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ dst0[2] = src_r0[0];
+ dst0[3] = src_r0[1];
break;
case 9:
- w3[1] = __byte_perm (w1[0], w0[3], selector);
- w3[0] = __byte_perm (w0[3], w0[2], selector);
- w2[3] = __byte_perm (w0[2], w0[1], selector);
- w2[2] = __byte_perm (w0[1], w0[0], selector);
- w2[1] = __byte_perm (w0[0], 0, selector);
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ dst0[2] = src_l0[2] | src_r0[0] << 8;
+ dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
break;
case 10:
- w3[1] = __byte_perm (w0[3], w0[2], selector);
- w3[0] = __byte_perm (w0[2], w0[1], selector);
- w2[3] = __byte_perm (w0[1], w0[0], selector);
- w2[2] = __byte_perm (w0[0], 0, selector);
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ dst0[2] = src_l0[2] | src_r0[0] << 16;
+ dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
break;
case 11:
- w3[1] = __byte_perm (w0[2], w0[1], selector);
- w3[0] = __byte_perm (w0[1], w0[0], selector);
- w2[3] = __byte_perm (w0[0], 0, selector);
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ dst0[2] = src_l0[2] | src_r0[0] << 24;
+ dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
break;
case 12:
- w3[1] = __byte_perm (w0[1], w0[0], selector);
- w3[0] = __byte_perm (w0[0], 0, selector);
- w2[3] = 0;
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ dst0[3] = src_r0[0];
break;
case 13:
- w3[1] = __byte_perm (w0[0], 0, selector);
- w3[0] = 0;
- w2[3] = 0;
- w2[2] = 0;
- w2[1] = 0;
- w2[0] = 0;
- w1[3] = 0;
- w1[2] = 0;
- w1[1] = 0;
- w1[0] = 0;
- w0[3] = 0;
- w0[2] = 0;
- w0[1] = 0;
- w0[0] = 0;
+ dst0[3] = src_l0[3] | src_r0[0] << 8;
+ break;
+
+ case 14:
+ dst0[3] = src_l0[3] | src_r0[0] << 16;
+ break;
+
+ case 15:
+ dst0[3] = src_l0[3] | src_r0[0] << 24;
break;
}
- #endif
}
-/* not needed anymore?
-// before: append_0x80_2_be
-static void append_0x80_2x4_be (u32 w0[4], u32 w1[4], const u32 offset)
+// before: device_memcat8L
+// Merges the four words of src_r0 into the two-vector destination (dst0, dst1)
+// starting at byte position `offset`. Only offsets 1..31 have a case.
+//   - offset % 4 == 0: the first written word is src_r0[0] on its own; the
+//     src_l* vectors are not read.
+//   - otherwise: the first written word is the matching src_l word OR'ed with
+//     src_r0[0] shifted left by 8 * (offset % 4) bits; every following word
+//     joins the bits shifted out of the previous src_r0 word with the next one.
+// Words below the insertion word are never written here. Writes stop at
+// dst1[3], so whatever part of src_r0 would land beyond it is dropped.
+// offset 0 and any value above 31 match no case and write nothing.
+// NOTE(review): the shift directions suggest little-endian byte packing and
+// that the src_l word is zero from the insertion byte upward - confirm against
+// the callers.
+static void memcat_c31_d2x4_sl2x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 src_l0[4], u32 src_l1[4], u32 src_r0[4])
{
switch (offset)
{
- case 0:
- w0[0] |= 0x80000000;
- break;
-
- case 1:
- w0[0] |= 0x800000;
+ case 1:
+ dst0[0] = src_l0[0] | src_r0[0] << 8;
+ dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[0] = src_r0[3] >> 24;
break;
- case 2:
- w0[0] |= 0x8000;
+ case 2:
+ dst0[0] = src_l0[0] | src_r0[0] << 16;
+ dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[0] = src_r0[3] >> 16;
break;
- case 3:
- w0[0] |= 0x80;
+ case 3:
+ dst0[0] = src_l0[0] | src_r0[0] << 24;
+ dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[0] = src_r0[3] >> 8;
break;
- case 4:
- w0[1] |= 0x80000000;
+ case 4:
+ dst0[1] = src_r0[0];
+ dst0[2] = src_r0[1];
+ dst0[3] = src_r0[2];
+ dst1[0] = src_r0[3];
break;
- case 5:
- w0[1] |= 0x800000;
+ case 5:
+ dst0[1] = src_l0[1] | src_r0[0] << 8;
+ dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[1] = src_r0[3] >> 24;
break;
- case 6:
- w0[1] |= 0x8000;
+ case 6:
+ dst0[1] = src_l0[1] | src_r0[0] << 16;
+ dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[1] = src_r0[3] >> 16;
break;
- case 7:
- w0[1] |= 0x80;
+ case 7:
+ dst0[1] = src_l0[1] | src_r0[0] << 24;
+ dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[1] = src_r0[3] >> 8;
break;
- case 8:
- w0[2] |= 0x80000000;
+ case 8:
+ dst0[2] = src_r0[0];
+ dst0[3] = src_r0[1];
+ dst1[0] = src_r0[2];
+ dst1[1] = src_r0[3];
break;
- case 9:
- w0[2] |= 0x800000;
+ case 9:
+ dst0[2] = src_l0[2] | src_r0[0] << 8;
+ dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[2] = src_r0[3] >> 24;
break;
case 10:
- w0[2] |= 0x8000;
+ dst0[2] = src_l0[2] | src_r0[0] << 16;
+ dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[2] = src_r0[3] >> 16;
break;
case 11:
- w0[2] |= 0x80;
+ dst0[2] = src_l0[2] | src_r0[0] << 24;
+ dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[2] = src_r0[3] >> 8;
break;
case 12:
- w0[3] |= 0x80000000;
+ dst0[3] = src_r0[0];
+ dst1[0] = src_r0[1];
+ dst1[1] = src_r0[2];
+ dst1[2] = src_r0[3];
break;
case 13:
- w0[3] |= 0x800000;
+ dst0[3] = src_l0[3] | src_r0[0] << 8;
+ dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[3] = src_r0[3] >> 24;
break;
case 14:
- w0[3] |= 0x8000;
+ dst0[3] = src_l0[3] | src_r0[0] << 16;
+ dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[3] = src_r0[3] >> 16;
break;
case 15:
- w0[3] |= 0x80;
+ dst0[3] = src_l0[3] | src_r0[0] << 24;
+ dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[3] = src_r0[3] >> 8;
break;
case 16:
- w1[0] |= 0x80000000;
+ dst1[0] = src_r0[0];
+ dst1[1] = src_r0[1];
+ dst1[2] = src_r0[2];
+ dst1[3] = src_r0[3];
break;
case 17:
- w1[0] |= 0x800000;
+ dst1[0] = src_l1[0] | src_r0[0] << 8;
+ dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8;
break;
case 18:
- w1[0] |= 0x8000;
+ dst1[0] = src_l1[0] | src_r0[0] << 16;
+ dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16;
break;
case 19:
- w1[0] |= 0x80;
+ dst1[0] = src_l1[0] | src_r0[0] << 24;
+ dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24;
break;
case 20:
- w1[1] |= 0x80000000;
+ dst1[1] = src_r0[0];
+ dst1[2] = src_r0[1];
+ dst1[3] = src_r0[2];
break;
case 21:
- w1[1] |= 0x800000;
+ dst1[1] = src_l1[1] | src_r0[0] << 8;
+ dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8;
break;
case 22:
- w1[1] |= 0x8000;
+ dst1[1] = src_l1[1] | src_r0[0] << 16;
+ dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16;
break;
case 23:
- w1[1] |= 0x80;
+ dst1[1] = src_l1[1] | src_r0[0] << 24;
+ dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24;
break;
case 24:
- w1[2] |= 0x80000000;
+ dst1[2] = src_r0[0];
+ dst1[3] = src_r0[1];
break;
case 25:
- w1[2] |= 0x800000;
+ dst1[2] = src_l1[2] | src_r0[0] << 8;
+ dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8;
break;
case 26:
- w1[2] |= 0x8000;
+ dst1[2] = src_l1[2] | src_r0[0] << 16;
+ dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16;
break;
case 27:
- w1[2] |= 0x80;
+ dst1[2] = src_l1[2] | src_r0[0] << 24;
+ dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24;
break;
case 28:
- w1[3] |= 0x80000000;
+ dst1[3] = src_r0[0];
break;
case 29:
- w1[3] |= 0x800000;
+ dst1[3] = src_l1[3] | src_r0[0] << 8;
break;
case 30:
- w1[3] |= 0x8000;
+ dst1[3] = src_l1[3] | src_r0[0] << 16;
break;
case 31:
- w1[3] |= 0x80;
+ dst1[3] = src_l1[3] | src_r0[0] << 24;
break;
}
}
-// before: append_0x80_4
-static void append_0x80_1x16 (u32 w[16], const u32 offset)
+// before: device_memcat12L
+// Merges the four words of src_r0 into the three-vector destination
+// (dst0, dst1, dst2) starting at byte position `offset`. Only offsets 1..47
+// have a case.
+//   - offset % 4 == 0: the first written word is src_r0[0] on its own; the
+//     src_l* vectors are not read.
+//   - otherwise: the first written word is the matching src_l word OR'ed with
+//     src_r0[0] shifted left by 8 * (offset % 4) bits; every following word
+//     joins the bits shifted out of the previous src_r0 word with the next one.
+// Words below the insertion word are never written here. Writes stop at
+// dst2[3], so whatever part of src_r0 would land beyond it is dropped.
+// offset 0 and any value above 47 match no case and write nothing.
+// NOTE(review): the shift directions suggest little-endian byte packing and
+// that the src_l word is zero from the insertion byte upward - confirm against
+// the callers.
+static void memcat_c47_d3x4_sl3x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4])
{
switch (offset)
{
- case 0:
- w[ 0] = 0x80;
- break;
-
case 1:
- w[ 0] = w[ 0] | 0x8000;
+ dst0[0] = src_l0[0] | src_r0[0] << 8;
+ dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[0] = src_r0[3] >> 24;
break;
case 2:
- w[ 0] = w[ 0] | 0x800000;
+ dst0[0] = src_l0[0] | src_r0[0] << 16;
+ dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[0] = src_r0[3] >> 16;
break;
case 3:
- w[ 0] = w[ 0] | 0x80000000;
+ dst0[0] = src_l0[0] | src_r0[0] << 24;
+ dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[0] = src_r0[3] >> 8;
break;
case 4:
- w[ 1] = 0x80;
+ dst0[1] = src_r0[0];
+ dst0[2] = src_r0[1];
+ dst0[3] = src_r0[2];
+ dst1[0] = src_r0[3];
break;
case 5:
- w[ 1] = w[ 1] | 0x8000;
+ dst0[1] = src_l0[1] | src_r0[0] << 8;
+ dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[1] = src_r0[3] >> 24;
break;
case 6:
- w[ 1] = w[ 1] | 0x800000;
+ dst0[1] = src_l0[1] | src_r0[0] << 16;
+ dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[1] = src_r0[3] >> 16;
break;
case 7:
- w[ 1] = w[ 1] | 0x80000000;
+ dst0[1] = src_l0[1] | src_r0[0] << 24;
+ dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[1] = src_r0[3] >> 8;
break;
case 8:
- w[ 2] = 0x80;
+ dst0[2] = src_r0[0];
+ dst0[3] = src_r0[1];
+ dst1[0] = src_r0[2];
+ dst1[1] = src_r0[3];
break;
case 9:
- w[ 2] = w[ 2] | 0x8000;
+ dst0[2] = src_l0[2] | src_r0[0] << 8;
+ dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[2] = src_r0[3] >> 24;
break;
case 10:
- w[ 2] = w[ 2] | 0x800000;
+ dst0[2] = src_l0[2] | src_r0[0] << 16;
+ dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[2] = src_r0[3] >> 16;
break;
case 11:
- w[ 2] = w[ 2] | 0x80000000;
+ dst0[2] = src_l0[2] | src_r0[0] << 24;
+ dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[2] = src_r0[3] >> 8;
break;
case 12:
- w[ 3] = 0x80;
+ dst0[3] = src_r0[0];
+ dst1[0] = src_r0[1];
+ dst1[1] = src_r0[2];
+ dst1[2] = src_r0[3];
break;
case 13:
- w[ 3] = w[ 3] | 0x8000;
+ dst0[3] = src_l0[3] | src_r0[0] << 8;
+ dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[3] = src_r0[3] >> 24;
break;
case 14:
- w[ 3] = w[ 3] | 0x800000;
+ dst0[3] = src_l0[3] | src_r0[0] << 16;
+ dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[3] = src_r0[3] >> 16;
break;
case 15:
- w[ 3] = w[ 3] | 0x80000000;
+ dst0[3] = src_l0[3] | src_r0[0] << 24;
+ dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[3] = src_r0[3] >> 8;
break;
case 16:
- w[ 4] = 0x80;
+ dst1[0] = src_r0[0];
+ dst1[1] = src_r0[1];
+ dst1[2] = src_r0[2];
+ dst1[3] = src_r0[3];
break;
case 17:
- w[ 4] = w[ 4] | 0x8000;
+ dst1[0] = src_l1[0] | src_r0[0] << 8;
+ dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst2[0] = src_r0[3] >> 24;
break;
case 18:
- w[ 4] = w[ 4] | 0x800000;
+ dst1[0] = src_l1[0] | src_r0[0] << 16;
+ dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst2[0] = src_r0[3] >> 16;
break;
case 19:
- w[ 4] = w[ 4] | 0x80000000;
+ dst1[0] = src_l1[0] | src_r0[0] << 24;
+ dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst2[0] = src_r0[3] >> 8;
break;
case 20:
- w[ 5] = 0x80;
+ dst1[1] = src_r0[0];
+ dst1[2] = src_r0[1];
+ dst1[3] = src_r0[2];
+ dst2[0] = src_r0[3];
break;
case 21:
- w[ 5] = w[ 5] | 0x8000;
+ dst1[1] = src_l1[1] | src_r0[0] << 8;
+ dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst2[1] = src_r0[3] >> 24;
break;
case 22:
- w[ 5] = w[ 5] | 0x800000;
+ dst1[1] = src_l1[1] | src_r0[0] << 16;
+ dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst2[1] = src_r0[3] >> 16;
break;
case 23:
- w[ 5] = w[ 5] | 0x80000000;
+ dst1[1] = src_l1[1] | src_r0[0] << 24;
+ dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst2[1] = src_r0[3] >> 8;
break;
case 24:
- w[ 6] = 0x80;
+ dst1[2] = src_r0[0];
+ dst1[3] = src_r0[1];
+ dst2[0] = src_r0[2];
+ dst2[1] = src_r0[3];
break;
case 25:
- w[ 6] = w[ 6] | 0x8000;
+ dst1[2] = src_l1[2] | src_r0[0] << 8;
+ dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst2[2] = src_r0[3] >> 24;
break;
case 26:
- w[ 6] = w[ 6] | 0x800000;
+ dst1[2] = src_l1[2] | src_r0[0] << 16;
+ dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst2[2] = src_r0[3] >> 16;
break;
case 27:
- w[ 6] = w[ 6] | 0x80000000;
+ dst1[2] = src_l1[2] | src_r0[0] << 24;
+ dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst2[2] = src_r0[3] >> 8;
break;
case 28:
- w[ 7] = 0x80;
+ dst1[3] = src_r0[0];
+ dst2[0] = src_r0[1];
+ dst2[1] = src_r0[2];
+ dst2[2] = src_r0[3];
break;
case 29:
- w[ 7] = w[ 7] | 0x8000;
+ dst1[3] = src_l1[3] | src_r0[0] << 8;
+ dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst2[3] = src_r0[3] >> 24;
break;
case 30:
- w[ 7] = w[ 7] | 0x800000;
+ dst1[3] = src_l1[3] | src_r0[0] << 16;
+ dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst2[3] = src_r0[3] >> 16;
break;
case 31:
- w[ 7] = w[ 7] | 0x80000000;
+ dst1[3] = src_l1[3] | src_r0[0] << 24;
+ dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst2[3] = src_r0[3] >> 8;
break;
case 32:
- w[ 8] = 0x80;
+ dst2[0] = src_r0[0];
+ dst2[1] = src_r0[1];
+ dst2[2] = src_r0[2];
+ dst2[3] = src_r0[3];
break;
case 33:
- w[ 8] = w[ 8] | 0x8000;
+ dst2[0] = src_l2[0] | src_r0[0] << 8;
+ dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8;
break;
case 34:
- w[ 8] = w[ 8] | 0x800000;
+ dst2[0] = src_l2[0] | src_r0[0] << 16;
+ dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16;
break;
case 35:
- w[ 8] = w[ 8] | 0x80000000;
+ dst2[0] = src_l2[0] | src_r0[0] << 24;
+ dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24;
break;
case 36:
- w[ 9] = 0x80;
+ dst2[1] = src_r0[0];
+ dst2[2] = src_r0[1];
+ dst2[3] = src_r0[2];
break;
case 37:
- w[ 9] = w[ 9] | 0x8000;
+ dst2[1] = src_l2[1] | src_r0[0] << 8;
+ dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8;
break;
case 38:
- w[ 9] = w[ 9] | 0x800000;
+ dst2[1] = src_l2[1] | src_r0[0] << 16;
+ dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16;
break;
case 39:
- w[ 9] = w[ 9] | 0x80000000;
+ dst2[1] = src_l2[1] | src_r0[0] << 24;
+ dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24;
break;
case 40:
- w[10] = 0x80;
+ dst2[2] = src_r0[0];
+ dst2[3] = src_r0[1];
break;
case 41:
- w[10] = w[10] | 0x8000;
+ dst2[2] = src_l2[2] | src_r0[0] << 8;
+ dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8;
break;
case 42:
- w[10] = w[10] | 0x800000;
+ dst2[2] = src_l2[2] | src_r0[0] << 16;
+ dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16;
break;
case 43:
- w[10] = w[10] | 0x80000000;
+ dst2[2] = src_l2[2] | src_r0[0] << 24;
+ dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24;
break;
case 44:
- w[11] = 0x80;
+ dst2[3] = src_r0[0];
break;
case 45:
- w[11] = w[11] | 0x8000;
+ dst2[3] = src_l2[3] | src_r0[0] << 8;
break;
case 46:
- w[11] = w[11] | 0x800000;
+ dst2[3] = src_l2[3] | src_r0[0] << 16;
break;
case 47:
- w[11] = w[11] | 0x80000000;
- break;
-
- case 48:
- w[12] = 0x80;
- break;
-
- case 49:
- w[12] = w[12] | 0x8000;
- break;
-
- case 50:
- w[12] = w[12] | 0x800000;
- break;
-
- case 51:
- w[12] = w[12] | 0x80000000;
- break;
-
- case 52:
- w[13] = 0x80;
- break;
-
- case 53:
- w[13] = w[13] | 0x8000;
- break;
-
- case 54:
- w[13] = w[13] | 0x800000;
- break;
-
- case 55:
- w[13] = w[13] | 0x80000000;
- break;
-
- case 56:
- w[14] = 0x80;
- break;
-
- case 57:
- w[14] = w[14] | 0x8000;
- break;
-
- case 58:
- w[14] = w[14] | 0x800000;
- break;
-
- case 59:
- w[14] = w[14] | 0x80000000;
- break;
-
- case 60:
- w[15] = 0x80;
- break;
-
- case 61:
- w[15] = w[15] | 0x8000;
- break;
-
- case 62:
- w[15] = w[15] | 0x800000;
- break;
-
- case 63:
- w[15] = w[15] | 0x80000000;
+ dst2[3] = src_l2[3] | src_r0[0] << 24;
break;
}
}
-// before: append_0x80_8
-static void append_0x80_1x32 (u32 w[32], const u32 offset)
+// before: device_memcat12L
+// Merges the eight words of (src_r0, src_r1) into the three-vector destination
+// (dst0, dst1, dst2) starting at byte position `offset`. Offsets 0..47 have a
+// case; any other value matches none and writes nothing.
+//   - offset % 4 == 0: the source words are copied as-is, beginning with
+//     src_r0[0] at destination word offset / 4; the src_l* vectors are not read.
+//   - otherwise: the first written word is the matching src_l word OR'ed with
+//     src_r0[0] shifted left by 8 * (offset % 4) bits; every following word
+//     joins the bits shifted out of the previous source word with the next one,
+//     running on from src_r0[3] into src_r1[0].
+// Words below the insertion word are never written here. Writes stop at
+// dst2[3], so whatever part of the source would land beyond it is dropped
+// (from offset 32 upward only src_r0 contributes).
+// In the word-aligned cases 20, 24 and 28 the first copied word is src_r0[0],
+// matching every other aligned case; src_r1[0] only appears four words later.
+// NOTE(review): the shift directions suggest little-endian byte packing and
+// that the src_l word is zero from the insertion byte upward - confirm against
+// the callers.
+static void memcat_c47_d3x4_sl3x4_sr2x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4], u32 src_r1[4])
{
switch (offset)
{
case 0:
- w[ 0] = 0x80;
+ dst0[0] = src_r0[0];
+ dst0[1] = src_r0[1];
+ dst0[2] = src_r0[2];
+ dst0[3] = src_r0[3];
+ dst1[0] = src_r1[0];
+ dst1[1] = src_r1[1];
+ dst1[2] = src_r1[2];
+ dst1[3] = src_r1[3];
break;
case 1:
- w[ 0] = w[ 0] | 0x8000;
+ dst0[0] = src_l0[0] | src_r0[0] << 8;
+ dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[0] = src_r0[3] >> 24 | src_r1[0] << 8;
+ dst1[1] = src_r1[0] >> 24 | src_r1[1] << 8;
+ dst1[2] = src_r1[1] >> 24 | src_r1[2] << 8;
+ dst1[3] = src_r1[2] >> 24 | src_r1[3] << 8;
+ dst2[0] = src_r1[3] >> 24;
break;
case 2:
- w[ 0] = w[ 0] | 0x800000;
+ dst0[0] = src_l0[0] | src_r0[0] << 16;
+ dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[0] = src_r0[3] >> 16 | src_r1[0] << 16;
+ dst1[1] = src_r1[0] >> 16 | src_r1[1] << 16;
+ dst1[2] = src_r1[1] >> 16 | src_r1[2] << 16;
+ dst1[3] = src_r1[2] >> 16 | src_r1[3] << 16;
+ dst2[0] = src_r1[3] >> 16;
break;
case 3:
- w[ 0] = w[ 0] | 0x80000000;
+ dst0[0] = src_l0[0] | src_r0[0] << 24;
+ dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[0] = src_r0[3] >> 8 | src_r1[0] << 24;
+ dst1[1] = src_r1[0] >> 8 | src_r1[1] << 24;
+ dst1[2] = src_r1[1] >> 8 | src_r1[2] << 24;
+ dst1[3] = src_r1[2] >> 8 | src_r1[3] << 24;
+ dst2[0] = src_r1[3] >> 8;
break;
case 4:
- w[ 1] = 0x80;
+ dst0[1] = src_r0[0];
+ dst0[2] = src_r0[1];
+ dst0[3] = src_r0[2];
+ dst1[0] = src_r0[3];
+ dst1[1] = src_r1[0];
+ dst1[2] = src_r1[1];
+ dst1[3] = src_r1[2];
+ dst2[0] = src_r1[3];
break;
case 5:
- w[ 1] = w[ 1] | 0x8000;
+ dst0[1] = src_l0[1] | src_r0[0] << 8;
+ dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[1] = src_r0[3] >> 24 | src_r1[0] << 8;
+ dst1[2] = src_r1[0] >> 24 | src_r1[1] << 8;
+ dst1[3] = src_r1[1] >> 24 | src_r1[2] << 8;
+ dst2[0] = src_r1[2] >> 24 | src_r1[3] << 8;
+ dst2[1] = src_r1[3] >> 24;
break;
case 6:
- w[ 1] = w[ 1] | 0x800000;
+ dst0[1] = src_l0[1] | src_r0[0] << 16;
+ dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[1] = src_r0[3] >> 16 | src_r1[0] << 16;
+ dst1[2] = src_r1[0] >> 16 | src_r1[1] << 16;
+ dst1[3] = src_r1[1] >> 16 | src_r1[2] << 16;
+ dst2[0] = src_r1[2] >> 16 | src_r1[3] << 16;
+ dst2[1] = src_r1[3] >> 16;
break;
case 7:
- w[ 1] = w[ 1] | 0x80000000;
+ dst0[1] = src_l0[1] | src_r0[0] << 24;
+ dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[1] = src_r0[3] >> 8 | src_r1[0] << 24;
+ dst1[2] = src_r1[0] >> 8 | src_r1[1] << 24;
+ dst1[3] = src_r1[1] >> 8 | src_r1[2] << 24;
+ dst2[0] = src_r1[2] >> 8 | src_r1[3] << 24;
+ dst2[1] = src_r1[3] >> 8;
break;
case 8:
- w[ 2] = 0x80;
+ dst0[2] = src_r0[0];
+ dst0[3] = src_r0[1];
+ dst1[0] = src_r0[2];
+ dst1[1] = src_r0[3];
+ dst1[2] = src_r1[0];
+ dst1[3] = src_r1[1];
+ dst2[0] = src_r1[2];
+ dst2[1] = src_r1[3];
break;
case 9:
- w[ 2] = w[ 2] | 0x8000;
+ dst0[2] = src_l0[2] | src_r0[0] << 8;
+ dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[2] = src_r0[3] >> 24 | src_r1[0] << 8;
+ dst1[3] = src_r1[0] >> 24 | src_r1[1] << 8;
+ dst2[0] = src_r1[1] >> 24 | src_r1[2] << 8;
+ dst2[1] = src_r1[2] >> 24 | src_r1[3] << 8;
+ dst2[2] = src_r1[3] >> 24;
break;
case 10:
- w[ 2] = w[ 2] | 0x800000;
+ dst0[2] = src_l0[2] | src_r0[0] << 16;
+ dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[2] = src_r0[3] >> 16 | src_r1[0] << 16;
+ dst1[3] = src_r1[0] >> 16 | src_r1[1] << 16;
+ dst2[0] = src_r1[1] >> 16 | src_r1[2] << 16;
+ dst2[1] = src_r1[2] >> 16 | src_r1[3] << 16;
+ dst2[2] = src_r1[3] >> 16;
break;
case 11:
- w[ 2] = w[ 2] | 0x80000000;
+ dst0[2] = src_l0[2] | src_r0[0] << 24;
+ dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[2] = src_r0[3] >> 8 | src_r1[0] << 24;
+ dst1[3] = src_r1[0] >> 8 | src_r1[1] << 24;
+ dst2[0] = src_r1[1] >> 8 | src_r1[2] << 24;
+ dst2[1] = src_r1[2] >> 8 | src_r1[3] << 24;
+ dst2[2] = src_r1[3] >> 8;
break;
case 12:
- w[ 3] = 0x80;
+ dst0[3] = src_r0[0];
+ dst1[0] = src_r0[1];
+ dst1[1] = src_r0[2];
+ dst1[2] = src_r0[3];
+ dst1[3] = src_r1[0];
+ dst2[0] = src_r1[1];
+ dst2[1] = src_r1[2];
+ dst2[2] = src_r1[3];
break;
case 13:
- w[ 3] = w[ 3] | 0x8000;
+ dst0[3] = src_l0[3] | src_r0[0] << 8;
+ dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst1[3] = src_r0[3] >> 24 | src_r1[0] << 8;
+ dst2[0] = src_r1[0] >> 24 | src_r1[1] << 8;
+ dst2[1] = src_r1[1] >> 24 | src_r1[2] << 8;
+ dst2[2] = src_r1[2] >> 24 | src_r1[3] << 8;
+ dst2[3] = src_r1[3] >> 24;
break;
case 14:
- w[ 3] = w[ 3] | 0x800000;
+ dst0[3] = src_l0[3] | src_r0[0] << 16;
+ dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst1[3] = src_r0[3] >> 16 | src_r1[0] << 16;
+ dst2[0] = src_r1[0] >> 16 | src_r1[1] << 16;
+ dst2[1] = src_r1[1] >> 16 | src_r1[2] << 16;
+ dst2[2] = src_r1[2] >> 16 | src_r1[3] << 16;
+ dst2[3] = src_r1[3] >> 16;
break;
case 15:
- w[ 3] = w[ 3] | 0x80000000;
+ dst0[3] = src_l0[3] | src_r0[0] << 24;
+ dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst1[3] = src_r0[3] >> 8 | src_r1[0] << 24;
+ dst2[0] = src_r1[0] >> 8 | src_r1[1] << 24;
+ dst2[1] = src_r1[1] >> 8 | src_r1[2] << 24;
+ dst2[2] = src_r1[2] >> 8 | src_r1[3] << 24;
+ dst2[3] = src_r1[3] >> 8;
break;
case 16:
- w[ 4] = 0x80;
+ dst1[0] = src_r0[0];
+ dst1[1] = src_r0[1];
+ dst1[2] = src_r0[2];
+ dst1[3] = src_r0[3];
+ dst2[0] = src_r1[0];
+ dst2[1] = src_r1[1];
+ dst2[2] = src_r1[2];
+ dst2[3] = src_r1[3];
break;
case 17:
- w[ 4] = w[ 4] | 0x8000;
+ dst1[0] = src_l1[0] | src_r0[0] << 8;
+ dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst2[0] = src_r0[3] >> 24 | src_r1[0] << 8;
+ dst2[1] = src_r1[0] >> 24 | src_r1[1] << 8;
+ dst2[2] = src_r1[1] >> 24 | src_r1[2] << 8;
+ dst2[3] = src_r1[2] >> 24 | src_r1[3] << 8;
break;
case 18:
- w[ 4] = w[ 4] | 0x800000;
+ dst1[0] = src_l1[0] | src_r0[0] << 16;
+ dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst2[0] = src_r0[3] >> 16 | src_r1[0] << 16;
+ dst2[1] = src_r1[0] >> 16 | src_r1[1] << 16;
+ dst2[2] = src_r1[1] >> 16 | src_r1[2] << 16;
+ dst2[3] = src_r1[2] >> 16 | src_r1[3] << 16;
break;
case 19:
- w[ 4] = w[ 4] | 0x80000000;
+ dst1[0] = src_l1[0] | src_r0[0] << 24;
+ dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst2[0] = src_r0[3] >> 8 | src_r1[0] << 24;
+ dst2[1] = src_r1[0] >> 8 | src_r1[1] << 24;
+ dst2[2] = src_r1[1] >> 8 | src_r1[2] << 24;
+ dst2[3] = src_r1[2] >> 8 | src_r1[3] << 24;
break;
case 20:
- w[ 5] = 0x80;
+ dst1[1] = src_r0[0];
+ dst1[2] = src_r0[1];
+ dst1[3] = src_r0[2];
+ dst2[0] = src_r0[3];
+ dst2[1] = src_r1[0];
+ dst2[2] = src_r1[1];
+ dst2[3] = src_r1[2];
break;
case 21:
- w[ 5] = w[ 5] | 0x8000;
+ dst1[1] = src_l1[1] | src_r0[0] << 8;
+ dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst2[1] = src_r0[3] >> 24 | src_r1[0] << 8;
+ dst2[2] = src_r1[0] >> 24 | src_r1[1] << 8;
+ dst2[3] = src_r1[1] >> 24 | src_r1[2] << 8;
break;
case 22:
- w[ 5] = w[ 5] | 0x800000;
+ dst1[1] = src_l1[1] | src_r0[0] << 16;
+ dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst2[1] = src_r0[3] >> 16 | src_r1[0] << 16;
+ dst2[2] = src_r1[0] >> 16 | src_r1[1] << 16;
+ dst2[3] = src_r1[1] >> 16 | src_r1[2] << 16;
break;
case 23:
- w[ 5] = w[ 5] | 0x80000000;
+ dst1[1] = src_l1[1] | src_r0[0] << 24;
+ dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst2[1] = src_r0[3] >> 8 | src_r1[0] << 24;
+ dst2[2] = src_r1[0] >> 8 | src_r1[1] << 24;
+ dst2[3] = src_r1[1] >> 8 | src_r1[2] << 24;
break;
case 24:
- w[ 6] = 0x80;
+ dst1[2] = src_r0[0];
+ dst1[3] = src_r0[1];
+ dst2[0] = src_r0[2];
+ dst2[1] = src_r0[3];
+ dst2[2] = src_r1[0];
+ dst2[3] = src_r1[1];
break;
case 25:
- w[ 6] = w[ 6] | 0x8000;
+ dst1[2] = src_l1[2] | src_r0[0] << 8;
+ dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst2[2] = src_r0[3] >> 24 | src_r1[0] << 8;
+ dst2[3] = src_r1[0] >> 24 | src_r1[1] << 8;
break;
case 26:
- w[ 6] = w[ 6] | 0x800000;
+ dst1[2] = src_l1[2] | src_r0[0] << 16;
+ dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst2[2] = src_r0[3] >> 16 | src_r1[0] << 16;
+ dst2[3] = src_r1[0] >> 16 | src_r1[1] << 16;
break;
case 27:
- w[ 6] = w[ 6] | 0x80000000;
+ dst1[2] = src_l1[2] | src_r0[0] << 24;
+ dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst2[2] = src_r0[3] >> 8 | src_r1[0] << 24;
+ dst2[3] = src_r1[0] >> 8 | src_r1[1] << 24;
break;
case 28:
- w[ 7] = 0x80;
+ dst1[3] = src_r0[0];
+ dst2[0] = src_r0[1];
+ dst2[1] = src_r0[2];
+ dst2[2] = src_r0[3];
+ dst2[3] = src_r1[0];
break;
case 29:
- w[ 7] = w[ 7] | 0x8000;
+ dst1[3] = src_l1[3] | src_r0[0] << 8;
+ dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8;
+ dst2[3] = src_r0[3] >> 24 | src_r1[0] << 8;
break;
case 30:
- w[ 7] = w[ 7] | 0x800000;
+ dst1[3] = src_l1[3] | src_r0[0] << 16;
+ dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16;
+ dst2[3] = src_r0[3] >> 16 | src_r1[0] << 16;
break;
case 31:
- w[ 7] = w[ 7] | 0x80000000;
+ dst1[3] = src_l1[3] | src_r0[0] << 24;
+ dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24;
+ dst2[3] = src_r0[3] >> 8 | src_r1[0] << 24;
break;
case 32:
- w[ 8] = 0x80;
+ dst2[0] = src_r0[0];
+ dst2[1] = src_r0[1];
+ dst2[2] = src_r0[2];
+ dst2[3] = src_r0[3];
break;
case 33:
- w[ 8] = w[ 8] | 0x8000;
+ dst2[0] = src_l2[0] | src_r0[0] << 8;
+ dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8;
+ dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8;
break;
case 34:
- w[ 8] = w[ 8] | 0x800000;
+ dst2[0] = src_l2[0] | src_r0[0] << 16;
+ dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16;
+ dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16;
break;
case 35:
- w[ 8] = w[ 8] | 0x80000000;
+ dst2[0] = src_l2[0] | src_r0[0] << 24;
+ dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24;
+ dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24;
break;
case 36:
- w[ 9] = 0x80;
+ dst2[1] = src_r0[0];
+ dst2[2] = src_r0[1];
+ dst2[3] = src_r0[2];
break;
case 37:
- w[ 9] = w[ 9] | 0x8000;
+ dst2[1] = src_l2[1] | src_r0[0] << 8;
+ dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8;
+ dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8;
break;
case 38:
- w[ 9] = w[ 9] | 0x800000;
+ dst2[1] = src_l2[1] | src_r0[0] << 16;
+ dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16;
+ dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16;
break;
case 39:
- w[ 9] = w[ 9] | 0x80000000;
+ dst2[1] = src_l2[1] | src_r0[0] << 24;
+ dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24;
+ dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24;
break;
case 40:
- w[10] = 0x80;
+ dst2[2] = src_r0[0];
+ dst2[3] = src_r0[1];
break;
case 41:
- w[10] = w[10] | 0x8000;
+ dst2[2] = src_l2[2] | src_r0[0] << 8;
+ dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8;
break;
case 42:
- w[10] = w[10] | 0x800000;
+ dst2[2] = src_l2[2] | src_r0[0] << 16;
+ dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16;
break;
case 43:
- w[10] = w[10] | 0x80000000;
+ dst2[2] = src_l2[2] | src_r0[0] << 24;
+ dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24;
break;
case 44:
- w[11] = 0x80;
+ dst2[3] = src_r0[0];
break;
case 45:
- w[11] = w[11] | 0x8000;
+ dst2[3] = src_l2[3] | src_r0[0] << 8;
break;
case 46:
- w[11] = w[11] | 0x800000;
+ dst2[3] = src_l2[3] | src_r0[0] << 16;
break;
case 47:
- w[11] = w[11] | 0x80000000;
+ dst2[3] = src_l2[3] | src_r0[0] << 24;
break;
+ }
+}
- case 48:
- w[12] = 0x80;
+// before: memcat16_9 -- splices the 9-word stream append0[0..3], append1[0..3], append2[0] into w0..w3 starting at byte position offset
+static void memcat_c15_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset) // w0..w3 are modified in place; append0..append2 are only read, and of append2 only element [0] is used
+{
+ switch (offset) // only offsets 0..15 have a case and there is no default, so any other offset leaves w0..w3 untouched
+ {
+ case 0: // offset is a multiple of 4: whole words are copied and the destination words are overwritten
+ w0[0] = append0[0];
+ w0[1] = append0[1];
+ w0[2] = append0[2];
+ w0[3] = append0[3];
+ w1[0] = append1[0];
+ w1[1] = append1[1];
+ w1[2] = append1[2];
+ w1[3] = append1[3];
+ w2[0] = append2[0];
break;
- case 49:
- w[12] = w[12] | 0x8000;
+ case 1: // unaligned offset: the first touched word is OR-merged, every later word is overwritten
+ w0[0] = w0[0] | append0[0] << 8; // NOTE(review): the OR presumably relies on the bits above the kept low byte of w0[0] being zero beforehand -- confirm with callers
+ w0[1] = append0[0] >> 24 | append0[1] << 8; // bits shifted out of the previous source word | start of the next source word
+ w0[2] = append0[1] >> 24 | append0[2] << 8;
+ w0[3] = append0[2] >> 24 | append0[3] << 8;
+ w1[0] = append0[3] >> 24 | append1[0] << 8;
+ w1[1] = append1[0] >> 24 | append1[1] << 8;
+ w1[2] = append1[1] >> 24 | append1[2] << 8;
+ w1[3] = append1[2] >> 24 | append1[3] << 8;
+ w2[0] = append1[3] >> 24 | append2[0] << 8;
+ w2[1] = append2[0] >> 24; // last word holds only the bits carried out of append2[0]
break;
- case 50:
- w[12] = w[12] | 0x800000;
+ case 2: // same pattern as case 1 with a 16-bit shift
+ w0[0] = w0[0] | append0[0] << 16;
+ w0[1] = append0[0] >> 16 | append0[1] << 16;
+ w0[2] = append0[1] >> 16 | append0[2] << 16;
+ w0[3] = append0[2] >> 16 | append0[3] << 16;
+ w1[0] = append0[3] >> 16 | append1[0] << 16;
+ w1[1] = append1[0] >> 16 | append1[1] << 16;
+ w1[2] = append1[1] >> 16 | append1[2] << 16;
+ w1[3] = append1[2] >> 16 | append1[3] << 16;
+ w2[0] = append1[3] >> 16 | append2[0] << 16;
+ w2[1] = append2[0] >> 16;
break;
- case 51:
- w[12] = w[12] | 0x80000000;
+ case 3: // same pattern as case 1 with a 24-bit shift
+ w0[0] = w0[0] | append0[0] << 24;
+ w0[1] = append0[0] >> 8 | append0[1] << 24;
+ w0[2] = append0[1] >> 8 | append0[2] << 24;
+ w0[3] = append0[2] >> 8 | append0[3] << 24;
+ w1[0] = append0[3] >> 8 | append1[0] << 24;
+ w1[1] = append1[0] >> 8 | append1[1] << 24;
+ w1[2] = append1[1] >> 8 | append1[2] << 24;
+ w1[3] = append1[2] >> 8 | append1[3] << 24;
+ w2[0] = append1[3] >> 8 | append2[0] << 24;
+ w2[1] = append2[0] >> 8;
break;
- case 52:
- w[13] = 0x80;
+ case 4: // word-aligned again: plain copy starting at w0[1], w0[0] is left as it was
+ w0[1] = append0[0];
+ w0[2] = append0[1];
+ w0[3] = append0[2];
+ w1[0] = append0[3];
+ w1[1] = append1[0];
+ w1[2] = append1[1];
+ w1[3] = append1[2];
+ w2[0] = append1[3];
+ w2[1] = append2[0];
break;
- case 53:
- w[13] = w[13] | 0x8000;
+ case 5: // cases 5..7 repeat cases 1..3 one destination word later
+ w0[1] = w0[1] | append0[0] << 8;
+ w0[2] = append0[0] >> 24 | append0[1] << 8;
+ w0[3] = append0[1] >> 24 | append0[2] << 8;
+ w1[0] = append0[2] >> 24 | append0[3] << 8;
+ w1[1] = append0[3] >> 24 | append1[0] << 8;
+ w1[2] = append1[0] >> 24 | append1[1] << 8;
+ w1[3] = append1[1] >> 24 | append1[2] << 8;
+ w2[0] = append1[2] >> 24 | append1[3] << 8;
+ w2[1] = append1[3] >> 24 | append2[0] << 8;
+ w2[2] = append2[0] >> 24;
break;
- case 54:
- w[13] = w[13] | 0x800000;
+ case 6:
+ w0[1] = w0[1] | append0[0] << 16;
+ w0[2] = append0[0] >> 16 | append0[1] << 16;
+ w0[3] = append0[1] >> 16 | append0[2] << 16;
+ w1[0] = append0[2] >> 16 | append0[3] << 16;
+ w1[1] = append0[3] >> 16 | append1[0] << 16;
+ w1[2] = append1[0] >> 16 | append1[1] << 16;
+ w1[3] = append1[1] >> 16 | append1[2] << 16;
+ w2[0] = append1[2] >> 16 | append1[3] << 16;
+ w2[1] = append1[3] >> 16 | append2[0] << 16;
+ w2[2] = append2[0] >> 16;
break;
- case 55:
- w[13] = w[13] | 0x80000000;
+ case 7:
+ w0[1] = w0[1] | append0[0] << 24;
+ w0[2] = append0[0] >> 8 | append0[1] << 24;
+ w0[3] = append0[1] >> 8 | append0[2] << 24;
+ w1[0] = append0[2] >> 8 | append0[3] << 24;
+ w1[1] = append0[3] >> 8 | append1[0] << 24;
+ w1[2] = append1[0] >> 8 | append1[1] << 24;
+ w1[3] = append1[1] >> 8 | append1[2] << 24;
+ w2[0] = append1[2] >> 8 | append1[3] << 24;
+ w2[1] = append1[3] >> 8 | append2[0] << 24;
+ w2[2] = append2[0] >> 8;
break;
- case 56:
- w[14] = 0x80;
+ case 8: // word-aligned: plain copy starting at w0[2]
+ w0[2] = append0[0];
+ w0[3] = append0[1];
+ w1[0] = append0[2];
+ w1[1] = append0[3];
+ w1[2] = append1[0];
+ w1[3] = append1[1];
+ w2[0] = append1[2];
+ w2[1] = append1[3];
+ w2[2] = append2[0];
break;
- case 57:
- w[14] = w[14] | 0x8000;
+ case 9: // cases 9..11 repeat cases 1..3 two destination words later
+ w0[2] = w0[2] | append0[0] << 8;
+ w0[3] = append0[0] >> 24 | append0[1] << 8;
+ w1[0] = append0[1] >> 24 | append0[2] << 8;
+ w1[1] = append0[2] >> 24 | append0[3] << 8;
+ w1[2] = append0[3] >> 24 | append1[0] << 8;
+ w1[3] = append1[0] >> 24 | append1[1] << 8;
+ w2[0] = append1[1] >> 24 | append1[2] << 8;
+ w2[1] = append1[2] >> 24 | append1[3] << 8;
+ w2[2] = append1[3] >> 24 | append2[0] << 8;
+ w2[3] = append2[0] >> 24;
break;
- case 58:
- w[14] = w[14] | 0x800000;
+ case 10:
+ w0[2] = w0[2] | append0[0] << 16;
+ w0[3] = append0[0] >> 16 | append0[1] << 16;
+ w1[0] = append0[1] >> 16 | append0[2] << 16;
+ w1[1] = append0[2] >> 16 | append0[3] << 16;
+ w1[2] = append0[3] >> 16 | append1[0] << 16;
+ w1[3] = append1[0] >> 16 | append1[1] << 16;
+ w2[0] = append1[1] >> 16 | append1[2] << 16;
+ w2[1] = append1[2] >> 16 | append1[3] << 16;
+ w2[2] = append1[3] >> 16 | append2[0] << 16;
+ w2[3] = append2[0] >> 16;
break;
- case 59:
- w[14] = w[14] | 0x80000000;
+ case 11:
+ w0[2] = w0[2] | append0[0] << 24;
+ w0[3] = append0[0] >> 8 | append0[1] << 24;
+ w1[0] = append0[1] >> 8 | append0[2] << 24;
+ w1[1] = append0[2] >> 8 | append0[3] << 24;
+ w1[2] = append0[3] >> 8 | append1[0] << 24;
+ w1[3] = append1[0] >> 8 | append1[1] << 24;
+ w2[0] = append1[1] >> 8 | append1[2] << 24;
+ w2[1] = append1[2] >> 8 | append1[3] << 24;
+ w2[2] = append1[3] >> 8 | append2[0] << 24;
+ w2[3] = append2[0] >> 8;
break;
- case 60:
- w[15] = 0x80;
+ case 12: // word-aligned: plain copy starting at w0[3]
+ w0[3] = append0[0];
+ w1[0] = append0[1];
+ w1[1] = append0[2];
+ w1[2] = append0[3];
+ w1[3] = append1[0];
+ w2[0] = append1[1];
+ w2[1] = append1[2];
+ w2[2] = append1[3];
+ w2[3] = append2[0];
break;
- case 61:
- w[15] = w[15] | 0x8000;
+ case 13: // cases 13..15 repeat cases 1..3 three destination words later; these are the only cases that write w3 (element [0] only)
+ w0[3] = w0[3] | append0[0] << 8;
+ w1[0] = append0[0] >> 24 | append0[1] << 8;
+ w1[1] = append0[1] >> 24 | append0[2] << 8;
+ w1[2] = append0[2] >> 24 | append0[3] << 8;
+ w1[3] = append0[3] >> 24 | append1[0] << 8;
+ w2[0] = append1[0] >> 24 | append1[1] << 8;
+ w2[1] = append1[1] >> 24 | append1[2] << 8;
+ w2[2] = append1[2] >> 24 | append1[3] << 8;
+ w2[3] = append1[3] >> 24 | append2[0] << 8;
+ w3[0] = append2[0] >> 24;
break;
- case 62:
- w[15] = w[15] | 0x800000;
+ case 14:
+ w0[3] = w0[3] | append0[0] << 16;
+ w1[0] = append0[0] >> 16 | append0[1] << 16;
+ w1[1] = append0[1] >> 16 | append0[2] << 16;
+ w1[2] = append0[2] >> 16 | append0[3] << 16;
+ w1[3] = append0[3] >> 16 | append1[0] << 16;
+ w2[0] = append1[0] >> 16 | append1[1] << 16;
+ w2[1] = append1[1] >> 16 | append1[2] << 16;
+ w2[2] = append1[2] >> 16 | append1[3] << 16;
+ w2[3] = append1[3] >> 16 | append2[0] << 16;
+ w3[0] = append2[0] >> 16;
break;
- case 63:
- w[15] = w[15] | 0x80000000;
+ case 15:
+ w0[3] = w0[3] | append0[0] << 24;
+ w1[0] = append0[0] >> 8 | append0[1] << 24;
+ w1[1] = append0[1] >> 8 | append0[2] << 24;
+ w1[2] = append0[2] >> 8 | append0[3] << 24;
+ w1[3] = append0[3] >> 8 | append1[0] << 24;
+ w2[0] = append1[0] >> 8 | append1[1] << 24;
+ w2[1] = append1[1] >> 8 | append1[2] << 24;
+ w2[2] = append1[2] >> 8 | append1[3] << 24;
+ w2[3] = append1[3] >> 8 | append2[0] << 24;
+ w3[0] = append2[0] >> 8;
+ break;
+ }
+}
+
+// before: memcat32_8 -- splices the 8-word stream append0[0..3], append1[0..3] into w0..w3 starting at byte position offset
+static void memcat_c32_w4x4_a2x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 offset) // w0..w3 are modified in place; append0 and append1 are only read
+{ // no case writes w3[2] or w3[3], so offsets 25..32 drop the tail of append1 (NOTE(review): presumably those two words are reserved by the caller -- confirm)
+ switch (offset) // only offsets 0..32 have a case and there is no default, so any other offset leaves w0..w3 untouched
+ {
+ case 0: // offset is a multiple of 4: whole words are copied and the destination words are overwritten
+ w0[0] = append0[0];
+ w0[1] = append0[1];
+ w0[2] = append0[2];
+ w0[3] = append0[3];
+ w1[0] = append1[0];
+ w1[1] = append1[1];
+ w1[2] = append1[2];
+ w1[3] = append1[3];
+ break;
+
+ case 1: // unaligned offset: the first touched word is OR-merged, every later word is overwritten
+ w0[0] = w0[0] | append0[0] << 8; // NOTE(review): the OR presumably relies on the bits above the kept low byte of w0[0] being zero beforehand -- confirm with callers
+ w0[1] = append0[0] >> 24 | append0[1] << 8; // bits shifted out of the previous source word | start of the next source word
+ w0[2] = append0[1] >> 24 | append0[2] << 8;
+ w0[3] = append0[2] >> 24 | append0[3] << 8;
+ w1[0] = append0[3] >> 24 | append1[0] << 8;
+ w1[1] = append1[0] >> 24 | append1[1] << 8;
+ w1[2] = append1[1] >> 24 | append1[2] << 8;
+ w1[3] = append1[2] >> 24 | append1[3] << 8;
+ w2[0] = append1[3] >> 24; // last word holds only the bits carried out of append1[3]
break;
- case 64:
- w[16] = 0x80;
+ case 2: // same pattern as case 1 with a 16-bit shift
+ w0[0] = w0[0] | append0[0] << 16;
+ w0[1] = append0[0] >> 16 | append0[1] << 16;
+ w0[2] = append0[1] >> 16 | append0[2] << 16;
+ w0[3] = append0[2] >> 16 | append0[3] << 16;
+ w1[0] = append0[3] >> 16 | append1[0] << 16;
+ w1[1] = append1[0] >> 16 | append1[1] << 16;
+ w1[2] = append1[1] >> 16 | append1[2] << 16;
+ w1[3] = append1[2] >> 16 | append1[3] << 16;
+ w2[0] = append1[3] >> 16;
break;
- case 65:
- w[16] = w[16] | 0x8000;
+ case 3: // same pattern as case 1 with a 24-bit shift
+ w0[0] = w0[0] | append0[0] << 24;
+ w0[1] = append0[0] >> 8 | append0[1] << 24;
+ w0[2] = append0[1] >> 8 | append0[2] << 24;
+ w0[3] = append0[2] >> 8 | append0[3] << 24;
+ w1[0] = append0[3] >> 8 | append1[0] << 24;
+ w1[1] = append1[0] >> 8 | append1[1] << 24;
+ w1[2] = append1[1] >> 8 | append1[2] << 24;
+ w1[3] = append1[2] >> 8 | append1[3] << 24;
+ w2[0] = append1[3] >> 8;
break;
- case 66:
- w[16] = w[16] | 0x800000;
+ case 4: // word-aligned: plain copy starting at w0[1], w0[0] is left as it was
+ w0[1] = append0[0];
+ w0[2] = append0[1];
+ w0[3] = append0[2];
+ w1[0] = append0[3];
+ w1[1] = append1[0];
+ w1[2] = append1[1];
+ w1[3] = append1[2];
+ w2[0] = append1[3];
break;
- case 67:
- w[16] = w[16] | 0x80000000;
+ case 5: // cases 5..7 repeat cases 1..3 one destination word later
+ w0[1] = w0[1] | append0[0] << 8;
+ w0[2] = append0[0] >> 24 | append0[1] << 8;
+ w0[3] = append0[1] >> 24 | append0[2] << 8;
+ w1[0] = append0[2] >> 24 | append0[3] << 8;
+ w1[1] = append0[3] >> 24 | append1[0] << 8;
+ w1[2] = append1[0] >> 24 | append1[1] << 8;
+ w1[3] = append1[1] >> 24 | append1[2] << 8;
+ w2[0] = append1[2] >> 24 | append1[3] << 8;
+ w2[1] = append1[3] >> 24;
break;
- case 68:
- w[17] = 0x80;
+ case 6:
+ w0[1] = w0[1] | append0[0] << 16;
+ w0[2] = append0[0] >> 16 | append0[1] << 16;
+ w0[3] = append0[1] >> 16 | append0[2] << 16;
+ w1[0] = append0[2] >> 16 | append0[3] << 16;
+ w1[1] = append0[3] >> 16 | append1[0] << 16;
+ w1[2] = append1[0] >> 16 | append1[1] << 16;
+ w1[3] = append1[1] >> 16 | append1[2] << 16;
+ w2[0] = append1[2] >> 16 | append1[3] << 16;
+ w2[1] = append1[3] >> 16;
break;
- case 69:
- w[17] = w[17] | 0x8000;
+ case 7:
+ w0[1] = w0[1] | append0[0] << 24;
+ w0[2] = append0[0] >> 8 | append0[1] << 24;
+ w0[3] = append0[1] >> 8 | append0[2] << 24;
+ w1[0] = append0[2] >> 8 | append0[3] << 24;
+ w1[1] = append0[3] >> 8 | append1[0] << 24;
+ w1[2] = append1[0] >> 8 | append1[1] << 24;
+ w1[3] = append1[1] >> 8 | append1[2] << 24;
+ w2[0] = append1[2] >> 8 | append1[3] << 24;
+ w2[1] = append1[3] >> 8;
break;
- case 70:
- w[17] = w[17] | 0x800000;
+ case 8: // word-aligned: plain copy starting at w0[2]
+ w0[2] = append0[0];
+ w0[3] = append0[1];
+ w1[0] = append0[2];
+ w1[1] = append0[3];
+ w1[2] = append1[0];
+ w1[3] = append1[1];
+ w2[0] = append1[2];
+ w2[1] = append1[3];
break;
- case 71:
- w[17] = w[17] | 0x80000000;
+ case 9: // cases 9..11 repeat cases 1..3 two destination words later
+ w0[2] = w0[2] | append0[0] << 8;
+ w0[3] = append0[0] >> 24 | append0[1] << 8;
+ w1[0] = append0[1] >> 24 | append0[2] << 8;
+ w1[1] = append0[2] >> 24 | append0[3] << 8;
+ w1[2] = append0[3] >> 24 | append1[0] << 8;
+ w1[3] = append1[0] >> 24 | append1[1] << 8;
+ w2[0] = append1[1] >> 24 | append1[2] << 8;
+ w2[1] = append1[2] >> 24 | append1[3] << 8;
+ w2[2] = append1[3] >> 24;
break;
- case 72:
- w[18] = 0x80;
+ case 10:
+ w0[2] = w0[2] | append0[0] << 16;
+ w0[3] = append0[0] >> 16 | append0[1] << 16;
+ w1[0] = append0[1] >> 16 | append0[2] << 16;
+ w1[1] = append0[2] >> 16 | append0[3] << 16;
+ w1[2] = append0[3] >> 16 | append1[0] << 16;
+ w1[3] = append1[0] >> 16 | append1[1] << 16;
+ w2[0] = append1[1] >> 16 | append1[2] << 16;
+ w2[1] = append1[2] >> 16 | append1[3] << 16;
+ w2[2] = append1[3] >> 16;
break;
- case 73:
- w[18] = w[18] | 0x8000;
+ case 11:
+ w0[2] = w0[2] | append0[0] << 24;
+ w0[3] = append0[0] >> 8 | append0[1] << 24;
+ w1[0] = append0[1] >> 8 | append0[2] << 24;
+ w1[1] = append0[2] >> 8 | append0[3] << 24;
+ w1[2] = append0[3] >> 8 | append1[0] << 24;
+ w1[3] = append1[0] >> 8 | append1[1] << 24;
+ w2[0] = append1[1] >> 8 | append1[2] << 24;
+ w2[1] = append1[2] >> 8 | append1[3] << 24;
+ w2[2] = append1[3] >> 8;
break;
- case 74:
- w[18] = w[18] | 0x800000;
+ case 12: // word-aligned: plain copy starting at w0[3]
+ w0[3] = append0[0];
+ w1[0] = append0[1];
+ w1[1] = append0[2];
+ w1[2] = append0[3];
+ w1[3] = append1[0];
+ w2[0] = append1[1];
+ w2[1] = append1[2];
+ w2[2] = append1[3];
break;
- case 75:
- w[18] = w[18] | 0x80000000;
+ case 13: // cases 13..15 repeat cases 1..3 three destination words later
+ w0[3] = w0[3] | append0[0] << 8;
+ w1[0] = append0[0] >> 24 | append0[1] << 8;
+ w1[1] = append0[1] >> 24 | append0[2] << 8;
+ w1[2] = append0[2] >> 24 | append0[3] << 8;
+ w1[3] = append0[3] >> 24 | append1[0] << 8;
+ w2[0] = append1[0] >> 24 | append1[1] << 8;
+ w2[1] = append1[1] >> 24 | append1[2] << 8;
+ w2[2] = append1[2] >> 24 | append1[3] << 8;
+ w2[3] = append1[3] >> 24;
break;
- case 76:
- w[19] = 0x80;
+ case 14:
+ w0[3] = w0[3] | append0[0] << 16;
+ w1[0] = append0[0] >> 16 | append0[1] << 16;
+ w1[1] = append0[1] >> 16 | append0[2] << 16;
+ w1[2] = append0[2] >> 16 | append0[3] << 16;
+ w1[3] = append0[3] >> 16 | append1[0] << 16;
+ w2[0] = append1[0] >> 16 | append1[1] << 16;
+ w2[1] = append1[1] >> 16 | append1[2] << 16;
+ w2[2] = append1[2] >> 16 | append1[3] << 16;
+ w2[3] = append1[3] >> 16;
break;
- case 77:
- w[19] = w[19] | 0x8000;
+ case 15:
+ w0[3] = w0[3] | append0[0] << 24;
+ w1[0] = append0[0] >> 8 | append0[1] << 24;
+ w1[1] = append0[1] >> 8 | append0[2] << 24;
+ w1[2] = append0[2] >> 8 | append0[3] << 24;
+ w1[3] = append0[3] >> 8 | append1[0] << 24;
+ w2[0] = append1[0] >> 8 | append1[1] << 24;
+ w2[1] = append1[1] >> 8 | append1[2] << 24;
+ w2[2] = append1[2] >> 8 | append1[3] << 24;
+ w2[3] = append1[3] >> 8;
break;
- case 78:
- w[19] = w[19] | 0x800000;
+ case 16: // from here on w0 is no longer touched; plain copy starting at w1[0]
+ w1[0] = append0[0];
+ w1[1] = append0[1];
+ w1[2] = append0[2];
+ w1[3] = append0[3];
+ w2[0] = append1[0];
+ w2[1] = append1[1];
+ w2[2] = append1[2];
+ w2[3] = append1[3];
break;
- case 79:
- w[19] = w[19] | 0x80000000;
+ case 17: // cases 17..19: unaligned merge beginning in w1[0], carry spills into w3[0]
+ w1[0] = w1[0] | append0[0] << 8;
+ w1[1] = append0[0] >> 24 | append0[1] << 8;
+ w1[2] = append0[1] >> 24 | append0[2] << 8;
+ w1[3] = append0[2] >> 24 | append0[3] << 8;
+ w2[0] = append0[3] >> 24 | append1[0] << 8;
+ w2[1] = append1[0] >> 24 | append1[1] << 8;
+ w2[2] = append1[1] >> 24 | append1[2] << 8;
+ w2[3] = append1[2] >> 24 | append1[3] << 8;
+ w3[0] = append1[3] >> 24;
break;
- case 80:
- w[20] = 0x80;
+ case 18:
+ w1[0] = w1[0] | append0[0] << 16;
+ w1[1] = append0[0] >> 16 | append0[1] << 16;
+ w1[2] = append0[1] >> 16 | append0[2] << 16;
+ w1[3] = append0[2] >> 16 | append0[3] << 16;
+ w2[0] = append0[3] >> 16 | append1[0] << 16;
+ w2[1] = append1[0] >> 16 | append1[1] << 16;
+ w2[2] = append1[1] >> 16 | append1[2] << 16;
+ w2[3] = append1[2] >> 16 | append1[3] << 16;
+ w3[0] = append1[3] >> 16;
break;
- case 81:
- w[20] = w[20] | 0x8000;
+ case 19:
+ w1[0] = w1[0] | append0[0] << 24;
+ w1[1] = append0[0] >> 8 | append0[1] << 24;
+ w1[2] = append0[1] >> 8 | append0[2] << 24;
+ w1[3] = append0[2] >> 8 | append0[3] << 24;
+ w2[0] = append0[3] >> 8 | append1[0] << 24;
+ w2[1] = append1[0] >> 8 | append1[1] << 24;
+ w2[2] = append1[1] >> 8 | append1[2] << 24;
+ w2[3] = append1[2] >> 8 | append1[3] << 24;
+ w3[0] = append1[3] >> 8;
break;
- case 82:
- w[20] = w[20] | 0x800000;
+ case 20: // word-aligned: plain copy starting at w1[1]
+ w1[1] = append0[0];
+ w1[2] = append0[1];
+ w1[3] = append0[2];
+ w2[0] = append0[3];
+ w2[1] = append1[0];
+ w2[2] = append1[1];
+ w2[3] = append1[2];
+ w3[0] = append1[3];
break;
- case 83:
- w[20] = w[20] | 0x80000000;
+ case 21: // cases 21..23: unaligned merge beginning in w1[1]; the carry word w3[1] is the last one any case writes
+ w1[1] = w1[1] | append0[0] << 8;
+ w1[2] = append0[0] >> 24 | append0[1] << 8;
+ w1[3] = append0[1] >> 24 | append0[2] << 8;
+ w2[0] = append0[2] >> 24 | append0[3] << 8;
+ w2[1] = append0[3] >> 24 | append1[0] << 8;
+ w2[2] = append1[0] >> 24 | append1[1] << 8;
+ w2[3] = append1[1] >> 24 | append1[2] << 8;
+ w3[0] = append1[2] >> 24 | append1[3] << 8;
+ w3[1] = append1[3] >> 24;
break;
- case 84:
- w[21] = 0x80;
+ case 22:
+ w1[1] = w1[1] | append0[0] << 16;
+ w1[2] = append0[0] >> 16 | append0[1] << 16;
+ w1[3] = append0[1] >> 16 | append0[2] << 16;
+ w2[0] = append0[2] >> 16 | append0[3] << 16;
+ w2[1] = append0[3] >> 16 | append1[0] << 16;
+ w2[2] = append1[0] >> 16 | append1[1] << 16;
+ w2[3] = append1[1] >> 16 | append1[2] << 16;
+ w3[0] = append1[2] >> 16 | append1[3] << 16;
+ w3[1] = append1[3] >> 16;
break;
- case 85:
- w[21] = w[21] | 0x8000;
+ case 23:
+ w1[1] = w1[1] | append0[0] << 24;
+ w1[2] = append0[0] >> 8 | append0[1] << 24;
+ w1[3] = append0[1] >> 8 | append0[2] << 24;
+ w2[0] = append0[2] >> 8 | append0[3] << 24;
+ w2[1] = append0[3] >> 8 | append1[0] << 24;
+ w2[2] = append1[0] >> 8 | append1[1] << 24;
+ w2[3] = append1[1] >> 8 | append1[2] << 24;
+ w3[0] = append1[2] >> 8 | append1[3] << 24;
+ w3[1] = append1[3] >> 8;
break;
- case 86:
- w[21] = w[21] | 0x800000;
+ case 24: // word-aligned: plain copy starting at w1[2]; all 8 source words still fit (last one lands in w3[1])
+ w1[2] = append0[0];
+ w1[3] = append0[1];
+ w2[0] = append0[2];
+ w2[1] = append0[3];
+ w2[2] = append1[0];
+ w2[3] = append1[1];
+ w3[0] = append1[2];
+ w3[1] = append1[3];
break;
- case 87:
- w[21] = w[21] | 0x80000000;
+ case 25: // cases 25..27: unaligned merge beginning in w1[2]
+ w1[2] = w1[2] | append0[0] << 8;
+ w1[3] = append0[0] >> 24 | append0[1] << 8;
+ w2[0] = append0[1] >> 24 | append0[2] << 8;
+ w2[1] = append0[2] >> 24 | append0[3] << 8;
+ w2[2] = append0[3] >> 24 | append1[0] << 8;
+ w2[3] = append1[0] >> 24 | append1[1] << 8;
+ w3[0] = append1[1] >> 24 | append1[2] << 8;
+ w3[1] = append1[2] >> 24 | append1[3] << 8; // stops at w3[1]: the bits carried out of append1[3] (they would go to w3[2]) are not stored
break;
- case 88:
- w[22] = 0x80;
+ case 26:
+ w1[2] = w1[2] | append0[0] << 16;
+ w1[3] = append0[0] >> 16 | append0[1] << 16;
+ w2[0] = append0[1] >> 16 | append0[2] << 16;
+ w2[1] = append0[2] >> 16 | append0[3] << 16;
+ w2[2] = append0[3] >> 16 | append1[0] << 16;
+ w2[3] = append1[0] >> 16 | append1[1] << 16;
+ w3[0] = append1[1] >> 16 | append1[2] << 16;
+ w3[1] = append1[2] >> 16 | append1[3] << 16;
break;
- case 89:
- w[22] = w[22] | 0x8000;
+ case 27:
+ w1[2] = w1[2] | append0[0] << 24;
+ w1[3] = append0[0] >> 8 | append0[1] << 24;
+ w2[0] = append0[1] >> 8 | append0[2] << 24;
+ w2[1] = append0[2] >> 8 | append0[3] << 24;
+ w2[2] = append0[3] >> 8 | append1[0] << 24;
+ w2[3] = append1[0] >> 8 | append1[1] << 24;
+ w3[0] = append1[1] >> 8 | append1[2] << 24;
+ w3[1] = append1[2] >> 8 | append1[3] << 24;
break;
- case 90:
- w[22] = w[22] | 0x800000;
+ case 28: // word-aligned: plain copy starting at w1[3]
+ w1[3] = append0[0];
+ w2[0] = append0[1];
+ w2[1] = append0[2];
+ w2[2] = append0[3];
+ w2[3] = append1[0];
+ w3[0] = append1[1];
+ w3[1] = append1[2]; // stops at w3[1]: append1[3] is not stored
break;
- case 91:
- w[22] = w[22] | 0x80000000;
+ case 29: // cases 29..31: unaligned merge beginning in w1[3]
+ w1[3] = w1[3] | append0[0] << 8;
+ w2[0] = append0[0] >> 24 | append0[1] << 8;
+ w2[1] = append0[1] >> 24 | append0[2] << 8;
+ w2[2] = append0[2] >> 24 | append0[3] << 8;
+ w2[3] = append0[3] >> 24 | append1[0] << 8;
+ w3[0] = append1[0] >> 24 | append1[1] << 8;
+ w3[1] = append1[1] >> 24 | append1[2] << 8; // stops at w3[1]: the bits carried out of append1[2] and all of append1[3] are not stored
break;
- case 92:
- w[23] = 0x80;
+ case 30:
+ w1[3] = w1[3] | append0[0] << 16;
+ w2[0] = append0[0] >> 16 | append0[1] << 16;
+ w2[1] = append0[1] >> 16 | append0[2] << 16;
+ w2[2] = append0[2] >> 16 | append0[3] << 16;
+ w2[3] = append0[3] >> 16 | append1[0] << 16;
+ w3[0] = append1[0] >> 16 | append1[1] << 16;
+ w3[1] = append1[1] >> 16 | append1[2] << 16;
break;
- case 93:
- w[23] = w[23] | 0x8000;
+ case 31:
+ w1[3] = w1[3] | append0[0] << 24;
+ w2[0] = append0[0] >> 8 | append0[1] << 24;
+ w2[1] = append0[1] >> 8 | append0[2] << 24;
+ w2[2] = append0[2] >> 8 | append0[3] << 24;
+ w2[3] = append0[3] >> 8 | append1[0] << 24;
+ w3[0] = append1[0] >> 8 | append1[1] << 24;
+ w3[1] = append1[1] >> 8 | append1[2] << 24;
break;
- case 94:
- w[23] = w[23] | 0x800000;
+ case 32: // word-aligned: plain copy starting at w2[0]; w0 and w1 are left as they were
+ w2[0] = append0[0];
+ w2[1] = append0[1];
+ w2[2] = append0[2];
+ w2[3] = append0[3];
+ w3[0] = append1[0];
+ w3[1] = append1[1]; // stops at w3[1]: append1[2] and append1[3] are not stored
break;
+ }
+}
- case 95:
- w[23] = w[23] | 0x80000000;
+// before: memcat32_9
+static void memcat_c32_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset)
+{
+ switch (offset)
+ {
+ case 0:
+ w0[0] = append0[0];
+ w0[1] = append0[1];
+ w0[2] = append0[2];
+ w0[3] = append0[3];
+ w1[0] = append1[0];
+ w1[1] = append1[1];
+ w1[2] = append1[2];
+ w1[3] = append1[3];
+ w2[0] = append2[0];
break;
- case 96:
- w[24] = 0x80;
+ case 1:
+ w0[0] = w0[0] | append0[0] << 8;
+ w0[1] = append0[0] >> 24 | append0[1] << 8;
+ w0[2] = append0[1] >> 24 | append0[2] << 8;
+ w0[3] = append0[2] >> 24 | append0[3] << 8;
+ w1[0] = append0[3] >> 24 | append1[0] << 8;
+ w1[1] = append1[0] >> 24 | append1[1] << 8;
+ w1[2] = append1[1] >> 24 | append1[2] << 8;
+ w1[3] = append1[2] >> 24 | append1[3] << 8;
+ w2[0] = append1[3] >> 24 | append2[0] << 8;
+ w2[1] = append2[0] >> 24;
break;
- case 97:
- w[24] = w[24] | 0x8000;
+ case 2:
+ w0[0] = w0[0] | append0[0] << 16;
+ w0[1] = append0[0] >> 16 | append0[1] << 16;
+ w0[2] = append0[1] >> 16 | append0[2] << 16;
+ w0[3] = append0[2] >> 16 | append0[3] << 16;
+ w1[0] = append0[3] >> 16 | append1[0] << 16;
+ w1[1] = append1[0] >> 16 | append1[1] << 16;
+ w1[2] = append1[1] >> 16 | append1[2] << 16;
+ w1[3] = append1[2] >> 16 | append1[3] << 16;
+ w2[0] = append1[3] >> 16 | append2[0] << 16;
+ w2[1] = append2[0] >> 16;
break;
- case 98:
- w[24] = w[24] | 0x800000;
+ case 3:
+ w0[0] = w0[0] | append0[0] << 24;
+ w0[1] = append0[0] >> 8 | append0[1] << 24;
+ w0[2] = append0[1] >> 8 | append0[2] << 24;
+ w0[3] = append0[2] >> 8 | append0[3] << 24;
+ w1[0] = append0[3] >> 8 | append1[0] << 24;
+ w1[1] = append1[0] >> 8 | append1[1] << 24;
+ w1[2] = append1[1] >> 8 | append1[2] << 24;
+ w1[3] = append1[2] >> 8 | append1[3] << 24;
+ w2[0] = append1[3] >> 8 | append2[0] << 24;
+ w2[1] = append2[0] >> 8;
break;
- case 99:
- w[24] = w[24] | 0x80000000;
+ case 4:
+ w0[1] = append0[0];
+ w0[2] = append0[1];
+ w0[3] = append0[2];
+ w1[0] = append0[3];
+ w1[1] = append1[0];
+ w1[2] = append1[1];
+ w1[3] = append1[2];
+ w2[0] = append1[3];
+ w2[1] = append2[0];
break;
- case 100:
- w[25] = 0x80;
+ case 5:
+ w0[1] = w0[1] | append0[0] << 8;
+ w0[2] = append0[0] >> 24 | append0[1] << 8;
+ w0[3] = append0[1] >> 24 | append0[2] << 8;
+ w1[0] = append0[2] >> 24 | append0[3] << 8;
+ w1[1] = append0[3] >> 24 | append1[0] << 8;
+ w1[2] = append1[0] >> 24 | append1[1] << 8;
+ w1[3] = append1[1] >> 24 | append1[2] << 8;
+ w2[0] = append1[2] >> 24 | append1[3] << 8;
+ w2[1] = append1[3] >> 24 | append2[0] << 8;
+ w2[2] = append2[0] >> 24;
break;
- case 101:
- w[25] = w[25] | 0x8000;
+ case 6:
+ w0[1] = w0[1] | append0[0] << 16;
+ w0[2] = append0[0] >> 16 | append0[1] << 16;
+ w0[3] = append0[1] >> 16 | append0[2] << 16;
+ w1[0] = append0[2] >> 16 | append0[3] << 16;
+ w1[1] = append0[3] >> 16 | append1[0] << 16;
+ w1[2] = append1[0] >> 16 | append1[1] << 16;
+ w1[3] = append1[1] >> 16 | append1[2] << 16;
+ w2[0] = append1[2] >> 16 | append1[3] << 16;
+ w2[1] = append1[3] >> 16 | append2[0] << 16;
+ w2[2] = append2[0] >> 16;
break;
- case 102:
- w[25] = w[25] | 0x800000;
+ case 7:
+ w0[1] = w0[1] | append0[0] << 24;
+ w0[2] = append0[0] >> 8 | append0[1] << 24;
+ w0[3] = append0[1] >> 8 | append0[2] << 24;
+ w1[0] = append0[2] >> 8 | append0[3] << 24;
+ w1[1] = append0[3] >> 8 | append1[0] << 24;
+ w1[2] = append1[0] >> 8 | append1[1] << 24;
+ w1[3] = append1[1] >> 8 | append1[2] << 24;
+ w2[0] = append1[2] >> 8 | append1[3] << 24;
+ w2[1] = append1[3] >> 8 | append2[0] << 24;
+ w2[2] = append2[0] >> 8;
break;
- case 103:
- w[25] = w[25] | 0x80000000;
+ case 8:
+ w0[2] = append0[0];
+ w0[3] = append0[1];
+ w1[0] = append0[2];
+ w1[1] = append0[3];
+ w1[2] = append1[0];
+ w1[3] = append1[1];
+ w2[0] = append1[2];
+ w2[1] = append1[3];
+ w2[2] = append2[0];
break;
- case 104:
- w[26] = 0x80;
+ case 9:
+ w0[2] = w0[2] | append0[0] << 8;
+ w0[3] = append0[0] >> 24 | append0[1] << 8;
+ w1[0] = append0[1] >> 24 | append0[2] << 8;
+ w1[1] = append0[2] >> 24 | append0[3] << 8;
+ w1[2] = append0[3] >> 24 | append1[0] << 8;
+ w1[3] = append1[0] >> 24 | append1[1] << 8;
+ w2[0] = append1[1] >> 24 | append1[2] << 8;
+ w2[1] = append1[2] >> 24 | append1[3] << 8;
+ w2[2] = append1[3] >> 24 | append2[0] << 8;
+ w2[3] = append2[0] >> 24;
break;
- case 105:
- w[26] = w[26] | 0x8000;
+ case 10:
+ w0[2] = w0[2] | append0[0] << 16;
+ w0[3] = append0[0] >> 16 | append0[1] << 16;
+ w1[0] = append0[1] >> 16 | append0[2] << 16;
+ w1[1] = append0[2] >> 16 | append0[3] << 16;
+ w1[2] = append0[3] >> 16 | append1[0] << 16;
+ w1[3] = append1[0] >> 16 | append1[1] << 16;
+ w2[0] = append1[1] >> 16 | append1[2] << 16;
+ w2[1] = append1[2] >> 16 | append1[3] << 16;
+ w2[2] = append1[3] >> 16 | append2[0] << 16;
+ w2[3] = append2[0] >> 16;
break;
-
- case 106:
- w[26] = w[26] | 0x800000;
+
+ case 11:
+ w0[2] = w0[2] | append0[0] << 24;
+ w0[3] = append0[0] >> 8 | append0[1] << 24;
+ w1[0] = append0[1] >> 8 | append0[2] << 24;
+ w1[1] = append0[2] >> 8 | append0[3] << 24;
+ w1[2] = append0[3] >> 8 | append1[0] << 24;
+ w1[3] = append1[0] >> 8 | append1[1] << 24;
+ w2[0] = append1[1] >> 8 | append1[2] << 24;
+ w2[1] = append1[2] >> 8 | append1[3] << 24;
+ w2[2] = append1[3] >> 8 | append2[0] << 24;
+ w2[3] = append2[0] >> 8;
break;
- case 107:
- w[26] = w[26] | 0x80000000;
+ case 12:
+ w0[3] = append0[0];
+ w1[0] = append0[1];
+ w1[1] = append0[2];
+ w1[2] = append0[3];
+ w1[3] = append1[0];
+ w2[0] = append1[1];
+ w2[1] = append1[2];
+ w2[2] = append1[3];
+ w2[3] = append2[0];
break;
- case 108:
- w[27] = 0x80;
+ case 13:
+ w0[3] = w0[3] | append0[0] << 8;
+ w1[0] = append0[0] >> 24 | append0[1] << 8;
+ w1[1] = append0[1] >> 24 | append0[2] << 8;
+ w1[2] = append0[2] >> 24 | append0[3] << 8;
+ w1[3] = append0[3] >> 24 | append1[0] << 8;
+ w2[0] = append1[0] >> 24 | append1[1] << 8;
+ w2[1] = append1[1] >> 24 | append1[2] << 8;
+ w2[2] = append1[2] >> 24 | append1[3] << 8;
+ w2[3] = append1[3] >> 24 | append2[0] << 8;
+ w3[0] = append2[0] >> 24;
break;
- case 109:
- w[27] = w[27] | 0x8000;
+ case 14:
+ w0[3] = w0[3] | append0[0] << 16;
+ w1[0] = append0[0] >> 16 | append0[1] << 16;
+ w1[1] = append0[1] >> 16 | append0[2] << 16;
+ w1[2] = append0[2] >> 16 | append0[3] << 16;
+ w1[3] = append0[3] >> 16 | append1[0] << 16;
+ w2[0] = append1[0] >> 16 | append1[1] << 16;
+ w2[1] = append1[1] >> 16 | append1[2] << 16;
+ w2[2] = append1[2] >> 16 | append1[3] << 16;
+ w2[3] = append1[3] >> 16 | append2[0] << 16;
+ w3[0] = append2[0] >> 16;
break;
- case 110:
- w[27] = w[27] | 0x800000;
+ case 15:
+ w0[3] = w0[3] | append0[0] << 24;
+ w1[0] = append0[0] >> 8 | append0[1] << 24;
+ w1[1] = append0[1] >> 8 | append0[2] << 24;
+ w1[2] = append0[2] >> 8 | append0[3] << 24;
+ w1[3] = append0[3] >> 8 | append1[0] << 24;
+ w2[0] = append1[0] >> 8 | append1[1] << 24;
+ w2[1] = append1[1] >> 8 | append1[2] << 24;
+ w2[2] = append1[2] >> 8 | append1[3] << 24;
+ w2[3] = append1[3] >> 8 | append2[0] << 24;
+ w3[0] = append2[0] >> 8;
break;
- case 111:
- w[27] = w[27] | 0x80000000;
+ case 16:
+ w1[0] = append0[0];
+ w1[1] = append0[1];
+ w1[2] = append0[2];
+ w1[3] = append0[3];
+ w2[0] = append1[0];
+ w2[1] = append1[1];
+ w2[2] = append1[2];
+ w2[3] = append1[3];
+ w3[0] = append2[0];
break;
- case 112:
- w[28] = 0x80;
+ case 17:
+ w1[0] = w1[0] | append0[0] << 8;
+ w1[1] = append0[0] >> 24 | append0[1] << 8;
+ w1[2] = append0[1] >> 24 | append0[2] << 8;
+ w1[3] = append0[2] >> 24 | append0[3] << 8;
+ w2[0] = append0[3] >> 24 | append1[0] << 8;
+ w2[1] = append1[0] >> 24 | append1[1] << 8;
+ w2[2] = append1[1] >> 24 | append1[2] << 8;
+ w2[3] = append1[2] >> 24 | append1[3] << 8;
+ w3[0] = append1[3] >> 24 | append2[0] << 8;
+ w3[1] = append2[0] >> 24;
break;
- case 113:
- w[28] = w[28] | 0x8000;
+ case 18:
+ w1[0] = w1[0] | append0[0] << 16;
+ w1[1] = append0[0] >> 16 | append0[1] << 16;
+ w1[2] = append0[1] >> 16 | append0[2] << 16;
+ w1[3] = append0[2] >> 16 | append0[3] << 16;
+ w2[0] = append0[3] >> 16 | append1[0] << 16;
+ w2[1] = append1[0] >> 16 | append1[1] << 16;
+ w2[2] = append1[1] >> 16 | append1[2] << 16;
+ w2[3] = append1[2] >> 16 | append1[3] << 16;
+ w3[0] = append1[3] >> 16 | append2[0] << 16;
+ w3[1] = append2[0] >> 16;
break;
- case 114:
- w[28] = w[28] | 0x800000;
+ case 19:
+ w1[0] = w1[0] | append0[0] << 24;
+ w1[1] = append0[0] >> 8 | append0[1] << 24;
+ w1[2] = append0[1] >> 8 | append0[2] << 24;
+ w1[3] = append0[2] >> 8 | append0[3] << 24;
+ w2[0] = append0[3] >> 8 | append1[0] << 24;
+ w2[1] = append1[0] >> 8 | append1[1] << 24;
+ w2[2] = append1[1] >> 8 | append1[2] << 24;
+ w2[3] = append1[2] >> 8 | append1[3] << 24;
+ w3[0] = append1[3] >> 8 | append2[0] << 24;
+ w3[1] = append2[0] >> 8;
break;
- case 115:
- w[28] = w[28] | 0x80000000;
+ case 20:
+ w1[1] = append0[0];
+ w1[2] = append0[1];
+ w1[3] = append0[2];
+ w2[0] = append0[3];
+ w2[1] = append1[0];
+ w2[2] = append1[1];
+ w2[3] = append1[2];
+ w3[0] = append1[3];
+ w3[1] = append2[0];
break;
- case 116:
- w[29] = 0x80;
+ case 21:
+ w1[1] = w1[1] | append0[0] << 8;
+ w1[2] = append0[0] >> 24 | append0[1] << 8;
+ w1[3] = append0[1] >> 24 | append0[2] << 8;
+ w2[0] = append0[2] >> 24 | append0[3] << 8;
+ w2[1] = append0[3] >> 24 | append1[0] << 8;
+ w2[2] = append1[0] >> 24 | append1[1] << 8;
+ w2[3] = append1[1] >> 24 | append1[2] << 8;
+ w3[0] = append1[2] >> 24 | append1[3] << 8;
+ w3[1] = append1[3] >> 24 | append2[0] << 8;
break;
- case 117:
- w[29] = w[29] | 0x8000;
+ case 22:
+ w1[1] = w1[1] | append0[0] << 16;
+ w1[2] = append0[0] >> 16 | append0[1] << 16;
+ w1[3] = append0[1] >> 16 | append0[2] << 16;
+ w2[0] = append0[2] >> 16 | append0[3] << 16;
+ w2[1] = append0[3] >> 16 | append1[0] << 16;
+ w2[2] = append1[0] >> 16 | append1[1] << 16;
+ w2[3] = append1[1] >> 16 | append1[2] << 16;
+ w3[0] = append1[2] >> 16 | append1[3] << 16;
+ w3[1] = append1[3] >> 16 | append2[0] << 16;
break;
- case 118:
- w[29] = w[29] | 0x800000;
+ case 23:
+ w1[1] = w1[1] | append0[0] << 24;
+ w1[2] = append0[0] >> 8 | append0[1] << 24;
+ w1[3] = append0[1] >> 8 | append0[2] << 24;
+ w2[0] = append0[2] >> 8 | append0[3] << 24;
+ w2[1] = append0[3] >> 8 | append1[0] << 24;
+ w2[2] = append1[0] >> 8 | append1[1] << 24;
+ w2[3] = append1[1] >> 8 | append1[2] << 24;
+ w3[0] = append1[2] >> 8 | append1[3] << 24;
+ w3[1] = append1[3] >> 8 | append2[0] << 24;
break;
- case 119:
- w[29] = w[29] | 0x80000000;
+ case 24:
+ w1[2] = append0[0];
+ w1[3] = append0[1];
+ w2[0] = append0[2];
+ w2[1] = append0[3];
+ w2[2] = append1[0];
+ w2[3] = append1[1];
+ w3[0] = append1[2];
+ w3[1] = append1[3];
break;
- case 120:
- w[30] = 0x80;
+ case 25:
+ w1[2] = w1[2] | append0[0] << 8;
+ w1[3] = append0[0] >> 24 | append0[1] << 8;
+ w2[0] = append0[1] >> 24 | append0[2] << 8;
+ w2[1] = append0[2] >> 24 | append0[3] << 8;
+ w2[2] = append0[3] >> 24 | append1[0] << 8;
+ w2[3] = append1[0] >> 24 | append1[1] << 8;
+ w3[0] = append1[1] >> 24 | append1[2] << 8;
+ w3[1] = append1[2] >> 24 | append1[3] << 8;
break;
- case 121:
- w[30] = w[30] | 0x8000;
+ case 26:
+ w1[2] = w1[2] | append0[0] << 16;
+ w1[3] = append0[0] >> 16 | append0[1] << 16;
+ w2[0] = append0[1] >> 16 | append0[2] << 16;
+ w2[1] = append0[2] >> 16 | append0[3] << 16;
+ w2[2] = append0[3] >> 16 | append1[0] << 16;
+ w2[3] = append1[0] >> 16 | append1[1] << 16;
+ w3[0] = append1[1] >> 16 | append1[2] << 16;
+ w3[1] = append1[2] >> 16 | append1[3] << 16;
break;
- case 122:
- w[30] = w[30] | 0x800000;
+ case 27:
+ w1[2] = w1[2] | append0[0] << 24;
+ w1[3] = append0[0] >> 8 | append0[1] << 24;
+ w2[0] = append0[1] >> 8 | append0[2] << 24;
+ w2[1] = append0[2] >> 8 | append0[3] << 24;
+ w2[2] = append0[3] >> 8 | append1[0] << 24;
+ w2[3] = append1[0] >> 8 | append1[1] << 24;
+ w3[0] = append1[1] >> 8 | append1[2] << 24;
+ w3[1] = append1[2] >> 8 | append1[3] << 24;
break;
- case 123:
- w[30] = w[30] | 0x80000000;
+ case 28:
+ w1[3] = append0[0];
+ w2[0] = append0[1];
+ w2[1] = append0[2];
+ w2[2] = append0[3];
+ w2[3] = append1[0];
+ w3[0] = append1[1];
+ w3[1] = append1[2];
break;
- case 124:
- w[31] = 0x80;
+ case 29:
+ w1[3] = w1[3] | append0[0] << 8;
+ w2[0] = append0[0] >> 24 | append0[1] << 8;
+ w2[1] = append0[1] >> 24 | append0[2] << 8;
+ w2[2] = append0[2] >> 24 | append0[3] << 8;
+ w2[3] = append0[3] >> 24 | append1[0] << 8;
+ w3[0] = append1[0] >> 24 | append1[1] << 8;
+ w3[1] = append1[1] >> 24 | append1[2] << 8;
break;
- case 125:
- w[31] = w[31] | 0x8000;
+ case 30:
+ w1[3] = w1[3] | append0[0] << 16;
+ w2[0] = append0[0] >> 16 | append0[1] << 16;
+ w2[1] = append0[1] >> 16 | append0[2] << 16;
+ w2[2] = append0[2] >> 16 | append0[3] << 16;
+ w2[3] = append0[3] >> 16 | append1[0] << 16;
+ w3[0] = append1[0] >> 16 | append1[1] << 16;
+ w3[1] = append1[1] >> 16 | append1[2] << 16;
break;
- case 126:
- w[31] = w[31] | 0x800000;
+ case 31:
+ w1[3] = w1[3] | append0[0] << 24;
+ w2[0] = append0[0] >> 8 | append0[1] << 24;
+ w2[1] = append0[1] >> 8 | append0[2] << 24;
+ w2[2] = append0[2] >> 8 | append0[3] << 24;
+ w2[3] = append0[3] >> 8 | append1[0] << 24;
+ w3[0] = append1[0] >> 8 | append1[1] << 24;
+ w3[1] = append1[1] >> 8 | append1[2] << 24;
break;
- case 127:
- w[31] = w[31] | 0x80000000;
+ case 32:
+ w2[0] = append0[0];
+ w2[1] = append0[1];
+ w2[2] = append0[2];
+ w2[3] = append0[3];
+ w3[0] = append1[0];
+ w3[1] = append1[1];
break;
}
}
+
*/
{
u32 ukey_s[8];
- ukey_s[0] = swap_workaround (ukey[0]);
- ukey_s[1] = swap_workaround (ukey[1]);
- ukey_s[2] = swap_workaround (ukey[2]);
- ukey_s[3] = swap_workaround (ukey[3]);
- ukey_s[4] = swap_workaround (ukey[4]);
- ukey_s[5] = swap_workaround (ukey[5]);
- ukey_s[6] = swap_workaround (ukey[6]);
- ukey_s[7] = swap_workaround (ukey[7]);
+ ukey_s[0] = swap32 (ukey[0]);
+ ukey_s[1] = swap32 (ukey[1]);
+ ukey_s[2] = swap32 (ukey[2]);
+ ukey_s[3] = swap32 (ukey[3]);
+ ukey_s[4] = swap32 (ukey[4]);
+ ukey_s[5] = swap32 (ukey[5]);
+ ukey_s[6] = swap32 (ukey[6]);
+ ukey_s[7] = swap32 (ukey[7]);
aes256_ExpandKey (ks, ukey_s);
}
{
u32 ukey_s[8];
- ukey_s[0] = swap_workaround (ukey[0]);
- ukey_s[1] = swap_workaround (ukey[1]);
- ukey_s[2] = swap_workaround (ukey[2]);
- ukey_s[3] = swap_workaround (ukey[3]);
- ukey_s[4] = swap_workaround (ukey[4]);
- ukey_s[5] = swap_workaround (ukey[5]);
- ukey_s[6] = swap_workaround (ukey[6]);
- ukey_s[7] = swap_workaround (ukey[7]);
+ ukey_s[0] = swap32 (ukey[0]);
+ ukey_s[1] = swap32 (ukey[1]);
+ ukey_s[2] = swap32 (ukey[2]);
+ ukey_s[3] = swap32 (ukey[3]);
+ ukey_s[4] = swap32 (ukey[4]);
+ ukey_s[5] = swap32 (ukey[5]);
+ ukey_s[6] = swap32 (ukey[6]);
+ ukey_s[7] = swap32 (ukey[7]);
aes256_ExpandKey (ks, ukey_s);
{
u32 in_s[4];
- in_s[0] = swap_workaround (in[0]);
- in_s[1] = swap_workaround (in[1]);
- in_s[2] = swap_workaround (in[2]);
- in_s[3] = swap_workaround (in[3]);
+ in_s[0] = swap32 (in[0]);
+ in_s[1] = swap32 (in[1]);
+ in_s[2] = swap32 (in[2]);
+ in_s[3] = swap32 (in[3]);
u32 s0 = in_s[0] ^ ks[0];
u32 s1 = in_s[1] ^ ks[1];
^ (td4[(t0 >> 0) & 0xff] & 0x000000ff)
^ ks[59];
- out[0] = swap_workaround (out[0]);
- out[1] = swap_workaround (out[1]);
- out[2] = swap_workaround (out[2]);
- out[3] = swap_workaround (out[3]);
+ out[0] = swap32 (out[0]);
+ out[1] = swap32 (out[1]);
+ out[2] = swap32 (out[2]);
+ out[3] = swap32 (out[3]);
}
static void aes256_encrypt (const u32 *ks, const u32 *in, u32 *out)
{
u32 in_s[4];
- in_s[0] = swap_workaround (in[0]);
- in_s[1] = swap_workaround (in[1]);
- in_s[2] = swap_workaround (in[2]);
- in_s[3] = swap_workaround (in[3]);
+ in_s[0] = swap32 (in[0]);
+ in_s[1] = swap32 (in[1]);
+ in_s[2] = swap32 (in[2]);
+ in_s[3] = swap32 (in[3]);
u32 s0 = in_s[0] ^ ks[0];
u32 s1 = in_s[1] ^ ks[1];
^ (te4[(t2 >> 0) & 0xff] & 0x000000ff)
^ ks[59];
- out[0] = swap_workaround (out[0]);
- out[1] = swap_workaround (out[1]);
- out[2] = swap_workaround (out[2]);
- out[3] = swap_workaround (out[3]);
+ out[0] = swap32 (out[0]);
+ out[1] = swap32 (out[1]);
+ out[2] = swap32 (out[2]);
+ out[3] = swap32 (out[3]);
}
static void aes256_decrypt_xts (const u32 *ukey1, const u32 *ukey2, const u32 *in, u32 *out)
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
- w[ 0] |= swap_workaround (salt_buf0[0]);
- w[ 1] |= swap_workaround (salt_buf0[1]);
- w[ 2] |= swap_workaround (salt_buf0[2]);
- w[ 3] |= swap_workaround (salt_buf0[3]);
- w[ 4] |= swap_workaround (salt_buf1[0]);
- w[ 5] |= swap_workaround (salt_buf1[1]);
- w[ 6] |= swap_workaround (salt_buf1[2]);
- w[ 7] |= swap_workaround (salt_buf1[3]);
- w[ 8] |= swap_workaround (salt_buf2[0]);
- w[ 9] |= swap_workaround (salt_buf2[1]);
- w[10] |= swap_workaround (salt_buf2[2]);
- w[11] |= swap_workaround (salt_buf2[3]);
- w[12] |= swap_workaround (salt_buf3[0]);
- w[13] |= swap_workaround (salt_buf3[1]);
- w[14] |= swap_workaround (salt_buf3[2]);
- w[15] |= swap_workaround (salt_buf3[3]);
+ w[ 0] |= swap32 (salt_buf0[0]);
+ w[ 1] |= swap32 (salt_buf0[1]);
+ w[ 2] |= swap32 (salt_buf0[2]);
+ w[ 3] |= swap32 (salt_buf0[3]);
+ w[ 4] |= swap32 (salt_buf1[0]);
+ w[ 5] |= swap32 (salt_buf1[1]);
+ w[ 6] |= swap32 (salt_buf1[2]);
+ w[ 7] |= swap32 (salt_buf1[3]);
+ w[ 8] |= swap32 (salt_buf2[0]);
+ w[ 9] |= swap32 (salt_buf2[1]);
+ w[10] |= swap32 (salt_buf2[2]);
+ w[11] |= swap32 (salt_buf2[3]);
+ w[12] |= swap32 (salt_buf3[0]);
+ w[13] |= swap32 (salt_buf3[1]);
+ w[14] |= swap32 (salt_buf3[2]);
+ w[15] |= swap32 (salt_buf3[3]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
* sha1
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u32 a = SHA1M_A;
u32 b = SHA1M_B;
* sha1
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u32 a = SHA1M_A;
u32 b = SHA1M_B;
* sha1
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u32 a = SHA1M_A;
u32 b = SHA1M_B;
* sha1
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
- w3_t[2] = swap_workaround (w3[2]);
- w3_t[3] = swap_workaround (w3[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
+ w3_t[2] = swap32 (w3[2]);
+ w3_t[3] = swap32 (w3[3]);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- w3_t[2] = swap_workaround (w3_t[2]);
- w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ w3_t[2] = swap32 (w3_t[2]);
+ w3_t[3] = swap32 (w3_t[3]);
/**
* loop
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
- w3_t[2] = swap_workaround (w3[2]);
- w3_t[3] = swap_workaround (w3[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
+ w3_t[2] = swap32 (w3[2]);
+ w3_t[3] = swap32 (w3[3]);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- w3_t[2] = swap_workaround (w3_t[2]);
- w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ w3_t[2] = swap32 (w3_t[2]);
+ w3_t[3] = swap32 (w3_t[3]);
/**
* loop
* sha1
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u32 a = SHA1M_A;
u32 b = SHA1M_B;
* sha1
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u32 a = SHA1M_A;
u32 b = SHA1M_B;
* sha1
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u32 a = SHA1M_A;
u32 b = SHA1M_B;
* sha1
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u32 a = SHA1M_A;
u32 b = SHA1M_B;
switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
- w[ 0] |= swap_workaround (salt_buf0[0]);
- w[ 1] |= swap_workaround (salt_buf0[1]);
- w[ 2] |= swap_workaround (salt_buf0[2]);
- w[ 3] |= swap_workaround (salt_buf0[3]);
- w[ 4] |= swap_workaround (salt_buf1[0]);
- w[ 5] |= swap_workaround (salt_buf1[1]);
- w[ 6] |= swap_workaround (salt_buf1[2]);
- w[ 7] |= swap_workaround (salt_buf1[3]);
- w[ 8] |= swap_workaround (salt_buf2[0]);
- w[ 9] |= swap_workaround (salt_buf2[1]);
- w[10] |= swap_workaround (salt_buf2[2]);
- w[11] |= swap_workaround (salt_buf2[3]);
- w[12] |= swap_workaround (salt_buf3[0]);
- w[13] |= swap_workaround (salt_buf3[1]);
- w[14] |= swap_workaround (salt_buf3[2]);
- w[15] |= swap_workaround (salt_buf3[3]);
+ w[ 0] |= swap32 (salt_buf0[0]);
+ w[ 1] |= swap32 (salt_buf0[1]);
+ w[ 2] |= swap32 (salt_buf0[2]);
+ w[ 3] |= swap32 (salt_buf0[3]);
+ w[ 4] |= swap32 (salt_buf1[0]);
+ w[ 5] |= swap32 (salt_buf1[1]);
+ w[ 6] |= swap32 (salt_buf1[2]);
+ w[ 7] |= swap32 (salt_buf1[3]);
+ w[ 8] |= swap32 (salt_buf2[0]);
+ w[ 9] |= swap32 (salt_buf2[1]);
+ w[10] |= swap32 (salt_buf2[2]);
+ w[11] |= swap32 (salt_buf2[3]);
+ w[12] |= swap32 (salt_buf3[0]);
+ w[13] |= swap32 (salt_buf3[1]);
+ w[14] |= swap32 (salt_buf3[2]);
+ w[15] |= swap32 (salt_buf3[3]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
* sha1
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u32 a = SHA1M_A;
u32 b = SHA1M_B;
* sha1
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u32 a = SHA1M_A;
u32 b = SHA1M_B;
* sha1
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u32 a = SHA1M_A;
u32 b = SHA1M_B;
* sha1
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
- w3_t[2] = swap_workaround (w3[2]);
- w3_t[3] = swap_workaround (w3[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
+ w3_t[2] = swap32 (w3[2]);
+ w3_t[3] = swap32 (w3[3]);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- w3_t[2] = swap_workaround (w3_t[2]);
- w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ w3_t[2] = swap32 (w3_t[2]);
+ w3_t[3] = swap32 (w3_t[3]);
/**
* loop
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
- w3_t[2] = swap_workaround (w3[2]);
- w3_t[3] = swap_workaround (w3[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
+ w3_t[2] = swap32 (w3[2]);
+ w3_t[3] = swap32 (w3[3]);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- w3_t[2] = swap_workaround (w3_t[2]);
- w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ w3_t[2] = swap32 (w3_t[2]);
+ w3_t[3] = swap32 (w3_t[3]);
/**
* loop
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
append_0x80_2x4 (w0, w1, out_len);
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
append_0x80_2x4 (w0, w1, out_len);
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
append_0x80_4x4 (w0, w1, w2, w3, pw_len);
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = (64 + pw_len) * 8;
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
append_0x80_4x4 (w0, w1, w2, w3, pw_len);
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = (64 + pw_len) * 8;
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
* SHA256
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
u32 w8_t = 0;
u32 w9_t = 0;
u32 wa_t = 0;
* SHA256
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
u32 w8_t = 0;
u32 w9_t = 0;
u32 wa_t = 0;
* SHA256
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
* SHA256
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
* sha256
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_salt_len * 8;
* sha256
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_salt_len * 8;
* sha256
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
* sha256
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
- w[ 0] |= swap_workaround (salt_buf0[0]);
- w[ 1] |= swap_workaround (salt_buf0[1]);
- w[ 2] |= swap_workaround (salt_buf0[2]);
- w[ 3] |= swap_workaround (salt_buf0[3]);
- w[ 4] |= swap_workaround (salt_buf1[0]);
- w[ 5] |= swap_workaround (salt_buf1[1]);
- w[ 6] |= swap_workaround (salt_buf1[2]);
- w[ 7] |= swap_workaround (salt_buf1[3]);
- w[ 8] |= swap_workaround (salt_buf2[0]);
- w[ 9] |= swap_workaround (salt_buf2[1]);
- w[10] |= swap_workaround (salt_buf2[2]);
- w[11] |= swap_workaround (salt_buf2[3]);
- w[12] |= swap_workaround (salt_buf3[0]);
- w[13] |= swap_workaround (salt_buf3[1]);
- w[14] |= swap_workaround (salt_buf3[2]);
- w[15] |= swap_workaround (salt_buf3[3]);
+ w[ 0] |= swap32 (salt_buf0[0]);
+ w[ 1] |= swap32 (salt_buf0[1]);
+ w[ 2] |= swap32 (salt_buf0[2]);
+ w[ 3] |= swap32 (salt_buf0[3]);
+ w[ 4] |= swap32 (salt_buf1[0]);
+ w[ 5] |= swap32 (salt_buf1[1]);
+ w[ 6] |= swap32 (salt_buf1[2]);
+ w[ 7] |= swap32 (salt_buf1[3]);
+ w[ 8] |= swap32 (salt_buf2[0]);
+ w[ 9] |= swap32 (salt_buf2[1]);
+ w[10] |= swap32 (salt_buf2[2]);
+ w[11] |= swap32 (salt_buf2[3]);
+ w[12] |= swap32 (salt_buf3[0]);
+ w[13] |= swap32 (salt_buf3[1]);
+ w[14] |= swap32 (salt_buf3[2]);
+ w[15] |= swap32 (salt_buf3[3]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
* sha256
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_salt_len * 8;
* sha256
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_salt_len * 8;
* sha256
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
* sha256
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
u32 w2_t2[4];
u32 w3_t2[4];
- w0_t2[0] = swap_workaround (w0[0]);
- w0_t2[1] = swap_workaround (w0[1]);
- w0_t2[2] = swap_workaround (w0[2]);
- w0_t2[3] = swap_workaround (w0[3]);
- w1_t2[0] = swap_workaround (w1[0]);
- w1_t2[1] = swap_workaround (w1[1]);
- w1_t2[2] = swap_workaround (w1[2]);
- w1_t2[3] = swap_workaround (w1[3]);
- w2_t2[0] = swap_workaround (w2[0]);
- w2_t2[1] = swap_workaround (w2[1]);
- w2_t2[2] = swap_workaround (w2[2]);
- w2_t2[3] = swap_workaround (w2[3]);
- w3_t2[0] = swap_workaround (w3[0]);
- w3_t2[1] = swap_workaround (w3[1]);
- w3_t2[2] = swap_workaround (w3[2]);
- w3_t2[3] = swap_workaround (w3[3]);
+ w0_t2[0] = swap32 (w0[0]);
+ w0_t2[1] = swap32 (w0[1]);
+ w0_t2[2] = swap32 (w0[2]);
+ w0_t2[3] = swap32 (w0[3]);
+ w1_t2[0] = swap32 (w1[0]);
+ w1_t2[1] = swap32 (w1[1]);
+ w1_t2[2] = swap32 (w1[2]);
+ w1_t2[3] = swap32 (w1[3]);
+ w2_t2[0] = swap32 (w2[0]);
+ w2_t2[1] = swap32 (w2[1]);
+ w2_t2[2] = swap32 (w2[2]);
+ w2_t2[3] = swap32 (w2[3]);
+ w3_t2[0] = swap32 (w3[0]);
+ w3_t2[1] = swap32 (w3[1]);
+ w3_t2[2] = swap32 (w3[2]);
+ w3_t2[3] = swap32 (w3[3]);
switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len);
* sha256
*/
- u32 w0_t = swap_workaround (w0_t2[0]);
- u32 w1_t = swap_workaround (w0_t2[1]);
- u32 w2_t = swap_workaround (w0_t2[2]);
- u32 w3_t = swap_workaround (w0_t2[3]);
- u32 w4_t = swap_workaround (w1_t2[0]);
- u32 w5_t = swap_workaround (w1_t2[1]);
- u32 w6_t = swap_workaround (w1_t2[2]);
- u32 w7_t = swap_workaround (w1_t2[3]);
- u32 w8_t = swap_workaround (w2_t2[0]);
- u32 w9_t = swap_workaround (w2_t2[1]);
- u32 wa_t = swap_workaround (w2_t2[2]);
- u32 wb_t = swap_workaround (w2_t2[3]);
- u32 wc_t = swap_workaround (w3_t2[0]);
- u32 wd_t = swap_workaround (w3_t2[1]);
+ u32 w0_t = swap32 (w0_t2[0]);
+ u32 w1_t = swap32 (w0_t2[1]);
+ u32 w2_t = swap32 (w0_t2[2]);
+ u32 w3_t = swap32 (w0_t2[3]);
+ u32 w4_t = swap32 (w1_t2[0]);
+ u32 w5_t = swap32 (w1_t2[1]);
+ u32 w6_t = swap32 (w1_t2[2]);
+ u32 w7_t = swap32 (w1_t2[3]);
+ u32 w8_t = swap32 (w2_t2[0]);
+ u32 w9_t = swap32 (w2_t2[1]);
+ u32 wa_t = swap32 (w2_t2[2]);
+ u32 wb_t = swap32 (w2_t2[3]);
+ u32 wc_t = swap32 (w3_t2[0]);
+ u32 wd_t = swap32 (w3_t2[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
u32 w2_t2[4];
u32 w3_t2[4];
- w0_t2[0] = swap_workaround (w0[0]);
- w0_t2[1] = swap_workaround (w0[1]);
- w0_t2[2] = swap_workaround (w0[2]);
- w0_t2[3] = swap_workaround (w0[3]);
- w1_t2[0] = swap_workaround (w1[0]);
- w1_t2[1] = swap_workaround (w1[1]);
- w1_t2[2] = swap_workaround (w1[2]);
- w1_t2[3] = swap_workaround (w1[3]);
- w2_t2[0] = swap_workaround (w2[0]);
- w2_t2[1] = swap_workaround (w2[1]);
- w2_t2[2] = swap_workaround (w2[2]);
- w2_t2[3] = swap_workaround (w2[3]);
- w3_t2[0] = swap_workaround (w3[0]);
- w3_t2[1] = swap_workaround (w3[1]);
- w3_t2[2] = swap_workaround (w3[2]);
- w3_t2[3] = swap_workaround (w3[3]);
+ w0_t2[0] = swap32 (w0[0]);
+ w0_t2[1] = swap32 (w0[1]);
+ w0_t2[2] = swap32 (w0[2]);
+ w0_t2[3] = swap32 (w0[3]);
+ w1_t2[0] = swap32 (w1[0]);
+ w1_t2[1] = swap32 (w1[1]);
+ w1_t2[2] = swap32 (w1[2]);
+ w1_t2[3] = swap32 (w1[3]);
+ w2_t2[0] = swap32 (w2[0]);
+ w2_t2[1] = swap32 (w2[1]);
+ w2_t2[2] = swap32 (w2[2]);
+ w2_t2[3] = swap32 (w2[3]);
+ w3_t2[0] = swap32 (w3[0]);
+ w3_t2[1] = swap32 (w3[1]);
+ w3_t2[2] = swap32 (w3[2]);
+ w3_t2[3] = swap32 (w3[3]);
switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len);
* sha256
*/
- u32 w0_t = swap_workaround (w0_t2[0]);
- u32 w1_t = swap_workaround (w0_t2[1]);
- u32 w2_t = swap_workaround (w0_t2[2]);
- u32 w3_t = swap_workaround (w0_t2[3]);
- u32 w4_t = swap_workaround (w1_t2[0]);
- u32 w5_t = swap_workaround (w1_t2[1]);
- u32 w6_t = swap_workaround (w1_t2[2]);
- u32 w7_t = swap_workaround (w1_t2[3]);
- u32 w8_t = swap_workaround (w2_t2[0]);
- u32 w9_t = swap_workaround (w2_t2[1]);
- u32 wa_t = swap_workaround (w2_t2[2]);
- u32 wb_t = swap_workaround (w2_t2[3]);
- u32 wc_t = swap_workaround (w3_t2[0]);
- u32 wd_t = swap_workaround (w3_t2[1]);
+ u32 w0_t = swap32 (w0_t2[0]);
+ u32 w1_t = swap32 (w0_t2[1]);
+ u32 w2_t = swap32 (w0_t2[2]);
+ u32 w3_t = swap32 (w0_t2[3]);
+ u32 w4_t = swap32 (w1_t2[0]);
+ u32 w5_t = swap32 (w1_t2[1]);
+ u32 w6_t = swap32 (w1_t2[2]);
+ u32 w7_t = swap32 (w1_t2[3]);
+ u32 w8_t = swap32 (w2_t2[0]);
+ u32 w9_t = swap32 (w2_t2[1]);
+ u32 wa_t = swap32 (w2_t2[2]);
+ u32 wb_t = swap32 (w2_t2[3]);
+ u32 wc_t = swap32 (w3_t2[0]);
+ u32 wd_t = swap32 (w3_t2[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
* sha256
*/
- u32 w0_t = swap_workaround (w0_t2[0]);
- u32 w1_t = swap_workaround (w0_t2[1]);
- u32 w2_t = swap_workaround (w0_t2[2]);
- u32 w3_t = swap_workaround (w0_t2[3]);
- u32 w4_t = swap_workaround (w1_t2[0]);
- u32 w5_t = swap_workaround (w1_t2[1]);
- u32 w6_t = swap_workaround (w1_t2[2]);
- u32 w7_t = swap_workaround (w1_t2[3]);
- u32 w8_t = swap_workaround (w2_t2[0]);
- u32 w9_t = swap_workaround (w2_t2[1]);
- u32 wa_t = swap_workaround (w2_t2[2]);
- u32 wb_t = swap_workaround (w2_t2[3]);
- u32 wc_t = swap_workaround (w3_t2[0]);
- u32 wd_t = swap_workaround (w3_t2[1]);
+ u32 w0_t = swap32 (w0_t2[0]);
+ u32 w1_t = swap32 (w0_t2[1]);
+ u32 w2_t = swap32 (w0_t2[2]);
+ u32 w3_t = swap32 (w0_t2[3]);
+ u32 w4_t = swap32 (w1_t2[0]);
+ u32 w5_t = swap32 (w1_t2[1]);
+ u32 w6_t = swap32 (w1_t2[2]);
+ u32 w7_t = swap32 (w1_t2[3]);
+ u32 w8_t = swap32 (w2_t2[0]);
+ u32 w9_t = swap32 (w2_t2[1]);
+ u32 wa_t = swap32 (w2_t2[2]);
+ u32 wb_t = swap32 (w2_t2[3]);
+ u32 wc_t = swap32 (w3_t2[0]);
+ u32 wd_t = swap32 (w3_t2[1]);
u32 we_t = 0;
u32 wf_t = out_salt_len * 8;
* sha256
*/
- u32 w0_t = swap_workaround (w0_t2[0]);
- u32 w1_t = swap_workaround (w0_t2[1]);
- u32 w2_t = swap_workaround (w0_t2[2]);
- u32 w3_t = swap_workaround (w0_t2[3]);
- u32 w4_t = swap_workaround (w1_t2[0]);
- u32 w5_t = swap_workaround (w1_t2[1]);
- u32 w6_t = swap_workaround (w1_t2[2]);
- u32 w7_t = swap_workaround (w1_t2[3]);
- u32 w8_t = swap_workaround (w2_t2[0]);
- u32 w9_t = swap_workaround (w2_t2[1]);
- u32 wa_t = swap_workaround (w2_t2[2]);
- u32 wb_t = swap_workaround (w2_t2[3]);
- u32 wc_t = swap_workaround (w3_t2[0]);
- u32 wd_t = swap_workaround (w3_t2[1]);
+ u32 w0_t = swap32 (w0_t2[0]);
+ u32 w1_t = swap32 (w0_t2[1]);
+ u32 w2_t = swap32 (w0_t2[2]);
+ u32 w3_t = swap32 (w0_t2[3]);
+ u32 w4_t = swap32 (w1_t2[0]);
+ u32 w5_t = swap32 (w1_t2[1]);
+ u32 w6_t = swap32 (w1_t2[2]);
+ u32 w7_t = swap32 (w1_t2[3]);
+ u32 w8_t = swap32 (w2_t2[0]);
+ u32 w9_t = swap32 (w2_t2[1]);
+ u32 wa_t = swap32 (w2_t2[2]);
+ u32 wb_t = swap32 (w2_t2[3]);
+ u32 wc_t = swap32 (w3_t2[0]);
+ u32 wd_t = swap32 (w3_t2[1]);
u32 we_t = 0;
u32 wf_t = out_salt_len * 8;
* sha256
*/
- u32 w0_t = swap_workaround (w0_t2[0]);
- u32 w1_t = swap_workaround (w0_t2[1]);
- u32 w2_t = swap_workaround (w0_t2[2]);
- u32 w3_t = swap_workaround (w0_t2[3]);
- u32 w4_t = swap_workaround (w1_t2[0]);
- u32 w5_t = swap_workaround (w1_t2[1]);
- u32 w6_t = swap_workaround (w1_t2[2]);
- u32 w7_t = swap_workaround (w1_t2[3]);
- u32 w8_t = swap_workaround (w2_t2[0]);
- u32 w9_t = swap_workaround (w2_t2[1]);
- u32 wa_t = swap_workaround (w2_t2[2]);
- u32 wb_t = swap_workaround (w2_t2[3]);
- u32 wc_t = swap_workaround (w3_t2[0]);
- u32 wd_t = swap_workaround (w3_t2[1]);
+ u32 w0_t = swap32 (w0_t2[0]);
+ u32 w1_t = swap32 (w0_t2[1]);
+ u32 w2_t = swap32 (w0_t2[2]);
+ u32 w3_t = swap32 (w0_t2[3]);
+ u32 w4_t = swap32 (w1_t2[0]);
+ u32 w5_t = swap32 (w1_t2[1]);
+ u32 w6_t = swap32 (w1_t2[2]);
+ u32 w7_t = swap32 (w1_t2[3]);
+ u32 w8_t = swap32 (w2_t2[0]);
+ u32 w9_t = swap32 (w2_t2[1]);
+ u32 wa_t = swap32 (w2_t2[2]);
+ u32 wb_t = swap32 (w2_t2[3]);
+ u32 wc_t = swap32 (w3_t2[0]);
+ u32 wd_t = swap32 (w3_t2[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
* sha256
*/
- u32 w0_t = swap_workaround (w0_t2[0]);
- u32 w1_t = swap_workaround (w0_t2[1]);
- u32 w2_t = swap_workaround (w0_t2[2]);
- u32 w3_t = swap_workaround (w0_t2[3]);
- u32 w4_t = swap_workaround (w1_t2[0]);
- u32 w5_t = swap_workaround (w1_t2[1]);
- u32 w6_t = swap_workaround (w1_t2[2]);
- u32 w7_t = swap_workaround (w1_t2[3]);
- u32 w8_t = swap_workaround (w2_t2[0]);
- u32 w9_t = swap_workaround (w2_t2[1]);
- u32 wa_t = swap_workaround (w2_t2[2]);
- u32 wb_t = swap_workaround (w2_t2[3]);
- u32 wc_t = swap_workaround (w3_t2[0]);
- u32 wd_t = swap_workaround (w3_t2[1]);
+ u32 w0_t = swap32 (w0_t2[0]);
+ u32 w1_t = swap32 (w0_t2[1]);
+ u32 w2_t = swap32 (w0_t2[2]);
+ u32 w3_t = swap32 (w0_t2[3]);
+ u32 w4_t = swap32 (w1_t2[0]);
+ u32 w5_t = swap32 (w1_t2[1]);
+ u32 w6_t = swap32 (w1_t2[2]);
+ u32 w7_t = swap32 (w1_t2[3]);
+ u32 w8_t = swap32 (w2_t2[0]);
+ u32 w9_t = swap32 (w2_t2[1]);
+ u32 wa_t = swap32 (w2_t2[2]);
+ u32 wb_t = swap32 (w2_t2[3]);
+ u32 wc_t = swap32 (w3_t2[0]);
+ u32 wd_t = swap32 (w3_t2[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
- w[ 0] |= swap_workaround (salt_buf0[0]);
- w[ 1] |= swap_workaround (salt_buf0[1]);
- w[ 2] |= swap_workaround (salt_buf0[2]);
- w[ 3] |= swap_workaround (salt_buf0[3]);
- w[ 4] |= swap_workaround (salt_buf1[0]);
- w[ 5] |= swap_workaround (salt_buf1[1]);
- w[ 6] |= swap_workaround (salt_buf1[2]);
- w[ 7] |= swap_workaround (salt_buf1[3]);
- w[ 8] |= swap_workaround (salt_buf2[0]);
- w[ 9] |= swap_workaround (salt_buf2[1]);
- w[10] |= swap_workaround (salt_buf2[2]);
- w[11] |= swap_workaround (salt_buf2[3]);
- w[12] |= swap_workaround (salt_buf3[0]);
- w[13] |= swap_workaround (salt_buf3[1]);
- w[14] |= swap_workaround (salt_buf3[2]);
- w[15] |= swap_workaround (salt_buf3[3]);
+ w[ 0] |= swap32 (salt_buf0[0]);
+ w[ 1] |= swap32 (salt_buf0[1]);
+ w[ 2] |= swap32 (salt_buf0[2]);
+ w[ 3] |= swap32 (salt_buf0[3]);
+ w[ 4] |= swap32 (salt_buf1[0]);
+ w[ 5] |= swap32 (salt_buf1[1]);
+ w[ 6] |= swap32 (salt_buf1[2]);
+ w[ 7] |= swap32 (salt_buf1[3]);
+ w[ 8] |= swap32 (salt_buf2[0]);
+ w[ 9] |= swap32 (salt_buf2[1]);
+ w[10] |= swap32 (salt_buf2[2]);
+ w[11] |= swap32 (salt_buf2[3]);
+ w[12] |= swap32 (salt_buf3[0]);
+ w[13] |= swap32 (salt_buf3[1]);
+ w[14] |= swap32 (salt_buf3[2]);
+ w[15] |= swap32 (salt_buf3[3]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
* sha256
*/
- u32 w0_t = swap_workaround (w0_t2[0]);
- u32 w1_t = swap_workaround (w0_t2[1]);
- u32 w2_t = swap_workaround (w0_t2[2]);
- u32 w3_t = swap_workaround (w0_t2[3]);
- u32 w4_t = swap_workaround (w1_t2[0]);
- u32 w5_t = swap_workaround (w1_t2[1]);
- u32 w6_t = swap_workaround (w1_t2[2]);
- u32 w7_t = swap_workaround (w1_t2[3]);
- u32 w8_t = swap_workaround (w2_t2[0]);
- u32 w9_t = swap_workaround (w2_t2[1]);
- u32 wa_t = swap_workaround (w2_t2[2]);
- u32 wb_t = swap_workaround (w2_t2[3]);
- u32 wc_t = swap_workaround (w3_t2[0]);
- u32 wd_t = swap_workaround (w3_t2[1]);
+ u32 w0_t = swap32 (w0_t2[0]);
+ u32 w1_t = swap32 (w0_t2[1]);
+ u32 w2_t = swap32 (w0_t2[2]);
+ u32 w3_t = swap32 (w0_t2[3]);
+ u32 w4_t = swap32 (w1_t2[0]);
+ u32 w5_t = swap32 (w1_t2[1]);
+ u32 w6_t = swap32 (w1_t2[2]);
+ u32 w7_t = swap32 (w1_t2[3]);
+ u32 w8_t = swap32 (w2_t2[0]);
+ u32 w9_t = swap32 (w2_t2[1]);
+ u32 wa_t = swap32 (w2_t2[2]);
+ u32 wb_t = swap32 (w2_t2[3]);
+ u32 wc_t = swap32 (w3_t2[0]);
+ u32 wd_t = swap32 (w3_t2[1]);
u32 we_t = 0;
u32 wf_t = out_salt_len * 8;
* sha256
*/
- u32 w0_t = swap_workaround (w0_t2[0]);
- u32 w1_t = swap_workaround (w0_t2[1]);
- u32 w2_t = swap_workaround (w0_t2[2]);
- u32 w3_t = swap_workaround (w0_t2[3]);
- u32 w4_t = swap_workaround (w1_t2[0]);
- u32 w5_t = swap_workaround (w1_t2[1]);
- u32 w6_t = swap_workaround (w1_t2[2]);
- u32 w7_t = swap_workaround (w1_t2[3]);
- u32 w8_t = swap_workaround (w2_t2[0]);
- u32 w9_t = swap_workaround (w2_t2[1]);
- u32 wa_t = swap_workaround (w2_t2[2]);
- u32 wb_t = swap_workaround (w2_t2[3]);
- u32 wc_t = swap_workaround (w3_t2[0]);
- u32 wd_t = swap_workaround (w3_t2[1]);
+ u32 w0_t = swap32 (w0_t2[0]);
+ u32 w1_t = swap32 (w0_t2[1]);
+ u32 w2_t = swap32 (w0_t2[2]);
+ u32 w3_t = swap32 (w0_t2[3]);
+ u32 w4_t = swap32 (w1_t2[0]);
+ u32 w5_t = swap32 (w1_t2[1]);
+ u32 w6_t = swap32 (w1_t2[2]);
+ u32 w7_t = swap32 (w1_t2[3]);
+ u32 w8_t = swap32 (w2_t2[0]);
+ u32 w9_t = swap32 (w2_t2[1]);
+ u32 wa_t = swap32 (w2_t2[2]);
+ u32 wb_t = swap32 (w2_t2[3]);
+ u32 wc_t = swap32 (w3_t2[0]);
+ u32 wd_t = swap32 (w3_t2[1]);
u32 we_t = 0;
u32 wf_t = out_salt_len * 8;
* sha256
*/
- u32 w0_t = swap_workaround (w0_t2[0]);
- u32 w1_t = swap_workaround (w0_t2[1]);
- u32 w2_t = swap_workaround (w0_t2[2]);
- u32 w3_t = swap_workaround (w0_t2[3]);
- u32 w4_t = swap_workaround (w1_t2[0]);
- u32 w5_t = swap_workaround (w1_t2[1]);
- u32 w6_t = swap_workaround (w1_t2[2]);
- u32 w7_t = swap_workaround (w1_t2[3]);
- u32 w8_t = swap_workaround (w2_t2[0]);
- u32 w9_t = swap_workaround (w2_t2[1]);
- u32 wa_t = swap_workaround (w2_t2[2]);
- u32 wb_t = swap_workaround (w2_t2[3]);
- u32 wc_t = swap_workaround (w3_t2[0]);
- u32 wd_t = swap_workaround (w3_t2[1]);
+ u32 w0_t = swap32 (w0_t2[0]);
+ u32 w1_t = swap32 (w0_t2[1]);
+ u32 w2_t = swap32 (w0_t2[2]);
+ u32 w3_t = swap32 (w0_t2[3]);
+ u32 w4_t = swap32 (w1_t2[0]);
+ u32 w5_t = swap32 (w1_t2[1]);
+ u32 w6_t = swap32 (w1_t2[2]);
+ u32 w7_t = swap32 (w1_t2[3]);
+ u32 w8_t = swap32 (w2_t2[0]);
+ u32 w9_t = swap32 (w2_t2[1]);
+ u32 wa_t = swap32 (w2_t2[2]);
+ u32 wb_t = swap32 (w2_t2[3]);
+ u32 wc_t = swap32 (w3_t2[0]);
+ u32 wd_t = swap32 (w3_t2[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
* sha256
*/
- u32 w0_t = swap_workaround (w0_t2[0]);
- u32 w1_t = swap_workaround (w0_t2[1]);
- u32 w2_t = swap_workaround (w0_t2[2]);
- u32 w3_t = swap_workaround (w0_t2[3]);
- u32 w4_t = swap_workaround (w1_t2[0]);
- u32 w5_t = swap_workaround (w1_t2[1]);
- u32 w6_t = swap_workaround (w1_t2[2]);
- u32 w7_t = swap_workaround (w1_t2[3]);
- u32 w8_t = swap_workaround (w2_t2[0]);
- u32 w9_t = swap_workaround (w2_t2[1]);
- u32 wa_t = swap_workaround (w2_t2[2]);
- u32 wb_t = swap_workaround (w2_t2[3]);
- u32 wc_t = swap_workaround (w3_t2[0]);
- u32 wd_t = swap_workaround (w3_t2[1]);
+ u32 w0_t = swap32 (w0_t2[0]);
+ u32 w1_t = swap32 (w0_t2[1]);
+ u32 w2_t = swap32 (w0_t2[2]);
+ u32 w3_t = swap32 (w0_t2[3]);
+ u32 w4_t = swap32 (w1_t2[0]);
+ u32 w5_t = swap32 (w1_t2[1]);
+ u32 w6_t = swap32 (w1_t2[2]);
+ u32 w7_t = swap32 (w1_t2[3]);
+ u32 w8_t = swap32 (w2_t2[0]);
+ u32 w9_t = swap32 (w2_t2[1]);
+ u32 wa_t = swap32 (w2_t2[2]);
+ u32 wb_t = swap32 (w2_t2[3]);
+ u32 wc_t = swap32 (w3_t2[0]);
+ u32 wd_t = swap32 (w3_t2[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
u32 w2_t2[4];
u32 w3_t2[4];
- w0_t2[0] = swap_workaround (w0[0]);
- w0_t2[1] = swap_workaround (w0[1]);
- w0_t2[2] = swap_workaround (w0[2]);
- w0_t2[3] = swap_workaround (w0[3]);
- w1_t2[0] = swap_workaround (w1[0]);
- w1_t2[1] = swap_workaround (w1[1]);
- w1_t2[2] = swap_workaround (w1[2]);
- w1_t2[3] = swap_workaround (w1[3]);
- w2_t2[0] = swap_workaround (w2[0]);
- w2_t2[1] = swap_workaround (w2[1]);
- w2_t2[2] = swap_workaround (w2[2]);
- w2_t2[3] = swap_workaround (w2[3]);
- w3_t2[0] = swap_workaround (w3[0]);
- w3_t2[1] = swap_workaround (w3[1]);
- w3_t2[2] = swap_workaround (w3[2]);
- w3_t2[3] = swap_workaround (w3[3]);
+ w0_t2[0] = swap32 (w0[0]);
+ w0_t2[1] = swap32 (w0[1]);
+ w0_t2[2] = swap32 (w0[2]);
+ w0_t2[3] = swap32 (w0[3]);
+ w1_t2[0] = swap32 (w1[0]);
+ w1_t2[1] = swap32 (w1[1]);
+ w1_t2[2] = swap32 (w1[2]);
+ w1_t2[3] = swap32 (w1[3]);
+ w2_t2[0] = swap32 (w2[0]);
+ w2_t2[1] = swap32 (w2[1]);
+ w2_t2[2] = swap32 (w2[2]);
+ w2_t2[3] = swap32 (w2[3]);
+ w3_t2[0] = swap32 (w3[0]);
+ w3_t2[1] = swap32 (w3[1]);
+ w3_t2[2] = swap32 (w3[2]);
+ w3_t2[3] = swap32 (w3[3]);
switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len);
* sha256
*/
- u32 w0_t = swap_workaround (w0_t2[0]);
- u32 w1_t = swap_workaround (w0_t2[1]);
- u32 w2_t = swap_workaround (w0_t2[2]);
- u32 w3_t = swap_workaround (w0_t2[3]);
- u32 w4_t = swap_workaround (w1_t2[0]);
- u32 w5_t = swap_workaround (w1_t2[1]);
- u32 w6_t = swap_workaround (w1_t2[2]);
- u32 w7_t = swap_workaround (w1_t2[3]);
- u32 w8_t = swap_workaround (w2_t2[0]);
- u32 w9_t = swap_workaround (w2_t2[1]);
- u32 wa_t = swap_workaround (w2_t2[2]);
- u32 wb_t = swap_workaround (w2_t2[3]);
- u32 wc_t = swap_workaround (w3_t2[0]);
- u32 wd_t = swap_workaround (w3_t2[1]);
+ u32 w0_t = swap32 (w0_t2[0]);
+ u32 w1_t = swap32 (w0_t2[1]);
+ u32 w2_t = swap32 (w0_t2[2]);
+ u32 w3_t = swap32 (w0_t2[3]);
+ u32 w4_t = swap32 (w1_t2[0]);
+ u32 w5_t = swap32 (w1_t2[1]);
+ u32 w6_t = swap32 (w1_t2[2]);
+ u32 w7_t = swap32 (w1_t2[3]);
+ u32 w8_t = swap32 (w2_t2[0]);
+ u32 w9_t = swap32 (w2_t2[1]);
+ u32 wa_t = swap32 (w2_t2[2]);
+ u32 wb_t = swap32 (w2_t2[3]);
+ u32 wc_t = swap32 (w3_t2[0]);
+ u32 wd_t = swap32 (w3_t2[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
u32 w2_t2[4];
u32 w3_t2[4];
- w0_t2[0] = swap_workaround (w0[0]);
- w0_t2[1] = swap_workaround (w0[1]);
- w0_t2[2] = swap_workaround (w0[2]);
- w0_t2[3] = swap_workaround (w0[3]);
- w1_t2[0] = swap_workaround (w1[0]);
- w1_t2[1] = swap_workaround (w1[1]);
- w1_t2[2] = swap_workaround (w1[2]);
- w1_t2[3] = swap_workaround (w1[3]);
- w2_t2[0] = swap_workaround (w2[0]);
- w2_t2[1] = swap_workaround (w2[1]);
- w2_t2[2] = swap_workaround (w2[2]);
- w2_t2[3] = swap_workaround (w2[3]);
- w3_t2[0] = swap_workaround (w3[0]);
- w3_t2[1] = swap_workaround (w3[1]);
- w3_t2[2] = swap_workaround (w3[2]);
- w3_t2[3] = swap_workaround (w3[3]);
+ w0_t2[0] = swap32 (w0[0]);
+ w0_t2[1] = swap32 (w0[1]);
+ w0_t2[2] = swap32 (w0[2]);
+ w0_t2[3] = swap32 (w0[3]);
+ w1_t2[0] = swap32 (w1[0]);
+ w1_t2[1] = swap32 (w1[1]);
+ w1_t2[2] = swap32 (w1[2]);
+ w1_t2[3] = swap32 (w1[3]);
+ w2_t2[0] = swap32 (w2[0]);
+ w2_t2[1] = swap32 (w2[1]);
+ w2_t2[2] = swap32 (w2[2]);
+ w2_t2[3] = swap32 (w2[3]);
+ w3_t2[0] = swap32 (w3[0]);
+ w3_t2[1] = swap32 (w3[1]);
+ w3_t2[2] = swap32 (w3[2]);
+ w3_t2[3] = swap32 (w3[3]);
switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len);
* sha256
*/
- u32 w0_t = swap_workaround (w0_t2[0]);
- u32 w1_t = swap_workaround (w0_t2[1]);
- u32 w2_t = swap_workaround (w0_t2[2]);
- u32 w3_t = swap_workaround (w0_t2[3]);
- u32 w4_t = swap_workaround (w1_t2[0]);
- u32 w5_t = swap_workaround (w1_t2[1]);
- u32 w6_t = swap_workaround (w1_t2[2]);
- u32 w7_t = swap_workaround (w1_t2[3]);
- u32 w8_t = swap_workaround (w2_t2[0]);
- u32 w9_t = swap_workaround (w2_t2[1]);
- u32 wa_t = swap_workaround (w2_t2[2]);
- u32 wb_t = swap_workaround (w2_t2[3]);
- u32 wc_t = swap_workaround (w3_t2[0]);
- u32 wd_t = swap_workaround (w3_t2[1]);
+ u32 w0_t = swap32 (w0_t2[0]);
+ u32 w1_t = swap32 (w0_t2[1]);
+ u32 w2_t = swap32 (w0_t2[2]);
+ u32 w3_t = swap32 (w0_t2[3]);
+ u32 w4_t = swap32 (w1_t2[0]);
+ u32 w5_t = swap32 (w1_t2[1]);
+ u32 w6_t = swap32 (w1_t2[2]);
+ u32 w7_t = swap32 (w1_t2[3]);
+ u32 w8_t = swap32 (w2_t2[0]);
+ u32 w9_t = swap32 (w2_t2[1]);
+ u32 wa_t = swap32 (w2_t2[2]);
+ u32 wb_t = swap32 (w2_t2[3]);
+ u32 wc_t = swap32 (w3_t2[0]);
+ u32 wd_t = swap32 (w3_t2[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
append_0x80_2x4 (w0, w1, out_len);
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
append_0x80_2x4 (w0, w1, out_len);
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
append_0x80_4x4 (w0, w1, w2, w3, pw_len);
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = (64 + pw_len) * 8;
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
append_0x80_4x4 (w0, w1, w2, w3, pw_len);
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = (64 + pw_len) * 8;
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
-
-#ifdef VECT_SIZE4
-#define COMPARE_M "check_multi_vect4_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
#define md5apr1_magic0 0x72706124
#define md5apr1_magic1 0x00002431
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = out_len * 8;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = out_len * 8;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = pw_len * 8;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = pw_len * 8;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = out_salt_len * 8;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = out_salt_len * 8;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
- w[ 0] |= swap_workaround (salt_buf0[0]);
- w[ 1] |= swap_workaround (salt_buf0[1]);
- w[ 2] |= swap_workaround (salt_buf0[2]);
- w[ 3] |= swap_workaround (salt_buf0[3]);
- w[ 4] |= swap_workaround (salt_buf1[0]);
- w[ 5] |= swap_workaround (salt_buf1[1]);
- w[ 6] |= swap_workaround (salt_buf1[2]);
- w[ 7] |= swap_workaround (salt_buf1[3]);
- w[ 8] |= swap_workaround (salt_buf2[0]);
- w[ 9] |= swap_workaround (salt_buf2[1]);
- w[10] |= swap_workaround (salt_buf2[2]);
- w[11] |= swap_workaround (salt_buf2[3]);
- w[12] |= swap_workaround (salt_buf3[0]);
- w[13] |= swap_workaround (salt_buf3[1]);
- w[14] |= swap_workaround (salt_buf3[2]);
- w[15] |= swap_workaround (salt_buf3[3]);
+ w[ 0] |= swap32 (salt_buf0[0]);
+ w[ 1] |= swap32 (salt_buf0[1]);
+ w[ 2] |= swap32 (salt_buf0[2]);
+ w[ 3] |= swap32 (salt_buf0[3]);
+ w[ 4] |= swap32 (salt_buf1[0]);
+ w[ 5] |= swap32 (salt_buf1[1]);
+ w[ 6] |= swap32 (salt_buf1[2]);
+ w[ 7] |= swap32 (salt_buf1[3]);
+ w[ 8] |= swap32 (salt_buf2[0]);
+ w[ 9] |= swap32 (salt_buf2[1]);
+ w[10] |= swap32 (salt_buf2[2]);
+ w[11] |= swap32 (salt_buf2[3]);
+ w[12] |= swap32 (salt_buf3[0]);
+ w[13] |= swap32 (salt_buf3[1]);
+ w[14] |= swap32 (salt_buf3[2]);
+ w[15] |= swap32 (salt_buf3[3]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = out_salt_len * 8;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = out_salt_len * 8;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
- w3_t[2] = swap_workaround (w3[2]);
- w3_t[3] = swap_workaround (w3[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
+ w3_t[2] = swap32 (w3[2]);
+ w3_t[3] = swap32 (w3[3]);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
* sha512
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u64 digest[8];
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
- w3_t[2] = swap_workaround (w3[2]);
- w3_t[3] = swap_workaround (w3[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
+ w3_t[2] = swap32 (w3[2]);
+ w3_t[3] = swap32 (w3[3]);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
* sha512
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u64 digest[8];
* sha512
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = 0;
w3_t[3] = out_salt_len * 8;
* sha512
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = 0;
w3_t[3] = out_salt_len * 8;
* sha512
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
* sha512
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
- w[ 0] |= swap_workaround (salt_buf0[0]);
- w[ 1] |= swap_workaround (salt_buf0[1]);
- w[ 2] |= swap_workaround (salt_buf0[2]);
- w[ 3] |= swap_workaround (salt_buf0[3]);
- w[ 4] |= swap_workaround (salt_buf1[0]);
- w[ 5] |= swap_workaround (salt_buf1[1]);
- w[ 6] |= swap_workaround (salt_buf1[2]);
- w[ 7] |= swap_workaround (salt_buf1[3]);
- w[ 8] |= swap_workaround (salt_buf2[0]);
- w[ 9] |= swap_workaround (salt_buf2[1]);
- w[10] |= swap_workaround (salt_buf2[2]);
- w[11] |= swap_workaround (salt_buf2[3]);
- w[12] |= swap_workaround (salt_buf3[0]);
- w[13] |= swap_workaround (salt_buf3[1]);
- w[14] |= swap_workaround (salt_buf3[2]);
- w[15] |= swap_workaround (salt_buf3[3]);
+ w[ 0] |= swap32 (salt_buf0[0]);
+ w[ 1] |= swap32 (salt_buf0[1]);
+ w[ 2] |= swap32 (salt_buf0[2]);
+ w[ 3] |= swap32 (salt_buf0[3]);
+ w[ 4] |= swap32 (salt_buf1[0]);
+ w[ 5] |= swap32 (salt_buf1[1]);
+ w[ 6] |= swap32 (salt_buf1[2]);
+ w[ 7] |= swap32 (salt_buf1[3]);
+ w[ 8] |= swap32 (salt_buf2[0]);
+ w[ 9] |= swap32 (salt_buf2[1]);
+ w[10] |= swap32 (salt_buf2[2]);
+ w[11] |= swap32 (salt_buf2[3]);
+ w[12] |= swap32 (salt_buf3[0]);
+ w[13] |= swap32 (salt_buf3[1]);
+ w[14] |= swap32 (salt_buf3[2]);
+ w[15] |= swap32 (salt_buf3[3]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
* sha512
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = 0;
w3_t[3] = out_salt_len * 8;
* sha512
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = 0;
w3_t[3] = out_salt_len * 8;
* sha512
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
* sha512
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
- w3_t[2] = swap_workaround (w3[2]);
- w3_t[3] = swap_workaround (w3[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
+ w3_t[2] = swap32 (w3[2]);
+ w3_t[3] = swap32 (w3[3]);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
* sha512
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u64 digest[8];
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
- w3_t[2] = swap_workaround (w3[2]);
- w3_t[3] = swap_workaround (w3[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
+ w3_t[2] = swap32 (w3[2]);
+ w3_t[3] = swap32 (w3[3]);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
* sha512
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u64 digest[8];
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
append_0x80_2x4 (w0, w1, out_len);
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
append_0x80_2x4 (w0, w1, out_len);
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
append_0x80_4x4 (w0, w1, w2, w3, pw_len);
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = (128 + pw_len) * 8;
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
append_0x80_4x4 (w0, w1, w2, w3, pw_len);
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = (128 + pw_len) * 8;
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
u32 w0_t[4];
- w0_t[0] = swap_workaround (salt_buf0[0]);
- w0_t[1] = swap_workaround (salt_buf0[1]);
- w0_t[2] = swap_workaround (salt_buf0[2]);
- w0_t[3] = swap_workaround (salt_buf0[3]);
+ w0_t[0] = swap32 (salt_buf0[0]);
+ w0_t[1] = swap32 (salt_buf0[1]);
+ w0_t[2] = swap32 (salt_buf0[2]);
+ w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (salt_buf1[0]);
- w1_t[1] = swap_workaround (salt_buf1[1]);
- w1_t[2] = swap_workaround (salt_buf1[2]);
- w1_t[3] = swap_workaround (salt_buf1[3]);
+ w1_t[0] = swap32 (salt_buf1[0]);
+ w1_t[1] = swap32 (salt_buf1[1]);
+ w1_t[2] = swap32 (salt_buf1[2]);
+ w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
u64 pw[2];
- pw[0] = swap_workaround (hl32_to_64 (w0[1], w0[0]));
- pw[1] = swap_workaround (hl32_to_64 (w0[3], w0[2]));
+ // pw[] is declared u64, so the byte swap must be the 64-bit variant
+ pw[0] = swap64 (hl32_to_64 (w0[1], w0[0]));
+ pw[1] = swap64 (hl32_to_64 (w0[3], w0[2]));
u64 salt[2];
- salt[0] = swap_workaround (hl32_to_64 (salt_buf[1], salt_buf[0]));
- salt[1] = swap_workaround (hl32_to_64 (salt_buf[3], salt_buf[2]));
+ // salt[] is declared u64 as well; swap all eight bytes
+ salt[0] = swap64 (hl32_to_64 (salt_buf[1], salt_buf[0]));
+ salt[1] = swap64 (hl32_to_64 (salt_buf[3], salt_buf[2]));
/**
* begin
const u32 lid = get_local_id (0);
- const u64 a = swap_workaround (tmps[gid].l_alt_result[0]);
- const u64 b = swap_workaround (tmps[gid].l_alt_result[1]);
+ // a is u64 and is split into both 32-bit halves right after this,
+ // so the full 64-bit byte swap is required (b is declared the same way)
+ const u64 a = swap64 (tmps[gid].l_alt_result[0]);
+ const u64 b = swap64 (tmps[gid].l_alt_result[1]);
const u32 r0 = l32_from_64 (a);
const u32 r1 = h32_from_64 (a);
* pads
*/
- w0[0] = swap_workaround (digest_md4[0]);
- w0[1] = swap_workaround (digest_md4[1]);
- w0[2] = swap_workaround (digest_md4[2]);
- w0[3] = swap_workaround (digest_md4[3]);
+ w0[0] = swap32 (digest_md4[0]);
+ w0[1] = swap32 (digest_md4[1]);
+ w0[2] = swap32 (digest_md4[2]);
+ w0[3] = swap32 (digest_md4[3]);
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
append_0x01_4x4 (w0, w1, w2, w3, salt_len + 3);
append_0x80_4x4 (w0, w1, w2, w3, salt_len + 4);
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
u32 digest[5];
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
-
-#ifdef VECT_SIZE2
-#define COMPARE_M "check_multi_vect2_comp4.c"
-#endif
-
-#ifdef VECT_SIZE4
-#define COMPARE_M "check_multi_vect4_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4])
{
* pads
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
- w3[2] = swap_workaround (w3[2]);
- w3[3] = swap_workaround (w3[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
+ w3[2] = swap32 (w3[2]);
+ w3[3] = swap32 (w3[3]);
u32 ipad[5];
u32 opad[5];
append_0x80_3x4 (w0, w1, w2, salt_len + 4);
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
hmac_sha1_run (w0, w1, w2, w3, ipad, opad, digest);
{
- w0[0] = swap_workaround (digest[0]);
- w0[1] = swap_workaround (digest[1]);
- w0[2] = swap_workaround (digest[2]);
- w0[3] = swap_workaround (digest[3]);
+ w0[0] = swap32 (digest[0]);
+ w0[1] = swap32 (digest[1]);
+ w0[2] = swap32 (digest[2]);
+ w0[3] = swap32 (digest[3]);
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
expand_key (E, w, pw_len);
- E[ 0] = swap_workaround (E[ 0]);
- E[ 1] = swap_workaround (E[ 1]);
- E[ 2] = swap_workaround (E[ 2]);
- E[ 3] = swap_workaround (E[ 3]);
- E[ 4] = swap_workaround (E[ 4]);
- E[ 5] = swap_workaround (E[ 5]);
- E[ 6] = swap_workaround (E[ 6]);
- E[ 7] = swap_workaround (E[ 7]);
- E[ 8] = swap_workaround (E[ 8]);
- E[ 9] = swap_workaround (E[ 9]);
- E[10] = swap_workaround (E[10]);
- E[11] = swap_workaround (E[11]);
- E[12] = swap_workaround (E[12]);
- E[13] = swap_workaround (E[13]);
- E[14] = swap_workaround (E[14]);
- E[15] = swap_workaround (E[15]);
- E[16] = swap_workaround (E[16]);
- E[17] = swap_workaround (E[17]);
+ E[ 0] = swap32 (E[ 0]);
+ E[ 1] = swap32 (E[ 1]);
+ E[ 2] = swap32 (E[ 2]);
+ E[ 3] = swap32 (E[ 3]);
+ E[ 4] = swap32 (E[ 4]);
+ E[ 5] = swap32 (E[ 5]);
+ E[ 6] = swap32 (E[ 6]);
+ E[ 7] = swap32 (E[ 7]);
+ E[ 8] = swap32 (E[ 8]);
+ E[ 9] = swap32 (E[ 9]);
+ E[10] = swap32 (E[10]);
+ E[11] = swap32 (E[11]);
+ E[12] = swap32 (E[12]);
+ E[13] = swap32 (E[13]);
+ E[14] = swap32 (E[14]);
+ E[15] = swap32 (E[15]);
+ E[16] = swap32 (E[16]);
+ E[17] = swap32 (E[17]);
/**
* salt
expand_key (E, w, pw_len);
- E[ 0] = swap_workaround (E[ 0]);
- E[ 1] = swap_workaround (E[ 1]);
- E[ 2] = swap_workaround (E[ 2]);
- E[ 3] = swap_workaround (E[ 3]);
- E[ 4] = swap_workaround (E[ 4]);
- E[ 5] = swap_workaround (E[ 5]);
- E[ 6] = swap_workaround (E[ 6]);
- E[ 7] = swap_workaround (E[ 7]);
- E[ 8] = swap_workaround (E[ 8]);
- E[ 9] = swap_workaround (E[ 9]);
- E[10] = swap_workaround (E[10]);
- E[11] = swap_workaround (E[11]);
- E[12] = swap_workaround (E[12]);
- E[13] = swap_workaround (E[13]);
- E[14] = swap_workaround (E[14]);
- E[15] = swap_workaround (E[15]);
- E[16] = swap_workaround (E[16]);
- E[17] = swap_workaround (E[17]);
+ E[ 0] = swap32 (E[ 0]);
+ E[ 1] = swap32 (E[ 1]);
+ E[ 2] = swap32 (E[ 2]);
+ E[ 3] = swap32 (E[ 3]);
+ E[ 4] = swap32 (E[ 4]);
+ E[ 5] = swap32 (E[ 5]);
+ E[ 6] = swap32 (E[ 6]);
+ E[ 7] = swap32 (E[ 7]);
+ E[ 8] = swap32 (E[ 8]);
+ E[ 9] = swap32 (E[ 9]);
+ E[10] = swap32 (E[10]);
+ E[11] = swap32 (E[11]);
+ E[12] = swap32 (E[12]);
+ E[13] = swap32 (E[13]);
+ E[14] = swap32 (E[14]);
+ E[15] = swap32 (E[15]);
+ E[16] = swap32 (E[16]);
+ E[17] = swap32 (E[17]);
// load
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len);
- u32 w0 = swap_workaround (w0_t[0]);
- u32 w1 = swap_workaround (w0_t[1]);
- u32 w2 = swap_workaround (w0_t[2]);
- u32 w3 = swap_workaround (w0_t[3]);
- u32 w4 = swap_workaround (w1_t[0]);
- u32 w5 = swap_workaround (w1_t[1]);
- u32 w6 = swap_workaround (w1_t[2]);
- u32 w7 = swap_workaround (w1_t[3]);
- u32 w8 = swap_workaround (w2_t[0]);
- u32 w9 = swap_workaround (w2_t[1]);
- u32 wa = swap_workaround (w2_t[2]);
- u32 wb = swap_workaround (w2_t[3]);
- u32 wc = swap_workaround (w3_t[0]);
- u32 wd = swap_workaround (w3_t[1]);
+ u32 w0 = swap32 (w0_t[0]);
+ u32 w1 = swap32 (w0_t[1]);
+ u32 w2 = swap32 (w0_t[2]);
+ u32 w3 = swap32 (w0_t[3]);
+ u32 w4 = swap32 (w1_t[0]);
+ u32 w5 = swap32 (w1_t[1]);
+ u32 w6 = swap32 (w1_t[2]);
+ u32 w7 = swap32 (w1_t[3]);
+ u32 w8 = swap32 (w2_t[0]);
+ u32 w9 = swap32 (w2_t[1]);
+ u32 wa = swap32 (w2_t[2]);
+ u32 wb = swap32 (w2_t[3]);
+ u32 wc = swap32 (w3_t[0]);
+ u32 wd = swap32 (w3_t[1]);
u32 we = 0;
u32 wf = pw_salt_len * 8;
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len);
- u32 w0 = swap_workaround (w0_t[0]);
- u32 w1 = swap_workaround (w0_t[1]);
- u32 w2 = swap_workaround (w0_t[2]);
- u32 w3 = swap_workaround (w0_t[3]);
- u32 w4 = swap_workaround (w1_t[0]);
- u32 w5 = swap_workaround (w1_t[1]);
- u32 w6 = swap_workaround (w1_t[2]);
- u32 w7 = swap_workaround (w1_t[3]);
- u32 w8 = swap_workaround (w2_t[0]);
- u32 w9 = swap_workaround (w2_t[1]);
- u32 wa = swap_workaround (w2_t[2]);
- u32 wb = swap_workaround (w2_t[3]);
- u32 wc = swap_workaround (w3_t[0]);
- u32 wd = swap_workaround (w3_t[1]);
+ u32 w0 = swap32 (w0_t[0]);
+ u32 w1 = swap32 (w0_t[1]);
+ u32 w2 = swap32 (w0_t[2]);
+ u32 w3 = swap32 (w0_t[3]);
+ u32 w4 = swap32 (w1_t[0]);
+ u32 w5 = swap32 (w1_t[1]);
+ u32 w6 = swap32 (w1_t[2]);
+ u32 w7 = swap32 (w1_t[3]);
+ u32 w8 = swap32 (w2_t[0]);
+ u32 w9 = swap32 (w2_t[1]);
+ u32 wa = swap32 (w2_t[2]);
+ u32 wb = swap32 (w2_t[3]);
+ u32 wc = swap32 (w3_t[0]);
+ u32 wd = swap32 (w3_t[1]);
u32 we = 0;
u32 wf = pw_salt_len * 8;
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len);
- u32 w0 = swap_workaround (w0_t[0]);
- u32 w1 = swap_workaround (w0_t[1]);
- u32 w2 = swap_workaround (w0_t[2]);
- u32 w3 = swap_workaround (w0_t[3]);
- u32 w4 = swap_workaround (w1_t[0]);
- u32 w5 = swap_workaround (w1_t[1]);
- u32 w6 = swap_workaround (w1_t[2]);
- u32 w7 = swap_workaround (w1_t[3]);
- u32 w8 = swap_workaround (w2_t[0]);
- u32 w9 = swap_workaround (w2_t[1]);
- u32 wa = swap_workaround (w2_t[2]);
- u32 wb = swap_workaround (w2_t[3]);
- u32 wc = swap_workaround (w3_t[0]);
- u32 wd = swap_workaround (w3_t[1]);
+ u32 w0 = swap32 (w0_t[0]);
+ u32 w1 = swap32 (w0_t[1]);
+ u32 w2 = swap32 (w0_t[2]);
+ u32 w3 = swap32 (w0_t[3]);
+ u32 w4 = swap32 (w1_t[0]);
+ u32 w5 = swap32 (w1_t[1]);
+ u32 w6 = swap32 (w1_t[2]);
+ u32 w7 = swap32 (w1_t[3]);
+ u32 w8 = swap32 (w2_t[0]);
+ u32 w9 = swap32 (w2_t[1]);
+ u32 wa = swap32 (w2_t[2]);
+ u32 wb = swap32 (w2_t[3]);
+ u32 wc = swap32 (w3_t[0]);
+ u32 wd = swap32 (w3_t[1]);
u32 we = 0;
u32 wf = pw_salt_len * 8;
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len);
- u32 w0 = swap_workaround (w0_t[0]);
- u32 w1 = swap_workaround (w0_t[1]);
- u32 w2 = swap_workaround (w0_t[2]);
- u32 w3 = swap_workaround (w0_t[3]);
- u32 w4 = swap_workaround (w1_t[0]);
- u32 w5 = swap_workaround (w1_t[1]);
- u32 w6 = swap_workaround (w1_t[2]);
- u32 w7 = swap_workaround (w1_t[3]);
- u32 w8 = swap_workaround (w2_t[0]);
- u32 w9 = swap_workaround (w2_t[1]);
- u32 wa = swap_workaround (w2_t[2]);
- u32 wb = swap_workaround (w2_t[3]);
- u32 wc = swap_workaround (w3_t[0]);
- u32 wd = swap_workaround (w3_t[1]);
+ u32 w0 = swap32 (w0_t[0]);
+ u32 w1 = swap32 (w0_t[1]);
+ u32 w2 = swap32 (w0_t[2]);
+ u32 w3 = swap32 (w0_t[3]);
+ u32 w4 = swap32 (w1_t[0]);
+ u32 w5 = swap32 (w1_t[1]);
+ u32 w6 = swap32 (w1_t[2]);
+ u32 w7 = swap32 (w1_t[3]);
+ u32 w8 = swap32 (w2_t[0]);
+ u32 w9 = swap32 (w2_t[1]);
+ u32 wa = swap32 (w2_t[2]);
+ u32 wb = swap32 (w2_t[3]);
+ u32 wc = swap32 (w3_t[0]);
+ u32 wd = swap32 (w3_t[1]);
u32 we = 0;
u32 wf = pw_salt_len * 8;
w3_t[1] |= salt_buf3[1];
w3_t[2] |= salt_buf3[2];
- u32 w0 = swap_workaround (w0_t[0]);
- u32 w1 = swap_workaround (w0_t[1]);
- u32 w2 = swap_workaround (w0_t[2]);
- u32 w3 = swap_workaround (w0_t[3]);
- u32 w4 = swap_workaround (w1_t[0]);
- u32 w5 = swap_workaround (w1_t[1]);
- u32 w6 = swap_workaround (w1_t[2]);
- u32 w7 = swap_workaround (w1_t[3]);
- u32 w8 = swap_workaround (w2_t[0]);
- u32 w9 = swap_workaround (w2_t[1]);
- u32 wa = swap_workaround (w2_t[2]);
- u32 wb = swap_workaround (w2_t[3]);
- u32 wc = swap_workaround (w3_t[0]);
- u32 wd = swap_workaround (w3_t[1]);
- u32 we = swap_workaround (w3_t[2]);
+ u32 w0 = swap32 (w0_t[0]);
+ u32 w1 = swap32 (w0_t[1]);
+ u32 w2 = swap32 (w0_t[2]);
+ u32 w3 = swap32 (w0_t[3]);
+ u32 w4 = swap32 (w1_t[0]);
+ u32 w5 = swap32 (w1_t[1]);
+ u32 w6 = swap32 (w1_t[2]);
+ u32 w7 = swap32 (w1_t[3]);
+ u32 w8 = swap32 (w2_t[0]);
+ u32 w9 = swap32 (w2_t[1]);
+ u32 wa = swap32 (w2_t[2]);
+ u32 wb = swap32 (w2_t[3]);
+ u32 wc = swap32 (w3_t[0]);
+ u32 wd = swap32 (w3_t[1]);
+ u32 we = swap32 (w3_t[2]);
u32 wf = pw_salt_len * 8;
/**
w3_t[1] |= salt_buf3[1];
w3_t[2] |= salt_buf3[2];
- u32 w0 = swap_workaround (w0_t[0]);
- u32 w1 = swap_workaround (w0_t[1]);
- u32 w2 = swap_workaround (w0_t[2]);
- u32 w3 = swap_workaround (w0_t[3]);
- u32 w4 = swap_workaround (w1_t[0]);
- u32 w5 = swap_workaround (w1_t[1]);
- u32 w6 = swap_workaround (w1_t[2]);
- u32 w7 = swap_workaround (w1_t[3]);
- u32 w8 = swap_workaround (w2_t[0]);
- u32 w9 = swap_workaround (w2_t[1]);
- u32 wa = swap_workaround (w2_t[2]);
- u32 wb = swap_workaround (w2_t[3]);
- u32 wc = swap_workaround (w3_t[0]);
- u32 wd = swap_workaround (w3_t[1]);
- u32 we = swap_workaround (w3_t[2]);
+ u32 w0 = swap32 (w0_t[0]);
+ u32 w1 = swap32 (w0_t[1]);
+ u32 w2 = swap32 (w0_t[2]);
+ u32 w3 = swap32 (w0_t[3]);
+ u32 w4 = swap32 (w1_t[0]);
+ u32 w5 = swap32 (w1_t[1]);
+ u32 w6 = swap32 (w1_t[2]);
+ u32 w7 = swap32 (w1_t[3]);
+ u32 w8 = swap32 (w2_t[0]);
+ u32 w9 = swap32 (w2_t[1]);
+ u32 wa = swap32 (w2_t[2]);
+ u32 wb = swap32 (w2_t[3]);
+ u32 wc = swap32 (w3_t[0]);
+ u32 wd = swap32 (w3_t[1]);
+ u32 we = swap32 (w3_t[2]);
u32 wf = pw_salt_len * 8;
/**
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
-
-#ifdef VECT_SIZE2
-#define COMPARE_M "check_multi_vect2_comp4.c"
-#endif
-
-#ifdef VECT_SIZE4
-#define COMPARE_M "check_multi_vect4_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
__constant u32 k_sha256[64] =
{
* init
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
w3[2] = 0;
w3[3] = block_len * 8;
u32 salt_buf0[4];
- salt_buf0[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 0]);
- salt_buf0[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 1]);
- salt_buf0[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 2]);
- salt_buf0[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 3]);
+ salt_buf0[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 0]);
+ salt_buf0[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 1]);
+ salt_buf0[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 2]);
+ salt_buf0[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 3]);
u32 salt_buf1[4];
- salt_buf1[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 4]);
- salt_buf1[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 5]);
- salt_buf1[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 6]);
- salt_buf1[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 7]);
+ salt_buf1[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 4]);
+ salt_buf1[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 5]);
+ salt_buf1[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 6]);
+ salt_buf1[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 7]);
u32 salt_buf2[4];
- salt_buf2[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 8]);
- salt_buf2[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 9]);
- salt_buf2[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[10]);
- salt_buf2[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[11]);
+ salt_buf2[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 8]);
+ salt_buf2[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 9]);
+ salt_buf2[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[10]);
+ salt_buf2[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[11]);
u32 salt_buf3[4];
- salt_buf3[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[12]);
- salt_buf3[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[13]);
+ salt_buf3[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[12]);
+ salt_buf3[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[13]);
salt_buf3[2] = 0;
salt_buf3[3] = 0;
const u32 lid2 = lid * 2;
- s_msg_buf[lid2 + 0] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
- s_msg_buf[lid2 + 1] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
+ s_msg_buf[lid2 + 0] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
+ s_msg_buf[lid2 + 1] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
barrier (CLK_LOCAL_MEM_FENCE);
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
u32 salt_buf0[4];
- salt_buf0[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 0]);
- salt_buf0[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 1]);
- salt_buf0[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 2]);
- salt_buf0[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 3]);
+ salt_buf0[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 0]);
+ salt_buf0[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 1]);
+ salt_buf0[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 2]);
+ salt_buf0[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 3]);
u32 salt_buf1[4];
- salt_buf1[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 4]);
- salt_buf1[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 5]);
- salt_buf1[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 6]);
- salt_buf1[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 7]);
+ salt_buf1[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 4]);
+ salt_buf1[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 5]);
+ salt_buf1[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 6]);
+ salt_buf1[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 7]);
u32 salt_buf2[4];
- salt_buf2[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 8]);
- salt_buf2[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 9]);
- salt_buf2[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[10]);
- salt_buf2[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[11]);
+ salt_buf2[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 8]);
+ salt_buf2[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 9]);
+ salt_buf2[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[10]);
+ salt_buf2[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[11]);
u32 salt_buf3[4];
- salt_buf3[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[12]);
- salt_buf3[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[13]);
+ salt_buf3[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[12]);
+ salt_buf3[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[13]);
salt_buf3[2] = 0;
salt_buf3[3] = 0;
const u32 lid2 = lid * 2;
- s_msg_buf[lid2 + 0] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
- s_msg_buf[lid2 + 1] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
+ s_msg_buf[lid2 + 0] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
+ s_msg_buf[lid2 + 1] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
barrier (CLK_LOCAL_MEM_FENCE);
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
u32 salt_buf0[4];
- salt_buf0[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 0]);
- salt_buf0[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 1]);
- salt_buf0[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 2]);
- salt_buf0[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 3]);
+ salt_buf0[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 0]);
+ salt_buf0[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 1]);
+ salt_buf0[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 2]);
+ salt_buf0[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 3]);
u32 salt_buf1[4];
- salt_buf1[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 4]);
- salt_buf1[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 5]);
- salt_buf1[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 6]);
- salt_buf1[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 7]);
+ salt_buf1[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 4]);
+ salt_buf1[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 5]);
+ salt_buf1[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 6]);
+ salt_buf1[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 7]);
u32 salt_buf2[4];
- salt_buf2[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 8]);
- salt_buf2[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 9]);
- salt_buf2[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[10]);
- salt_buf2[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[11]);
+ salt_buf2[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 8]);
+ salt_buf2[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 9]);
+ salt_buf2[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[10]);
+ salt_buf2[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[11]);
u32 salt_buf3[4];
- salt_buf3[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[12]);
- salt_buf3[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[13]);
+ salt_buf3[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[12]);
+ salt_buf3[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[13]);
salt_buf3[2] = 0;
salt_buf3[3] = 0;
const u32 lid2 = lid * 2;
- s_msg_buf[lid2 + 0] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
- s_msg_buf[lid2 + 1] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
+ s_msg_buf[lid2 + 0] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
+ s_msg_buf[lid2 + 1] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
barrier (CLK_LOCAL_MEM_FENCE);
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
u32 salt_buf0[4];
- salt_buf0[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 0]);
- salt_buf0[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 1]);
- salt_buf0[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 2]);
- salt_buf0[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 3]);
+ salt_buf0[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 0]);
+ salt_buf0[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 1]);
+ salt_buf0[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 2]);
+ salt_buf0[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 3]);
u32 salt_buf1[4];
- salt_buf1[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 4]);
- salt_buf1[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 5]);
- salt_buf1[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 6]);
- salt_buf1[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 7]);
+ salt_buf1[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 4]);
+ salt_buf1[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 5]);
+ salt_buf1[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 6]);
+ salt_buf1[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 7]);
u32 salt_buf2[4];
- salt_buf2[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 8]);
- salt_buf2[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 9]);
- salt_buf2[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[10]);
- salt_buf2[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[11]);
+ salt_buf2[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 8]);
+ salt_buf2[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 9]);
+ salt_buf2[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[10]);
+ salt_buf2[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[11]);
u32 salt_buf3[4];
- salt_buf3[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[12]);
- salt_buf3[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[13]);
+ salt_buf3[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[12]);
+ salt_buf3[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[13]);
salt_buf3[2] = 0;
salt_buf3[3] = 0;
const u32 lid2 = lid * 2;
- s_msg_buf[lid2 + 0] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
- s_msg_buf[lid2 + 1] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
+ s_msg_buf[lid2 + 0] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
+ s_msg_buf[lid2 + 1] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
barrier (CLK_LOCAL_MEM_FENCE);
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
u32 salt_buf0[4];
- salt_buf0[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 0]);
- salt_buf0[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 1]);
- salt_buf0[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 2]);
- salt_buf0[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 3]);
+ salt_buf0[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 0]);
+ salt_buf0[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 1]);
+ salt_buf0[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 2]);
+ salt_buf0[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 3]);
u32 salt_buf1[4];
- salt_buf1[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 4]);
- salt_buf1[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 5]);
- salt_buf1[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 6]);
- salt_buf1[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 7]);
+ salt_buf1[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 4]);
+ salt_buf1[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 5]);
+ salt_buf1[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 6]);
+ salt_buf1[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 7]);
u32 salt_buf2[4];
- salt_buf2[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 8]);
- salt_buf2[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 9]);
- salt_buf2[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[10]);
- salt_buf2[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[11]);
+ salt_buf2[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 8]);
+ salt_buf2[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 9]);
+ salt_buf2[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[10]);
+ salt_buf2[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[11]);
u32 salt_buf3[4];
- salt_buf3[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[12]);
- salt_buf3[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[13]);
+ salt_buf3[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[12]);
+ salt_buf3[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[13]);
salt_buf3[2] = 0;
salt_buf3[3] = 0;
u32 salt_buf0[4];
- salt_buf0[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 0]);
- salt_buf0[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 1]);
- salt_buf0[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 2]);
- salt_buf0[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 3]);
+ salt_buf0[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 0]);
+ salt_buf0[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 1]);
+ salt_buf0[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 2]);
+ salt_buf0[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 3]);
u32 salt_buf1[4];
- salt_buf1[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 4]);
- salt_buf1[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 5]);
- salt_buf1[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 6]);
- salt_buf1[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 7]);
+ salt_buf1[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 4]);
+ salt_buf1[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 5]);
+ salt_buf1[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 6]);
+ salt_buf1[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 7]);
u32 salt_buf2[4];
- salt_buf2[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 8]);
- salt_buf2[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 9]);
- salt_buf2[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[10]);
- salt_buf2[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[11]);
+ salt_buf2[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 8]);
+ salt_buf2[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 9]);
+ salt_buf2[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[10]);
+ salt_buf2[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[11]);
u32 salt_buf3[4];
- salt_buf3[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[12]);
- salt_buf3[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[13]);
+ salt_buf3[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[12]);
+ salt_buf3[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[13]);
salt_buf3[2] = 0;
salt_buf3[3] = 0;
const u32 lid2 = lid * 2;
- s_msg_buf[lid2 + 0] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
- s_msg_buf[lid2 + 1] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
+ s_msg_buf[lid2 + 0] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
+ s_msg_buf[lid2 + 1] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
barrier (CLK_LOCAL_MEM_FENCE);
const u32 lid2 = lid * 2;
- s_msg_buf[lid2 + 0] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
- s_msg_buf[lid2 + 1] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
+ s_msg_buf[lid2 + 0] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
+ s_msg_buf[lid2 + 1] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
barrier (CLK_LOCAL_MEM_FENCE);
const u32 lid2 = lid * 2;
- s_msg_buf[lid2 + 0] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
- s_msg_buf[lid2 + 1] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
+ s_msg_buf[lid2 + 0] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
+ s_msg_buf[lid2 + 1] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
barrier (CLK_LOCAL_MEM_FENCE);
const u32 lid2 = lid * 2;
- s_msg_buf[lid2 + 0] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
- s_msg_buf[lid2 + 1] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
+ s_msg_buf[lid2 + 0] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
+ s_msg_buf[lid2 + 1] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
barrier (CLK_LOCAL_MEM_FENCE);
const u32 lid2 = lid * 2;
- s_msg_buf[lid2 + 0] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
- s_msg_buf[lid2 + 1] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
+ s_msg_buf[lid2 + 0] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
+ s_msg_buf[lid2 + 1] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
barrier (CLK_LOCAL_MEM_FENCE);
const u32 lid2 = lid * 2;
- s_msg_buf[lid2 + 0] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
- s_msg_buf[lid2 + 1] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
+ s_msg_buf[lid2 + 0] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 0]);
+ s_msg_buf[lid2 + 1] = swap32 (ikepsk_bufs[salt_pos].msg_buf[lid2 + 1]);
barrier (CLK_LOCAL_MEM_FENCE);
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
-
-#ifdef VECT_SIZE4
-#define COMPARE_M "check_multi_vect4_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
typedef struct
{
u32 w2[4];
u32 w3[4];
- w0[0] = swap_workaround (data0[0]);
- w0[1] = swap_workaround (data0[1]);
- w0[2] = swap_workaround (data0[2]);
- w0[3] = swap_workaround (data0[3]);
- w1[0] = swap_workaround (data1[0]);
- w1[1] = swap_workaround (data1[1]);
- w1[2] = swap_workaround (data1[2]);
- w1[3] = swap_workaround (data1[3]);
- w2[0] = swap_workaround (data2[0]);
- w2[1] = swap_workaround (data2[1]);
+ w0[0] = swap32 (data0[0]);
+ w0[1] = swap32 (data0[1]);
+ w0[2] = swap32 (data0[2]);
+ w0[3] = swap32 (data0[3]);
+ w1[0] = swap32 (data1[0]);
+ w1[1] = swap32 (data1[1]);
+ w1[2] = swap32 (data1[2]);
+ w1[3] = swap32 (data1[3]);
+ w2[0] = swap32 (data2[0]);
+ w2[1] = swap32 (data2[1]);
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w0[2] = digest[2];
w0[3] = digest[3];
w1[0] = digest[4];
- w1[1] = swap_workaround (data0[0]);
- w1[2] = swap_workaround (data0[1]);
- w1[3] = swap_workaround (data0[2]);
- w2[0] = swap_workaround (data0[3]);
- w2[1] = swap_workaround (data1[0]);
- w2[2] = swap_workaround (data1[1]);
- w2[3] = swap_workaround (data1[2]);
- w3[0] = swap_workaround (data1[3]);
- w3[1] = swap_workaround (data2[0]);
+ w1[1] = swap32 (data0[0]);
+ w1[2] = swap32 (data0[1]);
+ w1[3] = swap32 (data0[2]);
+ w2[0] = swap32 (data0[3]);
+ w2[1] = swap32 (data1[0]);
+ w2[2] = swap32 (data1[1]);
+ w2[3] = swap32 (data1[2]);
+ w3[0] = swap32 (data1[3]);
+ w3[1] = swap32 (data2[0]);
w3[2] = 0;
w3[3] = (20 + pc_len + pw_len + salt_len) * 8;
u32 wl[16];
- wl[ 0] = swap_workaround (w0[0]);
- wl[ 1] = swap_workaround (w0[1]);
- wl[ 2] = swap_workaround (w0[2]);
- wl[ 3] = swap_workaround (w0[3]);
- wl[ 4] = swap_workaround (w1[0]);
- wl[ 5] = swap_workaround (w1[1]);
- wl[ 6] = swap_workaround (w1[2]);
- wl[ 7] = swap_workaround (w1[3]);
+ wl[ 0] = swap32 (w0[0]);
+ wl[ 1] = swap32 (w0[1]);
+ wl[ 2] = swap32 (w0[2]);
+ wl[ 3] = swap32 (w0[3]);
+ wl[ 4] = swap32 (w1[0]);
+ wl[ 5] = swap32 (w1[1]);
+ wl[ 6] = swap32 (w1[2]);
+ wl[ 7] = swap32 (w1[3]);
wl[ 8] = 0;
wl[ 9] = 0;
wl[10] = 0;
u32 wl[16];
- wl[ 0] = swap_workaround (w0[0]);
- wl[ 1] = swap_workaround (w0[1]);
- wl[ 2] = swap_workaround (w0[2]);
- wl[ 3] = swap_workaround (w0[3]);
- wl[ 4] = swap_workaround (w1[0]);
- wl[ 5] = swap_workaround (w1[1]);
- wl[ 6] = swap_workaround (w1[2]);
- wl[ 7] = swap_workaround (w1[3]);
+ wl[ 0] = swap32 (w0[0]);
+ wl[ 1] = swap32 (w0[1]);
+ wl[ 2] = swap32 (w0[2]);
+ wl[ 3] = swap32 (w0[3]);
+ wl[ 4] = swap32 (w1[0]);
+ wl[ 5] = swap32 (w1[1]);
+ wl[ 6] = swap32 (w1[2]);
+ wl[ 7] = swap32 (w1[3]);
wl[ 8] = 0;
wl[ 9] = 0;
wl[10] = 0;
u32 wl[16];
- wl[ 0] = swap_workaround (w0[0]);
- wl[ 1] = swap_workaround (w0[1]);
- wl[ 2] = swap_workaround (w0[2]);
- wl[ 3] = swap_workaround (w0[3]);
- wl[ 4] = swap_workaround (w1[0]);
- wl[ 5] = swap_workaround (w1[1]);
- wl[ 6] = swap_workaround (w1[2]);
- wl[ 7] = swap_workaround (w1[3]);
+ wl[ 0] = swap32 (w0[0]);
+ wl[ 1] = swap32 (w0[1]);
+ wl[ 2] = swap32 (w0[2]);
+ wl[ 3] = swap32 (w0[3]);
+ wl[ 4] = swap32 (w1[0]);
+ wl[ 5] = swap32 (w1[1]);
+ wl[ 6] = swap32 (w1[2]);
+ wl[ 7] = swap32 (w1[3]);
wl[ 8] = 0;
wl[ 9] = 0;
wl[10] = 0;
u32 wl[16];
- wl[ 0] = swap_workaround (w0[0]);
- wl[ 1] = swap_workaround (w0[1]);
- wl[ 2] = swap_workaround (w0[2]);
- wl[ 3] = swap_workaround (w0[3]);
- wl[ 4] = swap_workaround (w1[0]);
- wl[ 5] = swap_workaround (w1[1]);
- wl[ 6] = swap_workaround (w1[2]);
- wl[ 7] = swap_workaround (w1[3]);
+ wl[ 0] = swap32 (w0[0]);
+ wl[ 1] = swap32 (w0[1]);
+ wl[ 2] = swap32 (w0[2]);
+ wl[ 3] = swap32 (w0[3]);
+ wl[ 4] = swap32 (w1[0]);
+ wl[ 5] = swap32 (w1[1]);
+ wl[ 6] = swap32 (w1[2]);
+ wl[ 7] = swap32 (w1[3]);
wl[ 8] = 0;
wl[ 9] = 0;
wl[10] = 0;
for (u32 i = 0, j = 1; i < (truecrypt_mdlen / 8 / 4); i += 5, j += 1)
{
- salt_buf2[0] = swap_workaround (j);
+ salt_buf2[0] = swap32 (j);
u32 dgst[5];
for (u32 i = 0, j = 1; i < (truecrypt_mdlen / 8 / 4); i += 5, j += 1)
{
- salt_buf2[0] = swap_workaround (j);
+ salt_buf2[0] = swap32 (j);
u32 dgst[5];
for (u32 i = 0, j = 1; i < (truecrypt_mdlen / 8 / 4); i += 5, j += 1)
{
- salt_buf2[0] = swap_workaround (j);
+ salt_buf2[0] = swap32 (j);
u32 dgst[5];
// swap missing
- salt_buf[ 0] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1]);
- salt_buf[ 1] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3]);
- salt_buf[ 2] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5]);
- salt_buf[ 3] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 6])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7]);
- salt_buf[ 4] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9]);
- salt_buf[ 5] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[10])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[11]);
- salt_buf[ 6] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[12])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[13]);
- salt_buf[ 7] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[14])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[15]);
+ salt_buf[ 0] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 0])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 1]);
+ salt_buf[ 1] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 2])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 3]);
+ salt_buf[ 2] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 4])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 5]);
+ salt_buf[ 3] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 6])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 7]);
+ salt_buf[ 4] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 8])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 9]);
+ salt_buf[ 5] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[10])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[11]);
+ salt_buf[ 6] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[12])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[13]);
+ salt_buf[ 7] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[14])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[15]);
salt_buf[ 8] = 0;
salt_buf[ 9] = 0;
salt_buf[10] = 0;
u64 w[16];
- w[ 0] = ((u64) swap_workaround (w0[0])) << 32 | (u64) swap_workaround (w0[1]);
- w[ 1] = ((u64) swap_workaround (w0[2])) << 32 | (u64) swap_workaround (w0[3]);
- w[ 2] = ((u64) swap_workaround (w1[0])) << 32 | (u64) swap_workaround (w1[1]);
- w[ 3] = ((u64) swap_workaround (w1[2])) << 32 | (u64) swap_workaround (w1[3]);
- w[ 4] = ((u64) swap_workaround (w2[0])) << 32 | (u64) swap_workaround (w2[1]);
- w[ 5] = ((u64) swap_workaround (w2[2])) << 32 | (u64) swap_workaround (w2[3]);
- w[ 6] = ((u64) swap_workaround (w3[0])) << 32 | (u64) swap_workaround (w3[1]);
- w[ 7] = ((u64) swap_workaround (w3[2])) << 32 | (u64) swap_workaround (w3[3]);
+ w[ 0] = ((u64) swap32 (w0[0])) << 32 | (u64) swap32 (w0[1]);
+ w[ 1] = ((u64) swap32 (w0[2])) << 32 | (u64) swap32 (w0[3]);
+ w[ 2] = ((u64) swap32 (w1[0])) << 32 | (u64) swap32 (w1[1]);
+ w[ 3] = ((u64) swap32 (w1[2])) << 32 | (u64) swap32 (w1[3]);
+ w[ 4] = ((u64) swap32 (w2[0])) << 32 | (u64) swap32 (w2[1]);
+ w[ 5] = ((u64) swap32 (w2[2])) << 32 | (u64) swap32 (w2[3]);
+ w[ 6] = ((u64) swap32 (w3[0])) << 32 | (u64) swap32 (w3[1]);
+ w[ 7] = ((u64) swap32 (w3[2])) << 32 | (u64) swap32 (w3[3]);
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
u32 ukey1[8];
- ukey1[0] = swap_workaround (h32_from_64 (tmps[gid].out[ 0]));
- ukey1[1] = swap_workaround (l32_from_64 (tmps[gid].out[ 0]));
- ukey1[2] = swap_workaround (h32_from_64 (tmps[gid].out[ 1]));
- ukey1[3] = swap_workaround (l32_from_64 (tmps[gid].out[ 1]));
- ukey1[4] = swap_workaround (h32_from_64 (tmps[gid].out[ 2]));
- ukey1[5] = swap_workaround (l32_from_64 (tmps[gid].out[ 2]));
- ukey1[6] = swap_workaround (h32_from_64 (tmps[gid].out[ 3]));
- ukey1[7] = swap_workaround (l32_from_64 (tmps[gid].out[ 3]));
+ ukey1[0] = swap32 (h32_from_64 (tmps[gid].out[ 0]));
+ ukey1[1] = swap32 (l32_from_64 (tmps[gid].out[ 0]));
+ ukey1[2] = swap32 (h32_from_64 (tmps[gid].out[ 1]));
+ ukey1[3] = swap32 (l32_from_64 (tmps[gid].out[ 1]));
+ ukey1[4] = swap32 (h32_from_64 (tmps[gid].out[ 2]));
+ ukey1[5] = swap32 (l32_from_64 (tmps[gid].out[ 2]));
+ ukey1[6] = swap32 (h32_from_64 (tmps[gid].out[ 3]));
+ ukey1[7] = swap32 (l32_from_64 (tmps[gid].out[ 3]));
u32 ukey2[8];
- ukey2[0] = swap_workaround (h32_from_64 (tmps[gid].out[ 4]));
- ukey2[1] = swap_workaround (l32_from_64 (tmps[gid].out[ 4]));
- ukey2[2] = swap_workaround (h32_from_64 (tmps[gid].out[ 5]));
- ukey2[3] = swap_workaround (l32_from_64 (tmps[gid].out[ 5]));
- ukey2[4] = swap_workaround (h32_from_64 (tmps[gid].out[ 6]));
- ukey2[5] = swap_workaround (l32_from_64 (tmps[gid].out[ 6]));
- ukey2[6] = swap_workaround (h32_from_64 (tmps[gid].out[ 7]));
- ukey2[7] = swap_workaround (l32_from_64 (tmps[gid].out[ 7]));
+ ukey2[0] = swap32 (h32_from_64 (tmps[gid].out[ 4]));
+ ukey2[1] = swap32 (l32_from_64 (tmps[gid].out[ 4]));
+ ukey2[2] = swap32 (h32_from_64 (tmps[gid].out[ 5]));
+ ukey2[3] = swap32 (l32_from_64 (tmps[gid].out[ 5]));
+ ukey2[4] = swap32 (h32_from_64 (tmps[gid].out[ 6]));
+ ukey2[5] = swap32 (l32_from_64 (tmps[gid].out[ 6]));
+ ukey2[6] = swap32 (h32_from_64 (tmps[gid].out[ 7]));
+ ukey2[7] = swap32 (l32_from_64 (tmps[gid].out[ 7]));
u32 data[4];
// swap missing
- salt_buf[ 0] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1]);
- salt_buf[ 1] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3]);
- salt_buf[ 2] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5]);
- salt_buf[ 3] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 6])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7]);
- salt_buf[ 4] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9]);
- salt_buf[ 5] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[10])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[11]);
- salt_buf[ 6] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[12])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[13]);
- salt_buf[ 7] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[14])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[15]);
+ salt_buf[ 0] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 0])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 1]);
+ salt_buf[ 1] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 2])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 3]);
+ salt_buf[ 2] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 4])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 5]);
+ salt_buf[ 3] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 6])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 7]);
+ salt_buf[ 4] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 8])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 9]);
+ salt_buf[ 5] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[10])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[11]);
+ salt_buf[ 6] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[12])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[13]);
+ salt_buf[ 7] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[14])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[15]);
salt_buf[ 8] = 0;
salt_buf[ 9] = 0;
salt_buf[10] = 0;
u64 w[16];
- w[ 0] = ((u64) swap_workaround (w0[0])) << 32 | (u64) swap_workaround (w0[1]);
- w[ 1] = ((u64) swap_workaround (w0[2])) << 32 | (u64) swap_workaround (w0[3]);
- w[ 2] = ((u64) swap_workaround (w1[0])) << 32 | (u64) swap_workaround (w1[1]);
- w[ 3] = ((u64) swap_workaround (w1[2])) << 32 | (u64) swap_workaround (w1[3]);
- w[ 4] = ((u64) swap_workaround (w2[0])) << 32 | (u64) swap_workaround (w2[1]);
- w[ 5] = ((u64) swap_workaround (w2[2])) << 32 | (u64) swap_workaround (w2[3]);
- w[ 6] = ((u64) swap_workaround (w3[0])) << 32 | (u64) swap_workaround (w3[1]);
- w[ 7] = ((u64) swap_workaround (w3[2])) << 32 | (u64) swap_workaround (w3[3]);
+ w[ 0] = ((u64) swap32 (w0[0])) << 32 | (u64) swap32 (w0[1]);
+ w[ 1] = ((u64) swap32 (w0[2])) << 32 | (u64) swap32 (w0[3]);
+ w[ 2] = ((u64) swap32 (w1[0])) << 32 | (u64) swap32 (w1[1]);
+ w[ 3] = ((u64) swap32 (w1[2])) << 32 | (u64) swap32 (w1[3]);
+ w[ 4] = ((u64) swap32 (w2[0])) << 32 | (u64) swap32 (w2[1]);
+ w[ 5] = ((u64) swap32 (w2[2])) << 32 | (u64) swap32 (w2[3]);
+ w[ 6] = ((u64) swap32 (w3[0])) << 32 | (u64) swap32 (w3[1]);
+ w[ 7] = ((u64) swap32 (w3[2])) << 32 | (u64) swap32 (w3[3]);
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
u32 ukey1[8];
- ukey1[0] = swap_workaround (h32_from_64 (tmps[gid].out[ 0]));
- ukey1[1] = swap_workaround (l32_from_64 (tmps[gid].out[ 0]));
- ukey1[2] = swap_workaround (h32_from_64 (tmps[gid].out[ 1]));
- ukey1[3] = swap_workaround (l32_from_64 (tmps[gid].out[ 1]));
- ukey1[4] = swap_workaround (h32_from_64 (tmps[gid].out[ 2]));
- ukey1[5] = swap_workaround (l32_from_64 (tmps[gid].out[ 2]));
- ukey1[6] = swap_workaround (h32_from_64 (tmps[gid].out[ 3]));
- ukey1[7] = swap_workaround (l32_from_64 (tmps[gid].out[ 3]));
+ ukey1[0] = swap32 (h32_from_64 (tmps[gid].out[ 0]));
+ ukey1[1] = swap32 (l32_from_64 (tmps[gid].out[ 0]));
+ ukey1[2] = swap32 (h32_from_64 (tmps[gid].out[ 1]));
+ ukey1[3] = swap32 (l32_from_64 (tmps[gid].out[ 1]));
+ ukey1[4] = swap32 (h32_from_64 (tmps[gid].out[ 2]));
+ ukey1[5] = swap32 (l32_from_64 (tmps[gid].out[ 2]));
+ ukey1[6] = swap32 (h32_from_64 (tmps[gid].out[ 3]));
+ ukey1[7] = swap32 (l32_from_64 (tmps[gid].out[ 3]));
u32 ukey2[8];
- ukey2[0] = swap_workaround (h32_from_64 (tmps[gid].out[ 4]));
- ukey2[1] = swap_workaround (l32_from_64 (tmps[gid].out[ 4]));
- ukey2[2] = swap_workaround (h32_from_64 (tmps[gid].out[ 5]));
- ukey2[3] = swap_workaround (l32_from_64 (tmps[gid].out[ 5]));
- ukey2[4] = swap_workaround (h32_from_64 (tmps[gid].out[ 6]));
- ukey2[5] = swap_workaround (l32_from_64 (tmps[gid].out[ 6]));
- ukey2[6] = swap_workaround (h32_from_64 (tmps[gid].out[ 7]));
- ukey2[7] = swap_workaround (l32_from_64 (tmps[gid].out[ 7]));
+ ukey2[0] = swap32 (h32_from_64 (tmps[gid].out[ 4]));
+ ukey2[1] = swap32 (l32_from_64 (tmps[gid].out[ 4]));
+ ukey2[2] = swap32 (h32_from_64 (tmps[gid].out[ 5]));
+ ukey2[3] = swap32 (l32_from_64 (tmps[gid].out[ 5]));
+ ukey2[4] = swap32 (h32_from_64 (tmps[gid].out[ 6]));
+ ukey2[5] = swap32 (l32_from_64 (tmps[gid].out[ 6]));
+ ukey2[6] = swap32 (h32_from_64 (tmps[gid].out[ 7]));
+ ukey2[7] = swap32 (l32_from_64 (tmps[gid].out[ 7]));
u32 data[4];
u32 ukey3[8];
- ukey3[0] = swap_workaround (h32_from_64 (tmps[gid].out[ 8]));
- ukey3[1] = swap_workaround (l32_from_64 (tmps[gid].out[ 8]));
- ukey3[2] = swap_workaround (h32_from_64 (tmps[gid].out[ 9]));
- ukey3[3] = swap_workaround (l32_from_64 (tmps[gid].out[ 9]));
- ukey3[4] = swap_workaround (h32_from_64 (tmps[gid].out[10]));
- ukey3[5] = swap_workaround (l32_from_64 (tmps[gid].out[10]));
- ukey3[6] = swap_workaround (h32_from_64 (tmps[gid].out[11]));
- ukey3[7] = swap_workaround (l32_from_64 (tmps[gid].out[11]));
+ ukey3[0] = swap32 (h32_from_64 (tmps[gid].out[ 8]));
+ ukey3[1] = swap32 (l32_from_64 (tmps[gid].out[ 8]));
+ ukey3[2] = swap32 (h32_from_64 (tmps[gid].out[ 9]));
+ ukey3[3] = swap32 (l32_from_64 (tmps[gid].out[ 9]));
+ ukey3[4] = swap32 (h32_from_64 (tmps[gid].out[10]));
+ ukey3[5] = swap32 (l32_from_64 (tmps[gid].out[10]));
+ ukey3[6] = swap32 (h32_from_64 (tmps[gid].out[11]));
+ ukey3[7] = swap32 (l32_from_64 (tmps[gid].out[11]));
u32 ukey4[8];
- ukey4[0] = swap_workaround (h32_from_64 (tmps[gid].out[12]));
- ukey4[1] = swap_workaround (l32_from_64 (tmps[gid].out[12]));
- ukey4[2] = swap_workaround (h32_from_64 (tmps[gid].out[13]));
- ukey4[3] = swap_workaround (l32_from_64 (tmps[gid].out[13]));
- ukey4[4] = swap_workaround (h32_from_64 (tmps[gid].out[14]));
- ukey4[5] = swap_workaround (l32_from_64 (tmps[gid].out[14]));
- ukey4[6] = swap_workaround (h32_from_64 (tmps[gid].out[15]));
- ukey4[7] = swap_workaround (l32_from_64 (tmps[gid].out[15]));
+ ukey4[0] = swap32 (h32_from_64 (tmps[gid].out[12]));
+ ukey4[1] = swap32 (l32_from_64 (tmps[gid].out[12]));
+ ukey4[2] = swap32 (h32_from_64 (tmps[gid].out[13]));
+ ukey4[3] = swap32 (l32_from_64 (tmps[gid].out[13]));
+ ukey4[4] = swap32 (h32_from_64 (tmps[gid].out[14]));
+ ukey4[5] = swap32 (l32_from_64 (tmps[gid].out[14]));
+ ukey4[6] = swap32 (h32_from_64 (tmps[gid].out[15]));
+ ukey4[7] = swap32 (l32_from_64 (tmps[gid].out[15]));
{
tmp[0] = data[0];
// swap missing
- salt_buf[ 0] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1]);
- salt_buf[ 1] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3]);
- salt_buf[ 2] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5]);
- salt_buf[ 3] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 6])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7]);
- salt_buf[ 4] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9]);
- salt_buf[ 5] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[10])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[11]);
- salt_buf[ 6] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[12])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[13]);
- salt_buf[ 7] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[14])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[15]);
+ salt_buf[ 0] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 0])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 1]);
+ salt_buf[ 1] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 2])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 3]);
+ salt_buf[ 2] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 4])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 5]);
+ salt_buf[ 3] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 6])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 7]);
+ salt_buf[ 4] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 8])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[ 9]);
+ salt_buf[ 5] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[10])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[11]);
+ salt_buf[ 6] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[12])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[13]);
+ salt_buf[ 7] = ((u64) swap32 (esalt_bufs[salt_pos].salt_buf[14])) << 32 | (u64) swap32 (esalt_bufs[salt_pos].salt_buf[15]);
salt_buf[ 8] = 0;
salt_buf[ 9] = 0;
salt_buf[10] = 0;
u64 w[16];
- w[ 0] = ((u64) swap_workaround (w0[0])) << 32 | (u64) swap_workaround (w0[1]);
- w[ 1] = ((u64) swap_workaround (w0[2])) << 32 | (u64) swap_workaround (w0[3]);
- w[ 2] = ((u64) swap_workaround (w1[0])) << 32 | (u64) swap_workaround (w1[1]);
- w[ 3] = ((u64) swap_workaround (w1[2])) << 32 | (u64) swap_workaround (w1[3]);
- w[ 4] = ((u64) swap_workaround (w2[0])) << 32 | (u64) swap_workaround (w2[1]);
- w[ 5] = ((u64) swap_workaround (w2[2])) << 32 | (u64) swap_workaround (w2[3]);
- w[ 6] = ((u64) swap_workaround (w3[0])) << 32 | (u64) swap_workaround (w3[1]);
- w[ 7] = ((u64) swap_workaround (w3[2])) << 32 | (u64) swap_workaround (w3[3]);
+ w[ 0] = ((u64) swap32 (w0[0])) << 32 | (u64) swap32 (w0[1]);
+ w[ 1] = ((u64) swap32 (w0[2])) << 32 | (u64) swap32 (w0[3]);
+ w[ 2] = ((u64) swap32 (w1[0])) << 32 | (u64) swap32 (w1[1]);
+ w[ 3] = ((u64) swap32 (w1[2])) << 32 | (u64) swap32 (w1[3]);
+ w[ 4] = ((u64) swap32 (w2[0])) << 32 | (u64) swap32 (w2[1]);
+ w[ 5] = ((u64) swap32 (w2[2])) << 32 | (u64) swap32 (w2[3]);
+ w[ 6] = ((u64) swap32 (w3[0])) << 32 | (u64) swap32 (w3[1]);
+ w[ 7] = ((u64) swap32 (w3[2])) << 32 | (u64) swap32 (w3[3]);
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
u32 ukey1[8];
- ukey1[0] = swap_workaround (h32_from_64 (tmps[gid].out[ 0]));
- ukey1[1] = swap_workaround (l32_from_64 (tmps[gid].out[ 0]));
- ukey1[2] = swap_workaround (h32_from_64 (tmps[gid].out[ 1]));
- ukey1[3] = swap_workaround (l32_from_64 (tmps[gid].out[ 1]));
- ukey1[4] = swap_workaround (h32_from_64 (tmps[gid].out[ 2]));
- ukey1[5] = swap_workaround (l32_from_64 (tmps[gid].out[ 2]));
- ukey1[6] = swap_workaround (h32_from_64 (tmps[gid].out[ 3]));
- ukey1[7] = swap_workaround (l32_from_64 (tmps[gid].out[ 3]));
+ ukey1[0] = swap32 (h32_from_64 (tmps[gid].out[ 0]));
+ ukey1[1] = swap32 (l32_from_64 (tmps[gid].out[ 0]));
+ ukey1[2] = swap32 (h32_from_64 (tmps[gid].out[ 1]));
+ ukey1[3] = swap32 (l32_from_64 (tmps[gid].out[ 1]));
+ ukey1[4] = swap32 (h32_from_64 (tmps[gid].out[ 2]));
+ ukey1[5] = swap32 (l32_from_64 (tmps[gid].out[ 2]));
+ ukey1[6] = swap32 (h32_from_64 (tmps[gid].out[ 3]));
+ ukey1[7] = swap32 (l32_from_64 (tmps[gid].out[ 3]));
u32 ukey2[8];
- ukey2[0] = swap_workaround (h32_from_64 (tmps[gid].out[ 4]));
- ukey2[1] = swap_workaround (l32_from_64 (tmps[gid].out[ 4]));
- ukey2[2] = swap_workaround (h32_from_64 (tmps[gid].out[ 5]));
- ukey2[3] = swap_workaround (l32_from_64 (tmps[gid].out[ 5]));
- ukey2[4] = swap_workaround (h32_from_64 (tmps[gid].out[ 6]));
- ukey2[5] = swap_workaround (l32_from_64 (tmps[gid].out[ 6]));
- ukey2[6] = swap_workaround (h32_from_64 (tmps[gid].out[ 7]));
- ukey2[7] = swap_workaround (l32_from_64 (tmps[gid].out[ 7]));
+ ukey2[0] = swap32 (h32_from_64 (tmps[gid].out[ 4]));
+ ukey2[1] = swap32 (l32_from_64 (tmps[gid].out[ 4]));
+ ukey2[2] = swap32 (h32_from_64 (tmps[gid].out[ 5]));
+ ukey2[3] = swap32 (l32_from_64 (tmps[gid].out[ 5]));
+ ukey2[4] = swap32 (h32_from_64 (tmps[gid].out[ 6]));
+ ukey2[5] = swap32 (l32_from_64 (tmps[gid].out[ 6]));
+ ukey2[6] = swap32 (h32_from_64 (tmps[gid].out[ 7]));
+ ukey2[7] = swap32 (l32_from_64 (tmps[gid].out[ 7]));
u32 data[4];
u32 ukey3[8];
- ukey3[0] = swap_workaround (h32_from_64 (tmps[gid].out[ 8]));
- ukey3[1] = swap_workaround (l32_from_64 (tmps[gid].out[ 8]));
- ukey3[2] = swap_workaround (h32_from_64 (tmps[gid].out[ 9]));
- ukey3[3] = swap_workaround (l32_from_64 (tmps[gid].out[ 9]));
- ukey3[4] = swap_workaround (h32_from_64 (tmps[gid].out[10]));
- ukey3[5] = swap_workaround (l32_from_64 (tmps[gid].out[10]));
- ukey3[6] = swap_workaround (h32_from_64 (tmps[gid].out[11]));
- ukey3[7] = swap_workaround (l32_from_64 (tmps[gid].out[11]));
+ ukey3[0] = swap32 (h32_from_64 (tmps[gid].out[ 8]));
+ ukey3[1] = swap32 (l32_from_64 (tmps[gid].out[ 8]));
+ ukey3[2] = swap32 (h32_from_64 (tmps[gid].out[ 9]));
+ ukey3[3] = swap32 (l32_from_64 (tmps[gid].out[ 9]));
+ ukey3[4] = swap32 (h32_from_64 (tmps[gid].out[10]));
+ ukey3[5] = swap32 (l32_from_64 (tmps[gid].out[10]));
+ ukey3[6] = swap32 (h32_from_64 (tmps[gid].out[11]));
+ ukey3[7] = swap32 (l32_from_64 (tmps[gid].out[11]));
u32 ukey4[8];
- ukey4[0] = swap_workaround (h32_from_64 (tmps[gid].out[12]));
- ukey4[1] = swap_workaround (l32_from_64 (tmps[gid].out[12]));
- ukey4[2] = swap_workaround (h32_from_64 (tmps[gid].out[13]));
- ukey4[3] = swap_workaround (l32_from_64 (tmps[gid].out[13]));
- ukey4[4] = swap_workaround (h32_from_64 (tmps[gid].out[14]));
- ukey4[5] = swap_workaround (l32_from_64 (tmps[gid].out[14]));
- ukey4[6] = swap_workaround (h32_from_64 (tmps[gid].out[15]));
- ukey4[7] = swap_workaround (l32_from_64 (tmps[gid].out[15]));
+ ukey4[0] = swap32 (h32_from_64 (tmps[gid].out[12]));
+ ukey4[1] = swap32 (l32_from_64 (tmps[gid].out[12]));
+ ukey4[2] = swap32 (h32_from_64 (tmps[gid].out[13]));
+ ukey4[3] = swap32 (l32_from_64 (tmps[gid].out[13]));
+ ukey4[4] = swap32 (h32_from_64 (tmps[gid].out[14]));
+ ukey4[5] = swap32 (l32_from_64 (tmps[gid].out[14]));
+ ukey4[6] = swap32 (h32_from_64 (tmps[gid].out[15]));
+ ukey4[7] = swap32 (l32_from_64 (tmps[gid].out[15]));
{
tmp[0] = data[0];
u32 ukey5[8];
- ukey5[0] = swap_workaround (h32_from_64 (tmps[gid].out[16]));
- ukey5[1] = swap_workaround (l32_from_64 (tmps[gid].out[16]));
- ukey5[2] = swap_workaround (h32_from_64 (tmps[gid].out[17]));
- ukey5[3] = swap_workaround (l32_from_64 (tmps[gid].out[17]));
- ukey5[4] = swap_workaround (h32_from_64 (tmps[gid].out[18]));
- ukey5[5] = swap_workaround (l32_from_64 (tmps[gid].out[18]));
- ukey5[6] = swap_workaround (h32_from_64 (tmps[gid].out[19]));
- ukey5[7] = swap_workaround (l32_from_64 (tmps[gid].out[19]));
+ ukey5[0] = swap32 (h32_from_64 (tmps[gid].out[16]));
+ ukey5[1] = swap32 (l32_from_64 (tmps[gid].out[16]));
+ ukey5[2] = swap32 (h32_from_64 (tmps[gid].out[17]));
+ ukey5[3] = swap32 (l32_from_64 (tmps[gid].out[17]));
+ ukey5[4] = swap32 (h32_from_64 (tmps[gid].out[18]));
+ ukey5[5] = swap32 (l32_from_64 (tmps[gid].out[18]));
+ ukey5[6] = swap32 (h32_from_64 (tmps[gid].out[19]));
+ ukey5[7] = swap32 (l32_from_64 (tmps[gid].out[19]));
u32 ukey6[8];
- ukey6[0] = swap_workaround (h32_from_64 (tmps[gid].out[20]));
- ukey6[1] = swap_workaround (l32_from_64 (tmps[gid].out[20]));
- ukey6[2] = swap_workaround (h32_from_64 (tmps[gid].out[21]));
- ukey6[3] = swap_workaround (l32_from_64 (tmps[gid].out[21]));
- ukey6[4] = swap_workaround (h32_from_64 (tmps[gid].out[22]));
- ukey6[5] = swap_workaround (l32_from_64 (tmps[gid].out[22]));
- ukey6[6] = swap_workaround (h32_from_64 (tmps[gid].out[23]));
- ukey6[7] = swap_workaround (l32_from_64 (tmps[gid].out[23]));
+ ukey6[0] = swap32 (h32_from_64 (tmps[gid].out[20]));
+ ukey6[1] = swap32 (l32_from_64 (tmps[gid].out[20]));
+ ukey6[2] = swap32 (h32_from_64 (tmps[gid].out[21]));
+ ukey6[3] = swap32 (l32_from_64 (tmps[gid].out[21]));
+ ukey6[4] = swap32 (h32_from_64 (tmps[gid].out[22]));
+ ukey6[5] = swap32 (l32_from_64 (tmps[gid].out[22]));
+ ukey6[6] = swap32 (h32_from_64 (tmps[gid].out[23]));
+ ukey6[7] = swap32 (l32_from_64 (tmps[gid].out[23]));
{
tmp[0] = data[0];
u32 salt_buf1[16];
- salt_buf1[ 0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0]);
- salt_buf1[ 1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1]);
- salt_buf1[ 2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2]);
- salt_buf1[ 3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3]);
- salt_buf1[ 4] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4]);
- salt_buf1[ 5] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5]);
- salt_buf1[ 6] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 6]);
- salt_buf1[ 7] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7]);
- salt_buf1[ 8] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8]);
- salt_buf1[ 9] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9]);
- salt_buf1[10] = swap_workaround (esalt_bufs[salt_pos].salt_buf[10]);
- salt_buf1[11] = swap_workaround (esalt_bufs[salt_pos].salt_buf[11]);
- salt_buf1[12] = swap_workaround (esalt_bufs[salt_pos].salt_buf[12]);
- salt_buf1[13] = swap_workaround (esalt_bufs[salt_pos].salt_buf[13]);
- salt_buf1[14] = swap_workaround (esalt_bufs[salt_pos].salt_buf[14]);
- salt_buf1[15] = swap_workaround (esalt_bufs[salt_pos].salt_buf[15]);
+ salt_buf1[ 0] = swap32 (esalt_bufs[salt_pos].salt_buf[ 0]);
+ salt_buf1[ 1] = swap32 (esalt_bufs[salt_pos].salt_buf[ 1]);
+ salt_buf1[ 2] = swap32 (esalt_bufs[salt_pos].salt_buf[ 2]);
+ salt_buf1[ 3] = swap32 (esalt_bufs[salt_pos].salt_buf[ 3]);
+ salt_buf1[ 4] = swap32 (esalt_bufs[salt_pos].salt_buf[ 4]);
+ salt_buf1[ 5] = swap32 (esalt_bufs[salt_pos].salt_buf[ 5]);
+ salt_buf1[ 6] = swap32 (esalt_bufs[salt_pos].salt_buf[ 6]);
+ salt_buf1[ 7] = swap32 (esalt_bufs[salt_pos].salt_buf[ 7]);
+ salt_buf1[ 8] = swap32 (esalt_bufs[salt_pos].salt_buf[ 8]);
+ salt_buf1[ 9] = swap32 (esalt_bufs[salt_pos].salt_buf[ 9]);
+ salt_buf1[10] = swap32 (esalt_bufs[salt_pos].salt_buf[10]);
+ salt_buf1[11] = swap32 (esalt_bufs[salt_pos].salt_buf[11]);
+ salt_buf1[12] = swap32 (esalt_bufs[salt_pos].salt_buf[12]);
+ salt_buf1[13] = swap32 (esalt_bufs[salt_pos].salt_buf[13]);
+ salt_buf1[14] = swap32 (esalt_bufs[salt_pos].salt_buf[14]);
+ salt_buf1[15] = swap32 (esalt_bufs[salt_pos].salt_buf[15]);
u32 salt_buf2[16];
u32 w[16];
- w[ 0] = swap_workaround (w0[0]);
- w[ 1] = swap_workaround (w0[1]);
- w[ 2] = swap_workaround (w0[2]);
- w[ 3] = swap_workaround (w0[3]);
- w[ 4] = swap_workaround (w1[0]);
- w[ 5] = swap_workaround (w1[1]);
- w[ 6] = swap_workaround (w1[2]);
- w[ 7] = swap_workaround (w1[3]);
- w[ 8] = swap_workaround (w2[0]);
- w[ 9] = swap_workaround (w2[1]);
- w[10] = swap_workaround (w2[2]);
- w[11] = swap_workaround (w2[3]);
- w[12] = swap_workaround (w3[0]);
- w[13] = swap_workaround (w3[1]);
- w[14] = swap_workaround (w3[2]);
- w[15] = swap_workaround (w3[3]);
+ w[ 0] = swap32 (w0[0]);
+ w[ 1] = swap32 (w0[1]);
+ w[ 2] = swap32 (w0[2]);
+ w[ 3] = swap32 (w0[3]);
+ w[ 4] = swap32 (w1[0]);
+ w[ 5] = swap32 (w1[1]);
+ w[ 6] = swap32 (w1[2]);
+ w[ 7] = swap32 (w1[3]);
+ w[ 8] = swap32 (w2[0]);
+ w[ 9] = swap32 (w2[1]);
+ w[10] = swap32 (w2[2]);
+ w[11] = swap32 (w2[3]);
+ w[12] = swap32 (w3[0]);
+ w[13] = swap32 (w3[1]);
+ w[14] = swap32 (w3[2]);
+ w[15] = swap32 (w3[3]);
u32 ipad[16];
u32 opad[16];
u32 ukey1[8];
- ukey1[0] = swap_workaround (tmps[gid].out[ 0]);
- ukey1[1] = swap_workaround (tmps[gid].out[ 1]);
- ukey1[2] = swap_workaround (tmps[gid].out[ 2]);
- ukey1[3] = swap_workaround (tmps[gid].out[ 3]);
- ukey1[4] = swap_workaround (tmps[gid].out[ 4]);
- ukey1[5] = swap_workaround (tmps[gid].out[ 5]);
- ukey1[6] = swap_workaround (tmps[gid].out[ 6]);
- ukey1[7] = swap_workaround (tmps[gid].out[ 7]);
+ ukey1[0] = swap32 (tmps[gid].out[ 0]);
+ ukey1[1] = swap32 (tmps[gid].out[ 1]);
+ ukey1[2] = swap32 (tmps[gid].out[ 2]);
+ ukey1[3] = swap32 (tmps[gid].out[ 3]);
+ ukey1[4] = swap32 (tmps[gid].out[ 4]);
+ ukey1[5] = swap32 (tmps[gid].out[ 5]);
+ ukey1[6] = swap32 (tmps[gid].out[ 6]);
+ ukey1[7] = swap32 (tmps[gid].out[ 7]);
u32 ukey2[8];
- ukey2[0] = swap_workaround (tmps[gid].out[ 8]);
- ukey2[1] = swap_workaround (tmps[gid].out[ 9]);
- ukey2[2] = swap_workaround (tmps[gid].out[10]);
- ukey2[3] = swap_workaround (tmps[gid].out[11]);
- ukey2[4] = swap_workaround (tmps[gid].out[12]);
- ukey2[5] = swap_workaround (tmps[gid].out[13]);
- ukey2[6] = swap_workaround (tmps[gid].out[14]);
- ukey2[7] = swap_workaround (tmps[gid].out[15]);
+ ukey2[0] = swap32 (tmps[gid].out[ 8]);
+ ukey2[1] = swap32 (tmps[gid].out[ 9]);
+ ukey2[2] = swap32 (tmps[gid].out[10]);
+ ukey2[3] = swap32 (tmps[gid].out[11]);
+ ukey2[4] = swap32 (tmps[gid].out[12]);
+ ukey2[5] = swap32 (tmps[gid].out[13]);
+ ukey2[6] = swap32 (tmps[gid].out[14]);
+ ukey2[7] = swap32 (tmps[gid].out[15]);
u32 data[4];
u32 salt_buf1[16];
- salt_buf1[ 0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0]);
- salt_buf1[ 1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1]);
- salt_buf1[ 2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2]);
- salt_buf1[ 3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3]);
- salt_buf1[ 4] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4]);
- salt_buf1[ 5] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5]);
- salt_buf1[ 6] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 6]);
- salt_buf1[ 7] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7]);
- salt_buf1[ 8] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8]);
- salt_buf1[ 9] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9]);
- salt_buf1[10] = swap_workaround (esalt_bufs[salt_pos].salt_buf[10]);
- salt_buf1[11] = swap_workaround (esalt_bufs[salt_pos].salt_buf[11]);
- salt_buf1[12] = swap_workaround (esalt_bufs[salt_pos].salt_buf[12]);
- salt_buf1[13] = swap_workaround (esalt_bufs[salt_pos].salt_buf[13]);
- salt_buf1[14] = swap_workaround (esalt_bufs[salt_pos].salt_buf[14]);
- salt_buf1[15] = swap_workaround (esalt_bufs[salt_pos].salt_buf[15]);
+ salt_buf1[ 0] = swap32 (esalt_bufs[salt_pos].salt_buf[ 0]);
+ salt_buf1[ 1] = swap32 (esalt_bufs[salt_pos].salt_buf[ 1]);
+ salt_buf1[ 2] = swap32 (esalt_bufs[salt_pos].salt_buf[ 2]);
+ salt_buf1[ 3] = swap32 (esalt_bufs[salt_pos].salt_buf[ 3]);
+ salt_buf1[ 4] = swap32 (esalt_bufs[salt_pos].salt_buf[ 4]);
+ salt_buf1[ 5] = swap32 (esalt_bufs[salt_pos].salt_buf[ 5]);
+ salt_buf1[ 6] = swap32 (esalt_bufs[salt_pos].salt_buf[ 6]);
+ salt_buf1[ 7] = swap32 (esalt_bufs[salt_pos].salt_buf[ 7]);
+ salt_buf1[ 8] = swap32 (esalt_bufs[salt_pos].salt_buf[ 8]);
+ salt_buf1[ 9] = swap32 (esalt_bufs[salt_pos].salt_buf[ 9]);
+ salt_buf1[10] = swap32 (esalt_bufs[salt_pos].salt_buf[10]);
+ salt_buf1[11] = swap32 (esalt_bufs[salt_pos].salt_buf[11]);
+ salt_buf1[12] = swap32 (esalt_bufs[salt_pos].salt_buf[12]);
+ salt_buf1[13] = swap32 (esalt_bufs[salt_pos].salt_buf[13]);
+ salt_buf1[14] = swap32 (esalt_bufs[salt_pos].salt_buf[14]);
+ salt_buf1[15] = swap32 (esalt_bufs[salt_pos].salt_buf[15]);
u32 salt_buf2[16];
u32 w[16];
- w[ 0] = swap_workaround (w0[0]);
- w[ 1] = swap_workaround (w0[1]);
- w[ 2] = swap_workaround (w0[2]);
- w[ 3] = swap_workaround (w0[3]);
- w[ 4] = swap_workaround (w1[0]);
- w[ 5] = swap_workaround (w1[1]);
- w[ 6] = swap_workaround (w1[2]);
- w[ 7] = swap_workaround (w1[3]);
- w[ 8] = swap_workaround (w2[0]);
- w[ 9] = swap_workaround (w2[1]);
- w[10] = swap_workaround (w2[2]);
- w[11] = swap_workaround (w2[3]);
- w[12] = swap_workaround (w3[0]);
- w[13] = swap_workaround (w3[1]);
- w[14] = swap_workaround (w3[2]);
- w[15] = swap_workaround (w3[3]);
+ w[ 0] = swap32 (w0[0]);
+ w[ 1] = swap32 (w0[1]);
+ w[ 2] = swap32 (w0[2]);
+ w[ 3] = swap32 (w0[3]);
+ w[ 4] = swap32 (w1[0]);
+ w[ 5] = swap32 (w1[1]);
+ w[ 6] = swap32 (w1[2]);
+ w[ 7] = swap32 (w1[3]);
+ w[ 8] = swap32 (w2[0]);
+ w[ 9] = swap32 (w2[1]);
+ w[10] = swap32 (w2[2]);
+ w[11] = swap32 (w2[3]);
+ w[12] = swap32 (w3[0]);
+ w[13] = swap32 (w3[1]);
+ w[14] = swap32 (w3[2]);
+ w[15] = swap32 (w3[3]);
u32 ipad[16];
u32 opad[16];
u32 ukey1[8];
- ukey1[0] = swap_workaround (tmps[gid].out[ 0]);
- ukey1[1] = swap_workaround (tmps[gid].out[ 1]);
- ukey1[2] = swap_workaround (tmps[gid].out[ 2]);
- ukey1[3] = swap_workaround (tmps[gid].out[ 3]);
- ukey1[4] = swap_workaround (tmps[gid].out[ 4]);
- ukey1[5] = swap_workaround (tmps[gid].out[ 5]);
- ukey1[6] = swap_workaround (tmps[gid].out[ 6]);
- ukey1[7] = swap_workaround (tmps[gid].out[ 7]);
+ ukey1[0] = swap32 (tmps[gid].out[ 0]);
+ ukey1[1] = swap32 (tmps[gid].out[ 1]);
+ ukey1[2] = swap32 (tmps[gid].out[ 2]);
+ ukey1[3] = swap32 (tmps[gid].out[ 3]);
+ ukey1[4] = swap32 (tmps[gid].out[ 4]);
+ ukey1[5] = swap32 (tmps[gid].out[ 5]);
+ ukey1[6] = swap32 (tmps[gid].out[ 6]);
+ ukey1[7] = swap32 (tmps[gid].out[ 7]);
u32 ukey2[8];
- ukey2[0] = swap_workaround (tmps[gid].out[ 8]);
- ukey2[1] = swap_workaround (tmps[gid].out[ 9]);
- ukey2[2] = swap_workaround (tmps[gid].out[10]);
- ukey2[3] = swap_workaround (tmps[gid].out[11]);
- ukey2[4] = swap_workaround (tmps[gid].out[12]);
- ukey2[5] = swap_workaround (tmps[gid].out[13]);
- ukey2[6] = swap_workaround (tmps[gid].out[14]);
- ukey2[7] = swap_workaround (tmps[gid].out[15]);
+ ukey2[0] = swap32 (tmps[gid].out[ 8]);
+ ukey2[1] = swap32 (tmps[gid].out[ 9]);
+ ukey2[2] = swap32 (tmps[gid].out[10]);
+ ukey2[3] = swap32 (tmps[gid].out[11]);
+ ukey2[4] = swap32 (tmps[gid].out[12]);
+ ukey2[5] = swap32 (tmps[gid].out[13]);
+ ukey2[6] = swap32 (tmps[gid].out[14]);
+ ukey2[7] = swap32 (tmps[gid].out[15]);
u32 data[4];
u32 ukey3[8];
- ukey3[0] = swap_workaround (tmps[gid].out[16]);
- ukey3[1] = swap_workaround (tmps[gid].out[17]);
- ukey3[2] = swap_workaround (tmps[gid].out[18]);
- ukey3[3] = swap_workaround (tmps[gid].out[19]);
- ukey3[4] = swap_workaround (tmps[gid].out[20]);
- ukey3[5] = swap_workaround (tmps[gid].out[21]);
- ukey3[6] = swap_workaround (tmps[gid].out[22]);
- ukey3[7] = swap_workaround (tmps[gid].out[23]);
+ ukey3[0] = swap32 (tmps[gid].out[16]);
+ ukey3[1] = swap32 (tmps[gid].out[17]);
+ ukey3[2] = swap32 (tmps[gid].out[18]);
+ ukey3[3] = swap32 (tmps[gid].out[19]);
+ ukey3[4] = swap32 (tmps[gid].out[20]);
+ ukey3[5] = swap32 (tmps[gid].out[21]);
+ ukey3[6] = swap32 (tmps[gid].out[22]);
+ ukey3[7] = swap32 (tmps[gid].out[23]);
u32 ukey4[8];
- ukey4[0] = swap_workaround (tmps[gid].out[24]);
- ukey4[1] = swap_workaround (tmps[gid].out[25]);
- ukey4[2] = swap_workaround (tmps[gid].out[26]);
- ukey4[3] = swap_workaround (tmps[gid].out[27]);
- ukey4[4] = swap_workaround (tmps[gid].out[28]);
- ukey4[5] = swap_workaround (tmps[gid].out[29]);
- ukey4[6] = swap_workaround (tmps[gid].out[30]);
- ukey4[7] = swap_workaround (tmps[gid].out[31]);
+ ukey4[0] = swap32 (tmps[gid].out[24]);
+ ukey4[1] = swap32 (tmps[gid].out[25]);
+ ukey4[2] = swap32 (tmps[gid].out[26]);
+ ukey4[3] = swap32 (tmps[gid].out[27]);
+ ukey4[4] = swap32 (tmps[gid].out[28]);
+ ukey4[5] = swap32 (tmps[gid].out[29]);
+ ukey4[6] = swap32 (tmps[gid].out[30]);
+ ukey4[7] = swap32 (tmps[gid].out[31]);
{
tmp[0] = data[0];
u32 salt_buf1[16];
- salt_buf1[ 0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0]);
- salt_buf1[ 1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1]);
- salt_buf1[ 2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2]);
- salt_buf1[ 3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3]);
- salt_buf1[ 4] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4]);
- salt_buf1[ 5] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5]);
- salt_buf1[ 6] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 6]);
- salt_buf1[ 7] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7]);
- salt_buf1[ 8] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8]);
- salt_buf1[ 9] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9]);
- salt_buf1[10] = swap_workaround (esalt_bufs[salt_pos].salt_buf[10]);
- salt_buf1[11] = swap_workaround (esalt_bufs[salt_pos].salt_buf[11]);
- salt_buf1[12] = swap_workaround (esalt_bufs[salt_pos].salt_buf[12]);
- salt_buf1[13] = swap_workaround (esalt_bufs[salt_pos].salt_buf[13]);
- salt_buf1[14] = swap_workaround (esalt_bufs[salt_pos].salt_buf[14]);
- salt_buf1[15] = swap_workaround (esalt_bufs[salt_pos].salt_buf[15]);
+ salt_buf1[ 0] = swap32 (esalt_bufs[salt_pos].salt_buf[ 0]);
+ salt_buf1[ 1] = swap32 (esalt_bufs[salt_pos].salt_buf[ 1]);
+ salt_buf1[ 2] = swap32 (esalt_bufs[salt_pos].salt_buf[ 2]);
+ salt_buf1[ 3] = swap32 (esalt_bufs[salt_pos].salt_buf[ 3]);
+ salt_buf1[ 4] = swap32 (esalt_bufs[salt_pos].salt_buf[ 4]);
+ salt_buf1[ 5] = swap32 (esalt_bufs[salt_pos].salt_buf[ 5]);
+ salt_buf1[ 6] = swap32 (esalt_bufs[salt_pos].salt_buf[ 6]);
+ salt_buf1[ 7] = swap32 (esalt_bufs[salt_pos].salt_buf[ 7]);
+ salt_buf1[ 8] = swap32 (esalt_bufs[salt_pos].salt_buf[ 8]);
+ salt_buf1[ 9] = swap32 (esalt_bufs[salt_pos].salt_buf[ 9]);
+ salt_buf1[10] = swap32 (esalt_bufs[salt_pos].salt_buf[10]);
+ salt_buf1[11] = swap32 (esalt_bufs[salt_pos].salt_buf[11]);
+ salt_buf1[12] = swap32 (esalt_bufs[salt_pos].salt_buf[12]);
+ salt_buf1[13] = swap32 (esalt_bufs[salt_pos].salt_buf[13]);
+ salt_buf1[14] = swap32 (esalt_bufs[salt_pos].salt_buf[14]);
+ salt_buf1[15] = swap32 (esalt_bufs[salt_pos].salt_buf[15]);
u32 salt_buf2[16];
u32 w[16];
- w[ 0] = swap_workaround (w0[0]);
- w[ 1] = swap_workaround (w0[1]);
- w[ 2] = swap_workaround (w0[2]);
- w[ 3] = swap_workaround (w0[3]);
- w[ 4] = swap_workaround (w1[0]);
- w[ 5] = swap_workaround (w1[1]);
- w[ 6] = swap_workaround (w1[2]);
- w[ 7] = swap_workaround (w1[3]);
- w[ 8] = swap_workaround (w2[0]);
- w[ 9] = swap_workaround (w2[1]);
- w[10] = swap_workaround (w2[2]);
- w[11] = swap_workaround (w2[3]);
- w[12] = swap_workaround (w3[0]);
- w[13] = swap_workaround (w3[1]);
- w[14] = swap_workaround (w3[2]);
- w[15] = swap_workaround (w3[3]);
+ w[ 0] = swap32 (w0[0]);
+ w[ 1] = swap32 (w0[1]);
+ w[ 2] = swap32 (w0[2]);
+ w[ 3] = swap32 (w0[3]);
+ w[ 4] = swap32 (w1[0]);
+ w[ 5] = swap32 (w1[1]);
+ w[ 6] = swap32 (w1[2]);
+ w[ 7] = swap32 (w1[3]);
+ w[ 8] = swap32 (w2[0]);
+ w[ 9] = swap32 (w2[1]);
+ w[10] = swap32 (w2[2]);
+ w[11] = swap32 (w2[3]);
+ w[12] = swap32 (w3[0]);
+ w[13] = swap32 (w3[1]);
+ w[14] = swap32 (w3[2]);
+ w[15] = swap32 (w3[3]);
u32 ipad[16];
u32 opad[16];
u32 ukey1[8];
- ukey1[0] = swap_workaround (tmps[gid].out[ 0]);
- ukey1[1] = swap_workaround (tmps[gid].out[ 1]);
- ukey1[2] = swap_workaround (tmps[gid].out[ 2]);
- ukey1[3] = swap_workaround (tmps[gid].out[ 3]);
- ukey1[4] = swap_workaround (tmps[gid].out[ 4]);
- ukey1[5] = swap_workaround (tmps[gid].out[ 5]);
- ukey1[6] = swap_workaround (tmps[gid].out[ 6]);
- ukey1[7] = swap_workaround (tmps[gid].out[ 7]);
+ ukey1[0] = swap32 (tmps[gid].out[ 0]);
+ ukey1[1] = swap32 (tmps[gid].out[ 1]);
+ ukey1[2] = swap32 (tmps[gid].out[ 2]);
+ ukey1[3] = swap32 (tmps[gid].out[ 3]);
+ ukey1[4] = swap32 (tmps[gid].out[ 4]);
+ ukey1[5] = swap32 (tmps[gid].out[ 5]);
+ ukey1[6] = swap32 (tmps[gid].out[ 6]);
+ ukey1[7] = swap32 (tmps[gid].out[ 7]);
u32 ukey2[8];
- ukey2[0] = swap_workaround (tmps[gid].out[ 8]);
- ukey2[1] = swap_workaround (tmps[gid].out[ 9]);
- ukey2[2] = swap_workaround (tmps[gid].out[10]);
- ukey2[3] = swap_workaround (tmps[gid].out[11]);
- ukey2[4] = swap_workaround (tmps[gid].out[12]);
- ukey2[5] = swap_workaround (tmps[gid].out[13]);
- ukey2[6] = swap_workaround (tmps[gid].out[14]);
- ukey2[7] = swap_workaround (tmps[gid].out[15]);
+ ukey2[0] = swap32 (tmps[gid].out[ 8]);
+ ukey2[1] = swap32 (tmps[gid].out[ 9]);
+ ukey2[2] = swap32 (tmps[gid].out[10]);
+ ukey2[3] = swap32 (tmps[gid].out[11]);
+ ukey2[4] = swap32 (tmps[gid].out[12]);
+ ukey2[5] = swap32 (tmps[gid].out[13]);
+ ukey2[6] = swap32 (tmps[gid].out[14]);
+ ukey2[7] = swap32 (tmps[gid].out[15]);
u32 data[4];
u32 ukey3[8];
- ukey3[0] = swap_workaround (tmps[gid].out[16]);
- ukey3[1] = swap_workaround (tmps[gid].out[17]);
- ukey3[2] = swap_workaround (tmps[gid].out[18]);
- ukey3[3] = swap_workaround (tmps[gid].out[19]);
- ukey3[4] = swap_workaround (tmps[gid].out[20]);
- ukey3[5] = swap_workaround (tmps[gid].out[21]);
- ukey3[6] = swap_workaround (tmps[gid].out[22]);
- ukey3[7] = swap_workaround (tmps[gid].out[23]);
+ ukey3[0] = swap32 (tmps[gid].out[16]);
+ ukey3[1] = swap32 (tmps[gid].out[17]);
+ ukey3[2] = swap32 (tmps[gid].out[18]);
+ ukey3[3] = swap32 (tmps[gid].out[19]);
+ ukey3[4] = swap32 (tmps[gid].out[20]);
+ ukey3[5] = swap32 (tmps[gid].out[21]);
+ ukey3[6] = swap32 (tmps[gid].out[22]);
+ ukey3[7] = swap32 (tmps[gid].out[23]);
u32 ukey4[8];
- ukey4[0] = swap_workaround (tmps[gid].out[24]);
- ukey4[1] = swap_workaround (tmps[gid].out[25]);
- ukey4[2] = swap_workaround (tmps[gid].out[26]);
- ukey4[3] = swap_workaround (tmps[gid].out[27]);
- ukey4[4] = swap_workaround (tmps[gid].out[28]);
- ukey4[5] = swap_workaround (tmps[gid].out[29]);
- ukey4[6] = swap_workaround (tmps[gid].out[30]);
- ukey4[7] = swap_workaround (tmps[gid].out[31]);
+ ukey4[0] = swap32 (tmps[gid].out[24]);
+ ukey4[1] = swap32 (tmps[gid].out[25]);
+ ukey4[2] = swap32 (tmps[gid].out[26]);
+ ukey4[3] = swap32 (tmps[gid].out[27]);
+ ukey4[4] = swap32 (tmps[gid].out[28]);
+ ukey4[5] = swap32 (tmps[gid].out[29]);
+ ukey4[6] = swap32 (tmps[gid].out[30]);
+ ukey4[7] = swap32 (tmps[gid].out[31]);
{
tmp[0] = data[0];
u32 ukey5[8];
- ukey5[0] = swap_workaround (tmps[gid].out[32]);
- ukey5[1] = swap_workaround (tmps[gid].out[33]);
- ukey5[2] = swap_workaround (tmps[gid].out[34]);
- ukey5[3] = swap_workaround (tmps[gid].out[35]);
- ukey5[4] = swap_workaround (tmps[gid].out[36]);
- ukey5[5] = swap_workaround (tmps[gid].out[37]);
- ukey5[6] = swap_workaround (tmps[gid].out[38]);
- ukey5[7] = swap_workaround (tmps[gid].out[39]);
+ ukey5[0] = swap32 (tmps[gid].out[32]);
+ ukey5[1] = swap32 (tmps[gid].out[33]);
+ ukey5[2] = swap32 (tmps[gid].out[34]);
+ ukey5[3] = swap32 (tmps[gid].out[35]);
+ ukey5[4] = swap32 (tmps[gid].out[36]);
+ ukey5[5] = swap32 (tmps[gid].out[37]);
+ ukey5[6] = swap32 (tmps[gid].out[38]);
+ ukey5[7] = swap32 (tmps[gid].out[39]);
u32 ukey6[8];
- ukey6[0] = swap_workaround (tmps[gid].out[40]);
- ukey6[1] = swap_workaround (tmps[gid].out[41]);
- ukey6[2] = swap_workaround (tmps[gid].out[42]);
- ukey6[3] = swap_workaround (tmps[gid].out[43]);
- ukey6[4] = swap_workaround (tmps[gid].out[44]);
- ukey6[5] = swap_workaround (tmps[gid].out[45]);
- ukey6[6] = swap_workaround (tmps[gid].out[46]);
- ukey6[7] = swap_workaround (tmps[gid].out[47]);
+ ukey6[0] = swap32 (tmps[gid].out[40]);
+ ukey6[1] = swap32 (tmps[gid].out[41]);
+ ukey6[2] = swap32 (tmps[gid].out[42]);
+ ukey6[3] = swap32 (tmps[gid].out[43]);
+ ukey6[4] = swap32 (tmps[gid].out[44]);
+ ukey6[5] = swap32 (tmps[gid].out[45]);
+ ukey6[6] = swap32 (tmps[gid].out[46]);
+ ukey6[7] = swap32 (tmps[gid].out[47]);
{
tmp[0] = data[0];
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
-
-#ifdef VECT_SIZE4
-#define COMPARE_M "check_multi_vect4_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4])
{
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
-
-#ifdef VECT_SIZE2
-#define COMPARE_M "check_multi_vect2_comp4.c"
-#endif
-
-#ifdef VECT_SIZE4
-#define COMPARE_M "check_multi_vect4_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
__constant u32 k_sha256[64] =
{
* pads
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
- w3[2] = swap_workaround (w3[2]);
- w3[3] = swap_workaround (w3[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
+ w3[2] = swap32 (w3[2]);
+ w3[3] = swap32 (w3[3]);
u32 ipad[8];
u32 opad[8];
w3[2] = salt_buf3[2];
// w3[3] = salt_buf3[3];
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
- w3[2] = swap_workaround (w3[2]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
+ w3[2] = swap32 (w3[2]);
w3[3] = (64 + salt_len + 4) * 8;
u32 dgst[8];
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
-
-#ifdef VECT_SIZE2
-#define COMPARE_M "check_multi_vect2_comp4.c"
-#endif
-
-#ifdef VECT_SIZE4
-#define COMPARE_M "check_multi_vect4_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
__constant u64 k_sha512[80] =
{
* pads
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
- w3[2] = swap_workaround (w3[2]);
- w3[3] = swap_workaround (w3[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
+ w3[2] = swap32 (w3[2]);
+ w3[3] = swap32 (w3[3]);
u64 w0l[4];
u64 w1l[4];
w3l[2] = 0;
w3l[3] = 0;
- w0l[0] = swap_workaround (w0l[0]);
- w0l[1] = swap_workaround (w0l[1]);
- w0l[2] = swap_workaround (w0l[2]);
- w0l[3] = swap_workaround (w0l[3]);
- w1l[0] = swap_workaround (w1l[0]);
- w1l[1] = swap_workaround (w1l[1]);
- w1l[2] = swap_workaround (w1l[2]);
- w1l[3] = swap_workaround (w1l[3]);
+ w0l[0] = swap32 (w0l[0]);
+ w0l[1] = swap32 (w0l[1]);
+ w0l[2] = swap32 (w0l[2]);
+ w0l[3] = swap32 (w0l[3]);
+ w1l[0] = swap32 (w1l[0]);
+ w1l[1] = swap32 (w1l[1]);
+ w1l[2] = swap32 (w1l[2]);
+ w1l[3] = swap32 (w1l[3]);
w2l[0] = 0;
w2l[1] = 0;
w2l[2] = 0;
* pads
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
- w3[2] = swap_workaround (w3[2]);
- w3[3] = swap_workaround (w3[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
+ w3[2] = swap32 (w3[2]);
+ w3[3] = swap32 (w3[3]);
u32 ipad[5];
u32 opad[5];
append_0x01_1x4 (w0, salt_len + 3);
append_0x80_1x4 (w0, salt_len + 4);
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
-
-#ifdef VECT_SIZE2
-#define COMPARE_M "check_multi_vect2_comp4.c"
-#endif
-
-#ifdef VECT_SIZE4
-#define COMPARE_M "check_multi_vect4_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5])
{
* pads
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
- w3[2] = swap_workaround (w3[2]);
- w3[3] = swap_workaround (w3[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
+ w3[2] = swap32 (w3[2]);
+ w3[3] = swap32 (w3[3]);
u32 ipad[5];
u32 opad[5];
w3[2] = salt_buf3[2];
//w3[3] = salt_buf3[3];
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
- w3[2] = swap_workaround (w3[2]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
+ w3[2] = swap32 (w3[2]);
w3[3] = (64 + salt_len + 4) * 8;
u32 dgst[5];
* pads
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
- w3[2] = swap_workaround (w3[2]);
- w3[3] = swap_workaround (w3[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
+ w3[2] = swap32 (w3[2]);
+ w3[3] = swap32 (w3[3]);
u32 ipad[8];
u32 opad[8];
append_0x01_3x4 (w0, w1, w2, salt_len + 3);
append_0x80_3x4 (w0, w1, w2, salt_len + 4);
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
salt_buf[2] = salt_bufs[salt_pos].salt_buf[2];
salt_buf[3] = salt_bufs[salt_pos].salt_buf[3];
- out[0] = swap_workaround (out[0]);
- out[1] = swap_workaround (out[1]);
- out[2] = swap_workaround (out[2]);
- out[3] = swap_workaround (out[3]);
+ out[0] = swap32 (out[0]);
+ out[1] = swap32 (out[1]);
+ out[2] = swap32 (out[2]);
+ out[3] = swap32 (out[3]);
truncate_block (out, salt_len);
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
__constant u64 k_sha512[80] =
{
u32 w0[4];
- w0[0] = swap_workaround (pws[gid].i[ 0]);
- w0[1] = swap_workaround (pws[gid].i[ 1]);
- w0[2] = swap_workaround (pws[gid].i[ 2]);
- w0[3] = swap_workaround (pws[gid].i[ 3]);
+ w0[0] = swap32 (pws[gid].i[ 0]);
+ w0[1] = swap32 (pws[gid].i[ 1]);
+ w0[2] = swap32 (pws[gid].i[ 2]);
+ w0[3] = swap32 (pws[gid].i[ 3]);
u32 w1[4];
- w1[0] = swap_workaround (pws[gid].i[ 4]);
- w1[1] = swap_workaround (pws[gid].i[ 5]);
- w1[2] = swap_workaround (pws[gid].i[ 6]);
- w1[3] = swap_workaround (pws[gid].i[ 7]);
+ w1[0] = swap32 (pws[gid].i[ 4]);
+ w1[1] = swap32 (pws[gid].i[ 5]);
+ w1[2] = swap32 (pws[gid].i[ 6]);
+ w1[3] = swap32 (pws[gid].i[ 7]);
u32 w2[4];
- w2[0] = swap_workaround (pws[gid].i[ 8]);
- w2[1] = swap_workaround (pws[gid].i[ 9]);
- w2[2] = swap_workaround (pws[gid].i[10]);
- w2[3] = swap_workaround (pws[gid].i[11]);
+ w2[0] = swap32 (pws[gid].i[ 8]);
+ w2[1] = swap32 (pws[gid].i[ 9]);
+ w2[2] = swap32 (pws[gid].i[10]);
+ w2[3] = swap32 (pws[gid].i[11]);
u32 w3[4];
- w3[0] = swap_workaround (pws[gid].i[12]);
- w3[1] = swap_workaround (pws[gid].i[13]);
- w3[2] = swap_workaround (pws[gid].i[14]);
- w3[3] = swap_workaround (pws[gid].i[15]);
+ w3[0] = swap32 (pws[gid].i[12]);
+ w3[1] = swap32 (pws[gid].i[13]);
+ w3[2] = swap32 (pws[gid].i[14]);
+ w3[3] = swap32 (pws[gid].i[15]);
/**
* salt
u32 salt_len = salt_bufs[salt_pos].salt_len;
- esalt_buf[ 0] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0]), swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1]));
- esalt_buf[ 1] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2]), swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3]));
- esalt_buf[ 2] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4]), swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5]));
- esalt_buf[ 3] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[ 6]), swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7]));
- esalt_buf[ 4] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8]), swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9]));
- esalt_buf[ 5] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[10]), swap_workaround (esalt_bufs[salt_pos].salt_buf[11]));
- esalt_buf[ 6] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[12]), swap_workaround (esalt_bufs[salt_pos].salt_buf[13]));
- esalt_buf[ 7] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[14]), swap_workaround (esalt_bufs[salt_pos].salt_buf[15]));
- esalt_buf[ 8] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[16]), swap_workaround (esalt_bufs[salt_pos].salt_buf[17]));
- esalt_buf[ 9] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[18]), swap_workaround (esalt_bufs[salt_pos].salt_buf[19]));
- esalt_buf[10] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[20]), swap_workaround (esalt_bufs[salt_pos].salt_buf[21]));
- esalt_buf[11] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[22]), swap_workaround (esalt_bufs[salt_pos].salt_buf[23]));
- esalt_buf[12] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[24]), swap_workaround (esalt_bufs[salt_pos].salt_buf[25]));
- esalt_buf[13] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[26]), swap_workaround (esalt_bufs[salt_pos].salt_buf[27]));
+ esalt_buf[ 0] = hl32_to_64 (swap32 (esalt_bufs[salt_pos].salt_buf[ 0]), swap32 (esalt_bufs[salt_pos].salt_buf[ 1]));
+ esalt_buf[ 1] = hl32_to_64 (swap32 (esalt_bufs[salt_pos].salt_buf[ 2]), swap32 (esalt_bufs[salt_pos].salt_buf[ 3]));
+ esalt_buf[ 2] = hl32_to_64 (swap32 (esalt_bufs[salt_pos].salt_buf[ 4]), swap32 (esalt_bufs[salt_pos].salt_buf[ 5]));
+ esalt_buf[ 3] = hl32_to_64 (swap32 (esalt_bufs[salt_pos].salt_buf[ 6]), swap32 (esalt_bufs[salt_pos].salt_buf[ 7]));
+ esalt_buf[ 4] = hl32_to_64 (swap32 (esalt_bufs[salt_pos].salt_buf[ 8]), swap32 (esalt_bufs[salt_pos].salt_buf[ 9]));
+ esalt_buf[ 5] = hl32_to_64 (swap32 (esalt_bufs[salt_pos].salt_buf[10]), swap32 (esalt_bufs[salt_pos].salt_buf[11]));
+ esalt_buf[ 6] = hl32_to_64 (swap32 (esalt_bufs[salt_pos].salt_buf[12]), swap32 (esalt_bufs[salt_pos].salt_buf[13]));
+ esalt_buf[ 7] = hl32_to_64 (swap32 (esalt_bufs[salt_pos].salt_buf[14]), swap32 (esalt_bufs[salt_pos].salt_buf[15]));
+ esalt_buf[ 8] = hl32_to_64 (swap32 (esalt_bufs[salt_pos].salt_buf[16]), swap32 (esalt_bufs[salt_pos].salt_buf[17]));
+ esalt_buf[ 9] = hl32_to_64 (swap32 (esalt_bufs[salt_pos].salt_buf[18]), swap32 (esalt_bufs[salt_pos].salt_buf[19]));
+ esalt_buf[10] = hl32_to_64 (swap32 (esalt_bufs[salt_pos].salt_buf[20]), swap32 (esalt_bufs[salt_pos].salt_buf[21]));
+ esalt_buf[11] = hl32_to_64 (swap32 (esalt_bufs[salt_pos].salt_buf[22]), swap32 (esalt_bufs[salt_pos].salt_buf[23]));
+ esalt_buf[12] = hl32_to_64 (swap32 (esalt_bufs[salt_pos].salt_buf[24]), swap32 (esalt_bufs[salt_pos].salt_buf[25]));
+ esalt_buf[13] = hl32_to_64 (swap32 (esalt_bufs[salt_pos].salt_buf[26]), swap32 (esalt_bufs[salt_pos].salt_buf[27]));
esalt_buf[14] = 0;
esalt_buf[15] = (128 + salt_len + 4) * 8;
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
__constant u32 k_sha256[64] =
{
u32 g = digest[6];
u32 h = digest[7];
- u32 w0_t = swap_workaround (w[ 0]);
- u32 w1_t = swap_workaround (w[ 1]);
- u32 w2_t = swap_workaround (w[ 2]);
- u32 w3_t = swap_workaround (w[ 3]);
- u32 w4_t = swap_workaround (w[ 4]);
- u32 w5_t = swap_workaround (w[ 5]);
- u32 w6_t = swap_workaround (w[ 6]);
- u32 w7_t = swap_workaround (w[ 7]);
- u32 w8_t = swap_workaround (w[ 8]);
- u32 w9_t = swap_workaround (w[ 9]);
- u32 wa_t = swap_workaround (w[10]);
- u32 wb_t = swap_workaround (w[11]);
- u32 wc_t = swap_workaround (w[12]);
- u32 wd_t = swap_workaround (w[13]);
- u32 we_t = swap_workaround (w[14]);
- u32 wf_t = swap_workaround (w[15]);
+ u32 w0_t = swap32 (w[ 0]);
+ u32 w1_t = swap32 (w[ 1]);
+ u32 w2_t = swap32 (w[ 2]);
+ u32 w3_t = swap32 (w[ 3]);
+ u32 w4_t = swap32 (w[ 4]);
+ u32 w5_t = swap32 (w[ 5]);
+ u32 w6_t = swap32 (w[ 6]);
+ u32 w7_t = swap32 (w[ 7]);
+ u32 w8_t = swap32 (w[ 8]);
+ u32 w9_t = swap32 (w[ 9]);
+ u32 wa_t = swap32 (w[10]);
+ u32 wb_t = swap32 (w[11]);
+ u32 wc_t = swap32 (w[12]);
+ u32 wd_t = swap32 (w[13]);
+ u32 we_t = swap32 (w[14]);
+ u32 wf_t = swap32 (w[15]);
#define ROUND_EXPAND() \
{ \
static void bswap8 (u32 block[16])
{
- block[ 0] = swap_workaround (block[ 0]);
- block[ 1] = swap_workaround (block[ 1]);
- block[ 2] = swap_workaround (block[ 2]);
- block[ 3] = swap_workaround (block[ 3]);
- block[ 4] = swap_workaround (block[ 4]);
- block[ 5] = swap_workaround (block[ 5]);
- block[ 6] = swap_workaround (block[ 6]);
- block[ 7] = swap_workaround (block[ 7]);
+ block[ 0] = swap32 (block[ 0]);
+ block[ 1] = swap32 (block[ 1]);
+ block[ 2] = swap32 (block[ 2]);
+ block[ 3] = swap32 (block[ 3]);
+ block[ 4] = swap32 (block[ 4]);
+ block[ 5] = swap32 (block[ 5]);
+ block[ 6] = swap32 (block[ 6]);
+ block[ 7] = swap32 (block[ 7]);
}
static u32 memcat16 (u32 block[16], const u32 block_len, const u32 append[4], const u32 append_len)
u32 tmp3;
u32 tmp4;
+ #ifdef IS_AMD
const int offset_minus_4 = 4 - block_len;
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp3 = tmp4;
tmp4 = 0;
}
+ #endif
+
+ #ifdef IS_NV
+ const int offset_minus_4 = 4 - (block_len & 3);
+
+ const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+
+ tmp0 = __byte_perm ( 0, append[0], selector);
+ tmp1 = __byte_perm (append[0], append[1], selector);
+ tmp2 = __byte_perm (append[1], append[2], selector);
+ tmp3 = __byte_perm (append[2], append[3], selector);
+ tmp4 = __byte_perm (append[3], 0, selector);
+ #endif
switch (div)
{
u32 tmp3;
u32 tmp4;
+ #ifdef IS_AMD
const int offset_minus_4 = 4 - block_len;
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp3 = tmp4;
tmp4 = 0;
}
+ #endif
+
+ #ifdef IS_NV
+ const int offset_minus_4 = 4 - (block_len & 3);
+
+ const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+
+ tmp0 = __byte_perm ( 0, append[0], selector);
+ tmp1 = __byte_perm (append[0], append[1], selector);
+ tmp2 = __byte_perm (append[1], append[2], selector);
+ tmp3 = __byte_perm (append[2], append[3], selector);
+ tmp4 = __byte_perm (append[3], 0, selector);
+ #endif
u32 carry[4] = { 0, 0, 0, 0 };
u32 tmp3;
u32 tmp4;
+ #ifdef IS_AMD
const int offset_minus_4 = 4 - block_len;
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp3 = tmp4;
tmp4 = 0;
}
+ #endif
+
+ #ifdef IS_NV
+ const int offset_minus_4 = 4 - (block_len & 3);
+
+ const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+
+ tmp0 = __byte_perm ( 0, append[0], selector);
+ tmp1 = __byte_perm (append[0], append[1], selector);
+ tmp2 = __byte_perm (append[1], append[2], selector);
+ tmp3 = __byte_perm (append[2], append[3], selector);
+ tmp4 = __byte_perm (append[3], 0, selector);
+ #endif
switch (div)
{
u32 tmp3;
u32 tmp4;
+ #ifdef IS_AMD
const int offset_minus_4 = 4 - block_len;
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp3 = tmp4;
tmp4 = 0x80;
}
+ #endif
+
+ #ifdef IS_NV
+ const int offset_minus_4 = 4 - (block_len & 3);
+
+ const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+
+ tmp0 = __byte_perm ( 0, append[0], selector);
+ tmp1 = __byte_perm (append[0], append[1], selector);
+ tmp2 = __byte_perm (append[1], append[2], selector);
+ tmp3 = __byte_perm (append[2], append[3], selector);
+ tmp4 = __byte_perm (append[3], 0x80, selector);
+ #endif
switch (div)
{
block_len = memcat16 (block, block_len, w0, pw_len);
- append_0x80_4x4 (block, block_len);
+ append_0x80_1x16 (block, block_len);
- block[15] = swap_workaround (block_len * 8);
+ block[15] = swap32 (block_len * 8);
init_ctx (alt_result);
}
}
- append_0x80_4x4 (block, block_len);
+ append_0x80_1x16 (block, block_len);
if (block_len >= 56)
{
bzero16 (block);
}
- block[15] = swap_workaround (transform_len * 8);
+ block[15] = swap32 (transform_len * 8);
sha256_transform (block, alt_result);
/* Finish the digest. */
- append_0x80_4x4 (block, block_len);
+ append_0x80_1x16 (block, block_len);
if (block_len >= 56)
{
bzero16 (block);
}
- block[15] = swap_workaround (transform_len * 8);
+ block[15] = swap32 (transform_len * 8);
sha256_transform (block, p_bytes);
/* Finish the digest. */
- append_0x80_4x4 (block, block_len);
+ append_0x80_1x16 (block, block_len);
if (block_len >= 56)
{
bzero16 (block);
}
- block[15] = swap_workaround (transform_len * 8);
+ block[15] = swap32 (transform_len * 8);
sha256_transform (block, s_bytes);
block[15] = 0;
}
- block[15] = swap_workaround (block_len * 8);
+ block[15] = swap32 (block_len * 8);
sha256_transform_no14 (block, tmp);
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
// 1st transform
- w0_t = swap_workaround (w0_t);
- w1_t = swap_workaround (w1_t);
- w2_t = swap_workaround (w2_t);
- w3_t = swap_workaround (w3_t);
- w4_t = swap_workaround (w4_t);
- w5_t = swap_workaround (w5_t);
- w6_t = swap_workaround (w6_t);
- w7_t = swap_workaround (w7_t);
- w8_t = swap_workaround (w8_t);
- w9_t = swap_workaround (w9_t);
- wa_t = swap_workaround (wa_t);
- wb_t = swap_workaround (wb_t);
- wc_t = swap_workaround (wc_t);
- wd_t = swap_workaround (wd_t);
- we_t = swap_workaround (we_t);
- wf_t = swap_workaround (wf_t);
+ w0_t = swap32 (w0_t);
+ w1_t = swap32 (w1_t);
+ w2_t = swap32 (w2_t);
+ w3_t = swap32 (w3_t);
+ w4_t = swap32 (w4_t);
+ w5_t = swap32 (w5_t);
+ w6_t = swap32 (w6_t);
+ w7_t = swap32 (w7_t);
+ w8_t = swap32 (w8_t);
+ w9_t = swap32 (w9_t);
+ wa_t = swap32 (wa_t);
+ wb_t = swap32 (wb_t);
+ wc_t = swap32 (wc_t);
+ wd_t = swap32 (wd_t);
+ we_t = swap32 (we_t);
+ wf_t = swap32 (wf_t);
a = SHA1M_A;
b = SHA1M_B;
// 2nd transform
- w0_t = swap_workaround (w2t[0]);
- w1_t = swap_workaround (w2t[1]);
+ w0_t = swap32 (w2t[0]);
+ w1_t = swap32 (w2t[1]);
w2_t = 0x80000000;
w3_t = 0;
w4_t = 0;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
// 1st transform
- w0_t = swap_workaround (w0_t);
- w1_t = swap_workaround (w1_t);
- w2_t = swap_workaround (w2_t);
- w3_t = swap_workaround (w3_t);
- w4_t = swap_workaround (w4_t);
- w5_t = swap_workaround (w5_t);
- w6_t = swap_workaround (w6_t);
- w7_t = swap_workaround (w7_t);
- w8_t = swap_workaround (w8_t);
- w9_t = swap_workaround (w9_t);
- wa_t = swap_workaround (wa_t);
- wb_t = swap_workaround (wb_t);
- wc_t = swap_workaround (wc_t);
- wd_t = swap_workaround (wd_t);
- we_t = swap_workaround (we_t);
- wf_t = swap_workaround (wf_t);
+ w0_t = swap32 (w0_t);
+ w1_t = swap32 (w1_t);
+ w2_t = swap32 (w2_t);
+ w3_t = swap32 (w3_t);
+ w4_t = swap32 (w4_t);
+ w5_t = swap32 (w5_t);
+ w6_t = swap32 (w6_t);
+ w7_t = swap32 (w7_t);
+ w8_t = swap32 (w8_t);
+ w9_t = swap32 (w9_t);
+ wa_t = swap32 (wa_t);
+ wb_t = swap32 (wb_t);
+ wc_t = swap32 (wc_t);
+ wd_t = swap32 (wd_t);
+ we_t = swap32 (we_t);
+ wf_t = swap32 (wf_t);
a = SHA1M_A;
b = SHA1M_B;
// 2nd transform
- w0_t = swap_workaround (w2t[0]);
- w1_t = swap_workaround (w2t[1]);
+ w0_t = swap32 (w2t[0]);
+ w1_t = swap32 (w2t[1]);
w2_t = 0x80000000;
w3_t = 0;
w4_t = 0;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
// 1st transform
- w0_t = swap_workaround (w0_t);
- w1_t = swap_workaround (w1_t);
- w2_t = swap_workaround (w2_t);
- w3_t = swap_workaround (w3_t);
- w4_t = swap_workaround (w4_t);
- w5_t = swap_workaround (w5_t);
- w6_t = swap_workaround (w6_t);
- w7_t = swap_workaround (w7_t);
- w8_t = swap_workaround (w8_t);
- w9_t = swap_workaround (w9_t);
- wa_t = swap_workaround (wa_t);
- wb_t = swap_workaround (wb_t);
- wc_t = swap_workaround (wc_t);
- wd_t = swap_workaround (wd_t);
- we_t = swap_workaround (we_t);
- wf_t = swap_workaround (wf_t);
+ w0_t = swap32 (w0_t);
+ w1_t = swap32 (w1_t);
+ w2_t = swap32 (w2_t);
+ w3_t = swap32 (w3_t);
+ w4_t = swap32 (w4_t);
+ w5_t = swap32 (w5_t);
+ w6_t = swap32 (w6_t);
+ w7_t = swap32 (w7_t);
+ w8_t = swap32 (w8_t);
+ w9_t = swap32 (w9_t);
+ wa_t = swap32 (wa_t);
+ wb_t = swap32 (wb_t);
+ wc_t = swap32 (wc_t);
+ wd_t = swap32 (wd_t);
+ we_t = swap32 (we_t);
+ wf_t = swap32 (wf_t);
a = SHA1M_A;
b = SHA1M_B;
// 2nd transform
- w0_t = swap_workaround (w2t[0]);
- w1_t = swap_workaround (w2t[1]);
+ w0_t = swap32 (w2t[0]);
+ w1_t = swap32 (w2t[1]);
w2_t = 0x80000000;
w3_t = 0;
w4_t = 0;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
// 1st transform
- w0_t = swap_workaround (w0_t);
- w1_t = swap_workaround (w1_t);
- w2_t = swap_workaround (w2_t);
- w3_t = swap_workaround (w3_t);
- w4_t = swap_workaround (w4_t);
- w5_t = swap_workaround (w5_t);
- w6_t = swap_workaround (w6_t);
- w7_t = swap_workaround (w7_t);
- w8_t = swap_workaround (w8_t);
- w9_t = swap_workaround (w9_t);
- wa_t = swap_workaround (wa_t);
- wb_t = swap_workaround (wb_t);
- wc_t = swap_workaround (wc_t);
- wd_t = swap_workaround (wd_t);
- we_t = swap_workaround (we_t);
- wf_t = swap_workaround (wf_t);
+ w0_t = swap32 (w0_t);
+ w1_t = swap32 (w1_t);
+ w2_t = swap32 (w2_t);
+ w3_t = swap32 (w3_t);
+ w4_t = swap32 (w4_t);
+ w5_t = swap32 (w5_t);
+ w6_t = swap32 (w6_t);
+ w7_t = swap32 (w7_t);
+ w8_t = swap32 (w8_t);
+ w9_t = swap32 (w9_t);
+ wa_t = swap32 (wa_t);
+ wb_t = swap32 (wb_t);
+ wc_t = swap32 (wc_t);
+ wd_t = swap32 (wd_t);
+ we_t = swap32 (we_t);
+ wf_t = swap32 (wf_t);
a = SHA1M_A;
b = SHA1M_B;
// 2nd transform
- w0_t = swap_workaround (w2t[0]);
- w1_t = swap_workaround (w2t[1]);
+ w0_t = swap32 (w2t[0]);
+ w1_t = swap32 (w2t[1]);
w2_t = 0x80000000;
w3_t = 0;
w4_t = 0;
// 1st transform
- w0_t = swap_workaround (w0_t);
- w1_t = swap_workaround (w1_t);
- w2_t = swap_workaround (w2_t);
- w3_t = swap_workaround (w3_t);
- w4_t = swap_workaround (w4_t);
- w5_t = swap_workaround (w5_t);
- w6_t = swap_workaround (w6_t);
- w7_t = swap_workaround (w7_t);
- w8_t = swap_workaround (w8_t);
- w9_t = swap_workaround (w9_t);
- wa_t = swap_workaround (wa_t);
- wb_t = swap_workaround (wb_t);
- wc_t = swap_workaround (wc_t);
- wd_t = swap_workaround (wd_t);
- we_t = swap_workaround (we_t);
- wf_t = swap_workaround (wf_t);
+ w0_t = swap32 (w0_t);
+ w1_t = swap32 (w1_t);
+ w2_t = swap32 (w2_t);
+ w3_t = swap32 (w3_t);
+ w4_t = swap32 (w4_t);
+ w5_t = swap32 (w5_t);
+ w6_t = swap32 (w6_t);
+ w7_t = swap32 (w7_t);
+ w8_t = swap32 (w8_t);
+ w9_t = swap32 (w9_t);
+ wa_t = swap32 (wa_t);
+ wb_t = swap32 (wb_t);
+ wc_t = swap32 (wc_t);
+ wd_t = swap32 (wd_t);
+ we_t = swap32 (we_t);
+ wf_t = swap32 (wf_t);
a = SHA1M_A;
b = SHA1M_B;
// 2nd transform
- w0_t = swap_workaround (w2t[0]);
- w1_t = swap_workaround (w2t[1]);
+ w0_t = swap32 (w2t[0]);
+ w1_t = swap32 (w2t[1]);
w2_t = 0x80000000;
w3_t = 0;
w4_t = 0;
// 1st transform
- w0_t = swap_workaround (w0_t);
- w1_t = swap_workaround (w1_t);
- w2_t = swap_workaround (w2_t);
- w3_t = swap_workaround (w3_t);
- w4_t = swap_workaround (w4_t);
- w5_t = swap_workaround (w5_t);
- w6_t = swap_workaround (w6_t);
- w7_t = swap_workaround (w7_t);
- w8_t = swap_workaround (w8_t);
- w9_t = swap_workaround (w9_t);
- wa_t = swap_workaround (wa_t);
- wb_t = swap_workaround (wb_t);
- wc_t = swap_workaround (wc_t);
- wd_t = swap_workaround (wd_t);
- we_t = swap_workaround (we_t);
- wf_t = swap_workaround (wf_t);
+ w0_t = swap32 (w0_t);
+ w1_t = swap32 (w1_t);
+ w2_t = swap32 (w2_t);
+ w3_t = swap32 (w3_t);
+ w4_t = swap32 (w4_t);
+ w5_t = swap32 (w5_t);
+ w6_t = swap32 (w6_t);
+ w7_t = swap32 (w7_t);
+ w8_t = swap32 (w8_t);
+ w9_t = swap32 (w9_t);
+ wa_t = swap32 (wa_t);
+ wb_t = swap32 (wb_t);
+ wc_t = swap32 (wc_t);
+ wd_t = swap32 (wd_t);
+ we_t = swap32 (we_t);
+ wf_t = swap32 (wf_t);
a = SHA1M_A;
b = SHA1M_B;
// 2nd transform
- w0_t = swap_workaround (w2t[0]);
- w1_t = swap_workaround (w2t[1]);
+ w0_t = swap32 (w2t[0]);
+ w1_t = swap32 (w2t[1]);
w2_t = 0x80000000;
w3_t = 0;
w4_t = 0;
static void swap_buffer (u32 final[16])
{
- final[ 0] = swap_workaround (final[ 0]);
- final[ 1] = swap_workaround (final[ 1]);
- final[ 2] = swap_workaround (final[ 2]);
- final[ 3] = swap_workaround (final[ 3]);
- final[ 4] = swap_workaround (final[ 4]);
- final[ 5] = swap_workaround (final[ 5]);
- final[ 6] = swap_workaround (final[ 6]);
- final[ 7] = swap_workaround (final[ 7]);
- final[ 8] = swap_workaround (final[ 8]);
- final[ 9] = swap_workaround (final[ 9]);
- final[10] = swap_workaround (final[10]);
- final[11] = swap_workaround (final[11]);
- final[12] = swap_workaround (final[12]);
- final[13] = swap_workaround (final[13]);
- final[14] = swap_workaround (final[14]);
- final[15] = swap_workaround (final[15]);
+ final[ 0] = swap32 (final[ 0]);
+ final[ 1] = swap32 (final[ 1]);
+ final[ 2] = swap32 (final[ 2]);
+ final[ 3] = swap32 (final[ 3]);
+ final[ 4] = swap32 (final[ 4]);
+ final[ 5] = swap32 (final[ 5]);
+ final[ 6] = swap32 (final[ 6]);
+ final[ 7] = swap32 (final[ 7]);
+ final[ 8] = swap32 (final[ 8]);
+ final[ 9] = swap32 (final[ 9]);
+ final[10] = swap32 (final[10]);
+ final[11] = swap32 (final[11]);
+ final[12] = swap32 (final[12]);
+ final[13] = swap32 (final[13]);
+ final[14] = swap32 (final[14]);
+ final[15] = swap32 (final[15]);
}
static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5])
u32 final[256];
- final[ 0] = swap_workaround (w0[0] | s0[0]);
- final[ 1] = swap_workaround (w0[1] | s0[1]);
- final[ 2] = swap_workaround (w0[2] | s0[2]);
- final[ 3] = swap_workaround (w0[3] | s0[3]);
- final[ 4] = swap_workaround (w1[0] | s1[0]);
- final[ 5] = swap_workaround (w1[1] | s1[1]);
- final[ 6] = swap_workaround (w1[2] | s1[2]);
- final[ 7] = swap_workaround (w1[3] | s1[3]);
- final[ 8] = swap_workaround (w2[0] | s2[0]);
- final[ 9] = swap_workaround (w2[1] | s2[1]);
- final[10] = swap_workaround (w2[2] | s2[2]);
- final[11] = swap_workaround (w2[3] | s2[3]);
- final[12] = swap_workaround (w3[0] | s3[0]);
- final[13] = swap_workaround (w3[1] | s3[1]);
+ final[ 0] = swap32 (w0[0] | s0[0]);
+ final[ 1] = swap32 (w0[1] | s0[1]);
+ final[ 2] = swap32 (w0[2] | s0[2]);
+ final[ 3] = swap32 (w0[3] | s0[3]);
+ final[ 4] = swap32 (w1[0] | s1[0]);
+ final[ 5] = swap32 (w1[1] | s1[1]);
+ final[ 6] = swap32 (w1[2] | s1[2]);
+ final[ 7] = swap32 (w1[3] | s1[3]);
+ final[ 8] = swap32 (w2[0] | s2[0]);
+ final[ 9] = swap32 (w2[1] | s2[1]);
+ final[10] = swap32 (w2[2] | s2[2]);
+ final[11] = swap32 (w2[3] | s2[3]);
+ final[12] = swap32 (w3[0] | s3[0]);
+ final[13] = swap32 (w3[1] | s3[1]);
final[14] = 0;
final[15] = pw_salt_len * 8;
u32 final[256];
- final[ 0] = swap_workaround (w0[0] | s0[0]);
- final[ 1] = swap_workaround (w0[1] | s0[1]);
- final[ 2] = swap_workaround (w0[2] | s0[2]);
- final[ 3] = swap_workaround (w0[3] | s0[3]);
- final[ 4] = swap_workaround (w1[0] | s1[0]);
- final[ 5] = swap_workaround (w1[1] | s1[1]);
- final[ 6] = swap_workaround (w1[2] | s1[2]);
- final[ 7] = swap_workaround (w1[3] | s1[3]);
- final[ 8] = swap_workaround (w2[0] | s2[0]);
- final[ 9] = swap_workaround (w2[1] | s2[1]);
- final[10] = swap_workaround (w2[2] | s2[2]);
- final[11] = swap_workaround (w2[3] | s2[3]);
- final[12] = swap_workaround (w3[0] | s3[0]);
- final[13] = swap_workaround (w3[1] | s3[1]);
+ final[ 0] = swap32 (w0[0] | s0[0]);
+ final[ 1] = swap32 (w0[1] | s0[1]);
+ final[ 2] = swap32 (w0[2] | s0[2]);
+ final[ 3] = swap32 (w0[3] | s0[3]);
+ final[ 4] = swap32 (w1[0] | s1[0]);
+ final[ 5] = swap32 (w1[1] | s1[1]);
+ final[ 6] = swap32 (w1[2] | s1[2]);
+ final[ 7] = swap32 (w1[3] | s1[3]);
+ final[ 8] = swap32 (w2[0] | s2[0]);
+ final[ 9] = swap32 (w2[1] | s2[1]);
+ final[10] = swap32 (w2[2] | s2[2]);
+ final[11] = swap32 (w2[3] | s2[3]);
+ final[12] = swap32 (w3[0] | s3[0]);
+ final[13] = swap32 (w3[1] | s3[1]);
final[14] = 0;
final[15] = pw_salt_len * 8;
static void swap_buffer (u32 final[16])
{
- final[ 0] = swap_workaround (final[ 0]);
- final[ 1] = swap_workaround (final[ 1]);
- final[ 2] = swap_workaround (final[ 2]);
- final[ 3] = swap_workaround (final[ 3]);
- final[ 4] = swap_workaround (final[ 4]);
- final[ 5] = swap_workaround (final[ 5]);
- final[ 6] = swap_workaround (final[ 6]);
- final[ 7] = swap_workaround (final[ 7]);
- final[ 8] = swap_workaround (final[ 8]);
- final[ 9] = swap_workaround (final[ 9]);
- final[10] = swap_workaround (final[10]);
- final[11] = swap_workaround (final[11]);
- final[12] = swap_workaround (final[12]);
- final[13] = swap_workaround (final[13]);
- final[14] = swap_workaround (final[14]);
- final[15] = swap_workaround (final[15]);
+ final[ 0] = swap32 (final[ 0]);
+ final[ 1] = swap32 (final[ 1]);
+ final[ 2] = swap32 (final[ 2]);
+ final[ 3] = swap32 (final[ 3]);
+ final[ 4] = swap32 (final[ 4]);
+ final[ 5] = swap32 (final[ 5]);
+ final[ 6] = swap32 (final[ 6]);
+ final[ 7] = swap32 (final[ 7]);
+ final[ 8] = swap32 (final[ 8]);
+ final[ 9] = swap32 (final[ 9]);
+ final[10] = swap32 (final[10]);
+ final[11] = swap32 (final[11]);
+ final[12] = swap32 (final[12]);
+ final[13] = swap32 (final[13]);
+ final[14] = swap32 (final[14]);
+ final[15] = swap32 (final[15]);
}
static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5])
u32 final[256];
- final[ 0] = swap_workaround (w0[0] | s0[0]);
- final[ 1] = swap_workaround (w0[1] | s0[1]);
- final[ 2] = swap_workaround (w0[2] | s0[2]);
- final[ 3] = swap_workaround (w0[3] | s0[3]);
- final[ 4] = swap_workaround (w1[0] | s1[0]);
- final[ 5] = swap_workaround (w1[1] | s1[1]);
- final[ 6] = swap_workaround (w1[2] | s1[2]);
- final[ 7] = swap_workaround (w1[3] | s1[3]);
- final[ 8] = swap_workaround (w2[0] | s2[0]);
- final[ 9] = swap_workaround (w2[1] | s2[1]);
- final[10] = swap_workaround (w2[2] | s2[2]);
- final[11] = swap_workaround (w2[3] | s2[3]);
- final[12] = swap_workaround (w3[0] | s3[0]);
- final[13] = swap_workaround (w3[1] | s3[1]);
+ final[ 0] = swap32 (w0[0] | s0[0]);
+ final[ 1] = swap32 (w0[1] | s0[1]);
+ final[ 2] = swap32 (w0[2] | s0[2]);
+ final[ 3] = swap32 (w0[3] | s0[3]);
+ final[ 4] = swap32 (w1[0] | s1[0]);
+ final[ 5] = swap32 (w1[1] | s1[1]);
+ final[ 6] = swap32 (w1[2] | s1[2]);
+ final[ 7] = swap32 (w1[3] | s1[3]);
+ final[ 8] = swap32 (w2[0] | s2[0]);
+ final[ 9] = swap32 (w2[1] | s2[1]);
+ final[10] = swap32 (w2[2] | s2[2]);
+ final[11] = swap32 (w2[3] | s2[3]);
+ final[12] = swap32 (w3[0] | s3[0]);
+ final[13] = swap32 (w3[1] | s3[1]);
final[14] = 0;
final[15] = pw_salt_len * 8;
u32 final[256];
- final[ 0] = swap_workaround (w0[0] | s0[0]);
- final[ 1] = swap_workaround (w0[1] | s0[1]);
- final[ 2] = swap_workaround (w0[2] | s0[2]);
- final[ 3] = swap_workaround (w0[3] | s0[3]);
- final[ 4] = swap_workaround (w1[0] | s1[0]);
- final[ 5] = swap_workaround (w1[1] | s1[1]);
- final[ 6] = swap_workaround (w1[2] | s1[2]);
- final[ 7] = swap_workaround (w1[3] | s1[3]);
- final[ 8] = swap_workaround (w2[0] | s2[0]);
- final[ 9] = swap_workaround (w2[1] | s2[1]);
- final[10] = swap_workaround (w2[2] | s2[2]);
- final[11] = swap_workaround (w2[3] | s2[3]);
- final[12] = swap_workaround (w3[0] | s3[0]);
- final[13] = swap_workaround (w3[1] | s3[1]);
+ final[ 0] = swap32 (w0[0] | s0[0]);
+ final[ 1] = swap32 (w0[1] | s0[1]);
+ final[ 2] = swap32 (w0[2] | s0[2]);
+ final[ 3] = swap32 (w0[3] | s0[3]);
+ final[ 4] = swap32 (w1[0] | s1[0]);
+ final[ 5] = swap32 (w1[1] | s1[1]);
+ final[ 6] = swap32 (w1[2] | s1[2]);
+ final[ 7] = swap32 (w1[3] | s1[3]);
+ final[ 8] = swap32 (w2[0] | s2[0]);
+ final[ 9] = swap32 (w2[1] | s2[1]);
+ final[10] = swap32 (w2[2] | s2[2]);
+ final[11] = swap32 (w2[3] | s2[3]);
+ final[12] = swap32 (w3[0] | s3[0]);
+ final[13] = swap32 (w3[1] | s3[1]);
final[14] = 0;
final[15] = pw_salt_len * 8;
static void swap_buffer (u32 final[16])
{
- final[ 0] = swap_workaround (final[ 0]);
- final[ 1] = swap_workaround (final[ 1]);
- final[ 2] = swap_workaround (final[ 2]);
- final[ 3] = swap_workaround (final[ 3]);
- final[ 4] = swap_workaround (final[ 4]);
- final[ 5] = swap_workaround (final[ 5]);
- final[ 6] = swap_workaround (final[ 6]);
- final[ 7] = swap_workaround (final[ 7]);
- final[ 8] = swap_workaround (final[ 8]);
- final[ 9] = swap_workaround (final[ 9]);
- final[10] = swap_workaround (final[10]);
- final[11] = swap_workaround (final[11]);
- final[12] = swap_workaround (final[12]);
- final[13] = swap_workaround (final[13]);
- final[14] = swap_workaround (final[14]);
- final[15] = swap_workaround (final[15]);
+ final[ 0] = swap32 (final[ 0]);
+ final[ 1] = swap32 (final[ 1]);
+ final[ 2] = swap32 (final[ 2]);
+ final[ 3] = swap32 (final[ 3]);
+ final[ 4] = swap32 (final[ 4]);
+ final[ 5] = swap32 (final[ 5]);
+ final[ 6] = swap32 (final[ 6]);
+ final[ 7] = swap32 (final[ 7]);
+ final[ 8] = swap32 (final[ 8]);
+ final[ 9] = swap32 (final[ 9]);
+ final[10] = swap32 (final[10]);
+ final[11] = swap32 (final[11]);
+ final[12] = swap32 (final[12]);
+ final[13] = swap32 (final[13]);
+ final[14] = swap32 (final[14]);
+ final[15] = swap32 (final[15]);
}
static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5])
const u32 gid = get_global_id (0);
const u32 lid = get_local_id (0);
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
/**
* salt
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
{
- const u32 w0r = swap_workaround (bfs_buf[il_pos].i);
+ const u32 w0r = swap32 (bfs_buf[il_pos].i);
w0[0] = w0l | w0r;
u32 final[256];
- final[ 0] = swap_workaround (w0[0] | s0[0]);
- final[ 1] = swap_workaround (w0[1] | s0[1]);
- final[ 2] = swap_workaround (w0[2] | s0[2]);
- final[ 3] = swap_workaround (w0[3] | s0[3]);
- final[ 4] = swap_workaround (w1[0] | s1[0]);
- final[ 5] = swap_workaround (w1[1] | s1[1]);
- final[ 6] = swap_workaround (w1[2] | s1[2]);
- final[ 7] = swap_workaround (w1[3] | s1[3]);
- final[ 8] = swap_workaround (w2[0] | s2[0]);
- final[ 9] = swap_workaround (w2[1] | s2[1]);
- final[10] = swap_workaround (w2[2] | s2[2]);
- final[11] = swap_workaround (w2[3] | s2[3]);
- final[12] = swap_workaround (w3[0] | s3[0]);
- final[13] = swap_workaround (w3[1] | s3[1]);
+ final[ 0] = swap32 (w0[0] | s0[0]);
+ final[ 1] = swap32 (w0[1] | s0[1]);
+ final[ 2] = swap32 (w0[2] | s0[2]);
+ final[ 3] = swap32 (w0[3] | s0[3]);
+ final[ 4] = swap32 (w1[0] | s1[0]);
+ final[ 5] = swap32 (w1[1] | s1[1]);
+ final[ 6] = swap32 (w1[2] | s1[2]);
+ final[ 7] = swap32 (w1[3] | s1[3]);
+ final[ 8] = swap32 (w2[0] | s2[0]);
+ final[ 9] = swap32 (w2[1] | s2[1]);
+ final[10] = swap32 (w2[2] | s2[2]);
+ final[11] = swap32 (w2[3] | s2[3]);
+ final[12] = swap32 (w3[0] | s3[0]);
+ final[13] = swap32 (w3[1] | s3[1]);
final[14] = 0;
final[15] = pw_salt_len * 8;
const u32 gid = get_global_id (0);
const u32 lid = get_local_id (0);
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
/**
* salt
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
{
- const u32 w0r = swap_workaround (bfs_buf[il_pos].i);
+ const u32 w0r = swap32 (bfs_buf[il_pos].i);
w0[0] = w0l | w0r;
u32 final[256];
- final[ 0] = swap_workaround (w0[0] | s0[0]);
- final[ 1] = swap_workaround (w0[1] | s0[1]);
- final[ 2] = swap_workaround (w0[2] | s0[2]);
- final[ 3] = swap_workaround (w0[3] | s0[3]);
- final[ 4] = swap_workaround (w1[0] | s1[0]);
- final[ 5] = swap_workaround (w1[1] | s1[1]);
- final[ 6] = swap_workaround (w1[2] | s1[2]);
- final[ 7] = swap_workaround (w1[3] | s1[3]);
- final[ 8] = swap_workaround (w2[0] | s2[0]);
- final[ 9] = swap_workaround (w2[1] | s2[1]);
- final[10] = swap_workaround (w2[2] | s2[2]);
- final[11] = swap_workaround (w2[3] | s2[3]);
- final[12] = swap_workaround (w3[0] | s3[0]);
- final[13] = swap_workaround (w3[1] | s3[1]);
+ final[ 0] = swap32 (w0[0] | s0[0]);
+ final[ 1] = swap32 (w0[1] | s0[1]);
+ final[ 2] = swap32 (w0[2] | s0[2]);
+ final[ 3] = swap32 (w0[3] | s0[3]);
+ final[ 4] = swap32 (w1[0] | s1[0]);
+ final[ 5] = swap32 (w1[1] | s1[1]);
+ final[ 6] = swap32 (w1[2] | s1[2]);
+ final[ 7] = swap32 (w1[3] | s1[3]);
+ final[ 8] = swap32 (w2[0] | s2[0]);
+ final[ 9] = swap32 (w2[1] | s2[1]);
+ final[10] = swap32 (w2[2] | s2[2]);
+ final[11] = swap32 (w2[3] | s2[3]);
+ final[12] = swap32 (w3[0] | s3[0]);
+ final[13] = swap32 (w3[1] | s3[1]);
final[14] = 0;
final[15] = pw_salt_len * 8;
u64 w[16];
- w[ 0] = ((u64) swap_workaround (salt_buf[0])) << 32 | (u64) swap_workaround (salt_buf[1]);
- w[ 1] = ((u64) swap_workaround (w0[0])) << 32 | (u64) swap_workaround (w0[1]);
- w[ 2] = ((u64) swap_workaround (w0[2])) << 32 | (u64) swap_workaround (w0[3]);
- w[ 3] = ((u64) swap_workaround (w1[0])) << 32 | (u64) swap_workaround (w1[1]);
- w[ 4] = ((u64) swap_workaround (w1[2])) << 32 | (u64) swap_workaround (w1[3]);
- w[ 5] = ((u64) swap_workaround (w2[0])) << 32 | (u64) swap_workaround (w2[1]);
- w[ 6] = ((u64) swap_workaround (w2[2])) << 32 | (u64) swap_workaround (w2[3]);
- w[ 7] = ((u64) swap_workaround (w3[0])) << 32 | (u64) swap_workaround (w3[1]);
- w[ 8] = ((u64) swap_workaround (w3[2])) << 32 | (u64) swap_workaround (w3[3]);
+ w[ 0] = ((u64) swap32 (salt_buf[0])) << 32 | (u64) swap32 (salt_buf[1]);
+ w[ 1] = ((u64) swap32 (w0[0])) << 32 | (u64) swap32 (w0[1]);
+ w[ 2] = ((u64) swap32 (w0[2])) << 32 | (u64) swap32 (w0[3]);
+ w[ 3] = ((u64) swap32 (w1[0])) << 32 | (u64) swap32 (w1[1]);
+ w[ 4] = ((u64) swap32 (w1[2])) << 32 | (u64) swap32 (w1[3]);
+ w[ 5] = ((u64) swap32 (w2[0])) << 32 | (u64) swap32 (w2[1]);
+ w[ 6] = ((u64) swap32 (w2[2])) << 32 | (u64) swap32 (w2[3]);
+ w[ 7] = ((u64) swap32 (w3[0])) << 32 | (u64) swap32 (w3[1]);
+ w[ 8] = ((u64) swap32 (w3[2])) << 32 | (u64) swap32 (w3[3]);
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[ 5] = 0;
w[ 6] = 0;
w[ 7] = 0;
- w[ 8] = ((u64) swap_workaround (w0[0])) << 32 | (u64) swap_workaround (w0[1]);
- w[ 9] = ((u64) swap_workaround (w0[2])) << 32 | (u64) swap_workaround (w0[3]);
- w[10] = ((u64) swap_workaround (w1[0])) << 32 | (u64) swap_workaround (w1[1]);
- w[11] = ((u64) swap_workaround (w1[2])) << 32 | (u64) swap_workaround (w1[3]);
- w[12] = ((u64) swap_workaround (w2[0])) << 32 | (u64) swap_workaround (w2[1]);
- w[13] = ((u64) swap_workaround (w2[2])) << 32 | (u64) swap_workaround (w2[3]);
+ w[ 8] = ((u64) swap32 (w0[0])) << 32 | (u64) swap32 (w0[1]);
+ w[ 9] = ((u64) swap32 (w0[2])) << 32 | (u64) swap32 (w0[3]);
+ w[10] = ((u64) swap32 (w1[0])) << 32 | (u64) swap32 (w1[1]);
+ w[11] = ((u64) swap32 (w1[2])) << 32 | (u64) swap32 (w1[3]);
+ w[12] = ((u64) swap32 (w2[0])) << 32 | (u64) swap32 (w2[1]);
+ w[13] = ((u64) swap32 (w2[2])) << 32 | (u64) swap32 (w2[3]);
w[14] = 0;
w[15] = block_len * 8;
* salt
*/
- const u32 salt_buf0 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]);
- const u32 salt_buf1 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]);
- const u32 salt_buf2 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80
+ const u32 salt_buf0 = swap32 (salt_bufs[salt_pos].salt_buf[ 0]);
+ const u32 salt_buf1 = swap32 (salt_bufs[salt_pos].salt_buf[ 1]);
+ const u32 salt_buf2 = swap32 (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80
/**
* loop
u32 w_t[16];
- w_t[ 0] = swap_workaround (w0_t[0]);
- w_t[ 1] = swap_workaround (w0_t[1]);
- w_t[ 2] = swap_workaround (w0_t[2]);
- w_t[ 3] = swap_workaround (w0_t[3]);
- w_t[ 4] = swap_workaround (w1_t[0]);
- w_t[ 5] = swap_workaround (w1_t[1]);
- w_t[ 6] = swap_workaround (w1_t[2]);
- w_t[ 7] = swap_workaround (w1_t[3]);
- w_t[ 8] = swap_workaround (w2_t[0]);
- w_t[ 9] = swap_workaround (w2_t[1]);
- w_t[10] = swap_workaround (w2_t[2]);
- w_t[11] = swap_workaround (w2_t[3]);
- w_t[12] = swap_workaround (w3_t[0]);
- w_t[13] = swap_workaround (w3_t[1]);
- w_t[14] = swap_workaround (w3_t[2]);
- w_t[15] = swap_workaround (w3_t[3]);
+ w_t[ 0] = swap32 (w0_t[0]);
+ w_t[ 1] = swap32 (w0_t[1]);
+ w_t[ 2] = swap32 (w0_t[2]);
+ w_t[ 3] = swap32 (w0_t[3]);
+ w_t[ 4] = swap32 (w1_t[0]);
+ w_t[ 5] = swap32 (w1_t[1]);
+ w_t[ 6] = swap32 (w1_t[2]);
+ w_t[ 7] = swap32 (w1_t[3]);
+ w_t[ 8] = swap32 (w2_t[0]);
+ w_t[ 9] = swap32 (w2_t[1]);
+ w_t[10] = swap32 (w2_t[2]);
+ w_t[11] = swap32 (w2_t[3]);
+ w_t[12] = swap32 (w3_t[0]);
+ w_t[13] = swap32 (w3_t[1]);
+ w_t[14] = swap32 (w3_t[2]);
+ w_t[15] = swap32 (w3_t[3]);
w_t[ 0] = w_t[ 0] >> 8;
w_t[ 1] = w_t[ 1] >> 8;
* salt
*/
- const u32 salt_buf0 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]);
- const u32 salt_buf1 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]);
- const u32 salt_buf2 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80
+ const u32 salt_buf0 = swap32 (salt_bufs[salt_pos].salt_buf[ 0]);
+ const u32 salt_buf1 = swap32 (salt_bufs[salt_pos].salt_buf[ 1]);
+ const u32 salt_buf2 = swap32 (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80
/**
* digest
u32 w_t[16];
- w_t[ 0] = swap_workaround (w0_t[0]);
- w_t[ 1] = swap_workaround (w0_t[1]);
- w_t[ 2] = swap_workaround (w0_t[2]);
- w_t[ 3] = swap_workaround (w0_t[3]);
- w_t[ 4] = swap_workaround (w1_t[0]);
- w_t[ 5] = swap_workaround (w1_t[1]);
- w_t[ 6] = swap_workaround (w1_t[2]);
- w_t[ 7] = swap_workaround (w1_t[3]);
- w_t[ 8] = swap_workaround (w2_t[0]);
- w_t[ 9] = swap_workaround (w2_t[1]);
- w_t[10] = swap_workaround (w2_t[2]);
- w_t[11] = swap_workaround (w2_t[3]);
- w_t[12] = swap_workaround (w3_t[0]);
- w_t[13] = swap_workaround (w3_t[1]);
- w_t[14] = swap_workaround (w3_t[2]);
- w_t[15] = swap_workaround (w3_t[3]);
+ w_t[ 0] = swap32 (w0_t[0]);
+ w_t[ 1] = swap32 (w0_t[1]);
+ w_t[ 2] = swap32 (w0_t[2]);
+ w_t[ 3] = swap32 (w0_t[3]);
+ w_t[ 4] = swap32 (w1_t[0]);
+ w_t[ 5] = swap32 (w1_t[1]);
+ w_t[ 6] = swap32 (w1_t[2]);
+ w_t[ 7] = swap32 (w1_t[3]);
+ w_t[ 8] = swap32 (w2_t[0]);
+ w_t[ 9] = swap32 (w2_t[1]);
+ w_t[10] = swap32 (w2_t[2]);
+ w_t[11] = swap32 (w2_t[3]);
+ w_t[12] = swap32 (w3_t[0]);
+ w_t[13] = swap32 (w3_t[1]);
+ w_t[14] = swap32 (w3_t[2]);
+ w_t[15] = swap32 (w3_t[3]);
w_t[ 0] = w_t[ 0] >> 8;
w_t[ 1] = w_t[ 1] >> 8;
* salt
*/
- const u32 salt_buf0 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]);
- const u32 salt_buf1 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]);
- const u32 salt_buf2 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80
+ const u32 salt_buf0 = swap32 (salt_bufs[salt_pos].salt_buf[ 0]);
+ const u32 salt_buf1 = swap32 (salt_bufs[salt_pos].salt_buf[ 1]);
+ const u32 salt_buf2 = swap32 (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80
/**
* loop
u32 w_t[16];
- w_t[ 0] = swap_workaround (w0_t[0]);
- w_t[ 1] = swap_workaround (w0_t[1]);
- w_t[ 2] = swap_workaround (w0_t[2]);
- w_t[ 3] = swap_workaround (w0_t[3]);
- w_t[ 4] = swap_workaround (w1_t[0]);
- w_t[ 5] = swap_workaround (w1_t[1]);
- w_t[ 6] = swap_workaround (w1_t[2]);
- w_t[ 7] = swap_workaround (w1_t[3]);
- w_t[ 8] = swap_workaround (w2_t[0]);
- w_t[ 9] = swap_workaround (w2_t[1]);
- w_t[10] = swap_workaround (w2_t[2]);
- w_t[11] = swap_workaround (w2_t[3]);
- w_t[12] = swap_workaround (w3_t[0]);
- w_t[13] = swap_workaround (w3_t[1]);
- w_t[14] = swap_workaround (w3_t[2]);
- w_t[15] = swap_workaround (w3_t[3]);
+ w_t[ 0] = swap32 (w0_t[0]);
+ w_t[ 1] = swap32 (w0_t[1]);
+ w_t[ 2] = swap32 (w0_t[2]);
+ w_t[ 3] = swap32 (w0_t[3]);
+ w_t[ 4] = swap32 (w1_t[0]);
+ w_t[ 5] = swap32 (w1_t[1]);
+ w_t[ 6] = swap32 (w1_t[2]);
+ w_t[ 7] = swap32 (w1_t[3]);
+ w_t[ 8] = swap32 (w2_t[0]);
+ w_t[ 9] = swap32 (w2_t[1]);
+ w_t[10] = swap32 (w2_t[2]);
+ w_t[11] = swap32 (w2_t[3]);
+ w_t[12] = swap32 (w3_t[0]);
+ w_t[13] = swap32 (w3_t[1]);
+ w_t[14] = swap32 (w3_t[2]);
+ w_t[15] = swap32 (w3_t[3]);
w_t[ 0] = w_t[ 0] >> 8;
w_t[ 1] = w_t[ 1] >> 8;
* salt
*/
- const u32 salt_buf0 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]);
- const u32 salt_buf1 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]);
- const u32 salt_buf2 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80
+ const u32 salt_buf0 = swap32 (salt_bufs[salt_pos].salt_buf[ 0]);
+ const u32 salt_buf1 = swap32 (salt_bufs[salt_pos].salt_buf[ 1]);
+ const u32 salt_buf2 = swap32 (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80
/**
* digest
u32 w_t[16];
- w_t[ 0] = swap_workaround (w0_t[0]);
- w_t[ 1] = swap_workaround (w0_t[1]);
- w_t[ 2] = swap_workaround (w0_t[2]);
- w_t[ 3] = swap_workaround (w0_t[3]);
- w_t[ 4] = swap_workaround (w1_t[0]);
- w_t[ 5] = swap_workaround (w1_t[1]);
- w_t[ 6] = swap_workaround (w1_t[2]);
- w_t[ 7] = swap_workaround (w1_t[3]);
- w_t[ 8] = swap_workaround (w2_t[0]);
- w_t[ 9] = swap_workaround (w2_t[1]);
- w_t[10] = swap_workaround (w2_t[2]);
- w_t[11] = swap_workaround (w2_t[3]);
- w_t[12] = swap_workaround (w3_t[0]);
- w_t[13] = swap_workaround (w3_t[1]);
- w_t[14] = swap_workaround (w3_t[2]);
- w_t[15] = swap_workaround (w3_t[3]);
+ w_t[ 0] = swap32 (w0_t[0]);
+ w_t[ 1] = swap32 (w0_t[1]);
+ w_t[ 2] = swap32 (w0_t[2]);
+ w_t[ 3] = swap32 (w0_t[3]);
+ w_t[ 4] = swap32 (w1_t[0]);
+ w_t[ 5] = swap32 (w1_t[1]);
+ w_t[ 6] = swap32 (w1_t[2]);
+ w_t[ 7] = swap32 (w1_t[3]);
+ w_t[ 8] = swap32 (w2_t[0]);
+ w_t[ 9] = swap32 (w2_t[1]);
+ w_t[10] = swap32 (w2_t[2]);
+ w_t[11] = swap32 (w2_t[3]);
+ w_t[12] = swap32 (w3_t[0]);
+ w_t[13] = swap32 (w3_t[1]);
+ w_t[14] = swap32 (w3_t[2]);
+ w_t[15] = swap32 (w3_t[3]);
w_t[ 0] = w_t[ 0] >> 8;
w_t[ 1] = w_t[ 1] >> 8;
* salt
*/
- const u32 salt_buf0 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]);
- const u32 salt_buf1 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]);
- const u32 salt_buf2 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80
+ const u32 salt_buf0 = swap32 (salt_bufs[salt_pos].salt_buf[ 0]);
+ const u32 salt_buf1 = swap32 (salt_bufs[salt_pos].salt_buf[ 1]);
+ const u32 salt_buf2 = swap32 (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80
/**
* loop
* salt
*/
- const u32 salt_buf0 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]);
- const u32 salt_buf1 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]);
- const u32 salt_buf2 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80
+ const u32 salt_buf0 = swap32 (salt_bufs[salt_pos].salt_buf[ 0]);
+ const u32 salt_buf1 = swap32 (salt_bufs[salt_pos].salt_buf[ 1]);
+ const u32 salt_buf2 = swap32 (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80
/**
* loop
u32 salt_buf0[2];
- salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]);
- salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]);
+ salt_buf0[0] = swap32 (salt_bufs[salt_pos].salt_buf[0]);
+ salt_buf0[1] = swap32 (salt_bufs[salt_pos].salt_buf[1]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
* sha1
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 salt_buf0[2];
- salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]);
- salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]);
+ salt_buf0[0] = swap32 (salt_bufs[salt_pos].salt_buf[0]);
+ salt_buf0[1] = swap32 (salt_bufs[salt_pos].salt_buf[1]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
* sha1
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 salt_buf0[2];
- salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]);
- salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]);
+ salt_buf0[0] = swap32 (salt_bufs[salt_pos].salt_buf[0]);
+ salt_buf0[1] = swap32 (salt_bufs[salt_pos].salt_buf[1]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
* sha1
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 salt_buf0[2];
- salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]);
- salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]);
+ salt_buf0[0] = swap32 (salt_bufs[salt_pos].salt_buf[0]);
+ salt_buf0[1] = swap32 (salt_bufs[salt_pos].salt_buf[1]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
* sha1
*/
- w0_t[0] = swap_workaround (w0_t[0]);
- w0_t[1] = swap_workaround (w0_t[1]);
- w0_t[2] = swap_workaround (w0_t[2]);
- w0_t[3] = swap_workaround (w0_t[3]);
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
- //w3_t[2] = swap_workaround (w3_t[2]);
- //w3_t[3] = swap_workaround (w3_t[3]);
+ w0_t[0] = swap32 (w0_t[0]);
+ w0_t[1] = swap32 (w0_t[1]);
+ w0_t[2] = swap32 (w0_t[2]);
+ w0_t[3] = swap32 (w0_t[3]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
+ //w3_t[2] = swap32 (w3_t[2]);
+ //w3_t[3] = swap32 (w3_t[3]);
u32 a = SHA1M_A;
u32 b = SHA1M_B;
* base
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
append_0x80_2x4 (w0, w1, pw_len + 1);
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
/**
* main
* base
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
append_0x80_3x4 (w0, w1, w2, pw_len + 1);
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
/**
* main
* base
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
w3[2] = 0;
w3[3] = 0;
append_0x80_4x4 (w0, w1, w2, w3, pw_len + 1);
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
w3[2] = 0;
w3[3] = 0;
* base
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
append_0x80_2x4 (w0, w1, pw_len + 1);
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
/**
* main
* base
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
append_0x80_3x4 (w0, w1, w2, pw_len + 1);
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
/**
* main
* base
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
w3[2] = 0;
w3[3] = 0;
append_0x80_4x4 (w0, w1, w2, w3, pw_len + 1);
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
w3[2] = 0;
w3[3] = 0;
u64 data_buf[16];
- data_buf[ 0] = ((u64) swap_workaround (salt_buf0[ 0])) << 32 | (u64) swap_workaround (salt_buf0[ 1]);
- data_buf[ 1] = ((u64) swap_workaround (salt_buf0[ 2])) << 32 | (u64) swap_workaround (salt_buf0[ 3]);
- data_buf[ 2] = ((u64) swap_workaround (salt_buf1[ 0])) << 32 | (u64) swap_workaround (salt_buf1[ 1]);
+ data_buf[ 0] = ((u64) swap32 (salt_buf0[ 0])) << 32 | (u64) swap32 (salt_buf0[ 1]);
+ data_buf[ 1] = ((u64) swap32 (salt_buf0[ 2])) << 32 | (u64) swap32 (salt_buf0[ 3]);
+ data_buf[ 2] = ((u64) swap32 (salt_buf1[ 0])) << 32 | (u64) swap32 (salt_buf1[ 1]);
data_buf[ 3] = 0;
data_buf[ 4] = 0;
data_buf[ 5] = 0;
u64 w[16];
- w[ 0] = ((u64) swap_workaround (w0[0])) << 32 | (u64) swap_workaround (w0[1]);
- w[ 1] = ((u64) swap_workaround (w0[2])) << 32 | (u64) swap_workaround (w0[3]);
- w[ 2] = ((u64) swap_workaround (w1[0])) << 32 | (u64) swap_workaround (w1[1]);
- w[ 3] = ((u64) swap_workaround (w1[2])) << 32 | (u64) swap_workaround (w1[3]);
- w[ 4] = ((u64) swap_workaround (w2[0])) << 32 | (u64) swap_workaround (w2[1]);
- w[ 5] = ((u64) swap_workaround (w2[2])) << 32 | (u64) swap_workaround (w2[3]);
- w[ 6] = ((u64) swap_workaround (w3[0])) << 32 | (u64) swap_workaround (w3[1]);
- w[ 7] = ((u64) swap_workaround (w3[2])) << 32 | (u64) swap_workaround (w3[3]);
+ w[ 0] = ((u64) swap32 (w0[0])) << 32 | (u64) swap32 (w0[1]);
+ w[ 1] = ((u64) swap32 (w0[2])) << 32 | (u64) swap32 (w0[3]);
+ w[ 2] = ((u64) swap32 (w1[0])) << 32 | (u64) swap32 (w1[1]);
+ w[ 3] = ((u64) swap32 (w1[2])) << 32 | (u64) swap32 (w1[3]);
+ w[ 4] = ((u64) swap32 (w2[0])) << 32 | (u64) swap32 (w2[1]);
+ w[ 5] = ((u64) swap32 (w2[2])) << 32 | (u64) swap32 (w2[3]);
+ w[ 6] = ((u64) swap32 (w3[0])) << 32 | (u64) swap32 (w3[1]);
+ w[ 7] = ((u64) swap32 (w3[2])) << 32 | (u64) swap32 (w3[3]);
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
u32 w0_t2[4];
- w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]);
- w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]);
- w0_t2[2] = swap_workaround (w0_t[2] | d0[2] | s0[2]);
- w0_t2[3] = swap_workaround (w0_t[3] | d0[3] | s0[3]);
+ w0_t2[0] = swap32 (w0_t[0] | d0[0] | s0[0]);
+ w0_t2[1] = swap32 (w0_t[1] | d0[1] | s0[1]);
+ w0_t2[2] = swap32 (w0_t[2] | d0[2] | s0[2]);
+ w0_t2[3] = swap32 (w0_t[3] | d0[3] | s0[3]);
u32 w1_t2[4];
- w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]);
- w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]);
- w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | s1[2]);
- w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]);
+ w1_t2[0] = swap32 (w1_t[0] | d1[0] | s1[0]);
+ w1_t2[1] = swap32 (w1_t[1] | d1[1] | s1[1]);
+ w1_t2[2] = swap32 (w1_t[2] | d1[2] | s1[2]);
+ w1_t2[3] = swap32 (w1_t[3] | d1[3] | s1[3]);
u32 w2_t2[4];
- w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]);
- w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]);
- w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]);
- w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]);
+ w2_t2[0] = swap32 (w2_t[0] | d2[0] | s2[0]);
+ w2_t2[1] = swap32 (w2_t[1] | d2[1] | s2[1]);
+ w2_t2[2] = swap32 (w2_t[2] | d2[2] | s2[2]);
+ w2_t2[3] = swap32 (w2_t[3] | d2[3] | s2[3]);
u32 w3_t2[4];
- w3_t2[0] = swap_workaround (w3_t[0] | d3[0] | s3[0]);
- w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]);
+ w3_t2[0] = swap32 (w3_t[0] | d3[0] | s3[0]);
+ w3_t2[1] = swap32 (w3_t[1] | d3[1] | s3[1]);
w3_t2[2] = 0;
w3_t2[3] = (1 + out_len + domain_len + 1 + salt_len) * 8;
u32 w1_t3[4];
w1_t3[0] = digest[4];
- w1_t3[1] = swap_workaround (salt_buf0[0]);
- w1_t3[2] = swap_workaround (salt_buf0[1]);
- w1_t3[3] = swap_workaround (salt_buf0[2]);
+ w1_t3[1] = swap32 (salt_buf0[0]);
+ w1_t3[2] = swap32 (salt_buf0[1]);
+ w1_t3[3] = swap32 (salt_buf0[2]);
u32 w2_t3[4];
- w2_t3[0] = swap_workaround (salt_buf0[3]);
- w2_t3[1] = swap_workaround (salt_buf1[0]);
- w2_t3[2] = swap_workaround (salt_buf1[1]);
- w2_t3[3] = swap_workaround (salt_buf1[2]);
+ w2_t3[0] = swap32 (salt_buf0[3]);
+ w2_t3[1] = swap32 (salt_buf1[0]);
+ w2_t3[2] = swap32 (salt_buf1[1]);
+ w2_t3[3] = swap32 (salt_buf1[2]);
u32 w3_t3[4];
- w3_t3[0] = swap_workaround (salt_buf1[3]);
+ w3_t3[0] = swap32 (salt_buf1[3]);
w3_t3[1] = 0;
w3_t3[2] = 0;
w3_t3[3] = (20 + salt_len) * 8;
u32 w0_t2[4];
- w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]);
- w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]);
- w0_t2[2] = swap_workaround (w0_t[2] | d0[2] | s0[2]);
- w0_t2[3] = swap_workaround (w0_t[3] | d0[3] | s0[3]);
+ w0_t2[0] = swap32 (w0_t[0] | d0[0] | s0[0]);
+ w0_t2[1] = swap32 (w0_t[1] | d0[1] | s0[1]);
+ w0_t2[2] = swap32 (w0_t[2] | d0[2] | s0[2]);
+ w0_t2[3] = swap32 (w0_t[3] | d0[3] | s0[3]);
u32 w1_t2[4];
- w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]);
- w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]);
- w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | s1[2]);
- w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]);
+ w1_t2[0] = swap32 (w1_t[0] | d1[0] | s1[0]);
+ w1_t2[1] = swap32 (w1_t[1] | d1[1] | s1[1]);
+ w1_t2[2] = swap32 (w1_t[2] | d1[2] | s1[2]);
+ w1_t2[3] = swap32 (w1_t[3] | d1[3] | s1[3]);
u32 w2_t2[4];
- w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]);
- w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]);
- w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]);
- w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]);
+ w2_t2[0] = swap32 (w2_t[0] | d2[0] | s2[0]);
+ w2_t2[1] = swap32 (w2_t[1] | d2[1] | s2[1]);
+ w2_t2[2] = swap32 (w2_t[2] | d2[2] | s2[2]);
+ w2_t2[3] = swap32 (w2_t[3] | d2[3] | s2[3]);
u32 w3_t2[4];
- w3_t2[0] = swap_workaround (w3_t[0] | d3[0] | s3[0]);
- w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]);
+ w3_t2[0] = swap32 (w3_t[0] | d3[0] | s3[0]);
+ w3_t2[1] = swap32 (w3_t[1] | d3[1] | s3[1]);
w3_t2[2] = 0;
w3_t2[3] = (1 + out_len + domain_len + 1 + salt_len) * 8;
u32 w1_t3[4];
w1_t3[0] = digest[4];
- w1_t3[1] = swap_workaround (salt_buf0[0]);
- w1_t3[2] = swap_workaround (salt_buf0[1]);
- w1_t3[3] = swap_workaround (salt_buf0[2]);
+ w1_t3[1] = swap32 (salt_buf0[0]);
+ w1_t3[2] = swap32 (salt_buf0[1]);
+ w1_t3[3] = swap32 (salt_buf0[2]);
u32 w2_t3[4];
- w2_t3[0] = swap_workaround (salt_buf0[3]);
- w2_t3[1] = swap_workaround (salt_buf1[0]);
- w2_t3[2] = swap_workaround (salt_buf1[1]);
- w2_t3[3] = swap_workaround (salt_buf1[2]);
+ w2_t3[0] = swap32 (salt_buf0[3]);
+ w2_t3[1] = swap32 (salt_buf1[0]);
+ w2_t3[2] = swap32 (salt_buf1[1]);
+ w2_t3[3] = swap32 (salt_buf1[2]);
u32 w3_t3[4];
- w3_t3[0] = swap_workaround (salt_buf1[3]);
+ w3_t3[0] = swap32 (salt_buf1[3]);
w3_t3[1] = 0;
w3_t3[2] = 0;
w3_t3[3] = (20 + salt_len) * 8;
u32 w0_t2[4];
- w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]);
- w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]);
- w0_t2[2] = swap_workaround (w0_t[2] | d0[2] | s0[2]);
- w0_t2[3] = swap_workaround (w0_t[3] | d0[3] | s0[3]);
+ w0_t2[0] = swap32 (w0_t[0] | d0[0] | s0[0]);
+ w0_t2[1] = swap32 (w0_t[1] | d0[1] | s0[1]);
+ w0_t2[2] = swap32 (w0_t[2] | d0[2] | s0[2]);
+ w0_t2[3] = swap32 (w0_t[3] | d0[3] | s0[3]);
u32 w1_t2[4];
- w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]);
- w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]);
- w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | s1[2]);
- w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]);
+ w1_t2[0] = swap32 (w1_t[0] | d1[0] | s1[0]);
+ w1_t2[1] = swap32 (w1_t[1] | d1[1] | s1[1]);
+ w1_t2[2] = swap32 (w1_t[2] | d1[2] | s1[2]);
+ w1_t2[3] = swap32 (w1_t[3] | d1[3] | s1[3]);
u32 w2_t2[4];
- w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]);
- w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]);
- w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]);
- w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]);
+ w2_t2[0] = swap32 (w2_t[0] | d2[0] | s2[0]);
+ w2_t2[1] = swap32 (w2_t[1] | d2[1] | s2[1]);
+ w2_t2[2] = swap32 (w2_t[2] | d2[2] | s2[2]);
+ w2_t2[3] = swap32 (w2_t[3] | d2[3] | s2[3]);
u32 w3_t2[4];
- w3_t2[0] = swap_workaround (w3_t[0] | d3[0] | s3[0]);
- w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]);
+ w3_t2[0] = swap32 (w3_t[0] | d3[0] | s3[0]);
+ w3_t2[1] = swap32 (w3_t[1] | d3[1] | s3[1]);
w3_t2[2] = 0;
w3_t2[3] = (1 + pw_len + domain_len + 1 + salt_len) * 8;
u32 w1_t3[4];
w1_t3[0] = digest[4];
- w1_t3[1] = swap_workaround (salt_buf0[0]);
- w1_t3[2] = swap_workaround (salt_buf0[1]);
- w1_t3[3] = swap_workaround (salt_buf0[2]);
+ w1_t3[1] = swap32 (salt_buf0[0]);
+ w1_t3[2] = swap32 (salt_buf0[1]);
+ w1_t3[3] = swap32 (salt_buf0[2]);
u32 w2_t3[4];
- w2_t3[0] = swap_workaround (salt_buf0[3]);
- w2_t3[1] = swap_workaround (salt_buf1[0]);
- w2_t3[2] = swap_workaround (salt_buf1[1]);
- w2_t3[3] = swap_workaround (salt_buf1[2]);
+ w2_t3[0] = swap32 (salt_buf0[3]);
+ w2_t3[1] = swap32 (salt_buf1[0]);
+ w2_t3[2] = swap32 (salt_buf1[1]);
+ w2_t3[3] = swap32 (salt_buf1[2]);
u32 w3_t3[4];
- w3_t3[0] = swap_workaround (salt_buf1[3]);
+ w3_t3[0] = swap32 (salt_buf1[3]);
w3_t3[1] = 0;
w3_t3[2] = 0;
w3_t3[3] = (20 + salt_len) * 8;
u32 w0_t2[4];
- w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]);
- w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]);
- w0_t2[2] = swap_workaround (w0_t[2] | d0[2] | s0[2]);
- w0_t2[3] = swap_workaround (w0_t[3] | d0[3] | s0[3]);
+ w0_t2[0] = swap32 (w0_t[0] | d0[0] | s0[0]);
+ w0_t2[1] = swap32 (w0_t[1] | d0[1] | s0[1]);
+ w0_t2[2] = swap32 (w0_t[2] | d0[2] | s0[2]);
+ w0_t2[3] = swap32 (w0_t[3] | d0[3] | s0[3]);
u32 w1_t2[4];
- w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]);
- w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]);
- w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | s1[2]);
- w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]);
+ w1_t2[0] = swap32 (w1_t[0] | d1[0] | s1[0]);
+ w1_t2[1] = swap32 (w1_t[1] | d1[1] | s1[1]);
+ w1_t2[2] = swap32 (w1_t[2] | d1[2] | s1[2]);
+ w1_t2[3] = swap32 (w1_t[3] | d1[3] | s1[3]);
u32 w2_t2[4];
- w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]);
- w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]);
- w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]);
- w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]);
+ w2_t2[0] = swap32 (w2_t[0] | d2[0] | s2[0]);
+ w2_t2[1] = swap32 (w2_t[1] | d2[1] | s2[1]);
+ w2_t2[2] = swap32 (w2_t[2] | d2[2] | s2[2]);
+ w2_t2[3] = swap32 (w2_t[3] | d2[3] | s2[3]);
u32 w3_t2[4];
- w3_t2[0] = swap_workaround (w3_t[0] | d3[0] | s3[0]);
- w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]);
+ w3_t2[0] = swap32 (w3_t[0] | d3[0] | s3[0]);
+ w3_t2[1] = swap32 (w3_t[1] | d3[1] | s3[1]);
w3_t2[2] = 0;
w3_t2[3] = (1 + pw_len + domain_len + 1 + salt_len) * 8;
u32 w1_t3[4];
w1_t3[0] = digest[4];
- w1_t3[1] = swap_workaround (salt_buf0[0]);
- w1_t3[2] = swap_workaround (salt_buf0[1]);
- w1_t3[3] = swap_workaround (salt_buf0[2]);
+ w1_t3[1] = swap32 (salt_buf0[0]);
+ w1_t3[2] = swap32 (salt_buf0[1]);
+ w1_t3[3] = swap32 (salt_buf0[2]);
u32 w2_t3[4];
- w2_t3[0] = swap_workaround (salt_buf0[3]);
- w2_t3[1] = swap_workaround (salt_buf1[0]);
- w2_t3[2] = swap_workaround (salt_buf1[1]);
- w2_t3[3] = swap_workaround (salt_buf1[2]);
+ w2_t3[0] = swap32 (salt_buf0[3]);
+ w2_t3[1] = swap32 (salt_buf1[0]);
+ w2_t3[2] = swap32 (salt_buf1[1]);
+ w2_t3[3] = swap32 (salt_buf1[2]);
u32 w3_t3[4];
- w3_t3[0] = swap_workaround (salt_buf1[3]);
+ w3_t3[0] = swap32 (salt_buf1[3]);
w3_t3[1] = 0;
w3_t3[2] = 0;
w3_t3[3] = (20 + salt_len) * 8;
u32 w0_t2[4];
- w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]);
- w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]);
- w0_t2[2] = swap_workaround (w0_t[2] | d0[2] | s0[2]);
- w0_t2[3] = swap_workaround (w0_t[3] | d0[3] | s0[3]);
+ w0_t2[0] = swap32 (w0_t[0] | d0[0] | s0[0]);
+ w0_t2[1] = swap32 (w0_t[1] | d0[1] | s0[1]);
+ w0_t2[2] = swap32 (w0_t[2] | d0[2] | s0[2]);
+ w0_t2[3] = swap32 (w0_t[3] | d0[3] | s0[3]);
u32 w1_t2[4];
- w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]);
- w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]);
- w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | s1[2]);
- w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]);
+ w1_t2[0] = swap32 (w1_t[0] | d1[0] | s1[0]);
+ w1_t2[1] = swap32 (w1_t[1] | d1[1] | s1[1]);
+ w1_t2[2] = swap32 (w1_t[2] | d1[2] | s1[2]);
+ w1_t2[3] = swap32 (w1_t[3] | d1[3] | s1[3]);
u32 w2_t2[4];
- w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]);
- w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]);
- w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]);
- w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]);
+ w2_t2[0] = swap32 (w2_t[0] | d2[0] | s2[0]);
+ w2_t2[1] = swap32 (w2_t[1] | d2[1] | s2[1]);
+ w2_t2[2] = swap32 (w2_t[2] | d2[2] | s2[2]);
+ w2_t2[3] = swap32 (w2_t[3] | d2[3] | s2[3]);
u32 w3_t2[4];
- w3_t2[0] = swap_workaround (w3_t[0] | d3[0] | s3[0]);
- w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]);
+ w3_t2[0] = swap32 (w3_t[0] | d3[0] | s3[0]);
+ w3_t2[1] = swap32 (w3_t[1] | d3[1] | s3[1]);
w3_t2[2] = 0;
w3_t2[3] = (1 + pw_len + domain_len + 1 + salt_len) * 8;
u32 w1_t3[4];
w1_t3[0] = digest[4];
- w1_t3[1] = swap_workaround (salt_buf0[0]);
- w1_t3[2] = swap_workaround (salt_buf0[1]);
- w1_t3[3] = swap_workaround (salt_buf0[2]);
+ w1_t3[1] = swap32 (salt_buf0[0]);
+ w1_t3[2] = swap32 (salt_buf0[1]);
+ w1_t3[3] = swap32 (salt_buf0[2]);
u32 w2_t3[4];
- w2_t3[0] = swap_workaround (salt_buf0[3]);
- w2_t3[1] = swap_workaround (salt_buf1[0]);
- w2_t3[2] = swap_workaround (salt_buf1[1]);
- w2_t3[3] = swap_workaround (salt_buf1[2]);
+ w2_t3[0] = swap32 (salt_buf0[3]);
+ w2_t3[1] = swap32 (salt_buf1[0]);
+ w2_t3[2] = swap32 (salt_buf1[1]);
+ w2_t3[3] = swap32 (salt_buf1[2]);
u32 w3_t3[4];
- w3_t3[0] = swap_workaround (salt_buf1[3]);
+ w3_t3[0] = swap32 (salt_buf1[3]);
w3_t3[1] = 0;
w3_t3[2] = 0;
w3_t3[3] = (20 + salt_len) * 8;
u32 w0_t2[4];
- w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]);
- w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]);
- w0_t2[2] = swap_workaround (w0_t[2] | d0[2] | s0[2]);
- w0_t2[3] = swap_workaround (w0_t[3] | d0[3] | s0[3]);
+ w0_t2[0] = swap32 (w0_t[0] | d0[0] | s0[0]);
+ w0_t2[1] = swap32 (w0_t[1] | d0[1] | s0[1]);
+ w0_t2[2] = swap32 (w0_t[2] | d0[2] | s0[2]);
+ w0_t2[3] = swap32 (w0_t[3] | d0[3] | s0[3]);
u32 w1_t2[4];
- w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]);
- w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]);
- w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | s1[2]);
- w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]);
+ w1_t2[0] = swap32 (w1_t[0] | d1[0] | s1[0]);
+ w1_t2[1] = swap32 (w1_t[1] | d1[1] | s1[1]);
+ w1_t2[2] = swap32 (w1_t[2] | d1[2] | s1[2]);
+ w1_t2[3] = swap32 (w1_t[3] | d1[3] | s1[3]);
u32 w2_t2[4];
- w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]);
- w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]);
- w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]);
- w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]);
+ w2_t2[0] = swap32 (w2_t[0] | d2[0] | s2[0]);
+ w2_t2[1] = swap32 (w2_t[1] | d2[1] | s2[1]);
+ w2_t2[2] = swap32 (w2_t[2] | d2[2] | s2[2]);
+ w2_t2[3] = swap32 (w2_t[3] | d2[3] | s2[3]);
u32 w3_t2[4];
- w3_t2[0] = swap_workaround (w3_t[0] | d3[0] | s3[0]);
- w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]);
+ w3_t2[0] = swap32 (w3_t[0] | d3[0] | s3[0]);
+ w3_t2[1] = swap32 (w3_t[1] | d3[1] | s3[1]);
w3_t2[2] = 0;
w3_t2[3] = (1 + pw_len + domain_len + 1 + salt_len) * 8;
u32 w1_t3[4];
w1_t3[0] = digest[4];
- w1_t3[1] = swap_workaround (salt_buf0[0]);
- w1_t3[2] = swap_workaround (salt_buf0[1]);
- w1_t3[3] = swap_workaround (salt_buf0[2]);
+ w1_t3[1] = swap32 (salt_buf0[0]);
+ w1_t3[2] = swap32 (salt_buf0[1]);
+ w1_t3[3] = swap32 (salt_buf0[2]);
u32 w2_t3[4];
- w2_t3[0] = swap_workaround (salt_buf0[3]);
- w2_t3[1] = swap_workaround (salt_buf1[0]);
- w2_t3[2] = swap_workaround (salt_buf1[1]);
- w2_t3[3] = swap_workaround (salt_buf1[2]);
+ w2_t3[0] = swap32 (salt_buf0[3]);
+ w2_t3[1] = swap32 (salt_buf1[0]);
+ w2_t3[2] = swap32 (salt_buf1[1]);
+ w2_t3[3] = swap32 (salt_buf1[2]);
u32 w3_t3[4];
- w3_t3[0] = swap_workaround (salt_buf1[3]);
+ w3_t3[0] = swap32 (salt_buf1[3]);
w3_t3[1] = 0;
w3_t3[2] = 0;
w3_t3[3] = (20 + salt_len) * 8;
u32 salt_buf0[4];
- salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]);
- salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]);
- salt_buf0[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]);
- salt_buf0[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 3]);
+ salt_buf0[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 0]);
+ salt_buf0[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 1]);
+ salt_buf0[2] = swap32 (salt_bufs[salt_pos].salt_buf[ 2]);
+ salt_buf0[3] = swap32 (salt_bufs[salt_pos].salt_buf[ 3]);
u32 salt_buf1[4];
- salt_buf1[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 4]);
- salt_buf1[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 5]);
- salt_buf1[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 6]);
- salt_buf1[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 7]);
+ salt_buf1[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 4]);
+ salt_buf1[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 5]);
+ salt_buf1[2] = swap32 (salt_bufs[salt_pos].salt_buf[ 6]);
+ salt_buf1[3] = swap32 (salt_bufs[salt_pos].salt_buf[ 7]);
u32 salt_buf2[4];
- salt_buf2[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 8]);
- salt_buf2[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 9]);
+ salt_buf2[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 8]);
+ salt_buf2[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 9]);
salt_buf2[2] = 0;
salt_buf2[3] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
u32 w3_t[4];
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = pw_len * 8;
u32 salt_buf0[4];
- salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]);
- salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]);
- salt_buf0[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]);
- salt_buf0[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 3]);
+ salt_buf0[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 0]);
+ salt_buf0[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 1]);
+ salt_buf0[2] = swap32 (salt_bufs[salt_pos].salt_buf[ 2]);
+ salt_buf0[3] = swap32 (salt_bufs[salt_pos].salt_buf[ 3]);
u32 salt_buf1[4];
- salt_buf1[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 4]);
- salt_buf1[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 5]);
- salt_buf1[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 6]);
- salt_buf1[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 7]);
+ salt_buf1[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 4]);
+ salt_buf1[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 5]);
+ salt_buf1[2] = swap32 (salt_bufs[salt_pos].salt_buf[ 6]);
+ salt_buf1[3] = swap32 (salt_bufs[salt_pos].salt_buf[ 7]);
u32 salt_buf2[4];
- salt_buf2[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 8]);
- salt_buf2[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 9]);
+ salt_buf2[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 8]);
+ salt_buf2[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 9]);
salt_buf2[2] = 0;
salt_buf2[3] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
u32 w3_t[4];
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = pw_len * 8;
u32 salt_buf0[4];
- salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]);
- salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]);
- salt_buf0[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]);
- salt_buf0[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 3]);
+ salt_buf0[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 0]);
+ salt_buf0[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 1]);
+ salt_buf0[2] = swap32 (salt_bufs[salt_pos].salt_buf[ 2]);
+ salt_buf0[3] = swap32 (salt_bufs[salt_pos].salt_buf[ 3]);
u32 salt_buf1[4];
- salt_buf1[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 4]);
- salt_buf1[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 5]);
- salt_buf1[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 6]);
- salt_buf1[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 7]);
+ salt_buf1[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 4]);
+ salt_buf1[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 5]);
+ salt_buf1[2] = swap32 (salt_bufs[salt_pos].salt_buf[ 6]);
+ salt_buf1[3] = swap32 (salt_bufs[salt_pos].salt_buf[ 7]);
u32 salt_buf2[4];
- salt_buf2[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 8]);
- salt_buf2[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 9]);
+ salt_buf2[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 8]);
+ salt_buf2[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 9]);
salt_buf2[2] = 0;
salt_buf2[3] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
u32 w3_t[4];
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = pw_len * 8;
u32 salt_buf0[4];
- salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]);
- salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]);
- salt_buf0[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]);
- salt_buf0[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 3]);
+ salt_buf0[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 0]);
+ salt_buf0[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 1]);
+ salt_buf0[2] = swap32 (salt_bufs[salt_pos].salt_buf[ 2]);
+ salt_buf0[3] = swap32 (salt_bufs[salt_pos].salt_buf[ 3]);
u32 salt_buf1[4];
- salt_buf1[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 4]);
- salt_buf1[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 5]);
- salt_buf1[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 6]);
- salt_buf1[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 7]);
+ salt_buf1[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 4]);
+ salt_buf1[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 5]);
+ salt_buf1[2] = swap32 (salt_bufs[salt_pos].salt_buf[ 6]);
+ salt_buf1[3] = swap32 (salt_bufs[salt_pos].salt_buf[ 7]);
u32 salt_buf2[4];
- salt_buf2[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 8]);
- salt_buf2[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 9]);
+ salt_buf2[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 8]);
+ salt_buf2[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 9]);
salt_buf2[2] = 0;
salt_buf2[3] = 0;
u32 w0_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
u32 w1_t[4];
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
u32 w2_t[4];
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
u32 w3_t[4];
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = pw_len * 8;
u32 salt_buf0[4];
- salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]);
- salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]);
- salt_buf0[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]);
- salt_buf0[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 3]);
+ salt_buf0[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 0]);
+ salt_buf0[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 1]);
+ salt_buf0[2] = swap32 (salt_bufs[salt_pos].salt_buf[ 2]);
+ salt_buf0[3] = swap32 (salt_bufs[salt_pos].salt_buf[ 3]);
u32 salt_buf1[4];
- salt_buf1[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 4]);
- salt_buf1[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 5]);
- salt_buf1[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 6]);
- salt_buf1[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 7]);
+ salt_buf1[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 4]);
+ salt_buf1[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 5]);
+ salt_buf1[2] = swap32 (salt_bufs[salt_pos].salt_buf[ 6]);
+ salt_buf1[3] = swap32 (salt_bufs[salt_pos].salt_buf[ 7]);
u32 salt_buf2[4];
- salt_buf2[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 8]);
- salt_buf2[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 9]);
+ salt_buf2[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 8]);
+ salt_buf2[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 9]);
salt_buf2[2] = 0;
salt_buf2[3] = 0;
u32 salt_buf0[4];
- salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]);
- salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]);
- salt_buf0[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]);
- salt_buf0[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 3]);
+ salt_buf0[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 0]);
+ salt_buf0[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 1]);
+ salt_buf0[2] = swap32 (salt_bufs[salt_pos].salt_buf[ 2]);
+ salt_buf0[3] = swap32 (salt_bufs[salt_pos].salt_buf[ 3]);
u32 salt_buf1[4];
- salt_buf1[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 4]);
- salt_buf1[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 5]);
- salt_buf1[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 6]);
- salt_buf1[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 7]);
+ salt_buf1[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 4]);
+ salt_buf1[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 5]);
+ salt_buf1[2] = swap32 (salt_bufs[salt_pos].salt_buf[ 6]);
+ salt_buf1[3] = swap32 (salt_bufs[salt_pos].salt_buf[ 7]);
u32 salt_buf2[4];
- salt_buf2[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 8]);
- salt_buf2[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 9]);
+ salt_buf2[0] = swap32 (salt_bufs[salt_pos].salt_buf[ 8]);
+ salt_buf2[1] = swap32 (salt_bufs[salt_pos].salt_buf[ 9]);
salt_buf2[2] = 0;
salt_buf2[3] = 0;
* pads
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
- w3[2] = swap_workaround (w3[2]);
- w3[3] = swap_workaround (w3[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
+ w3[2] = swap32 (w3[2]);
+ w3[3] = swap32 (w3[3]);
u32 ipad[5];
u32 opad[5];
append_0x80_3x4 (w0, w1, w2, salt_len + 4);
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
}
// we need just a few swapped, because we do not access the others
- r[ 5] = swap_workaround (r[ 5]);
- r[ 6] = swap_workaround (r[ 6]);
- r[14] = swap_workaround (r[14]);
+ r[ 5] = swap32 (r[ 5]);
+ r[ 6] = swap32 (r[ 6]);
+ r[14] = swap32 (r[14]);
// superblock not on id 0 or 1
// assumes max block size is 32MiB
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
-
-#ifdef VECT_SIZE2
-#define COMPARE_M "check_multi_vect2_comp4.c"
-#endif
-
-#ifdef VECT_SIZE4
-#define COMPARE_M "check_multi_vect4_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
__constant u32 k_sha256[64] =
{
}
}
// Reverses the byte order inside every 32-bit component of v (endianness swap).
// Bytes 0 and 2 (mask 0x00FF00FF) are rotated left by 24 bits, bytes 1 and 3
// (mask 0xFF00FF00) are rotated left by 8 bits, and the two halves are OR-ed together.
-static uint4 swap_workaround (uint4 v)
+static uint4 swap32_4 (uint4 v)
{
return (rotate ((v & 0x00FF00FF), 24u) | rotate ((v & 0xFF00FF00), 8u));
}
// Element-count helpers: each expands to 2 * r * 16, with GET_SMIX_CNT additionally scaled by N.
#define GET_SMIX_CNT(r,N) (2 * (r) * 16 * (N))
#define GET_STATE_CNT(r) (2 * (r) * 16)
// Compile-time counts for the configured SCRYPT_R / SCRYPT_P. The *4 variants are the
// same counts divided by 4; the kernels below use them as uint4 array lengths and loop bounds.
+#define SCRYPT_CNT GET_SCRYPT_CNT (SCRYPT_R, SCRYPT_P)
+#define SCRYPT_CNT4 (SCRYPT_CNT / 4)
+#define STATE_CNT GET_STATE_CNT (SCRYPT_R)
+#define STATE_CNT4 (STATE_CNT / 4)
+
// r ^= rotate (i1 + i2, s); NOTE: the trailing semicolon is part of the expansion.
#define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s));
#define SALSA20_2R() \
R3 = R3 + X3; \
}
-static void salsa_r (uint4 *T, const u32 r)
+static void salsa_r (uint4 *T)
{
- const u32 state_cnt = GET_STATE_CNT (r);
-
- const u32 state_cnt4 = state_cnt / 4;
-
- uint4 R0 = T[state_cnt4 - 4];
- uint4 R1 = T[state_cnt4 - 3];
- uint4 R2 = T[state_cnt4 - 2];
- uint4 R3 = T[state_cnt4 - 1];
+ uint4 R0 = T[STATE_CNT4 - 4];
+ uint4 R1 = T[STATE_CNT4 - 3];
+ uint4 R2 = T[STATE_CNT4 - 2];
+ uint4 R3 = T[STATE_CNT4 - 1];
- for (u32 i = 0; i < state_cnt4; i += 8)
+ for (u32 i = 0; i < STATE_CNT4; i += 8)
{
uint4 Y0;
uint4 Y1;
exchg (x4 + 3, y4 + 3); \
}
- for (u32 i = 1; i < r / 1; i++)
+ for (u32 i = 1; i < SCRYPT_R / 1; i++)
{
const u32 x = i * 1;
const u32 y = i * 2;
exchg4 (x, y);
}
- for (u32 i = 1; i < r / 2; i++)
+ for (u32 i = 1; i < SCRYPT_R / 2; i++)
{
const u32 x = i * 1;
const u32 y = i * 2;
- const u32 xr1 = (r * 2) - 1 - x;
- const u32 yr1 = (r * 2) - 1 - y;
+ const u32 xr1 = (SCRYPT_R * 2) - 1 - x;
+ const u32 yr1 = (SCRYPT_R * 2) - 1 - y;
exchg4 (xr1, yr1);
}
}
-static void scrypt_smix (uint4 *X, uint4 *T, const u32 N, const u32 r, const u32 tmto, const u32 phy, __global uint4 *V)
+static void scrypt_smix (uint4 *X, uint4 *T, const u32 phy, __global uint4 *V)
{
- const u32 state_cnt = GET_STATE_CNT (r);
-
- const u32 state_cnt4 = state_cnt / 4;
-
#define Coord(x,y,z) (((x) * zSIZE) + ((y) * zSIZE * xSIZE) + (z))
#define CO Coord(x,y,z)
const u32 xSIZE = phy;
- const u32 ySIZE = N / tmto;
- const u32 zSIZE = state_cnt4;
+ const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO;
+ const u32 zSIZE = STATE_CNT4;
const u32 gid = get_global_id (0);
const u32 x = gid % xSIZE;
#pragma unroll
- for (u32 i = 0; i < state_cnt4; i += 4)
+ for (u32 i = 0; i < STATE_CNT4; i += 4)
{
T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
T[1] = (uint4) (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w);
{
for (u32 z = 0; z < zSIZE; z++) V[CO] = X[z];
- for (u32 i = 0; i < tmto; i++) salsa_r (X, r);
+ for (u32 i = 0; i < SCRYPT_TMTO; i++) salsa_r (X);
}
- for (u32 i = 0; i < N; i++)
+ for (u32 i = 0; i < SCRYPT_N; i++)
{
- const u32 k = X[zSIZE - 4].x & (N - 1);
+ const u32 k = X[zSIZE - 4].x & (SCRYPT_N - 1);
- const u32 y = k / tmto;
+ const u32 y = k / SCRYPT_TMTO;
- const u32 km = k - (y * tmto);
+ const u32 km = k - (y * SCRYPT_TMTO);
for (u32 z = 0; z < zSIZE; z++) T[z] = V[CO];
- for (u32 i = 0; i < km; i++) salsa_r (T, r);
+ for (u32 i = 0; i < km; i++) salsa_r (T);
for (u32 z = 0; z < zSIZE; z++) X[z] ^= T[z];
- salsa_r (X, r);
+ salsa_r (X);
}
#pragma unroll
- for (u32 i = 0; i < state_cnt4; i += 4)
+ for (u32 i = 0; i < STATE_CNT4; i += 4)
{
T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
T[1] = (uint4) (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
- /**
- * memory buffers
- */
-
- const u32 scrypt_r = SCRYPT_R;
- const u32 scrypt_p = SCRYPT_P;
- //const u32 scrypt_N = SCRYPT_N;
-
- //const u32 state_cnt = GET_STATE_CNT (scrypt_r);
- const u32 scrypt_cnt = GET_SCRYPT_CNT (scrypt_r, scrypt_p);
- //const u32 smix_cnt = GET_SMIX_CNT (scrypt_r, scrypt_N);
-
/**
* 1st pbkdf2, creates B
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
- w3[2] = swap_workaround (w3[2]);
- w3[3] = swap_workaround (w3[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
+ w3[2] = swap32 (w3[2]);
+ w3[3] = swap32 (w3[3]);
u32 ipad[8];
u32 opad[8];
hmac_sha256_pad (w0, w1, w2, w3, ipad, opad);
- for (u32 i = 0, j = 0, k = 0; i < scrypt_cnt; i += 8, j += 1, k += 2)
+ for (u32 i = 0, j = 0, k = 0; i < SCRYPT_CNT; i += 8, j += 1, k += 2)
{
w0[0] = salt_buf0[0];
w0[1] = salt_buf0[1];
u32 append[2];
- append[0] = swap_workaround (j + 1);
+ append[0] = swap32 (j + 1);
append[1] = 0x80;
memcat8 (w0, w1, w2, w3, salt_len, append);
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
w3[2] = 0;
w3[3] = (64 + salt_len + 4) * 8;
if (gid >= gid_max) return;
- const u32 scrypt_phy = salt_bufs[salt_pos].scrypt_phy;
-
- const u32 state_cnt = GET_STATE_CNT (SCRYPT_R);
- const u32 scrypt_cnt = GET_SCRYPT_CNT (SCRYPT_R, SCRYPT_P);
+ const u32 scrypt_phy = salt_bufs[salt_pos].scrypt_phy;
- const u32 state_cnt4 = state_cnt / 4;
- const u32 scrypt_cnt4 = scrypt_cnt / 4;
-
- uint4 X[state_cnt4];
- uint4 T[state_cnt4];
+ uint4 X[STATE_CNT4];
+ uint4 T[STATE_CNT4];
#pragma unroll
- for (int z = 0; z < state_cnt4; z++) X[z] = swap_workaround (tmps[gid].P[z]);
+ for (int z = 0; z < STATE_CNT4; z++) X[z] = swap32_4 (tmps[gid].P[z]);
- scrypt_smix (X, T, SCRYPT_N, SCRYPT_R, SCRYPT_TMTO, scrypt_phy, d_scryptV_buf);
+ scrypt_smix (X, T, scrypt_phy, d_scryptV_buf);
#pragma unroll
- for (int z = 0; z < state_cnt4; z++) tmps[gid].P[z] = swap_workaround (X[z]);
+ for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[z] = swap32_4 (X[z]);
#if SCRYPT_P >= 1
- for (int i = state_cnt4; i < scrypt_cnt4; i += state_cnt4)
+ for (int i = STATE_CNT4; i < SCRYPT_CNT4; i += STATE_CNT4)
{
- for (int z = 0; z < state_cnt4; z++) X[z] = swap_workaround (tmps[gid].P[i + z]);
+ for (int z = 0; z < STATE_CNT4; z++) X[z] = swap32_4 (tmps[gid].P[i + z]);
- scrypt_smix (X, T, SCRYPT_N, SCRYPT_R, SCRYPT_TMTO, scrypt_phy, d_scryptV_buf);
+ scrypt_smix (X, T, scrypt_phy, d_scryptV_buf);
- for (int z = 0; z < state_cnt4; z++) tmps[gid].P[i + z] = swap_workaround (X[z]);
+ for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[i + z] = swap32_4 (X[z]);
}
#endif
}
const u32 scrypt_p = SCRYPT_P;
//const u32 scrypt_N = SCRYPT_N;
- const u32 scrypt_cnt = GET_SCRYPT_CNT (scrypt_r, scrypt_p);
-
- const u32 scrypt_cnt4 = scrypt_cnt / 4;
/**
* 2nd pbkdf2, consumes B and creates the final digest
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
- w3[2] = swap_workaround (w3[2]);
- w3[3] = swap_workaround (w3[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
+ w3[2] = swap32 (w3[2]);
+ w3[3] = swap32 (w3[3]);
u32 ipad[8];
u32 opad[8];
hmac_sha256_pad (w0, w1, w2, w3, ipad, opad);
- for (u32 l = 0; l < scrypt_cnt4; l += 4)
+ for (u32 l = 0; l < SCRYPT_CNT4; l += 4)
{
barrier (CLK_GLOBAL_MEM_FENCE);
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
- w3[3] = (64 + (scrypt_cnt * 4) + 4) * 8;
+ w3[3] = (64 + (SCRYPT_CNT * 4) + 4) * 8;
u32 digest[8];
hmac_sha256_run (w0, w1, w2, w3, ipad, opad, digest);
- const u32 r0 = swap_workaround (digest[DGST_R0]);
- const u32 r1 = swap_workaround (digest[DGST_R1]);
- const u32 r2 = swap_workaround (digest[DGST_R2]);
- const u32 r3 = swap_workaround (digest[DGST_R3]);
+ const u32 r0 = swap32 (digest[DGST_R0]);
+ const u32 r1 = swap32 (digest[DGST_R1]);
+ const u32 r2 = swap32 (digest[DGST_R2]);
+ const u32 r3 = swap32 (digest[DGST_R3]);
#define il_pos 0
w0[1] = salt_buf[1];
w0[0] = salt_buf[0];
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
const u32 block_len = salt_len + 2 + pw_len;
u32 w2[4];
u32 w3[4];
- w0[0] = swap_workaround (digest[0]);
- w0[1] = swap_workaround (digest[1]);
+ w0[0] = swap32 (digest[0]);
+ w0[1] = swap32 (digest[1]);
w0[2] = 0x00008000;
w0[3] = 0;
w1[0] = 0;
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
-
-#ifdef VECT_SIZE2
-#define COMPARE_M "check_multi_vect2_comp4.c"
-#endif
-
-#ifdef VECT_SIZE4
-#define COMPARE_M "check_multi_vect4_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
__constant char lotus64_table[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
w3[2] = salt_buf3[2];
//w3[3] = salt_buf3[3];
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
- w3[2] = swap_workaround (w3[2]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
+ w3[2] = swap32 (w3[2]);
w3[3] = (64 + salt_len + 4) * 8;
u32 dgst[5];
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
-
-#ifdef VECT_SIZE2
-#define COMPARE_M "check_multi_vect2_comp4.c"
-#endif
-
-#ifdef VECT_SIZE4
-#define COMPARE_M "check_multi_vect4_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
__constant u32 te0[256] =
{
u32 t1[4];
- t1[0] = swap_workaround (w0[0]);
- t1[1] = swap_workaround (w0[1]);
- t1[2] = swap_workaround (w0[2]);
- t1[3] = swap_workaround (w0[3]);
+ t1[0] = swap32 (w0[0]);
+ t1[1] = swap32 (w0[1]);
+ t1[2] = swap32 (w0[2]);
+ t1[3] = swap32 (w0[3]);
u32 t2[4];
- t2[0] = swap_workaround (w1[0]);
- t2[1] = swap_workaround (w1[1]);
- t2[2] = swap_workaround (w1[2]);
- t2[3] = swap_workaround (w1[3]);
+ t2[0] = swap32 (w1[0]);
+ t2[1] = swap32 (w1[1]);
+ t2[2] = swap32 (w1[2]);
+ t2[3] = swap32 (w1[3]);
u32 t3[4];
- t3[0] = swap_workaround (w2[0]);
- t3[1] = swap_workaround (w2[1]);
+ t3[0] = swap32 (w2[0]);
+ t3[1] = swap32 (w2[1]);
t3[2] = 0;
t3[3] = (salt_len + (pw_len * 2)) * 8;
for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
{
- w0[0] = swap_workaround (j);
+ w0[0] = swap32 (j);
u32 digest[5];
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
-
-#ifdef VECT_SIZE2
-#define COMPARE_M "check_multi_vect2_comp4.c"
-#endif
-
-#ifdef VECT_SIZE4
-#define COMPARE_M "check_multi_vect4_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
__constant u32 te0[256] =
{
u32 t1[4];
- t1[0] = swap_workaround (w0[0]);
- t1[1] = swap_workaround (w0[1]);
- t1[2] = swap_workaround (w0[2]);
- t1[3] = swap_workaround (w0[3]);
+ t1[0] = swap32 (w0[0]);
+ t1[1] = swap32 (w0[1]);
+ t1[2] = swap32 (w0[2]);
+ t1[3] = swap32 (w0[3]);
u32 t2[4];
- t2[0] = swap_workaround (w1[0]);
- t2[1] = swap_workaround (w1[1]);
- t2[2] = swap_workaround (w1[2]);
- t2[3] = swap_workaround (w1[3]);
+ t2[0] = swap32 (w1[0]);
+ t2[1] = swap32 (w1[1]);
+ t2[2] = swap32 (w1[2]);
+ t2[3] = swap32 (w1[3]);
u32 t3[4];
- t3[0] = swap_workaround (w2[0]);
- t3[1] = swap_workaround (w2[1]);
+ t3[0] = swap32 (w2[0]);
+ t3[1] = swap32 (w2[1]);
t3[2] = 0;
t3[3] = (salt_len + (pw_len * 2)) * 8;
for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
{
- w0[0] = swap_workaround (j);
+ w0[0] = swap32 (j);
u32 digest[5];
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
-
-#ifdef VECT_SIZE2
-#define COMPARE_M "check_multi_vect2_comp4.c"
-#endif
-
-#ifdef VECT_SIZE4
-#define COMPARE_M "check_multi_vect4_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
__constant u32 te0[256] =
{
t0[0] = (u64) salt_buf[0] << 32 | salt_buf[1];
t0[1] = (u64) salt_buf[2] << 32 | salt_buf[3];
- t0[2] = (u64) swap_workaround (w0[0]) << 32 | swap_workaround (w0[1]);
- t0[3] = (u64) swap_workaround (w0[2]) << 32 | swap_workaround (w0[3]);
+ t0[2] = (u64) swap32 (w0[0]) << 32 | swap32 (w0[1]);
+ t0[3] = (u64) swap32 (w0[2]) << 32 | swap32 (w0[3]);
u64 t1[4];
- t1[0] = (u64) swap_workaround (w1[0]) << 32 | swap_workaround (w1[1]);
- t1[1] = (u64) swap_workaround (w1[2]) << 32 | swap_workaround (w1[3]);
- t1[2] = (u64) swap_workaround (w2[0]) << 32 | swap_workaround (w2[1]);
- t1[3] = (u64) swap_workaround (w2[2]) << 32 | swap_workaround (w2[3]);
+ t1[0] = (u64) swap32 (w1[0]) << 32 | swap32 (w1[1]);
+ t1[1] = (u64) swap32 (w1[2]) << 32 | swap32 (w1[3]);
+ t1[2] = (u64) swap32 (w2[0]) << 32 | swap32 (w2[1]);
+ t1[3] = (u64) swap32 (w2[2]) << 32 | swap32 (w2[3]);
u64 t2[4];
- t2[0] = (u64) swap_workaround (w3[0]) << 32 | swap_workaround (w3[1]);
- t2[1] = (u64) swap_workaround (w3[2]) << 32 | swap_workaround (w3[3]);
+ t2[0] = (u64) swap32 (w3[0]) << 32 | swap32 (w3[1]);
+ t2[1] = (u64) swap32 (w3[2]) << 32 | swap32 (w3[3]);
t2[2] = 0;
t2[3] = 0;
for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
{
- w0[0] = (u64) swap_workaround (j) << 32 | w0[0] & 0xffffffff;
+ w0[0] = (u64) swap32 (j) << 32 | w0[0] & 0xffffffff;
u64 digest[8];
w0_t[1] = salt_buf[1];
w0_t[2] = salt_buf[2];
w0_t[3] = salt_buf[3];
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
u32 key[4];
- key[0] = swap_workaround (digest[0]);
- key[1] = swap_workaround (digest[1]);
- key[2] = swap_workaround (digest[2]);
- key[3] = swap_workaround (digest[3]);
+ key[0] = swap32 (digest[0]);
+ key[1] = swap32 (digest[1]);
+ key[2] = swap32 (digest[2]);
+ key[3] = swap32 (digest[3]);
if (version == 3)
{
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
- w0_t[0] = swap_workaround (out[0]);
- w0_t[1] = swap_workaround (out[1]);
- w0_t[2] = swap_workaround (out[2]);
- w0_t[3] = swap_workaround (out[3]);
+ w0_t[0] = swap32 (out[0]);
+ w0_t[1] = swap32 (out[1]);
+ w0_t[2] = swap32 (out[2]);
+ w0_t[3] = swap32 (out[3]);
w1_t[0] = 0x80000000;
w1_t[1] = 0;
w1_t[2] = 0;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- digest[0] = swap_workaround (digest[0]);
- digest[1] = swap_workaround (digest[1]);
- digest[2] = swap_workaround (digest[2]);
- digest[3] = swap_workaround (digest[3]);
+ digest[0] = swap32 (digest[0]);
+ digest[1] = swap32 (digest[1]);
+ digest[2] = swap32 (digest[2]);
+ digest[3] = swap32 (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
w0_t[1] = salt_buf[1];
w0_t[2] = salt_buf[2];
w0_t[3] = salt_buf[3];
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
u32 key[4];
- key[0] = swap_workaround (digest[0]);
- key[1] = swap_workaround (digest[1]);
- key[2] = swap_workaround (digest[2]);
- key[3] = swap_workaround (digest[3]);
+ key[0] = swap32 (digest[0]);
+ key[1] = swap32 (digest[1]);
+ key[2] = swap32 (digest[2]);
+ key[3] = swap32 (digest[3]);
if (version == 3)
{
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
- w0_t[0] = swap_workaround (out[0]);
- w0_t[1] = swap_workaround (out[1]);
- w0_t[2] = swap_workaround (out[2]);
- w0_t[3] = swap_workaround (out[3]);
+ w0_t[0] = swap32 (out[0]);
+ w0_t[1] = swap32 (out[1]);
+ w0_t[2] = swap32 (out[2]);
+ w0_t[3] = swap32 (out[3]);
w1_t[0] = 0x80000000;
w1_t[1] = 0;
w1_t[2] = 0;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- digest[0] = swap_workaround (digest[0]);
- digest[1] = swap_workaround (digest[1]);
- digest[2] = swap_workaround (digest[2]);
- digest[3] = swap_workaround (digest[3]);
+ digest[0] = swap32 (digest[0]);
+ digest[1] = swap32 (digest[1]);
+ digest[2] = swap32 (digest[2]);
+ digest[3] = swap32 (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
w0_t[1] = salt_buf[1];
w0_t[2] = salt_buf[2];
w0_t[3] = salt_buf[3];
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
u32 key[4];
- key[0] = swap_workaround (digest[0]);
- key[1] = swap_workaround (digest[1]);
- key[2] = swap_workaround (digest[2]);
- key[3] = swap_workaround (digest[3]);
+ key[0] = swap32 (digest[0]);
+ key[1] = swap32 (digest[1]);
+ key[2] = swap32 (digest[2]);
+ key[3] = swap32 (digest[3]);
if (version == 3)
{
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
- w0_t[0] = swap_workaround (out[0]);
- w0_t[1] = swap_workaround (out[1]);
- w0_t[2] = swap_workaround (out[2]);
- w0_t[3] = swap_workaround (out[3]);
+ w0_t[0] = swap32 (out[0]);
+ w0_t[1] = swap32 (out[1]);
+ w0_t[2] = swap32 (out[2]);
+ w0_t[3] = swap32 (out[3]);
w1_t[0] = 0x80000000;
w1_t[1] = 0;
w1_t[2] = 0;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- digest[0] = swap_workaround (digest[0]);
- digest[1] = swap_workaround (digest[1]);
- digest[2] = swap_workaround (digest[2]);
- digest[3] = swap_workaround (digest[3]);
+ digest[0] = swap32 (digest[0]);
+ digest[1] = swap32 (digest[1]);
+ digest[2] = swap32 (digest[2]);
+ digest[3] = swap32 (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
w0_t[1] = salt_buf[1];
w0_t[2] = salt_buf[2];
w0_t[3] = salt_buf[3];
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
u32 key[4];
- key[0] = swap_workaround (digest[0]);
- key[1] = swap_workaround (digest[1]);
- key[2] = swap_workaround (digest[2]);
- key[3] = swap_workaround (digest[3]);
+ key[0] = swap32 (digest[0]);
+ key[1] = swap32 (digest[1]);
+ key[2] = swap32 (digest[2]);
+ key[3] = swap32 (digest[3]);
if (version == 3)
{
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
- w0_t[0] = swap_workaround (out[0]);
- w0_t[1] = swap_workaround (out[1]);
- w0_t[2] = swap_workaround (out[2]);
- w0_t[3] = swap_workaround (out[3]);
+ w0_t[0] = swap32 (out[0]);
+ w0_t[1] = swap32 (out[1]);
+ w0_t[2] = swap32 (out[2]);
+ w0_t[3] = swap32 (out[3]);
w1_t[0] = 0x80000000;
w1_t[1] = 0;
w1_t[2] = 0;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- digest[0] = swap_workaround (digest[0]);
- digest[1] = swap_workaround (digest[1]);
- digest[2] = swap_workaround (digest[2]);
- digest[3] = swap_workaround (digest[3]);
+ digest[0] = swap32 (digest[0]);
+ digest[1] = swap32 (digest[1]);
+ digest[2] = swap32 (digest[2]);
+ digest[3] = swap32 (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
u32 key[4];
- key[0] = swap_workaround (digest[0]);
- key[1] = swap_workaround (digest[1]);
- key[2] = swap_workaround (digest[2]);
- key[3] = swap_workaround (digest[3]);
+ key[0] = swap32 (digest[0]);
+ key[1] = swap32 (digest[1]);
+ key[2] = swap32 (digest[2]);
+ key[3] = swap32 (digest[3]);
if (version == 3)
{
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
- w0_t[0] = swap_workaround (out[0]);
- w0_t[1] = swap_workaround (out[1]);
- w0_t[2] = swap_workaround (out[2]);
- w0_t[3] = swap_workaround (out[3]);
+ w0_t[0] = swap32 (out[0]);
+ w0_t[1] = swap32 (out[1]);
+ w0_t[2] = swap32 (out[2]);
+ w0_t[3] = swap32 (out[3]);
w1_t[0] = 0x80000000;
w1_t[1] = 0;
w1_t[2] = 0;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- digest[0] = swap_workaround (digest[0]);
- digest[1] = swap_workaround (digest[1]);
- digest[2] = swap_workaround (digest[2]);
- digest[3] = swap_workaround (digest[3]);
+ digest[0] = swap32 (digest[0]);
+ digest[1] = swap32 (digest[1]);
+ digest[2] = swap32 (digest[2]);
+ digest[3] = swap32 (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
u32 key[4];
- key[0] = swap_workaround (digest[0]);
- key[1] = swap_workaround (digest[1]);
- key[2] = swap_workaround (digest[2]);
- key[3] = swap_workaround (digest[3]);
+ key[0] = swap32 (digest[0]);
+ key[1] = swap32 (digest[1]);
+ key[2] = swap32 (digest[2]);
+ key[3] = swap32 (digest[3]);
if (version == 3)
{
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
- w0_t[0] = swap_workaround (out[0]);
- w0_t[1] = swap_workaround (out[1]);
- w0_t[2] = swap_workaround (out[2]);
- w0_t[3] = swap_workaround (out[3]);
+ w0_t[0] = swap32 (out[0]);
+ w0_t[1] = swap32 (out[1]);
+ w0_t[2] = swap32 (out[2]);
+ w0_t[3] = swap32 (out[3]);
w1_t[0] = 0x80000000;
w1_t[1] = 0;
w1_t[2] = 0;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- digest[0] = swap_workaround (digest[0]);
- digest[1] = swap_workaround (digest[1]);
- digest[2] = swap_workaround (digest[2]);
- digest[3] = swap_workaround (digest[3]);
+ digest[0] = swap32 (digest[0]);
+ digest[1] = swap32 (digest[1]);
+ digest[2] = swap32 (digest[2]);
+ digest[3] = swap32 (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (out[0]);
- w0_t[1] = swap_workaround (out[1]);
- w0_t[2] = swap_workaround (out[2]);
- w0_t[3] = swap_workaround (out[3]);
+ w0_t[0] = swap32 (out[0]);
+ w0_t[1] = swap32 (out[1]);
+ w0_t[2] = swap32 (out[2]);
+ w0_t[3] = swap32 (out[3]);
w1_t[0] = 0x80000000;
w1_t[1] = 0;
w1_t[2] = 0;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- digest[0] = swap_workaround (digest[0]);
- digest[1] = swap_workaround (digest[1]);
- digest[2] = swap_workaround (digest[2]);
- digest[3] = swap_workaround (digest[3]);
+ digest[0] = swap32 (digest[0]);
+ digest[1] = swap32 (digest[1]);
+ digest[2] = swap32 (digest[2]);
+ digest[3] = swap32 (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (out[0]);
- w0_t[1] = swap_workaround (out[1]);
- w0_t[2] = swap_workaround (out[2]);
- w0_t[3] = swap_workaround (out[3]);
+ w0_t[0] = swap32 (out[0]);
+ w0_t[1] = swap32 (out[1]);
+ w0_t[2] = swap32 (out[2]);
+ w0_t[3] = swap32 (out[3]);
w1_t[0] = 0x80000000;
w1_t[1] = 0;
w1_t[2] = 0;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- digest[0] = swap_workaround (digest[0]);
- digest[1] = swap_workaround (digest[1]);
- digest[2] = swap_workaround (digest[2]);
- digest[3] = swap_workaround (digest[3]);
+ digest[0] = swap32 (digest[0]);
+ digest[1] = swap32 (digest[1]);
+ digest[2] = swap32 (digest[2]);
+ digest[3] = swap32 (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (out[0]);
- w0_t[1] = swap_workaround (out[1]);
- w0_t[2] = swap_workaround (out[2]);
- w0_t[3] = swap_workaround (out[3]);
+ w0_t[0] = swap32 (out[0]);
+ w0_t[1] = swap32 (out[1]);
+ w0_t[2] = swap32 (out[2]);
+ w0_t[3] = swap32 (out[3]);
w1_t[0] = 0x80000000;
w1_t[1] = 0;
w1_t[2] = 0;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- digest[0] = swap_workaround (digest[0]);
- digest[1] = swap_workaround (digest[1]);
- digest[2] = swap_workaround (digest[2]);
- digest[3] = swap_workaround (digest[3]);
+ digest[0] = swap32 (digest[0]);
+ digest[1] = swap32 (digest[1]);
+ digest[2] = swap32 (digest[2]);
+ digest[3] = swap32 (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (out[0]);
- w0_t[1] = swap_workaround (out[1]);
- w0_t[2] = swap_workaround (out[2]);
- w0_t[3] = swap_workaround (out[3]);
+ w0_t[0] = swap32 (out[0]);
+ w0_t[1] = swap32 (out[1]);
+ w0_t[2] = swap32 (out[2]);
+ w0_t[3] = swap32 (out[3]);
w1_t[0] = 0x80000000;
w1_t[1] = 0;
w1_t[2] = 0;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- digest[0] = swap_workaround (digest[0]);
- digest[1] = swap_workaround (digest[1]);
- digest[2] = swap_workaround (digest[2]);
- digest[3] = swap_workaround (digest[3]);
+ digest[0] = swap32 (digest[0]);
+ digest[1] = swap32 (digest[1]);
+ digest[2] = swap32 (digest[2]);
+ digest[3] = swap32 (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (out[0]);
- w0_t[1] = swap_workaround (out[1]);
- w0_t[2] = swap_workaround (out[2]);
- w0_t[3] = swap_workaround (out[3]);
+ w0_t[0] = swap32 (out[0]);
+ w0_t[1] = swap32 (out[1]);
+ w0_t[2] = swap32 (out[2]);
+ w0_t[3] = swap32 (out[3]);
w1_t[0] = 0x80000000;
w1_t[1] = 0;
w1_t[2] = 0;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- digest[0] = swap_workaround (digest[0]);
- digest[1] = swap_workaround (digest[1]);
- digest[2] = swap_workaround (digest[2]);
- digest[3] = swap_workaround (digest[3]);
+ digest[0] = swap32 (digest[0]);
+ digest[1] = swap32 (digest[1]);
+ digest[2] = swap32 (digest[2]);
+ digest[3] = swap32 (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (out[0]);
- w0_t[1] = swap_workaround (out[1]);
- w0_t[2] = swap_workaround (out[2]);
- w0_t[3] = swap_workaround (out[3]);
+ w0_t[0] = swap32 (out[0]);
+ w0_t[1] = swap32 (out[1]);
+ w0_t[2] = swap32 (out[2]);
+ w0_t[3] = swap32 (out[3]);
w1_t[0] = 0x80000000;
w1_t[1] = 0;
w1_t[2] = 0;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- digest[0] = swap_workaround (digest[0]);
- digest[1] = swap_workaround (digest[1]);
- digest[2] = swap_workaround (digest[2]);
- digest[3] = swap_workaround (digest[3]);
+ digest[0] = swap32 (digest[0]);
+ digest[1] = swap32 (digest[1]);
+ digest[2] = swap32 (digest[2]);
+ digest[3] = swap32 (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
w0_t[1] = salt_buf[1];
w0_t[2] = salt_buf[2];
w0_t[3] = salt_buf[3];
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- u32 a = swap_workaround (digest[0]);
- u32 b = swap_workaround (digest[1]) & 0xff;
+ u32 a = swap32 (digest[0]);
+ u32 b = swap32 (digest[1]) & 0xff;
const u32 r0 = a;
const u32 r1 = b;
w0_t[1] = salt_buf[1];
w0_t[2] = salt_buf[2];
w0_t[3] = salt_buf[3];
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- u32 a = swap_workaround (digest[0]);
- u32 b = swap_workaround (digest[1]) & 0xff;
+ u32 a = swap32 (digest[0]);
+ u32 b = swap32 (digest[1]) & 0xff;
const u32 r0 = a;
const u32 r1 = b;
w0_t[1] = salt_buf[1];
w0_t[2] = salt_buf[2];
w0_t[3] = salt_buf[3];
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- u32 a = swap_workaround (digest[0]);
- u32 b = swap_workaround (digest[1]) & 0xff;
+ u32 a = swap32 (digest[0]);
+ u32 b = swap32 (digest[1]) & 0xff;
const u32 r0 = a;
const u32 r1 = b;
w0_t[1] = salt_buf[1];
w0_t[2] = salt_buf[2];
w0_t[3] = salt_buf[3];
- w1_t[0] = swap_workaround (w1_t[0]);
- w1_t[1] = swap_workaround (w1_t[1]);
- w1_t[2] = swap_workaround (w1_t[2]);
- w1_t[3] = swap_workaround (w1_t[3]);
- w2_t[0] = swap_workaround (w2_t[0]);
- w2_t[1] = swap_workaround (w2_t[1]);
- w2_t[2] = swap_workaround (w2_t[2]);
- w2_t[3] = swap_workaround (w2_t[3]);
- w3_t[0] = swap_workaround (w3_t[0]);
- w3_t[1] = swap_workaround (w3_t[1]);
+ w1_t[0] = swap32 (w1_t[0]);
+ w1_t[1] = swap32 (w1_t[1]);
+ w1_t[2] = swap32 (w1_t[2]);
+ w1_t[3] = swap32 (w1_t[3]);
+ w2_t[0] = swap32 (w2_t[0]);
+ w2_t[1] = swap32 (w2_t[1]);
+ w2_t[2] = swap32 (w2_t[2]);
+ w2_t[3] = swap32 (w2_t[3]);
+ w3_t[0] = swap32 (w3_t[0]);
+ w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- u32 a = swap_workaround (digest[0]);
- u32 b = swap_workaround (digest[1]) & 0xff;
+ u32 a = swap32 (digest[0]);
+ u32 b = swap32 (digest[1]) & 0xff;
const u32 r0 = a;
const u32 r1 = b;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- u32 a = swap_workaround (digest[0]);
- u32 b = swap_workaround (digest[1]) & 0xff;
+ u32 a = swap32 (digest[0]);
+ u32 b = swap32 (digest[1]) & 0xff;
const u32 r0 = a;
const u32 r1 = b;
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
- u32 a = swap_workaround (digest[0]);
- u32 b = swap_workaround (digest[1]) & 0xff;
+ u32 a = swap32 (digest[0]);
+ u32 b = swap32 (digest[1]) & 0xff;
const u32 r0 = a;
const u32 r1 = b;
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
-
-#ifdef VECT_SIZE2
-#define COMPARE_M "check_multi_vect2_comp4.c"
-#endif
-
-#ifdef VECT_SIZE4
-#define COMPARE_M "check_multi_vect4_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5])
{
// swaps needed
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
- w3[2] = swap_workaround (w3[2]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
+ w3[2] = swap32 (w3[2]);
w3[3] = pw_salt_len * 8;
u32 digest[5];
u32 word_buf0[4];
- word_buf0[0] = swap_workaround (pws[gid].i[0]);
- word_buf0[1] = swap_workaround (pws[gid].i[1]);
- word_buf0[2] = swap_workaround (pws[gid].i[2]);
- word_buf0[3] = swap_workaround (pws[gid].i[3]);
+ word_buf0[0] = swap32 (pws[gid].i[0]);
+ word_buf0[1] = swap32 (pws[gid].i[1]);
+ word_buf0[2] = swap32 (pws[gid].i[2]);
+ word_buf0[3] = swap32 (pws[gid].i[3]);
u32 word_buf1[4];
- word_buf1[0] = swap_workaround (pws[gid].i[4]);
- word_buf1[1] = swap_workaround (pws[gid].i[5]);
- word_buf1[2] = swap_workaround (pws[gid].i[6]);
- word_buf1[3] = swap_workaround (pws[gid].i[7]);
+ word_buf1[0] = swap32 (pws[gid].i[4]);
+ word_buf1[1] = swap32 (pws[gid].i[5]);
+ word_buf1[2] = swap32 (pws[gid].i[6]);
+ word_buf1[3] = swap32 (pws[gid].i[7]);
u32 word_buf2[2];
- word_buf2[0] = swap_workaround (pws[gid].i[8]);
- word_buf2[1] = swap_workaround (pws[gid].i[9]);
+ word_buf2[0] = swap32 (pws[gid].i[8]);
+ word_buf2[1] = swap32 (pws[gid].i[9]);
const u32 pw_len = pws[gid].pw_len;
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
-
-#ifdef VECT_SIZE4
-#define COMPARE_M "check_multi_vect4_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
__constant u32 padding[8] =
{
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
-
-#ifdef VECT_SIZE4
-#define COMPARE_M "check_multi_vect4_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
typedef struct
{
union
{
- u32 dgst32[16];
+ u32 dgst32[16];
u64 dgst64[8];
};
union
{
- u32 W32[32];
+ u32 W32[32];
u64 W64[16];
};
u32 g = digest[6];
u32 h = digest[7];
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
- u32 we_t = swap_workaround (w3[2]);
- u32 wf_t = swap_workaround (w3[3]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
+ u32 we_t = swap32 (w3[2]);
+ u32 wf_t = swap32 (w3[3]);
#define ROUND256_EXPAND() \
{ \
u64 g = digest[6];
u64 h = digest[7];
- u64 w0_t = swap_workaround (w0[0]);
- u64 w1_t = swap_workaround (w0[1]);
- u64 w2_t = swap_workaround (w0[2]);
- u64 w3_t = swap_workaround (w0[3]);
- u64 w4_t = swap_workaround (w1[0]);
- u64 w5_t = swap_workaround (w1[1]);
- u64 w6_t = swap_workaround (w1[2]);
- u64 w7_t = swap_workaround (w1[3]);
- u64 w8_t = swap_workaround (w2[0]);
- u64 w9_t = swap_workaround (w2[1]);
- u64 wa_t = swap_workaround (w2[2]);
- u64 wb_t = swap_workaround (w2[3]);
- u64 wc_t = swap_workaround (w3[0]);
- u64 wd_t = swap_workaround (w3[1]);
- u64 we_t = swap_workaround (w3[2]);
- u64 wf_t = swap_workaround (w3[3]);
+ u64 w0_t = swap32 (w0[0]);
+ u64 w1_t = swap32 (w0[1]);
+ u64 w2_t = swap32 (w0[2]);
+ u64 w3_t = swap32 (w0[3]);
+ u64 w4_t = swap32 (w1[0]);
+ u64 w5_t = swap32 (w1[1]);
+ u64 w6_t = swap32 (w1[2]);
+ u64 w7_t = swap32 (w1[3]);
+ u64 w8_t = swap32 (w2[0]);
+ u64 w9_t = swap32 (w2[1]);
+ u64 wa_t = swap32 (w2[2]);
+ u64 wb_t = swap32 (w2[3]);
+ u64 wc_t = swap32 (w3[0]);
+ u64 wd_t = swap32 (w3[1]);
+ u64 we_t = swap32 (w3[2]);
+ u64 wf_t = swap32 (w3[3]);
#define ROUND384_EXPAND() \
{ \
u64 g = digest[6];
u64 h = digest[7];
- u64 w0_t = swap_workaround (w0[0]);
- u64 w1_t = swap_workaround (w0[1]);
- u64 w2_t = swap_workaround (w0[2]);
- u64 w3_t = swap_workaround (w0[3]);
- u64 w4_t = swap_workaround (w1[0]);
- u64 w5_t = swap_workaround (w1[1]);
- u64 w6_t = swap_workaround (w1[2]);
- u64 w7_t = swap_workaround (w1[3]);
- u64 w8_t = swap_workaround (w2[0]);
- u64 w9_t = swap_workaround (w2[1]);
- u64 wa_t = swap_workaround (w2[2]);
- u64 wb_t = swap_workaround (w2[3]);
- u64 wc_t = swap_workaround (w3[0]);
- u64 wd_t = swap_workaround (w3[1]);
- u64 we_t = swap_workaround (w3[2]);
- u64 wf_t = swap_workaround (w3[3]);
+ u64 w0_t = swap32 (w0[0]);
+ u64 w1_t = swap32 (w0[1]);
+ u64 w2_t = swap32 (w0[2]);
+ u64 w3_t = swap32 (w0[3]);
+ u64 w4_t = swap32 (w1[0]);
+ u64 w5_t = swap32 (w1[1]);
+ u64 w6_t = swap32 (w1[2]);
+ u64 w7_t = swap32 (w1[3]);
+ u64 w8_t = swap32 (w2[0]);
+ u64 w9_t = swap32 (w2[1]);
+ u64 wa_t = swap32 (w2[2]);
+ u64 wb_t = swap32 (w2[3]);
+ u64 wc_t = swap32 (w3[0]);
+ u64 wd_t = swap32 (w3[1]);
+ u64 we_t = swap32 (w3[2]);
+ u64 wf_t = swap32 (w3[3]);
#define ROUND512_EXPAND() \
{ \
static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
{
- rek[0] = swap_workaround (userkey[0]);
- rek[1] = swap_workaround (userkey[1]);
- rek[2] = swap_workaround (userkey[2]);
- rek[3] = swap_workaround (userkey[3]);
+ rek[0] = swap32 (userkey[0]);
+ rek[1] = swap32 (userkey[1]);
+ rek[2] = swap32 (userkey[2]);
+ rek[3] = swap32 (userkey[3]);
for (u32 i = 0, j = 0; i < 10; i += 1, j += 4)
{
{
u32 in_swap[4];
- in_swap[0] = swap_workaround (in[0]);
- in_swap[1] = swap_workaround (in[1]);
- in_swap[2] = swap_workaround (in[2]);
- in_swap[3] = swap_workaround (in[3]);
+ in_swap[0] = swap32 (in[0]);
+ in_swap[1] = swap32 (in[1]);
+ in_swap[2] = swap32 (in[2]);
+ in_swap[3] = swap32 (in[3]);
u32 s0 = in_swap[0] ^ rek[0];
u32 s1 = in_swap[1] ^ rek[1];
^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff)
^ rek[43];
- out[0] = swap_workaround (out[0]);
- out[1] = swap_workaround (out[1]);
- out[2] = swap_workaround (out[2]);
- out[3] = swap_workaround (out[3]);
+ out[0] = swap32 (out[0]);
+ out[1] = swap32 (out[1]);
+ out[2] = swap32 (out[2]);
+ out[3] = swap32 (out[3]);
}
static void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[2])
u32 i;
+ #ifdef IS_AMD
for (i = 0; i < pd; i++) sc[idx++] = pw[i];
sc[idx++] = pw[i]
| amd_bytealign (bl[0], 0, pm4);
sc[idx++] = amd_bytealign (sc[0], bl[i - 1], pm4);
for (i = 1; i < 4; i++) sc[idx++] = amd_bytealign (sc[i], sc[i - 1], pm4);
sc[idx++] = amd_bytealign ( 0, sc[i - 1], pm4);
+ #endif
+
+ #ifdef IS_NV
+ int selector = (0x76543210 >> (pm4 * 4)) & 0xffff;
+
+ for (i = 0; i < pd; i++) sc[idx++] = pw[i];
+ sc[idx++] = pw[i]
+ | __byte_perm ( 0, bl[0], selector);
+ for (i = 1; i < bd; i++) sc[idx++] = __byte_perm (bl[i - 1], bl[i], selector);
+ sc[idx++] = __byte_perm (bl[i - 1], sc[0], selector);
+ for (i = 1; i < 4; i++) sc[idx++] = __byte_perm (sc[i - 1], sc[i], selector);
+ sc[idx++] = __byte_perm (sc[i - 1], 0, selector);
+ #endif
}
}
const u32 om = m % 4;
const u32 od = m / 4;
+ #ifdef IS_AMD
pt[0] = amd_bytealign (sc[od + 1], sc[od + 0], om);
pt[1] = amd_bytealign (sc[od + 2], sc[od + 1], om);
pt[2] = amd_bytealign (sc[od + 3], sc[od + 2], om);
pt[3] = amd_bytealign (sc[od + 4], sc[od + 3], om);
+ #endif
+
+ #ifdef IS_NV
+ int selector = (0x76543210 >> (om * 4)) & 0xffff;
+
+ pt[0] = __byte_perm (sc[od + 0], sc[od + 1], selector);
+ pt[1] = __byte_perm (sc[od + 1], sc[od + 2], selector);
+ pt[2] = __byte_perm (sc[od + 2], sc[od + 3], selector);
+ pt[3] = __byte_perm (sc[od + 3], sc[od + 4], selector);
+ #endif
}
static void make_w_with_offset (ctx_t *ctx, const u32 W_len, const u32 offset, const u32 *sc, const u32 pwbl_len, u32 *iv, const u32 *rek, __local u32 s_te0[256], __local u32 s_te1[256], __local u32 s_te2[256], __local u32 s_te3[256], __local u32 s_te4[256])
ctx->W64[12] = 0;
ctx->W64[13] = 0;
ctx->W64[14] = 0;
- ctx->W64[15] = swap_workaround ((u64) (final_len * 8));
+ ctx->W64[15] = swap32 ((u64) (final_len * 8));
ex = ctx->W64[7] >> 56;
break;
case BLSZ512: make_w_with_offset (ctx, 64, offset, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
ctx->W64[12] = 0;
ctx->W64[13] = 0;
ctx->W64[14] = 0;
- ctx->W64[15] = swap_workaround ((u64) (final_len * 8));
+ ctx->W64[15] = swap32 ((u64) (final_len * 8));
ex = ctx->W64[7] >> 56;
break;
}
ctx->W32[12] = 0;
ctx->W32[13] = 0;
ctx->W32[14] = 0;
- ctx->W32[15] = swap_workaround (final_len * 8);
+ ctx->W32[15] = swap32 (final_len * 8);
break;
case BLSZ384: ex = ctx->W64[15] >> 56;
ctx->W64[ 0] = 0x80;
ctx->W64[12] = 0;
ctx->W64[13] = 0;
ctx->W64[14] = 0;
- ctx->W64[15] = swap_workaround ((u64) (final_len * 8));
+ ctx->W64[15] = swap32 ((u64) (final_len * 8));
break;
case BLSZ512: ex = ctx->W64[15] >> 56;
ctx->W64[ 0] = 0x80;
ctx->W64[12] = 0;
ctx->W64[13] = 0;
ctx->W64[14] = 0;
- ctx->W64[15] = swap_workaround ((u64) (final_len * 8));
+ ctx->W64[15] = swap32 ((u64) (final_len * 8));
break;
}
}
switch (ctx->dgst_len)
{
case BLSZ256: sha256_transform (&ctx->W32[ 0], &ctx->W32[ 4], &ctx->W32[ 8], &ctx->W32[12], ctx->dgst32);
- ctx->dgst32[ 0] = swap_workaround (ctx->dgst32[0]);
- ctx->dgst32[ 1] = swap_workaround (ctx->dgst32[1]);
- ctx->dgst32[ 2] = swap_workaround (ctx->dgst32[2]);
- ctx->dgst32[ 3] = swap_workaround (ctx->dgst32[3]);
- ctx->dgst32[ 4] = swap_workaround (ctx->dgst32[4]);
- ctx->dgst32[ 5] = swap_workaround (ctx->dgst32[5]);
- ctx->dgst32[ 6] = swap_workaround (ctx->dgst32[6]);
- ctx->dgst32[ 7] = swap_workaround (ctx->dgst32[7]);
+ ctx->dgst32[ 0] = swap32 (ctx->dgst32[0]);
+ ctx->dgst32[ 1] = swap32 (ctx->dgst32[1]);
+ ctx->dgst32[ 2] = swap32 (ctx->dgst32[2]);
+ ctx->dgst32[ 3] = swap32 (ctx->dgst32[3]);
+ ctx->dgst32[ 4] = swap32 (ctx->dgst32[4]);
+ ctx->dgst32[ 5] = swap32 (ctx->dgst32[5]);
+ ctx->dgst32[ 6] = swap32 (ctx->dgst32[6]);
+ ctx->dgst32[ 7] = swap32 (ctx->dgst32[7]);
ctx->dgst32[ 8] = 0;
ctx->dgst32[ 9] = 0;
ctx->dgst32[10] = 0;
ctx->dgst32[15] = 0;
break;
case BLSZ384: sha384_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
- ctx->dgst64[0] = swap_workaround (ctx->dgst64[0]);
- ctx->dgst64[1] = swap_workaround (ctx->dgst64[1]);
- ctx->dgst64[2] = swap_workaround (ctx->dgst64[2]);
- ctx->dgst64[3] = swap_workaround (ctx->dgst64[3]);
- ctx->dgst64[4] = swap_workaround (ctx->dgst64[4]);
- ctx->dgst64[5] = swap_workaround (ctx->dgst64[5]);
+ ctx->dgst64[0] = swap32 (ctx->dgst64[0]);
+ ctx->dgst64[1] = swap32 (ctx->dgst64[1]);
+ ctx->dgst64[2] = swap32 (ctx->dgst64[2]);
+ ctx->dgst64[3] = swap32 (ctx->dgst64[3]);
+ ctx->dgst64[4] = swap32 (ctx->dgst64[4]);
+ ctx->dgst64[5] = swap32 (ctx->dgst64[5]);
ctx->dgst64[6] = 0;
ctx->dgst64[7] = 0;
break;
case BLSZ512: sha512_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64);
- ctx->dgst64[0] = swap_workaround (ctx->dgst64[0]);
- ctx->dgst64[1] = swap_workaround (ctx->dgst64[1]);
- ctx->dgst64[2] = swap_workaround (ctx->dgst64[2]);
- ctx->dgst64[3] = swap_workaround (ctx->dgst64[3]);
- ctx->dgst64[4] = swap_workaround (ctx->dgst64[4]);
- ctx->dgst64[5] = swap_workaround (ctx->dgst64[5]);
- ctx->dgst64[6] = swap_workaround (ctx->dgst64[6]);
- ctx->dgst64[7] = swap_workaround (ctx->dgst64[7]);
+ ctx->dgst64[0] = swap32 (ctx->dgst64[0]);
+ ctx->dgst64[1] = swap32 (ctx->dgst64[1]);
+ ctx->dgst64[2] = swap32 (ctx->dgst64[2]);
+ ctx->dgst64[3] = swap32 (ctx->dgst64[3]);
+ ctx->dgst64[4] = swap32 (ctx->dgst64[4]);
+ ctx->dgst64[5] = swap32 (ctx->dgst64[5]);
+ ctx->dgst64[6] = swap32 (ctx->dgst64[6]);
+ ctx->dgst64[7] = swap32 (ctx->dgst64[7]);
break;
}
append_0x80_2x4 (block0, block1, block_len);
- block3[3] = swap_workaround (block_len * 8);
+ block3[3] = swap32 (block_len * 8);
u32 digest[8];
sha256_transform (block0, block1, block2, block3, digest);
- digest[0] = swap_workaround (digest[0]);
- digest[1] = swap_workaround (digest[1]);
- digest[2] = swap_workaround (digest[2]);
- digest[3] = swap_workaround (digest[3]);
- digest[4] = swap_workaround (digest[4]);
- digest[5] = swap_workaround (digest[5]);
- digest[6] = swap_workaround (digest[6]);
- digest[7] = swap_workaround (digest[7]);
+ digest[0] = swap32 (digest[0]);
+ digest[1] = swap32 (digest[1]);
+ digest[2] = swap32 (digest[2]);
+ digest[3] = swap32 (digest[3]);
+ digest[4] = swap32 (digest[4]);
+ digest[5] = swap32 (digest[5]);
+ digest[6] = swap32 (digest[6]);
+ digest[7] = swap32 (digest[7]);
tmps[gid].dgst32[0] = digest[0];
tmps[gid].dgst32[1] = digest[1];
* digest
*/
- const u32 r0 = swap_workaround (tmps[gid].dgst32[DGST_R0]);
- const u32 r1 = swap_workaround (tmps[gid].dgst32[DGST_R1]);
- const u32 r2 = swap_workaround (tmps[gid].dgst32[DGST_R2]);
- const u32 r3 = swap_workaround (tmps[gid].dgst32[DGST_R3]);
+ const u32 r0 = swap32 (tmps[gid].dgst32[DGST_R0]);
+ const u32 r1 = swap32 (tmps[gid].dgst32[DGST_R1]);
+ const u32 r2 = swap32 (tmps[gid].dgst32[DGST_R2]);
+ const u32 r3 = swap32 (tmps[gid].dgst32[DGST_R3]);
#define il_pos 0
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = out_len * 8;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = out_len * 8;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = pw_len * 8;
u32 w2_t[4];
u32 w3_t[4];
- w0_t[0] = swap_workaround (w0[0]);
- w0_t[1] = swap_workaround (w0[1]);
- w0_t[2] = swap_workaround (w0[2]);
- w0_t[3] = swap_workaround (w0[3]);
- w1_t[0] = swap_workaround (w1[0]);
- w1_t[1] = swap_workaround (w1[1]);
- w1_t[2] = swap_workaround (w1[2]);
- w1_t[3] = swap_workaround (w1[3]);
- w2_t[0] = swap_workaround (w2[0]);
- w2_t[1] = swap_workaround (w2[1]);
- w2_t[2] = swap_workaround (w2[2]);
- w2_t[3] = swap_workaround (w2[3]);
- w3_t[0] = swap_workaround (w3[0]);
- w3_t[1] = swap_workaround (w3[1]);
+ w0_t[0] = swap32 (w0[0]);
+ w0_t[1] = swap32 (w0[1]);
+ w0_t[2] = swap32 (w0[2]);
+ w0_t[3] = swap32 (w0[3]);
+ w1_t[0] = swap32 (w1[0]);
+ w1_t[1] = swap32 (w1[1]);
+ w1_t[2] = swap32 (w1[2]);
+ w1_t[3] = swap32 (w1[3]);
+ w2_t[0] = swap32 (w2[0]);
+ w2_t[1] = swap32 (w2[1]);
+ w2_t[2] = swap32 (w2[2]);
+ w2_t[3] = swap32 (w2[3]);
+ w3_t[0] = swap32 (w3[0]);
+ w3_t[1] = swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = pw_len * 8;
u32 w0[4];
- w0[0] = swap_workaround (pws[gid].i[ 0]);
- w0[1] = swap_workaround (pws[gid].i[ 1]);
- w0[2] = swap_workaround (pws[gid].i[ 2]);
- w0[3] = swap_workaround (pws[gid].i[ 3]);
+ w0[0] = swap32 (pws[gid].i[ 0]);
+ w0[1] = swap32 (pws[gid].i[ 1]);
+ w0[2] = swap32 (pws[gid].i[ 2]);
+ w0[3] = swap32 (pws[gid].i[ 3]);
u32 w1[4];
- w1[0] = swap_workaround (pws[gid].i[ 4]);
- w1[1] = swap_workaround (pws[gid].i[ 5]);
- w1[2] = swap_workaround (pws[gid].i[ 6]);
- w1[3] = swap_workaround (pws[gid].i[ 7]);
+ w1[0] = swap32 (pws[gid].i[ 4]);
+ w1[1] = swap32 (pws[gid].i[ 5]);
+ w1[2] = swap32 (pws[gid].i[ 6]);
+ w1[3] = swap32 (pws[gid].i[ 7]);
u32 w2[4];
- w2[0] = swap_workaround (pws[gid].i[ 8]);
- w2[1] = swap_workaround (pws[gid].i[ 9]);
- w2[2] = swap_workaround (pws[gid].i[10]);
- w2[3] = swap_workaround (pws[gid].i[11]);
+ w2[0] = swap32 (pws[gid].i[ 8]);
+ w2[1] = swap32 (pws[gid].i[ 9]);
+ w2[2] = swap32 (pws[gid].i[10]);
+ w2[3] = swap32 (pws[gid].i[11]);
u32 w3[4];
- w3[0] = swap_workaround (pws[gid].i[12]);
- w3[1] = swap_workaround (pws[gid].i[13]);
- w3[2] = swap_workaround (pws[gid].i[14]);
- w3[3] = swap_workaround (pws[gid].i[15]);
+ w3[0] = swap32 (pws[gid].i[12]);
+ w3[1] = swap32 (pws[gid].i[13]);
+ w3[2] = swap32 (pws[gid].i[14]);
+ w3[3] = swap32 (pws[gid].i[15]);
/**
* salt
u32 esalt_buf2[4];
u32 esalt_buf3[4];
- esalt_buf0[0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0]);
- esalt_buf0[1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1]);
- esalt_buf0[2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2]);
- esalt_buf0[3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3]);
- esalt_buf1[0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4]);
- esalt_buf1[1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5]);
- esalt_buf1[2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 6]);
- esalt_buf1[3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7]);
- esalt_buf2[0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8]);
- esalt_buf2[1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9]);
- esalt_buf2[2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[10]);
- esalt_buf2[3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[11]);
- esalt_buf3[0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[12]);
- esalt_buf3[1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[13]);
+ esalt_buf0[0] = swap32 (esalt_bufs[salt_pos].salt_buf[ 0]);
+ esalt_buf0[1] = swap32 (esalt_bufs[salt_pos].salt_buf[ 1]);
+ esalt_buf0[2] = swap32 (esalt_bufs[salt_pos].salt_buf[ 2]);
+ esalt_buf0[3] = swap32 (esalt_bufs[salt_pos].salt_buf[ 3]);
+ esalt_buf1[0] = swap32 (esalt_bufs[salt_pos].salt_buf[ 4]);
+ esalt_buf1[1] = swap32 (esalt_bufs[salt_pos].salt_buf[ 5]);
+ esalt_buf1[2] = swap32 (esalt_bufs[salt_pos].salt_buf[ 6]);
+ esalt_buf1[3] = swap32 (esalt_bufs[salt_pos].salt_buf[ 7]);
+ esalt_buf2[0] = swap32 (esalt_bufs[salt_pos].salt_buf[ 8]);
+ esalt_buf2[1] = swap32 (esalt_bufs[salt_pos].salt_buf[ 9]);
+ esalt_buf2[2] = swap32 (esalt_bufs[salt_pos].salt_buf[10]);
+ esalt_buf2[3] = swap32 (esalt_bufs[salt_pos].salt_buf[11]);
+ esalt_buf3[0] = swap32 (esalt_bufs[salt_pos].salt_buf[12]);
+ esalt_buf3[1] = swap32 (esalt_bufs[salt_pos].salt_buf[13]);
esalt_buf3[2] = 0;
esalt_buf3[3] = (64 + salt_len + 4) * 8;
u32 salt_buf[5];
- salt_buf[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]);
- salt_buf[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]);
- salt_buf[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[2]);
- salt_buf[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[3]);
- salt_buf[4] = swap_workaround (salt_bufs[salt_pos].salt_buf[4]);
+ salt_buf[0] = swap32 (salt_bufs[salt_pos].salt_buf[0]);
+ salt_buf[1] = swap32 (salt_bufs[salt_pos].salt_buf[1]);
+ salt_buf[2] = swap32 (salt_bufs[salt_pos].salt_buf[2]);
+ salt_buf[3] = swap32 (salt_bufs[salt_pos].salt_buf[3]);
+ salt_buf[4] = swap32 (salt_bufs[salt_pos].salt_buf[4]);
/**
* loop
* sha1 ($pass)
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_len * 8;
u32 salt_buf[5];
- salt_buf[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]);
- salt_buf[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]);
- salt_buf[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[2]);
- salt_buf[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[3]);
- salt_buf[4] = swap_workaround (salt_bufs[salt_pos].salt_buf[4]);
+ salt_buf[0] = swap32 (salt_bufs[salt_pos].salt_buf[0]);
+ salt_buf[1] = swap32 (salt_bufs[salt_pos].salt_buf[1]);
+ salt_buf[2] = swap32 (salt_bufs[salt_pos].salt_buf[2]);
+ salt_buf[3] = swap32 (salt_bufs[salt_pos].salt_buf[3]);
+ salt_buf[4] = swap32 (salt_bufs[salt_pos].salt_buf[4]);
/**
* loop
* sha1 ($pass)
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_len * 8;
u32 salt_buf[5];
- salt_buf[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]);
- salt_buf[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]);
- salt_buf[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[2]);
- salt_buf[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[3]);
- salt_buf[4] = swap_workaround (salt_bufs[salt_pos].salt_buf[4]);
+ salt_buf[0] = swap32 (salt_bufs[salt_pos].salt_buf[0]);
+ salt_buf[1] = swap32 (salt_bufs[salt_pos].salt_buf[1]);
+ salt_buf[2] = swap32 (salt_bufs[salt_pos].salt_buf[2]);
+ salt_buf[3] = swap32 (salt_bufs[salt_pos].salt_buf[3]);
+ salt_buf[4] = swap32 (salt_bufs[salt_pos].salt_buf[4]);
/**
* loop
* sha1 ($pass)
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
u32 salt_buf[5];
- salt_buf[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]);
- salt_buf[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]);
- salt_buf[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[2]);
- salt_buf[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[3]);
- salt_buf[4] = swap_workaround (salt_bufs[salt_pos].salt_buf[4]);
+ salt_buf[0] = swap32 (salt_bufs[salt_pos].salt_buf[0]);
+ salt_buf[1] = swap32 (salt_bufs[salt_pos].salt_buf[1]);
+ salt_buf[2] = swap32 (salt_bufs[salt_pos].salt_buf[2]);
+ salt_buf[3] = swap32 (salt_bufs[salt_pos].salt_buf[3]);
+ salt_buf[4] = swap32 (salt_bufs[salt_pos].salt_buf[4]);
/**
* loop
* sha1 ($pass)
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
u32 salt_buf[5];
- salt_buf[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]);
- salt_buf[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]);
- salt_buf[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[2]);
- salt_buf[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[3]);
- salt_buf[4] = swap_workaround (salt_bufs[salt_pos].salt_buf[4]);
+ salt_buf[0] = swap32 (salt_bufs[salt_pos].salt_buf[0]);
+ salt_buf[1] = swap32 (salt_bufs[salt_pos].salt_buf[1]);
+ salt_buf[2] = swap32 (salt_bufs[salt_pos].salt_buf[2]);
+ salt_buf[3] = swap32 (salt_bufs[salt_pos].salt_buf[3]);
+ salt_buf[4] = swap32 (salt_bufs[salt_pos].salt_buf[4]);
/**
* loop
u32 salt_buf[5];
- salt_buf[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]);
- salt_buf[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]);
- salt_buf[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[2]);
- salt_buf[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[3]);
- salt_buf[4] = swap_workaround (salt_bufs[salt_pos].salt_buf[4]);
+ salt_buf[0] = swap32 (salt_bufs[salt_pos].salt_buf[0]);
+ salt_buf[1] = swap32 (salt_bufs[salt_pos].salt_buf[1]);
+ salt_buf[2] = swap32 (salt_bufs[salt_pos].salt_buf[2]);
+ salt_buf[3] = swap32 (salt_bufs[salt_pos].salt_buf[3]);
+ salt_buf[4] = swap32 (salt_bufs[salt_pos].salt_buf[4]);
/**
* loop
* init
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
- w3[2] = swap_workaround (w3[2]);
- w3[3] = swap_workaround (w3[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
+ w3[2] = swap32 (w3[2]);
+ w3[3] = swap32 (w3[3]);
u64 w[16];
{
u32 data[4];
- data[0] = swap_workaround (esalt_bufs[salt_pos].cry_master_buf[(i / 4) + 0]);
- data[1] = swap_workaround (esalt_bufs[salt_pos].cry_master_buf[(i / 4) + 1]);
- data[2] = swap_workaround (esalt_bufs[salt_pos].cry_master_buf[(i / 4) + 2]);
- data[3] = swap_workaround (esalt_bufs[salt_pos].cry_master_buf[(i / 4) + 3]);
+ data[0] = swap32 (esalt_bufs[salt_pos].cry_master_buf[(i / 4) + 0]);
+ data[1] = swap32 (esalt_bufs[salt_pos].cry_master_buf[(i / 4) + 1]);
+ data[2] = swap32 (esalt_bufs[salt_pos].cry_master_buf[(i / 4) + 2]);
+ data[3] = swap32 (esalt_bufs[salt_pos].cry_master_buf[(i / 4) + 3]);
AES256_decrypt (data, out, rk, s_td0, s_td1, s_td2, s_td3, s_td4);
#define COMPARE_S "check_single_comp4.c"
#define COMPARE_M "check_multi_comp4.c"
-#ifdef VECT_SIZE1
#define uint_to_hex_lower8(i) l_bin2asc[(i)]
-#endif
-
-#ifdef VECT_SIZE2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
-#endif
-
-#ifdef VECT_SIZE4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
-#endif
static u32 memcat32 (u32 block0[16], u32 block1[16], const u32 block_len, const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 append3[4], const u32 append_len)
{
const u32 mod = block_len & 3;
const u32 div = block_len / 4;
+ #ifdef IS_AMD
const int offset_minus_4 = 4 - mod;
u32 append0_t[4];
- append0_t[0] = amd_bytealign (append0[0], 0, offset_minus_4);
+ append0_t[0] = amd_bytealign (append0[0], 0, offset_minus_4);
append0_t[1] = amd_bytealign (append0[1], append0[0], offset_minus_4);
append0_t[2] = amd_bytealign (append0[2], append0[1], offset_minus_4);
append0_t[3] = amd_bytealign (append0[3], append0[2], offset_minus_4);
append4_t[2] = 0;
append4_t[3] = 0;
}
+ #endif
+
+ #ifdef IS_NV
+
+ const int offset_minus_4 = 4 - mod;
+
+ const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+
+ u32 append0_t[4];
+
+ append0_t[0] = __byte_perm ( 0, append0[0], selector);
+ append0_t[1] = __byte_perm (append0[0], append0[1], selector);
+ append0_t[2] = __byte_perm (append0[1], append0[2], selector);
+ append0_t[3] = __byte_perm (append0[2], append0[3], selector);
+
+ u32 append1_t[4];
+
+ append1_t[0] = __byte_perm (append0[3], append1[0], selector);
+ append1_t[1] = __byte_perm (append1[0], append1[1], selector);
+ append1_t[2] = __byte_perm (append1[1], append1[2], selector);
+ append1_t[3] = __byte_perm (append1[2], append1[3], selector);
+
+ u32 append2_t[4];
+
+ append2_t[0] = __byte_perm (append1[3], append2[0], selector);
+ append2_t[1] = __byte_perm (append2[0], append2[1], selector);
+ append2_t[2] = __byte_perm (append2[1], append2[2], selector);
+ append2_t[3] = __byte_perm (append2[2], append2[3], selector);
+
+ u32 append3_t[4];
+
+ append3_t[0] = __byte_perm (append2[3], append3[0], selector);
+ append3_t[1] = __byte_perm (append3[0], append3[1], selector);
+ append3_t[2] = __byte_perm (append3[1], append3[2], selector);
+ append3_t[3] = __byte_perm (append3[2], append3[3], selector);
+
+ u32 append4_t[4];
+
+ append4_t[0] = __byte_perm (append3[3], 0, selector);
+ append4_t[1] = 0;
+ append4_t[2] = 0;
+ append4_t[3] = 0;
+ #endif
switch (div)
{
#define COMPARE_S "check_single_comp4.c"
#define COMPARE_M "check_multi_comp4.c"
-#ifdef VECT_SIZE1
#define uint_to_hex_lower8(i) l_bin2asc[(i)]
-#endif
-
-#ifdef VECT_SIZE2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
-#endif
-
-#ifdef VECT_SIZE4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
-#endif
static u32 memcat32 (u32 block0[16], u32 block1[16], const u32 block_len, const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 append3[4], const u32 append_len)
{
const u32 mod = block_len & 3;
const u32 div = block_len / 4;
+ #ifdef IS_AMD
const int offset_minus_4 = 4 - mod;
u32 append0_t[4];
- append0_t[0] = amd_bytealign (append0[0], 0, offset_minus_4);
+ append0_t[0] = amd_bytealign (append0[0], 0, offset_minus_4);
append0_t[1] = amd_bytealign (append0[1], append0[0], offset_minus_4);
append0_t[2] = amd_bytealign (append0[2], append0[1], offset_minus_4);
append0_t[3] = amd_bytealign (append0[3], append0[2], offset_minus_4);
append4_t[2] = 0;
append4_t[3] = 0;
}
+ #endif
+
+ #ifdef IS_NV
+
+ const int offset_minus_4 = 4 - mod;
+
+ const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+
+ u32 append0_t[4];
+
+ append0_t[0] = __byte_perm ( 0, append0[0], selector);
+ append0_t[1] = __byte_perm (append0[0], append0[1], selector);
+ append0_t[2] = __byte_perm (append0[1], append0[2], selector);
+ append0_t[3] = __byte_perm (append0[2], append0[3], selector);
+
+ u32 append1_t[4];
+
+ append1_t[0] = __byte_perm (append0[3], append1[0], selector);
+ append1_t[1] = __byte_perm (append1[0], append1[1], selector);
+ append1_t[2] = __byte_perm (append1[1], append1[2], selector);
+ append1_t[3] = __byte_perm (append1[2], append1[3], selector);
+
+ u32 append2_t[4];
+
+ append2_t[0] = __byte_perm (append1[3], append2[0], selector);
+ append2_t[1] = __byte_perm (append2[0], append2[1], selector);
+ append2_t[2] = __byte_perm (append2[1], append2[2], selector);
+ append2_t[3] = __byte_perm (append2[2], append2[3], selector);
+
+ u32 append3_t[4];
+
+ append3_t[0] = __byte_perm (append2[3], append3[0], selector);
+ append3_t[1] = __byte_perm (append3[0], append3[1], selector);
+ append3_t[2] = __byte_perm (append3[1], append3[2], selector);
+ append3_t[3] = __byte_perm (append3[2], append3[3], selector);
+
+ u32 append4_t[4];
+
+ append4_t[0] = __byte_perm (append3[3], 0, selector);
+ append4_t[1] = 0;
+ append4_t[2] = 0;
+ append4_t[3] = 0;
+ #endif
switch (div)
{
#define COMPARE_S "check_single_comp4.c"
#define COMPARE_M "check_multi_comp4.c"
-#ifdef VECT_SIZE1
#define uint_to_hex_lower8(i) l_bin2asc[(i)]
-#endif
-
-#ifdef VECT_SIZE1
-#define uint_to_hex_lower8(i) l_bin2asc[(i)]
-#endif
-
-#ifdef VECT_SIZE2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
-#endif
-
-#ifdef VECT_SIZE4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
-#endif
static u32 memcat32 (u32 block0[16], u32 block1[16], const u32 block_len, const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 append3[4], const u32 append_len)
{
const u32 mod = block_len & 3;
const u32 div = block_len / 4;
+ #ifdef IS_AMD
const int offset_minus_4 = 4 - mod;
u32 append0_t[4];
- append0_t[0] = amd_bytealign (append0[0], 0, offset_minus_4);
+ append0_t[0] = amd_bytealign (append0[0], 0, offset_minus_4);
append0_t[1] = amd_bytealign (append0[1], append0[0], offset_minus_4);
append0_t[2] = amd_bytealign (append0[2], append0[1], offset_minus_4);
append0_t[3] = amd_bytealign (append0[3], append0[2], offset_minus_4);
append4_t[2] = 0;
append4_t[3] = 0;
}
+ #endif
+
+ #ifdef IS_NV
+
+ const int offset_minus_4 = 4 - mod;
+
+ const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+
+ u32 append0_t[4];
+
+ append0_t[0] = __byte_perm ( 0, append0[0], selector);
+ append0_t[1] = __byte_perm (append0[0], append0[1], selector);
+ append0_t[2] = __byte_perm (append0[1], append0[2], selector);
+ append0_t[3] = __byte_perm (append0[2], append0[3], selector);
+
+ u32 append1_t[4];
+
+ append1_t[0] = __byte_perm (append0[3], append1[0], selector);
+ append1_t[1] = __byte_perm (append1[0], append1[1], selector);
+ append1_t[2] = __byte_perm (append1[1], append1[2], selector);
+ append1_t[3] = __byte_perm (append1[2], append1[3], selector);
+
+ u32 append2_t[4];
+
+ append2_t[0] = __byte_perm (append1[3], append2[0], selector);
+ append2_t[1] = __byte_perm (append2[0], append2[1], selector);
+ append2_t[2] = __byte_perm (append2[1], append2[2], selector);
+ append2_t[3] = __byte_perm (append2[2], append2[3], selector);
+
+ u32 append3_t[4];
+
+ append3_t[0] = __byte_perm (append2[3], append3[0], selector);
+ append3_t[1] = __byte_perm (append3[0], append3[1], selector);
+ append3_t[2] = __byte_perm (append3[1], append3[2], selector);
+ append3_t[3] = __byte_perm (append3[2], append3[3], selector);
+
+ u32 append4_t[4];
+
+ append4_t[0] = __byte_perm (append3[3], 0, selector);
+ append4_t[1] = 0;
+ append4_t[2] = 0;
+ append4_t[3] = 0;
+ #endif
switch (div)
{
#include "types_ocl.c"
#include "common.c"
-#ifdef VECT_SIZE1
-#define COMPARE_M "check_multi_vect1_comp4.c"
-#endif
+#define COMPARE_S "check_single_comp4.c"
+#define COMPARE_M "check_multi_comp4.c"
__constant u32 te0[256] =
{
u32 g = digest[6];
u32 h = digest[7];
- u32 w0_t = swap_workaround (w[ 0]);
- u32 w1_t = swap_workaround (w[ 1]);
- u32 w2_t = swap_workaround (w[ 2]);
- u32 w3_t = swap_workaround (w[ 3]);
- u32 w4_t = swap_workaround (w[ 4]);
- u32 w5_t = swap_workaround (w[ 5]);
- u32 w6_t = swap_workaround (w[ 6]);
- u32 w7_t = swap_workaround (w[ 7]);
- u32 w8_t = swap_workaround (w[ 8]);
- u32 w9_t = swap_workaround (w[ 9]);
- u32 wa_t = swap_workaround (w[10]);
- u32 wb_t = swap_workaround (w[11]);
- u32 wc_t = swap_workaround (w[12]);
- u32 wd_t = swap_workaround (w[13]);
- u32 we_t = swap_workaround (w[14]);
- u32 wf_t = swap_workaround (w[15]);
+ u32 w0_t = swap32 (w[ 0]);
+ u32 w1_t = swap32 (w[ 1]);
+ u32 w2_t = swap32 (w[ 2]);
+ u32 w3_t = swap32 (w[ 3]);
+ u32 w4_t = swap32 (w[ 4]);
+ u32 w5_t = swap32 (w[ 5]);
+ u32 w6_t = swap32 (w[ 6]);
+ u32 w7_t = swap32 (w[ 7]);
+ u32 w8_t = swap32 (w[ 8]);
+ u32 w9_t = swap32 (w[ 9]);
+ u32 wa_t = swap32 (w[10]);
+ u32 wb_t = swap32 (w[11]);
+ u32 wc_t = swap32 (w[12]);
+ u32 wd_t = swap32 (w[13]);
+ u32 we_t = swap32 (w[14]);
+ u32 wf_t = swap32 (w[15]);
#define ROUND_EXPAND() \
{ \
u32 tmp1;
u32 tmp2;
+ #ifdef IS_NV
+ const int offset_minus_4 = 4 - (block_len & 3);
+
+ const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+
+ tmp0 = __byte_perm ( 0, append[0], selector);
+ tmp1 = __byte_perm (append[0], append[1], selector);
+ tmp2 = __byte_perm (append[1], 0, selector);
+ #endif
+
+ #ifdef IS_AMD
const int offset_minus_4 = 4 - block_len;
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = tmp2;
tmp2 = 0;
}
+ #endif
u32 carry[2] = { 0, 0 };
u32 tmp7;
u32 tmp8;
+ #ifdef IS_NV
+ const int offset_minus_4 = 4 - (block_len & 3);
+
+ const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
+
+ tmp0 = __byte_perm ( 0, append[0], selector);
+ tmp1 = __byte_perm (append[0], append[1], selector);
+ tmp2 = __byte_perm (append[1], append[2], selector);
+ tmp3 = __byte_perm (append[2], append[3], selector);
+ tmp4 = __byte_perm (append[3], append[4], selector);
+ tmp5 = __byte_perm (append[4], append[5], selector);
+ tmp6 = __byte_perm (append[5], append[6], selector);
+ tmp7 = __byte_perm (append[6], append[7], selector);
+ tmp8 = __byte_perm (append[7], 0, selector);
+ #endif
+
+ #ifdef IS_AMD
const int offset_minus_4 = 4 - block_len;
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp7 = tmp8;
tmp8 = 0;
}
+ #endif
u32 carry[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
u32 block_len = tmps[gid].block_len;
u32 final_len = tmps[gid].final_len;
- append_0x80_4x4 (block, block_len);
+ append_0x80_1x16 (block, block_len);
if (block_len >= 56)
{
bzero16 (block);
}
- block[15] = swap_workaround (final_len * 8);
+ block[15] = swap32 (final_len * 8);
sha256_transform (block, dgst);
{
u32 data[4];
- data[0] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 0]);
- data[1] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 1]);
- data[2] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 2]);
- data[3] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 3]);
+ data[0] = swap32 (esalt_bufs[salt_pos].data_buf[j + 0]);
+ data[1] = swap32 (esalt_bufs[salt_pos].data_buf[j + 1]);
+ data[2] = swap32 (esalt_bufs[salt_pos].data_buf[j + 2]);
+ data[3] = swap32 (esalt_bufs[salt_pos].data_buf[j + 3]);
u32 out[4];
iv[2] = data[2];
iv[3] = data[3];
- out[0] = swap_workaround (out[0]);
- out[1] = swap_workaround (out[1]);
- out[2] = swap_workaround (out[2]);
- out[3] = swap_workaround (out[3]);
+ out[0] = swap32 (out[0]);
+ out[1] = swap32 (out[1]);
+ out[2] = swap32 (out[2]);
+ out[3] = swap32 (out[3]);
crc = crc32 (out, 16, crc);
}
u32 data[4];
- data[0] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 0]);
- data[1] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 1]);
- data[2] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 2]);
- data[3] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 3]);
+ data[0] = swap32 (esalt_bufs[salt_pos].data_buf[j + 0]);
+ data[1] = swap32 (esalt_bufs[salt_pos].data_buf[j + 1]);
+ data[2] = swap32 (esalt_bufs[salt_pos].data_buf[j + 2]);
+ data[3] = swap32 (esalt_bufs[salt_pos].data_buf[j + 3]);
u32 out[4];
iv[2] = data[2];
iv[3] = data[3];
- out[0] = swap_workaround (out[0]);
- out[1] = swap_workaround (out[1]);
- out[2] = swap_workaround (out[2]);
- out[3] = swap_workaround (out[3]);
+ out[0] = swap32 (out[0]);
+ out[1] = swap32 (out[1]);
+ out[2] = swap32 (out[2]);
+ out[3] = swap32 (out[3]);
const u32 margin = data_len - unpack_size;
m[6] = hl32_to_64 (w[ 3], w[ 2]);
m[7] = hl32_to_64 (w[ 1], w[ 0]);
- m[0] = swap_workaround (m[0]);
- m[1] = swap_workaround (m[1]);
- m[2] = swap_workaround (m[2]);
- m[3] = swap_workaround (m[3]);
- m[4] = swap_workaround (m[4]);
- m[5] = swap_workaround (m[5]);
- m[6] = swap_workaround (m[6]);
- m[7] = swap_workaround (m[7]);
+ // operands here are 64-bit (see the u64 cast below); swap32 takes a u32 and would drop the upper half
+ m[0] = swap64 (m[0]);
+ m[1] = swap64 (m[1]);
+ m[2] = swap64 (m[2]);
+ m[3] = swap64 (m[3]);
+ m[4] = swap64 (m[4]);
+ m[5] = swap64 (m[5]);
+ m[6] = swap64 (m[6]);
+ m[7] = swap64 (m[7]);
// state buffer (hash)
z[4] = 0;
z[5] = 0;
z[6] = 0;
- z[7] = swap_workaround ((u64) (out_len * 8));
+ z[7] = swap64 ((u64) (out_len * 8));
streebog_g (h, z, s_sbob_sl64);
streebog_g (h, m, s_sbob_sl64);
m[6] = hl32_to_64 (w[ 3], w[ 2]);
m[7] = hl32_to_64 (w[ 1], w[ 0]);
- m[0] = swap_workaround (m[0]);
- m[1] = swap_workaround (m[1]);
- m[2] = swap_workaround (m[2]);
- m[3] = swap_workaround (m[3]);
- m[4] = swap_workaround (m[4]);
- m[5] = swap_workaround (m[5]);
- m[6] = swap_workaround (m[6]);
- m[7] = swap_workaround (m[7]);
+ // operands here are 64-bit (see the u64 cast below); swap32 takes a u32 and would drop the upper half
+ m[0] = swap64 (m[0]);
+ m[1] = swap64 (m[1]);
+ m[2] = swap64 (m[2]);
+ m[3] = swap64 (m[3]);
+ m[4] = swap64 (m[4]);
+ m[5] = swap64 (m[5]);
+ m[6] = swap64 (m[6]);
+ m[7] = swap64 (m[7]);
// state buffer (hash)
z[4] = 0;
z[5] = 0;
z[6] = 0;
- z[7] = swap_workaround ((u64) (out_len * 8));
+ z[7] = swap64 ((u64) (out_len * 8));
streebog_g (h, z, s_sbob_sl64);
streebog_g (h, m, s_sbob_sl64);
m[6] = hl32_to_64 (w[ 3], w[ 2]);
m[7] = hl32_to_64 (w[ 1], w[ 0]);
- m[0] = swap_workaround (m[0]);
- m[1] = swap_workaround (m[1]);
- m[2] = swap_workaround (m[2]);
- m[3] = swap_workaround (m[3]);
- m[4] = swap_workaround (m[4]);
- m[5] = swap_workaround (m[5]);
- m[6] = swap_workaround (m[6]);
- m[7] = swap_workaround (m[7]);
+ // operands here are 64-bit (see the u64 cast below); swap32 takes a u32 and would drop the upper half
+ m[0] = swap64 (m[0]);
+ m[1] = swap64 (m[1]);
+ m[2] = swap64 (m[2]);
+ m[3] = swap64 (m[3]);
+ m[4] = swap64 (m[4]);
+ m[5] = swap64 (m[5]);
+ m[6] = swap64 (m[6]);
+ m[7] = swap64 (m[7]);
// state buffer (hash)
z[4] = 0;
z[5] = 0;
z[6] = 0;
- z[7] = swap_workaround ((u64) (pw_len * 8));
+ z[7] = swap64 ((u64) (pw_len * 8));
streebog_g (h, z, s_sbob_sl64);
streebog_g (h, m, s_sbob_sl64);
m[6] = hl32_to_64 (w[ 3], w[ 2]);
m[7] = hl32_to_64 (w[ 1], w[ 0]);
- m[0] = swap_workaround (m[0]);
- m[1] = swap_workaround (m[1]);
- m[2] = swap_workaround (m[2]);
- m[3] = swap_workaround (m[3]);
- m[4] = swap_workaround (m[4]);
- m[5] = swap_workaround (m[5]);
- m[6] = swap_workaround (m[6]);
- m[7] = swap_workaround (m[7]);
+ // operands here are 64-bit (see the u64 cast below); swap32 takes a u32 and would drop the upper half
+ m[0] = swap64 (m[0]);
+ m[1] = swap64 (m[1]);
+ m[2] = swap64 (m[2]);
+ m[3] = swap64 (m[3]);
+ m[4] = swap64 (m[4]);
+ m[5] = swap64 (m[5]);
+ m[6] = swap64 (m[6]);
+ m[7] = swap64 (m[7]);
// state buffer (hash)
z[4] = 0;
z[5] = 0;
z[6] = 0;
- z[7] = swap_workaround ((u64) (pw_len * 8));
+ z[7] = swap64 ((u64) (pw_len * 8));
streebog_g (h, z, s_sbob_sl64);
streebog_g (h, m, s_sbob_sl64);
m[6] = hl32_to_64 (w[ 3], w[ 2]);
m[7] = hl32_to_64 (w[ 1], w[ 0]);
- m[0] = swap_workaround (m[0]);
- m[1] = swap_workaround (m[1]);
- m[2] = swap_workaround (m[2]);
- m[3] = swap_workaround (m[3]);
- m[4] = swap_workaround (m[4]);
- m[5] = swap_workaround (m[5]);
- m[6] = swap_workaround (m[6]);
- m[7] = swap_workaround (m[7]);
+ // operands here are 64-bit (see the u64 cast below); swap32 takes a u32 and would drop the upper half
+ m[0] = swap64 (m[0]);
+ m[1] = swap64 (m[1]);
+ m[2] = swap64 (m[2]);
+ m[3] = swap64 (m[3]);
+ m[4] = swap64 (m[4]);
+ m[5] = swap64 (m[5]);
+ m[6] = swap64 (m[6]);
+ m[7] = swap64 (m[7]);
// state buffer (hash)
z[4] = 0;
z[5] = 0;
z[6] = 0;
- z[7] = swap_workaround ((u64) (pw_len * 8));
+ z[7] = swap64 ((u64) (pw_len * 8));
streebog_g (h, z, s_sbob_sl64);
streebog_g (h, m, s_sbob_sl64);
m[6] = hl32_to_64 (w[ 3], w[ 2]);
m[7] = hl32_to_64 (w[ 1], w[ 0]);
- m[0] = swap_workaround (m[0]);
- m[1] = swap_workaround (m[1]);
- m[2] = swap_workaround (m[2]);
- m[3] = swap_workaround (m[3]);
- m[4] = swap_workaround (m[4]);
- m[5] = swap_workaround (m[5]);
- m[6] = swap_workaround (m[6]);
- m[7] = swap_workaround (m[7]);
+ // operands here are 64-bit (see the u64 cast below); swap32 takes a u32 and would drop the upper half
+ m[0] = swap64 (m[0]);
+ m[1] = swap64 (m[1]);
+ m[2] = swap64 (m[2]);
+ m[3] = swap64 (m[3]);
+ m[4] = swap64 (m[4]);
+ m[5] = swap64 (m[5]);
+ m[6] = swap64 (m[6]);
+ m[7] = swap64 (m[7]);
// state buffer (hash)
z[4] = 0;
z[5] = 0;
z[6] = 0;
- z[7] = swap_workaround ((u64) (pw_len * 8));
+ z[7] = swap64 ((u64) (pw_len * 8));
streebog_g (h, z, s_sbob_sl64);
streebog_g (h, m, s_sbob_sl64);
m[6] = hl32_to_64 (w[ 3], w[ 2]);
m[7] = hl32_to_64 (w[ 1], w[ 0]);
- m[0] = swap_workaround (m[0]);
- m[1] = swap_workaround (m[1]);
- m[2] = swap_workaround (m[2]);
- m[3] = swap_workaround (m[3]);
- m[4] = swap_workaround (m[4]);
- m[5] = swap_workaround (m[5]);
- m[6] = swap_workaround (m[6]);
- m[7] = swap_workaround (m[7]);
+ // operands here are 64-bit (see the u64 cast below); swap32 takes a u32 and would drop the upper half
+ m[0] = swap64 (m[0]);
+ m[1] = swap64 (m[1]);
+ m[2] = swap64 (m[2]);
+ m[3] = swap64 (m[3]);
+ m[4] = swap64 (m[4]);
+ m[5] = swap64 (m[5]);
+ m[6] = swap64 (m[6]);
+ m[7] = swap64 (m[7]);
// state buffer (hash)
z[4] = 0;
z[5] = 0;
z[6] = 0;
- z[7] = swap_workaround ((u64) (out_len * 8));
+ z[7] = swap64 ((u64) (out_len * 8));
streebog_g (h, z, s_sbob_sl64);
streebog_g (h, m, s_sbob_sl64);
m[6] = hl32_to_64 (w[ 3], w[ 2]);
m[7] = hl32_to_64 (w[ 1], w[ 0]);
- m[0] = swap_workaround (m[0]);
- m[1] = swap_workaround (m[1]);
- m[2] = swap_workaround (m[2]);
- m[3] = swap_workaround (m[3]);
- m[4] = swap_workaround (m[4]);
- m[5] = swap_workaround (m[5]);
- m[6] = swap_workaround (m[6]);
- m[7] = swap_workaround (m[7]);
+ // operands here are 64-bit (see the u64 cast below); swap32 takes a u32 and would drop the upper half
+ m[0] = swap64 (m[0]);
+ m[1] = swap64 (m[1]);
+ m[2] = swap64 (m[2]);
+ m[3] = swap64 (m[3]);
+ m[4] = swap64 (m[4]);
+ m[5] = swap64 (m[5]);
+ m[6] = swap64 (m[6]);
+ m[7] = swap64 (m[7]);
// state buffer (hash)
z[4] = 0;
z[5] = 0;
z[6] = 0;
- z[7] = swap_workaround ((u64) (out_len * 8));
+ z[7] = swap64 ((u64) (out_len * 8));
streebog_g (h, z, s_sbob_sl64);
streebog_g (h, m, s_sbob_sl64);
m[6] = hl32_to_64 (w[ 3], w[ 2]);
m[7] = hl32_to_64 (w[ 1], w[ 0]);
- m[0] = swap_workaround (m[0]);
- m[1] = swap_workaround (m[1]);
- m[2] = swap_workaround (m[2]);
- m[3] = swap_workaround (m[3]);
- m[4] = swap_workaround (m[4]);
- m[5] = swap_workaround (m[5]);
- m[6] = swap_workaround (m[6]);
- m[7] = swap_workaround (m[7]);
+ // operands here are 64-bit (see the u64 cast below); swap32 takes a u32 and would drop the upper half
+ m[0] = swap64 (m[0]);
+ m[1] = swap64 (m[1]);
+ m[2] = swap64 (m[2]);
+ m[3] = swap64 (m[3]);
+ m[4] = swap64 (m[4]);
+ m[5] = swap64 (m[5]);
+ m[6] = swap64 (m[6]);
+ m[7] = swap64 (m[7]);
// state buffer (hash)
z[4] = 0;
z[5] = 0;
z[6] = 0;
- z[7] = swap_workaround ((u64) (pw_len * 8));
+ z[7] = swap64 ((u64) (pw_len * 8));
streebog_g (h, z, s_sbob_sl64);
streebog_g (h, m, s_sbob_sl64);
m[6] = hl32_to_64 (w[ 3], w[ 2]);
m[7] = hl32_to_64 (w[ 1], w[ 0]);
- m[0] = swap_workaround (m[0]);
- m[1] = swap_workaround (m[1]);
- m[2] = swap_workaround (m[2]);
- m[3] = swap_workaround (m[3]);
- m[4] = swap_workaround (m[4]);
- m[5] = swap_workaround (m[5]);
- m[6] = swap_workaround (m[6]);
- m[7] = swap_workaround (m[7]);
+ // operands here are 64-bit (see the u64 cast below); swap32 takes a u32 and would drop the upper half
+ m[0] = swap64 (m[0]);
+ m[1] = swap64 (m[1]);
+ m[2] = swap64 (m[2]);
+ m[3] = swap64 (m[3]);
+ m[4] = swap64 (m[4]);
+ m[5] = swap64 (m[5]);
+ m[6] = swap64 (m[6]);
+ m[7] = swap64 (m[7]);
// state buffer (hash)
z[4] = 0;
z[5] = 0;
z[6] = 0;
- z[7] = swap_workaround ((u64) (pw_len * 8));
+ z[7] = swap64 ((u64) (pw_len * 8));
streebog_g (h, z, s_sbob_sl64);
streebog_g (h, m, s_sbob_sl64);
m[6] = hl32_to_64 (w[ 3], w[ 2]);
m[7] = hl32_to_64 (w[ 1], w[ 0]);
- m[0] = swap_workaround (m[0]);
- m[1] = swap_workaround (m[1]);
- m[2] = swap_workaround (m[2]);
- m[3] = swap_workaround (m[3]);
- m[4] = swap_workaround (m[4]);
- m[5] = swap_workaround (m[5]);
- m[6] = swap_workaround (m[6]);
- m[7] = swap_workaround (m[7]);
+ // operands here are 64-bit (see the u64 cast below); swap32 takes a u32 and would drop the upper half
+ m[0] = swap64 (m[0]);
+ m[1] = swap64 (m[1]);
+ m[2] = swap64 (m[2]);
+ m[3] = swap64 (m[3]);
+ m[4] = swap64 (m[4]);
+ m[5] = swap64 (m[5]);
+ m[6] = swap64 (m[6]);
+ m[7] = swap64 (m[7]);
// state buffer (hash)
z[4] = 0;
z[5] = 0;
z[6] = 0;
- z[7] = swap_workaround ((u64) (pw_len * 8));
+ z[7] = swap64 ((u64) (pw_len * 8));
streebog_g (h, z, s_sbob_sl64);
streebog_g (h, m, s_sbob_sl64);
m[6] = hl32_to_64 (w[ 3], w[ 2]);
m[7] = hl32_to_64 (w[ 1], w[ 0]);
- m[0] = swap_workaround (m[0]);
- m[1] = swap_workaround (m[1]);
- m[2] = swap_workaround (m[2]);
- m[3] = swap_workaround (m[3]);
- m[4] = swap_workaround (m[4]);
- m[5] = swap_workaround (m[5]);
- m[6] = swap_workaround (m[6]);
- m[7] = swap_workaround (m[7]);
+ // operands here are 64-bit (see the u64 cast below); swap32 takes a u32 and would drop the upper half
+ m[0] = swap64 (m[0]);
+ m[1] = swap64 (m[1]);
+ m[2] = swap64 (m[2]);
+ m[3] = swap64 (m[3]);
+ m[4] = swap64 (m[4]);
+ m[5] = swap64 (m[5]);
+ m[6] = swap64 (m[6]);
+ m[7] = swap64 (m[7]);
// state buffer (hash)
z[4] = 0;
z[5] = 0;
z[6] = 0;
- z[7] = swap_workaround ((u64) (pw_len * 8));
+ z[7] = swap64 ((u64) (pw_len * 8));
streebog_g (h, z, s_sbob_sl64);
streebog_g (h, m, s_sbob_sl64);
u32 w0[4];
- w0[0] = swap_workaround (pws[gid].i[ 0]);
- w0[1] = swap_workaround (pws[gid].i[ 1]);
- w0[2] = swap_workaround (pws[gid].i[ 2]);
- w0[3] = swap_workaround (pws[gid].i[ 3]);
+ w0[0] = swap32 (pws[gid].i[ 0]);
+ w0[1] = swap32 (pws[gid].i[ 1]);
+ w0[2] = swap32 (pws[gid].i[ 2]);
+ w0[3] = swap32 (pws[gid].i[ 3]);
u32 w1[4];
- w1[0] = swap_workaround (pws[gid].i[ 4]);
- w1[1] = swap_workaround (pws[gid].i[ 5]);
- w1[2] = swap_workaround (pws[gid].i[ 6]);
- w1[3] = swap_workaround (pws[gid].i[ 7]);
+ w1[0] = swap32 (pws[gid].i[ 4]);
+ w1[1] = swap32 (pws[gid].i[ 5]);
+ w1[2] = swap32 (pws[gid].i[ 6]);
+ w1[3] = swap32 (pws[gid].i[ 7]);
u32 w2[4];
- w2[0] = swap_workaround (pws[gid].i[ 8]);
- w2[1] = swap_workaround (pws[gid].i[ 9]);
- w2[2] = swap_workaround (pws[gid].i[10]);
- w2[3] = swap_workaround (pws[gid].i[11]);
+ w2[0] = swap32 (pws[gid].i[ 8]);
+ w2[1] = swap32 (pws[gid].i[ 9]);
+ w2[2] = swap32 (pws[gid].i[10]);
+ w2[3] = swap32 (pws[gid].i[11]);
u32 w3[4];
- w3[0] = swap_workaround (pws[gid].i[12]);
- w3[1] = swap_workaround (pws[gid].i[13]);
- w3[2] = swap_workaround (pws[gid].i[14]);
- w3[3] = swap_workaround (pws[gid].i[15]);
+ w3[0] = swap32 (pws[gid].i[12]);
+ w3[1] = swap32 (pws[gid].i[13]);
+ w3[2] = swap32 (pws[gid].i[14]);
+ w3[3] = swap32 (pws[gid].i[15]);
/**
* salt
u32 esalt_buf2[4];
u32 esalt_buf3[4];
- esalt_buf0[0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0]);
- esalt_buf0[1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1]);
- esalt_buf0[2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2]);
- esalt_buf0[3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3]);
- esalt_buf1[0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4]);
- esalt_buf1[1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5]);
- esalt_buf1[2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 6]);
- esalt_buf1[3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7]);
- esalt_buf2[0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8]);
- esalt_buf2[1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9]);
- esalt_buf2[2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[10]);
- esalt_buf2[3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[11]);
- esalt_buf3[0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[12]);
- esalt_buf3[1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[13]);
+ esalt_buf0[0] = swap32 (esalt_bufs[salt_pos].salt_buf[ 0]);
+ esalt_buf0[1] = swap32 (esalt_bufs[salt_pos].salt_buf[ 1]);
+ esalt_buf0[2] = swap32 (esalt_bufs[salt_pos].salt_buf[ 2]);
+ esalt_buf0[3] = swap32 (esalt_bufs[salt_pos].salt_buf[ 3]);
+ esalt_buf1[0] = swap32 (esalt_bufs[salt_pos].salt_buf[ 4]);
+ esalt_buf1[1] = swap32 (esalt_bufs[salt_pos].salt_buf[ 5]);
+ esalt_buf1[2] = swap32 (esalt_bufs[salt_pos].salt_buf[ 6]);
+ esalt_buf1[3] = swap32 (esalt_bufs[salt_pos].salt_buf[ 7]);
+ esalt_buf2[0] = swap32 (esalt_bufs[salt_pos].salt_buf[ 8]);
+ esalt_buf2[1] = swap32 (esalt_bufs[salt_pos].salt_buf[ 9]);
+ esalt_buf2[2] = swap32 (esalt_bufs[salt_pos].salt_buf[10]);
+ esalt_buf2[3] = swap32 (esalt_bufs[salt_pos].salt_buf[11]);
+ esalt_buf3[0] = swap32 (esalt_bufs[salt_pos].salt_buf[12]);
+ esalt_buf3[1] = swap32 (esalt_bufs[salt_pos].salt_buf[13]);
esalt_buf3[2] = 0;
esalt_buf3[3] = (64 + salt_len + 4) * 8;
append_0x80_4x4 (w0, w1, w2, w3, pw_len);
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
- w3[2] = swap_workaround (w3[2]);
- w3[3] = swap_workaround (w3[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
+ w3[2] = swap32 (w3[2]);
+ w3[3] = swap32 (w3[3]);
/**
* salt
u32 w0[4];
- w0[0] = swap_workaround (pws[gid].i[ 0]);
- w0[1] = swap_workaround (pws[gid].i[ 1]);
- w0[2] = swap_workaround (pws[gid].i[ 2]);
- w0[3] = swap_workaround (pws[gid].i[ 3]);
+ w0[0] = swap32 (pws[gid].i[ 0]);
+ w0[1] = swap32 (pws[gid].i[ 1]);
+ w0[2] = swap32 (pws[gid].i[ 2]);
+ w0[3] = swap32 (pws[gid].i[ 3]);
u32 w1[4];
- w1[0] = swap_workaround (pws[gid].i[ 4]);
- w1[1] = swap_workaround (pws[gid].i[ 5]);
- w1[2] = swap_workaround (pws[gid].i[ 6]);
- w1[3] = swap_workaround (pws[gid].i[ 7]);
+ w1[0] = swap32 (pws[gid].i[ 4]);
+ w1[1] = swap32 (pws[gid].i[ 5]);
+ w1[2] = swap32 (pws[gid].i[ 6]);
+ w1[3] = swap32 (pws[gid].i[ 7]);
u32 w2[4];
- w2[0] = swap_workaround (pws[gid].i[ 8]);
- w2[1] = swap_workaround (pws[gid].i[ 9]);
- w2[2] = swap_workaround (pws[gid].i[10]);
- w2[3] = swap_workaround (pws[gid].i[11]);
+ w2[0] = swap32 (pws[gid].i[ 8]);
+ w2[1] = swap32 (pws[gid].i[ 9]);
+ w2[2] = swap32 (pws[gid].i[10]);
+ w2[3] = swap32 (pws[gid].i[11]);
u32 w3[4];
- w3[0] = swap_workaround (pws[gid].i[12]);
- w3[1] = swap_workaround (pws[gid].i[13]);
- w3[2] = swap_workaround (pws[gid].i[14]);
- w3[3] = swap_workaround (pws[gid].i[15]);
+ w3[0] = swap32 (pws[gid].i[12]);
+ w3[1] = swap32 (pws[gid].i[13]);
+ w3[2] = swap32 (pws[gid].i[14]);
+ w3[3] = swap32 (pws[gid].i[15]);
/**
* salt
u32 ukeyx[4];
- ukeyx[0] = swap_workaround (dgst[0]);
- ukeyx[1] = swap_workaround (dgst[1]);
- ukeyx[2] = swap_workaround (dgst[2]);
- ukeyx[3] = swap_workaround (dgst[3]);
+ ukeyx[0] = swap32 (dgst[0]);
+ ukeyx[1] = swap32 (dgst[1]);
+ ukeyx[2] = swap32 (dgst[2]);
+ ukeyx[3] = swap32 (dgst[3]);
AES128_ExpandKey (ukeyx, rk, s_te0, s_te1, s_te2, s_te3, s_te4);
PUTCHAR (iv, i, dgst[4] & 0xff);
}
- out[0] ^= swap_workaround (iv[0]);
- out[1] ^= swap_workaround (iv[1]);
- out[2] ^= swap_workaround (iv[2]);
- out[3] ^= swap_workaround (iv[3]);
+ out[0] ^= swap32 (iv[0]);
+ out[1] ^= swap32 (iv[1]);
+ out[2] ^= swap32 (iv[2]);
+ out[3] ^= swap32 (iv[3]);
const u32 r0 = out[0];
const u32 r1 = out[1];
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_len * 8;
w9_t = uint_to_hex_upper8 ((e >> 8) & 255) << 0
| uint_to_hex_upper8 ((e >> 0) & 255) << 16;
- w0_t = swap_workaround (w0_t);
- w1_t = swap_workaround (w1_t);
- w2_t = swap_workaround (w2_t);
- w3_t = swap_workaround (w3_t);
- w4_t = swap_workaround (w4_t);
- w5_t = swap_workaround (w5_t);
- w6_t = swap_workaround (w6_t);
- w7_t = swap_workaround (w7_t);
- w8_t = swap_workaround (w8_t);
- w9_t = swap_workaround (w9_t);
+ w0_t = swap32 (w0_t);
+ w1_t = swap32 (w1_t);
+ w2_t = swap32 (w2_t);
+ w3_t = swap32 (w3_t);
+ w4_t = swap32 (w4_t);
+ w5_t = swap32 (w5_t);
+ w6_t = swap32 (w6_t);
+ w7_t = swap32 (w7_t);
+ w8_t = swap32 (w8_t);
+ w9_t = swap32 (w9_t);
wa_t = 0x80000000;
wb_t = 0;
wc_t = 0;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = out_len * 8;
w9_t = uint_to_hex_upper8 ((e >> 8) & 255) << 0
| uint_to_hex_upper8 ((e >> 0) & 255) << 16;
- w0_t = swap_workaround (w0_t);
- w1_t = swap_workaround (w1_t);
- w2_t = swap_workaround (w2_t);
- w3_t = swap_workaround (w3_t);
- w4_t = swap_workaround (w4_t);
- w5_t = swap_workaround (w5_t);
- w6_t = swap_workaround (w6_t);
- w7_t = swap_workaround (w7_t);
- w8_t = swap_workaround (w8_t);
- w9_t = swap_workaround (w9_t);
+ w0_t = swap32 (w0_t);
+ w1_t = swap32 (w1_t);
+ w2_t = swap32 (w2_t);
+ w3_t = swap32 (w3_t);
+ w4_t = swap32 (w4_t);
+ w5_t = swap32 (w5_t);
+ w6_t = swap32 (w6_t);
+ w7_t = swap32 (w7_t);
+ w8_t = swap32 (w8_t);
+ w9_t = swap32 (w9_t);
wa_t = 0x80000000;
wb_t = 0;
wc_t = 0;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
w9_t = uint_to_hex_upper8 ((e >> 8) & 255) << 0
| uint_to_hex_upper8 ((e >> 0) & 255) << 16;
- w0_t = swap_workaround (w0_t);
- w1_t = swap_workaround (w1_t);
- w2_t = swap_workaround (w2_t);
- w3_t = swap_workaround (w3_t);
- w4_t = swap_workaround (w4_t);
- w5_t = swap_workaround (w5_t);
- w6_t = swap_workaround (w6_t);
- w7_t = swap_workaround (w7_t);
- w8_t = swap_workaround (w8_t);
- w9_t = swap_workaround (w9_t);
+ w0_t = swap32 (w0_t);
+ w1_t = swap32 (w1_t);
+ w2_t = swap32 (w2_t);
+ w3_t = swap32 (w3_t);
+ w4_t = swap32 (w4_t);
+ w5_t = swap32 (w5_t);
+ w6_t = swap32 (w6_t);
+ w7_t = swap32 (w7_t);
+ w8_t = swap32 (w8_t);
+ w9_t = swap32 (w9_t);
wa_t = 0x80000000;
wb_t = 0;
wc_t = 0;
* sha1
*/
- u32 w0_t = swap_workaround (w0[0]);
- u32 w1_t = swap_workaround (w0[1]);
- u32 w2_t = swap_workaround (w0[2]);
- u32 w3_t = swap_workaround (w0[3]);
- u32 w4_t = swap_workaround (w1[0]);
- u32 w5_t = swap_workaround (w1[1]);
- u32 w6_t = swap_workaround (w1[2]);
- u32 w7_t = swap_workaround (w1[3]);
- u32 w8_t = swap_workaround (w2[0]);
- u32 w9_t = swap_workaround (w2[1]);
- u32 wa_t = swap_workaround (w2[2]);
- u32 wb_t = swap_workaround (w2[3]);
- u32 wc_t = swap_workaround (w3[0]);
- u32 wd_t = swap_workaround (w3[1]);
+ u32 w0_t = swap32 (w0[0]);
+ u32 w1_t = swap32 (w0[1]);
+ u32 w2_t = swap32 (w0[2]);
+ u32 w3_t = swap32 (w0[3]);
+ u32 w4_t = swap32 (w1[0]);
+ u32 w5_t = swap32 (w1[1]);
+ u32 w6_t = swap32 (w1[2]);
+ u32 w7_t = swap32 (w1[3]);
+ u32 w8_t = swap32 (w2[0]);
+ u32 w9_t = swap32 (w2[1]);
+ u32 wa_t = swap32 (w2[2]);
+ u32 wb_t = swap32 (w2[3]);
+ u32 wc_t = swap32 (w3[0]);
+ u32 wd_t = swap32 (w3[1]);
u32 we_t = 0;
u32 wf_t = pw_len * 8;
w9_t = uint_to_hex_upper8 ((e >> 8) & 255) << 0
| uint_to_hex_upper8 ((e >> 0) & 255) << 16;
- w0_t = swap_workaround (w0_t);
- w1_t = swap_workaround (w1_t);
- w2_t = swap_workaround (w2_t);
- w3_t = swap_workaround (w3_t);
- w4_t = swap_workaround (w4_t);
- w5_t = swap_workaround (w5_t);
- w6_t = swap_workaround (w6_t);
- w7_t = swap_workaround (w7_t);
- w8_t = swap_workaround (w8_t);
- w9_t = swap_workaround (w9_t);
+ w0_t = swap32 (w0_t);
+ w1_t = swap32 (w1_t);
+ w2_t = swap32 (w2_t);
+ w3_t = swap32 (w3_t);
+ w4_t = swap32 (w4_t);
+ w5_t = swap32 (w5_t);
+ w6_t = swap32 (w6_t);
+ w7_t = swap32 (w7_t);
+ w8_t = swap32 (w8_t);
+ w9_t = swap32 (w9_t);
wa_t = 0x80000000;
wb_t = 0;
wc_t = 0;
w9_t = uint_to_hex_upper8 ((e >> 8) & 255) << 0
| uint_to_hex_upper8 ((e >> 0) & 255) << 16;
- w0_t = swap_workaround (w0_t);
- w1_t = swap_workaround (w1_t);
- w2_t = swap_workaround (w2_t);
- w3_t = swap_workaround (w3_t);
- w4_t = swap_workaround (w4_t);
- w5_t = swap_workaround (w5_t);
- w6_t = swap_workaround (w6_t);
- w7_t = swap_workaround (w7_t);
- w8_t = swap_workaround (w8_t);
- w9_t = swap_workaround (w9_t);
+ w0_t = swap32 (w0_t);
+ w1_t = swap32 (w1_t);
+ w2_t = swap32 (w2_t);
+ w3_t = swap32 (w3_t);
+ w4_t = swap32 (w4_t);
+ w5_t = swap32 (w5_t);
+ w6_t = swap32 (w6_t);
+ w7_t = swap32 (w7_t);
+ w8_t = swap32 (w8_t);
+ w9_t = swap32 (w9_t);
wa_t = 0x80000000;
wb_t = 0;
wc_t = 0;
w9_t = uint_to_hex_upper8 ((e >> 8) & 255) << 0
| uint_to_hex_upper8 ((e >> 0) & 255) << 16;
- w0_t = swap_workaround (w0_t);
- w1_t = swap_workaround (w1_t);
- w2_t = swap_workaround (w2_t);
- w3_t = swap_workaround (w3_t);
- w4_t = swap_workaround (w4_t);
- w5_t = swap_workaround (w5_t);
- w6_t = swap_workaround (w6_t);
- w7_t = swap_workaround (w7_t);
- w8_t = swap_workaround (w8_t);
- w9_t = swap_workaround (w9_t);
+ w0_t = swap32 (w0_t);
+ w1_t = swap32 (w1_t);
+ w2_t = swap32 (w2_t);
+ w3_t = swap32 (w3_t);
+ w4_t = swap32 (w4_t);
+ w5_t = swap32 (w5_t);
+ w6_t = swap32 (w6_t);
+ w7_t = swap32 (w7_t);
+ w8_t = swap32 (w8_t);
+ w9_t = swap32 (w9_t);
wa_t = 0x80000000;
wb_t = 0;
wc_t = 0;
* pads
*/
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
- w3[2] = swap_workaround (w3[2]);
- w3[3] = swap_workaround (w3[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
+ w3[2] = swap32 (w3[2]);
+ w3[3] = swap32 (w3[3]);
u32 ipad[5];
u32 opad[5];
out[2] ^= iv[2];
out[3] ^= iv[3];
- out[0] = swap_workaround (out[0]);
- out[1] = swap_workaround (out[1]);
- out[2] = swap_workaround (out[2]);
- out[3] = swap_workaround (out[3]);
+ out[0] = swap32 (out[0]);
+ out[1] = swap32 (out[1]);
+ out[2] = swap32 (out[2]);
+ out[3] = swap32 (out[3]);
if ((out[0] & 0xff) != '{') return;
u32 salt_buf0[4];
- salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]);
- salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]);
- salt_buf0[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[2]);
- salt_buf0[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[3]);
+ salt_buf0[0] = swap32 (salt_bufs[salt_pos].salt_buf[0]);
+ salt_buf0[1] = swap32 (salt_bufs[salt_pos].salt_buf[1]);
+ salt_buf0[2] = swap32 (salt_bufs[salt_pos].salt_buf[2]);
+ salt_buf0[3] = swap32 (salt_bufs[salt_pos].salt_buf[3]);
u32 salt_buf1[4];
make_unicode (w1, w2, w3);
make_unicode (w0, w0, w1);
- w0[0] = swap_workaround (w0[0]);
- w0[1] = swap_workaround (w0[1]);
- w0[2] = swap_workaround (w0[2]);
- w0[3] = swap_workaround (w0[3]);
- w1[0] = swap_workaround (w1[0]);
- w1[1] = swap_workaround (w1[1]);
- w1[2] = swap_workaround (w1[2]);
- w1[3] = swap_workaround (w1[3]);
- w2[0] = swap_workaround (w2[0]);
- w2[1] = swap_workaround (w2[1]);
- w2[2] = swap_workaround (w2[2]);
- w2[3] = swap_workaround (w2[3]);
- w3[0] = swap_workaround (w3[0]);
- w3[1] = swap_workaround (w3[1]);
- w3[2] = swap_workaround (w3[2]);
- w3[3] = swap_workaround (w3[3]);
+ w0[0] = swap32 (w0[0]);
+ w0[1] = swap32 (w0[1]);
+ w0[2] = swap32 (w0[2]);
+ w0[3] = swap32 (w0[3]);
+ w1[0] = swap32 (w1[0]);
+ w1[1] = swap32 (w1[1]);
+ w1[2] = swap32 (w1[2]);
+ w1[3] = swap32 (w1[3]);
+ w2[0] = swap32 (w2[0]);
+ w2[1] = swap32 (w2[1]);
+ w2[2] = swap32 (w2[2]);
+ w2[3] = swap32 (w2[3]);
+ w3[0] = swap32 (w3[0]);
+ w3[1] = swap32 (w3[1]);
+ w3[2] = swap32 (w3[2]);
+ w3[3] = swap32 (w3[3]);
u32 ipad[8];
u32 opad[8];
static u32 swap32 (const u32 v) // returns v with its four bytes in reverse order
{
- return __byte_perm (v, 0, 0x0123);
+ return (as_uint (as_uchar4 (v).s3210)); // view v as four bytes, reorder them 3,2,1,0, reinterpret as one word
+ // previous __byte_perm form, kept for reference: return __byte_perm (v, 0, 0x0123);
}
static u64 swap64 (const u64 v)
* kernel find
*/
- char build_opts[100];
+ char build_opts[1024];
// we don't have sm_* on AMD but it doesn't matter
* kernel compile
*/
- #ifdef BINARY_KERNEL
+ //#ifdef BINARY_KERNEL
if (force_jit_compilation == 0)
{
sprintf (build_opts, "%s -DSCRYPT_N=%d -DSCRYPT_R=%d -DSCRYPT_P=%d -DSCRYPT_TMTO=%d", build_opts, data.salts_buf[0].scrypt_N, data.salts_buf[0].scrypt_r, data.salts_buf[0].scrypt_p, data.salts_buf[0].scrypt_tmto);
}
- #endif
+ //#endif
clBuildProgram (device_param->program, 1, &device_param->device, build_opts, NULL, NULL);