From ef548d3088c02da1f55d54cc81083ec8d755e98d Mon Sep 17 00:00:00 2001 From: philsmd Date: Sun, 6 Dec 2015 11:29:29 +0100 Subject: [PATCH] fixes issue #5: formatting problem with tabs vs spaces --- amd/gpu_aes256_amd.c | 4 +- amd/m01500_a3.cl | 870 +++++++++++++++++++++---------------------- amd/m03000_a3.cl | 870 +++++++++++++++++++++---------------------- amd/m03100_a0.cl | 2 +- amd/m03100_a1.cl | 2 +- amd/m03100_a3.cl | 2 +- amd/m05800.cl | 256 ++++++------- amd/m06600.cl | 22 +- amd/m08600_a0.cl | 8 +- amd/m08600_a1.cl | 8 +- amd/m08600_a3.cl | 8 +- amd/m08700_a0.cl | 8 +- amd/m08700_a1.cl | 8 +- amd/types_amd.c | 4 +- include/cpu-aes.c | 4 +- include/ext_OpenCL.h | 4 +- include/types.h | 4 +- nv/amp_a1_v1.cu | 76 ++-- nv/amp_a1_v2.cu | 76 ++-- nv/amp_a1_v4.cu | 76 ++-- nv/common_nv.c | 152 ++++---- nv/gpu_aes256_nv.c | 4 +- nv/m01500_a3.cu | 459 ++++++++++++----------- nv/m03000_a3.cu | 459 ++++++++++++----------- nv/m03100_a3.cu | 2 +- nv/m03200.cu | 6 +- nv/m05800.cu | 256 ++++++------- nv/m06600.cu | 22 +- nv/m08600_a0.cu | 8 +- nv/m08600_a1.cu | 8 +- nv/m08600_a3.cu | 8 +- nv/m08700_a0.cu | 10 +- nv/m08700_a1.cu | 10 +- nv/m08700_a3.cu | 10 +- nv/m08800.cu | 22 +- nv/m09000.cu | 6 +- nv/m09400.cu | 22 +- nv/m09500.cu | 22 +- nv/m12500.cu | 22 +- nv/types_nv.c | 4 +- src/ext_OpenCL.c | 8 +- src/shared.c | 2 +- tools/deps.sh | 62 +-- tools/test.pl | 12 +- tools/test.sh | 5 +- 45 files changed, 1953 insertions(+), 1960 deletions(-) diff --git a/amd/gpu_aes256_amd.c b/amd/gpu_aes256_amd.c index 7aca801..ecb6d13 100644 --- a/amd/gpu_aes256_amd.c +++ b/amd/gpu_aes256_amd.c @@ -707,7 +707,7 @@ static void aes256_ExpandKey (u32 *ks, const u32 *ukey) i = 0; j = 0; - while (1) + while (1) { u32 temp = ks[j + 7]; @@ -737,7 +737,7 @@ static void aes256_ExpandKey (u32 *ks, const u32 *ukey) ks[j + 15] = ks[j + 7] ^ ks[j + 14]; j += 8; - } + } } static void aes256_InvertKey (u32 *ks) diff --git a/amd/m01500_a3.cl b/amd/m01500_a3.cl index 3967673..cc907de 100644 --- a/amd/m01500_a3.cl +++ b/amd/m01500_a3.cl @@ -77,488 +77,488 @@ * The effort has been sponsored by Rapid7: http://www.rapid7.com */ -#define vnot(dst, a) (dst) = ~(a) -#define vand(dst, a, b) (dst) = (a) & (b) -#define vor(dst, a, b) (dst) = (a) | (b) -#define vandn(dst, a, b) (dst) = (a) & ~(b) -#define vxor(dst, a, b) (dst) = (a) ^ (b) -#define vsel(dst, a, b, c) (dst) = bitselect((a),(b),(c)) +#define vnot(dst, a) (dst) = ~(a) +#define vand(dst, a, b) (dst) = (a) & (b) +#define vor(dst, a, b) (dst) = (a) | (b) +#define vandn(dst, a, b) (dst) = (a) & ~(b) +#define vxor(dst, a, b) (dst) = (a) ^ (b) +#define vsel(dst, a, b, c) (dst) = bitselect((a),(b),(c)) static void s1(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, u32 * out1, u32 * out2, u32 * out3, u32 * out4) { - u32 x0F0F3333, x3C3C3C3C, x55FF55FF, x69C369C3, x0903B73F, x09FCB7C0, - x5CA9E295; - u32 x55AFD1B7, x3C3C69C3, x6993B874; - u32 x5CEDE59F, x09FCE295, x5D91A51E, x529E962D; - u32 x29EEADC0, x4B8771A3, x428679F3, x6B68D433; - u32 x5BA7E193, x026F12F3, x6B27C493, x94D83B6C; - u32 x965E0B0F, x3327A113, x847F0A1F, xD6E19C32; - u32 x0DBCE883, x3A25A215, x37994A96; - u32 x8A487EA7, x8B480F07, xB96C2D16; - u32 x0, x1, x2, x3; - - vsel(x0F0F3333, a3, a2, a5); - vxor(x3C3C3C3C, a2, a3); - vor(x55FF55FF, a1, a4); - vxor(x69C369C3, x3C3C3C3C, x55FF55FF); - vsel(x0903B73F, a5, x0F0F3333, x69C369C3); - vxor(x09FCB7C0, a4, x0903B73F); - vxor(x5CA9E295, a1, x09FCB7C0); - - vsel(x55AFD1B7, x5CA9E295, x55FF55FF, x0F0F3333); - vsel(x3C3C69C3, x3C3C3C3C, x69C369C3, a5); - vxor(x6993B874, x55AFD1B7, x3C3C69C3); - - vsel(x5CEDE59F, x55FF55FF, x5CA9E295, x6993B874); - vsel(x09FCE295, x09FCB7C0, x5CA9E295, a5); - vsel(x5D91A51E, x5CEDE59F, x6993B874, x09FCE295); - vxor(x529E962D, x0F0F3333, x5D91A51E); - - vsel(x29EEADC0, x69C369C3, x09FCB7C0, x5CEDE59F); - vsel(x4B8771A3, x0F0F3333, x69C369C3, x5CA9E295); - vsel(x428679F3, a5, x4B8771A3, x529E962D); - vxor(x6B68D433, x29EEADC0, x428679F3); - - vsel(x5BA7E193, x5CA9E295, x4B8771A3, a3); - vsel(x026F12F3, a4, x0F0F3333, x529E962D); - vsel(x6B27C493, x6B68D433, x5BA7E193, x026F12F3); - vnot(x94D83B6C, x6B27C493); - vsel(x0, x94D83B6C, x6B68D433, a6); - vxor(*out1, *out1, x0); - - vsel(x965E0B0F, x94D83B6C, a3, x428679F3); - vsel(x3327A113, x5BA7E193, a2, x69C369C3); - vsel(x847F0A1F, x965E0B0F, a4, x3327A113); - vxor(xD6E19C32, x529E962D, x847F0A1F); - vsel(x1, xD6E19C32, x5CA9E295, a6); - vxor(*out2, *out2, x1); - - vsel(x0DBCE883, x09FCE295, x3C3C69C3, x847F0A1F); - vsel(x3A25A215, x3327A113, x5CA9E295, x0903B73F); - vxor(x37994A96, x0DBCE883, x3A25A215); - vsel(x3, x37994A96, x529E962D, a6); - vxor(*out4, *out4, x3); - - vxor(x8A487EA7, x5CA9E295, xD6E19C32); - vsel(x8B480F07, a3, x8A487EA7, x847F0A1F); - vsel(xB96C2D16, x8B480F07, x3C3C3C3C, x3A25A215); - vsel(x2, xB96C2D16, x6993B874, a6); - vxor(*out3, *out3, x2); + u32 x0F0F3333, x3C3C3C3C, x55FF55FF, x69C369C3, x0903B73F, x09FCB7C0, + x5CA9E295; + u32 x55AFD1B7, x3C3C69C3, x6993B874; + u32 x5CEDE59F, x09FCE295, x5D91A51E, x529E962D; + u32 x29EEADC0, x4B8771A3, x428679F3, x6B68D433; + u32 x5BA7E193, x026F12F3, x6B27C493, x94D83B6C; + u32 x965E0B0F, x3327A113, x847F0A1F, xD6E19C32; + u32 x0DBCE883, x3A25A215, x37994A96; + u32 x8A487EA7, x8B480F07, xB96C2D16; + u32 x0, x1, x2, x3; + + vsel(x0F0F3333, a3, a2, a5); + vxor(x3C3C3C3C, a2, a3); + vor(x55FF55FF, a1, a4); + vxor(x69C369C3, x3C3C3C3C, x55FF55FF); + vsel(x0903B73F, a5, x0F0F3333, x69C369C3); + vxor(x09FCB7C0, a4, x0903B73F); + vxor(x5CA9E295, a1, x09FCB7C0); + + vsel(x55AFD1B7, x5CA9E295, x55FF55FF, x0F0F3333); + vsel(x3C3C69C3, x3C3C3C3C, x69C369C3, a5); + vxor(x6993B874, x55AFD1B7, x3C3C69C3); + + vsel(x5CEDE59F, x55FF55FF, x5CA9E295, x6993B874); + vsel(x09FCE295, x09FCB7C0, x5CA9E295, a5); + vsel(x5D91A51E, x5CEDE59F, x6993B874, x09FCE295); + vxor(x529E962D, x0F0F3333, x5D91A51E); + + vsel(x29EEADC0, x69C369C3, x09FCB7C0, x5CEDE59F); + vsel(x4B8771A3, x0F0F3333, x69C369C3, x5CA9E295); + vsel(x428679F3, a5, x4B8771A3, x529E962D); + vxor(x6B68D433, x29EEADC0, x428679F3); + + vsel(x5BA7E193, x5CA9E295, x4B8771A3, a3); + vsel(x026F12F3, a4, x0F0F3333, x529E962D); + vsel(x6B27C493, x6B68D433, x5BA7E193, x026F12F3); + vnot(x94D83B6C, x6B27C493); + vsel(x0, x94D83B6C, x6B68D433, a6); + vxor(*out1, *out1, x0); + + vsel(x965E0B0F, x94D83B6C, a3, x428679F3); + vsel(x3327A113, x5BA7E193, a2, x69C369C3); + vsel(x847F0A1F, x965E0B0F, a4, x3327A113); + vxor(xD6E19C32, x529E962D, x847F0A1F); + vsel(x1, xD6E19C32, x5CA9E295, a6); + vxor(*out2, *out2, x1); + + vsel(x0DBCE883, x09FCE295, x3C3C69C3, x847F0A1F); + vsel(x3A25A215, x3327A113, x5CA9E295, x0903B73F); + vxor(x37994A96, x0DBCE883, x3A25A215); + vsel(x3, x37994A96, x529E962D, a6); + vxor(*out4, *out4, x3); + + vxor(x8A487EA7, x5CA9E295, xD6E19C32); + vsel(x8B480F07, a3, x8A487EA7, x847F0A1F); + vsel(xB96C2D16, x8B480F07, x3C3C3C3C, x3A25A215); + vsel(x2, xB96C2D16, x6993B874, a6); + vxor(*out3, *out3, x2); } static void s2(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, u32 * out1, u32 * out2, u32 * out3, u32 * out4) { - u32 x55553333, x0055FF33, x33270F03, x66725A56, x00FFFF00, x668DA556; - u32 x0F0F5A56, xF0F0A5A9, xA5A5969A, xA55A699A; - u32 x0F5AF03C, x6600FF56, x87A5F09C; - u32 xA55A963C, x3C69C30F, xB44BC32D; - u32 x66D7CC56, x0F4B0F2D, x699CC37B, x996C66D2; - u32 xB46C662D, x278DB412, xB66CB43B; - u32 xD2DC4E52, x27993333, xD2994E33; - u32 x278D0F2D, x2E0E547B, x09976748; - u32 x0, x1, x2, x3; - - vsel(x55553333, a1, a3, a6); - vsel(x0055FF33, a6, x55553333, a5); - vsel(x33270F03, a3, a4, x0055FF33); - vxor(x66725A56, a1, x33270F03); - vxor(x00FFFF00, a5, a6); - vxor(x668DA556, x66725A56, x00FFFF00); - - vsel(x0F0F5A56, a4, x66725A56, a6); - vnot(xF0F0A5A9, x0F0F5A56); - vxor(xA5A5969A, x55553333, xF0F0A5A9); - vxor(xA55A699A, x00FFFF00, xA5A5969A); - vsel(x1, xA55A699A, x668DA556, a2); - vxor(*out2, *out2, x1); - - vxor(x0F5AF03C, a4, x0055FF33); - vsel(x6600FF56, x66725A56, a6, x00FFFF00); - vsel(x87A5F09C, xA5A5969A, x0F5AF03C, x6600FF56); - - vsel(xA55A963C, xA5A5969A, x0F5AF03C, a5); - vxor(x3C69C30F, a3, x0F5AF03C); - vsel(xB44BC32D, xA55A963C, x3C69C30F, a1); - - vsel(x66D7CC56, x66725A56, x668DA556, xA5A5969A); - vsel(x0F4B0F2D, a4, xB44BC32D, a5); - vxor(x699CC37B, x66D7CC56, x0F4B0F2D); - vxor(x996C66D2, xF0F0A5A9, x699CC37B); - vsel(x0, x996C66D2, xB44BC32D, a2); - vxor(*out1, *out1, x0); - - vsel(xB46C662D, xB44BC32D, x996C66D2, x00FFFF00); - vsel(x278DB412, x668DA556, xA5A5969A, a1); - vsel(xB66CB43B, xB46C662D, x278DB412, x6600FF56); - - vsel(xD2DC4E52, x66D7CC56, x996C66D2, xB44BC32D); - vsel(x27993333, x278DB412, a3, x0055FF33); - vsel(xD2994E33, xD2DC4E52, x27993333, a5); - vsel(x3, x87A5F09C, xD2994E33, a2); - vxor(*out4, *out4, x3); - - vsel(x278D0F2D, x278DB412, x0F4B0F2D, a6); - vsel(x2E0E547B, x0F0F5A56, xB66CB43B, x278D0F2D); - vxor(x09976748, x27993333, x2E0E547B); - vsel(x2, xB66CB43B, x09976748, a2); - vxor(*out3, *out3, x2); + u32 x55553333, x0055FF33, x33270F03, x66725A56, x00FFFF00, x668DA556; + u32 x0F0F5A56, xF0F0A5A9, xA5A5969A, xA55A699A; + u32 x0F5AF03C, x6600FF56, x87A5F09C; + u32 xA55A963C, x3C69C30F, xB44BC32D; + u32 x66D7CC56, x0F4B0F2D, x699CC37B, x996C66D2; + u32 xB46C662D, x278DB412, xB66CB43B; + u32 xD2DC4E52, x27993333, xD2994E33; + u32 x278D0F2D, x2E0E547B, x09976748; + u32 x0, x1, x2, x3; + + vsel(x55553333, a1, a3, a6); + vsel(x0055FF33, a6, x55553333, a5); + vsel(x33270F03, a3, a4, x0055FF33); + vxor(x66725A56, a1, x33270F03); + vxor(x00FFFF00, a5, a6); + vxor(x668DA556, x66725A56, x00FFFF00); + + vsel(x0F0F5A56, a4, x66725A56, a6); + vnot(xF0F0A5A9, x0F0F5A56); + vxor(xA5A5969A, x55553333, xF0F0A5A9); + vxor(xA55A699A, x00FFFF00, xA5A5969A); + vsel(x1, xA55A699A, x668DA556, a2); + vxor(*out2, *out2, x1); + + vxor(x0F5AF03C, a4, x0055FF33); + vsel(x6600FF56, x66725A56, a6, x00FFFF00); + vsel(x87A5F09C, xA5A5969A, x0F5AF03C, x6600FF56); + + vsel(xA55A963C, xA5A5969A, x0F5AF03C, a5); + vxor(x3C69C30F, a3, x0F5AF03C); + vsel(xB44BC32D, xA55A963C, x3C69C30F, a1); + + vsel(x66D7CC56, x66725A56, x668DA556, xA5A5969A); + vsel(x0F4B0F2D, a4, xB44BC32D, a5); + vxor(x699CC37B, x66D7CC56, x0F4B0F2D); + vxor(x996C66D2, xF0F0A5A9, x699CC37B); + vsel(x0, x996C66D2, xB44BC32D, a2); + vxor(*out1, *out1, x0); + + vsel(xB46C662D, xB44BC32D, x996C66D2, x00FFFF00); + vsel(x278DB412, x668DA556, xA5A5969A, a1); + vsel(xB66CB43B, xB46C662D, x278DB412, x6600FF56); + + vsel(xD2DC4E52, x66D7CC56, x996C66D2, xB44BC32D); + vsel(x27993333, x278DB412, a3, x0055FF33); + vsel(xD2994E33, xD2DC4E52, x27993333, a5); + vsel(x3, x87A5F09C, xD2994E33, a2); + vxor(*out4, *out4, x3); + + vsel(x278D0F2D, x278DB412, x0F4B0F2D, a6); + vsel(x2E0E547B, x0F0F5A56, xB66CB43B, x278D0F2D); + vxor(x09976748, x27993333, x2E0E547B); + vsel(x2, xB66CB43B, x09976748, a2); + vxor(*out3, *out3, x2); } static void s3(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, u32 * out1, u32 * out2, u32 * out3, u32 * out4) { - u32 x0F330F33, x0F33F0CC, x5A66A599; - u32 x2111B7BB, x03FF3033, x05BB50EE, x074F201F, x265E97A4; - u32 x556BA09E, x665A93AC, x99A56C53; - u32 x25A1A797, x5713754C, x66559355, x47B135C6; - u32 x9A5A5C60, xD07AF8F8, x87698DB4, xE13C1EE1; - u32 x9E48CDE4, x655B905E, x00A55CFF, x9E49915E; - u32 xD6599874, x05330022, xD2699876; - u32 x665F9364, xD573F0F2, xB32C6396; - u32 x0, x1, x2, x3; - - vsel(x0F330F33, a4, a3, a5); - vxor(x0F33F0CC, a6, x0F330F33); - vxor(x5A66A599, a2, x0F33F0CC); - - vsel(x2111B7BB, a3, a6, x5A66A599); - vsel(x03FF3033, a5, a3, x0F33F0CC); - vsel(x05BB50EE, a5, x0F33F0CC, a2); - vsel(x074F201F, x03FF3033, a4, x05BB50EE); - vxor(x265E97A4, x2111B7BB, x074F201F); - - vsel(x556BA09E, x5A66A599, x05BB50EE, a4); - vsel(x665A93AC, x556BA09E, x265E97A4, a3); - vnot(x99A56C53, x665A93AC); - vsel(x1, x265E97A4, x99A56C53, a1); - vxor(*out2, *out2, x1); - - vxor(x25A1A797, x03FF3033, x265E97A4); - vsel(x5713754C, a2, x0F33F0CC, x074F201F); - vsel(x66559355, x665A93AC, a2, a5); - vsel(x47B135C6, x25A1A797, x5713754C, x66559355); - - vxor(x9A5A5C60, x03FF3033, x99A56C53); - vsel(xD07AF8F8, x9A5A5C60, x556BA09E, x5A66A599); - vxor(x87698DB4, x5713754C, xD07AF8F8); - vxor(xE13C1EE1, x66559355, x87698DB4); - - vsel(x9E48CDE4, x9A5A5C60, x87698DB4, x265E97A4); - vsel(x655B905E, x66559355, x05BB50EE, a4); - vsel(x00A55CFF, a5, a6, x9A5A5C60); - vsel(x9E49915E, x9E48CDE4, x655B905E, x00A55CFF); - vsel(x0, x9E49915E, xE13C1EE1, a1); - vxor(*out1, *out1, x0); - - vsel(xD6599874, xD07AF8F8, x66559355, x0F33F0CC); - vand(x05330022, x0F330F33, x05BB50EE); - vsel(xD2699876, xD6599874, x00A55CFF, x05330022); - vsel(x3, x5A66A599, xD2699876, a1); - vxor(*out4, *out4, x3); - - vsel(x665F9364, x265E97A4, x66559355, x47B135C6); - vsel(xD573F0F2, xD07AF8F8, x05330022, a4); - vxor(xB32C6396, x665F9364, xD573F0F2); - vsel(x2, xB32C6396, x47B135C6, a1); - vxor(*out3, *out3, x2); + u32 x0F330F33, x0F33F0CC, x5A66A599; + u32 x2111B7BB, x03FF3033, x05BB50EE, x074F201F, x265E97A4; + u32 x556BA09E, x665A93AC, x99A56C53; + u32 x25A1A797, x5713754C, x66559355, x47B135C6; + u32 x9A5A5C60, xD07AF8F8, x87698DB4, xE13C1EE1; + u32 x9E48CDE4, x655B905E, x00A55CFF, x9E49915E; + u32 xD6599874, x05330022, xD2699876; + u32 x665F9364, xD573F0F2, xB32C6396; + u32 x0, x1, x2, x3; + + vsel(x0F330F33, a4, a3, a5); + vxor(x0F33F0CC, a6, x0F330F33); + vxor(x5A66A599, a2, x0F33F0CC); + + vsel(x2111B7BB, a3, a6, x5A66A599); + vsel(x03FF3033, a5, a3, x0F33F0CC); + vsel(x05BB50EE, a5, x0F33F0CC, a2); + vsel(x074F201F, x03FF3033, a4, x05BB50EE); + vxor(x265E97A4, x2111B7BB, x074F201F); + + vsel(x556BA09E, x5A66A599, x05BB50EE, a4); + vsel(x665A93AC, x556BA09E, x265E97A4, a3); + vnot(x99A56C53, x665A93AC); + vsel(x1, x265E97A4, x99A56C53, a1); + vxor(*out2, *out2, x1); + + vxor(x25A1A797, x03FF3033, x265E97A4); + vsel(x5713754C, a2, x0F33F0CC, x074F201F); + vsel(x66559355, x665A93AC, a2, a5); + vsel(x47B135C6, x25A1A797, x5713754C, x66559355); + + vxor(x9A5A5C60, x03FF3033, x99A56C53); + vsel(xD07AF8F8, x9A5A5C60, x556BA09E, x5A66A599); + vxor(x87698DB4, x5713754C, xD07AF8F8); + vxor(xE13C1EE1, x66559355, x87698DB4); + + vsel(x9E48CDE4, x9A5A5C60, x87698DB4, x265E97A4); + vsel(x655B905E, x66559355, x05BB50EE, a4); + vsel(x00A55CFF, a5, a6, x9A5A5C60); + vsel(x9E49915E, x9E48CDE4, x655B905E, x00A55CFF); + vsel(x0, x9E49915E, xE13C1EE1, a1); + vxor(*out1, *out1, x0); + + vsel(xD6599874, xD07AF8F8, x66559355, x0F33F0CC); + vand(x05330022, x0F330F33, x05BB50EE); + vsel(xD2699876, xD6599874, x00A55CFF, x05330022); + vsel(x3, x5A66A599, xD2699876, a1); + vxor(*out4, *out4, x3); + + vsel(x665F9364, x265E97A4, x66559355, x47B135C6); + vsel(xD573F0F2, xD07AF8F8, x05330022, a4); + vxor(xB32C6396, x665F9364, xD573F0F2); + vsel(x2, xB32C6396, x47B135C6, a1); + vxor(*out3, *out3, x2); } static void s4(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, u32 * out1, u32 * out2, u32 * out3, u32 * out4) { - u32 x0505AFAF, x0555AF55, x0A5AA05A, x46566456, x0A0A5F5F, x0AF55FA0, - x0AF50F0F, x4CA36B59; - u32 xB35C94A6; - u32 x01BB23BB, x5050FAFA, xA31C26BE, xA91679E1; - u32 x56E9861E; - u32 x50E9FA1E, x0AF55F00, x827D9784, xD2946D9A; - u32 x31F720B3, x11FB21B3, x4712A7AD, x9586CA37; - u32 x0, x1, x2, x3; - - vsel(x0505AFAF, a5, a3, a1); - vsel(x0555AF55, x0505AFAF, a1, a4); - vxor(x0A5AA05A, a3, x0555AF55); - vsel(x46566456, a1, x0A5AA05A, a2); - vsel(x0A0A5F5F, a3, a5, a1); - vxor(x0AF55FA0, a4, x0A0A5F5F); - vsel(x0AF50F0F, x0AF55FA0, a3, a5); - vxor(x4CA36B59, x46566456, x0AF50F0F); - - vnot(xB35C94A6, x4CA36B59); - - vsel(x01BB23BB, a4, a2, x0555AF55); - vxor(x5050FAFA, a1, x0505AFAF); - vsel(xA31C26BE, xB35C94A6, x01BB23BB, x5050FAFA); - vxor(xA91679E1, x0A0A5F5F, xA31C26BE); - - vnot(x56E9861E, xA91679E1); - - vsel(x50E9FA1E, x5050FAFA, x56E9861E, a4); - vsel(x0AF55F00, x0AF50F0F, x0AF55FA0, x0A0A5F5F); - vsel(x827D9784, xB35C94A6, x0AF55F00, a2); - vxor(xD2946D9A, x50E9FA1E, x827D9784); - vsel(x2, xD2946D9A, x4CA36B59, a6); - vxor(*out3, *out3, x2); - vsel(x3, xB35C94A6, xD2946D9A, a6); - vxor(*out4, *out4, x3); - - vsel(x31F720B3, a2, a4, x0AF55FA0); - vsel(x11FB21B3, x01BB23BB, x31F720B3, x5050FAFA); - vxor(x4712A7AD, x56E9861E, x11FB21B3); - vxor(x9586CA37, xD2946D9A, x4712A7AD); - vsel(x0, x56E9861E, x9586CA37, a6); - vxor(*out1, *out1, x0); - vsel(x1, x9586CA37, xA91679E1, a6); - vxor(*out2, *out2, x1); + u32 x0505AFAF, x0555AF55, x0A5AA05A, x46566456, x0A0A5F5F, x0AF55FA0, + x0AF50F0F, x4CA36B59; + u32 xB35C94A6; + u32 x01BB23BB, x5050FAFA, xA31C26BE, xA91679E1; + u32 x56E9861E; + u32 x50E9FA1E, x0AF55F00, x827D9784, xD2946D9A; + u32 x31F720B3, x11FB21B3, x4712A7AD, x9586CA37; + u32 x0, x1, x2, x3; + + vsel(x0505AFAF, a5, a3, a1); + vsel(x0555AF55, x0505AFAF, a1, a4); + vxor(x0A5AA05A, a3, x0555AF55); + vsel(x46566456, a1, x0A5AA05A, a2); + vsel(x0A0A5F5F, a3, a5, a1); + vxor(x0AF55FA0, a4, x0A0A5F5F); + vsel(x0AF50F0F, x0AF55FA0, a3, a5); + vxor(x4CA36B59, x46566456, x0AF50F0F); + + vnot(xB35C94A6, x4CA36B59); + + vsel(x01BB23BB, a4, a2, x0555AF55); + vxor(x5050FAFA, a1, x0505AFAF); + vsel(xA31C26BE, xB35C94A6, x01BB23BB, x5050FAFA); + vxor(xA91679E1, x0A0A5F5F, xA31C26BE); + + vnot(x56E9861E, xA91679E1); + + vsel(x50E9FA1E, x5050FAFA, x56E9861E, a4); + vsel(x0AF55F00, x0AF50F0F, x0AF55FA0, x0A0A5F5F); + vsel(x827D9784, xB35C94A6, x0AF55F00, a2); + vxor(xD2946D9A, x50E9FA1E, x827D9784); + vsel(x2, xD2946D9A, x4CA36B59, a6); + vxor(*out3, *out3, x2); + vsel(x3, xB35C94A6, xD2946D9A, a6); + vxor(*out4, *out4, x3); + + vsel(x31F720B3, a2, a4, x0AF55FA0); + vsel(x11FB21B3, x01BB23BB, x31F720B3, x5050FAFA); + vxor(x4712A7AD, x56E9861E, x11FB21B3); + vxor(x9586CA37, xD2946D9A, x4712A7AD); + vsel(x0, x56E9861E, x9586CA37, a6); + vxor(*out1, *out1, x0); + vsel(x1, x9586CA37, xA91679E1, a6); + vxor(*out2, *out2, x1); } static void s5(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, u32 * out1, u32 * out2, u32 * out3, u32 * out4) { - u32 x550F550F, xAAF0AAF0, xA5F5A5F5, x96C696C6, x00FFFF00, x963969C6; - u32 x2E3C2E3C, xB73121F7, x1501DF0F, x00558A5F, x2E69A463; - u32 x0679ED42, x045157FD, xB32077FF, x9D49D39C; - u32 xAC81CFB2, xF72577AF, x5BA4B81D; - u32 x5BA477AF, x4895469F, x3A35273A, x1A35669A; - u32 x12E6283D, x9E47D3D4, x1A676AB4; - u32 x891556DF, xE5E77F82, x6CF2295D; - u32 x2E3CA5F5, x9697C1C6, x369CC1D6; - u32 x0, x1, x2, x3; - - vsel(x550F550F, a1, a3, a5); - vnot(xAAF0AAF0, x550F550F); - vsel(xA5F5A5F5, xAAF0AAF0, a1, a3); - vxor(x96C696C6, a2, xA5F5A5F5); - vxor(x00FFFF00, a5, a6); - vxor(x963969C6, x96C696C6, x00FFFF00); - - vsel(x2E3C2E3C, a3, xAAF0AAF0, a2); - vsel(xB73121F7, a2, x963969C6, x96C696C6); - vsel(x1501DF0F, a6, x550F550F, xB73121F7); - vsel(x00558A5F, x1501DF0F, a5, a1); - vxor(x2E69A463, x2E3C2E3C, x00558A5F); - - vsel(x0679ED42, x00FFFF00, x2E69A463, x96C696C6); - vsel(x045157FD, a6, a1, x0679ED42); - vsel(xB32077FF, xB73121F7, a6, x045157FD); - vxor(x9D49D39C, x2E69A463, xB32077FF); - vsel(x2, x9D49D39C, x2E69A463, a4); - vxor(*out3, *out3, x2); - - vsel(xAC81CFB2, xAAF0AAF0, x1501DF0F, x0679ED42); - vsel(xF72577AF, xB32077FF, x550F550F, a1); - vxor(x5BA4B81D, xAC81CFB2, xF72577AF); - vsel(x1, x5BA4B81D, x963969C6, a4); - vxor(*out2, *out2, x1); - - vsel(x5BA477AF, x5BA4B81D, xF72577AF, a6); - vsel(x4895469F, x5BA477AF, x00558A5F, a2); - vsel(x3A35273A, x2E3C2E3C, a2, x963969C6); - vsel(x1A35669A, x4895469F, x3A35273A, x5BA4B81D); - - vsel(x12E6283D, a5, x5BA4B81D, x963969C6); - vsel(x9E47D3D4, x96C696C6, x9D49D39C, xAC81CFB2); - vsel(x1A676AB4, x12E6283D, x9E47D3D4, x4895469F); - - vsel(x891556DF, xB32077FF, x4895469F, x3A35273A); - vsel(xE5E77F82, xF72577AF, x00FFFF00, x12E6283D); - vxor(x6CF2295D, x891556DF, xE5E77F82); - vsel(x3, x1A35669A, x6CF2295D, a4); - vxor(*out4, *out4, x3); - - vsel(x2E3CA5F5, x2E3C2E3C, xA5F5A5F5, a6); - vsel(x9697C1C6, x96C696C6, x963969C6, x045157FD); - vsel(x369CC1D6, x2E3CA5F5, x9697C1C6, x5BA477AF); - vsel(x0, x369CC1D6, x1A676AB4, a4); - vxor(*out1, *out1, x0); + u32 x550F550F, xAAF0AAF0, xA5F5A5F5, x96C696C6, x00FFFF00, x963969C6; + u32 x2E3C2E3C, xB73121F7, x1501DF0F, x00558A5F, x2E69A463; + u32 x0679ED42, x045157FD, xB32077FF, x9D49D39C; + u32 xAC81CFB2, xF72577AF, x5BA4B81D; + u32 x5BA477AF, x4895469F, x3A35273A, x1A35669A; + u32 x12E6283D, x9E47D3D4, x1A676AB4; + u32 x891556DF, xE5E77F82, x6CF2295D; + u32 x2E3CA5F5, x9697C1C6, x369CC1D6; + u32 x0, x1, x2, x3; + + vsel(x550F550F, a1, a3, a5); + vnot(xAAF0AAF0, x550F550F); + vsel(xA5F5A5F5, xAAF0AAF0, a1, a3); + vxor(x96C696C6, a2, xA5F5A5F5); + vxor(x00FFFF00, a5, a6); + vxor(x963969C6, x96C696C6, x00FFFF00); + + vsel(x2E3C2E3C, a3, xAAF0AAF0, a2); + vsel(xB73121F7, a2, x963969C6, x96C696C6); + vsel(x1501DF0F, a6, x550F550F, xB73121F7); + vsel(x00558A5F, x1501DF0F, a5, a1); + vxor(x2E69A463, x2E3C2E3C, x00558A5F); + + vsel(x0679ED42, x00FFFF00, x2E69A463, x96C696C6); + vsel(x045157FD, a6, a1, x0679ED42); + vsel(xB32077FF, xB73121F7, a6, x045157FD); + vxor(x9D49D39C, x2E69A463, xB32077FF); + vsel(x2, x9D49D39C, x2E69A463, a4); + vxor(*out3, *out3, x2); + + vsel(xAC81CFB2, xAAF0AAF0, x1501DF0F, x0679ED42); + vsel(xF72577AF, xB32077FF, x550F550F, a1); + vxor(x5BA4B81D, xAC81CFB2, xF72577AF); + vsel(x1, x5BA4B81D, x963969C6, a4); + vxor(*out2, *out2, x1); + + vsel(x5BA477AF, x5BA4B81D, xF72577AF, a6); + vsel(x4895469F, x5BA477AF, x00558A5F, a2); + vsel(x3A35273A, x2E3C2E3C, a2, x963969C6); + vsel(x1A35669A, x4895469F, x3A35273A, x5BA4B81D); + + vsel(x12E6283D, a5, x5BA4B81D, x963969C6); + vsel(x9E47D3D4, x96C696C6, x9D49D39C, xAC81CFB2); + vsel(x1A676AB4, x12E6283D, x9E47D3D4, x4895469F); + + vsel(x891556DF, xB32077FF, x4895469F, x3A35273A); + vsel(xE5E77F82, xF72577AF, x00FFFF00, x12E6283D); + vxor(x6CF2295D, x891556DF, xE5E77F82); + vsel(x3, x1A35669A, x6CF2295D, a4); + vxor(*out4, *out4, x3); + + vsel(x2E3CA5F5, x2E3C2E3C, xA5F5A5F5, a6); + vsel(x9697C1C6, x96C696C6, x963969C6, x045157FD); + vsel(x369CC1D6, x2E3CA5F5, x9697C1C6, x5BA477AF); + vsel(x0, x369CC1D6, x1A676AB4, a4); + vxor(*out1, *out1, x0); } static void s6(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, u32 * out1, u32 * out2, u32 * out3, u32 * out4) { - u32 x555500FF, x666633CC, x606F30CF, x353A659A, x353A9A65, xCAC5659A; - u32 x353A6565, x0A3F0A6F, x6C5939A3, x5963A3C6; - u32 x35FF659A, x3AF06A95, x05CF0A9F, x16E94A97; - u32 x86CD4C9B, x12E0FFFD, x942D9A67; - u32 x142956AB, x455D45DF, x1C3EE619; - u32 x2AEA70D5, x20CF7A9F, x3CF19C86, x69A49C79; - u32 x840DBB67, x6DA19C1E, x925E63E1; - u32 x9C3CA761, x257A75D5, xB946D2B4; - u32 x0, x1, x2, x3; - - vsel(x555500FF, a1, a4, a5); - vxor(x666633CC, a2, x555500FF); - vsel(x606F30CF, x666633CC, a4, a3); - vxor(x353A659A, a1, x606F30CF); - vxor(x353A9A65, a5, x353A659A); - vnot(xCAC5659A, x353A9A65); - - vsel(x353A6565, x353A659A, x353A9A65, a4); - vsel(x0A3F0A6F, a3, a4, x353A6565); - vxor(x6C5939A3, x666633CC, x0A3F0A6F); - vxor(x5963A3C6, x353A9A65, x6C5939A3); - - vsel(x35FF659A, a4, x353A659A, x353A6565); - vxor(x3AF06A95, a3, x35FF659A); - vsel(x05CF0A9F, a4, a3, x353A9A65); - vsel(x16E94A97, x3AF06A95, x05CF0A9F, x6C5939A3); - - vsel(x86CD4C9B, xCAC5659A, x05CF0A9F, x6C5939A3); - vsel(x12E0FFFD, a5, x3AF06A95, x16E94A97); - vsel(x942D9A67, x86CD4C9B, x353A9A65, x12E0FFFD); - vsel(x0, xCAC5659A, x942D9A67, a6); - vxor(*out1, *out1, x0); - - vsel(x142956AB, x353A659A, x942D9A67, a2); - vsel(x455D45DF, a1, x86CD4C9B, x142956AB); - vxor(x1C3EE619, x5963A3C6, x455D45DF); - vsel(x3, x5963A3C6, x1C3EE619, a6); - vxor(*out4, *out4, x3); - - vsel(x2AEA70D5, x3AF06A95, x606F30CF, x353A9A65); - vsel(x20CF7A9F, x2AEA70D5, x05CF0A9F, x0A3F0A6F); - vxor(x3CF19C86, x1C3EE619, x20CF7A9F); - vxor(x69A49C79, x555500FF, x3CF19C86); - - vsel(x840DBB67, a5, x942D9A67, x86CD4C9B); - vsel(x6DA19C1E, x69A49C79, x3CF19C86, x840DBB67); - vnot(x925E63E1, x6DA19C1E); - vsel(x1, x925E63E1, x69A49C79, a6); - vxor(*out2, *out2, x1); - - vsel(x9C3CA761, x840DBB67, x1C3EE619, x3CF19C86); - vsel(x257A75D5, x455D45DF, x2AEA70D5, x606F30CF); - vxor(xB946D2B4, x9C3CA761, x257A75D5); - vsel(x2, x16E94A97, xB946D2B4, a6); - vxor(*out3, *out3, x2); + u32 x555500FF, x666633CC, x606F30CF, x353A659A, x353A9A65, xCAC5659A; + u32 x353A6565, x0A3F0A6F, x6C5939A3, x5963A3C6; + u32 x35FF659A, x3AF06A95, x05CF0A9F, x16E94A97; + u32 x86CD4C9B, x12E0FFFD, x942D9A67; + u32 x142956AB, x455D45DF, x1C3EE619; + u32 x2AEA70D5, x20CF7A9F, x3CF19C86, x69A49C79; + u32 x840DBB67, x6DA19C1E, x925E63E1; + u32 x9C3CA761, x257A75D5, xB946D2B4; + u32 x0, x1, x2, x3; + + vsel(x555500FF, a1, a4, a5); + vxor(x666633CC, a2, x555500FF); + vsel(x606F30CF, x666633CC, a4, a3); + vxor(x353A659A, a1, x606F30CF); + vxor(x353A9A65, a5, x353A659A); + vnot(xCAC5659A, x353A9A65); + + vsel(x353A6565, x353A659A, x353A9A65, a4); + vsel(x0A3F0A6F, a3, a4, x353A6565); + vxor(x6C5939A3, x666633CC, x0A3F0A6F); + vxor(x5963A3C6, x353A9A65, x6C5939A3); + + vsel(x35FF659A, a4, x353A659A, x353A6565); + vxor(x3AF06A95, a3, x35FF659A); + vsel(x05CF0A9F, a4, a3, x353A9A65); + vsel(x16E94A97, x3AF06A95, x05CF0A9F, x6C5939A3); + + vsel(x86CD4C9B, xCAC5659A, x05CF0A9F, x6C5939A3); + vsel(x12E0FFFD, a5, x3AF06A95, x16E94A97); + vsel(x942D9A67, x86CD4C9B, x353A9A65, x12E0FFFD); + vsel(x0, xCAC5659A, x942D9A67, a6); + vxor(*out1, *out1, x0); + + vsel(x142956AB, x353A659A, x942D9A67, a2); + vsel(x455D45DF, a1, x86CD4C9B, x142956AB); + vxor(x1C3EE619, x5963A3C6, x455D45DF); + vsel(x3, x5963A3C6, x1C3EE619, a6); + vxor(*out4, *out4, x3); + + vsel(x2AEA70D5, x3AF06A95, x606F30CF, x353A9A65); + vsel(x20CF7A9F, x2AEA70D5, x05CF0A9F, x0A3F0A6F); + vxor(x3CF19C86, x1C3EE619, x20CF7A9F); + vxor(x69A49C79, x555500FF, x3CF19C86); + + vsel(x840DBB67, a5, x942D9A67, x86CD4C9B); + vsel(x6DA19C1E, x69A49C79, x3CF19C86, x840DBB67); + vnot(x925E63E1, x6DA19C1E); + vsel(x1, x925E63E1, x69A49C79, a6); + vxor(*out2, *out2, x1); + + vsel(x9C3CA761, x840DBB67, x1C3EE619, x3CF19C86); + vsel(x257A75D5, x455D45DF, x2AEA70D5, x606F30CF); + vxor(xB946D2B4, x9C3CA761, x257A75D5); + vsel(x2, x16E94A97, xB946D2B4, a6); + vxor(*out3, *out3, x2); } static void s7(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, u32 * out1, u32 * out2, u32 * out3, u32 * out4) { - u32 x44447777, x4B4B7878, x22772277, x0505F5F5, x220522F5, x694E5A8D; - u32 x00FFFF00, x66666666, x32353235, x26253636, x26DAC936; - u32 x738F9C63, x11EF9867, x26DA9867; - u32 x4B4B9C63, x4B666663, x4E639396; - u32 x4E4B393C, xFF00FF00, xFF05DD21, xB14EE41D; - u32 xD728827B, x6698807B, x699C585B; - u32 x738C847B, xA4A71E18, x74878E78; - u32 x333D9639, x74879639, x8B7869C6; - u32 x0, x1, x2, x3; - - vsel(x44447777, a2, a6, a3); - vxor(x4B4B7878, a4, x44447777); - vsel(x22772277, a3, a5, a2); - vsel(x0505F5F5, a6, a2, a4); - vsel(x220522F5, x22772277, x0505F5F5, a5); - vxor(x694E5A8D, x4B4B7878, x220522F5); - - vxor(x00FFFF00, a5, a6); - vxor(x66666666, a2, a3); - vsel(x32353235, a3, x220522F5, a4); - vsel(x26253636, x66666666, x32353235, x4B4B7878); - vxor(x26DAC936, x00FFFF00, x26253636); - vsel(x0, x26DAC936, x694E5A8D, a1); - vxor(*out1, *out1, x0); - - vxor(x738F9C63, a2, x26DAC936); - vsel(x11EF9867, x738F9C63, a5, x66666666); - vsel(x26DA9867, x26DAC936, x11EF9867, a6); - - vsel(x4B4B9C63, x4B4B7878, x738F9C63, a6); - vsel(x4B666663, x4B4B9C63, x66666666, x00FFFF00); - vxor(x4E639396, x0505F5F5, x4B666663); - - vsel(x4E4B393C, x4B4B7878, x4E639396, a2); - vnot(xFF00FF00, a5); - vsel(xFF05DD21, xFF00FF00, x738F9C63, x32353235); - vxor(xB14EE41D, x4E4B393C, xFF05DD21); - vsel(x1, xB14EE41D, x26DA9867, a1); - vxor(*out2, *out2, x1); - - vxor(xD728827B, x66666666, xB14EE41D); - vsel(x6698807B, x26DA9867, xD728827B, x4E4B393C); - vsel(x699C585B, x6698807B, x694E5A8D, xFF05DD21); - vsel(x2, x699C585B, x4E639396, a1); - vxor(*out3, *out3, x2); - - vsel(x738C847B, x738F9C63, xD728827B, x4B4B7878); - vxor(xA4A71E18, x738F9C63, xD728827B); - vsel(x74878E78, x738C847B, xA4A71E18, a4); - - vsel(x333D9639, x32353235, x738C847B, xB14EE41D); - vsel(x74879639, x74878E78, x333D9639, a6); - vnot(x8B7869C6, x74879639); - vsel(x3, x74878E78, x8B7869C6, a1); - vxor(*out4, *out4, x3); + u32 x44447777, x4B4B7878, x22772277, x0505F5F5, x220522F5, x694E5A8D; + u32 x00FFFF00, x66666666, x32353235, x26253636, x26DAC936; + u32 x738F9C63, x11EF9867, x26DA9867; + u32 x4B4B9C63, x4B666663, x4E639396; + u32 x4E4B393C, xFF00FF00, xFF05DD21, xB14EE41D; + u32 xD728827B, x6698807B, x699C585B; + u32 x738C847B, xA4A71E18, x74878E78; + u32 x333D9639, x74879639, x8B7869C6; + u32 x0, x1, x2, x3; + + vsel(x44447777, a2, a6, a3); + vxor(x4B4B7878, a4, x44447777); + vsel(x22772277, a3, a5, a2); + vsel(x0505F5F5, a6, a2, a4); + vsel(x220522F5, x22772277, x0505F5F5, a5); + vxor(x694E5A8D, x4B4B7878, x220522F5); + + vxor(x00FFFF00, a5, a6); + vxor(x66666666, a2, a3); + vsel(x32353235, a3, x220522F5, a4); + vsel(x26253636, x66666666, x32353235, x4B4B7878); + vxor(x26DAC936, x00FFFF00, x26253636); + vsel(x0, x26DAC936, x694E5A8D, a1); + vxor(*out1, *out1, x0); + + vxor(x738F9C63, a2, x26DAC936); + vsel(x11EF9867, x738F9C63, a5, x66666666); + vsel(x26DA9867, x26DAC936, x11EF9867, a6); + + vsel(x4B4B9C63, x4B4B7878, x738F9C63, a6); + vsel(x4B666663, x4B4B9C63, x66666666, x00FFFF00); + vxor(x4E639396, x0505F5F5, x4B666663); + + vsel(x4E4B393C, x4B4B7878, x4E639396, a2); + vnot(xFF00FF00, a5); + vsel(xFF05DD21, xFF00FF00, x738F9C63, x32353235); + vxor(xB14EE41D, x4E4B393C, xFF05DD21); + vsel(x1, xB14EE41D, x26DA9867, a1); + vxor(*out2, *out2, x1); + + vxor(xD728827B, x66666666, xB14EE41D); + vsel(x6698807B, x26DA9867, xD728827B, x4E4B393C); + vsel(x699C585B, x6698807B, x694E5A8D, xFF05DD21); + vsel(x2, x699C585B, x4E639396, a1); + vxor(*out3, *out3, x2); + + vsel(x738C847B, x738F9C63, xD728827B, x4B4B7878); + vxor(xA4A71E18, x738F9C63, xD728827B); + vsel(x74878E78, x738C847B, xA4A71E18, a4); + + vsel(x333D9639, x32353235, x738C847B, xB14EE41D); + vsel(x74879639, x74878E78, x333D9639, a6); + vnot(x8B7869C6, x74879639); + vsel(x3, x74878E78, x8B7869C6, a1); + vxor(*out4, *out4, x3); } static void s8(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, u32 * out1, u32 * out2, u32 * out3, u32 * out4) { - u32 x0505F5F5, x05FAF50A, x0F0F00FF, x22227777, x07DA807F, x34E9B34C; - u32 x00FFF00F, x0033FCCF, x5565B15C, x0C0C3F3F, x59698E63; - u32 x3001F74E, x30555745, x693CD926; - u32 x0C0CD926, x0C3F25E9, x38D696A5; - u32 xC729695A; - u32 x03D2117B, xC778395B, xCB471CB2; - u32 x5425B13F, x56B3803F, x919AE965; - u32 x17B3023F, x75555755, x62E6556A, xA59E6C31; - u32 x0, x1, x2, x3; - - vsel(x0505F5F5, a5, a1, a3); - vxor(x05FAF50A, a4, x0505F5F5); - vsel(x0F0F00FF, a3, a4, a5); - vsel(x22227777, a2, a5, a1); - vsel(x07DA807F, x05FAF50A, x0F0F00FF, x22227777); - vxor(x34E9B34C, a2, x07DA807F); - - vsel(x00FFF00F, x05FAF50A, a4, a3); - vsel(x0033FCCF, a5, x00FFF00F, a2); - vsel(x5565B15C, a1, x34E9B34C, x0033FCCF); - vsel(x0C0C3F3F, a3, a5, a2); - vxor(x59698E63, x5565B15C, x0C0C3F3F); - - vsel(x3001F74E, x34E9B34C, a5, x05FAF50A); - vsel(x30555745, x3001F74E, a1, x00FFF00F); - vxor(x693CD926, x59698E63, x30555745); - vsel(x2, x693CD926, x59698E63, a6); - vxor(*out3, *out3, x2); - - vsel(x0C0CD926, x0C0C3F3F, x693CD926, a5); - vxor(x0C3F25E9, x0033FCCF, x0C0CD926); - vxor(x38D696A5, x34E9B34C, x0C3F25E9); - - vnot(xC729695A, x38D696A5); - - vsel(x03D2117B, x07DA807F, a2, x0C0CD926); - vsel(xC778395B, xC729695A, x03D2117B, x30555745); - vxor(xCB471CB2, x0C3F25E9, xC778395B); - vsel(x1, xCB471CB2, x34E9B34C, a6); - vxor(*out2, *out2, x1); - - vsel(x5425B13F, x5565B15C, x0C0C3F3F, x03D2117B); - vsel(x56B3803F, x07DA807F, x5425B13F, x59698E63); - vxor(x919AE965, xC729695A, x56B3803F); - vsel(x3, xC729695A, x919AE965, a6); - vxor(*out4, *out4, x3); - - vsel(x17B3023F, x07DA807F, a2, x59698E63); - vor(x75555755, a1, x30555745); - vxor(x62E6556A, x17B3023F, x75555755); - vxor(xA59E6C31, xC778395B, x62E6556A); - vsel(x0, xA59E6C31, x38D696A5, a6); - vxor(*out1, *out1, x0); + u32 x0505F5F5, x05FAF50A, x0F0F00FF, x22227777, x07DA807F, x34E9B34C; + u32 x00FFF00F, x0033FCCF, x5565B15C, x0C0C3F3F, x59698E63; + u32 x3001F74E, x30555745, x693CD926; + u32 x0C0CD926, x0C3F25E9, x38D696A5; + u32 xC729695A; + u32 x03D2117B, xC778395B, xCB471CB2; + u32 x5425B13F, x56B3803F, x919AE965; + u32 x17B3023F, x75555755, x62E6556A, xA59E6C31; + u32 x0, x1, x2, x3; + + vsel(x0505F5F5, a5, a1, a3); + vxor(x05FAF50A, a4, x0505F5F5); + vsel(x0F0F00FF, a3, a4, a5); + vsel(x22227777, a2, a5, a1); + vsel(x07DA807F, x05FAF50A, x0F0F00FF, x22227777); + vxor(x34E9B34C, a2, x07DA807F); + + vsel(x00FFF00F, x05FAF50A, a4, a3); + vsel(x0033FCCF, a5, x00FFF00F, a2); + vsel(x5565B15C, a1, x34E9B34C, x0033FCCF); + vsel(x0C0C3F3F, a3, a5, a2); + vxor(x59698E63, x5565B15C, x0C0C3F3F); + + vsel(x3001F74E, x34E9B34C, a5, x05FAF50A); + vsel(x30555745, x3001F74E, a1, x00FFF00F); + vxor(x693CD926, x59698E63, x30555745); + vsel(x2, x693CD926, x59698E63, a6); + vxor(*out3, *out3, x2); + + vsel(x0C0CD926, x0C0C3F3F, x693CD926, a5); + vxor(x0C3F25E9, x0033FCCF, x0C0CD926); + vxor(x38D696A5, x34E9B34C, x0C3F25E9); + + vnot(xC729695A, x38D696A5); + + vsel(x03D2117B, x07DA807F, a2, x0C0CD926); + vsel(xC778395B, xC729695A, x03D2117B, x30555745); + vxor(xCB471CB2, x0C3F25E9, xC778395B); + vsel(x1, xCB471CB2, x34E9B34C, a6); + vxor(*out2, *out2, x1); + + vsel(x5425B13F, x5565B15C, x0C0C3F3F, x03D2117B); + vsel(x56B3803F, x07DA807F, x5425B13F, x59698E63); + vxor(x919AE965, xC729695A, x56B3803F); + vsel(x3, xC729695A, x919AE965, a6); + vxor(*out4, *out4, x3); + + vsel(x17B3023F, x07DA807F, a2, x59698E63); + vor(x75555755, a1, x30555745); + vxor(x62E6556A, x17B3023F, x75555755); + vxor(xA59E6C31, xC778395B, x62E6556A); + vsel(x0, xA59E6C31, x38D696A5, a6); + vxor(*out1, *out1, x0); } #define SWAP(a, b) { u32 tmp=a;a=b;b=tmp; } diff --git a/amd/m03000_a3.cl b/amd/m03000_a3.cl index 5befb1d..df44723 100644 --- a/amd/m03000_a3.cl +++ b/amd/m03000_a3.cl @@ -72,488 +72,488 @@ * The effort has been sponsored by Rapid7: http://www.rapid7.com */ -#define vnot(dst, a) (dst) = ~(a) -#define vand(dst, a, b) (dst) = (a) & (b) -#define vor(dst, a, b) (dst) = (a) | (b) -#define vandn(dst, a, b) (dst) = (a) & ~(b) -#define vxor(dst, a, b) (dst) = (a) ^ (b) -#define vsel(dst, a, b, c) (dst) = bitselect((a),(b),(c)) +#define vnot(dst, a) (dst) = ~(a) +#define vand(dst, a, b) (dst) = (a) & (b) +#define vor(dst, a, b) (dst) = (a) | (b) +#define vandn(dst, a, b) (dst) = (a) & ~(b) +#define vxor(dst, a, b) (dst) = (a) ^ (b) +#define vsel(dst, a, b, c) (dst) = bitselect((a),(b),(c)) static void s1(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, u32 * out1, u32 * out2, u32 * out3, u32 * out4) { - u32 x0F0F3333, x3C3C3C3C, x55FF55FF, x69C369C3, x0903B73F, x09FCB7C0, - x5CA9E295; - u32 x55AFD1B7, x3C3C69C3, x6993B874; - u32 x5CEDE59F, x09FCE295, x5D91A51E, x529E962D; - u32 x29EEADC0, x4B8771A3, x428679F3, x6B68D433; - u32 x5BA7E193, x026F12F3, x6B27C493, x94D83B6C; - u32 x965E0B0F, x3327A113, x847F0A1F, xD6E19C32; - u32 x0DBCE883, x3A25A215, x37994A96; - u32 x8A487EA7, x8B480F07, xB96C2D16; - u32 x0, x1, x2, x3; - - vsel(x0F0F3333, a3, a2, a5); - vxor(x3C3C3C3C, a2, a3); - vor(x55FF55FF, a1, a4); - vxor(x69C369C3, x3C3C3C3C, x55FF55FF); - vsel(x0903B73F, a5, x0F0F3333, x69C369C3); - vxor(x09FCB7C0, a4, x0903B73F); - vxor(x5CA9E295, a1, x09FCB7C0); - - vsel(x55AFD1B7, x5CA9E295, x55FF55FF, x0F0F3333); - vsel(x3C3C69C3, x3C3C3C3C, x69C369C3, a5); - vxor(x6993B874, x55AFD1B7, x3C3C69C3); - - vsel(x5CEDE59F, x55FF55FF, x5CA9E295, x6993B874); - vsel(x09FCE295, x09FCB7C0, x5CA9E295, a5); - vsel(x5D91A51E, x5CEDE59F, x6993B874, x09FCE295); - vxor(x529E962D, x0F0F3333, x5D91A51E); - - vsel(x29EEADC0, x69C369C3, x09FCB7C0, x5CEDE59F); - vsel(x4B8771A3, x0F0F3333, x69C369C3, x5CA9E295); - vsel(x428679F3, a5, x4B8771A3, x529E962D); - vxor(x6B68D433, x29EEADC0, x428679F3); - - vsel(x5BA7E193, x5CA9E295, x4B8771A3, a3); - vsel(x026F12F3, a4, x0F0F3333, x529E962D); - vsel(x6B27C493, x6B68D433, x5BA7E193, x026F12F3); - vnot(x94D83B6C, x6B27C493); - vsel(x0, x94D83B6C, x6B68D433, a6); - vxor(*out1, *out1, x0); - - vsel(x965E0B0F, x94D83B6C, a3, x428679F3); - vsel(x3327A113, x5BA7E193, a2, x69C369C3); - vsel(x847F0A1F, x965E0B0F, a4, x3327A113); - vxor(xD6E19C32, x529E962D, x847F0A1F); - vsel(x1, xD6E19C32, x5CA9E295, a6); - vxor(*out2, *out2, x1); - - vsel(x0DBCE883, x09FCE295, x3C3C69C3, x847F0A1F); - vsel(x3A25A215, x3327A113, x5CA9E295, x0903B73F); - vxor(x37994A96, x0DBCE883, x3A25A215); - vsel(x3, x37994A96, x529E962D, a6); - vxor(*out4, *out4, x3); - - vxor(x8A487EA7, x5CA9E295, xD6E19C32); - vsel(x8B480F07, a3, x8A487EA7, x847F0A1F); - vsel(xB96C2D16, x8B480F07, x3C3C3C3C, x3A25A215); - vsel(x2, xB96C2D16, x6993B874, a6); - vxor(*out3, *out3, x2); + u32 x0F0F3333, x3C3C3C3C, x55FF55FF, x69C369C3, x0903B73F, x09FCB7C0, + x5CA9E295; + u32 x55AFD1B7, x3C3C69C3, x6993B874; + u32 x5CEDE59F, x09FCE295, x5D91A51E, x529E962D; + u32 x29EEADC0, x4B8771A3, x428679F3, x6B68D433; + u32 x5BA7E193, x026F12F3, x6B27C493, x94D83B6C; + u32 x965E0B0F, x3327A113, x847F0A1F, xD6E19C32; + u32 x0DBCE883, x3A25A215, x37994A96; + u32 x8A487EA7, x8B480F07, xB96C2D16; + u32 x0, x1, x2, x3; + + vsel(x0F0F3333, a3, a2, a5); + vxor(x3C3C3C3C, a2, a3); + vor(x55FF55FF, a1, a4); + vxor(x69C369C3, x3C3C3C3C, x55FF55FF); + vsel(x0903B73F, a5, x0F0F3333, x69C369C3); + vxor(x09FCB7C0, a4, x0903B73F); + vxor(x5CA9E295, a1, x09FCB7C0); + + vsel(x55AFD1B7, x5CA9E295, x55FF55FF, x0F0F3333); + vsel(x3C3C69C3, x3C3C3C3C, x69C369C3, a5); + vxor(x6993B874, x55AFD1B7, x3C3C69C3); + + vsel(x5CEDE59F, x55FF55FF, x5CA9E295, x6993B874); + vsel(x09FCE295, x09FCB7C0, x5CA9E295, a5); + vsel(x5D91A51E, x5CEDE59F, x6993B874, x09FCE295); + vxor(x529E962D, x0F0F3333, x5D91A51E); + + vsel(x29EEADC0, x69C369C3, x09FCB7C0, x5CEDE59F); + vsel(x4B8771A3, x0F0F3333, x69C369C3, x5CA9E295); + vsel(x428679F3, a5, x4B8771A3, x529E962D); + vxor(x6B68D433, x29EEADC0, x428679F3); + + vsel(x5BA7E193, x5CA9E295, x4B8771A3, a3); + vsel(x026F12F3, a4, x0F0F3333, x529E962D); + vsel(x6B27C493, x6B68D433, x5BA7E193, x026F12F3); + vnot(x94D83B6C, x6B27C493); + vsel(x0, x94D83B6C, x6B68D433, a6); + vxor(*out1, *out1, x0); + + vsel(x965E0B0F, x94D83B6C, a3, x428679F3); + vsel(x3327A113, x5BA7E193, a2, x69C369C3); + vsel(x847F0A1F, x965E0B0F, a4, x3327A113); + vxor(xD6E19C32, x529E962D, x847F0A1F); + vsel(x1, xD6E19C32, x5CA9E295, a6); + vxor(*out2, *out2, x1); + + vsel(x0DBCE883, x09FCE295, x3C3C69C3, x847F0A1F); + vsel(x3A25A215, x3327A113, x5CA9E295, x0903B73F); + vxor(x37994A96, x0DBCE883, x3A25A215); + vsel(x3, x37994A96, x529E962D, a6); + vxor(*out4, *out4, x3); + + vxor(x8A487EA7, x5CA9E295, xD6E19C32); + vsel(x8B480F07, a3, x8A487EA7, x847F0A1F); + vsel(xB96C2D16, x8B480F07, x3C3C3C3C, x3A25A215); + vsel(x2, xB96C2D16, x6993B874, a6); + vxor(*out3, *out3, x2); } static void s2(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, u32 * out1, u32 * out2, u32 * out3, u32 * out4) { - u32 x55553333, x0055FF33, x33270F03, x66725A56, x00FFFF00, x668DA556; - u32 x0F0F5A56, xF0F0A5A9, xA5A5969A, xA55A699A; - u32 x0F5AF03C, x6600FF56, x87A5F09C; - u32 xA55A963C, x3C69C30F, xB44BC32D; - u32 x66D7CC56, x0F4B0F2D, x699CC37B, x996C66D2; - u32 xB46C662D, x278DB412, xB66CB43B; - u32 xD2DC4E52, x27993333, xD2994E33; - u32 x278D0F2D, x2E0E547B, x09976748; - u32 x0, x1, x2, x3; - - vsel(x55553333, a1, a3, a6); - vsel(x0055FF33, a6, x55553333, a5); - vsel(x33270F03, a3, a4, x0055FF33); - vxor(x66725A56, a1, x33270F03); - vxor(x00FFFF00, a5, a6); - vxor(x668DA556, x66725A56, x00FFFF00); - - vsel(x0F0F5A56, a4, x66725A56, a6); - vnot(xF0F0A5A9, x0F0F5A56); - vxor(xA5A5969A, x55553333, xF0F0A5A9); - vxor(xA55A699A, x00FFFF00, xA5A5969A); - vsel(x1, xA55A699A, x668DA556, a2); - vxor(*out2, *out2, x1); - - vxor(x0F5AF03C, a4, x0055FF33); - vsel(x6600FF56, x66725A56, a6, x00FFFF00); - vsel(x87A5F09C, xA5A5969A, x0F5AF03C, x6600FF56); - - vsel(xA55A963C, xA5A5969A, x0F5AF03C, a5); - vxor(x3C69C30F, a3, x0F5AF03C); - vsel(xB44BC32D, xA55A963C, x3C69C30F, a1); - - vsel(x66D7CC56, x66725A56, x668DA556, xA5A5969A); - vsel(x0F4B0F2D, a4, xB44BC32D, a5); - vxor(x699CC37B, x66D7CC56, x0F4B0F2D); - vxor(x996C66D2, xF0F0A5A9, x699CC37B); - vsel(x0, x996C66D2, xB44BC32D, a2); - vxor(*out1, *out1, x0); - - vsel(xB46C662D, xB44BC32D, x996C66D2, x00FFFF00); - vsel(x278DB412, x668DA556, xA5A5969A, a1); - vsel(xB66CB43B, xB46C662D, x278DB412, x6600FF56); - - vsel(xD2DC4E52, x66D7CC56, x996C66D2, xB44BC32D); - vsel(x27993333, x278DB412, a3, x0055FF33); - vsel(xD2994E33, xD2DC4E52, x27993333, a5); - vsel(x3, x87A5F09C, xD2994E33, a2); - vxor(*out4, *out4, x3); - - vsel(x278D0F2D, x278DB412, x0F4B0F2D, a6); - vsel(x2E0E547B, x0F0F5A56, xB66CB43B, x278D0F2D); - vxor(x09976748, x27993333, x2E0E547B); - vsel(x2, xB66CB43B, x09976748, a2); - vxor(*out3, *out3, x2); + u32 x55553333, x0055FF33, x33270F03, x66725A56, x00FFFF00, x668DA556; + u32 x0F0F5A56, xF0F0A5A9, xA5A5969A, xA55A699A; + u32 x0F5AF03C, x6600FF56, x87A5F09C; + u32 xA55A963C, x3C69C30F, xB44BC32D; + u32 x66D7CC56, x0F4B0F2D, x699CC37B, x996C66D2; + u32 xB46C662D, x278DB412, xB66CB43B; + u32 xD2DC4E52, x27993333, xD2994E33; + u32 x278D0F2D, x2E0E547B, x09976748; + u32 x0, x1, x2, x3; + + vsel(x55553333, a1, a3, a6); + vsel(x0055FF33, a6, x55553333, a5); + vsel(x33270F03, a3, a4, x0055FF33); + vxor(x66725A56, a1, x33270F03); + vxor(x00FFFF00, a5, a6); + vxor(x668DA556, x66725A56, x00FFFF00); + + vsel(x0F0F5A56, a4, x66725A56, a6); + vnot(xF0F0A5A9, x0F0F5A56); + vxor(xA5A5969A, x55553333, xF0F0A5A9); + vxor(xA55A699A, x00FFFF00, xA5A5969A); + vsel(x1, xA55A699A, x668DA556, a2); + vxor(*out2, *out2, x1); + + vxor(x0F5AF03C, a4, x0055FF33); + vsel(x6600FF56, x66725A56, a6, x00FFFF00); + vsel(x87A5F09C, xA5A5969A, x0F5AF03C, x6600FF56); + + vsel(xA55A963C, xA5A5969A, x0F5AF03C, a5); + vxor(x3C69C30F, a3, x0F5AF03C); + vsel(xB44BC32D, xA55A963C, x3C69C30F, a1); + + vsel(x66D7CC56, x66725A56, x668DA556, xA5A5969A); + vsel(x0F4B0F2D, a4, xB44BC32D, a5); + vxor(x699CC37B, x66D7CC56, x0F4B0F2D); + vxor(x996C66D2, xF0F0A5A9, x699CC37B); + vsel(x0, x996C66D2, xB44BC32D, a2); + vxor(*out1, *out1, x0); + + vsel(xB46C662D, xB44BC32D, x996C66D2, x00FFFF00); + vsel(x278DB412, x668DA556, xA5A5969A, a1); + vsel(xB66CB43B, xB46C662D, x278DB412, x6600FF56); + + vsel(xD2DC4E52, x66D7CC56, x996C66D2, xB44BC32D); + vsel(x27993333, x278DB412, a3, x0055FF33); + vsel(xD2994E33, xD2DC4E52, x27993333, a5); + vsel(x3, x87A5F09C, xD2994E33, a2); + vxor(*out4, *out4, x3); + + vsel(x278D0F2D, x278DB412, x0F4B0F2D, a6); + vsel(x2E0E547B, x0F0F5A56, xB66CB43B, x278D0F2D); + vxor(x09976748, x27993333, x2E0E547B); + vsel(x2, xB66CB43B, x09976748, a2); + vxor(*out3, *out3, x2); } static void s3(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, u32 * out1, u32 * out2, u32 * out3, u32 * out4) { - u32 x0F330F33, x0F33F0CC, x5A66A599; - u32 x2111B7BB, x03FF3033, x05BB50EE, x074F201F, x265E97A4; - u32 x556BA09E, x665A93AC, x99A56C53; - u32 x25A1A797, x5713754C, x66559355, x47B135C6; - u32 x9A5A5C60, xD07AF8F8, x87698DB4, xE13C1EE1; - u32 x9E48CDE4, x655B905E, x00A55CFF, x9E49915E; - u32 xD6599874, x05330022, xD2699876; - u32 x665F9364, xD573F0F2, xB32C6396; - u32 x0, x1, x2, x3; - - vsel(x0F330F33, a4, a3, a5); - vxor(x0F33F0CC, a6, x0F330F33); - vxor(x5A66A599, a2, x0F33F0CC); - - vsel(x2111B7BB, a3, a6, x5A66A599); - vsel(x03FF3033, a5, a3, x0F33F0CC); - vsel(x05BB50EE, a5, x0F33F0CC, a2); - vsel(x074F201F, x03FF3033, a4, x05BB50EE); - vxor(x265E97A4, x2111B7BB, x074F201F); - - vsel(x556BA09E, x5A66A599, x05BB50EE, a4); - vsel(x665A93AC, x556BA09E, x265E97A4, a3); - vnot(x99A56C53, x665A93AC); - vsel(x1, x265E97A4, x99A56C53, a1); - vxor(*out2, *out2, x1); - - vxor(x25A1A797, x03FF3033, x265E97A4); - vsel(x5713754C, a2, x0F33F0CC, x074F201F); - vsel(x66559355, x665A93AC, a2, a5); - vsel(x47B135C6, x25A1A797, x5713754C, x66559355); - - vxor(x9A5A5C60, x03FF3033, x99A56C53); - vsel(xD07AF8F8, x9A5A5C60, x556BA09E, x5A66A599); - vxor(x87698DB4, x5713754C, xD07AF8F8); - vxor(xE13C1EE1, x66559355, x87698DB4); - - vsel(x9E48CDE4, x9A5A5C60, x87698DB4, x265E97A4); - vsel(x655B905E, x66559355, x05BB50EE, a4); - vsel(x00A55CFF, a5, a6, x9A5A5C60); - vsel(x9E49915E, x9E48CDE4, x655B905E, x00A55CFF); - vsel(x0, x9E49915E, xE13C1EE1, a1); - vxor(*out1, *out1, x0); - - vsel(xD6599874, xD07AF8F8, x66559355, x0F33F0CC); - vand(x05330022, x0F330F33, x05BB50EE); - vsel(xD2699876, xD6599874, x00A55CFF, x05330022); - vsel(x3, x5A66A599, xD2699876, a1); - vxor(*out4, *out4, x3); - - vsel(x665F9364, x265E97A4, x66559355, x47B135C6); - vsel(xD573F0F2, xD07AF8F8, x05330022, a4); - vxor(xB32C6396, x665F9364, xD573F0F2); - vsel(x2, xB32C6396, x47B135C6, a1); - vxor(*out3, *out3, x2); + u32 x0F330F33, x0F33F0CC, x5A66A599; + u32 x2111B7BB, x03FF3033, x05BB50EE, x074F201F, x265E97A4; + u32 x556BA09E, x665A93AC, x99A56C53; + u32 x25A1A797, x5713754C, x66559355, x47B135C6; + u32 x9A5A5C60, xD07AF8F8, x87698DB4, xE13C1EE1; + u32 x9E48CDE4, x655B905E, x00A55CFF, x9E49915E; + u32 xD6599874, x05330022, xD2699876; + u32 x665F9364, xD573F0F2, xB32C6396; + u32 x0, x1, x2, x3; + + vsel(x0F330F33, a4, a3, a5); + vxor(x0F33F0CC, a6, x0F330F33); + vxor(x5A66A599, a2, x0F33F0CC); + + vsel(x2111B7BB, a3, a6, x5A66A599); + vsel(x03FF3033, a5, a3, x0F33F0CC); + vsel(x05BB50EE, a5, x0F33F0CC, a2); + vsel(x074F201F, x03FF3033, a4, x05BB50EE); + vxor(x265E97A4, x2111B7BB, x074F201F); + + vsel(x556BA09E, x5A66A599, x05BB50EE, a4); + vsel(x665A93AC, x556BA09E, x265E97A4, a3); + vnot(x99A56C53, x665A93AC); + vsel(x1, x265E97A4, x99A56C53, a1); + vxor(*out2, *out2, x1); + + vxor(x25A1A797, x03FF3033, x265E97A4); + vsel(x5713754C, a2, x0F33F0CC, x074F201F); + vsel(x66559355, x665A93AC, a2, a5); + vsel(x47B135C6, x25A1A797, x5713754C, x66559355); + + vxor(x9A5A5C60, x03FF3033, x99A56C53); + vsel(xD07AF8F8, x9A5A5C60, x556BA09E, x5A66A599); + vxor(x87698DB4, x5713754C, xD07AF8F8); + vxor(xE13C1EE1, x66559355, x87698DB4); + + vsel(x9E48CDE4, x9A5A5C60, x87698DB4, x265E97A4); + vsel(x655B905E, x66559355, x05BB50EE, a4); + vsel(x00A55CFF, a5, a6, x9A5A5C60); + vsel(x9E49915E, x9E48CDE4, x655B905E, x00A55CFF); + vsel(x0, x9E49915E, xE13C1EE1, a1); + vxor(*out1, *out1, x0); + + vsel(xD6599874, xD07AF8F8, x66559355, x0F33F0CC); + vand(x05330022, x0F330F33, x05BB50EE); + vsel(xD2699876, xD6599874, x00A55CFF, x05330022); + vsel(x3, x5A66A599, xD2699876, a1); + vxor(*out4, *out4, x3); + + vsel(x665F9364, x265E97A4, x66559355, x47B135C6); + vsel(xD573F0F2, xD07AF8F8, x05330022, a4); + vxor(xB32C6396, x665F9364, xD573F0F2); + vsel(x2, xB32C6396, x47B135C6, a1); + vxor(*out3, *out3, x2); } static void s4(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, u32 * out1, u32 * out2, u32 * out3, u32 * out4) { - u32 x0505AFAF, x0555AF55, x0A5AA05A, x46566456, x0A0A5F5F, x0AF55FA0, - x0AF50F0F, x4CA36B59; - u32 xB35C94A6; - u32 x01BB23BB, x5050FAFA, xA31C26BE, xA91679E1; - u32 x56E9861E; - u32 x50E9FA1E, x0AF55F00, x827D9784, xD2946D9A; - u32 x31F720B3, x11FB21B3, x4712A7AD, x9586CA37; - u32 x0, x1, x2, x3; - - vsel(x0505AFAF, a5, a3, a1); - vsel(x0555AF55, x0505AFAF, a1, a4); - vxor(x0A5AA05A, a3, x0555AF55); - vsel(x46566456, a1, x0A5AA05A, a2); - vsel(x0A0A5F5F, a3, a5, a1); - vxor(x0AF55FA0, a4, x0A0A5F5F); - vsel(x0AF50F0F, x0AF55FA0, a3, a5); - vxor(x4CA36B59, x46566456, x0AF50F0F); - - vnot(xB35C94A6, x4CA36B59); - - vsel(x01BB23BB, a4, a2, x0555AF55); - vxor(x5050FAFA, a1, x0505AFAF); - vsel(xA31C26BE, xB35C94A6, x01BB23BB, x5050FAFA); - vxor(xA91679E1, x0A0A5F5F, xA31C26BE); - - vnot(x56E9861E, xA91679E1); - - vsel(x50E9FA1E, x5050FAFA, x56E9861E, a4); - vsel(x0AF55F00, x0AF50F0F, x0AF55FA0, x0A0A5F5F); - vsel(x827D9784, xB35C94A6, x0AF55F00, a2); - vxor(xD2946D9A, x50E9FA1E, x827D9784); - vsel(x2, xD2946D9A, x4CA36B59, a6); - vxor(*out3, *out3, x2); - vsel(x3, xB35C94A6, xD2946D9A, a6); - vxor(*out4, *out4, x3); - - vsel(x31F720B3, a2, a4, x0AF55FA0); - vsel(x11FB21B3, x01BB23BB, x31F720B3, x5050FAFA); - vxor(x4712A7AD, x56E9861E, x11FB21B3); - vxor(x9586CA37, xD2946D9A, x4712A7AD); - vsel(x0, x56E9861E, x9586CA37, a6); - vxor(*out1, *out1, x0); - vsel(x1, x9586CA37, xA91679E1, a6); - vxor(*out2, *out2, x1); + u32 x0505AFAF, x0555AF55, x0A5AA05A, x46566456, x0A0A5F5F, x0AF55FA0, + x0AF50F0F, x4CA36B59; + u32 xB35C94A6; + u32 x01BB23BB, x5050FAFA, xA31C26BE, xA91679E1; + u32 x56E9861E; + u32 x50E9FA1E, x0AF55F00, x827D9784, xD2946D9A; + u32 x31F720B3, x11FB21B3, x4712A7AD, x9586CA37; + u32 x0, x1, x2, x3; + + vsel(x0505AFAF, a5, a3, a1); + vsel(x0555AF55, x0505AFAF, a1, a4); + vxor(x0A5AA05A, a3, x0555AF55); + vsel(x46566456, a1, x0A5AA05A, a2); + vsel(x0A0A5F5F, a3, a5, a1); + vxor(x0AF55FA0, a4, x0A0A5F5F); + vsel(x0AF50F0F, x0AF55FA0, a3, a5); + vxor(x4CA36B59, x46566456, x0AF50F0F); + + vnot(xB35C94A6, x4CA36B59); + + vsel(x01BB23BB, a4, a2, x0555AF55); + vxor(x5050FAFA, a1, x0505AFAF); + vsel(xA31C26BE, xB35C94A6, x01BB23BB, x5050FAFA); + vxor(xA91679E1, x0A0A5F5F, xA31C26BE); + + vnot(x56E9861E, xA91679E1); + + vsel(x50E9FA1E, x5050FAFA, x56E9861E, a4); + vsel(x0AF55F00, x0AF50F0F, x0AF55FA0, x0A0A5F5F); + vsel(x827D9784, xB35C94A6, x0AF55F00, a2); + vxor(xD2946D9A, x50E9FA1E, x827D9784); + vsel(x2, xD2946D9A, x4CA36B59, a6); + vxor(*out3, *out3, x2); + vsel(x3, xB35C94A6, xD2946D9A, a6); + vxor(*out4, *out4, x3); + + vsel(x31F720B3, a2, a4, x0AF55FA0); + vsel(x11FB21B3, x01BB23BB, x31F720B3, x5050FAFA); + vxor(x4712A7AD, x56E9861E, x11FB21B3); + vxor(x9586CA37, xD2946D9A, x4712A7AD); + vsel(x0, x56E9861E, x9586CA37, a6); + vxor(*out1, *out1, x0); + vsel(x1, x9586CA37, xA91679E1, a6); + vxor(*out2, *out2, x1); } static void s5(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, u32 * out1, u32 * out2, u32 * out3, u32 * out4) { - u32 x550F550F, xAAF0AAF0, xA5F5A5F5, x96C696C6, x00FFFF00, x963969C6; - u32 x2E3C2E3C, xB73121F7, x1501DF0F, x00558A5F, x2E69A463; - u32 x0679ED42, x045157FD, xB32077FF, x9D49D39C; - u32 xAC81CFB2, xF72577AF, x5BA4B81D; - u32 x5BA477AF, x4895469F, x3A35273A, x1A35669A; - u32 x12E6283D, x9E47D3D4, x1A676AB4; - u32 x891556DF, xE5E77F82, x6CF2295D; - u32 x2E3CA5F5, x9697C1C6, x369CC1D6; - u32 x0, x1, x2, x3; - - vsel(x550F550F, a1, a3, a5); - vnot(xAAF0AAF0, x550F550F); - vsel(xA5F5A5F5, xAAF0AAF0, a1, a3); - vxor(x96C696C6, a2, xA5F5A5F5); - vxor(x00FFFF00, a5, a6); - vxor(x963969C6, x96C696C6, x00FFFF00); - - vsel(x2E3C2E3C, a3, xAAF0AAF0, a2); - vsel(xB73121F7, a2, x963969C6, x96C696C6); - vsel(x1501DF0F, a6, x550F550F, xB73121F7); - vsel(x00558A5F, x1501DF0F, a5, a1); - vxor(x2E69A463, x2E3C2E3C, x00558A5F); - - vsel(x0679ED42, x00FFFF00, x2E69A463, x96C696C6); - vsel(x045157FD, a6, a1, x0679ED42); - vsel(xB32077FF, xB73121F7, a6, x045157FD); - vxor(x9D49D39C, x2E69A463, xB32077FF); - vsel(x2, x9D49D39C, x2E69A463, a4); - vxor(*out3, *out3, x2); - - vsel(xAC81CFB2, xAAF0AAF0, x1501DF0F, x0679ED42); - vsel(xF72577AF, xB32077FF, x550F550F, a1); - vxor(x5BA4B81D, xAC81CFB2, xF72577AF); - vsel(x1, x5BA4B81D, x963969C6, a4); - vxor(*out2, *out2, x1); - - vsel(x5BA477AF, x5BA4B81D, xF72577AF, a6); - vsel(x4895469F, x5BA477AF, x00558A5F, a2); - vsel(x3A35273A, x2E3C2E3C, a2, x963969C6); - vsel(x1A35669A, x4895469F, x3A35273A, x5BA4B81D); - - vsel(x12E6283D, a5, x5BA4B81D, x963969C6); - vsel(x9E47D3D4, x96C696C6, x9D49D39C, xAC81CFB2); - vsel(x1A676AB4, x12E6283D, x9E47D3D4, x4895469F); - - vsel(x891556DF, xB32077FF, x4895469F, x3A35273A); - vsel(xE5E77F82, xF72577AF, x00FFFF00, x12E6283D); - vxor(x6CF2295D, x891556DF, xE5E77F82); - vsel(x3, x1A35669A, x6CF2295D, a4); - vxor(*out4, *out4, x3); - - vsel(x2E3CA5F5, x2E3C2E3C, xA5F5A5F5, a6); - vsel(x9697C1C6, x96C696C6, x963969C6, x045157FD); - vsel(x369CC1D6, x2E3CA5F5, x9697C1C6, x5BA477AF); - vsel(x0, x369CC1D6, x1A676AB4, a4); - vxor(*out1, *out1, x0); + u32 x550F550F, xAAF0AAF0, xA5F5A5F5, x96C696C6, x00FFFF00, x963969C6; + u32 x2E3C2E3C, xB73121F7, x1501DF0F, x00558A5F, x2E69A463; + u32 x0679ED42, x045157FD, xB32077FF, x9D49D39C; + u32 xAC81CFB2, xF72577AF, x5BA4B81D; + u32 x5BA477AF, x4895469F, x3A35273A, x1A35669A; + u32 x12E6283D, x9E47D3D4, x1A676AB4; + u32 x891556DF, xE5E77F82, x6CF2295D; + u32 x2E3CA5F5, x9697C1C6, x369CC1D6; + u32 x0, x1, x2, x3; + + vsel(x550F550F, a1, a3, a5); + vnot(xAAF0AAF0, x550F550F); + vsel(xA5F5A5F5, xAAF0AAF0, a1, a3); + vxor(x96C696C6, a2, xA5F5A5F5); + vxor(x00FFFF00, a5, a6); + vxor(x963969C6, x96C696C6, x00FFFF00); + + vsel(x2E3C2E3C, a3, xAAF0AAF0, a2); + vsel(xB73121F7, a2, x963969C6, x96C696C6); + vsel(x1501DF0F, a6, x550F550F, xB73121F7); + vsel(x00558A5F, x1501DF0F, a5, a1); + vxor(x2E69A463, x2E3C2E3C, x00558A5F); + + vsel(x0679ED42, x00FFFF00, x2E69A463, x96C696C6); + vsel(x045157FD, a6, a1, x0679ED42); + vsel(xB32077FF, xB73121F7, a6, x045157FD); + vxor(x9D49D39C, x2E69A463, xB32077FF); + vsel(x2, x9D49D39C, x2E69A463, a4); + vxor(*out3, *out3, x2); + + vsel(xAC81CFB2, xAAF0AAF0, x1501DF0F, x0679ED42); + vsel(xF72577AF, xB32077FF, x550F550F, a1); + vxor(x5BA4B81D, xAC81CFB2, xF72577AF); + vsel(x1, x5BA4B81D, x963969C6, a4); + vxor(*out2, *out2, x1); + + vsel(x5BA477AF, x5BA4B81D, xF72577AF, a6); + vsel(x4895469F, x5BA477AF, x00558A5F, a2); + vsel(x3A35273A, x2E3C2E3C, a2, x963969C6); + vsel(x1A35669A, x4895469F, x3A35273A, x5BA4B81D); + + vsel(x12E6283D, a5, x5BA4B81D, x963969C6); + vsel(x9E47D3D4, x96C696C6, x9D49D39C, xAC81CFB2); + vsel(x1A676AB4, x12E6283D, x9E47D3D4, x4895469F); + + vsel(x891556DF, xB32077FF, x4895469F, x3A35273A); + vsel(xE5E77F82, xF72577AF, x00FFFF00, x12E6283D); + vxor(x6CF2295D, x891556DF, xE5E77F82); + vsel(x3, x1A35669A, x6CF2295D, a4); + vxor(*out4, *out4, x3); + + vsel(x2E3CA5F5, x2E3C2E3C, xA5F5A5F5, a6); + vsel(x9697C1C6, x96C696C6, x963969C6, x045157FD); + vsel(x369CC1D6, x2E3CA5F5, x9697C1C6, x5BA477AF); + vsel(x0, x369CC1D6, x1A676AB4, a4); + vxor(*out1, *out1, x0); } static void s6(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, u32 * out1, u32 * out2, u32 * out3, u32 * out4) { - u32 x555500FF, x666633CC, x606F30CF, x353A659A, x353A9A65, xCAC5659A; - u32 x353A6565, x0A3F0A6F, x6C5939A3, x5963A3C6; - u32 x35FF659A, x3AF06A95, x05CF0A9F, x16E94A97; - u32 x86CD4C9B, x12E0FFFD, x942D9A67; - u32 x142956AB, x455D45DF, x1C3EE619; - u32 x2AEA70D5, x20CF7A9F, x3CF19C86, x69A49C79; - u32 x840DBB67, x6DA19C1E, x925E63E1; - u32 x9C3CA761, x257A75D5, xB946D2B4; - u32 x0, x1, x2, x3; - - vsel(x555500FF, a1, a4, a5); - vxor(x666633CC, a2, x555500FF); - vsel(x606F30CF, x666633CC, a4, a3); - vxor(x353A659A, a1, x606F30CF); - vxor(x353A9A65, a5, x353A659A); - vnot(xCAC5659A, x353A9A65); - - vsel(x353A6565, x353A659A, x353A9A65, a4); - vsel(x0A3F0A6F, a3, a4, x353A6565); - vxor(x6C5939A3, x666633CC, x0A3F0A6F); - vxor(x5963A3C6, x353A9A65, x6C5939A3); - - vsel(x35FF659A, a4, x353A659A, x353A6565); - vxor(x3AF06A95, a3, x35FF659A); - vsel(x05CF0A9F, a4, a3, x353A9A65); - vsel(x16E94A97, x3AF06A95, x05CF0A9F, x6C5939A3); - - vsel(x86CD4C9B, xCAC5659A, x05CF0A9F, x6C5939A3); - vsel(x12E0FFFD, a5, x3AF06A95, x16E94A97); - vsel(x942D9A67, x86CD4C9B, x353A9A65, x12E0FFFD); - vsel(x0, xCAC5659A, x942D9A67, a6); - vxor(*out1, *out1, x0); - - vsel(x142956AB, x353A659A, x942D9A67, a2); - vsel(x455D45DF, a1, x86CD4C9B, x142956AB); - vxor(x1C3EE619, x5963A3C6, x455D45DF); - vsel(x3, x5963A3C6, x1C3EE619, a6); - vxor(*out4, *out4, x3); - - vsel(x2AEA70D5, x3AF06A95, x606F30CF, x353A9A65); - vsel(x20CF7A9F, x2AEA70D5, x05CF0A9F, x0A3F0A6F); - vxor(x3CF19C86, x1C3EE619, x20CF7A9F); - vxor(x69A49C79, x555500FF, x3CF19C86); - - vsel(x840DBB67, a5, x942D9A67, x86CD4C9B); - vsel(x6DA19C1E, x69A49C79, x3CF19C86, x840DBB67); - vnot(x925E63E1, x6DA19C1E); - vsel(x1, x925E63E1, x69A49C79, a6); - vxor(*out2, *out2, x1); - - vsel(x9C3CA761, x840DBB67, x1C3EE619, x3CF19C86); - vsel(x257A75D5, x455D45DF, x2AEA70D5, x606F30CF); - vxor(xB946D2B4, x9C3CA761, x257A75D5); - vsel(x2, x16E94A97, xB946D2B4, a6); - vxor(*out3, *out3, x2); + u32 x555500FF, x666633CC, x606F30CF, x353A659A, x353A9A65, xCAC5659A; + u32 x353A6565, x0A3F0A6F, x6C5939A3, x5963A3C6; + u32 x35FF659A, x3AF06A95, x05CF0A9F, x16E94A97; + u32 x86CD4C9B, x12E0FFFD, x942D9A67; + u32 x142956AB, x455D45DF, x1C3EE619; + u32 x2AEA70D5, x20CF7A9F, x3CF19C86, x69A49C79; + u32 x840DBB67, x6DA19C1E, x925E63E1; + u32 x9C3CA761, x257A75D5, xB946D2B4; + u32 x0, x1, x2, x3; + + vsel(x555500FF, a1, a4, a5); + vxor(x666633CC, a2, x555500FF); + vsel(x606F30CF, x666633CC, a4, a3); + vxor(x353A659A, a1, x606F30CF); + vxor(x353A9A65, a5, x353A659A); + vnot(xCAC5659A, x353A9A65); + + vsel(x353A6565, x353A659A, x353A9A65, a4); + vsel(x0A3F0A6F, a3, a4, x353A6565); + vxor(x6C5939A3, x666633CC, x0A3F0A6F); + vxor(x5963A3C6, x353A9A65, x6C5939A3); + + vsel(x35FF659A, a4, x353A659A, x353A6565); + vxor(x3AF06A95, a3, x35FF659A); + vsel(x05CF0A9F, a4, a3, x353A9A65); + vsel(x16E94A97, x3AF06A95, x05CF0A9F, x6C5939A3); + + vsel(x86CD4C9B, xCAC5659A, x05CF0A9F, x6C5939A3); + vsel(x12E0FFFD, a5, x3AF06A95, x16E94A97); + vsel(x942D9A67, x86CD4C9B, x353A9A65, x12E0FFFD); + vsel(x0, xCAC5659A, x942D9A67, a6); + vxor(*out1, *out1, x0); + + vsel(x142956AB, x353A659A, x942D9A67, a2); + vsel(x455D45DF, a1, x86CD4C9B, x142956AB); + vxor(x1C3EE619, x5963A3C6, x455D45DF); + vsel(x3, x5963A3C6, x1C3EE619, a6); + vxor(*out4, *out4, x3); + + vsel(x2AEA70D5, x3AF06A95, x606F30CF, x353A9A65); + vsel(x20CF7A9F, x2AEA70D5, x05CF0A9F, x0A3F0A6F); + vxor(x3CF19C86, x1C3EE619, x20CF7A9F); + vxor(x69A49C79, x555500FF, x3CF19C86); + + vsel(x840DBB67, a5, x942D9A67, x86CD4C9B); + vsel(x6DA19C1E, x69A49C79, x3CF19C86, x840DBB67); + vnot(x925E63E1, x6DA19C1E); + vsel(x1, x925E63E1, x69A49C79, a6); + vxor(*out2, *out2, x1); + + vsel(x9C3CA761, x840DBB67, x1C3EE619, x3CF19C86); + vsel(x257A75D5, x455D45DF, x2AEA70D5, x606F30CF); + vxor(xB946D2B4, x9C3CA761, x257A75D5); + vsel(x2, x16E94A97, xB946D2B4, a6); + vxor(*out3, *out3, x2); } static void s7(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, u32 * out1, u32 * out2, u32 * out3, u32 * out4) { - u32 x44447777, x4B4B7878, x22772277, x0505F5F5, x220522F5, x694E5A8D; - u32 x00FFFF00, x66666666, x32353235, x26253636, x26DAC936; - u32 x738F9C63, x11EF9867, x26DA9867; - u32 x4B4B9C63, x4B666663, x4E639396; - u32 x4E4B393C, xFF00FF00, xFF05DD21, xB14EE41D; - u32 xD728827B, x6698807B, x699C585B; - u32 x738C847B, xA4A71E18, x74878E78; - u32 x333D9639, x74879639, x8B7869C6; - u32 x0, x1, x2, x3; - - vsel(x44447777, a2, a6, a3); - vxor(x4B4B7878, a4, x44447777); - vsel(x22772277, a3, a5, a2); - vsel(x0505F5F5, a6, a2, a4); - vsel(x220522F5, x22772277, x0505F5F5, a5); - vxor(x694E5A8D, x4B4B7878, x220522F5); - - vxor(x00FFFF00, a5, a6); - vxor(x66666666, a2, a3); - vsel(x32353235, a3, x220522F5, a4); - vsel(x26253636, x66666666, x32353235, x4B4B7878); - vxor(x26DAC936, x00FFFF00, x26253636); - vsel(x0, x26DAC936, x694E5A8D, a1); - vxor(*out1, *out1, x0); - - vxor(x738F9C63, a2, x26DAC936); - vsel(x11EF9867, x738F9C63, a5, x66666666); - vsel(x26DA9867, x26DAC936, x11EF9867, a6); - - vsel(x4B4B9C63, x4B4B7878, x738F9C63, a6); - vsel(x4B666663, x4B4B9C63, x66666666, x00FFFF00); - vxor(x4E639396, x0505F5F5, x4B666663); - - vsel(x4E4B393C, x4B4B7878, x4E639396, a2); - vnot(xFF00FF00, a5); - vsel(xFF05DD21, xFF00FF00, x738F9C63, x32353235); - vxor(xB14EE41D, x4E4B393C, xFF05DD21); - vsel(x1, xB14EE41D, x26DA9867, a1); - vxor(*out2, *out2, x1); - - vxor(xD728827B, x66666666, xB14EE41D); - vsel(x6698807B, x26DA9867, xD728827B, x4E4B393C); - vsel(x699C585B, x6698807B, x694E5A8D, xFF05DD21); - vsel(x2, x699C585B, x4E639396, a1); - vxor(*out3, *out3, x2); - - vsel(x738C847B, x738F9C63, xD728827B, x4B4B7878); - vxor(xA4A71E18, x738F9C63, xD728827B); - vsel(x74878E78, x738C847B, xA4A71E18, a4); - - vsel(x333D9639, x32353235, x738C847B, xB14EE41D); - vsel(x74879639, x74878E78, x333D9639, a6); - vnot(x8B7869C6, x74879639); - vsel(x3, x74878E78, x8B7869C6, a1); - vxor(*out4, *out4, x3); + u32 x44447777, x4B4B7878, x22772277, x0505F5F5, x220522F5, x694E5A8D; + u32 x00FFFF00, x66666666, x32353235, x26253636, x26DAC936; + u32 x738F9C63, x11EF9867, x26DA9867; + u32 x4B4B9C63, x4B666663, x4E639396; + u32 x4E4B393C, xFF00FF00, xFF05DD21, xB14EE41D; + u32 xD728827B, x6698807B, x699C585B; + u32 x738C847B, xA4A71E18, x74878E78; + u32 x333D9639, x74879639, x8B7869C6; + u32 x0, x1, x2, x3; + + vsel(x44447777, a2, a6, a3); + vxor(x4B4B7878, a4, x44447777); + vsel(x22772277, a3, a5, a2); + vsel(x0505F5F5, a6, a2, a4); + vsel(x220522F5, x22772277, x0505F5F5, a5); + vxor(x694E5A8D, x4B4B7878, x220522F5); + + vxor(x00FFFF00, a5, a6); + vxor(x66666666, a2, a3); + vsel(x32353235, a3, x220522F5, a4); + vsel(x26253636, x66666666, x32353235, x4B4B7878); + vxor(x26DAC936, x00FFFF00, x26253636); + vsel(x0, x26DAC936, x694E5A8D, a1); + vxor(*out1, *out1, x0); + + vxor(x738F9C63, a2, x26DAC936); + vsel(x11EF9867, x738F9C63, a5, x66666666); + vsel(x26DA9867, x26DAC936, x11EF9867, a6); + + vsel(x4B4B9C63, x4B4B7878, x738F9C63, a6); + vsel(x4B666663, x4B4B9C63, x66666666, x00FFFF00); + vxor(x4E639396, x0505F5F5, x4B666663); + + vsel(x4E4B393C, x4B4B7878, x4E639396, a2); + vnot(xFF00FF00, a5); + vsel(xFF05DD21, xFF00FF00, x738F9C63, x32353235); + vxor(xB14EE41D, x4E4B393C, xFF05DD21); + vsel(x1, xB14EE41D, x26DA9867, a1); + vxor(*out2, *out2, x1); + + vxor(xD728827B, x66666666, xB14EE41D); + vsel(x6698807B, x26DA9867, xD728827B, x4E4B393C); + vsel(x699C585B, x6698807B, x694E5A8D, xFF05DD21); + vsel(x2, x699C585B, x4E639396, a1); + vxor(*out3, *out3, x2); + + vsel(x738C847B, x738F9C63, xD728827B, x4B4B7878); + vxor(xA4A71E18, x738F9C63, xD728827B); + vsel(x74878E78, x738C847B, xA4A71E18, a4); + + vsel(x333D9639, x32353235, x738C847B, xB14EE41D); + vsel(x74879639, x74878E78, x333D9639, a6); + vnot(x8B7869C6, x74879639); + vsel(x3, x74878E78, x8B7869C6, a1); + vxor(*out4, *out4, x3); } static void s8(u32 a1, u32 a2, u32 a3, u32 a4, u32 a5, u32 a6, u32 * out1, u32 * out2, u32 * out3, u32 * out4) { - u32 x0505F5F5, x05FAF50A, x0F0F00FF, x22227777, x07DA807F, x34E9B34C; - u32 x00FFF00F, x0033FCCF, x5565B15C, x0C0C3F3F, x59698E63; - u32 x3001F74E, x30555745, x693CD926; - u32 x0C0CD926, x0C3F25E9, x38D696A5; - u32 xC729695A; - u32 x03D2117B, xC778395B, xCB471CB2; - u32 x5425B13F, x56B3803F, x919AE965; - u32 x17B3023F, x75555755, x62E6556A, xA59E6C31; - u32 x0, x1, x2, x3; - - vsel(x0505F5F5, a5, a1, a3); - vxor(x05FAF50A, a4, x0505F5F5); - vsel(x0F0F00FF, a3, a4, a5); - vsel(x22227777, a2, a5, a1); - vsel(x07DA807F, x05FAF50A, x0F0F00FF, x22227777); - vxor(x34E9B34C, a2, x07DA807F); - - vsel(x00FFF00F, x05FAF50A, a4, a3); - vsel(x0033FCCF, a5, x00FFF00F, a2); - vsel(x5565B15C, a1, x34E9B34C, x0033FCCF); - vsel(x0C0C3F3F, a3, a5, a2); - vxor(x59698E63, x5565B15C, x0C0C3F3F); - - vsel(x3001F74E, x34E9B34C, a5, x05FAF50A); - vsel(x30555745, x3001F74E, a1, x00FFF00F); - vxor(x693CD926, x59698E63, x30555745); - vsel(x2, x693CD926, x59698E63, a6); - vxor(*out3, *out3, x2); - - vsel(x0C0CD926, x0C0C3F3F, x693CD926, a5); - vxor(x0C3F25E9, x0033FCCF, x0C0CD926); - vxor(x38D696A5, x34E9B34C, x0C3F25E9); - - vnot(xC729695A, x38D696A5); - - vsel(x03D2117B, x07DA807F, a2, x0C0CD926); - vsel(xC778395B, xC729695A, x03D2117B, x30555745); - vxor(xCB471CB2, x0C3F25E9, xC778395B); - vsel(x1, xCB471CB2, x34E9B34C, a6); - vxor(*out2, *out2, x1); - - vsel(x5425B13F, x5565B15C, x0C0C3F3F, x03D2117B); - vsel(x56B3803F, x07DA807F, x5425B13F, x59698E63); - vxor(x919AE965, xC729695A, x56B3803F); - vsel(x3, xC729695A, x919AE965, a6); - vxor(*out4, *out4, x3); - - vsel(x17B3023F, x07DA807F, a2, x59698E63); - vor(x75555755, a1, x30555745); - vxor(x62E6556A, x17B3023F, x75555755); - vxor(xA59E6C31, xC778395B, x62E6556A); - vsel(x0, xA59E6C31, x38D696A5, a6); - vxor(*out1, *out1, x0); + u32 x0505F5F5, x05FAF50A, x0F0F00FF, x22227777, x07DA807F, x34E9B34C; + u32 x00FFF00F, x0033FCCF, x5565B15C, x0C0C3F3F, x59698E63; + u32 x3001F74E, x30555745, x693CD926; + u32 x0C0CD926, x0C3F25E9, x38D696A5; + u32 xC729695A; + u32 x03D2117B, xC778395B, xCB471CB2; + u32 x5425B13F, x56B3803F, x919AE965; + u32 x17B3023F, x75555755, x62E6556A, xA59E6C31; + u32 x0, x1, x2, x3; + + vsel(x0505F5F5, a5, a1, a3); + vxor(x05FAF50A, a4, x0505F5F5); + vsel(x0F0F00FF, a3, a4, a5); + vsel(x22227777, a2, a5, a1); + vsel(x07DA807F, x05FAF50A, x0F0F00FF, x22227777); + vxor(x34E9B34C, a2, x07DA807F); + + vsel(x00FFF00F, x05FAF50A, a4, a3); + vsel(x0033FCCF, a5, x00FFF00F, a2); + vsel(x5565B15C, a1, x34E9B34C, x0033FCCF); + vsel(x0C0C3F3F, a3, a5, a2); + vxor(x59698E63, x5565B15C, x0C0C3F3F); + + vsel(x3001F74E, x34E9B34C, a5, x05FAF50A); + vsel(x30555745, x3001F74E, a1, x00FFF00F); + vxor(x693CD926, x59698E63, x30555745); + vsel(x2, x693CD926, x59698E63, a6); + vxor(*out3, *out3, x2); + + vsel(x0C0CD926, x0C0C3F3F, x693CD926, a5); + vxor(x0C3F25E9, x0033FCCF, x0C0CD926); + vxor(x38D696A5, x34E9B34C, x0C3F25E9); + + vnot(xC729695A, x38D696A5); + + vsel(x03D2117B, x07DA807F, a2, x0C0CD926); + vsel(xC778395B, xC729695A, x03D2117B, x30555745); + vxor(xCB471CB2, x0C3F25E9, xC778395B); + vsel(x1, xCB471CB2, x34E9B34C, a6); + vxor(*out2, *out2, x1); + + vsel(x5425B13F, x5565B15C, x0C0C3F3F, x03D2117B); + vsel(x56B3803F, x07DA807F, x5425B13F, x59698E63); + vxor(x919AE965, xC729695A, x56B3803F); + vsel(x3, xC729695A, x919AE965, a6); + vxor(*out4, *out4, x3); + + vsel(x17B3023F, x07DA807F, a2, x59698E63); + vor(x75555755, a1, x30555745); + vxor(x62E6556A, x17B3023F, x75555755); + vxor(xA59E6C31, xC778395B, x62E6556A); + vsel(x0, xA59E6C31, x38D696A5, a6); + vxor(*out1, *out1, x0); } #define SWAP(a, b) { u32 tmp=a;a=b;b=tmp; } diff --git a/amd/m03100_a0.cl b/amd/m03100_a0.cl index 23f60db..edad8ee 100644 --- a/amd/m03100_a0.cl +++ b/amd/m03100_a0.cl @@ -389,7 +389,7 @@ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[1 IP (r, l, tt); r = rotl32 (r, 3u); - l = rotl32 (l, 3u); + l = rotl32 (l, 3u); #pragma unroll 16 for (u32 i = 0; i < 16; i += 2) diff --git a/amd/m03100_a1.cl b/amd/m03100_a1.cl index 51de5ba..5793a02 100644 --- a/amd/m03100_a1.cl +++ b/amd/m03100_a1.cl @@ -387,7 +387,7 @@ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[1 IP (r, l, tt); r = rotl32 (r, 3u); - l = rotl32 (l, 3u); + l = rotl32 (l, 3u); #pragma unroll 16 for (u32 i = 0; i < 16; i += 2) diff --git a/amd/m03100_a3.cl b/amd/m03100_a3.cl index cce9f04..e4b0689 100644 --- a/amd/m03100_a3.cl +++ b/amd/m03100_a3.cl @@ -388,7 +388,7 @@ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[1 IP (r, l, tt); r = rotl32 (r, 3u); - l = rotl32 (l, 3u); + l = rotl32 (l, 3u); #pragma unroll 16 for (u32 i = 0; i < 16; i += 2) diff --git a/amd/m05800.cl b/amd/m05800.cl index a9c10bf..cf289c5 100644 --- a/amd/m05800.cl +++ b/amd/m05800.cl @@ -46,134 +46,134 @@ typedef struct __constant entry_t pc[1024] = { - 0x00000030, 1, 0x00000031, 1, 0x00000032, 1, 0x00000033, 1, 0x00000034, 1, 0x00000035, 1, 0x00000036, 1, 0x00000037, 1, - 0x00000038, 1, 0x00000039, 1, 0x00003031, 2, 0x00003131, 2, 0x00003231, 2, 0x00003331, 2, 0x00003431, 2, 0x00003531, 2, - 0x00003631, 2, 0x00003731, 2, 0x00003831, 2, 0x00003931, 2, 0x00003032, 2, 0x00003132, 2, 0x00003232, 2, 0x00003332, 2, - 0x00003432, 2, 0x00003532, 2, 0x00003632, 2, 0x00003732, 2, 0x00003832, 2, 0x00003932, 2, 0x00003033, 2, 0x00003133, 2, - 0x00003233, 2, 0x00003333, 2, 0x00003433, 2, 0x00003533, 2, 0x00003633, 2, 0x00003733, 2, 0x00003833, 2, 0x00003933, 2, - 0x00003034, 2, 0x00003134, 2, 0x00003234, 2, 0x00003334, 2, 0x00003434, 2, 0x00003534, 2, 0x00003634, 2, 0x00003734, 2, - 0x00003834, 2, 0x00003934, 2, 0x00003035, 2, 0x00003135, 2, 0x00003235, 2, 0x00003335, 2, 0x00003435, 2, 0x00003535, 2, - 0x00003635, 2, 0x00003735, 2, 0x00003835, 2, 0x00003935, 2, 0x00003036, 2, 0x00003136, 2, 0x00003236, 2, 0x00003336, 2, - 0x00003436, 2, 0x00003536, 2, 0x00003636, 2, 0x00003736, 2, 0x00003836, 2, 0x00003936, 2, 0x00003037, 2, 0x00003137, 2, - 0x00003237, 2, 0x00003337, 2, 0x00003437, 2, 0x00003537, 2, 0x00003637, 2, 0x00003737, 2, 0x00003837, 2, 0x00003937, 2, - 0x00003038, 2, 0x00003138, 2, 0x00003238, 2, 0x00003338, 2, 0x00003438, 2, 0x00003538, 2, 0x00003638, 2, 0x00003738, 2, - 0x00003838, 2, 0x00003938, 2, 0x00003039, 2, 0x00003139, 2, 0x00003239, 2, 0x00003339, 2, 0x00003439, 2, 0x00003539, 2, - 0x00003639, 2, 0x00003739, 2, 0x00003839, 2, 0x00003939, 2, 0x00303031, 3, 0x00313031, 3, 0x00323031, 3, 0x00333031, 3, - 0x00343031, 3, 0x00353031, 3, 0x00363031, 3, 0x00373031, 3, 0x00383031, 3, 0x00393031, 3, 0x00303131, 3, 0x00313131, 3, - 0x00323131, 3, 0x00333131, 3, 0x00343131, 3, 0x00353131, 3, 0x00363131, 3, 0x00373131, 3, 0x00383131, 3, 0x00393131, 3, - 0x00303231, 3, 0x00313231, 3, 0x00323231, 3, 0x00333231, 3, 0x00343231, 3, 0x00353231, 3, 0x00363231, 3, 0x00373231, 3, - 0x00383231, 3, 0x00393231, 3, 0x00303331, 3, 0x00313331, 3, 0x00323331, 3, 0x00333331, 3, 0x00343331, 3, 0x00353331, 3, - 0x00363331, 3, 0x00373331, 3, 0x00383331, 3, 0x00393331, 3, 0x00303431, 3, 0x00313431, 3, 0x00323431, 3, 0x00333431, 3, - 0x00343431, 3, 0x00353431, 3, 0x00363431, 3, 0x00373431, 3, 0x00383431, 3, 0x00393431, 3, 0x00303531, 3, 0x00313531, 3, - 0x00323531, 3, 0x00333531, 3, 0x00343531, 3, 0x00353531, 3, 0x00363531, 3, 0x00373531, 3, 0x00383531, 3, 0x00393531, 3, - 0x00303631, 3, 0x00313631, 3, 0x00323631, 3, 0x00333631, 3, 0x00343631, 3, 0x00353631, 3, 0x00363631, 3, 0x00373631, 3, - 0x00383631, 3, 0x00393631, 3, 0x00303731, 3, 0x00313731, 3, 0x00323731, 3, 0x00333731, 3, 0x00343731, 3, 0x00353731, 3, - 0x00363731, 3, 0x00373731, 3, 0x00383731, 3, 0x00393731, 3, 0x00303831, 3, 0x00313831, 3, 0x00323831, 3, 0x00333831, 3, - 0x00343831, 3, 0x00353831, 3, 0x00363831, 3, 0x00373831, 3, 0x00383831, 3, 0x00393831, 3, 0x00303931, 3, 0x00313931, 3, - 0x00323931, 3, 0x00333931, 3, 0x00343931, 3, 0x00353931, 3, 0x00363931, 3, 0x00373931, 3, 0x00383931, 3, 0x00393931, 3, - 0x00303032, 3, 0x00313032, 3, 0x00323032, 3, 0x00333032, 3, 0x00343032, 3, 0x00353032, 3, 0x00363032, 3, 0x00373032, 3, - 0x00383032, 3, 0x00393032, 3, 0x00303132, 3, 0x00313132, 3, 0x00323132, 3, 0x00333132, 3, 0x00343132, 3, 0x00353132, 3, - 0x00363132, 3, 0x00373132, 3, 0x00383132, 3, 0x00393132, 3, 0x00303232, 3, 0x00313232, 3, 0x00323232, 3, 0x00333232, 3, - 0x00343232, 3, 0x00353232, 3, 0x00363232, 3, 0x00373232, 3, 0x00383232, 3, 0x00393232, 3, 0x00303332, 3, 0x00313332, 3, - 0x00323332, 3, 0x00333332, 3, 0x00343332, 3, 0x00353332, 3, 0x00363332, 3, 0x00373332, 3, 0x00383332, 3, 0x00393332, 3, - 0x00303432, 3, 0x00313432, 3, 0x00323432, 3, 0x00333432, 3, 0x00343432, 3, 0x00353432, 3, 0x00363432, 3, 0x00373432, 3, - 0x00383432, 3, 0x00393432, 3, 0x00303532, 3, 0x00313532, 3, 0x00323532, 3, 0x00333532, 3, 0x00343532, 3, 0x00353532, 3, - 0x00363532, 3, 0x00373532, 3, 0x00383532, 3, 0x00393532, 3, 0x00303632, 3, 0x00313632, 3, 0x00323632, 3, 0x00333632, 3, - 0x00343632, 3, 0x00353632, 3, 0x00363632, 3, 0x00373632, 3, 0x00383632, 3, 0x00393632, 3, 0x00303732, 3, 0x00313732, 3, - 0x00323732, 3, 0x00333732, 3, 0x00343732, 3, 0x00353732, 3, 0x00363732, 3, 0x00373732, 3, 0x00383732, 3, 0x00393732, 3, - 0x00303832, 3, 0x00313832, 3, 0x00323832, 3, 0x00333832, 3, 0x00343832, 3, 0x00353832, 3, 0x00363832, 3, 0x00373832, 3, - 0x00383832, 3, 0x00393832, 3, 0x00303932, 3, 0x00313932, 3, 0x00323932, 3, 0x00333932, 3, 0x00343932, 3, 0x00353932, 3, - 0x00363932, 3, 0x00373932, 3, 0x00383932, 3, 0x00393932, 3, 0x00303033, 3, 0x00313033, 3, 0x00323033, 3, 0x00333033, 3, - 0x00343033, 3, 0x00353033, 3, 0x00363033, 3, 0x00373033, 3, 0x00383033, 3, 0x00393033, 3, 0x00303133, 3, 0x00313133, 3, - 0x00323133, 3, 0x00333133, 3, 0x00343133, 3, 0x00353133, 3, 0x00363133, 3, 0x00373133, 3, 0x00383133, 3, 0x00393133, 3, - 0x00303233, 3, 0x00313233, 3, 0x00323233, 3, 0x00333233, 3, 0x00343233, 3, 0x00353233, 3, 0x00363233, 3, 0x00373233, 3, - 0x00383233, 3, 0x00393233, 3, 0x00303333, 3, 0x00313333, 3, 0x00323333, 3, 0x00333333, 3, 0x00343333, 3, 0x00353333, 3, - 0x00363333, 3, 0x00373333, 3, 0x00383333, 3, 0x00393333, 3, 0x00303433, 3, 0x00313433, 3, 0x00323433, 3, 0x00333433, 3, - 0x00343433, 3, 0x00353433, 3, 0x00363433, 3, 0x00373433, 3, 0x00383433, 3, 0x00393433, 3, 0x00303533, 3, 0x00313533, 3, - 0x00323533, 3, 0x00333533, 3, 0x00343533, 3, 0x00353533, 3, 0x00363533, 3, 0x00373533, 3, 0x00383533, 3, 0x00393533, 3, - 0x00303633, 3, 0x00313633, 3, 0x00323633, 3, 0x00333633, 3, 0x00343633, 3, 0x00353633, 3, 0x00363633, 3, 0x00373633, 3, - 0x00383633, 3, 0x00393633, 3, 0x00303733, 3, 0x00313733, 3, 0x00323733, 3, 0x00333733, 3, 0x00343733, 3, 0x00353733, 3, - 0x00363733, 3, 0x00373733, 3, 0x00383733, 3, 0x00393733, 3, 0x00303833, 3, 0x00313833, 3, 0x00323833, 3, 0x00333833, 3, - 0x00343833, 3, 0x00353833, 3, 0x00363833, 3, 0x00373833, 3, 0x00383833, 3, 0x00393833, 3, 0x00303933, 3, 0x00313933, 3, - 0x00323933, 3, 0x00333933, 3, 0x00343933, 3, 0x00353933, 3, 0x00363933, 3, 0x00373933, 3, 0x00383933, 3, 0x00393933, 3, - 0x00303034, 3, 0x00313034, 3, 0x00323034, 3, 0x00333034, 3, 0x00343034, 3, 0x00353034, 3, 0x00363034, 3, 0x00373034, 3, - 0x00383034, 3, 0x00393034, 3, 0x00303134, 3, 0x00313134, 3, 0x00323134, 3, 0x00333134, 3, 0x00343134, 3, 0x00353134, 3, - 0x00363134, 3, 0x00373134, 3, 0x00383134, 3, 0x00393134, 3, 0x00303234, 3, 0x00313234, 3, 0x00323234, 3, 0x00333234, 3, - 0x00343234, 3, 0x00353234, 3, 0x00363234, 3, 0x00373234, 3, 0x00383234, 3, 0x00393234, 3, 0x00303334, 3, 0x00313334, 3, - 0x00323334, 3, 0x00333334, 3, 0x00343334, 3, 0x00353334, 3, 0x00363334, 3, 0x00373334, 3, 0x00383334, 3, 0x00393334, 3, - 0x00303434, 3, 0x00313434, 3, 0x00323434, 3, 0x00333434, 3, 0x00343434, 3, 0x00353434, 3, 0x00363434, 3, 0x00373434, 3, - 0x00383434, 3, 0x00393434, 3, 0x00303534, 3, 0x00313534, 3, 0x00323534, 3, 0x00333534, 3, 0x00343534, 3, 0x00353534, 3, - 0x00363534, 3, 0x00373534, 3, 0x00383534, 3, 0x00393534, 3, 0x00303634, 3, 0x00313634, 3, 0x00323634, 3, 0x00333634, 3, - 0x00343634, 3, 0x00353634, 3, 0x00363634, 3, 0x00373634, 3, 0x00383634, 3, 0x00393634, 3, 0x00303734, 3, 0x00313734, 3, - 0x00323734, 3, 0x00333734, 3, 0x00343734, 3, 0x00353734, 3, 0x00363734, 3, 0x00373734, 3, 0x00383734, 3, 0x00393734, 3, - 0x00303834, 3, 0x00313834, 3, 0x00323834, 3, 0x00333834, 3, 0x00343834, 3, 0x00353834, 3, 0x00363834, 3, 0x00373834, 3, - 0x00383834, 3, 0x00393834, 3, 0x00303934, 3, 0x00313934, 3, 0x00323934, 3, 0x00333934, 3, 0x00343934, 3, 0x00353934, 3, - 0x00363934, 3, 0x00373934, 3, 0x00383934, 3, 0x00393934, 3, 0x00303035, 3, 0x00313035, 3, 0x00323035, 3, 0x00333035, 3, - 0x00343035, 3, 0x00353035, 3, 0x00363035, 3, 0x00373035, 3, 0x00383035, 3, 0x00393035, 3, 0x00303135, 3, 0x00313135, 3, - 0x00323135, 3, 0x00333135, 3, 0x00343135, 3, 0x00353135, 3, 0x00363135, 3, 0x00373135, 3, 0x00383135, 3, 0x00393135, 3, - 0x00303235, 3, 0x00313235, 3, 0x00323235, 3, 0x00333235, 3, 0x00343235, 3, 0x00353235, 3, 0x00363235, 3, 0x00373235, 3, - 0x00383235, 3, 0x00393235, 3, 0x00303335, 3, 0x00313335, 3, 0x00323335, 3, 0x00333335, 3, 0x00343335, 3, 0x00353335, 3, - 0x00363335, 3, 0x00373335, 3, 0x00383335, 3, 0x00393335, 3, 0x00303435, 3, 0x00313435, 3, 0x00323435, 3, 0x00333435, 3, - 0x00343435, 3, 0x00353435, 3, 0x00363435, 3, 0x00373435, 3, 0x00383435, 3, 0x00393435, 3, 0x00303535, 3, 0x00313535, 3, - 0x00323535, 3, 0x00333535, 3, 0x00343535, 3, 0x00353535, 3, 0x00363535, 3, 0x00373535, 3, 0x00383535, 3, 0x00393535, 3, - 0x00303635, 3, 0x00313635, 3, 0x00323635, 3, 0x00333635, 3, 0x00343635, 3, 0x00353635, 3, 0x00363635, 3, 0x00373635, 3, - 0x00383635, 3, 0x00393635, 3, 0x00303735, 3, 0x00313735, 3, 0x00323735, 3, 0x00333735, 3, 0x00343735, 3, 0x00353735, 3, - 0x00363735, 3, 0x00373735, 3, 0x00383735, 3, 0x00393735, 3, 0x00303835, 3, 0x00313835, 3, 0x00323835, 3, 0x00333835, 3, - 0x00343835, 3, 0x00353835, 3, 0x00363835, 3, 0x00373835, 3, 0x00383835, 3, 0x00393835, 3, 0x00303935, 3, 0x00313935, 3, - 0x00323935, 3, 0x00333935, 3, 0x00343935, 3, 0x00353935, 3, 0x00363935, 3, 0x00373935, 3, 0x00383935, 3, 0x00393935, 3, - 0x00303036, 3, 0x00313036, 3, 0x00323036, 3, 0x00333036, 3, 0x00343036, 3, 0x00353036, 3, 0x00363036, 3, 0x00373036, 3, - 0x00383036, 3, 0x00393036, 3, 0x00303136, 3, 0x00313136, 3, 0x00323136, 3, 0x00333136, 3, 0x00343136, 3, 0x00353136, 3, - 0x00363136, 3, 0x00373136, 3, 0x00383136, 3, 0x00393136, 3, 0x00303236, 3, 0x00313236, 3, 0x00323236, 3, 0x00333236, 3, - 0x00343236, 3, 0x00353236, 3, 0x00363236, 3, 0x00373236, 3, 0x00383236, 3, 0x00393236, 3, 0x00303336, 3, 0x00313336, 3, - 0x00323336, 3, 0x00333336, 3, 0x00343336, 3, 0x00353336, 3, 0x00363336, 3, 0x00373336, 3, 0x00383336, 3, 0x00393336, 3, - 0x00303436, 3, 0x00313436, 3, 0x00323436, 3, 0x00333436, 3, 0x00343436, 3, 0x00353436, 3, 0x00363436, 3, 0x00373436, 3, - 0x00383436, 3, 0x00393436, 3, 0x00303536, 3, 0x00313536, 3, 0x00323536, 3, 0x00333536, 3, 0x00343536, 3, 0x00353536, 3, - 0x00363536, 3, 0x00373536, 3, 0x00383536, 3, 0x00393536, 3, 0x00303636, 3, 0x00313636, 3, 0x00323636, 3, 0x00333636, 3, - 0x00343636, 3, 0x00353636, 3, 0x00363636, 3, 0x00373636, 3, 0x00383636, 3, 0x00393636, 3, 0x00303736, 3, 0x00313736, 3, - 0x00323736, 3, 0x00333736, 3, 0x00343736, 3, 0x00353736, 3, 0x00363736, 3, 0x00373736, 3, 0x00383736, 3, 0x00393736, 3, - 0x00303836, 3, 0x00313836, 3, 0x00323836, 3, 0x00333836, 3, 0x00343836, 3, 0x00353836, 3, 0x00363836, 3, 0x00373836, 3, - 0x00383836, 3, 0x00393836, 3, 0x00303936, 3, 0x00313936, 3, 0x00323936, 3, 0x00333936, 3, 0x00343936, 3, 0x00353936, 3, - 0x00363936, 3, 0x00373936, 3, 0x00383936, 3, 0x00393936, 3, 0x00303037, 3, 0x00313037, 3, 0x00323037, 3, 0x00333037, 3, - 0x00343037, 3, 0x00353037, 3, 0x00363037, 3, 0x00373037, 3, 0x00383037, 3, 0x00393037, 3, 0x00303137, 3, 0x00313137, 3, - 0x00323137, 3, 0x00333137, 3, 0x00343137, 3, 0x00353137, 3, 0x00363137, 3, 0x00373137, 3, 0x00383137, 3, 0x00393137, 3, - 0x00303237, 3, 0x00313237, 3, 0x00323237, 3, 0x00333237, 3, 0x00343237, 3, 0x00353237, 3, 0x00363237, 3, 0x00373237, 3, - 0x00383237, 3, 0x00393237, 3, 0x00303337, 3, 0x00313337, 3, 0x00323337, 3, 0x00333337, 3, 0x00343337, 3, 0x00353337, 3, - 0x00363337, 3, 0x00373337, 3, 0x00383337, 3, 0x00393337, 3, 0x00303437, 3, 0x00313437, 3, 0x00323437, 3, 0x00333437, 3, - 0x00343437, 3, 0x00353437, 3, 0x00363437, 3, 0x00373437, 3, 0x00383437, 3, 0x00393437, 3, 0x00303537, 3, 0x00313537, 3, - 0x00323537, 3, 0x00333537, 3, 0x00343537, 3, 0x00353537, 3, 0x00363537, 3, 0x00373537, 3, 0x00383537, 3, 0x00393537, 3, - 0x00303637, 3, 0x00313637, 3, 0x00323637, 3, 0x00333637, 3, 0x00343637, 3, 0x00353637, 3, 0x00363637, 3, 0x00373637, 3, - 0x00383637, 3, 0x00393637, 3, 0x00303737, 3, 0x00313737, 3, 0x00323737, 3, 0x00333737, 3, 0x00343737, 3, 0x00353737, 3, - 0x00363737, 3, 0x00373737, 3, 0x00383737, 3, 0x00393737, 3, 0x00303837, 3, 0x00313837, 3, 0x00323837, 3, 0x00333837, 3, - 0x00343837, 3, 0x00353837, 3, 0x00363837, 3, 0x00373837, 3, 0x00383837, 3, 0x00393837, 3, 0x00303937, 3, 0x00313937, 3, - 0x00323937, 3, 0x00333937, 3, 0x00343937, 3, 0x00353937, 3, 0x00363937, 3, 0x00373937, 3, 0x00383937, 3, 0x00393937, 3, - 0x00303038, 3, 0x00313038, 3, 0x00323038, 3, 0x00333038, 3, 0x00343038, 3, 0x00353038, 3, 0x00363038, 3, 0x00373038, 3, - 0x00383038, 3, 0x00393038, 3, 0x00303138, 3, 0x00313138, 3, 0x00323138, 3, 0x00333138, 3, 0x00343138, 3, 0x00353138, 3, - 0x00363138, 3, 0x00373138, 3, 0x00383138, 3, 0x00393138, 3, 0x00303238, 3, 0x00313238, 3, 0x00323238, 3, 0x00333238, 3, - 0x00343238, 3, 0x00353238, 3, 0x00363238, 3, 0x00373238, 3, 0x00383238, 3, 0x00393238, 3, 0x00303338, 3, 0x00313338, 3, - 0x00323338, 3, 0x00333338, 3, 0x00343338, 3, 0x00353338, 3, 0x00363338, 3, 0x00373338, 3, 0x00383338, 3, 0x00393338, 3, - 0x00303438, 3, 0x00313438, 3, 0x00323438, 3, 0x00333438, 3, 0x00343438, 3, 0x00353438, 3, 0x00363438, 3, 0x00373438, 3, - 0x00383438, 3, 0x00393438, 3, 0x00303538, 3, 0x00313538, 3, 0x00323538, 3, 0x00333538, 3, 0x00343538, 3, 0x00353538, 3, - 0x00363538, 3, 0x00373538, 3, 0x00383538, 3, 0x00393538, 3, 0x00303638, 3, 0x00313638, 3, 0x00323638, 3, 0x00333638, 3, - 0x00343638, 3, 0x00353638, 3, 0x00363638, 3, 0x00373638, 3, 0x00383638, 3, 0x00393638, 3, 0x00303738, 3, 0x00313738, 3, - 0x00323738, 3, 0x00333738, 3, 0x00343738, 3, 0x00353738, 3, 0x00363738, 3, 0x00373738, 3, 0x00383738, 3, 0x00393738, 3, - 0x00303838, 3, 0x00313838, 3, 0x00323838, 3, 0x00333838, 3, 0x00343838, 3, 0x00353838, 3, 0x00363838, 3, 0x00373838, 3, - 0x00383838, 3, 0x00393838, 3, 0x00303938, 3, 0x00313938, 3, 0x00323938, 3, 0x00333938, 3, 0x00343938, 3, 0x00353938, 3, - 0x00363938, 3, 0x00373938, 3, 0x00383938, 3, 0x00393938, 3, 0x00303039, 3, 0x00313039, 3, 0x00323039, 3, 0x00333039, 3, - 0x00343039, 3, 0x00353039, 3, 0x00363039, 3, 0x00373039, 3, 0x00383039, 3, 0x00393039, 3, 0x00303139, 3, 0x00313139, 3, - 0x00323139, 3, 0x00333139, 3, 0x00343139, 3, 0x00353139, 3, 0x00363139, 3, 0x00373139, 3, 0x00383139, 3, 0x00393139, 3, - 0x00303239, 3, 0x00313239, 3, 0x00323239, 3, 0x00333239, 3, 0x00343239, 3, 0x00353239, 3, 0x00363239, 3, 0x00373239, 3, - 0x00383239, 3, 0x00393239, 3, 0x00303339, 3, 0x00313339, 3, 0x00323339, 3, 0x00333339, 3, 0x00343339, 3, 0x00353339, 3, - 0x00363339, 3, 0x00373339, 3, 0x00383339, 3, 0x00393339, 3, 0x00303439, 3, 0x00313439, 3, 0x00323439, 3, 0x00333439, 3, - 0x00343439, 3, 0x00353439, 3, 0x00363439, 3, 0x00373439, 3, 0x00383439, 3, 0x00393439, 3, 0x00303539, 3, 0x00313539, 3, - 0x00323539, 3, 0x00333539, 3, 0x00343539, 3, 0x00353539, 3, 0x00363539, 3, 0x00373539, 3, 0x00383539, 3, 0x00393539, 3, - 0x00303639, 3, 0x00313639, 3, 0x00323639, 3, 0x00333639, 3, 0x00343639, 3, 0x00353639, 3, 0x00363639, 3, 0x00373639, 3, - 0x00383639, 3, 0x00393639, 3, 0x00303739, 3, 0x00313739, 3, 0x00323739, 3, 0x00333739, 3, 0x00343739, 3, 0x00353739, 3, - 0x00363739, 3, 0x00373739, 3, 0x00383739, 3, 0x00393739, 3, 0x00303839, 3, 0x00313839, 3, 0x00323839, 3, 0x00333839, 3, - 0x00343839, 3, 0x00353839, 3, 0x00363839, 3, 0x00373839, 3, 0x00383839, 3, 0x00393839, 3, 0x00303939, 3, 0x00313939, 3, - 0x00323939, 3, 0x00333939, 3, 0x00343939, 3, 0x00353939, 3, 0x00363939, 3, 0x00373939, 3, 0x00383939, 3, 0x00393939, 3, - 0x30303031, 4, 0x31303031, 4, 0x32303031, 4, 0x33303031, 4, 0x34303031, 4, 0x35303031, 4, 0x36303031, 4, 0x37303031, 4, - 0x38303031, 4, 0x39303031, 4, 0x30313031, 4, 0x31313031, 4, 0x32313031, 4, 0x33313031, 4, 0x34313031, 4, 0x35313031, 4, - 0x36313031, 4, 0x37313031, 4, 0x38313031, 4, 0x39313031, 4, 0x30323031, 4, 0x31323031, 4, 0x32323031, 4, 0x33323031, 4, + 0x00000030, 1, 0x00000031, 1, 0x00000032, 1, 0x00000033, 1, 0x00000034, 1, 0x00000035, 1, 0x00000036, 1, 0x00000037, 1, + 0x00000038, 1, 0x00000039, 1, 0x00003031, 2, 0x00003131, 2, 0x00003231, 2, 0x00003331, 2, 0x00003431, 2, 0x00003531, 2, + 0x00003631, 2, 0x00003731, 2, 0x00003831, 2, 0x00003931, 2, 0x00003032, 2, 0x00003132, 2, 0x00003232, 2, 0x00003332, 2, + 0x00003432, 2, 0x00003532, 2, 0x00003632, 2, 0x00003732, 2, 0x00003832, 2, 0x00003932, 2, 0x00003033, 2, 0x00003133, 2, + 0x00003233, 2, 0x00003333, 2, 0x00003433, 2, 0x00003533, 2, 0x00003633, 2, 0x00003733, 2, 0x00003833, 2, 0x00003933, 2, + 0x00003034, 2, 0x00003134, 2, 0x00003234, 2, 0x00003334, 2, 0x00003434, 2, 0x00003534, 2, 0x00003634, 2, 0x00003734, 2, + 0x00003834, 2, 0x00003934, 2, 0x00003035, 2, 0x00003135, 2, 0x00003235, 2, 0x00003335, 2, 0x00003435, 2, 0x00003535, 2, + 0x00003635, 2, 0x00003735, 2, 0x00003835, 2, 0x00003935, 2, 0x00003036, 2, 0x00003136, 2, 0x00003236, 2, 0x00003336, 2, + 0x00003436, 2, 0x00003536, 2, 0x00003636, 2, 0x00003736, 2, 0x00003836, 2, 0x00003936, 2, 0x00003037, 2, 0x00003137, 2, + 0x00003237, 2, 0x00003337, 2, 0x00003437, 2, 0x00003537, 2, 0x00003637, 2, 0x00003737, 2, 0x00003837, 2, 0x00003937, 2, + 0x00003038, 2, 0x00003138, 2, 0x00003238, 2, 0x00003338, 2, 0x00003438, 2, 0x00003538, 2, 0x00003638, 2, 0x00003738, 2, + 0x00003838, 2, 0x00003938, 2, 0x00003039, 2, 0x00003139, 2, 0x00003239, 2, 0x00003339, 2, 0x00003439, 2, 0x00003539, 2, + 0x00003639, 2, 0x00003739, 2, 0x00003839, 2, 0x00003939, 2, 0x00303031, 3, 0x00313031, 3, 0x00323031, 3, 0x00333031, 3, + 0x00343031, 3, 0x00353031, 3, 0x00363031, 3, 0x00373031, 3, 0x00383031, 3, 0x00393031, 3, 0x00303131, 3, 0x00313131, 3, + 0x00323131, 3, 0x00333131, 3, 0x00343131, 3, 0x00353131, 3, 0x00363131, 3, 0x00373131, 3, 0x00383131, 3, 0x00393131, 3, + 0x00303231, 3, 0x00313231, 3, 0x00323231, 3, 0x00333231, 3, 0x00343231, 3, 0x00353231, 3, 0x00363231, 3, 0x00373231, 3, + 0x00383231, 3, 0x00393231, 3, 0x00303331, 3, 0x00313331, 3, 0x00323331, 3, 0x00333331, 3, 0x00343331, 3, 0x00353331, 3, + 0x00363331, 3, 0x00373331, 3, 0x00383331, 3, 0x00393331, 3, 0x00303431, 3, 0x00313431, 3, 0x00323431, 3, 0x00333431, 3, + 0x00343431, 3, 0x00353431, 3, 0x00363431, 3, 0x00373431, 3, 0x00383431, 3, 0x00393431, 3, 0x00303531, 3, 0x00313531, 3, + 0x00323531, 3, 0x00333531, 3, 0x00343531, 3, 0x00353531, 3, 0x00363531, 3, 0x00373531, 3, 0x00383531, 3, 0x00393531, 3, + 0x00303631, 3, 0x00313631, 3, 0x00323631, 3, 0x00333631, 3, 0x00343631, 3, 0x00353631, 3, 0x00363631, 3, 0x00373631, 3, + 0x00383631, 3, 0x00393631, 3, 0x00303731, 3, 0x00313731, 3, 0x00323731, 3, 0x00333731, 3, 0x00343731, 3, 0x00353731, 3, + 0x00363731, 3, 0x00373731, 3, 0x00383731, 3, 0x00393731, 3, 0x00303831, 3, 0x00313831, 3, 0x00323831, 3, 0x00333831, 3, + 0x00343831, 3, 0x00353831, 3, 0x00363831, 3, 0x00373831, 3, 0x00383831, 3, 0x00393831, 3, 0x00303931, 3, 0x00313931, 3, + 0x00323931, 3, 0x00333931, 3, 0x00343931, 3, 0x00353931, 3, 0x00363931, 3, 0x00373931, 3, 0x00383931, 3, 0x00393931, 3, + 0x00303032, 3, 0x00313032, 3, 0x00323032, 3, 0x00333032, 3, 0x00343032, 3, 0x00353032, 3, 0x00363032, 3, 0x00373032, 3, + 0x00383032, 3, 0x00393032, 3, 0x00303132, 3, 0x00313132, 3, 0x00323132, 3, 0x00333132, 3, 0x00343132, 3, 0x00353132, 3, + 0x00363132, 3, 0x00373132, 3, 0x00383132, 3, 0x00393132, 3, 0x00303232, 3, 0x00313232, 3, 0x00323232, 3, 0x00333232, 3, + 0x00343232, 3, 0x00353232, 3, 0x00363232, 3, 0x00373232, 3, 0x00383232, 3, 0x00393232, 3, 0x00303332, 3, 0x00313332, 3, + 0x00323332, 3, 0x00333332, 3, 0x00343332, 3, 0x00353332, 3, 0x00363332, 3, 0x00373332, 3, 0x00383332, 3, 0x00393332, 3, + 0x00303432, 3, 0x00313432, 3, 0x00323432, 3, 0x00333432, 3, 0x00343432, 3, 0x00353432, 3, 0x00363432, 3, 0x00373432, 3, + 0x00383432, 3, 0x00393432, 3, 0x00303532, 3, 0x00313532, 3, 0x00323532, 3, 0x00333532, 3, 0x00343532, 3, 0x00353532, 3, + 0x00363532, 3, 0x00373532, 3, 0x00383532, 3, 0x00393532, 3, 0x00303632, 3, 0x00313632, 3, 0x00323632, 3, 0x00333632, 3, + 0x00343632, 3, 0x00353632, 3, 0x00363632, 3, 0x00373632, 3, 0x00383632, 3, 0x00393632, 3, 0x00303732, 3, 0x00313732, 3, + 0x00323732, 3, 0x00333732, 3, 0x00343732, 3, 0x00353732, 3, 0x00363732, 3, 0x00373732, 3, 0x00383732, 3, 0x00393732, 3, + 0x00303832, 3, 0x00313832, 3, 0x00323832, 3, 0x00333832, 3, 0x00343832, 3, 0x00353832, 3, 0x00363832, 3, 0x00373832, 3, + 0x00383832, 3, 0x00393832, 3, 0x00303932, 3, 0x00313932, 3, 0x00323932, 3, 0x00333932, 3, 0x00343932, 3, 0x00353932, 3, + 0x00363932, 3, 0x00373932, 3, 0x00383932, 3, 0x00393932, 3, 0x00303033, 3, 0x00313033, 3, 0x00323033, 3, 0x00333033, 3, + 0x00343033, 3, 0x00353033, 3, 0x00363033, 3, 0x00373033, 3, 0x00383033, 3, 0x00393033, 3, 0x00303133, 3, 0x00313133, 3, + 0x00323133, 3, 0x00333133, 3, 0x00343133, 3, 0x00353133, 3, 0x00363133, 3, 0x00373133, 3, 0x00383133, 3, 0x00393133, 3, + 0x00303233, 3, 0x00313233, 3, 0x00323233, 3, 0x00333233, 3, 0x00343233, 3, 0x00353233, 3, 0x00363233, 3, 0x00373233, 3, + 0x00383233, 3, 0x00393233, 3, 0x00303333, 3, 0x00313333, 3, 0x00323333, 3, 0x00333333, 3, 0x00343333, 3, 0x00353333, 3, + 0x00363333, 3, 0x00373333, 3, 0x00383333, 3, 0x00393333, 3, 0x00303433, 3, 0x00313433, 3, 0x00323433, 3, 0x00333433, 3, + 0x00343433, 3, 0x00353433, 3, 0x00363433, 3, 0x00373433, 3, 0x00383433, 3, 0x00393433, 3, 0x00303533, 3, 0x00313533, 3, + 0x00323533, 3, 0x00333533, 3, 0x00343533, 3, 0x00353533, 3, 0x00363533, 3, 0x00373533, 3, 0x00383533, 3, 0x00393533, 3, + 0x00303633, 3, 0x00313633, 3, 0x00323633, 3, 0x00333633, 3, 0x00343633, 3, 0x00353633, 3, 0x00363633, 3, 0x00373633, 3, + 0x00383633, 3, 0x00393633, 3, 0x00303733, 3, 0x00313733, 3, 0x00323733, 3, 0x00333733, 3, 0x00343733, 3, 0x00353733, 3, + 0x00363733, 3, 0x00373733, 3, 0x00383733, 3, 0x00393733, 3, 0x00303833, 3, 0x00313833, 3, 0x00323833, 3, 0x00333833, 3, + 0x00343833, 3, 0x00353833, 3, 0x00363833, 3, 0x00373833, 3, 0x00383833, 3, 0x00393833, 3, 0x00303933, 3, 0x00313933, 3, + 0x00323933, 3, 0x00333933, 3, 0x00343933, 3, 0x00353933, 3, 0x00363933, 3, 0x00373933, 3, 0x00383933, 3, 0x00393933, 3, + 0x00303034, 3, 0x00313034, 3, 0x00323034, 3, 0x00333034, 3, 0x00343034, 3, 0x00353034, 3, 0x00363034, 3, 0x00373034, 3, + 0x00383034, 3, 0x00393034, 3, 0x00303134, 3, 0x00313134, 3, 0x00323134, 3, 0x00333134, 3, 0x00343134, 3, 0x00353134, 3, + 0x00363134, 3, 0x00373134, 3, 0x00383134, 3, 0x00393134, 3, 0x00303234, 3, 0x00313234, 3, 0x00323234, 3, 0x00333234, 3, + 0x00343234, 3, 0x00353234, 3, 0x00363234, 3, 0x00373234, 3, 0x00383234, 3, 0x00393234, 3, 0x00303334, 3, 0x00313334, 3, + 0x00323334, 3, 0x00333334, 3, 0x00343334, 3, 0x00353334, 3, 0x00363334, 3, 0x00373334, 3, 0x00383334, 3, 0x00393334, 3, + 0x00303434, 3, 0x00313434, 3, 0x00323434, 3, 0x00333434, 3, 0x00343434, 3, 0x00353434, 3, 0x00363434, 3, 0x00373434, 3, + 0x00383434, 3, 0x00393434, 3, 0x00303534, 3, 0x00313534, 3, 0x00323534, 3, 0x00333534, 3, 0x00343534, 3, 0x00353534, 3, + 0x00363534, 3, 0x00373534, 3, 0x00383534, 3, 0x00393534, 3, 0x00303634, 3, 0x00313634, 3, 0x00323634, 3, 0x00333634, 3, + 0x00343634, 3, 0x00353634, 3, 0x00363634, 3, 0x00373634, 3, 0x00383634, 3, 0x00393634, 3, 0x00303734, 3, 0x00313734, 3, + 0x00323734, 3, 0x00333734, 3, 0x00343734, 3, 0x00353734, 3, 0x00363734, 3, 0x00373734, 3, 0x00383734, 3, 0x00393734, 3, + 0x00303834, 3, 0x00313834, 3, 0x00323834, 3, 0x00333834, 3, 0x00343834, 3, 0x00353834, 3, 0x00363834, 3, 0x00373834, 3, + 0x00383834, 3, 0x00393834, 3, 0x00303934, 3, 0x00313934, 3, 0x00323934, 3, 0x00333934, 3, 0x00343934, 3, 0x00353934, 3, + 0x00363934, 3, 0x00373934, 3, 0x00383934, 3, 0x00393934, 3, 0x00303035, 3, 0x00313035, 3, 0x00323035, 3, 0x00333035, 3, + 0x00343035, 3, 0x00353035, 3, 0x00363035, 3, 0x00373035, 3, 0x00383035, 3, 0x00393035, 3, 0x00303135, 3, 0x00313135, 3, + 0x00323135, 3, 0x00333135, 3, 0x00343135, 3, 0x00353135, 3, 0x00363135, 3, 0x00373135, 3, 0x00383135, 3, 0x00393135, 3, + 0x00303235, 3, 0x00313235, 3, 0x00323235, 3, 0x00333235, 3, 0x00343235, 3, 0x00353235, 3, 0x00363235, 3, 0x00373235, 3, + 0x00383235, 3, 0x00393235, 3, 0x00303335, 3, 0x00313335, 3, 0x00323335, 3, 0x00333335, 3, 0x00343335, 3, 0x00353335, 3, + 0x00363335, 3, 0x00373335, 3, 0x00383335, 3, 0x00393335, 3, 0x00303435, 3, 0x00313435, 3, 0x00323435, 3, 0x00333435, 3, + 0x00343435, 3, 0x00353435, 3, 0x00363435, 3, 0x00373435, 3, 0x00383435, 3, 0x00393435, 3, 0x00303535, 3, 0x00313535, 3, + 0x00323535, 3, 0x00333535, 3, 0x00343535, 3, 0x00353535, 3, 0x00363535, 3, 0x00373535, 3, 0x00383535, 3, 0x00393535, 3, + 0x00303635, 3, 0x00313635, 3, 0x00323635, 3, 0x00333635, 3, 0x00343635, 3, 0x00353635, 3, 0x00363635, 3, 0x00373635, 3, + 0x00383635, 3, 0x00393635, 3, 0x00303735, 3, 0x00313735, 3, 0x00323735, 3, 0x00333735, 3, 0x00343735, 3, 0x00353735, 3, + 0x00363735, 3, 0x00373735, 3, 0x00383735, 3, 0x00393735, 3, 0x00303835, 3, 0x00313835, 3, 0x00323835, 3, 0x00333835, 3, + 0x00343835, 3, 0x00353835, 3, 0x00363835, 3, 0x00373835, 3, 0x00383835, 3, 0x00393835, 3, 0x00303935, 3, 0x00313935, 3, + 0x00323935, 3, 0x00333935, 3, 0x00343935, 3, 0x00353935, 3, 0x00363935, 3, 0x00373935, 3, 0x00383935, 3, 0x00393935, 3, + 0x00303036, 3, 0x00313036, 3, 0x00323036, 3, 0x00333036, 3, 0x00343036, 3, 0x00353036, 3, 0x00363036, 3, 0x00373036, 3, + 0x00383036, 3, 0x00393036, 3, 0x00303136, 3, 0x00313136, 3, 0x00323136, 3, 0x00333136, 3, 0x00343136, 3, 0x00353136, 3, + 0x00363136, 3, 0x00373136, 3, 0x00383136, 3, 0x00393136, 3, 0x00303236, 3, 0x00313236, 3, 0x00323236, 3, 0x00333236, 3, + 0x00343236, 3, 0x00353236, 3, 0x00363236, 3, 0x00373236, 3, 0x00383236, 3, 0x00393236, 3, 0x00303336, 3, 0x00313336, 3, + 0x00323336, 3, 0x00333336, 3, 0x00343336, 3, 0x00353336, 3, 0x00363336, 3, 0x00373336, 3, 0x00383336, 3, 0x00393336, 3, + 0x00303436, 3, 0x00313436, 3, 0x00323436, 3, 0x00333436, 3, 0x00343436, 3, 0x00353436, 3, 0x00363436, 3, 0x00373436, 3, + 0x00383436, 3, 0x00393436, 3, 0x00303536, 3, 0x00313536, 3, 0x00323536, 3, 0x00333536, 3, 0x00343536, 3, 0x00353536, 3, + 0x00363536, 3, 0x00373536, 3, 0x00383536, 3, 0x00393536, 3, 0x00303636, 3, 0x00313636, 3, 0x00323636, 3, 0x00333636, 3, + 0x00343636, 3, 0x00353636, 3, 0x00363636, 3, 0x00373636, 3, 0x00383636, 3, 0x00393636, 3, 0x00303736, 3, 0x00313736, 3, + 0x00323736, 3, 0x00333736, 3, 0x00343736, 3, 0x00353736, 3, 0x00363736, 3, 0x00373736, 3, 0x00383736, 3, 0x00393736, 3, + 0x00303836, 3, 0x00313836, 3, 0x00323836, 3, 0x00333836, 3, 0x00343836, 3, 0x00353836, 3, 0x00363836, 3, 0x00373836, 3, + 0x00383836, 3, 0x00393836, 3, 0x00303936, 3, 0x00313936, 3, 0x00323936, 3, 0x00333936, 3, 0x00343936, 3, 0x00353936, 3, + 0x00363936, 3, 0x00373936, 3, 0x00383936, 3, 0x00393936, 3, 0x00303037, 3, 0x00313037, 3, 0x00323037, 3, 0x00333037, 3, + 0x00343037, 3, 0x00353037, 3, 0x00363037, 3, 0x00373037, 3, 0x00383037, 3, 0x00393037, 3, 0x00303137, 3, 0x00313137, 3, + 0x00323137, 3, 0x00333137, 3, 0x00343137, 3, 0x00353137, 3, 0x00363137, 3, 0x00373137, 3, 0x00383137, 3, 0x00393137, 3, + 0x00303237, 3, 0x00313237, 3, 0x00323237, 3, 0x00333237, 3, 0x00343237, 3, 0x00353237, 3, 0x00363237, 3, 0x00373237, 3, + 0x00383237, 3, 0x00393237, 3, 0x00303337, 3, 0x00313337, 3, 0x00323337, 3, 0x00333337, 3, 0x00343337, 3, 0x00353337, 3, + 0x00363337, 3, 0x00373337, 3, 0x00383337, 3, 0x00393337, 3, 0x00303437, 3, 0x00313437, 3, 0x00323437, 3, 0x00333437, 3, + 0x00343437, 3, 0x00353437, 3, 0x00363437, 3, 0x00373437, 3, 0x00383437, 3, 0x00393437, 3, 0x00303537, 3, 0x00313537, 3, + 0x00323537, 3, 0x00333537, 3, 0x00343537, 3, 0x00353537, 3, 0x00363537, 3, 0x00373537, 3, 0x00383537, 3, 0x00393537, 3, + 0x00303637, 3, 0x00313637, 3, 0x00323637, 3, 0x00333637, 3, 0x00343637, 3, 0x00353637, 3, 0x00363637, 3, 0x00373637, 3, + 0x00383637, 3, 0x00393637, 3, 0x00303737, 3, 0x00313737, 3, 0x00323737, 3, 0x00333737, 3, 0x00343737, 3, 0x00353737, 3, + 0x00363737, 3, 0x00373737, 3, 0x00383737, 3, 0x00393737, 3, 0x00303837, 3, 0x00313837, 3, 0x00323837, 3, 0x00333837, 3, + 0x00343837, 3, 0x00353837, 3, 0x00363837, 3, 0x00373837, 3, 0x00383837, 3, 0x00393837, 3, 0x00303937, 3, 0x00313937, 3, + 0x00323937, 3, 0x00333937, 3, 0x00343937, 3, 0x00353937, 3, 0x00363937, 3, 0x00373937, 3, 0x00383937, 3, 0x00393937, 3, + 0x00303038, 3, 0x00313038, 3, 0x00323038, 3, 0x00333038, 3, 0x00343038, 3, 0x00353038, 3, 0x00363038, 3, 0x00373038, 3, + 0x00383038, 3, 0x00393038, 3, 0x00303138, 3, 0x00313138, 3, 0x00323138, 3, 0x00333138, 3, 0x00343138, 3, 0x00353138, 3, + 0x00363138, 3, 0x00373138, 3, 0x00383138, 3, 0x00393138, 3, 0x00303238, 3, 0x00313238, 3, 0x00323238, 3, 0x00333238, 3, + 0x00343238, 3, 0x00353238, 3, 0x00363238, 3, 0x00373238, 3, 0x00383238, 3, 0x00393238, 3, 0x00303338, 3, 0x00313338, 3, + 0x00323338, 3, 0x00333338, 3, 0x00343338, 3, 0x00353338, 3, 0x00363338, 3, 0x00373338, 3, 0x00383338, 3, 0x00393338, 3, + 0x00303438, 3, 0x00313438, 3, 0x00323438, 3, 0x00333438, 3, 0x00343438, 3, 0x00353438, 3, 0x00363438, 3, 0x00373438, 3, + 0x00383438, 3, 0x00393438, 3, 0x00303538, 3, 0x00313538, 3, 0x00323538, 3, 0x00333538, 3, 0x00343538, 3, 0x00353538, 3, + 0x00363538, 3, 0x00373538, 3, 0x00383538, 3, 0x00393538, 3, 0x00303638, 3, 0x00313638, 3, 0x00323638, 3, 0x00333638, 3, + 0x00343638, 3, 0x00353638, 3, 0x00363638, 3, 0x00373638, 3, 0x00383638, 3, 0x00393638, 3, 0x00303738, 3, 0x00313738, 3, + 0x00323738, 3, 0x00333738, 3, 0x00343738, 3, 0x00353738, 3, 0x00363738, 3, 0x00373738, 3, 0x00383738, 3, 0x00393738, 3, + 0x00303838, 3, 0x00313838, 3, 0x00323838, 3, 0x00333838, 3, 0x00343838, 3, 0x00353838, 3, 0x00363838, 3, 0x00373838, 3, + 0x00383838, 3, 0x00393838, 3, 0x00303938, 3, 0x00313938, 3, 0x00323938, 3, 0x00333938, 3, 0x00343938, 3, 0x00353938, 3, + 0x00363938, 3, 0x00373938, 3, 0x00383938, 3, 0x00393938, 3, 0x00303039, 3, 0x00313039, 3, 0x00323039, 3, 0x00333039, 3, + 0x00343039, 3, 0x00353039, 3, 0x00363039, 3, 0x00373039, 3, 0x00383039, 3, 0x00393039, 3, 0x00303139, 3, 0x00313139, 3, + 0x00323139, 3, 0x00333139, 3, 0x00343139, 3, 0x00353139, 3, 0x00363139, 3, 0x00373139, 3, 0x00383139, 3, 0x00393139, 3, + 0x00303239, 3, 0x00313239, 3, 0x00323239, 3, 0x00333239, 3, 0x00343239, 3, 0x00353239, 3, 0x00363239, 3, 0x00373239, 3, + 0x00383239, 3, 0x00393239, 3, 0x00303339, 3, 0x00313339, 3, 0x00323339, 3, 0x00333339, 3, 0x00343339, 3, 0x00353339, 3, + 0x00363339, 3, 0x00373339, 3, 0x00383339, 3, 0x00393339, 3, 0x00303439, 3, 0x00313439, 3, 0x00323439, 3, 0x00333439, 3, + 0x00343439, 3, 0x00353439, 3, 0x00363439, 3, 0x00373439, 3, 0x00383439, 3, 0x00393439, 3, 0x00303539, 3, 0x00313539, 3, + 0x00323539, 3, 0x00333539, 3, 0x00343539, 3, 0x00353539, 3, 0x00363539, 3, 0x00373539, 3, 0x00383539, 3, 0x00393539, 3, + 0x00303639, 3, 0x00313639, 3, 0x00323639, 3, 0x00333639, 3, 0x00343639, 3, 0x00353639, 3, 0x00363639, 3, 0x00373639, 3, + 0x00383639, 3, 0x00393639, 3, 0x00303739, 3, 0x00313739, 3, 0x00323739, 3, 0x00333739, 3, 0x00343739, 3, 0x00353739, 3, + 0x00363739, 3, 0x00373739, 3, 0x00383739, 3, 0x00393739, 3, 0x00303839, 3, 0x00313839, 3, 0x00323839, 3, 0x00333839, 3, + 0x00343839, 3, 0x00353839, 3, 0x00363839, 3, 0x00373839, 3, 0x00383839, 3, 0x00393839, 3, 0x00303939, 3, 0x00313939, 3, + 0x00323939, 3, 0x00333939, 3, 0x00343939, 3, 0x00353939, 3, 0x00363939, 3, 0x00373939, 3, 0x00383939, 3, 0x00393939, 3, + 0x30303031, 4, 0x31303031, 4, 0x32303031, 4, 0x33303031, 4, 0x34303031, 4, 0x35303031, 4, 0x36303031, 4, 0x37303031, 4, + 0x38303031, 4, 0x39303031, 4, 0x30313031, 4, 0x31313031, 4, 0x32313031, 4, 0x33313031, 4, 0x34313031, 4, 0x35313031, 4, + 0x36313031, 4, 0x37313031, 4, 0x38313031, 4, 0x39313031, 4, 0x30323031, 4, 0x31323031, 4, 0x32323031, 4, 0x33323031, 4, }; static void append_word (u32x w0[4], u32x w1[4], const u32x append[4], const u32 offset) diff --git a/amd/m06600.cl b/amd/m06600.cl index 27194cd..d3fe70d 100644 --- a/amd/m06600.cl +++ b/amd/m06600.cl @@ -1340,17 +1340,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06600_comp (__gl const u32 lid4 = lid * 4; - __local u32 s_td0[256]; - __local u32 s_td1[256]; - __local u32 s_td2[256]; - __local u32 s_td3[256]; - __local u32 s_td4[256]; - - __local u32 s_te0[256]; - __local u32 s_te1[256]; - __local u32 s_te2[256]; - __local u32 s_te3[256]; - __local u32 s_te4[256]; + __local u32 s_td0[256]; + __local u32 s_td1[256]; + __local u32 s_td2[256]; + __local u32 s_td3[256]; + __local u32 s_td4[256]; + + __local u32 s_te0[256]; + __local u32 s_te1[256]; + __local u32 s_te2[256]; + __local u32 s_te3[256]; + __local u32 s_te4[256]; s_td0[lid4 + 0] = td0[lid4 + 0]; s_td0[lid4 + 1] = td0[lid4 + 1]; diff --git a/amd/m08600_a0.cl b/amd/m08600_a0.cl index 581b22e..dfc6402 100644 --- a/amd/m08600_a0.cl +++ b/amd/m08600_a0.cl @@ -215,7 +215,7 @@ static void pad (u32x w[4], const u32 len) static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32x x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -247,14 +247,14 @@ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], __local static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], __local u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32x checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32x block[4]; + u32x block[4]; block[0] = saved_key[0]; block[1] = saved_key[1]; @@ -263,7 +263,7 @@ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[ mdtransform (state, checksum, block, s_lotus_magic_table); - mdtransform_norecalc (state, checksum, s_lotus_magic_table); + mdtransform_norecalc (state, checksum, s_lotus_magic_table); } __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) diff --git a/amd/m08600_a1.cl b/amd/m08600_a1.cl index 0f80755..de9644e 100644 --- a/amd/m08600_a1.cl +++ b/amd/m08600_a1.cl @@ -213,7 +213,7 @@ static void pad (u32x w[4], const u32 len) static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32x x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -245,14 +245,14 @@ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], __local static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], __local u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32x checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32x block[4]; + u32x block[4]; block[0] = saved_key[0]; block[1] = saved_key[1]; @@ -261,7 +261,7 @@ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[ mdtransform (state, checksum, block, s_lotus_magic_table); - mdtransform_norecalc (state, checksum, s_lotus_magic_table); + mdtransform_norecalc (state, checksum, s_lotus_magic_table); } __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) diff --git a/amd/m08600_a3.cl b/amd/m08600_a3.cl index e25e884..e4caa74 100644 --- a/amd/m08600_a3.cl +++ b/amd/m08600_a3.cl @@ -214,7 +214,7 @@ static void pad (u32 w[4], const u32 len) static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32x x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -246,14 +246,14 @@ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], __local static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], __local u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32x checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32x block[4]; + u32x block[4]; block[0] = saved_key[0]; block[1] = saved_key[1]; @@ -262,7 +262,7 @@ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[ mdtransform (state, checksum, block, s_lotus_magic_table); - mdtransform_norecalc (state, checksum, s_lotus_magic_table); + mdtransform_norecalc (state, checksum, s_lotus_magic_table); } static void m08600m (__local u32 s_lotus_magic_table[256], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) diff --git a/amd/m08700_a0.cl b/amd/m08700_a0.cl index 3810b8b..7c3e935 100644 --- a/amd/m08700_a0.cl +++ b/amd/m08700_a0.cl @@ -227,7 +227,7 @@ static void pad (u32x w[4], const u32 len) static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32x x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -259,14 +259,14 @@ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], __local static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], __local u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32x checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32x block[4]; + u32x block[4]; block[0] = 0; block[1] = 0; @@ -293,7 +293,7 @@ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[ mdtransform (state, checksum, block, s_lotus_magic_table); - mdtransform_norecalc (state, checksum, s_lotus_magic_table); + mdtransform_norecalc (state, checksum, s_lotus_magic_table); } __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) diff --git a/amd/m08700_a1.cl b/amd/m08700_a1.cl index 7b1060a..67760b1 100644 --- a/amd/m08700_a1.cl +++ b/amd/m08700_a1.cl @@ -225,7 +225,7 @@ static void pad (u32x w[4], const u32 len) static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32x x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -257,14 +257,14 @@ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], __local static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], __local u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32x checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32x block[4]; + u32x block[4]; block[0] = 0; block[1] = 0; @@ -291,7 +291,7 @@ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[ mdtransform (state, checksum, block, s_lotus_magic_table); - mdtransform_norecalc (state, checksum, s_lotus_magic_table); + mdtransform_norecalc (state, checksum, s_lotus_magic_table); } __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) diff --git a/amd/types_amd.c b/amd/types_amd.c index d6da9ce..8adb2ab 100644 --- a/amd/types_amd.c +++ b/amd/types_amd.c @@ -471,8 +471,8 @@ typedef struct typedef struct { - u32 data_len; - u32 data_buf[512]; + u32 data_len; + u32 data_buf[512]; } cloudkey_t; diff --git a/include/cpu-aes.c b/include/cpu-aes.c index 34879f0..d312438 100644 --- a/include/cpu-aes.c +++ b/include/cpu-aes.c @@ -973,7 +973,7 @@ static void AES256_ExpandKey (const uint *userkey, uint *rek) i = 0; j = 0; - while (1) + while (1) { uint temp = rek[j + 7]; @@ -1003,7 +1003,7 @@ static void AES256_ExpandKey (const uint *userkey, uint *rek) rek[j + 15] = rek[j + 7] ^ rek[j + 14]; j += 8; - } + } } static void AES256_InvertKey (uint *rdk) diff --git a/include/ext_OpenCL.h b/include/ext_OpenCL.h index ccbc45a..b67dc63 100644 --- a/include/ext_OpenCL.h +++ b/include/ext_OpenCL.h @@ -36,8 +36,8 @@ void hc_clEnqueueWriteBuffer (cl_command_queue command_queue, cl_mem buffer, cl_ void hc_clEnqueueCopyBuffer (cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); void hc_clFlush (cl_command_queue command_queue); void hc_clFinish (cl_command_queue command_queue); -void hc_clGetDeviceIDs (cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices); -void hc_clGetDeviceInfo (cl_device_id device, cl_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret); +void hc_clGetDeviceIDs (cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices); +void hc_clGetDeviceInfo (cl_device_id device, cl_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret); void hc_clGetPlatformIDs (cl_uint num_entries, cl_platform_id *platforms, cl_uint *num_platforms); void hc_clGetPlatformInfo (cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret); void hc_clReleaseCommandQueue (cl_command_queue command_queue); diff --git a/include/types.h b/include/types.h index b7be9bc..9864988 100644 --- a/include/types.h +++ b/include/types.h @@ -168,8 +168,8 @@ typedef struct typedef struct { - uint data_len; - uint data_buf[512]; + uint data_len; + uint data_buf[512]; } cloudkey_t; diff --git a/nv/amp_a1_v1.cu b/nv/amp_a1_v1.cu index 0b232dc..4d2ace7 100644 --- a/nv/amp_a1_v1.cu +++ b/nv/amp_a1_v1.cu @@ -279,7 +279,7 @@ __device__ static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], switch (offset % 4) { - case 0: + case 0: tmp0[0] = w0[0]; tmp0[1] = w0[1]; tmp0[2] = w0[2]; @@ -289,43 +289,43 @@ __device__ static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], tmp1[2] = w1[2]; tmp1[3] = w1[3]; tmp2[0] = 0; - break; - - case 1: - tmp0[0] = w0[0] << 8; - tmp0[1] = w0[0] >> 24 | w0[1] << 8; - tmp0[2] = w0[1] >> 24 | w0[2] << 8; - tmp0[3] = w0[2] >> 24 | w0[3] << 8; - tmp1[0] = w0[3] >> 24 | w1[0] << 8; - tmp1[1] = w1[0] >> 24 | w1[1] << 8; - tmp1[2] = w1[1] >> 24 | w1[2] << 8; - tmp1[3] = w1[2] >> 24 | w1[3] << 8; - tmp2[0] = w1[3] >> 24; - break; - - case 2: - tmp0[0] = w0[0] << 16; - tmp0[1] = w0[0] >> 16 | w0[1] << 16; - tmp0[2] = w0[1] >> 16 | w0[2] << 16; - tmp0[3] = w0[2] >> 16 | w0[3] << 16; - tmp1[0] = w0[3] >> 16 | w1[0] << 16; - tmp1[1] = w1[0] >> 16 | w1[1] << 16; - tmp1[2] = w1[1] >> 16 | w1[2] << 16; - tmp1[3] = w1[2] >> 16 | w1[3] << 16; - tmp2[0] = w1[3] >> 16; - break; - - case 3: - tmp0[0] = w0[0] << 24; - tmp0[1] = w0[0] >> 8 | w0[1] << 24; - tmp0[2] = w0[1] >> 8 | w0[2] << 24; - tmp0[3] = w0[2] >> 8 | w0[3] << 24; - tmp1[0] = w0[3] >> 8 | w1[0] << 24; - tmp1[1] = w1[0] >> 8 | w1[1] << 24; - tmp1[2] = w1[1] >> 8 | w1[2] << 24; - tmp1[3] = w1[2] >> 8 | w1[3] << 24; - tmp2[0] = w1[3] >> 8; - break; + break; + + case 1: + tmp0[0] = w0[0] << 8; + tmp0[1] = w0[0] >> 24 | w0[1] << 8; + tmp0[2] = w0[1] >> 24 | w0[2] << 8; + tmp0[3] = w0[2] >> 24 | w0[3] << 8; + tmp1[0] = w0[3] >> 24 | w1[0] << 8; + tmp1[1] = w1[0] >> 24 | w1[1] << 8; + tmp1[2] = w1[1] >> 24 | w1[2] << 8; + tmp1[3] = w1[2] >> 24 | w1[3] << 8; + tmp2[0] = w1[3] >> 24; + break; + + case 2: + tmp0[0] = w0[0] << 16; + tmp0[1] = w0[0] >> 16 | w0[1] << 16; + tmp0[2] = w0[1] >> 16 | w0[2] << 16; + tmp0[3] = w0[2] >> 16 | w0[3] << 16; + tmp1[0] = w0[3] >> 16 | w1[0] << 16; + tmp1[1] = w1[0] >> 16 | w1[1] << 16; + tmp1[2] = w1[1] >> 16 | w1[2] << 16; + tmp1[3] = w1[2] >> 16 | w1[3] << 16; + tmp2[0] = w1[3] >> 16; + break; + + case 3: + tmp0[0] = w0[0] << 24; + tmp0[1] = w0[0] >> 8 | w0[1] << 24; + tmp0[2] = w0[1] >> 8 | w0[2] << 24; + tmp0[3] = w0[2] >> 8 | w0[3] << 24; + tmp1[0] = w0[3] >> 8 | w1[0] << 24; + tmp1[1] = w1[0] >> 8 | w1[1] << 24; + tmp1[2] = w1[1] >> 8 | w1[2] << 24; + tmp1[3] = w1[2] >> 8 | w1[3] << 24; + tmp2[0] = w1[3] >> 8; + break; } switch (offset / 4) diff --git a/nv/amp_a1_v2.cu b/nv/amp_a1_v2.cu index 36228ef..f000471 100644 --- a/nv/amp_a1_v2.cu +++ b/nv/amp_a1_v2.cu @@ -279,7 +279,7 @@ __device__ static void switch_buffer_by_offset (u32x w0[4], u32x w1[4], u32x w2[ switch (offset % 4) { - case 0: + case 0: tmp0[0] = w0[0]; tmp0[1] = w0[1]; tmp0[2] = w0[2]; @@ -289,43 +289,43 @@ __device__ static void switch_buffer_by_offset (u32x w0[4], u32x w1[4], u32x w2[ tmp1[2] = w1[2]; tmp1[3] = w1[3]; tmp2[0] = 0; - break; - - case 1: - tmp0[0] = w0[0] << 8; - tmp0[1] = w0[0] >> 24 | w0[1] << 8; - tmp0[2] = w0[1] >> 24 | w0[2] << 8; - tmp0[3] = w0[2] >> 24 | w0[3] << 8; - tmp1[0] = w0[3] >> 24 | w1[0] << 8; - tmp1[1] = w1[0] >> 24 | w1[1] << 8; - tmp1[2] = w1[1] >> 24 | w1[2] << 8; - tmp1[3] = w1[2] >> 24 | w1[3] << 8; - tmp2[0] = w1[3] >> 24; - break; - - case 2: - tmp0[0] = w0[0] << 16; - tmp0[1] = w0[0] >> 16 | w0[1] << 16; - tmp0[2] = w0[1] >> 16 | w0[2] << 16; - tmp0[3] = w0[2] >> 16 | w0[3] << 16; - tmp1[0] = w0[3] >> 16 | w1[0] << 16; - tmp1[1] = w1[0] >> 16 | w1[1] << 16; - tmp1[2] = w1[1] >> 16 | w1[2] << 16; - tmp1[3] = w1[2] >> 16 | w1[3] << 16; - tmp2[0] = w1[3] >> 16; - break; - - case 3: - tmp0[0] = w0[0] << 24; - tmp0[1] = w0[0] >> 8 | w0[1] << 24; - tmp0[2] = w0[1] >> 8 | w0[2] << 24; - tmp0[3] = w0[2] >> 8 | w0[3] << 24; - tmp1[0] = w0[3] >> 8 | w1[0] << 24; - tmp1[1] = w1[0] >> 8 | w1[1] << 24; - tmp1[2] = w1[1] >> 8 | w1[2] << 24; - tmp1[3] = w1[2] >> 8 | w1[3] << 24; - tmp2[0] = w1[3] >> 8; - break; + break; + + case 1: + tmp0[0] = w0[0] << 8; + tmp0[1] = w0[0] >> 24 | w0[1] << 8; + tmp0[2] = w0[1] >> 24 | w0[2] << 8; + tmp0[3] = w0[2] >> 24 | w0[3] << 8; + tmp1[0] = w0[3] >> 24 | w1[0] << 8; + tmp1[1] = w1[0] >> 24 | w1[1] << 8; + tmp1[2] = w1[1] >> 24 | w1[2] << 8; + tmp1[3] = w1[2] >> 24 | w1[3] << 8; + tmp2[0] = w1[3] >> 24; + break; + + case 2: + tmp0[0] = w0[0] << 16; + tmp0[1] = w0[0] >> 16 | w0[1] << 16; + tmp0[2] = w0[1] >> 16 | w0[2] << 16; + tmp0[3] = w0[2] >> 16 | w0[3] << 16; + tmp1[0] = w0[3] >> 16 | w1[0] << 16; + tmp1[1] = w1[0] >> 16 | w1[1] << 16; + tmp1[2] = w1[1] >> 16 | w1[2] << 16; + tmp1[3] = w1[2] >> 16 | w1[3] << 16; + tmp2[0] = w1[3] >> 16; + break; + + case 3: + tmp0[0] = w0[0] << 24; + tmp0[1] = w0[0] >> 8 | w0[1] << 24; + tmp0[2] = w0[1] >> 8 | w0[2] << 24; + tmp0[3] = w0[2] >> 8 | w0[3] << 24; + tmp1[0] = w0[3] >> 8 | w1[0] << 24; + tmp1[1] = w1[0] >> 8 | w1[1] << 24; + tmp1[2] = w1[1] >> 8 | w1[2] << 24; + tmp1[3] = w1[2] >> 8 | w1[3] << 24; + tmp2[0] = w1[3] >> 8; + break; } switch (offset / 4) diff --git a/nv/amp_a1_v4.cu b/nv/amp_a1_v4.cu index c8e6e08..abd999a 100644 --- a/nv/amp_a1_v4.cu +++ b/nv/amp_a1_v4.cu @@ -279,7 +279,7 @@ __device__ static void switch_buffer_by_offset (u32x w0[4], u32x w1[4], u32x w2[ switch (offset % 4) { - case 0: + case 0: tmp0[0] = w0[0]; tmp0[1] = w0[1]; tmp0[2] = w0[2]; @@ -289,43 +289,43 @@ __device__ static void switch_buffer_by_offset (u32x w0[4], u32x w1[4], u32x w2[ tmp1[2] = w1[2]; tmp1[3] = w1[3]; tmp2[0] = 0; - break; - - case 1: - tmp0[0] = w0[0] << 8; - tmp0[1] = w0[0] >> 24 | w0[1] << 8; - tmp0[2] = w0[1] >> 24 | w0[2] << 8; - tmp0[3] = w0[2] >> 24 | w0[3] << 8; - tmp1[0] = w0[3] >> 24 | w1[0] << 8; - tmp1[1] = w1[0] >> 24 | w1[1] << 8; - tmp1[2] = w1[1] >> 24 | w1[2] << 8; - tmp1[3] = w1[2] >> 24 | w1[3] << 8; - tmp2[0] = w1[3] >> 24; - break; - - case 2: - tmp0[0] = w0[0] << 16; - tmp0[1] = w0[0] >> 16 | w0[1] << 16; - tmp0[2] = w0[1] >> 16 | w0[2] << 16; - tmp0[3] = w0[2] >> 16 | w0[3] << 16; - tmp1[0] = w0[3] >> 16 | w1[0] << 16; - tmp1[1] = w1[0] >> 16 | w1[1] << 16; - tmp1[2] = w1[1] >> 16 | w1[2] << 16; - tmp1[3] = w1[2] >> 16 | w1[3] << 16; - tmp2[0] = w1[3] >> 16; - break; - - case 3: - tmp0[0] = w0[0] << 24; - tmp0[1] = w0[0] >> 8 | w0[1] << 24; - tmp0[2] = w0[1] >> 8 | w0[2] << 24; - tmp0[3] = w0[2] >> 8 | w0[3] << 24; - tmp1[0] = w0[3] >> 8 | w1[0] << 24; - tmp1[1] = w1[0] >> 8 | w1[1] << 24; - tmp1[2] = w1[1] >> 8 | w1[2] << 24; - tmp1[3] = w1[2] >> 8 | w1[3] << 24; - tmp2[0] = w1[3] >> 8; - break; + break; + + case 1: + tmp0[0] = w0[0] << 8; + tmp0[1] = w0[0] >> 24 | w0[1] << 8; + tmp0[2] = w0[1] >> 24 | w0[2] << 8; + tmp0[3] = w0[2] >> 24 | w0[3] << 8; + tmp1[0] = w0[3] >> 24 | w1[0] << 8; + tmp1[1] = w1[0] >> 24 | w1[1] << 8; + tmp1[2] = w1[1] >> 24 | w1[2] << 8; + tmp1[3] = w1[2] >> 24 | w1[3] << 8; + tmp2[0] = w1[3] >> 24; + break; + + case 2: + tmp0[0] = w0[0] << 16; + tmp0[1] = w0[0] >> 16 | w0[1] << 16; + tmp0[2] = w0[1] >> 16 | w0[2] << 16; + tmp0[3] = w0[2] >> 16 | w0[3] << 16; + tmp1[0] = w0[3] >> 16 | w1[0] << 16; + tmp1[1] = w1[0] >> 16 | w1[1] << 16; + tmp1[2] = w1[1] >> 16 | w1[2] << 16; + tmp1[3] = w1[2] >> 16 | w1[3] << 16; + tmp2[0] = w1[3] >> 16; + break; + + case 3: + tmp0[0] = w0[0] << 24; + tmp0[1] = w0[0] >> 8 | w0[1] << 24; + tmp0[2] = w0[1] >> 8 | w0[2] << 24; + tmp0[3] = w0[2] >> 8 | w0[3] << 24; + tmp1[0] = w0[3] >> 8 | w1[0] << 24; + tmp1[1] = w1[0] >> 8 | w1[1] << 24; + tmp1[2] = w1[1] >> 8 | w1[2] << 24; + tmp1[3] = w1[2] >> 8 | w1[3] << 24; + tmp2[0] = w1[3] >> 8; + break; } switch (offset / 4) diff --git a/nv/common_nv.c b/nv/common_nv.c index 7ff8799..86fe19a 100644 --- a/nv/common_nv.c +++ b/nv/common_nv.c @@ -7004,7 +7004,7 @@ __device__ static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], switch (offset % 4) { - case 0: + case 0: tmp0[0] = w0[0]; tmp0[1] = w0[1]; tmp0[2] = w0[2]; @@ -7014,43 +7014,43 @@ __device__ static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], tmp1[2] = w1[2]; tmp1[3] = w1[3]; tmp2[0] = 0; - break; - - case 1: - tmp0[0] = w0[0] << 8; - tmp0[1] = w0[0] >> 24 | w0[1] << 8; - tmp0[2] = w0[1] >> 24 | w0[2] << 8; - tmp0[3] = w0[2] >> 24 | w0[3] << 8; - tmp1[0] = w0[3] >> 24 | w1[0] << 8; - tmp1[1] = w1[0] >> 24 | w1[1] << 8; - tmp1[2] = w1[1] >> 24 | w1[2] << 8; - tmp1[3] = w1[2] >> 24 | w1[3] << 8; - tmp2[0] = w1[3] >> 24; - break; - - case 2: - tmp0[0] = w0[0] << 16; - tmp0[1] = w0[0] >> 16 | w0[1] << 16; - tmp0[2] = w0[1] >> 16 | w0[2] << 16; - tmp0[3] = w0[2] >> 16 | w0[3] << 16; - tmp1[0] = w0[3] >> 16 | w1[0] << 16; - tmp1[1] = w1[0] >> 16 | w1[1] << 16; - tmp1[2] = w1[1] >> 16 | w1[2] << 16; - tmp1[3] = w1[2] >> 16 | w1[3] << 16; - tmp2[0] = w1[3] >> 16; - break; - - case 3: - tmp0[0] = w0[0] << 24; - tmp0[1] = w0[0] >> 8 | w0[1] << 24; - tmp0[2] = w0[1] >> 8 | w0[2] << 24; - tmp0[3] = w0[2] >> 8 | w0[3] << 24; - tmp1[0] = w0[3] >> 8 | w1[0] << 24; - tmp1[1] = w1[0] >> 8 | w1[1] << 24; - tmp1[2] = w1[1] >> 8 | w1[2] << 24; - tmp1[3] = w1[2] >> 8 | w1[3] << 24; - tmp2[0] = w1[3] >> 8; - break; + break; + + case 1: + tmp0[0] = w0[0] << 8; + tmp0[1] = w0[0] >> 24 | w0[1] << 8; + tmp0[2] = w0[1] >> 24 | w0[2] << 8; + tmp0[3] = w0[2] >> 24 | w0[3] << 8; + tmp1[0] = w0[3] >> 24 | w1[0] << 8; + tmp1[1] = w1[0] >> 24 | w1[1] << 8; + tmp1[2] = w1[1] >> 24 | w1[2] << 8; + tmp1[3] = w1[2] >> 24 | w1[3] << 8; + tmp2[0] = w1[3] >> 24; + break; + + case 2: + tmp0[0] = w0[0] << 16; + tmp0[1] = w0[0] >> 16 | w0[1] << 16; + tmp0[2] = w0[1] >> 16 | w0[2] << 16; + tmp0[3] = w0[2] >> 16 | w0[3] << 16; + tmp1[0] = w0[3] >> 16 | w1[0] << 16; + tmp1[1] = w1[0] >> 16 | w1[1] << 16; + tmp1[2] = w1[1] >> 16 | w1[2] << 16; + tmp1[3] = w1[2] >> 16 | w1[3] << 16; + tmp2[0] = w1[3] >> 16; + break; + + case 3: + tmp0[0] = w0[0] << 24; + tmp0[1] = w0[0] >> 8 | w0[1] << 24; + tmp0[2] = w0[1] >> 8 | w0[2] << 24; + tmp0[3] = w0[2] >> 8 | w0[3] << 24; + tmp1[0] = w0[3] >> 8 | w1[0] << 24; + tmp1[1] = w1[0] >> 8 | w1[1] << 24; + tmp1[2] = w1[1] >> 8 | w1[2] << 24; + tmp1[3] = w1[2] >> 8 | w1[3] << 24; + tmp2[0] = w1[3] >> 8; + break; } switch (offset / 4) @@ -14452,7 +14452,7 @@ __device__ static void switch_buffer_by_offset (u32x w0[4], u32x w1[4], u32x w2[ switch (offset % 4) { - case 0: + case 0: tmp0[0] = w0[0]; tmp0[1] = w0[1]; tmp0[2] = w0[2]; @@ -14462,43 +14462,43 @@ __device__ static void switch_buffer_by_offset (u32x w0[4], u32x w1[4], u32x w2[ tmp1[2] = w1[2]; tmp1[3] = w1[3]; tmp2[0] = 0; - break; - - case 1: - tmp0[0] = w0[0] << 8; - tmp0[1] = w0[0] >> 24 | w0[1] << 8; - tmp0[2] = w0[1] >> 24 | w0[2] << 8; - tmp0[3] = w0[2] >> 24 | w0[3] << 8; - tmp1[0] = w0[3] >> 24 | w1[0] << 8; - tmp1[1] = w1[0] >> 24 | w1[1] << 8; - tmp1[2] = w1[1] >> 24 | w1[2] << 8; - tmp1[3] = w1[2] >> 24 | w1[3] << 8; - tmp2[0] = w1[3] >> 24; - break; - - case 2: - tmp0[0] = w0[0] << 16; - tmp0[1] = w0[0] >> 16 | w0[1] << 16; - tmp0[2] = w0[1] >> 16 | w0[2] << 16; - tmp0[3] = w0[2] >> 16 | w0[3] << 16; - tmp1[0] = w0[3] >> 16 | w1[0] << 16; - tmp1[1] = w1[0] >> 16 | w1[1] << 16; - tmp1[2] = w1[1] >> 16 | w1[2] << 16; - tmp1[3] = w1[2] >> 16 | w1[3] << 16; - tmp2[0] = w1[3] >> 16; - break; - - case 3: - tmp0[0] = w0[0] << 24; - tmp0[1] = w0[0] >> 8 | w0[1] << 24; - tmp0[2] = w0[1] >> 8 | w0[2] << 24; - tmp0[3] = w0[2] >> 8 | w0[3] << 24; - tmp1[0] = w0[3] >> 8 | w1[0] << 24; - tmp1[1] = w1[0] >> 8 | w1[1] << 24; - tmp1[2] = w1[1] >> 8 | w1[2] << 24; - tmp1[3] = w1[2] >> 8 | w1[3] << 24; - tmp2[0] = w1[3] >> 8; - break; + break; + + case 1: + tmp0[0] = w0[0] << 8; + tmp0[1] = w0[0] >> 24 | w0[1] << 8; + tmp0[2] = w0[1] >> 24 | w0[2] << 8; + tmp0[3] = w0[2] >> 24 | w0[3] << 8; + tmp1[0] = w0[3] >> 24 | w1[0] << 8; + tmp1[1] = w1[0] >> 24 | w1[1] << 8; + tmp1[2] = w1[1] >> 24 | w1[2] << 8; + tmp1[3] = w1[2] >> 24 | w1[3] << 8; + tmp2[0] = w1[3] >> 24; + break; + + case 2: + tmp0[0] = w0[0] << 16; + tmp0[1] = w0[0] >> 16 | w0[1] << 16; + tmp0[2] = w0[1] >> 16 | w0[2] << 16; + tmp0[3] = w0[2] >> 16 | w0[3] << 16; + tmp1[0] = w0[3] >> 16 | w1[0] << 16; + tmp1[1] = w1[0] >> 16 | w1[1] << 16; + tmp1[2] = w1[1] >> 16 | w1[2] << 16; + tmp1[3] = w1[2] >> 16 | w1[3] << 16; + tmp2[0] = w1[3] >> 16; + break; + + case 3: + tmp0[0] = w0[0] << 24; + tmp0[1] = w0[0] >> 8 | w0[1] << 24; + tmp0[2] = w0[1] >> 8 | w0[2] << 24; + tmp0[3] = w0[2] >> 8 | w0[3] << 24; + tmp1[0] = w0[3] >> 8 | w1[0] << 24; + tmp1[1] = w1[0] >> 8 | w1[1] << 24; + tmp1[2] = w1[1] >> 8 | w1[2] << 24; + tmp1[3] = w1[2] >> 8 | w1[3] << 24; + tmp2[0] = w1[3] >> 8; + break; } switch (offset / 4) diff --git a/nv/gpu_aes256_nv.c b/nv/gpu_aes256_nv.c index 825f191..8011899 100644 --- a/nv/gpu_aes256_nv.c +++ b/nv/gpu_aes256_nv.c @@ -707,7 +707,7 @@ __device__ static void aes256_ExpandKey (u32 *ks, const u32 *ukey) i = 0; j = 0; - while (1) + while (1) { u32 temp = ks[j + 7]; @@ -737,7 +737,7 @@ __device__ static void aes256_ExpandKey (u32 *ks, const u32 *ukey) ks[j + 15] = ks[j + 7] ^ ks[j + 14]; j += 8; - } + } } __device__ static void aes256_InvertKey (u32 *ks) diff --git a/nv/m01500_a3.cu b/nv/m01500_a3.cu index 0804f3d..f28e09a 100644 --- a/nv/m01500_a3.cu +++ b/nv/m01500_a3.cu @@ -80,265 +80,262 @@ __device__ __shared__ u32 s_S[64]; __device__ static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) { - LUT(xAA55AA5500550055, a1, a4, a6, 0xC1) - LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E) - LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6) - LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56) - LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C) - LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B) - LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6) - LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A) - LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A) - LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9) - LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72) - LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29) - LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95) - LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6) - LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2) - LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90) - LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76) - LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80) - LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6) - LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6) - LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21) - LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A) - LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70) - LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97) - LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; + LUT(xAA55AA5500550055, a1, a4, a6, 0xC1) + LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E) + LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6) + LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56) + LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C) + LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B) + LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6) + LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A) + LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A) + LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9) + LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72) + LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29) + LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95) + LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6) + LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2) + LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90) + LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76) + LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80) + LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6) + LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6) + LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21) + LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A) + LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70) + LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97) + LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; } __device__ static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) { - LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97) - LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67) - LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76) - LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69) - LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A) - LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65) - LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D) - LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA) - LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6) - LUT(x3333CCCC00000000, a2, a5, a6, 0x14) - LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5) - LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C) - LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96) - LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A) - LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE) - LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90) - LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79) - LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29) - LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6) - LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A) - LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF) - LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D) - LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B) - LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; + LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97) + LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67) + LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76) + LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69) + LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A) + LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65) + LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D) + LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA) + LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6) + LUT(x3333CCCC00000000, a2, a5, a6, 0x14) + LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5) + LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C) + LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96) + LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A) + LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE) + LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90) + LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79) + LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29) + LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6) + LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A) + LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF) + LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D) + LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B) + LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; } __device__ static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) { - LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9) - LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B) - LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D) - LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69) - LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6) - LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C) - LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6) - LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49) - LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B) - LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66) - LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F) - LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB) - LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C) - LUT(x500F500F500F500F, a1, a3, a4, 0x98) - LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D) - LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A) - LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2) - LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D) - LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6) - LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6) - LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6) - LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB) - LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9) - LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B) - LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; + LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9) + LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B) + LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D) + LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69) + LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6) + LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C) + LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6) + LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49) + LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B) + LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66) + LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F) + LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB) + LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C) + LUT(x500F500F500F500F, a1, a3, a4, 0x98) + LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D) + LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A) + LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2) + LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D) + LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6) + LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6) + LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6) + LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB) + LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9) + LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B) + LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; } __device__ static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) { - LUT(x55F055F055F055F0, a1, a3, a4, 0x72) - LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD) - LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59) - LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7) - LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6) - LUT(x9999666699996666, a1, a2, a5, 0x69) - LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18) - LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63) - LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A) - LUT(x4848484848484848, a1, a2, a3, 0x12) - LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28) - LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E) - LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14) - LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78) - LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6) - LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A) - LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9) - LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; + LUT(x55F055F055F055F0, a1, a3, a4, 0x72) + LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD) + LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59) + LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7) + LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6) + LUT(x9999666699996666, a1, a2, a5, 0x69) + LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18) + LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63) + LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A) + LUT(x4848484848484848, a1, a2, a3, 0x12) + LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28) + LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E) + LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14) + LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78) + LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6) + LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A) + LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9) + LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; } __device__ static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) { - LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB) - LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9) - LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8) - LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34) - LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE) - LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29) - LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6) - LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49) - LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96) - LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA) - LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E) - LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96) - LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA) - LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5) - LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97) - LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47) - LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B) - LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9) - LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA) - LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1) - LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47) - LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E) - LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94) - LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9) - LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; + LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB) + LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9) + LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8) + LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34) + LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE) + LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29) + LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6) + LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49) + LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96) + LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA) + LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E) + LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96) + LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA) + LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5) + LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97) + LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47) + LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B) + LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9) + LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA) + LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1) + LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47) + LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E) + LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94) + LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9) + LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; } __device__ static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) { - LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2) - LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66) - LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9) - LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9) - LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6) - LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD) - LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4) - LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C) - LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20) - LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69) - LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E) - LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49) - LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93) - LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C) - LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4) - LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76) - LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD) - LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86) - LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6) - LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D) - LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26) - LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B) - LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2) - LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA) - - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; + LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2) + LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66) + LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9) + LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9) + LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6) + LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD) + LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4) + LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C) + LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20) + LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69) + LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E) + LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49) + LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93) + LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C) + LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4) + LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76) + LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD) + LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86) + LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6) + LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D) + LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26) + LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B) + LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2) + LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; } __device__ static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) { - LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B) - LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27) - LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E) - LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6) - LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B) - LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9) - LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A) - LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63) - LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7) - LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93) - LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D) - LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E) - LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6) - LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D) - LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6) - LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23) - LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72) - LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC) - LUT(xA050A050A050A050, a1, a3, a4, 0x21) - LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4) - LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96) - LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E) - LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B) - LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6) - - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; + LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B) + LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27) + LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E) + LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6) + LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B) + LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9) + LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A) + LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63) + LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7) + LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93) + LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D) + LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E) + LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6) + LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D) + LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6) + LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23) + LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72) + LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC) + LUT(xA050A050A050A050, a1, a3, a4, 0x21) + LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4) + LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96) + LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E) + LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B) + LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; } __device__ static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) { - LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D) - LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83) - LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B) - LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85) - LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6) - LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9) - LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E) - LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61) - LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C) - LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF) - LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4) - LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69) - LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F) - LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9) - LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6) - LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C) - LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71) - LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9) - LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69) - LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A) - LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2) - LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C) - LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39) - - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; + LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D) + LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83) + LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B) + LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85) + LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6) + LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9) + LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E) + LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61) + LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C) + LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF) + LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4) + LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69) + LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F) + LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9) + LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6) + LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C) + LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71) + LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9) + LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69) + LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A) + LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2) + LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C) + LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; } #else diff --git a/nv/m03000_a3.cu b/nv/m03000_a3.cu index 9d6d9d9..326ae26 100644 --- a/nv/m03000_a3.cu +++ b/nv/m03000_a3.cu @@ -77,265 +77,262 @@ __device__ __shared__ u32 s_S[64]; __device__ static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) { - LUT(xAA55AA5500550055, a1, a4, a6, 0xC1) - LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E) - LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6) - LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56) - LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C) - LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B) - LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6) - LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A) - LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A) - LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9) - LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72) - LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29) - LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95) - LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6) - LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2) - LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90) - LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76) - LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80) - LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6) - LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6) - LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21) - LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A) - LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70) - LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97) - LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; + LUT(xAA55AA5500550055, a1, a4, a6, 0xC1) + LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E) + LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6) + LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56) + LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C) + LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B) + LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6) + LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A) + LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A) + LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9) + LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72) + LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29) + LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95) + LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6) + LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2) + LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90) + LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76) + LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80) + LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6) + LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6) + LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21) + LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A) + LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70) + LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97) + LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; } __device__ static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) { - LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97) - LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67) - LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76) - LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69) - LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A) - LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65) - LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D) - LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA) - LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6) - LUT(x3333CCCC00000000, a2, a5, a6, 0x14) - LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5) - LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C) - LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96) - LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A) - LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE) - LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90) - LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79) - LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29) - LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6) - LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A) - LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF) - LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D) - LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B) - LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; + LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97) + LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67) + LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76) + LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69) + LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A) + LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65) + LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D) + LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA) + LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6) + LUT(x3333CCCC00000000, a2, a5, a6, 0x14) + LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5) + LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C) + LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96) + LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A) + LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE) + LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90) + LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79) + LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29) + LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6) + LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A) + LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF) + LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D) + LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B) + LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; } __device__ static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) { - LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9) - LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B) - LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D) - LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69) - LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6) - LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C) - LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6) - LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49) - LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B) - LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66) - LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F) - LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB) - LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C) - LUT(x500F500F500F500F, a1, a3, a4, 0x98) - LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D) - LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A) - LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2) - LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D) - LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6) - LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6) - LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6) - LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB) - LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9) - LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B) - LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; + LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9) + LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B) + LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D) + LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69) + LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6) + LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C) + LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6) + LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49) + LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B) + LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66) + LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F) + LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB) + LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C) + LUT(x500F500F500F500F, a1, a3, a4, 0x98) + LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D) + LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A) + LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2) + LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D) + LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6) + LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6) + LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6) + LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB) + LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9) + LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B) + LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; } __device__ static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) { - LUT(x55F055F055F055F0, a1, a3, a4, 0x72) - LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD) - LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59) - LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7) - LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6) - LUT(x9999666699996666, a1, a2, a5, 0x69) - LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18) - LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63) - LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A) - LUT(x4848484848484848, a1, a2, a3, 0x12) - LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28) - LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E) - LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14) - LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78) - LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6) - LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A) - LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9) - LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; + LUT(x55F055F055F055F0, a1, a3, a4, 0x72) + LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD) + LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59) + LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7) + LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6) + LUT(x9999666699996666, a1, a2, a5, 0x69) + LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18) + LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63) + LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A) + LUT(x4848484848484848, a1, a2, a3, 0x12) + LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28) + LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E) + LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14) + LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78) + LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6) + LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A) + LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9) + LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; } __device__ static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) { - LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB) - LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9) - LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8) - LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34) - LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE) - LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29) - LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6) - LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49) - LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96) - LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA) - LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E) - LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96) - LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA) - LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5) - LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97) - LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47) - LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B) - LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9) - LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA) - LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1) - LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47) - LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E) - LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94) - LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9) - LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; + LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB) + LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9) + LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8) + LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34) + LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE) + LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29) + LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6) + LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49) + LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96) + LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA) + LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E) + LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96) + LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA) + LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5) + LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97) + LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47) + LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B) + LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9) + LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA) + LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1) + LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47) + LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E) + LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94) + LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9) + LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; } __device__ static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) { - LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2) - LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66) - LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9) - LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9) - LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6) - LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD) - LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4) - LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C) - LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20) - LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69) - LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E) - LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49) - LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93) - LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C) - LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4) - LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76) - LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD) - LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86) - LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6) - LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D) - LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26) - LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B) - LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2) - LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA) - - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; + LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2) + LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66) + LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9) + LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9) + LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6) + LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD) + LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4) + LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C) + LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20) + LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69) + LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E) + LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49) + LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93) + LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C) + LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4) + LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76) + LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD) + LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86) + LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6) + LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D) + LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26) + LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B) + LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2) + LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; } __device__ static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) { - LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B) - LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27) - LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E) - LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6) - LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B) - LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9) - LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A) - LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63) - LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7) - LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93) - LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D) - LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E) - LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6) - LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D) - LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6) - LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23) - LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72) - LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC) - LUT(xA050A050A050A050, a1, a3, a4, 0x21) - LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4) - LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96) - LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E) - LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B) - LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6) - - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; + LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B) + LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27) + LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E) + LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6) + LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B) + LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9) + LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A) + LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63) + LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7) + LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93) + LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D) + LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E) + LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6) + LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D) + LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6) + LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23) + LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72) + LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC) + LUT(xA050A050A050A050, a1, a3, a4, 0x21) + LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4) + LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96) + LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E) + LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B) + LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; } __device__ static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) { - LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D) - LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83) - LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B) - LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85) - LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6) - LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9) - LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E) - LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61) - LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C) - LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF) - LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4) - LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69) - LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F) - LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9) - LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6) - LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C) - LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71) - LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9) - LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69) - LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A) - LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2) - LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C) - LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39) - - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; + LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D) + LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83) + LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B) + LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85) + LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6) + LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9) + LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E) + LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61) + LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C) + LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF) + LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4) + LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69) + LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F) + LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9) + LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6) + LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C) + LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71) + LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9) + LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69) + LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A) + LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2) + LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C) + LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39) + + *out1 ^= x1; + *out2 ^= x2; + *out3 ^= x3; + *out4 ^= x4; } #else diff --git a/nv/m03100_a3.cu b/nv/m03100_a3.cu index 1efe7a9..85af90f 100644 --- a/nv/m03100_a3.cu +++ b/nv/m03100_a3.cu @@ -376,7 +376,7 @@ __device__ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16] IP (r, l, tt); r = rotl32 (r, 3u); - l = rotl32 (l, 3u); + l = rotl32 (l, 3u); #pragma unroll 16 for (int i = 0; i < 16; i++) diff --git a/nv/m03200.cu b/nv/m03200.cu index 3a4e02d..90f7990 100644 --- a/nv/m03200.cu +++ b/nv/m03200.cu @@ -343,9 +343,9 @@ __device__ __constant__ u32 c_pbox[18] = BF_ROUND (R, L, 14); \ BF_ROUND (L, R, 15); \ BF_ROUND (R, L, 16); \ - tmp = R; \ - R = L; \ - L = tmp ^ P[17]; \ + tmp = R; \ + R = L; \ + L = tmp ^ P[17]; \ } __device__ static void expand_key (u32x E[34], const u32x W[16], const u32 len) diff --git a/nv/m05800.cu b/nv/m05800.cu index d2a0371..ce52532 100644 --- a/nv/m05800.cu +++ b/nv/m05800.cu @@ -42,134 +42,134 @@ typedef struct __device__ __constant__ entry_t pc[1024] = { - 0x00000030, 1, 0x00000031, 1, 0x00000032, 1, 0x00000033, 1, 0x00000034, 1, 0x00000035, 1, 0x00000036, 1, 0x00000037, 1, - 0x00000038, 1, 0x00000039, 1, 0x00003031, 2, 0x00003131, 2, 0x00003231, 2, 0x00003331, 2, 0x00003431, 2, 0x00003531, 2, - 0x00003631, 2, 0x00003731, 2, 0x00003831, 2, 0x00003931, 2, 0x00003032, 2, 0x00003132, 2, 0x00003232, 2, 0x00003332, 2, - 0x00003432, 2, 0x00003532, 2, 0x00003632, 2, 0x00003732, 2, 0x00003832, 2, 0x00003932, 2, 0x00003033, 2, 0x00003133, 2, - 0x00003233, 2, 0x00003333, 2, 0x00003433, 2, 0x00003533, 2, 0x00003633, 2, 0x00003733, 2, 0x00003833, 2, 0x00003933, 2, - 0x00003034, 2, 0x00003134, 2, 0x00003234, 2, 0x00003334, 2, 0x00003434, 2, 0x00003534, 2, 0x00003634, 2, 0x00003734, 2, - 0x00003834, 2, 0x00003934, 2, 0x00003035, 2, 0x00003135, 2, 0x00003235, 2, 0x00003335, 2, 0x00003435, 2, 0x00003535, 2, - 0x00003635, 2, 0x00003735, 2, 0x00003835, 2, 0x00003935, 2, 0x00003036, 2, 0x00003136, 2, 0x00003236, 2, 0x00003336, 2, - 0x00003436, 2, 0x00003536, 2, 0x00003636, 2, 0x00003736, 2, 0x00003836, 2, 0x00003936, 2, 0x00003037, 2, 0x00003137, 2, - 0x00003237, 2, 0x00003337, 2, 0x00003437, 2, 0x00003537, 2, 0x00003637, 2, 0x00003737, 2, 0x00003837, 2, 0x00003937, 2, - 0x00003038, 2, 0x00003138, 2, 0x00003238, 2, 0x00003338, 2, 0x00003438, 2, 0x00003538, 2, 0x00003638, 2, 0x00003738, 2, - 0x00003838, 2, 0x00003938, 2, 0x00003039, 2, 0x00003139, 2, 0x00003239, 2, 0x00003339, 2, 0x00003439, 2, 0x00003539, 2, - 0x00003639, 2, 0x00003739, 2, 0x00003839, 2, 0x00003939, 2, 0x00303031, 3, 0x00313031, 3, 0x00323031, 3, 0x00333031, 3, - 0x00343031, 3, 0x00353031, 3, 0x00363031, 3, 0x00373031, 3, 0x00383031, 3, 0x00393031, 3, 0x00303131, 3, 0x00313131, 3, - 0x00323131, 3, 0x00333131, 3, 0x00343131, 3, 0x00353131, 3, 0x00363131, 3, 0x00373131, 3, 0x00383131, 3, 0x00393131, 3, - 0x00303231, 3, 0x00313231, 3, 0x00323231, 3, 0x00333231, 3, 0x00343231, 3, 0x00353231, 3, 0x00363231, 3, 0x00373231, 3, - 0x00383231, 3, 0x00393231, 3, 0x00303331, 3, 0x00313331, 3, 0x00323331, 3, 0x00333331, 3, 0x00343331, 3, 0x00353331, 3, - 0x00363331, 3, 0x00373331, 3, 0x00383331, 3, 0x00393331, 3, 0x00303431, 3, 0x00313431, 3, 0x00323431, 3, 0x00333431, 3, - 0x00343431, 3, 0x00353431, 3, 0x00363431, 3, 0x00373431, 3, 0x00383431, 3, 0x00393431, 3, 0x00303531, 3, 0x00313531, 3, - 0x00323531, 3, 0x00333531, 3, 0x00343531, 3, 0x00353531, 3, 0x00363531, 3, 0x00373531, 3, 0x00383531, 3, 0x00393531, 3, - 0x00303631, 3, 0x00313631, 3, 0x00323631, 3, 0x00333631, 3, 0x00343631, 3, 0x00353631, 3, 0x00363631, 3, 0x00373631, 3, - 0x00383631, 3, 0x00393631, 3, 0x00303731, 3, 0x00313731, 3, 0x00323731, 3, 0x00333731, 3, 0x00343731, 3, 0x00353731, 3, - 0x00363731, 3, 0x00373731, 3, 0x00383731, 3, 0x00393731, 3, 0x00303831, 3, 0x00313831, 3, 0x00323831, 3, 0x00333831, 3, - 0x00343831, 3, 0x00353831, 3, 0x00363831, 3, 0x00373831, 3, 0x00383831, 3, 0x00393831, 3, 0x00303931, 3, 0x00313931, 3, - 0x00323931, 3, 0x00333931, 3, 0x00343931, 3, 0x00353931, 3, 0x00363931, 3, 0x00373931, 3, 0x00383931, 3, 0x00393931, 3, - 0x00303032, 3, 0x00313032, 3, 0x00323032, 3, 0x00333032, 3, 0x00343032, 3, 0x00353032, 3, 0x00363032, 3, 0x00373032, 3, - 0x00383032, 3, 0x00393032, 3, 0x00303132, 3, 0x00313132, 3, 0x00323132, 3, 0x00333132, 3, 0x00343132, 3, 0x00353132, 3, - 0x00363132, 3, 0x00373132, 3, 0x00383132, 3, 0x00393132, 3, 0x00303232, 3, 0x00313232, 3, 0x00323232, 3, 0x00333232, 3, - 0x00343232, 3, 0x00353232, 3, 0x00363232, 3, 0x00373232, 3, 0x00383232, 3, 0x00393232, 3, 0x00303332, 3, 0x00313332, 3, - 0x00323332, 3, 0x00333332, 3, 0x00343332, 3, 0x00353332, 3, 0x00363332, 3, 0x00373332, 3, 0x00383332, 3, 0x00393332, 3, - 0x00303432, 3, 0x00313432, 3, 0x00323432, 3, 0x00333432, 3, 0x00343432, 3, 0x00353432, 3, 0x00363432, 3, 0x00373432, 3, - 0x00383432, 3, 0x00393432, 3, 0x00303532, 3, 0x00313532, 3, 0x00323532, 3, 0x00333532, 3, 0x00343532, 3, 0x00353532, 3, - 0x00363532, 3, 0x00373532, 3, 0x00383532, 3, 0x00393532, 3, 0x00303632, 3, 0x00313632, 3, 0x00323632, 3, 0x00333632, 3, - 0x00343632, 3, 0x00353632, 3, 0x00363632, 3, 0x00373632, 3, 0x00383632, 3, 0x00393632, 3, 0x00303732, 3, 0x00313732, 3, - 0x00323732, 3, 0x00333732, 3, 0x00343732, 3, 0x00353732, 3, 0x00363732, 3, 0x00373732, 3, 0x00383732, 3, 0x00393732, 3, - 0x00303832, 3, 0x00313832, 3, 0x00323832, 3, 0x00333832, 3, 0x00343832, 3, 0x00353832, 3, 0x00363832, 3, 0x00373832, 3, - 0x00383832, 3, 0x00393832, 3, 0x00303932, 3, 0x00313932, 3, 0x00323932, 3, 0x00333932, 3, 0x00343932, 3, 0x00353932, 3, - 0x00363932, 3, 0x00373932, 3, 0x00383932, 3, 0x00393932, 3, 0x00303033, 3, 0x00313033, 3, 0x00323033, 3, 0x00333033, 3, - 0x00343033, 3, 0x00353033, 3, 0x00363033, 3, 0x00373033, 3, 0x00383033, 3, 0x00393033, 3, 0x00303133, 3, 0x00313133, 3, - 0x00323133, 3, 0x00333133, 3, 0x00343133, 3, 0x00353133, 3, 0x00363133, 3, 0x00373133, 3, 0x00383133, 3, 0x00393133, 3, - 0x00303233, 3, 0x00313233, 3, 0x00323233, 3, 0x00333233, 3, 0x00343233, 3, 0x00353233, 3, 0x00363233, 3, 0x00373233, 3, - 0x00383233, 3, 0x00393233, 3, 0x00303333, 3, 0x00313333, 3, 0x00323333, 3, 0x00333333, 3, 0x00343333, 3, 0x00353333, 3, - 0x00363333, 3, 0x00373333, 3, 0x00383333, 3, 0x00393333, 3, 0x00303433, 3, 0x00313433, 3, 0x00323433, 3, 0x00333433, 3, - 0x00343433, 3, 0x00353433, 3, 0x00363433, 3, 0x00373433, 3, 0x00383433, 3, 0x00393433, 3, 0x00303533, 3, 0x00313533, 3, - 0x00323533, 3, 0x00333533, 3, 0x00343533, 3, 0x00353533, 3, 0x00363533, 3, 0x00373533, 3, 0x00383533, 3, 0x00393533, 3, - 0x00303633, 3, 0x00313633, 3, 0x00323633, 3, 0x00333633, 3, 0x00343633, 3, 0x00353633, 3, 0x00363633, 3, 0x00373633, 3, - 0x00383633, 3, 0x00393633, 3, 0x00303733, 3, 0x00313733, 3, 0x00323733, 3, 0x00333733, 3, 0x00343733, 3, 0x00353733, 3, - 0x00363733, 3, 0x00373733, 3, 0x00383733, 3, 0x00393733, 3, 0x00303833, 3, 0x00313833, 3, 0x00323833, 3, 0x00333833, 3, - 0x00343833, 3, 0x00353833, 3, 0x00363833, 3, 0x00373833, 3, 0x00383833, 3, 0x00393833, 3, 0x00303933, 3, 0x00313933, 3, - 0x00323933, 3, 0x00333933, 3, 0x00343933, 3, 0x00353933, 3, 0x00363933, 3, 0x00373933, 3, 0x00383933, 3, 0x00393933, 3, - 0x00303034, 3, 0x00313034, 3, 0x00323034, 3, 0x00333034, 3, 0x00343034, 3, 0x00353034, 3, 0x00363034, 3, 0x00373034, 3, - 0x00383034, 3, 0x00393034, 3, 0x00303134, 3, 0x00313134, 3, 0x00323134, 3, 0x00333134, 3, 0x00343134, 3, 0x00353134, 3, - 0x00363134, 3, 0x00373134, 3, 0x00383134, 3, 0x00393134, 3, 0x00303234, 3, 0x00313234, 3, 0x00323234, 3, 0x00333234, 3, - 0x00343234, 3, 0x00353234, 3, 0x00363234, 3, 0x00373234, 3, 0x00383234, 3, 0x00393234, 3, 0x00303334, 3, 0x00313334, 3, - 0x00323334, 3, 0x00333334, 3, 0x00343334, 3, 0x00353334, 3, 0x00363334, 3, 0x00373334, 3, 0x00383334, 3, 0x00393334, 3, - 0x00303434, 3, 0x00313434, 3, 0x00323434, 3, 0x00333434, 3, 0x00343434, 3, 0x00353434, 3, 0x00363434, 3, 0x00373434, 3, - 0x00383434, 3, 0x00393434, 3, 0x00303534, 3, 0x00313534, 3, 0x00323534, 3, 0x00333534, 3, 0x00343534, 3, 0x00353534, 3, - 0x00363534, 3, 0x00373534, 3, 0x00383534, 3, 0x00393534, 3, 0x00303634, 3, 0x00313634, 3, 0x00323634, 3, 0x00333634, 3, - 0x00343634, 3, 0x00353634, 3, 0x00363634, 3, 0x00373634, 3, 0x00383634, 3, 0x00393634, 3, 0x00303734, 3, 0x00313734, 3, - 0x00323734, 3, 0x00333734, 3, 0x00343734, 3, 0x00353734, 3, 0x00363734, 3, 0x00373734, 3, 0x00383734, 3, 0x00393734, 3, - 0x00303834, 3, 0x00313834, 3, 0x00323834, 3, 0x00333834, 3, 0x00343834, 3, 0x00353834, 3, 0x00363834, 3, 0x00373834, 3, - 0x00383834, 3, 0x00393834, 3, 0x00303934, 3, 0x00313934, 3, 0x00323934, 3, 0x00333934, 3, 0x00343934, 3, 0x00353934, 3, - 0x00363934, 3, 0x00373934, 3, 0x00383934, 3, 0x00393934, 3, 0x00303035, 3, 0x00313035, 3, 0x00323035, 3, 0x00333035, 3, - 0x00343035, 3, 0x00353035, 3, 0x00363035, 3, 0x00373035, 3, 0x00383035, 3, 0x00393035, 3, 0x00303135, 3, 0x00313135, 3, - 0x00323135, 3, 0x00333135, 3, 0x00343135, 3, 0x00353135, 3, 0x00363135, 3, 0x00373135, 3, 0x00383135, 3, 0x00393135, 3, - 0x00303235, 3, 0x00313235, 3, 0x00323235, 3, 0x00333235, 3, 0x00343235, 3, 0x00353235, 3, 0x00363235, 3, 0x00373235, 3, - 0x00383235, 3, 0x00393235, 3, 0x00303335, 3, 0x00313335, 3, 0x00323335, 3, 0x00333335, 3, 0x00343335, 3, 0x00353335, 3, - 0x00363335, 3, 0x00373335, 3, 0x00383335, 3, 0x00393335, 3, 0x00303435, 3, 0x00313435, 3, 0x00323435, 3, 0x00333435, 3, - 0x00343435, 3, 0x00353435, 3, 0x00363435, 3, 0x00373435, 3, 0x00383435, 3, 0x00393435, 3, 0x00303535, 3, 0x00313535, 3, - 0x00323535, 3, 0x00333535, 3, 0x00343535, 3, 0x00353535, 3, 0x00363535, 3, 0x00373535, 3, 0x00383535, 3, 0x00393535, 3, - 0x00303635, 3, 0x00313635, 3, 0x00323635, 3, 0x00333635, 3, 0x00343635, 3, 0x00353635, 3, 0x00363635, 3, 0x00373635, 3, - 0x00383635, 3, 0x00393635, 3, 0x00303735, 3, 0x00313735, 3, 0x00323735, 3, 0x00333735, 3, 0x00343735, 3, 0x00353735, 3, - 0x00363735, 3, 0x00373735, 3, 0x00383735, 3, 0x00393735, 3, 0x00303835, 3, 0x00313835, 3, 0x00323835, 3, 0x00333835, 3, - 0x00343835, 3, 0x00353835, 3, 0x00363835, 3, 0x00373835, 3, 0x00383835, 3, 0x00393835, 3, 0x00303935, 3, 0x00313935, 3, - 0x00323935, 3, 0x00333935, 3, 0x00343935, 3, 0x00353935, 3, 0x00363935, 3, 0x00373935, 3, 0x00383935, 3, 0x00393935, 3, - 0x00303036, 3, 0x00313036, 3, 0x00323036, 3, 0x00333036, 3, 0x00343036, 3, 0x00353036, 3, 0x00363036, 3, 0x00373036, 3, - 0x00383036, 3, 0x00393036, 3, 0x00303136, 3, 0x00313136, 3, 0x00323136, 3, 0x00333136, 3, 0x00343136, 3, 0x00353136, 3, - 0x00363136, 3, 0x00373136, 3, 0x00383136, 3, 0x00393136, 3, 0x00303236, 3, 0x00313236, 3, 0x00323236, 3, 0x00333236, 3, - 0x00343236, 3, 0x00353236, 3, 0x00363236, 3, 0x00373236, 3, 0x00383236, 3, 0x00393236, 3, 0x00303336, 3, 0x00313336, 3, - 0x00323336, 3, 0x00333336, 3, 0x00343336, 3, 0x00353336, 3, 0x00363336, 3, 0x00373336, 3, 0x00383336, 3, 0x00393336, 3, - 0x00303436, 3, 0x00313436, 3, 0x00323436, 3, 0x00333436, 3, 0x00343436, 3, 0x00353436, 3, 0x00363436, 3, 0x00373436, 3, - 0x00383436, 3, 0x00393436, 3, 0x00303536, 3, 0x00313536, 3, 0x00323536, 3, 0x00333536, 3, 0x00343536, 3, 0x00353536, 3, - 0x00363536, 3, 0x00373536, 3, 0x00383536, 3, 0x00393536, 3, 0x00303636, 3, 0x00313636, 3, 0x00323636, 3, 0x00333636, 3, - 0x00343636, 3, 0x00353636, 3, 0x00363636, 3, 0x00373636, 3, 0x00383636, 3, 0x00393636, 3, 0x00303736, 3, 0x00313736, 3, - 0x00323736, 3, 0x00333736, 3, 0x00343736, 3, 0x00353736, 3, 0x00363736, 3, 0x00373736, 3, 0x00383736, 3, 0x00393736, 3, - 0x00303836, 3, 0x00313836, 3, 0x00323836, 3, 0x00333836, 3, 0x00343836, 3, 0x00353836, 3, 0x00363836, 3, 0x00373836, 3, - 0x00383836, 3, 0x00393836, 3, 0x00303936, 3, 0x00313936, 3, 0x00323936, 3, 0x00333936, 3, 0x00343936, 3, 0x00353936, 3, - 0x00363936, 3, 0x00373936, 3, 0x00383936, 3, 0x00393936, 3, 0x00303037, 3, 0x00313037, 3, 0x00323037, 3, 0x00333037, 3, - 0x00343037, 3, 0x00353037, 3, 0x00363037, 3, 0x00373037, 3, 0x00383037, 3, 0x00393037, 3, 0x00303137, 3, 0x00313137, 3, - 0x00323137, 3, 0x00333137, 3, 0x00343137, 3, 0x00353137, 3, 0x00363137, 3, 0x00373137, 3, 0x00383137, 3, 0x00393137, 3, - 0x00303237, 3, 0x00313237, 3, 0x00323237, 3, 0x00333237, 3, 0x00343237, 3, 0x00353237, 3, 0x00363237, 3, 0x00373237, 3, - 0x00383237, 3, 0x00393237, 3, 0x00303337, 3, 0x00313337, 3, 0x00323337, 3, 0x00333337, 3, 0x00343337, 3, 0x00353337, 3, - 0x00363337, 3, 0x00373337, 3, 0x00383337, 3, 0x00393337, 3, 0x00303437, 3, 0x00313437, 3, 0x00323437, 3, 0x00333437, 3, - 0x00343437, 3, 0x00353437, 3, 0x00363437, 3, 0x00373437, 3, 0x00383437, 3, 0x00393437, 3, 0x00303537, 3, 0x00313537, 3, - 0x00323537, 3, 0x00333537, 3, 0x00343537, 3, 0x00353537, 3, 0x00363537, 3, 0x00373537, 3, 0x00383537, 3, 0x00393537, 3, - 0x00303637, 3, 0x00313637, 3, 0x00323637, 3, 0x00333637, 3, 0x00343637, 3, 0x00353637, 3, 0x00363637, 3, 0x00373637, 3, - 0x00383637, 3, 0x00393637, 3, 0x00303737, 3, 0x00313737, 3, 0x00323737, 3, 0x00333737, 3, 0x00343737, 3, 0x00353737, 3, - 0x00363737, 3, 0x00373737, 3, 0x00383737, 3, 0x00393737, 3, 0x00303837, 3, 0x00313837, 3, 0x00323837, 3, 0x00333837, 3, - 0x00343837, 3, 0x00353837, 3, 0x00363837, 3, 0x00373837, 3, 0x00383837, 3, 0x00393837, 3, 0x00303937, 3, 0x00313937, 3, - 0x00323937, 3, 0x00333937, 3, 0x00343937, 3, 0x00353937, 3, 0x00363937, 3, 0x00373937, 3, 0x00383937, 3, 0x00393937, 3, - 0x00303038, 3, 0x00313038, 3, 0x00323038, 3, 0x00333038, 3, 0x00343038, 3, 0x00353038, 3, 0x00363038, 3, 0x00373038, 3, - 0x00383038, 3, 0x00393038, 3, 0x00303138, 3, 0x00313138, 3, 0x00323138, 3, 0x00333138, 3, 0x00343138, 3, 0x00353138, 3, - 0x00363138, 3, 0x00373138, 3, 0x00383138, 3, 0x00393138, 3, 0x00303238, 3, 0x00313238, 3, 0x00323238, 3, 0x00333238, 3, - 0x00343238, 3, 0x00353238, 3, 0x00363238, 3, 0x00373238, 3, 0x00383238, 3, 0x00393238, 3, 0x00303338, 3, 0x00313338, 3, - 0x00323338, 3, 0x00333338, 3, 0x00343338, 3, 0x00353338, 3, 0x00363338, 3, 0x00373338, 3, 0x00383338, 3, 0x00393338, 3, - 0x00303438, 3, 0x00313438, 3, 0x00323438, 3, 0x00333438, 3, 0x00343438, 3, 0x00353438, 3, 0x00363438, 3, 0x00373438, 3, - 0x00383438, 3, 0x00393438, 3, 0x00303538, 3, 0x00313538, 3, 0x00323538, 3, 0x00333538, 3, 0x00343538, 3, 0x00353538, 3, - 0x00363538, 3, 0x00373538, 3, 0x00383538, 3, 0x00393538, 3, 0x00303638, 3, 0x00313638, 3, 0x00323638, 3, 0x00333638, 3, - 0x00343638, 3, 0x00353638, 3, 0x00363638, 3, 0x00373638, 3, 0x00383638, 3, 0x00393638, 3, 0x00303738, 3, 0x00313738, 3, - 0x00323738, 3, 0x00333738, 3, 0x00343738, 3, 0x00353738, 3, 0x00363738, 3, 0x00373738, 3, 0x00383738, 3, 0x00393738, 3, - 0x00303838, 3, 0x00313838, 3, 0x00323838, 3, 0x00333838, 3, 0x00343838, 3, 0x00353838, 3, 0x00363838, 3, 0x00373838, 3, - 0x00383838, 3, 0x00393838, 3, 0x00303938, 3, 0x00313938, 3, 0x00323938, 3, 0x00333938, 3, 0x00343938, 3, 0x00353938, 3, - 0x00363938, 3, 0x00373938, 3, 0x00383938, 3, 0x00393938, 3, 0x00303039, 3, 0x00313039, 3, 0x00323039, 3, 0x00333039, 3, - 0x00343039, 3, 0x00353039, 3, 0x00363039, 3, 0x00373039, 3, 0x00383039, 3, 0x00393039, 3, 0x00303139, 3, 0x00313139, 3, - 0x00323139, 3, 0x00333139, 3, 0x00343139, 3, 0x00353139, 3, 0x00363139, 3, 0x00373139, 3, 0x00383139, 3, 0x00393139, 3, - 0x00303239, 3, 0x00313239, 3, 0x00323239, 3, 0x00333239, 3, 0x00343239, 3, 0x00353239, 3, 0x00363239, 3, 0x00373239, 3, - 0x00383239, 3, 0x00393239, 3, 0x00303339, 3, 0x00313339, 3, 0x00323339, 3, 0x00333339, 3, 0x00343339, 3, 0x00353339, 3, - 0x00363339, 3, 0x00373339, 3, 0x00383339, 3, 0x00393339, 3, 0x00303439, 3, 0x00313439, 3, 0x00323439, 3, 0x00333439, 3, - 0x00343439, 3, 0x00353439, 3, 0x00363439, 3, 0x00373439, 3, 0x00383439, 3, 0x00393439, 3, 0x00303539, 3, 0x00313539, 3, - 0x00323539, 3, 0x00333539, 3, 0x00343539, 3, 0x00353539, 3, 0x00363539, 3, 0x00373539, 3, 0x00383539, 3, 0x00393539, 3, - 0x00303639, 3, 0x00313639, 3, 0x00323639, 3, 0x00333639, 3, 0x00343639, 3, 0x00353639, 3, 0x00363639, 3, 0x00373639, 3, - 0x00383639, 3, 0x00393639, 3, 0x00303739, 3, 0x00313739, 3, 0x00323739, 3, 0x00333739, 3, 0x00343739, 3, 0x00353739, 3, - 0x00363739, 3, 0x00373739, 3, 0x00383739, 3, 0x00393739, 3, 0x00303839, 3, 0x00313839, 3, 0x00323839, 3, 0x00333839, 3, - 0x00343839, 3, 0x00353839, 3, 0x00363839, 3, 0x00373839, 3, 0x00383839, 3, 0x00393839, 3, 0x00303939, 3, 0x00313939, 3, - 0x00323939, 3, 0x00333939, 3, 0x00343939, 3, 0x00353939, 3, 0x00363939, 3, 0x00373939, 3, 0x00383939, 3, 0x00393939, 3, - 0x30303031, 4, 0x31303031, 4, 0x32303031, 4, 0x33303031, 4, 0x34303031, 4, 0x35303031, 4, 0x36303031, 4, 0x37303031, 4, - 0x38303031, 4, 0x39303031, 4, 0x30313031, 4, 0x31313031, 4, 0x32313031, 4, 0x33313031, 4, 0x34313031, 4, 0x35313031, 4, - 0x36313031, 4, 0x37313031, 4, 0x38313031, 4, 0x39313031, 4, 0x30323031, 4, 0x31323031, 4, 0x32323031, 4, 0x33323031, 4, + 0x00000030, 1, 0x00000031, 1, 0x00000032, 1, 0x00000033, 1, 0x00000034, 1, 0x00000035, 1, 0x00000036, 1, 0x00000037, 1, + 0x00000038, 1, 0x00000039, 1, 0x00003031, 2, 0x00003131, 2, 0x00003231, 2, 0x00003331, 2, 0x00003431, 2, 0x00003531, 2, + 0x00003631, 2, 0x00003731, 2, 0x00003831, 2, 0x00003931, 2, 0x00003032, 2, 0x00003132, 2, 0x00003232, 2, 0x00003332, 2, + 0x00003432, 2, 0x00003532, 2, 0x00003632, 2, 0x00003732, 2, 0x00003832, 2, 0x00003932, 2, 0x00003033, 2, 0x00003133, 2, + 0x00003233, 2, 0x00003333, 2, 0x00003433, 2, 0x00003533, 2, 0x00003633, 2, 0x00003733, 2, 0x00003833, 2, 0x00003933, 2, + 0x00003034, 2, 0x00003134, 2, 0x00003234, 2, 0x00003334, 2, 0x00003434, 2, 0x00003534, 2, 0x00003634, 2, 0x00003734, 2, + 0x00003834, 2, 0x00003934, 2, 0x00003035, 2, 0x00003135, 2, 0x00003235, 2, 0x00003335, 2, 0x00003435, 2, 0x00003535, 2, + 0x00003635, 2, 0x00003735, 2, 0x00003835, 2, 0x00003935, 2, 0x00003036, 2, 0x00003136, 2, 0x00003236, 2, 0x00003336, 2, + 0x00003436, 2, 0x00003536, 2, 0x00003636, 2, 0x00003736, 2, 0x00003836, 2, 0x00003936, 2, 0x00003037, 2, 0x00003137, 2, + 0x00003237, 2, 0x00003337, 2, 0x00003437, 2, 0x00003537, 2, 0x00003637, 2, 0x00003737, 2, 0x00003837, 2, 0x00003937, 2, + 0x00003038, 2, 0x00003138, 2, 0x00003238, 2, 0x00003338, 2, 0x00003438, 2, 0x00003538, 2, 0x00003638, 2, 0x00003738, 2, + 0x00003838, 2, 0x00003938, 2, 0x00003039, 2, 0x00003139, 2, 0x00003239, 2, 0x00003339, 2, 0x00003439, 2, 0x00003539, 2, + 0x00003639, 2, 0x00003739, 2, 0x00003839, 2, 0x00003939, 2, 0x00303031, 3, 0x00313031, 3, 0x00323031, 3, 0x00333031, 3, + 0x00343031, 3, 0x00353031, 3, 0x00363031, 3, 0x00373031, 3, 0x00383031, 3, 0x00393031, 3, 0x00303131, 3, 0x00313131, 3, + 0x00323131, 3, 0x00333131, 3, 0x00343131, 3, 0x00353131, 3, 0x00363131, 3, 0x00373131, 3, 0x00383131, 3, 0x00393131, 3, + 0x00303231, 3, 0x00313231, 3, 0x00323231, 3, 0x00333231, 3, 0x00343231, 3, 0x00353231, 3, 0x00363231, 3, 0x00373231, 3, + 0x00383231, 3, 0x00393231, 3, 0x00303331, 3, 0x00313331, 3, 0x00323331, 3, 0x00333331, 3, 0x00343331, 3, 0x00353331, 3, + 0x00363331, 3, 0x00373331, 3, 0x00383331, 3, 0x00393331, 3, 0x00303431, 3, 0x00313431, 3, 0x00323431, 3, 0x00333431, 3, + 0x00343431, 3, 0x00353431, 3, 0x00363431, 3, 0x00373431, 3, 0x00383431, 3, 0x00393431, 3, 0x00303531, 3, 0x00313531, 3, + 0x00323531, 3, 0x00333531, 3, 0x00343531, 3, 0x00353531, 3, 0x00363531, 3, 0x00373531, 3, 0x00383531, 3, 0x00393531, 3, + 0x00303631, 3, 0x00313631, 3, 0x00323631, 3, 0x00333631, 3, 0x00343631, 3, 0x00353631, 3, 0x00363631, 3, 0x00373631, 3, + 0x00383631, 3, 0x00393631, 3, 0x00303731, 3, 0x00313731, 3, 0x00323731, 3, 0x00333731, 3, 0x00343731, 3, 0x00353731, 3, + 0x00363731, 3, 0x00373731, 3, 0x00383731, 3, 0x00393731, 3, 0x00303831, 3, 0x00313831, 3, 0x00323831, 3, 0x00333831, 3, + 0x00343831, 3, 0x00353831, 3, 0x00363831, 3, 0x00373831, 3, 0x00383831, 3, 0x00393831, 3, 0x00303931, 3, 0x00313931, 3, + 0x00323931, 3, 0x00333931, 3, 0x00343931, 3, 0x00353931, 3, 0x00363931, 3, 0x00373931, 3, 0x00383931, 3, 0x00393931, 3, + 0x00303032, 3, 0x00313032, 3, 0x00323032, 3, 0x00333032, 3, 0x00343032, 3, 0x00353032, 3, 0x00363032, 3, 0x00373032, 3, + 0x00383032, 3, 0x00393032, 3, 0x00303132, 3, 0x00313132, 3, 0x00323132, 3, 0x00333132, 3, 0x00343132, 3, 0x00353132, 3, + 0x00363132, 3, 0x00373132, 3, 0x00383132, 3, 0x00393132, 3, 0x00303232, 3, 0x00313232, 3, 0x00323232, 3, 0x00333232, 3, + 0x00343232, 3, 0x00353232, 3, 0x00363232, 3, 0x00373232, 3, 0x00383232, 3, 0x00393232, 3, 0x00303332, 3, 0x00313332, 3, + 0x00323332, 3, 0x00333332, 3, 0x00343332, 3, 0x00353332, 3, 0x00363332, 3, 0x00373332, 3, 0x00383332, 3, 0x00393332, 3, + 0x00303432, 3, 0x00313432, 3, 0x00323432, 3, 0x00333432, 3, 0x00343432, 3, 0x00353432, 3, 0x00363432, 3, 0x00373432, 3, + 0x00383432, 3, 0x00393432, 3, 0x00303532, 3, 0x00313532, 3, 0x00323532, 3, 0x00333532, 3, 0x00343532, 3, 0x00353532, 3, + 0x00363532, 3, 0x00373532, 3, 0x00383532, 3, 0x00393532, 3, 0x00303632, 3, 0x00313632, 3, 0x00323632, 3, 0x00333632, 3, + 0x00343632, 3, 0x00353632, 3, 0x00363632, 3, 0x00373632, 3, 0x00383632, 3, 0x00393632, 3, 0x00303732, 3, 0x00313732, 3, + 0x00323732, 3, 0x00333732, 3, 0x00343732, 3, 0x00353732, 3, 0x00363732, 3, 0x00373732, 3, 0x00383732, 3, 0x00393732, 3, + 0x00303832, 3, 0x00313832, 3, 0x00323832, 3, 0x00333832, 3, 0x00343832, 3, 0x00353832, 3, 0x00363832, 3, 0x00373832, 3, + 0x00383832, 3, 0x00393832, 3, 0x00303932, 3, 0x00313932, 3, 0x00323932, 3, 0x00333932, 3, 0x00343932, 3, 0x00353932, 3, + 0x00363932, 3, 0x00373932, 3, 0x00383932, 3, 0x00393932, 3, 0x00303033, 3, 0x00313033, 3, 0x00323033, 3, 0x00333033, 3, + 0x00343033, 3, 0x00353033, 3, 0x00363033, 3, 0x00373033, 3, 0x00383033, 3, 0x00393033, 3, 0x00303133, 3, 0x00313133, 3, + 0x00323133, 3, 0x00333133, 3, 0x00343133, 3, 0x00353133, 3, 0x00363133, 3, 0x00373133, 3, 0x00383133, 3, 0x00393133, 3, + 0x00303233, 3, 0x00313233, 3, 0x00323233, 3, 0x00333233, 3, 0x00343233, 3, 0x00353233, 3, 0x00363233, 3, 0x00373233, 3, + 0x00383233, 3, 0x00393233, 3, 0x00303333, 3, 0x00313333, 3, 0x00323333, 3, 0x00333333, 3, 0x00343333, 3, 0x00353333, 3, + 0x00363333, 3, 0x00373333, 3, 0x00383333, 3, 0x00393333, 3, 0x00303433, 3, 0x00313433, 3, 0x00323433, 3, 0x00333433, 3, + 0x00343433, 3, 0x00353433, 3, 0x00363433, 3, 0x00373433, 3, 0x00383433, 3, 0x00393433, 3, 0x00303533, 3, 0x00313533, 3, + 0x00323533, 3, 0x00333533, 3, 0x00343533, 3, 0x00353533, 3, 0x00363533, 3, 0x00373533, 3, 0x00383533, 3, 0x00393533, 3, + 0x00303633, 3, 0x00313633, 3, 0x00323633, 3, 0x00333633, 3, 0x00343633, 3, 0x00353633, 3, 0x00363633, 3, 0x00373633, 3, + 0x00383633, 3, 0x00393633, 3, 0x00303733, 3, 0x00313733, 3, 0x00323733, 3, 0x00333733, 3, 0x00343733, 3, 0x00353733, 3, + 0x00363733, 3, 0x00373733, 3, 0x00383733, 3, 0x00393733, 3, 0x00303833, 3, 0x00313833, 3, 0x00323833, 3, 0x00333833, 3, + 0x00343833, 3, 0x00353833, 3, 0x00363833, 3, 0x00373833, 3, 0x00383833, 3, 0x00393833, 3, 0x00303933, 3, 0x00313933, 3, + 0x00323933, 3, 0x00333933, 3, 0x00343933, 3, 0x00353933, 3, 0x00363933, 3, 0x00373933, 3, 0x00383933, 3, 0x00393933, 3, + 0x00303034, 3, 0x00313034, 3, 0x00323034, 3, 0x00333034, 3, 0x00343034, 3, 0x00353034, 3, 0x00363034, 3, 0x00373034, 3, + 0x00383034, 3, 0x00393034, 3, 0x00303134, 3, 0x00313134, 3, 0x00323134, 3, 0x00333134, 3, 0x00343134, 3, 0x00353134, 3, + 0x00363134, 3, 0x00373134, 3, 0x00383134, 3, 0x00393134, 3, 0x00303234, 3, 0x00313234, 3, 0x00323234, 3, 0x00333234, 3, + 0x00343234, 3, 0x00353234, 3, 0x00363234, 3, 0x00373234, 3, 0x00383234, 3, 0x00393234, 3, 0x00303334, 3, 0x00313334, 3, + 0x00323334, 3, 0x00333334, 3, 0x00343334, 3, 0x00353334, 3, 0x00363334, 3, 0x00373334, 3, 0x00383334, 3, 0x00393334, 3, + 0x00303434, 3, 0x00313434, 3, 0x00323434, 3, 0x00333434, 3, 0x00343434, 3, 0x00353434, 3, 0x00363434, 3, 0x00373434, 3, + 0x00383434, 3, 0x00393434, 3, 0x00303534, 3, 0x00313534, 3, 0x00323534, 3, 0x00333534, 3, 0x00343534, 3, 0x00353534, 3, + 0x00363534, 3, 0x00373534, 3, 0x00383534, 3, 0x00393534, 3, 0x00303634, 3, 0x00313634, 3, 0x00323634, 3, 0x00333634, 3, + 0x00343634, 3, 0x00353634, 3, 0x00363634, 3, 0x00373634, 3, 0x00383634, 3, 0x00393634, 3, 0x00303734, 3, 0x00313734, 3, + 0x00323734, 3, 0x00333734, 3, 0x00343734, 3, 0x00353734, 3, 0x00363734, 3, 0x00373734, 3, 0x00383734, 3, 0x00393734, 3, + 0x00303834, 3, 0x00313834, 3, 0x00323834, 3, 0x00333834, 3, 0x00343834, 3, 0x00353834, 3, 0x00363834, 3, 0x00373834, 3, + 0x00383834, 3, 0x00393834, 3, 0x00303934, 3, 0x00313934, 3, 0x00323934, 3, 0x00333934, 3, 0x00343934, 3, 0x00353934, 3, + 0x00363934, 3, 0x00373934, 3, 0x00383934, 3, 0x00393934, 3, 0x00303035, 3, 0x00313035, 3, 0x00323035, 3, 0x00333035, 3, + 0x00343035, 3, 0x00353035, 3, 0x00363035, 3, 0x00373035, 3, 0x00383035, 3, 0x00393035, 3, 0x00303135, 3, 0x00313135, 3, + 0x00323135, 3, 0x00333135, 3, 0x00343135, 3, 0x00353135, 3, 0x00363135, 3, 0x00373135, 3, 0x00383135, 3, 0x00393135, 3, + 0x00303235, 3, 0x00313235, 3, 0x00323235, 3, 0x00333235, 3, 0x00343235, 3, 0x00353235, 3, 0x00363235, 3, 0x00373235, 3, + 0x00383235, 3, 0x00393235, 3, 0x00303335, 3, 0x00313335, 3, 0x00323335, 3, 0x00333335, 3, 0x00343335, 3, 0x00353335, 3, + 0x00363335, 3, 0x00373335, 3, 0x00383335, 3, 0x00393335, 3, 0x00303435, 3, 0x00313435, 3, 0x00323435, 3, 0x00333435, 3, + 0x00343435, 3, 0x00353435, 3, 0x00363435, 3, 0x00373435, 3, 0x00383435, 3, 0x00393435, 3, 0x00303535, 3, 0x00313535, 3, + 0x00323535, 3, 0x00333535, 3, 0x00343535, 3, 0x00353535, 3, 0x00363535, 3, 0x00373535, 3, 0x00383535, 3, 0x00393535, 3, + 0x00303635, 3, 0x00313635, 3, 0x00323635, 3, 0x00333635, 3, 0x00343635, 3, 0x00353635, 3, 0x00363635, 3, 0x00373635, 3, + 0x00383635, 3, 0x00393635, 3, 0x00303735, 3, 0x00313735, 3, 0x00323735, 3, 0x00333735, 3, 0x00343735, 3, 0x00353735, 3, + 0x00363735, 3, 0x00373735, 3, 0x00383735, 3, 0x00393735, 3, 0x00303835, 3, 0x00313835, 3, 0x00323835, 3, 0x00333835, 3, + 0x00343835, 3, 0x00353835, 3, 0x00363835, 3, 0x00373835, 3, 0x00383835, 3, 0x00393835, 3, 0x00303935, 3, 0x00313935, 3, + 0x00323935, 3, 0x00333935, 3, 0x00343935, 3, 0x00353935, 3, 0x00363935, 3, 0x00373935, 3, 0x00383935, 3, 0x00393935, 3, + 0x00303036, 3, 0x00313036, 3, 0x00323036, 3, 0x00333036, 3, 0x00343036, 3, 0x00353036, 3, 0x00363036, 3, 0x00373036, 3, + 0x00383036, 3, 0x00393036, 3, 0x00303136, 3, 0x00313136, 3, 0x00323136, 3, 0x00333136, 3, 0x00343136, 3, 0x00353136, 3, + 0x00363136, 3, 0x00373136, 3, 0x00383136, 3, 0x00393136, 3, 0x00303236, 3, 0x00313236, 3, 0x00323236, 3, 0x00333236, 3, + 0x00343236, 3, 0x00353236, 3, 0x00363236, 3, 0x00373236, 3, 0x00383236, 3, 0x00393236, 3, 0x00303336, 3, 0x00313336, 3, + 0x00323336, 3, 0x00333336, 3, 0x00343336, 3, 0x00353336, 3, 0x00363336, 3, 0x00373336, 3, 0x00383336, 3, 0x00393336, 3, + 0x00303436, 3, 0x00313436, 3, 0x00323436, 3, 0x00333436, 3, 0x00343436, 3, 0x00353436, 3, 0x00363436, 3, 0x00373436, 3, + 0x00383436, 3, 0x00393436, 3, 0x00303536, 3, 0x00313536, 3, 0x00323536, 3, 0x00333536, 3, 0x00343536, 3, 0x00353536, 3, + 0x00363536, 3, 0x00373536, 3, 0x00383536, 3, 0x00393536, 3, 0x00303636, 3, 0x00313636, 3, 0x00323636, 3, 0x00333636, 3, + 0x00343636, 3, 0x00353636, 3, 0x00363636, 3, 0x00373636, 3, 0x00383636, 3, 0x00393636, 3, 0x00303736, 3, 0x00313736, 3, + 0x00323736, 3, 0x00333736, 3, 0x00343736, 3, 0x00353736, 3, 0x00363736, 3, 0x00373736, 3, 0x00383736, 3, 0x00393736, 3, + 0x00303836, 3, 0x00313836, 3, 0x00323836, 3, 0x00333836, 3, 0x00343836, 3, 0x00353836, 3, 0x00363836, 3, 0x00373836, 3, + 0x00383836, 3, 0x00393836, 3, 0x00303936, 3, 0x00313936, 3, 0x00323936, 3, 0x00333936, 3, 0x00343936, 3, 0x00353936, 3, + 0x00363936, 3, 0x00373936, 3, 0x00383936, 3, 0x00393936, 3, 0x00303037, 3, 0x00313037, 3, 0x00323037, 3, 0x00333037, 3, + 0x00343037, 3, 0x00353037, 3, 0x00363037, 3, 0x00373037, 3, 0x00383037, 3, 0x00393037, 3, 0x00303137, 3, 0x00313137, 3, + 0x00323137, 3, 0x00333137, 3, 0x00343137, 3, 0x00353137, 3, 0x00363137, 3, 0x00373137, 3, 0x00383137, 3, 0x00393137, 3, + 0x00303237, 3, 0x00313237, 3, 0x00323237, 3, 0x00333237, 3, 0x00343237, 3, 0x00353237, 3, 0x00363237, 3, 0x00373237, 3, + 0x00383237, 3, 0x00393237, 3, 0x00303337, 3, 0x00313337, 3, 0x00323337, 3, 0x00333337, 3, 0x00343337, 3, 0x00353337, 3, + 0x00363337, 3, 0x00373337, 3, 0x00383337, 3, 0x00393337, 3, 0x00303437, 3, 0x00313437, 3, 0x00323437, 3, 0x00333437, 3, + 0x00343437, 3, 0x00353437, 3, 0x00363437, 3, 0x00373437, 3, 0x00383437, 3, 0x00393437, 3, 0x00303537, 3, 0x00313537, 3, + 0x00323537, 3, 0x00333537, 3, 0x00343537, 3, 0x00353537, 3, 0x00363537, 3, 0x00373537, 3, 0x00383537, 3, 0x00393537, 3, + 0x00303637, 3, 0x00313637, 3, 0x00323637, 3, 0x00333637, 3, 0x00343637, 3, 0x00353637, 3, 0x00363637, 3, 0x00373637, 3, + 0x00383637, 3, 0x00393637, 3, 0x00303737, 3, 0x00313737, 3, 0x00323737, 3, 0x00333737, 3, 0x00343737, 3, 0x00353737, 3, + 0x00363737, 3, 0x00373737, 3, 0x00383737, 3, 0x00393737, 3, 0x00303837, 3, 0x00313837, 3, 0x00323837, 3, 0x00333837, 3, + 0x00343837, 3, 0x00353837, 3, 0x00363837, 3, 0x00373837, 3, 0x00383837, 3, 0x00393837, 3, 0x00303937, 3, 0x00313937, 3, + 0x00323937, 3, 0x00333937, 3, 0x00343937, 3, 0x00353937, 3, 0x00363937, 3, 0x00373937, 3, 0x00383937, 3, 0x00393937, 3, + 0x00303038, 3, 0x00313038, 3, 0x00323038, 3, 0x00333038, 3, 0x00343038, 3, 0x00353038, 3, 0x00363038, 3, 0x00373038, 3, + 0x00383038, 3, 0x00393038, 3, 0x00303138, 3, 0x00313138, 3, 0x00323138, 3, 0x00333138, 3, 0x00343138, 3, 0x00353138, 3, + 0x00363138, 3, 0x00373138, 3, 0x00383138, 3, 0x00393138, 3, 0x00303238, 3, 0x00313238, 3, 0x00323238, 3, 0x00333238, 3, + 0x00343238, 3, 0x00353238, 3, 0x00363238, 3, 0x00373238, 3, 0x00383238, 3, 0x00393238, 3, 0x00303338, 3, 0x00313338, 3, + 0x00323338, 3, 0x00333338, 3, 0x00343338, 3, 0x00353338, 3, 0x00363338, 3, 0x00373338, 3, 0x00383338, 3, 0x00393338, 3, + 0x00303438, 3, 0x00313438, 3, 0x00323438, 3, 0x00333438, 3, 0x00343438, 3, 0x00353438, 3, 0x00363438, 3, 0x00373438, 3, + 0x00383438, 3, 0x00393438, 3, 0x00303538, 3, 0x00313538, 3, 0x00323538, 3, 0x00333538, 3, 0x00343538, 3, 0x00353538, 3, + 0x00363538, 3, 0x00373538, 3, 0x00383538, 3, 0x00393538, 3, 0x00303638, 3, 0x00313638, 3, 0x00323638, 3, 0x00333638, 3, + 0x00343638, 3, 0x00353638, 3, 0x00363638, 3, 0x00373638, 3, 0x00383638, 3, 0x00393638, 3, 0x00303738, 3, 0x00313738, 3, + 0x00323738, 3, 0x00333738, 3, 0x00343738, 3, 0x00353738, 3, 0x00363738, 3, 0x00373738, 3, 0x00383738, 3, 0x00393738, 3, + 0x00303838, 3, 0x00313838, 3, 0x00323838, 3, 0x00333838, 3, 0x00343838, 3, 0x00353838, 3, 0x00363838, 3, 0x00373838, 3, + 0x00383838, 3, 0x00393838, 3, 0x00303938, 3, 0x00313938, 3, 0x00323938, 3, 0x00333938, 3, 0x00343938, 3, 0x00353938, 3, + 0x00363938, 3, 0x00373938, 3, 0x00383938, 3, 0x00393938, 3, 0x00303039, 3, 0x00313039, 3, 0x00323039, 3, 0x00333039, 3, + 0x00343039, 3, 0x00353039, 3, 0x00363039, 3, 0x00373039, 3, 0x00383039, 3, 0x00393039, 3, 0x00303139, 3, 0x00313139, 3, + 0x00323139, 3, 0x00333139, 3, 0x00343139, 3, 0x00353139, 3, 0x00363139, 3, 0x00373139, 3, 0x00383139, 3, 0x00393139, 3, + 0x00303239, 3, 0x00313239, 3, 0x00323239, 3, 0x00333239, 3, 0x00343239, 3, 0x00353239, 3, 0x00363239, 3, 0x00373239, 3, + 0x00383239, 3, 0x00393239, 3, 0x00303339, 3, 0x00313339, 3, 0x00323339, 3, 0x00333339, 3, 0x00343339, 3, 0x00353339, 3, + 0x00363339, 3, 0x00373339, 3, 0x00383339, 3, 0x00393339, 3, 0x00303439, 3, 0x00313439, 3, 0x00323439, 3, 0x00333439, 3, + 0x00343439, 3, 0x00353439, 3, 0x00363439, 3, 0x00373439, 3, 0x00383439, 3, 0x00393439, 3, 0x00303539, 3, 0x00313539, 3, + 0x00323539, 3, 0x00333539, 3, 0x00343539, 3, 0x00353539, 3, 0x00363539, 3, 0x00373539, 3, 0x00383539, 3, 0x00393539, 3, + 0x00303639, 3, 0x00313639, 3, 0x00323639, 3, 0x00333639, 3, 0x00343639, 3, 0x00353639, 3, 0x00363639, 3, 0x00373639, 3, + 0x00383639, 3, 0x00393639, 3, 0x00303739, 3, 0x00313739, 3, 0x00323739, 3, 0x00333739, 3, 0x00343739, 3, 0x00353739, 3, + 0x00363739, 3, 0x00373739, 3, 0x00383739, 3, 0x00393739, 3, 0x00303839, 3, 0x00313839, 3, 0x00323839, 3, 0x00333839, 3, + 0x00343839, 3, 0x00353839, 3, 0x00363839, 3, 0x00373839, 3, 0x00383839, 3, 0x00393839, 3, 0x00303939, 3, 0x00313939, 3, + 0x00323939, 3, 0x00333939, 3, 0x00343939, 3, 0x00353939, 3, 0x00363939, 3, 0x00373939, 3, 0x00383939, 3, 0x00393939, 3, + 0x30303031, 4, 0x31303031, 4, 0x32303031, 4, 0x33303031, 4, 0x34303031, 4, 0x35303031, 4, 0x36303031, 4, 0x37303031, 4, + 0x38303031, 4, 0x39303031, 4, 0x30313031, 4, 0x31313031, 4, 0x32313031, 4, 0x33313031, 4, 0x34313031, 4, 0x35313031, 4, + 0x36313031, 4, 0x37313031, 4, 0x38313031, 4, 0x39313031, 4, 0x30323031, 4, 0x31323031, 4, 0x32323031, 4, 0x33323031, 4, }; __device__ static void append_word (u32x w0[4], u32x w1[4], const u32x append[4], const u32 offset) diff --git a/nv/m06600.cu b/nv/m06600.cu index 8f46b44..cd606ac 100644 --- a/nv/m06600.cu +++ b/nv/m06600.cu @@ -1334,17 +1334,17 @@ extern "C" __global__ void __launch_bounds__ (256, 1) m06600_comp (const pw_t *p * aes shared */ - __shared__ u32 s_td0[256]; - __shared__ u32 s_td1[256]; - __shared__ u32 s_td2[256]; - __shared__ u32 s_td3[256]; - __shared__ u32 s_td4[256]; - - __shared__ u32 s_te0[256]; - __shared__ u32 s_te1[256]; - __shared__ u32 s_te2[256]; - __shared__ u32 s_te3[256]; - __shared__ u32 s_te4[256]; + __shared__ u32 s_td0[256]; + __shared__ u32 s_td1[256]; + __shared__ u32 s_td2[256]; + __shared__ u32 s_td3[256]; + __shared__ u32 s_td4[256]; + + __shared__ u32 s_te0[256]; + __shared__ u32 s_te1[256]; + __shared__ u32 s_te2[256]; + __shared__ u32 s_te3[256]; + __shared__ u32 s_te4[256]; s_td0[lid] = td0[lid]; s_td1[lid] = td1[lid]; diff --git a/nv/m08600_a0.cu b/nv/m08600_a0.cu index 2039895..4a455be 100644 --- a/nv/m08600_a0.cu +++ b/nv/m08600_a0.cu @@ -201,7 +201,7 @@ __device__ static void pad (u32x w[4], const u32 len) __device__ static void mdtransform_norecalc (u32x state[4], u32x block[4], u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32x x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -233,14 +233,14 @@ __device__ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[ __device__ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32x checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32x block[4]; + u32x block[4]; block[0] = saved_key[0]; block[1] = saved_key[1]; @@ -249,7 +249,7 @@ __device__ static void domino_big_md (const u32x saved_key[16], const u32 size, mdtransform (state, checksum, block, s_lotus_magic_table); - mdtransform_norecalc (state, checksum, s_lotus_magic_table); + mdtransform_norecalc (state, checksum, s_lotus_magic_table); } __device__ __constant__ gpu_rule_t c_rules[1024]; diff --git a/nv/m08600_a1.cu b/nv/m08600_a1.cu index adb86cf..b53b47a 100644 --- a/nv/m08600_a1.cu +++ b/nv/m08600_a1.cu @@ -199,7 +199,7 @@ __device__ static void pad (u32x w[4], const u32 len) __device__ static void mdtransform_norecalc (u32x state[4], u32x block[4], u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32x x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -231,14 +231,14 @@ __device__ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[ __device__ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32x checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32x block[4]; + u32x block[4]; block[0] = saved_key[0]; block[1] = saved_key[1]; @@ -247,7 +247,7 @@ __device__ static void domino_big_md (const u32x saved_key[16], const u32 size, mdtransform (state, checksum, block, s_lotus_magic_table); - mdtransform_norecalc (state, checksum, s_lotus_magic_table); + mdtransform_norecalc (state, checksum, s_lotus_magic_table); } __device__ __constant__ comb_t c_combs[1024]; diff --git a/nv/m08600_a3.cu b/nv/m08600_a3.cu index 8dd5bd3..a8d53b8 100644 --- a/nv/m08600_a3.cu +++ b/nv/m08600_a3.cu @@ -200,7 +200,7 @@ __device__ static void pad (u32 w[4], const u32 len) __device__ static void mdtransform_norecalc (u32x state[4], u32x block[4], u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32x x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -232,14 +232,14 @@ __device__ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[ __device__ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32x checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32x block[4]; + u32x block[4]; block[0] = saved_key[0]; block[1] = saved_key[1]; @@ -248,7 +248,7 @@ __device__ static void domino_big_md (const u32x saved_key[16], const u32 size, mdtransform (state, checksum, block, s_lotus_magic_table); - mdtransform_norecalc (state, checksum, s_lotus_magic_table); + mdtransform_norecalc (state, checksum, s_lotus_magic_table); } __device__ __constant__ u32x c_bfs[1024]; diff --git a/nv/m08700_a0.cu b/nv/m08700_a0.cu index 7796948..26ab61d 100644 --- a/nv/m08700_a0.cu +++ b/nv/m08700_a0.cu @@ -213,7 +213,7 @@ __device__ static void pad (u32x w[4], const u32 len) __device__ static void mdtransform_norecalc (u32x state[4], u32x block[4], u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32x x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -245,14 +245,14 @@ __device__ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[ __device__ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32x checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32x block[4]; + u32x block[4]; block[0] = 0; block[1] = 0; @@ -272,7 +272,7 @@ __device__ static void domino_big_md (const u32x saved_key[16], const u32 size, mdtransform (state, checksum, block, s_lotus_magic_table); } - u32 left = size - curpos; + u32 left = size - curpos; block[0] = saved_key[idx + 0]; block[1] = saved_key[idx + 1]; @@ -281,7 +281,7 @@ __device__ static void domino_big_md (const u32x saved_key[16], const u32 size, mdtransform (state, checksum, block, s_lotus_magic_table); - mdtransform_norecalc (state, checksum, s_lotus_magic_table); + mdtransform_norecalc (state, checksum, s_lotus_magic_table); } __device__ __constant__ gpu_rule_t c_rules[1024]; diff --git a/nv/m08700_a1.cu b/nv/m08700_a1.cu index bde1f12..e3a0312 100644 --- a/nv/m08700_a1.cu +++ b/nv/m08700_a1.cu @@ -211,7 +211,7 @@ __device__ static void pad (u32x w[4], const u32 len) __device__ static void mdtransform_norecalc (u32x state[4], u32x block[4], u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32x x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -243,14 +243,14 @@ __device__ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[ __device__ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32x checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32x block[4]; + u32x block[4]; block[0] = 0; block[1] = 0; @@ -270,7 +270,7 @@ __device__ static void domino_big_md (const u32x saved_key[16], const u32 size, mdtransform (state, checksum, block, s_lotus_magic_table); } - u32 left = size - curpos; + u32 left = size - curpos; block[0] = saved_key[idx + 0]; block[1] = saved_key[idx + 1]; @@ -279,7 +279,7 @@ __device__ static void domino_big_md (const u32x saved_key[16], const u32 size, mdtransform (state, checksum, block, s_lotus_magic_table); - mdtransform_norecalc (state, checksum, s_lotus_magic_table); + mdtransform_norecalc (state, checksum, s_lotus_magic_table); } __device__ __constant__ comb_t c_combs[1024]; diff --git a/nv/m08700_a3.cu b/nv/m08700_a3.cu index d9d4257..1bdcc1c 100644 --- a/nv/m08700_a3.cu +++ b/nv/m08700_a3.cu @@ -212,7 +212,7 @@ __device__ static void pad (u32 w[4], const u32 len) __device__ static void mdtransform_norecalc (u32x state[4], u32x block[4], u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32x x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -244,14 +244,14 @@ __device__ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[ __device__ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32x checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32x block[4]; + u32x block[4]; block[0] = 0; block[1] = 0; @@ -271,7 +271,7 @@ __device__ static void domino_big_md (const u32x saved_key[16], const u32 size, mdtransform (state, checksum, block, s_lotus_magic_table); } - u32 left = size - curpos; + u32 left = size - curpos; block[0] = saved_key[idx + 0]; block[1] = saved_key[idx + 1]; @@ -280,7 +280,7 @@ __device__ static void domino_big_md (const u32x saved_key[16], const u32 size, mdtransform (state, checksum, block, s_lotus_magic_table); - mdtransform_norecalc (state, checksum, s_lotus_magic_table); + mdtransform_norecalc (state, checksum, s_lotus_magic_table); } __device__ __constant__ u32x c_bfs[1024]; diff --git a/nv/m08800.cu b/nv/m08800.cu index 2c7b891..39e88bf 100644 --- a/nv/m08800.cu +++ b/nv/m08800.cu @@ -1704,17 +1704,17 @@ extern "C" __global__ void __launch_bounds__ (256, 1) m08800_comp (const pw_t *p * aes shared */ - __shared__ u32 s_td0[256]; - __shared__ u32 s_td1[256]; - __shared__ u32 s_td2[256]; - __shared__ u32 s_td3[256]; - __shared__ u32 s_td4[256]; - - __shared__ u32 s_te0[256]; - __shared__ u32 s_te1[256]; - __shared__ u32 s_te2[256]; - __shared__ u32 s_te3[256]; - __shared__ u32 s_te4[256]; + __shared__ u32 s_td0[256]; + __shared__ u32 s_td1[256]; + __shared__ u32 s_td2[256]; + __shared__ u32 s_td3[256]; + __shared__ u32 s_td4[256]; + + __shared__ u32 s_te0[256]; + __shared__ u32 s_te1[256]; + __shared__ u32 s_te2[256]; + __shared__ u32 s_te3[256]; + __shared__ u32 s_te4[256]; s_td0[lid] = td0[lid]; s_td1[lid] = td1[lid]; diff --git a/nv/m09000.cu b/nv/m09000.cu index 9a4a2a1..7b10acb 100644 --- a/nv/m09000.cu +++ b/nv/m09000.cu @@ -345,9 +345,9 @@ __device__ __constant__ u32 c_pbox[18] = BF_ROUND (R, L, 14); \ BF_ROUND (L, R, 15); \ BF_ROUND (R, L, 16); \ - tmp = R; \ - R = L; \ - L = tmp ^ P[17]; \ + tmp = R; \ + R = L; \ + L = tmp ^ P[17]; \ } __device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) diff --git a/nv/m09400.cu b/nv/m09400.cu index 4b4bfc2..c0afabb 100644 --- a/nv/m09400.cu +++ b/nv/m09400.cu @@ -1511,17 +1511,17 @@ extern "C" __global__ void __launch_bounds__ (256, 1) m09400_comp (const pw_t *p * aes shared */ - __shared__ u32 s_td0[256]; - __shared__ u32 s_td1[256]; - __shared__ u32 s_td2[256]; - __shared__ u32 s_td3[256]; - __shared__ u32 s_td4[256]; - - __shared__ u32 s_te0[256]; - __shared__ u32 s_te1[256]; - __shared__ u32 s_te2[256]; - __shared__ u32 s_te3[256]; - __shared__ u32 s_te4[256]; + __shared__ u32 s_td0[256]; + __shared__ u32 s_td1[256]; + __shared__ u32 s_td2[256]; + __shared__ u32 s_td3[256]; + __shared__ u32 s_td4[256]; + + __shared__ u32 s_te0[256]; + __shared__ u32 s_te1[256]; + __shared__ u32 s_te2[256]; + __shared__ u32 s_te3[256]; + __shared__ u32 s_te4[256]; s_td0[lid] = td0[lid]; s_td1[lid] = td1[lid]; diff --git a/nv/m09500.cu b/nv/m09500.cu index b324d1c..30c0fdd 100644 --- a/nv/m09500.cu +++ b/nv/m09500.cu @@ -1241,17 +1241,17 @@ extern "C" __global__ void __launch_bounds__ (256, 1) m09500_comp (const pw_t *p * aes shared */ - __shared__ u32 s_td0[256]; - __shared__ u32 s_td1[256]; - __shared__ u32 s_td2[256]; - __shared__ u32 s_td3[256]; - __shared__ u32 s_td4[256]; - - __shared__ u32 s_te0[256]; - __shared__ u32 s_te1[256]; - __shared__ u32 s_te2[256]; - __shared__ u32 s_te3[256]; - __shared__ u32 s_te4[256]; + __shared__ u32 s_td0[256]; + __shared__ u32 s_td1[256]; + __shared__ u32 s_td2[256]; + __shared__ u32 s_td3[256]; + __shared__ u32 s_td4[256]; + + __shared__ u32 s_te0[256]; + __shared__ u32 s_te1[256]; + __shared__ u32 s_te2[256]; + __shared__ u32 s_te3[256]; + __shared__ u32 s_te4[256]; s_td0[lid] = td0[lid]; s_td1[lid] = td1[lid]; diff --git a/nv/m12500.cu b/nv/m12500.cu index 5804475..1252449 100644 --- a/nv/m12500.cu +++ b/nv/m12500.cu @@ -1122,17 +1122,17 @@ extern "C" __global__ void __launch_bounds__ (256, 1) m12500_comp (const pw_t *p * aes shared */ - __shared__ u32 s_td0[256]; - __shared__ u32 s_td1[256]; - __shared__ u32 s_td2[256]; - __shared__ u32 s_td3[256]; - __shared__ u32 s_td4[256]; - - __shared__ u32 s_te0[256]; - __shared__ u32 s_te1[256]; - __shared__ u32 s_te2[256]; - __shared__ u32 s_te3[256]; - __shared__ u32 s_te4[256]; + __shared__ u32 s_td0[256]; + __shared__ u32 s_td1[256]; + __shared__ u32 s_td2[256]; + __shared__ u32 s_td3[256]; + __shared__ u32 s_td4[256]; + + __shared__ u32 s_te0[256]; + __shared__ u32 s_te1[256]; + __shared__ u32 s_te2[256]; + __shared__ u32 s_te3[256]; + __shared__ u32 s_te4[256]; s_td0[lid] = td0[lid]; s_td1[lid] = td1[lid]; diff --git a/nv/types_nv.c b/nv/types_nv.c index 1482f7a..02c7364 100644 --- a/nv/types_nv.c +++ b/nv/types_nv.c @@ -924,8 +924,8 @@ typedef struct typedef struct { - u32 data_len; - u32 data_buf[512]; + u32 data_len; + u32 data_buf[512]; } cloudkey_t; diff --git a/src/ext_OpenCL.c b/src/ext_OpenCL.c index c73522b..fff2b10 100644 --- a/src/ext_OpenCL.c +++ b/src/ext_OpenCL.c @@ -125,9 +125,9 @@ void hc_clGetPlatformInfo (cl_platform_id platform, cl_platform_info param_name, } } -void hc_clGetDeviceIDs (cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices) +void hc_clGetDeviceIDs (cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices) { - cl_int CL_err = clGetDeviceIDs (platform, device_type, num_entries, devices, num_devices); + cl_int CL_err = clGetDeviceIDs (platform, device_type, num_entries, devices, num_devices); if (CL_err != CL_SUCCESS) { @@ -137,9 +137,9 @@ void hc_clGetDeviceIDs (cl_platform_id platform, cl_device_type device_type, cl_ } } -void hc_clGetDeviceInfo (cl_device_id device, cl_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) +void hc_clGetDeviceInfo (cl_device_id device, cl_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - cl_int CL_err = clGetDeviceInfo (device, param_name, param_value_size, param_value, param_value_size_ret); + cl_int CL_err = clGetDeviceInfo (device, param_name, param_value_size, param_value, param_value_size_ret); if (CL_err != CL_SUCCESS) { diff --git a/src/shared.c b/src/shared.c index ad56a73..b669aeb 100644 --- a/src/shared.c +++ b/src/shared.c @@ -13891,7 +13891,7 @@ int racf_parse_hash (char *input_buf, uint input_len, hash_t *hash_buf) IP (salt->salt_buf_pc[0], salt->salt_buf_pc[1], tt); salt->salt_buf_pc[0] = ROTATE_LEFT (salt->salt_buf_pc[0], 3u); - salt->salt_buf_pc[1] = ROTATE_LEFT (salt->salt_buf_pc[1], 3u); + salt->salt_buf_pc[1] = ROTATE_LEFT (salt->salt_buf_pc[1], 3u); digest[0] = hex_to_uint (&digest_pos[ 0]); digest[1] = hex_to_uint (&digest_pos[ 8]); diff --git a/tools/deps.sh b/tools/deps.sh index 24c808b..6385b8a 100755 --- a/tools/deps.sh +++ b/tools/deps.sh @@ -8,8 +8,8 @@ DOWNLOAD_DEPS="ADL_SDK8.zip R352-developer.zip cuda_7.5.18_linux.run NVIDIA-Linu ## root check if [ $(id -u) -ne 0 ]; then - echo "! Must be root" - exit 1 + echo "! Must be root" + exit 1 fi ## cleanup 'hashcat-deps' directories @@ -18,31 +18,31 @@ mkdir -p /opt/hashcat-deps/{tmp,adl-sdk,cuda-7.5,NVIDIA-Linux-x86_64-352.21,nvid cd /opt/hashcat-deps/tmp if [ $? -ne 0 ]; then - echo "! Cannot create hashcat-deps directories." - exit 1 + echo "! Cannot create hashcat-deps directories." + exit 1 fi ## check dependencies i=0 for d in ${DOWNLOAD_DEPS}; do - if [ ! -f "${d}" ]; then - echo "! ${d} not found." - ((i++)) - fi + if [ ! -f "${d}" ]; then + echo "! ${d} not found." + ((i++)) + fi done if [ ${i} -gt 0 ]; then - echo "! Please download manually into the directory /opt/hashcat-deps/tmp" - exit 1 + echo "! Please download manually into the directory /opt/hashcat-deps/tmp" + exit 1 fi ## installing needed packages for pkg in ${DEPS}; do - apt-get -y install ${pkg} - if [ $? -ne 0 ]; then - echo "! failed to install ${pkg}" - exit 1 - fi + apt-get -y install ${pkg} + if [ $? -ne 0 ]; then + echo "! failed to install ${pkg}" + exit 1 + fi done ## extract ADL SDK @@ -50,15 +50,15 @@ unzip ADL_SDK8.zip -d /opt/hashcat-deps/adl-sdk-8 ret=$? if [[ ${ret} -ne 0 ]] && [[ ${ret} -ne 1 ]]; then - echo "! failed to extract ADL SDK" - exit 1 + echo "! failed to extract ADL SDK" + exit 1 fi rm -rf /opt/hashcat-deps/adl-sdk && ln -s /opt/hashcat-deps/adl-sdk-8 /opt/hashcat-deps/adl-sdk if [ $? -ne 0 ]; then - echo "! failed to setup ADL SDK link" - exit 1 + echo "! failed to setup ADL SDK link" + exit 1 fi ## extract NVAPI @@ -66,8 +66,8 @@ unzip R352-developer.zip -d /opt/hashcat-deps/ ret=$? if [[ ${ret} -ne 0 ]] && [[ ${ret} -ne 1 ]]; then - echo "! failed to extract NVAPI" - exit 1 + echo "! failed to extract NVAPI" + exit 1 fi ## install CUDA SDK @@ -75,8 +75,8 @@ chmod +x cuda_7.5.18_linux.run && \ ./cuda_7.5.18_linux.run -toolkit -silent -override --toolkitpath=/opt/hashcat-deps/cuda-7.5 if [ $? -ne 0 ]; then - echo "! failed to install CUDA SDK" - exit 1 + echo "! failed to install CUDA SDK" + exit 1 fi ## install NVIDIA Driver @@ -92,8 +92,8 @@ ln -s libcuda.so.352.21 libcuda.so && \ cd /opt/hashcat-deps/tmp if [ $? -ne 0 ]; then - echo "! failed to install NVIDIA Driver" - exit 1 + echo "! failed to install NVIDIA Driver" + exit 1 fi ## install NVIDIA GPU Deployment Kit @@ -101,8 +101,8 @@ chmod +x gdk_linux_amd64_352_55_release.run && \ ./gdk_linux_amd64_352_55_release.run --silent --installdir=/opt/hashcat-deps/nvidia-gdk if [ $? -ne 0 ]; then - echo "! failed to install NVIDIA GPU Deployment Kit" - exit 1 + echo "! failed to install NVIDIA GPU Deployment Kit" + exit 1 fi ## extract AMD APP SDK @@ -110,15 +110,15 @@ tar xjf AMDAPPSDK-3.0-linux64.tar.bz2 && \ ./AMD-APP-SDK-v3.0.130.135-GA-linux64.sh --noexec --target /opt/hashcat-deps/amd-app-sdk-v3.0.130.135 if [ $? -ne 0 ]; then - echo "! failed to extract AMD APP SDK" - exit 1 + echo "! failed to extract AMD APP SDK" + exit 1 fi rm -rf /opt/hashcat-deps/amd-app-sdk && ln -s /opt/hashcat-deps/amd-app-sdk-v3.0.130.135 /opt/hashcat-deps/amd-app-sdk if [ $? -ne 0 ]; then - echo "! failed to setup ADL SDK link" - exit 1 + echo "! failed to setup ADL SDK link" + exit 1 fi echo "> oclHashcat dependencies have been resolved." diff --git a/tools/test.pl b/tools/test.pl index a105e89..eea8da4 100755 --- a/tools/test.pl +++ b/tools/test.pl @@ -5627,13 +5627,13 @@ sub gen_hash my @ress = split "", $res; - for (my $x = 1; $x <= 19; $x++) + for (my $x = 1; $x <= 19; $x++) { my @xor; - for (my $i = 0; $i < 16; $i++) + for (my $i = 0; $i < 16; $i++) { - $xor[$i] = chr (ord ($ress[$i]) ^ $x); + $xor[$i] = chr (ord ($ress[$i]) ^ $x); } my $s = join ("", @xor); @@ -5641,7 +5641,7 @@ sub gen_hash my $m2 = Crypt::RC4->new ($s); $u = $m2->RC4 ($u); - } + } $u .= substr (pack ("H*", $u_save), 16, 16); @@ -8180,7 +8180,7 @@ sub domino_big_md mdtransform (\@state, \@checksum, \@block); } - my $left = $size - $curpos; + my $left = $size - $curpos; my @block = splice (@{$saved_key_ref}, 0, 16); @@ -8188,7 +8188,7 @@ sub domino_big_md mdtransform (\@state, \@checksum, \@block); - mdtransform_norecalc (\@state, \@checksum); + mdtransform_norecalc (\@state, \@checksum); return @state; } diff --git a/tools/test.sh b/tools/test.sh index 3a2d4c6..301e6ba 100755 --- a/tools/test.sh +++ b/tools/test.sh @@ -1773,9 +1773,8 @@ if [ "${PACKAGE}" -eq 0 -o -z "${PACKAGE_FOLDER}" ]; then IS_SLOW=$? if [[ ${hash_type} -eq 400 ]]; then - - # we use phpass as slow hash for testing the AMP kernel - IS_SLOW=0 + # we use phpass as slow hash for testing the AMP kernel + IS_SLOW=0 fi if [[ ${IS_SLOW} -eq 1 ]]; then -- 2.25.1