SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f,
};
-#ifdef IS_AMD
+#if 1
void sha256_transform (const u32 w[16], u32 digest[8])
{
ROUND_STEP (0);
+ #ifdef _unroll
#pragma unroll
+ #endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
#else
+// This is basically a much cleaner version, but it apparently slows things down by over 100% :(
+
// Store c (truncated to u8) into buffer a, viewed as bytes, at index (p ^ 3).
// XOR-ing the index with 3 flips its two low bits, so byte positions are
// mirrored within each group of four (0<->3, 1<->2, 4<->7, ...).
// NOTE(review): the _BE suffix suggests this yields big-endian byte order
// inside 32-bit words on a little-endian device - confirm against callers.
// Expands to a bare assignment expression; use it as a statement.
#define PUTCHAR32_BE(a,p,c) ((u8 *)(a))[(p) ^ 3] = (u8) (c)
// Read the byte of buffer a at index (p ^ 3), i.e. with the same mirrored
// byte position inside each group of four as described above.
#define GETCHAR32_BE(a,p) ((u8 *)(a))[(p) ^ 3]
ROUND_STEP (0);
- //#pragma unroll
+ #ifdef _unroll
+ #pragma unroll
+ #endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
if (j1)
{
- #pragma unroll 32
+ #ifdef _unroll
+ #pragma unroll
+ #endif
for (u32 k = 0, p = block_len - 32; k < 32; k++, p++)
{
PUTCHAR32_BE (block, p, GETCHAR32_BE (alt_result, k));