static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset;
out1[0] = __byte_perm (in[0], 0, 0x7170);
#endif
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
out2[3] = ((in[3] >> 8) & 0x00FF0000) | ((in[3] >> 16) & 0x000000FF);
out2[2] = ((in[3] << 8) & 0x00FF0000) | ((in[3] >> 0) & 0x000000FF);
out2[1] = ((in[2] >> 8) & 0x00FF0000) | ((in[2] >> 16) & 0x000000FF);
out[3] = __byte_perm (in2[2], in2[3], 0x6420);
#endif
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
out[0] = ((in1[0] & 0x000000ff) >> 0) | ((in1[0] & 0x00ff0000) >> 8)
| ((in1[1] & 0x000000ff) << 16) | ((in1[1] & 0x00ff0000) << 8);
out[1] = ((in1[2] & 0x000000ff) >> 0) | ((in1[2] & 0x00ff0000) >> 8)
static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset;
static void switch_buffer_by_offset_be (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
switch (offset / 4)
{
case 0:
u32 tmp3;
u32 tmp4;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - (block_len & 3);
u32 tmp3;
u32 tmp4;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - (block_len & 3);
u32 tmp1;
u32 tmp2;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - (block_len & 3);
#endif
#endif
-#if defined IS_AMD || IS_UNKNOWN
+#if defined IS_AMD || defined IS_GENERIC
/*
* Bitslice DES S-boxes making use of a vector conditional select operation
#define sXXX_DECL volatile
#endif
-#ifdef IS_UNKNOWN
+#ifdef IS_GENERIC
#define KXX_DECL
#define sXXX_DECL
#endif
u32 tmp3;
u32 tmp4;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - (block_len & 3);
u32 tmp3;
u32 tmp4;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - (block_len & 3);
u32 tmp1;
u32 tmp2;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - (block_len & 3);
sha512_transform (sha512_ctx->buf, sha512_ctx->state);
#endif
- #ifdef IS_UNKNOWN
+ #ifdef IS_GENERIC
sha512_transform (sha512_ctx->buf, sha512_ctx->state);
#endif
#define KXX_DECL volatile
#endif
-#ifdef IS_UNKNOWN
+#ifdef IS_GENERIC
#define KXX_DECL
#endif
#endif
#endif
-#if defined IS_AMD || IS_UNKNOWN
+#if defined IS_AMD || defined IS_GENERIC
/*
* Bitslice DES S-boxes making use of a vector conditional select operation
#pragma unroll
#endif
- #ifdef IS_UNKNOWN
+ #ifdef IS_GENERIC
#pragma unroll 1
#endif
}
#endif
-#ifdef IS_UNKNOWN
+#ifdef IS_GENERIC
#define BF_ROUND(L,R,N) \
{ \
uchar4 c = as_uchar4 ((L)); \
u32 tmp4;
u32 tmp5;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - (offset & 3);
u32 tmp3;
u32 tmp4;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - (block_len & 3);
u32 tmp3;
u32 tmp4;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - (block_len & 3);
u32 tmp1;
u32 tmp2;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - (block_len & 3);
u32 tmp3;
u32 tmp4;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - block_len;
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
u32 tmp3;
u32 tmp4;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - block_len;
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
u32 tmp3;
u32 tmp4;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - block_len;
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
u32 tmp3;
u32 tmp4;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - block_len;
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
}
#endif
-#ifdef IS_UNKNOWN
+#ifdef IS_GENERIC
#define BF_ROUND(L,R,N) \
{ \
uchar4 c = as_uchar4 ((L)); \
salt_buf3[3] = 0;
const u32 salt0 = salt_buf0[0];
- const u32 salt1 = salt_buf0[1] & 0xff | '(' << 8;
+ const u32 salt1 = (salt_buf0[1] & 0xff) | ('(' << 8);
/**
* Lotus 6 hash - SEC_pwddigest_V2
u32 i;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
for (i = 0; i < pd; i++) sc[idx++] = pw[i];
sc[idx++] = pw[i]
| amd_bytealign (bl[0], 0, pm4);
const u32 om = m % 4;
const u32 od = m / 4;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
pt[0] = amd_bytealign (sc[od + 1], sc[od + 0], om);
pt[1] = amd_bytealign (sc[od + 2], sc[od + 1], om);
pt[2] = amd_bytealign (sc[od + 3], sc[od + 2], om);
const u32 mod = block_len & 3;
const u32 div = block_len / 4;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - mod;
u32 append0_t[4];
const u32 mod = block_len & 3;
const u32 div = block_len / 4;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - mod;
u32 append0_t[4];
const u32 mod = block_len & 3;
const u32 div = block_len / 4;
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - mod;
u32 append0_t[4];
tmp2 = __byte_perm (append[1], 0, selector);
#endif
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - block_len;
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp8 = __byte_perm (append[7], 0, selector);
#endif
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - block_len;
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
out1[3] = __byte_perm (in1[3], 0, 0x4321);
#endif
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
out0[0] = amd_bytealign (in0[1], in0[0], 1);
out0[1] = amd_bytealign (in0[2], in0[1], 1);
out0[2] = amd_bytealign (in0[3], in0[2], 1);
out0[0] = __byte_perm ( 0, in0[0], 0x6543);
#endif
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
out1[3] = amd_bytealign (in1[3], in1[2], 3);
out1[2] = amd_bytealign (in1[2], in1[1], 3);
out1[1] = amd_bytealign (in1[1], in1[0], 3);
}
#endif
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
switch (num)
{
case 0: out0[0] = in0[0];
}
#endif
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
switch (num)
{
case 0: out1[3] = in1[3];
}
#endif
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
switch (offset)
{
case 0:
}
#endif
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
const uchar4 tmp0 = (uchar4) (p0);
const uchar4 tmp1 = (uchar4) (p1);
}
#endif
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
switch (p0)
{
case 1: buf0[0] |= tmp << 0;
buf1[3] = tib41[3];
#endif
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
tib40[0] = ((buf0[0] & 0x000000FF) << 0) | ((buf0[0] & 0x0000FF00) << 8);
tib40[1] = ((buf0[0] & 0x00FF0000) >> 16) | ((buf0[0] & 0xFF000000) >> 8);
tib40[2] = ((buf0[1] & 0x000000FF) << 0) | ((buf0[1] & 0x0000FF00) << 8);
buf0[0] = __byte_perm (buf0[0], 0, 0x3201);
#endif
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
buf0[0] = (buf0[0] & 0xFFFF0000) | ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF);
#endif
}
#endif
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
switch (in_len)
{
case 2: buf0[0] = ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF);
}
#endif
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
switch (p0)
{
case 0: tmp0 = (buf0[0] >> 0) & 0xFF;
}
#endif
- #if defined IS_AMD || defined IS_UNKNOWN
+ #if defined IS_AMD || defined IS_GENERIC
u32 tib40[4];
u32 tib41[4];
}
#endif
-#ifdef IS_UNKNOWN
+#ifdef IS_GENERIC
static inline u32 swap32 (const u32 v)
{
return (as_uint (as_uchar4 (v).s3210));
#endif
#endif
-#ifdef IS_UNKNOWN
+#ifdef IS_GENERIC
static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
#define BIT(x) (1 << (x))
#endif
#endif
-#ifdef IS_UNKNOWN
+#ifdef IS_GENERIC
static inline u32 rotr32 (const u32 a, const u32 n)
{
return rotate (a, 32 - n);
#define MD4_Go(x,y,z) (bitselect ((x), (y), ((x) ^ (z))))
#endif
-#ifdef IS_UNKNOWN
+#ifdef IS_GENERIC
#define MD4_F(x,y,z) (((x) & (y)) | ((~(x)) & (z)))
#define MD4_G(x,y,z) (((x) & (y)) | ((x) & (z)) | ((y) & (z)))
#define MD4_H(x,y,z) ((x) ^ (y) ^ (z))
-#define MD5_H1(x,y,z) (MD4_H((x), (y), (z)))
-#define MD5_H2(x,y,z) (MD4_H((x), (y), (z)))
+#define MD4_H1(x,y,z) (MD4_H((x), (y), (z)))
+#define MD4_H2(x,y,z) (MD4_H((x), (y), (z)))
#define MD4_Fo(x,y,z) (MD4_F((x), (y), (z)))
#define MD4_Go(x,y,z) (MD4_G((x), (y), (z)))
#endif
#define MD5_Go(x,y,z) (bitselect ((y), (x), (z)))
#endif
-#ifdef IS_UNKNOWN
+#ifdef IS_GENERIC
#define MD5_F(x,y,z) ((z) ^ ((x) & ((y) ^ (z))))
#define MD5_G(x,y,z) ((y) ^ ((z) & ((x) ^ (y))))
#define MD5_H(x,y,z) ((x) ^ (y) ^ (z))
#define SHA1_F2o(x,y,z) (bitselect ((x), (y), ((x) ^ (z))))
#endif
-#ifdef IS_UNKNOWN
+#ifdef IS_GENERIC
#define SHA1_F0(x,y,z) ((z) ^ ((x) & ((y) ^ (z))))
#define SHA1_F1(x,y,z) ((x) ^ (y) ^ (z))
#define SHA1_F2(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y))))
-// either pocl or llvm fails and produces invalid optimized code
-//#define SHA1_F0o(x,y,z) (SHA1_F0 ((x), (y), (z)))
-//#define SHA1_F2o(x,y,z) (SHA1_F2 ((x), (y), (z)))
-// luckily we can use bitselect as a workaround
-#define SHA1_F0o(x,y,z) (bitselect ((z), (y), (x)))
-#define SHA1_F2o(x,y,z) (bitselect ((x), (y), ((x) ^ (z))))
+#define SHA1_F0o(x,y,z) (SHA1_F0 ((x), (y), (z)))
+#define SHA1_F2o(x,y,z) (SHA1_F2 ((x), (y), (z)))
#endif
#define SHA1_STEP(f,a,b,c,d,e,x) \
#define SHA256_F1o(x,y,z) (bitselect ((z), (y), (x)))
#endif
-#ifdef IS_UNKNOWN
+#ifdef IS_GENERIC
#define SHA256_F0(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y))))
#define SHA256_F1(x,y,z) ((z) ^ ((x) & ((y) ^ (z))))
#define SHA256_F0o(x,y,z) (SHA256_F0 ((x), (y), (z)))
#define SHA384_F1o(x,y,z) (bitselect ((x), (y), ((x) ^ (z))))
#endif
-#ifdef IS_UNKNOWN
+#ifdef IS_GENERIC
#define SHA384_F0o(x,y,z) (SHA384_F0 ((x), (y), (z)))
#define SHA384_F1o(x,y,z) (SHA384_F1 ((x), (y), (z)))
#endif
#define SHA512_F1o(x,y,z) (bitselect ((x), (y), ((x) ^ (z))))
#endif
-#ifdef IS_UNKNOWN
+#ifdef IS_GENERIC
#define SHA512_F0o(x,y,z) (SHA512_F0 ((x), (y), (z)))
#define SHA512_F1o(x,y,z) (SHA512_F1 ((x), (y), (z)))
#endif
#define RIPEMD160_Io(x,y,z) (bitselect ((y), (x), (z)))
#endif
-#ifdef IS_UNKNOWN
+#ifdef IS_GENERIC
#define RIPEMD160_F(x,y,z) ((x) ^ (y) ^ (z))
#define RIPEMD160_G(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) /* x ? y : z */
#define RIPEMD160_H(x,y,z) (((x) | ~(y)) ^ (z))
#endif
#if VENDOR_ID == 9998 // temporary for dev
-#define IS_UNKNOWN
+#define IS_GENERIC
#endif
#if VENDOR_ID == 9999
-#define IS_UNKNOWN
+#define IS_GENERIC
#endif
/**
#define CL_VENDOR_NV "NVIDIA Corporation"
#define CL_VENDOR_AMD "Advanced Micro Devices, Inc."
-#define CL_VENDOR_SDS "Shiloh Distributed Solutions"
-#define CL_VENDOR_APPLE "Apple"
#define CL_VENDOR_POCL "The pocl project"
#define VENDOR_ID_AMD 4098
#define VENDOR_ID_NV 4318
-#define VENDOR_ID_POCL 9998
-#define VENDOR_ID_UNKNOWN 9999
+#define VENDOR_ID_GENERIC 9999
#define BLOCK_SIZE 64
myfree (tmp);
}
- if (data.vendor_id == VENDOR_ID_POCL)
- {
- // NOTE: clEnqueueFillBuffer () segfaults with Ubuntu 15.04 pocl
- // We need to workaround...
-
- #define FILLSZ 0x100000
-
- char *tmp = (char *) mymalloc (FILLSZ);
-
- memset (tmp, 0, FILLSZ);
-
- for (uint i = 0; i < size; i += FILLSZ)
- {
- const int left = size - i;
-
- const int fillsz = MIN (FILLSZ, left);
-
- hc_clEnqueueWriteBuffer (device_param->command_queue, buf, CL_TRUE, i, fillsz, tmp, 0, NULL, NULL);
- }
-
- myfree (tmp);
- }
-
- if (data.vendor_id == VENDOR_ID_UNKNOWN)
+ if (data.vendor_id == VENDOR_ID_GENERIC)
{
const cl_uchar zero = 0;
return (-1);
}
- uint CL_platform_sel = 1;
+ int CL_platform_sel = 1;
if (opencl_platform != NULL)
{
return (-1);
}
- if (CL_platform_sel > CL_platforms_cnt)
+ if (CL_platform_sel > (int) CL_platforms_cnt)
{
log_error ("ERROR: invalid OpenCL platforms selected");
hc_clGetPlatformInfo (CL_platform, CL_PLATFORM_VENDOR, sizeof (CL_platform_vendor), CL_platform_vendor, NULL);
- cl_device_type device_type;
+ cl_device_type device_type_filter;
uint vendor_id;
{
vendor_id = VENDOR_ID_AMD;
- device_type = CL_DEVICE_TYPE_GPU;
+ device_type_filter = CL_DEVICE_TYPE_GPU;
}
else if (strcmp (CL_platform_vendor, CL_VENDOR_NV) == 0)
{
vendor_id = VENDOR_ID_NV;
- device_type = CL_DEVICE_TYPE_GPU;
+ device_type_filter = CL_DEVICE_TYPE_GPU;
// make sure that we do not directly control the fan for NVidia
}
else if (strcmp (CL_platform_vendor, CL_VENDOR_POCL) == 0)
{
- vendor_id = VENDOR_ID_POCL;
+ if (force == 0)
+ {
+ log_error ("");
+ log_error ("ATTENTION! All pocl drivers are known to be broken due to broken LLVM <= 3.7");
+ log_error ("You are STRONGLY encouraged not to use it");
+ log_error ("You can use --force to override this but do not post error reports if you do so");
+
+ return (-1);
+ }
- device_type = CL_DEVICE_TYPE_CPU;
+ vendor_id = VENDOR_ID_GENERIC;
- gpu_temp_disable = 1;
+ device_type_filter = CL_DEVICE_TYPE_DEFAULT;
}
else
{
- vendor_id = VENDOR_ID_UNKNOWN;
+ vendor_id = VENDOR_ID_GENERIC;
- device_type = CL_DEVICE_TYPE_DEFAULT;
+ device_type_filter = CL_DEVICE_TYPE_DEFAULT;
}
- if (vendor_id == VENDOR_ID_UNKNOWN)
+ if (vendor_id == VENDOR_ID_GENERIC)
{
log_error ("Warning: unknown OpenCL vendor '%s' detected", CL_platform_vendor);
uint devices_all_cnt = 0;
- hc_clGetDeviceIDs (CL_platform, device_type, DEVICES_MAX, devices_all, (uint *) &devices_all_cnt);
+ hc_clGetDeviceIDs (CL_platform, device_type_filter, DEVICES_MAX, devices_all, (uint *) &devices_all_cnt);
int hm_adapters_all = devices_all_cnt;
device_param->device_name_chksum = mystrdup (tmp);
- if (device_type == CL_DEVICE_TYPE_CPU)
+ if (device_type & CL_DEVICE_TYPE_CPU)
{
cl_uint device_processor_cores = 1;
device_param->device_processor_cores = device_processor_cores;
}
- if (device_type == CL_DEVICE_TYPE_GPU)
+ if (device_type & CL_DEVICE_TYPE_GPU)
{
if (vendor_id == VENDOR_ID_AMD)
{
* common driver check
*/
- if (device_type == CL_DEVICE_TYPE_GPU)
+ if (device_type & CL_DEVICE_TYPE_GPU)
{
if (vendor_id == VENDOR_ID_NV)
{
uint device_processor_cores = device_param->device_processor_cores;
+ cl_device_type device_type = device_param->device_type;
+
/**
* create context for each device
*/
if (hash_mode == 3200) kernel_threads = 8;
if (hash_mode == 9000) kernel_threads = 8;
- if (device_type == CL_DEVICE_TYPE_CPU)
+ if (device_type & CL_DEVICE_TYPE_CPU)
{
// CPU still need lots of workitems, don't know why...
+ // for the testing phase, let's start with this
- kernel_accel = (kernel_accel >= 8) ? kernel_accel / 8 : 1;
+ kernel_accel = 1;
}
uint kernel_power = device_processors * kernel_threads * kernel_accel;