/**
 * Zero the first `size` bytes of the device buffer `buf`.
 *
 * On AMD the buffer is cleared with a single hc_clEnqueueFillBuffer () call.
 * Every other vendor uses the host-side workaround: a zeroed scratch buffer
 * of FILLSZ bytes is written to the device repeatedly with blocking
 * hc_clEnqueueWriteBuffer () calls until `size` bytes are covered.
 *
 * device_param  device whose command_queue receives the commands
 * buf           device buffer to clear
 * size          number of bytes to clear, starting at offset 0
 */
static void run_kernel_bzero (hc_device_param_t *device_param, cl_mem buf, const uint size)
{
  // nothing to clear; avoids a zero-length enqueue and a pointless allocation

  if (size == 0) return;

  if (data.vendor_id == VENDOR_ID_AMD)
  {
    const cl_uchar zero = 0;

    hc_clEnqueueFillBuffer (device_param->command_queue, buf, &zero, sizeof (cl_uchar), 0, size, 0, NULL, NULL);

    return;
  }

  // NOTE: clEnqueueFillBuffer () always fails with -59 on Nvidia
  //       ForceWare <= 352.21, even though OpenCL 1.2 support is advertised.
  //       The chunked host write below is the workaround. It is also used
  //       for any other non-AMD vendor so the buffer is never silently
  //       left uncleared.

  #define FILLSZ 0x100000

  char *tmp = (char *) mymalloc (FILLSZ);

  memset (tmp, 0, FILLSZ);

  // Track offset/remaining as unsigned values: offset + chunk never exceeds
  // size, so neither counter can wrap or go negative for any uint size.

  uint offset = 0;
  uint left   = size;

  while (left > 0)
  {
    const uint chunk = (left < FILLSZ) ? left : FILLSZ;

    hc_clEnqueueWriteBuffer (device_param->command_queue, buf, CL_TRUE, offset, chunk, tmp, 0, NULL, NULL);

    offset += chunk;
    left   -= chunk;
  }

  myfree (tmp);
}
static int run_rule_engine (const int rule_len, const char *rule_buf)