hc_clFinish (data.ocl, device_param->command_queue);
}
+static void run_kernel_memset (hc_device_param_t *device_param, cl_mem buf, const uint value, const uint num)
+{
+ // Fill `num` bytes of the device buffer `buf` with `value`, using the
+ // gpu_memset kernel for the bulk (whole 16-byte chunks) and a small
+ // blocking host-side write for the remainder.
+ //
+ // NOTE(review): the remainder path copies raw bytes out of a u32[4]
+ // pattern, so for non-zero `value` the buffer is filled with the u32
+ // little-endian replication of `value`, not a single repeated byte.
+ // Callers in this file only pass 0 or 7 — presumably the distinction
+ // does not matter for that tagging use. TODO confirm.
+ const u32 num16d = num / 16;
+ const u32 num16m = num % 16;
+
+ if (num16d)
+ {
+ // Kernel arguments are passed indirectly through kernel_params_memset,
+ // whose slots 1 and 2 point into kernel_params_memset_buf32.
+ device_param->kernel_params_memset_buf32[1] = value;
+ device_param->kernel_params_memset_buf32[2] = num16d;
+
+ uint kernel_threads = device_param->kernel_threads;
+
+ uint num_elements = num16d;
+
+ // Round the global work size up to a multiple of the local work size,
+ // as required by clEnqueueNDRangeKernel; the kernel is expected to
+ // bounds-check against gid_max (buf32[2]) for the padded work-items.
+ while (num_elements % kernel_threads) num_elements++;
+
+ cl_kernel kernel = device_param->kernel_memset;
+
+ // Arg 0 (the target buffer) changes per call, so it is set directly;
+ // args 1 and 2 are re-set from the parameter table each call as well.
+ hc_clSetKernelArg (data.ocl, kernel, 0, sizeof (cl_mem), (void *) &buf);
+ hc_clSetKernelArg (data.ocl, kernel, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]);
+ hc_clSetKernelArg (data.ocl, kernel, 2, sizeof (cl_uint), device_param->kernel_params_memset[2]);
+
+ const size_t global_work_size[3] = { num_elements, 1, 1 };
+ const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+
+ hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+
+ hc_clFlush (data.ocl, device_param->command_queue);
+
+ // Block until the bulk fill is complete before (possibly) writing the tail.
+ hc_clFinish (data.ocl, device_param->command_queue);
+ }
+
+ if (num16m)
+ {
+ // Tail of 1..15 bytes: write it from the host with a blocking
+ // (CL_TRUE) transfer at the first unfilled byte offset.
+ u32 tmp[4];
+
+ tmp[0] = value;
+ tmp[1] = value;
+ tmp[2] = value;
+ tmp[3] = value;
+
+ hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, buf, CL_TRUE, num16d * 16, num16m, tmp, 0, NULL, NULL);
+ }
+}
+
static void run_kernel_bzero (hc_device_param_t *device_param, cl_mem buf, const size_t size)
{
+ // Zero `size` bytes of the device buffer `buf`. Now a thin wrapper
+ // around run_kernel_memset with value 0; the previous host-assisted
+ // implementation is kept below, disabled, for reference.
+ run_kernel_memset (device_param, buf, 0, size);
+
+ /*
+ // Old implementation (disabled): host-side zero buffer plus an
+ // AMD/OpenCL-1.2-specific fill path.
 int rc = -1;
 if (device_param->opencl_v12 && device_param->platform_vendor_id == VENDOR_ID_AMD)
 myfree (tmp);
 }
+ */
}
static void choose_kernel (hc_device_param_t *device_param, const uint attack_exec, const uint attack_mode, const uint opts_type, const salt_t *salt_buf, const uint highest_pw_len, const uint pws_cnt)
const u32 kernel_power_max = device_param->device_processors * device_param->kernel_threads * kernel_accel_max;
+ run_kernel_memset (device_param, device_param->d_pws_buf, 7, kernel_power_max * sizeof (pw_t));
+
+ if (data.attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
+ {
+ run_kernel_memset (device_param, device_param->d_pws_amp_buf, 7, kernel_power_max * sizeof (pw_t));
+ }
+
+ /*
for (u32 i = 0; i < kernel_power_max; i++)
{
device_param->pws_buf[i].i[0] = i;
{
run_kernel_amp (device_param, kernel_power_max);
}
+ */
#define VERIFIER_CNT 1
// reset them fake words
+ /*
memset (device_param->pws_buf, 0, kernel_power_max * sizeof (pw_t));
hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, kernel_power_max * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_amp_buf, CL_TRUE, 0, kernel_power_max * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
+ */
+
+ run_kernel_memset (device_param, device_param->d_pws_buf, 0, kernel_power_max * sizeof (pw_t));
+
+ if (data.attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
+ {
+ run_kernel_memset (device_param, device_param->d_pws_amp_buf, 0, kernel_power_max * sizeof (pw_t));
+ }
// reset timer
hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_digests_shown, CL_TRUE, 0, size_shown, data.digests_shown, 0, NULL, NULL);
hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_salt_bufs, CL_TRUE, 0, size_salts, data.salts_buf, 0, NULL, NULL);
- run_kernel_bzero (device_param, device_param->d_pws_buf, size_pws);
- run_kernel_bzero (device_param, device_param->d_pws_amp_buf, size_pws);
- run_kernel_bzero (device_param, device_param->d_tmps, size_tmps);
- run_kernel_bzero (device_param, device_param->d_hooks, size_hooks);
- run_kernel_bzero (device_param, device_param->d_plain_bufs, size_plains);
- run_kernel_bzero (device_param, device_param->d_result, size_results);
-
/**
* special buffers
*/
device_param->d_rules_c = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_rules_c, NULL);
hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_rules, CL_TRUE, 0, size_rules, kernel_rules_buf, 0, NULL, NULL);
-
- run_kernel_bzero (device_param, device_param->d_rules_c, size_rules_c);
}
else if (attack_kern == ATTACK_KERN_COMBI)
{
device_param->d_combs_c = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_combs, NULL);
device_param->d_root_css_buf = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_root_css, NULL);
device_param->d_markov_css_buf = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_markov_css, NULL);
-
- run_kernel_bzero (device_param, device_param->d_combs, size_combs);
- run_kernel_bzero (device_param, device_param->d_combs_c, size_combs);
- run_kernel_bzero (device_param, device_param->d_root_css_buf, size_root_css);
- run_kernel_bzero (device_param, device_param->d_markov_css_buf, size_markov_css);
}
else if (attack_kern == ATTACK_KERN_BF)
{
device_param->d_tm_c = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_tm, NULL);
device_param->d_root_css_buf = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_root_css, NULL);
device_param->d_markov_css_buf = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_markov_css, NULL);
-
- run_kernel_bzero (device_param, device_param->d_bfs, size_bfs);
- run_kernel_bzero (device_param, device_param->d_bfs_c, size_bfs);
- run_kernel_bzero (device_param, device_param->d_tm_c, size_tm);
- run_kernel_bzero (device_param, device_param->d_root_css_buf, size_root_css);
- run_kernel_bzero (device_param, device_param->d_markov_css_buf, size_markov_css);
}
if (size_esalts)
device_param->kernel_params_tm[0] = &device_param->d_bfs_c;
device_param->kernel_params_tm[1] = &device_param->d_tm_c;
+ device_param->kernel_params_memset_buf32[1] = 0; // value
+ device_param->kernel_params_memset_buf32[2] = 0; // gid_max
+
+ device_param->kernel_params_memset[0] = NULL;
+ device_param->kernel_params_memset[1] = &device_param->kernel_params_memset_buf32[1];
+ device_param->kernel_params_memset[2] = &device_param->kernel_params_memset_buf32[2];
+
/**
* kernel name
*/
if (opts_type & OPTS_TYPE_HOOK23) hc_clSetKernelArg (data.ocl, device_param->kernel23, i, sizeof (cl_uint), device_param->kernel_params[i]);
}
+ // GPU memset
+
+ device_param->kernel_memset = hc_clCreateKernel (data.ocl, device_param->program, "gpu_memset");
+
+ hc_clGetKernelWorkGroupInfo (data.ocl, device_param->kernel_memset, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp);
+
+ hc_clSetKernelArg (data.ocl, device_param->kernel_memset, 0, sizeof (cl_mem), device_param->kernel_params_memset[0]);
+ hc_clSetKernelArg (data.ocl, device_param->kernel_memset, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]);
+ hc_clSetKernelArg (data.ocl, device_param->kernel_memset, 2, sizeof (cl_uint), device_param->kernel_params_memset[2]);
+
+ // MP start
+
if (attack_mode == ATTACK_MODE_BF)
{
device_param->kernel_mp_l = hc_clCreateKernel (data.ocl, device_param->program_mp, "l_markov");
device_param->kernel_threads = kernel_threads;
+ // zero some data buffers
+
+ run_kernel_bzero (device_param, device_param->d_pws_buf, size_pws);
+ run_kernel_bzero (device_param, device_param->d_pws_amp_buf, size_pws);
+ run_kernel_bzero (device_param, device_param->d_tmps, size_tmps);
+ run_kernel_bzero (device_param, device_param->d_hooks, size_hooks);
+ run_kernel_bzero (device_param, device_param->d_plain_bufs, size_plains);
+ run_kernel_bzero (device_param, device_param->d_result, size_results);
+
+ /**
+ * special buffers
+ */
+
+ if (attack_kern == ATTACK_KERN_STRAIGHT)
+ {
+ run_kernel_bzero (device_param, device_param->d_rules_c, size_rules_c);
+ }
+ else if (attack_kern == ATTACK_KERN_COMBI)
+ {
+ run_kernel_bzero (device_param, device_param->d_combs, size_combs);
+ run_kernel_bzero (device_param, device_param->d_combs_c, size_combs);
+ run_kernel_bzero (device_param, device_param->d_root_css_buf, size_root_css);
+ run_kernel_bzero (device_param, device_param->d_markov_css_buf, size_markov_css);
+ }
+ else if (attack_kern == ATTACK_KERN_BF)
+ {
+ run_kernel_bzero (device_param, device_param->d_bfs, size_bfs);
+ run_kernel_bzero (device_param, device_param->d_bfs_c, size_bfs);
+ run_kernel_bzero (device_param, device_param->d_tm_c, size_tm);
+ run_kernel_bzero (device_param, device_param->d_root_css_buf, size_root_css);
+ run_kernel_bzero (device_param, device_param->d_markov_css_buf, size_markov_css);
+ }
+
/**
* Store initial fanspeed if gpu_temp_retain is enabled
*/