speed_ms[device_id] /= SPEED_CACHE;
}
- float hashes_all_ms = 0;
+ double hashes_all_ms = 0;
- float hashes_dev_ms[DEVICES_MAX] = { 0 };
+ double hashes_dev_ms[DEVICES_MAX] = { 0 };
for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
{
if (speed_ms[device_id])
{
- hashes_dev_ms[device_id] = speed_cnt[device_id] / speed_ms[device_id];
+ hashes_dev_ms[device_id] = (double) speed_cnt[device_id] / speed_ms[device_id];
hashes_all_ms += hashes_dev_ms[device_id];
}
#endif // HAVE_HWMON
}
+static void status_benchmark_automat ()
+{
+ u64 speed_cnt[DEVICES_MAX] = { 0 };
+ double speed_ms[DEVICES_MAX] = { 0 };
+
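+ // gather the raw speed counter and measured kernel time for every active device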
+ for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
+ {
+ hc_device_param_t *device_param = &data.devices_param[device_id];
+
+ if (device_param->skipped) continue;
+
+ speed_cnt[device_id] = device_param->speed_cnt[0];
+ speed_ms[device_id] = device_param->speed_ms[0];
+ }
+
+ double hashes_dev_ms[DEVICES_MAX] = { 0 };
+
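+ // per-device speed expressed as hashes per millisecond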
+ for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
+ {
+ hc_device_param_t *device_param = &data.devices_param[device_id];
+
+ if (device_param->skipped) continue;
+
+ hashes_dev_ms[device_id] = 0;
+
+ if (speed_ms[device_id])
+ {
+ hashes_dev_ms[device_id] = (double) speed_cnt[device_id] / speed_ms[device_id];
+ }
+ }
+
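+ // one machine-readable line per device: <device_id>:<hash_mode>:<hashes per second>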
+ for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
+ {
+ hc_device_param_t *device_param = &data.devices_param[device_id];
+
+ if (device_param->skipped) continue;
+
+ log_info ("%u:%u:%llu", device_id + 1, data.hash_mode, (unsigned long long int) (hashes_dev_ms[device_id] * 1000));
+ }
+}
+
static void status_benchmark ()
{
if (data.devices_status == STATUS_INIT) return;
if (data.devices_status == STATUS_STARTING) return;
+ if (data.devices_status == STATUS_BYPASS) return;
+
+ if (data.status_automat == 1)
+ {
+ status_benchmark_automat ();
+
+ return;
+ }
- if (data.words_cnt == 0) return;
u64 speed_cnt[DEVICES_MAX] = { 0 };
double speed_ms[DEVICES_MAX] = { 0 };
speed_ms[device_id] = device_param->speed_ms[0];
}
- float hashes_all_ms = 0;
+ double hashes_all_ms = 0;
- float hashes_dev_ms[DEVICES_MAX] = { 0 };
+ double hashes_dev_ms[DEVICES_MAX] = { 0 };
for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
{
if (speed_ms[device_id])
{
- hashes_dev_ms[device_id] = speed_cnt[device_id] / speed_ms[device_id];
+ hashes_dev_ms[device_id] = (double) speed_cnt[device_id] / speed_ms[device_id];
hashes_all_ms += hashes_dev_ms[device_id];
}
hc_clSetKernelArg (data.ocl, kernel, 30, sizeof (cl_uint), device_param->kernel_params[30]);
hc_clSetKernelArg (data.ocl, kernel, 31, sizeof (cl_uint), device_param->kernel_params[31]);
- hc_timer_t timer;
-
- hc_timer_set (&timer);
+ cl_event event;
if ((data.opts_type & OPTS_TYPE_PT_BITSLICE) && (data.attack_mode == ATTACK_MODE_BF))
{
const size_t global_work_size[3] = { num_elements, 32, 1 };
const size_t local_work_size[3] = { kernel_threads / 32, 32, 1 };
- hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+ hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &event);
}
else
{
- size_t workgroup_size = 0;
-
- hc_clGetKernelWorkGroupInfo (data.ocl, kernel, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &workgroup_size, NULL);
-
if (kern_run == KERN_RUN_2)
{
if (data.opti_type & OPTI_TYPE_SLOW_HASH_SIMD)
}
}
- if (kernel_threads > workgroup_size) kernel_threads = workgroup_size;
-
while (num_elements % kernel_threads) num_elements++;
const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
- hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+ hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &event);
}
hc_clFlush (data.ocl, device_param->command_queue);
- hc_clFinish (data.ocl, device_param->command_queue);
+ hc_clWaitForEvents (data.ocl, 1, &event);
if (event_update)
{
- double exec_time;
+ cl_ulong time_start;
+ cl_ulong time_end;
+
+ hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL);
+ hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL);
- hc_timer_get (timer, exec_time);
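+ // profiling timestamps are in nanoseconds; convert the elapsed time to milliseconds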
+ const double exec_time = (double) (time_end - time_start) / 1000000.0;
uint exec_pos = device_param->exec_pos;
device_param->exec_pos = exec_pos;
}
+
+ hc_clReleaseEvent (data.ocl, event);
+
+ hc_clFinish (data.ocl, device_param->command_queue);
}
static void run_kernel_mp (const uint kern_run, hc_device_param_t *device_param, const uint num)
break;
}
- size_t workgroup_size = 0;
-
- hc_clGetKernelWorkGroupInfo (data.ocl, kernel, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
-
- if (kernel_threads > workgroup_size) kernel_threads = workgroup_size;
-
const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
cl_kernel kernel = device_param->kernel_tm;
- size_t workgroup_size = 0;
-
- hc_clGetKernelWorkGroupInfo (data.ocl, kernel, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &workgroup_size, NULL);
-
- if (kernel_threads > workgroup_size) kernel_threads = workgroup_size;
-
const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
hc_clSetKernelArg (data.ocl, kernel, 5, sizeof (cl_uint), device_param->kernel_params_amp[5]);
hc_clSetKernelArg (data.ocl, kernel, 6, sizeof (cl_uint), device_param->kernel_params_amp[6]);
- size_t workgroup_size = 0;
-
- hc_clGetKernelWorkGroupInfo (data.ocl, kernel, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &workgroup_size, NULL);
-
- if (kernel_threads > workgroup_size) kernel_threads = workgroup_size;
-
const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
// balancing the workload turns out to be very efficient
- const u32 kernel_power_balance = kernel_accel * kernel_loops;
+ if (kernel_loops_min != kernel_loops_max)
+ {
+ const u32 kernel_power_balance = kernel_accel * kernel_loops;
- u32 sqrtv;
+ u32 sqrtv;
- for (sqrtv = 1; sqrtv < 0x100000; sqrtv++)
- {
- if ((sqrtv * sqrtv) >= kernel_power_balance) break;
- }
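+ // find the integer square root of the combined workload; accel and loops are both set to it to balance the split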
+ for (sqrtv = 1; sqrtv < 0x100000; sqrtv++)
+ {
+ if ((sqrtv * sqrtv) >= kernel_power_balance) break;
+ }
- const u32 kernel_accel_try = sqrtv;
- const u32 kernel_loops_try = sqrtv;
+ const u32 kernel_accel_try = sqrtv;
+ const u32 kernel_loops_try = sqrtv;
- if ((kernel_accel_try <= kernel_accel_max) && (kernel_loops_try >= kernel_loops_min))
- {
- kernel_accel = kernel_accel_try;
- kernel_loops = kernel_loops_try;
+ if ((kernel_accel_try <= kernel_accel_max) && (kernel_loops_try >= kernel_loops_min))
+ {
+ kernel_accel = kernel_accel_try;
+ kernel_loops = kernel_loops_try;
+ }
}
// reset fake words
dgst_size = DGST_SIZE_4_4;
parse_func = dcc2_parse_hash;
sort_by_digest = sort_by_digest_4_4;
- opti_type = OPTI_TYPE_ZERO_BYTE;
+ opti_type = OPTI_TYPE_ZERO_BYTE
+ | OPTI_TYPE_SLOW_HASH_SIMD;
dgst_pos0 = 0;
dgst_pos1 = 1;
dgst_pos2 = 2;
dgst_size = DGST_SIZE_4_4;
parse_func = wpa_parse_hash;
sort_by_digest = sort_by_digest_4_4;
- opti_type = OPTI_TYPE_ZERO_BYTE;
+ opti_type = OPTI_TYPE_ZERO_BYTE
+ | OPTI_TYPE_SLOW_HASH_SIMD;
dgst_pos0 = 0;
dgst_pos1 = 1;
dgst_pos2 = 2;
parse_func = sha512osx_parse_hash;
sort_by_digest = sort_by_digest_8_16;
opti_type = OPTI_TYPE_ZERO_BYTE
- | OPTI_TYPE_USES_BITS_64;
+ | OPTI_TYPE_USES_BITS_64
+ | OPTI_TYPE_SLOW_HASH_SIMD;
dgst_pos0 = 0;
dgst_pos1 = 1;
dgst_pos2 = 2;
parse_func = sha512grub_parse_hash;
sort_by_digest = sort_by_digest_8_16;
opti_type = OPTI_TYPE_ZERO_BYTE
- | OPTI_TYPE_USES_BITS_64;
+ | OPTI_TYPE_USES_BITS_64
+ | OPTI_TYPE_SLOW_HASH_SIMD;
dgst_pos0 = 0;
dgst_pos1 = 1;
dgst_pos2 = 2;
dgst_size = DGST_SIZE_4_32;
parse_func = cisco8_parse_hash;
sort_by_digest = sort_by_digest_4_32;
- opti_type = OPTI_TYPE_ZERO_BYTE;
+ opti_type = OPTI_TYPE_ZERO_BYTE
+ | OPTI_TYPE_SLOW_HASH_SIMD;
dgst_pos0 = 0;
dgst_pos1 = 1;
dgst_pos2 = 2;
dgst_size = DGST_SIZE_4_32;
parse_func = djangopbkdf2_parse_hash;
sort_by_digest = sort_by_digest_4_32;
- opti_type = OPTI_TYPE_ZERO_BYTE;
+ opti_type = OPTI_TYPE_ZERO_BYTE
+ | OPTI_TYPE_SLOW_HASH_SIMD;
dgst_pos0 = 0;
dgst_pos1 = 1;
dgst_pos2 = 2;
dgst_size = DGST_SIZE_4_32;
parse_func = pbkdf2_sha256_parse_hash;
sort_by_digest = sort_by_digest_4_32;
- opti_type = OPTI_TYPE_ZERO_BYTE;
+ opti_type = OPTI_TYPE_ZERO_BYTE
+ | OPTI_TYPE_SLOW_HASH_SIMD;
dgst_pos0 = 0;
dgst_pos1 = 1;
dgst_pos2 = 2;
dgst_size = DGST_SIZE_4_32;
parse_func = pbkdf2_md5_parse_hash;
sort_by_digest = sort_by_digest_4_32;
- opti_type = OPTI_TYPE_ZERO_BYTE;
+ opti_type = OPTI_TYPE_ZERO_BYTE
+ | OPTI_TYPE_SLOW_HASH_SIMD;
dgst_pos0 = 0;
dgst_pos1 = 1;
dgst_pos2 = 2;
dgst_size = DGST_SIZE_4_32;
parse_func = pbkdf2_sha1_parse_hash;
sort_by_digest = sort_by_digest_4_32;
- opti_type = OPTI_TYPE_ZERO_BYTE;
+ opti_type = OPTI_TYPE_ZERO_BYTE
+ | OPTI_TYPE_SLOW_HASH_SIMD;
dgst_pos0 = 0;
dgst_pos1 = 1;
dgst_pos2 = 2;
parse_func = pbkdf2_sha512_parse_hash;
sort_by_digest = sort_by_digest_8_16;
opti_type = OPTI_TYPE_ZERO_BYTE
- | OPTI_TYPE_USES_BITS_64;
+ | OPTI_TYPE_USES_BITS_64
+ | OPTI_TYPE_SLOW_HASH_SIMD;
dgst_pos0 = 0;
dgst_pos1 = 1;
dgst_pos2 = 2;
}
else if (strcmp (platform_vendor, CL_VENDOR_APPLE) == 0)
{
- vendor_id = VENDOR_ID_GENERIC;
+ vendor_id = VENDOR_ID_APPLE;
}
else if (strcmp (platform_vendor, CL_VENDOR_INTEL_BEIGNET) == 0)
{
- vendor_id = VENDOR_ID_GENERIC;
+ vendor_id = VENDOR_ID_INTEL_BEIGNET;
}
else if (strcmp (platform_vendor, CL_VENDOR_INTEL_SDK) == 0)
{
- vendor_id = VENDOR_ID_GENERIC;
+ vendor_id = VENDOR_ID_INTEL_SDK;
}
else if (strcmp (platform_vendor, CL_VENDOR_MESA) == 0)
{
- vendor_id = VENDOR_ID_GENERIC;
+ vendor_id = VENDOR_ID_MESA;
}
else if (strcmp (platform_vendor, CL_VENDOR_NV) == 0)
{
}
else if (strcmp (platform_vendor, CL_VENDOR_POCL) == 0)
{
- vendor_id = VENDOR_ID_GENERIC;
+ vendor_id = VENDOR_ID_POCL;
}
else
{
snprintf (build_opts, sizeof (build_opts) - 1, "-cl-std=CL1.1 -I\"%s/\" -DVENDOR_ID=%u -DCUDA_ARCH=%d -DVECT_SIZE=%u -DDEVICE_TYPE=%u -DKERN_TYPE=%u -D_unroll", shared_dir, device_param->vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->vector_width, (u32) device_param->device_type, kern_type);
+ if (device_param->vendor_id == VENDOR_ID_INTEL_SDK)
+ {
+ // we do vectorizing much better than the auto-vectorizer
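+ // -cl-opt-disable turns off the OpenCL compiler's optimizations, including its auto-vectorizer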
+
+ char build_opts_new[1024] = { 0 };
+
+ snprintf (build_opts_new, sizeof (build_opts_new) - 1, "%s -cl-opt-disable", build_opts);
+
+ strncpy (build_opts, build_opts_new, sizeof (build_opts) - 1);
+ }
+
+ #ifdef DEBUG
+ log_info ("Device #%u: build_opts '%s'\n", device_id + 1, build_opts);
+ #endif
+
/**
* main kernel
*/
* kernel name
*/
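+ // each kernel's CL_KERNEL_WORK_GROUP_SIZE is queried below; kernel_threads is clamped to the smallest value found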
+ size_t kernel_wgs_tmp;
+
char kernel_name[64] = { 0 };
if (attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05d_tm", kern_type);
device_param->kernel_tm = hc_clCreateKernel (data.ocl, device_param->program, kernel_name);
+
+ hc_clGetKernelWorkGroupInfo (data.ocl, device_param->kernel_tm, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp);
}
}
}
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05d_hook12", kern_type);
device_param->kernel12 = hc_clCreateKernel (data.ocl, device_param->program, kernel_name);
+
+ hc_clGetKernelWorkGroupInfo (data.ocl, device_param->kernel12, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp);
}
if (opts_type & OPTS_TYPE_HOOK23)
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05d_hook23", kern_type);
device_param->kernel23 = hc_clCreateKernel (data.ocl, device_param->program, kernel_name);
+
+ hc_clGetKernelWorkGroupInfo (data.ocl, device_param->kernel23, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp);
}
}
+ hc_clGetKernelWorkGroupInfo (data.ocl, device_param->kernel1, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp);
+ hc_clGetKernelWorkGroupInfo (data.ocl, device_param->kernel2, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp);
+ hc_clGetKernelWorkGroupInfo (data.ocl, device_param->kernel3, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp);
+
for (uint i = 0; i <= 20; i++)
{
hc_clSetKernelArg (data.ocl, device_param->kernel1, i, sizeof (cl_mem), device_param->kernel_params[i]);
device_param->kernel_mp_l = hc_clCreateKernel (data.ocl, device_param->program_mp, "l_markov");
device_param->kernel_mp_r = hc_clCreateKernel (data.ocl, device_param->program_mp, "r_markov");
+ hc_clGetKernelWorkGroupInfo (data.ocl, device_param->kernel_mp_l, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp);
+ hc_clGetKernelWorkGroupInfo (data.ocl, device_param->kernel_mp_r, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp);
+
if (opts_type & OPTS_TYPE_PT_BITSLICE)
{
hc_clSetKernelArg (data.ocl, device_param->kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]);
else if (attack_mode == ATTACK_MODE_HYBRID1)
{
device_param->kernel_mp = hc_clCreateKernel (data.ocl, device_param->program_mp, "C_markov");
+
+ hc_clGetKernelWorkGroupInfo (data.ocl, device_param->kernel_mp, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp);
}
else if (attack_mode == ATTACK_MODE_HYBRID2)
{
device_param->kernel_mp = hc_clCreateKernel (data.ocl, device_param->program_mp, "C_markov");
+
+ hc_clGetKernelWorkGroupInfo (data.ocl, device_param->kernel_mp, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp);
}
if (attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
else
{
device_param->kernel_amp = hc_clCreateKernel (data.ocl, device_param->program_amp, "amp");
+
+ hc_clGetKernelWorkGroupInfo (data.ocl, device_param->kernel_amp, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp);
}
if (attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
}
}
+ // kernel_threads may have been lowered by the clGetKernelWorkGroupInfo () queries above
+ // the value can only decrease, so no buffers need to be reallocated
+
+ device_param->kernel_threads = kernel_threads;
+
/**
* Store initial fanspeed if gpu_temp_retain is enabled
*/
* In benchmark-mode, inform user which algorithm is checked
*/
- if (benchmark == 1)
+ if (benchmark == 1 && status_automat == 0)
{
quiet = 0;
{
status_benchmark ();
- log_info ("");
+ if (status_automat == 0)
+ {
+ log_info ("");
+ }
}
else
{