/**
* Authors.....: Jens Steube <jens.steube@gmail.com>
* Gabriele Gristina <matrix@hashcat.net>
+ * magnum <john.magnum@hushmail.com>
*
* License.....: MIT
*/
#define MARKOV_DISABLE 0
#define MARKOV_CLASSIC 0
#define BENCHMARK 0
+#define BENCHMARK_REPEATS 2
#define RESTORE 0
#define RESTORE_TIMER 60
#define RESTORE_DISABLE 0
#define MAX_DICTSTAT 10000
-#define NUM_DEFAULT_BENCHMARK_ALGORITHMS 130
+#define NUM_DEFAULT_BENCHMARK_ALGORITHMS 131
#define global_free(attr) \
{ \
5600,
7300,
7500,
+ 13100,
8300,
11100,
11200,
101,
111,
1711,
- 3000, // broken in osx
+ 3000,
1000,
1100,
2100,
12800,
- 1500, // broken in osx
+ 1500,
12400,
500,
3200,
10410,
10500,
10600,
- 10700, // broken in osx
+ 10700,
9000,
5200,
6800,
"* Resources:",
"",
" -b, --benchmark Run benchmark",
+ " --benchmark-repeats=NUM Repeat the kernel on the device NUM times to increase benchmark accuracy",
" -c, --segment-size=NUM Size in MB to cache from the wordfile",
" --bitmap-min=NUM Minimum number of bits allowed for bitmaps",
" --bitmap-max=NUM Maximum number of bits allowed for bitmaps",
" --opencl-platforms=STR OpenCL platforms to use, separate with comma",
" -d, --opencl-devices=STR OpenCL devices to use, separate with comma",
" --opencl-device-types=STR OpenCL device-types to use, separate with comma, see references below",
- " --opencl-vector-width=NUM OpenCL vector-width (either 1, 2, 4 or 8), overrides value from device query",
+ " --opencl-vector-width=NUM OpenCL vector-width (either 1, 2, 4, 8 or 16), overrides value from device query",
" -w, --workload-profile=NUM Enable a specific workload profile, see references below",
" -n, --kernel-accel=NUM Workload tuning, increase the outer-loop step size",
" -u, --kernel-loops=NUM Workload tuning, increase the inner-loop step size",
" 11100 = PostgreSQL Challenge-Response Authentication (MD5)",
" 11200 = MySQL Challenge-Response Authentication (SHA1)",
" 11400 = SIP digest authentication (MD5)",
+ " 13100 = Kerberos 5 TGS-REP etype 23",
"",
"[[ Forums, CMS, E-Commerce, Frameworks, Middleware, Wiki, Management ]]",
"",
hc_clSetKernelArg (data.ocl, kernel, 30, sizeof (cl_uint), device_param->kernel_params[30]);
hc_clSetKernelArg (data.ocl, kernel, 31, sizeof (cl_uint), device_param->kernel_params[31]);
- cl_event event;
+ hc_timer_t timer;
+
+ hc_timer_set (&timer);
if ((data.opts_type & OPTS_TYPE_PT_BITSLICE) && (data.attack_mode == ATTACK_MODE_BF))
{
const size_t global_work_size[3] = { num_elements, 32, 1 };
const size_t local_work_size[3] = { kernel_threads / 32, 32, 1 };
- hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &event, true);
+ hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
}
else
{
- const size_t global_work_size[3] = { num_elements, 1, 1 };
- const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+ size_t workgroup_size = 0;
- const cl_int rc = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &event, false);
+ hc_clGetKernelWorkGroupInfo (data.ocl, kernel, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &workgroup_size, NULL);
- if (rc != CL_SUCCESS)
- {
- const size_t local_work_size_fallback[3] = { 1, 1, 1 };
+ if (kernel_threads > workgroup_size) kernel_threads = workgroup_size;
- hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size_fallback, 0, NULL, &event, true);
- }
+ const size_t global_work_size[3] = { num_elements, 1, 1 };
+ const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+
+ hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
}
hc_clFlush (data.ocl, device_param->command_queue);
- hc_clWaitForEvents (data.ocl, 1, &event);
+ hc_clFinish (data.ocl, device_param->command_queue);
if (event_update)
{
- cl_ulong time_start;
- cl_ulong time_end;
-
- hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL);
- hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL);
+ float exec_time;
- const double exec_time = (time_end - time_start) / 1000000.0;
+ hc_timer_get (timer, exec_time);
uint exec_pos = device_param->exec_pos;
device_param->exec_pos = exec_pos;
}
-
- hc_clReleaseEvent (data.ocl, event);
-
- hc_clFinish (data.ocl, device_param->command_queue);
}
static void run_kernel_mp (const uint kern_run, hc_device_param_t *device_param, const uint num)
// causes problems with special threads like in bcrypt
// const uint kernel_threads = device_param->kernel_threads;
- const uint kernel_threads = KERNEL_THREADS;
+ uint kernel_threads = KERNEL_THREADS;
while (num_elements % kernel_threads) num_elements++;
break;
}
+ size_t workgroup_size = 0;
+ hc_clGetKernelWorkGroupInfo (data.ocl, kernel, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
+ if (kernel_threads > workgroup_size) kernel_threads = workgroup_size;
+
const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
- const cl_int rc = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL, false);
-
- if (rc != CL_SUCCESS)
- {
- const size_t local_work_size_fallback[3] = { 1, 1, 1 };
-
- hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size_fallback, 0, NULL, NULL, true);
- }
+ hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
hc_clFlush (data.ocl, device_param->command_queue);
cl_kernel kernel = device_param->kernel_tb;
+ size_t workgroup_size = 0;
+ hc_clGetKernelWorkGroupInfo (data.ocl, kernel, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &workgroup_size, NULL);
+ if (kernel_threads > workgroup_size) kernel_threads = workgroup_size;
+
const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
- const cl_int rc = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL, false);
-
- if (rc != CL_SUCCESS)
- {
- const size_t local_work_size_fallback[3] = { 1, 1, 1 };
-
- hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size_fallback, 0, NULL, NULL, true);
- }
+ hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
hc_clFlush (data.ocl, device_param->command_queue);
{
const uint num_elements = 1024; // fixed
- const uint kernel_threads = 32;
+ uint kernel_threads = 32;
cl_kernel kernel = device_param->kernel_tm;
+ size_t workgroup_size = 0;
+ hc_clGetKernelWorkGroupInfo (data.ocl, kernel, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &workgroup_size, NULL);
+ if (kernel_threads > workgroup_size) kernel_threads = workgroup_size;
+
const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
- const cl_int rc = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL, false);
-
- if (rc != CL_SUCCESS)
- {
- const size_t local_work_size_fallback[3] = { 1, 1, 1 };
-
- hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size_fallback, 0, NULL, NULL, true);
- }
+ hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
hc_clFlush (data.ocl, device_param->command_queue);
// causes problems with special threads like in bcrypt
// const uint kernel_threads = device_param->kernel_threads;
- const uint kernel_threads = KERNEL_THREADS;
+ uint kernel_threads = KERNEL_THREADS;
while (num_elements % kernel_threads) num_elements++;
hc_clSetKernelArg (data.ocl, kernel, 5, sizeof (cl_uint), device_param->kernel_params_amp[5]);
hc_clSetKernelArg (data.ocl, kernel, 6, sizeof (cl_uint), device_param->kernel_params_amp[6]);
+ size_t workgroup_size = 0;
+ hc_clGetKernelWorkGroupInfo (data.ocl, kernel, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &workgroup_size, NULL);
+ if (kernel_threads > workgroup_size) kernel_threads = workgroup_size;
+
const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
- const cl_int rc = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL, false);
-
- if (rc != CL_SUCCESS)
- {
- const size_t local_work_size_fallback[3] = { 1, 1, 1 };
-
- hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size_fallback, 0, NULL, NULL, true);
- }
+ hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
hc_clFlush (data.ocl, device_param->command_queue);
}
}
+// Run the kernel sequence for one batch of candidate passwords on a device.
+//
+// NOTE(review): this function was factored out of the inner cracking loop so
+// the benchmark path can invoke the identical sequence repeatedly.  The
+// KERN_RUN_* constants are defined elsewhere in this file; the comments below
+// describe only what is visible here.
+static void choose_kernel (hc_device_param_t *device_param, const uint attack_exec, const uint attack_mode, const uint opts_type, const salt_t *salt_buf, const uint highest_pw_len, const uint pws_cnt)
+{
+  if (attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+  {
+    if (attack_mode == ATTACK_MODE_BF)
+    {
+      if (opts_type & OPTS_TYPE_PT_BITSLICE)
+      {
+        // bitsliced brute-force: zero and regenerate the transpose table
+        // (d_tm_c) on the device, then copy it over the base-candidate
+        // constants buffer (d_bfs_c) before the main kernel runs
+        const uint size_tm = 32 * sizeof (bs_word_t);
+
+        run_kernel_bzero (device_param, device_param->d_tm_c, size_tm);
+
+        run_kernel_tm (device_param);
+
+        hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_tm_c, device_param->d_bfs_c, 0, 0, size_tm, 0, NULL, NULL);
+      }
+    }
+
+    // fast (single-shot) algorithm: one kernel launch handles everything;
+    // pick the kernel variant by the longest password length in this batch
+    if (highest_pw_len < 16)
+    {
+      run_kernel (KERN_RUN_1, device_param, pws_cnt, true);
+    }
+    else if (highest_pw_len < 32)
+    {
+      run_kernel (KERN_RUN_2, device_param, pws_cnt, true);
+    }
+    else
+    {
+      run_kernel (KERN_RUN_3, device_param, pws_cnt, true);
+    }
+  }
+  else
+  {
+    // slow (iterated) algorithm: expand the candidates with the amplifier
+    // kernel first, then run stages 1, 2 (chunked), and 3 in sequence
+    run_kernel_amp (device_param, pws_cnt);
+
+    run_kernel (KERN_RUN_1, device_param, pws_cnt, false);
+
+    if (opts_type & OPTS_TYPE_HOOK12)
+    {
+      // optional host-visible hook between stage 1 and the iteration loop
+      run_kernel (KERN_RUN_12, device_param, pws_cnt, false);
+    }
+
+    uint iter = salt_buf->salt_iter;
+
+    uint loop_step = device_param->kernel_loops;
+
+    // split the salt's iteration count into chunks of loop_step iterations;
+    // kernel_params_buf32[25]/[26] carry the current position and chunk size
+    for (uint loop_pos = 0; loop_pos < iter; loop_pos += loop_step)
+    {
+      uint loop_left = iter - loop_pos;
+
+      loop_left = MIN (loop_left, loop_step);
+
+      device_param->kernel_params_buf32[25] = loop_pos;
+      device_param->kernel_params_buf32[26] = loop_left;
+
+      run_kernel (KERN_RUN_2, device_param, pws_cnt, true);
+
+      // bail out early if another thread signalled a terminal state
+      if (data.devices_status == STATUS_CRACKED) break;
+      if (data.devices_status == STATUS_ABORTED) break;
+      if (data.devices_status == STATUS_QUIT) break;
+    }
+
+    if (opts_type & OPTS_TYPE_HOOK23)
+    {
+      // host-side hook between stage 2 and stage 3: run the hook kernel,
+      // read the hook buffer back (blocking), process it on the host
+      // (placeholder below), then write it back (blocking)
+      run_kernel (KERN_RUN_23, device_param, pws_cnt, false);
+
+      hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+
+      // do something with data
+
+      hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+    }
+
+    run_kernel (KERN_RUN_3, device_param, pws_cnt, false);
+  }
+}
+
static int run_rule_engine (const int rule_len, const char *rule_buf)
{
if (rule_len == 0)
device_param->kernel_params_buf32[26] = kernel_loops;
device_param->kernel_params_buf32[27] = kernel_loops;
+ // init some fake words
+
+ if (data.attack_kern == ATTACK_KERN_BF)
+ {
+ run_kernel_mp (KERN_RUN_MP_L, device_param, kernel_power);
+ run_kernel_mp (KERN_RUN_MP_R, device_param, kernel_loops);
+ }
+ else
+ {
+ for (u32 i = 0; i < kernel_power; i++)
+ {
+ device_param->pws_buf[i].pw_len = i & 7;
+ }
+
+ hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, kernel_power * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
+
+ if (data.attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
+ {
+ run_kernel_amp (device_param, kernel_power);
+ }
+ }
+
// caching run
if (data.attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
const double exec_ms_prev = get_avg_exec_time (device_param, repeat);
+ // reset fake words
+
+ memset (device_param->pws_buf, 0, kernel_power * sizeof (pw_t));
+
+ hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, kernel_power * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
+ hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_amp_buf, CL_TRUE, 0, kernel_power * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
+
return exec_ms_prev;
}
{
const double target_ms = TARGET_MS_PROFILE[data.workload_profile - 1];
- u32 kernel_loops_min = device_param->kernel_loops_min;
- u32 kernel_loops_max = device_param->kernel_loops_max;
+ const u32 kernel_accel_min = device_param->kernel_accel_min;
+ const u32 kernel_accel_max = device_param->kernel_accel_max;
- u32 kernel_accel_min = device_param->kernel_accel_min;
- u32 kernel_accel_max = device_param->kernel_accel_max;
+ const u32 kernel_loops_min = device_param->kernel_loops_min;
+ const u32 kernel_loops_max = device_param->kernel_loops_max;
- u32 kernel_loops = kernel_loops_min;
u32 kernel_accel = kernel_accel_min;
+ u32 kernel_loops = kernel_loops_min;
- // init some fake words
-
- const u32 kernel_power_max = device_param->device_processors * device_param->kernel_threads * device_param->kernel_accel_max;
+ // steps
- for (u32 i = 0; i < kernel_power_max; i++)
- {
- device_param->pws_buf[i].pw_len = 8;
- }
+ #define STEPS_CNT 10
- hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, device_param->size_pws, device_param->pws_buf, 0, NULL, NULL);
- hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_amp_buf, CL_TRUE, 0, device_param->size_pws, device_param->pws_buf, 0, NULL, NULL);
+ #define STEPS_ACCEL_CNT (STEPS_CNT + 2)
+ #define STEPS_LOOPS_CNT (STEPS_CNT + 2)
- // steps for loops
+ u32 steps_accel[STEPS_ACCEL_CNT];
+ u32 steps_loops[STEPS_LOOPS_CNT];
- #define STEPS_LOOPS_CNT 15
+ for (int i = 0; i < STEPS_ACCEL_CNT; i++)
+ {
+ steps_accel[i] = 1 << i;
+ }
- u32 steps_loops[STEPS_LOOPS_CNT];
+ for (int i = 0; i < STEPS_LOOPS_CNT; i++)
+ {
+ steps_loops[i] = 1 << i;
+ }
- steps_loops[ 0] = 1;
- steps_loops[ 1] = 2;
- steps_loops[ 2] = 4;
- steps_loops[ 3] = 8;
- steps_loops[ 4] = 16;
- steps_loops[ 5] = 32;
- steps_loops[ 6] = 64;
- steps_loops[ 7] = 100;
- steps_loops[ 8] = 128;
- steps_loops[ 9] = 200;
- steps_loops[10] = 256;
- steps_loops[11] = 500;
- steps_loops[12] = 512;
- steps_loops[13] = 1000;
- steps_loops[14] = 1024;
-
- // steps for accel
-
- #define STEPS_ACCEL_CNT 13
+ steps_accel[STEPS_CNT + 0] = kernel_accel_min;
+ steps_accel[STEPS_CNT + 1] = kernel_accel_max;
- u32 steps_accel[STEPS_ACCEL_CNT];
+ steps_loops[STEPS_CNT + 0] = kernel_loops_min;
+ steps_loops[STEPS_CNT + 1] = kernel_loops_max;
- steps_accel[ 0] = 1;
- steps_accel[ 1] = 2;
- steps_accel[ 2] = 4;
- steps_accel[ 3] = 8;
- steps_accel[ 4] = 16;
- steps_accel[ 5] = 32;
- steps_accel[ 6] = 64;
- steps_accel[ 7] = 128;
- steps_accel[ 8] = 256;
- steps_accel[ 9] = 384;
- steps_accel[10] = 512;
- steps_accel[11] = 768;
- steps_accel[12] = 1024;
+ qsort (steps_accel, STEPS_ACCEL_CNT, sizeof (u32), sort_by_u32);
+ qsort (steps_loops, STEPS_LOOPS_CNT, sizeof (u32), sort_by_u32);
// find out highest kernel-loops that stays below target_ms, we can use it later for multiplication as this is a linear function
u32 kernel_loops_tmp;
- for (kernel_loops_tmp = MIN (kernel_loops_max, 200); kernel_loops_tmp >= kernel_loops_min; kernel_loops_tmp >>= 1)
+ for (kernel_loops_tmp = kernel_loops_max; kernel_loops_tmp > kernel_loops_min; kernel_loops_tmp >>= 1)
{
const double exec_ms = try_run (device_param, kernel_accel_min, kernel_loops_tmp, 1);
if (exec_ms < target_ms) break;
-
- if (kernel_loops_tmp == kernel_loops_min) break;
}
// kernel-accel
- double e_best = 0;
-
- for (int i = 0; i < STEPS_ACCEL_CNT; i++)
+ if (kernel_accel_min < kernel_accel_max)
{
- const u32 kernel_accel_try = steps_accel[i];
+ double e_best = 0;
- if (kernel_accel_try < kernel_accel_min) continue;
- if (kernel_accel_try > kernel_accel_max) break;
+ for (int i = 0; i < STEPS_ACCEL_CNT; i++)
+ {
+ const u32 kernel_accel_try = steps_accel[i];
- const double exec_ms = try_run (device_param, kernel_accel_try, kernel_loops_tmp, 1);
+ if (kernel_accel_try < kernel_accel_min) continue;
+ if (kernel_accel_try > kernel_accel_max) break;
- if (exec_ms > target_ms) break;
+ const double exec_ms = try_run (device_param, kernel_accel_try, kernel_loops_tmp, 1);
- const double e = kernel_accel_try / exec_ms;
+ if (exec_ms > target_ms) break;
- if (e > e_best)
- {
- kernel_accel = kernel_accel_try;
+ const double e = kernel_accel_try / exec_ms;
- e_best = e;
+ if (e > e_best)
+ {
+ kernel_accel = kernel_accel_try;
+
+ e_best = e;
+ }
}
}
// kernel-loops final
- e_best = 0;
-
- for (int i = 0; i < STEPS_LOOPS_CNT; i++)
+ if (kernel_loops_min < kernel_loops_max)
{
- const u32 kernel_loops_try = steps_loops[i];
+ double e_best = 0;
- if (kernel_loops_try < kernel_loops_min) continue;
- if (kernel_loops_try > kernel_loops_max) break;
+ for (int i = 0; i < STEPS_LOOPS_CNT; i++)
+ {
+ const u32 kernel_loops_try = steps_loops[i];
- const double exec_ms = try_run (device_param, kernel_accel, kernel_loops_try, 1);
+ if (kernel_loops_try < kernel_loops_min) continue;
+ if (kernel_loops_try > kernel_loops_max) break;
- if (exec_ms > target_ms) break;
+ const double exec_ms = try_run (device_param, kernel_accel, kernel_loops_try, 1);
- const double e = kernel_loops_try / exec_ms;
+ if (exec_ms > target_ms) break;
- if (e > e_best)
- {
- kernel_loops = kernel_loops_try;
+ const double e = kernel_loops_try / exec_ms;
+
+ if (e > e_best)
+ {
+ kernel_loops = kernel_loops_try;
- e_best = e;
+ e_best = e;
+ }
}
}
- // reset timer
+ // final balance
- device_param->exec_pos = 0;
+ const double exec_ms = try_run (device_param, kernel_accel, kernel_loops, 1);
- memset (device_param->exec_ms, 0, EXEC_CACHE * sizeof (double));
+ u32 kernel_accel_best = kernel_accel;
+ u32 kernel_loops_best = kernel_loops;
- // reset fake words
+ u32 exec_best = exec_ms;
- for (u32 i = 0; i < kernel_power_max; i++)
+ // reset
+
+ if (kernel_accel_min < kernel_accel_max)
{
- device_param->pws_buf[i].pw_len = 0;
+ u32 kernel_accel_try = kernel_accel;
+ u32 kernel_loops_try = kernel_loops;
+
+ for (int i = 0; i < 2; i++)
+ {
+ kernel_accel_try >>= 1;
+ kernel_loops_try <<= 1;
+
+ if (kernel_accel_try < kernel_accel_min) break;
+ if (kernel_loops_try > kernel_loops_max) break;
+
+ const double exec_ms = try_run (device_param, kernel_accel_try, kernel_loops_try, 1);
+
+ if (exec_ms < exec_best)
+ {
+ kernel_accel_best = kernel_accel_try;
+ kernel_loops_best = kernel_loops_try;
+
+ exec_best = exec_ms;
+ }
+ }
+
+ kernel_accel = kernel_accel_best;
+ kernel_loops = kernel_loops_best;
}
- hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, device_param->size_pws, device_param->pws_buf, 0, NULL, NULL);
- hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_amp_buf, CL_TRUE, 0, device_param->size_pws, device_param->pws_buf, 0, NULL, NULL);
+ // reset
+
+
+ if (kernel_loops_min < kernel_loops_max)
+ {
+ u32 kernel_accel_try = kernel_accel;
+ u32 kernel_loops_try = kernel_loops;
+
+ for (int i = 0; i < 2; i++)
+ {
+ kernel_accel_try <<= 1;
+ kernel_loops_try >>= 1;
+
+ if (kernel_accel_try > kernel_accel_max) break;
+ if (kernel_loops_try < kernel_loops_min) break;
+
+ const double exec_ms = try_run (device_param, kernel_accel_try, kernel_loops_try, 1);
+
+ if (exec_ms < exec_best)
+ {
+ kernel_accel_best = kernel_accel_try;
+ kernel_loops_best = kernel_loops_try;
+
+ exec_best = exec_ms;
+ }
+ }
+
+ kernel_accel = kernel_accel_best;
+ kernel_loops = kernel_loops_best;
+ }
+
+ // reset timer
+
+ device_param->exec_pos = 0;
+
+ memset (device_param->exec_ms, 0, EXEC_CACHE * sizeof (double));
// store
hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (comb_t), 0, NULL, NULL);
}
- if (data.attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
- {
- if (data.attack_mode == ATTACK_MODE_BF)
- {
- if (data.opts_type & OPTS_TYPE_PT_BITSLICE)
- {
- const uint size_tm = 32 * sizeof (bs_word_t);
-
- run_kernel_bzero (device_param, device_param->d_tm_c, size_tm);
-
- run_kernel_tm (device_param);
-
- hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_tm_c, device_param->d_bfs_c, 0, 0, size_tm, 0, NULL, NULL);
- }
- }
+ choose_kernel (device_param, data.attack_exec, data.attack_mode, data.opts_type, salt_buf, highest_pw_len, pws_cnt);
- if (highest_pw_len < 16)
- {
- run_kernel (KERN_RUN_1, device_param, pws_cnt, true);
- }
- else if (highest_pw_len < 32)
- {
- run_kernel (KERN_RUN_2, device_param, pws_cnt, true);
- }
- else
- {
- run_kernel (KERN_RUN_3, device_param, pws_cnt, true);
- }
- }
- else
+ if (data.benchmark == 1)
{
- run_kernel_amp (device_param, pws_cnt);
-
- run_kernel (KERN_RUN_1, device_param, pws_cnt, false);
-
- if (data.opts_type & OPTS_TYPE_HOOK12)
- {
- run_kernel (KERN_RUN_12, device_param, pws_cnt, false);
- }
-
- uint iter = salt_buf->salt_iter;
-
- uint loop_step = device_param->kernel_loops;
-
- for (uint loop_pos = 0; loop_pos < iter; loop_pos += loop_step)
+ for (u32 i = 0; i < data.benchmark_repeats; i++)
{
- uint loop_left = iter - loop_pos;
-
- loop_left = MIN (loop_left, loop_step);
-
- device_param->kernel_params_buf32[25] = loop_pos;
- device_param->kernel_params_buf32[26] = loop_left;
-
- run_kernel (KERN_RUN_2, device_param, pws_cnt, true);
-
- if (data.devices_status == STATUS_CRACKED) break;
- if (data.devices_status == STATUS_ABORTED) break;
- if (data.devices_status == STATUS_QUIT) break;
+ choose_kernel (device_param, data.attack_exec, data.attack_mode, data.opts_type, salt_buf, highest_pw_len, pws_cnt);
}
-
- if (data.opts_type & OPTS_TYPE_HOOK23)
- {
- run_kernel (KERN_RUN_23, device_param, pws_cnt, false);
-
- hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
-
- // do something with data
-
- hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
- }
-
- run_kernel (KERN_RUN_3, device_param, pws_cnt, false);
}
if (data.devices_status == STATUS_STOP_AT_CHECKPOINT) check_checkpoint ();
u64 perf_sum_all = (u64) pw_cnt * (u64) innerloop_left;
+ if (data.benchmark == 1)
+ {
+ perf_sum_all = (perf_sum_all * data.benchmark_repeats) + perf_sum_all;
+ }
+
hc_thread_mutex_lock (mux_counter);
data.words_progress_done[salt_pos] += perf_sum_all;
{
speed_pos = 0;
}
+
+ /**
+ * benchmark
+ */
+
+ if (data.benchmark == 1) break;
}
}
if (device_param->skipped) return NULL;
- if ((device_param->kernel_accel == 0) && (device_param->kernel_loops == 0))
- {
- autotune (device_param);
- }
+ autotune (device_param);
const uint attack_kern = data.attack_kern;
}
}
+ device_param->kernel_accel = 0;
+ device_param->kernel_loops = 0;
+
return NULL;
}
if (device_param->skipped) return NULL;
- if ((device_param->kernel_accel == 0) && (device_param->kernel_loops == 0))
- {
- autotune (device_param);
- }
+ autotune (device_param);
const uint attack_mode = data.attack_mode;
const uint attack_kern = data.attack_kern;
if (data.devices_status == STATUS_QUIT) break;
if (data.devices_status == STATUS_BYPASS) break;
+ if (data.benchmark == 1) break;
+
device_param->words_done = words_fin;
}
}
fclose (fd);
}
+ device_param->kernel_accel = 0;
+ device_param->kernel_loops = 0;
+
return NULL;
}
uint version = VERSION;
uint quiet = QUIET;
uint benchmark = BENCHMARK;
+ uint benchmark_repeats = BENCHMARK_REPEATS;
uint show = SHOW;
uint left = LEFT;
uint username = USERNAME;
#define IDX_FORCE 0xff08
#define IDX_RUNTIME 0xff09
#define IDX_BENCHMARK 'b'
+ #define IDX_BENCHMARK_REPEATS 0xff78
#define IDX_HASH_MODE 'm'
#define IDX_ATTACK_MODE 'a'
#define IDX_RP_FILE 'r'
{"outfile-check-dir", required_argument, 0, IDX_OUTFILE_CHECK_DIR},
{"force", no_argument, 0, IDX_FORCE},
{"benchmark", no_argument, 0, IDX_BENCHMARK},
+ {"benchmark-repeats", required_argument, 0, IDX_BENCHMARK_REPEATS},
{"restore", no_argument, 0, IDX_RESTORE},
{"restore-disable", no_argument, 0, IDX_RESTORE_DISABLE},
{"status", no_argument, 0, IDX_STATUS},
case IDX_LIMIT: limit = atoll (optarg); break;
case IDX_KEYSPACE: keyspace = 1; break;
case IDX_BENCHMARK: benchmark = 1; break;
+ case IDX_BENCHMARK_REPEATS: benchmark_repeats = atoi (optarg); break;
case IDX_RESTORE: break;
case IDX_RESTORE_DISABLE: restore_disable = 1; break;
case IDX_STATUS: status = 1; break;
return (-1);
}
- if (hash_mode_chgd && hash_mode > 13000) // just added to remove compiler warnings for hash_mode_chgd
+ if (hash_mode_chgd && hash_mode > 13100) // just added to remove compiler warnings for hash_mode_chgd
{
log_error ("ERROR: Invalid hash-type specified");
return (-1);
}
- if (kernel_accel_chgd == 1 && kernel_loops_chgd == 0)
- {
- log_error ("ERROR: If kernel-accel is specified, kernel-loops need to be specified as well");
-
- return (-1);
- }
-
- if (kernel_loops_chgd == 1 && kernel_accel_chgd == 0)
- {
- log_error ("ERROR: If kernel-loops is specified, kernel-accel need to be specified as well");
-
- return (-1);
- }
-
if (kernel_accel_chgd == 1)
{
if (kernel_accel < 1)
return (-1);
}
- if ((opencl_vector_width != 0) && (opencl_vector_width != 1) && (opencl_vector_width != 2) && (opencl_vector_width != 4) && (opencl_vector_width != 8))
+ if (opencl_vector_width_chgd && (!is_power_of_2(opencl_vector_width) || opencl_vector_width > 16))
{
log_error ("ERROR: opencl-vector-width %i not allowed", opencl_vector_width);
data.rp_gen_seed = rp_gen_seed;
data.force = force;
data.benchmark = benchmark;
+ data.benchmark_repeats = benchmark_repeats;
data.skip = skip;
data.limit = limit;
#if defined(HAVE_HWMON) && defined(HAVE_ADL)
logfile_top_uint (attack_mode);
logfile_top_uint (attack_kern);
logfile_top_uint (benchmark);
+ logfile_top_uint (benchmark_repeats);
logfile_top_uint (bitmap_min);
logfile_top_uint (bitmap_max);
logfile_top_uint (debug_mode);
restore_disable = 1;
potfile_disable = 1;
weak_hash_threshold = 0;
+ gpu_temp_disable = 1;
data.status_timer = status_timer;
data.restore_timer = restore_timer;
data.workload_profile = workload_profile;
}
-
- if (runtime_chgd == 0)
- {
- runtime = 17;
-
- data.runtime = runtime;
- }
}
/**
dgst_pos3 = 3;
break;
+ case 13100: hash_type = HASH_TYPE_KRB5TGS;
+ salt_type = SALT_TYPE_EMBEDDED;
+ attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
+ opts_type = OPTS_TYPE_PT_GENERATE_LE;
+ kern_type = KERN_TYPE_KRB5TGS;
+ dgst_size = DGST_SIZE_4_4;
+ parse_func = krb5tgs_parse_hash;
+ sort_by_digest = sort_by_digest_4_4;
+ opti_type = OPTI_TYPE_ZERO_BYTE
+ | OPTI_TYPE_NOT_ITERATED;
+ dgst_pos0 = 0;
+ dgst_pos1 = 1;
+ dgst_pos2 = 2;
+ dgst_pos3 = 3;
+ break;
+
default: usage_mini_print (PROGNAME); return (-1);
}
case 12000: esalt_size = sizeof (pbkdf2_sha1_t); break;
case 12100: esalt_size = sizeof (pbkdf2_sha512_t); break;
case 13000: esalt_size = sizeof (rar5_t); break;
+ case 13100: esalt_size = sizeof (krb5tgs_t); break;
}
data.esalt_size = esalt_size;
// tuning db
- tuning_db_entry_t *tuningdb_entry = tuning_db_search (tuning_db, device_param->device_name, attack_mode, hash_mode);
+ tuning_db_entry_t *tuningdb_entry = tuning_db_search (tuning_db, device_param, attack_mode, hash_mode);
// device_version
if (opencl_vector_width_chgd == 0)
{
- if (tuningdb_entry == NULL)
+ if (tuningdb_entry == NULL || tuningdb_entry->vector_width == -1)
{
if (opti_type & OPTI_TYPE_USES_BITS_64)
{
- hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL);
+ hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL);
}
else
{
- hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof (vector_width), &vector_width, NULL);
+ hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, sizeof (vector_width), &vector_width, NULL);
}
}
else
{
- if (tuningdb_entry->vector_width == -1)
- {
- if (opti_type & OPTI_TYPE_USES_BITS_64)
- {
- hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL);
- }
- else
- {
- hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof (vector_width), &vector_width, NULL);
- }
- }
- else
- {
- vector_width = (cl_uint) tuningdb_entry->vector_width;
- }
+ vector_width = (cl_uint) tuningdb_entry->vector_width;
}
}
else
vector_width = opencl_vector_width;
}
- if (vector_width > 8) vector_width = 8;
+ if (vector_width > 16) vector_width = 16;
device_param->vector_width = vector_width;
* kernel accel and loops tuning db adjustment
*/
- uint _kernel_accel = kernel_accel;
- uint _kernel_loops = kernel_loops;
+ device_param->kernel_accel_min = 1;
+ device_param->kernel_accel_max = 1024;
+
+ device_param->kernel_loops_min = 1;
+ device_param->kernel_loops_max = 1024;
- tuning_db_entry_t *tuningdb_entry = tuning_db_search (tuning_db, device_param->device_name, attack_mode, hash_mode);
+ tuning_db_entry_t *tuningdb_entry = tuning_db_search (tuning_db, device_param, attack_mode, hash_mode);
- if (kernel_accel_chgd == 0)
+ if (tuningdb_entry)
{
- if (tuningdb_entry)
+ u32 _kernel_accel = tuningdb_entry->kernel_accel;
+ u32 _kernel_loops = tuningdb_entry->kernel_loops;
+
+ if (_kernel_accel)
{
- _kernel_accel = tuningdb_entry->kernel_accel;
+ device_param->kernel_accel_min = _kernel_accel;
+ device_param->kernel_accel_max = _kernel_accel;
}
- }
- if (kernel_loops_chgd == 0)
- {
- if (tuningdb_entry)
+ if (_kernel_loops)
{
- _kernel_loops = tuningdb_entry->kernel_loops;
-
if (workload_profile == 1)
{
_kernel_loops = (_kernel_loops > 8) ? _kernel_loops / 8 : 1;
{
_kernel_loops = (_kernel_loops > 4) ? _kernel_loops / 4 : 1;
}
+
+ device_param->kernel_loops_min = _kernel_loops;
+ device_param->kernel_loops_max = _kernel_loops;
}
}
- device_param->kernel_accel = _kernel_accel;
- device_param->kernel_loops = _kernel_loops;
+ // commandline parameters overwrite tuningdb entries
+
+ if (kernel_accel)
+ {
+ device_param->kernel_accel_min = kernel_accel;
+ device_param->kernel_accel_max = kernel_accel;
+ }
+
+ if (kernel_loops)
+ {
+ device_param->kernel_loops_min = kernel_loops;
+ device_param->kernel_loops_max = kernel_loops;
+ }
+
+ /**
+ * activate device
+ */
devices_active++;
}
#endif // HAVE_ADK
#endif // HAVE_HWMON
- #ifdef OSX
- if (hash_mode == 3000 || hash_mode == 1500 || hash_mode == 10700)
- {
- if (force == 0)
- {
- log_info ("");
- log_info ("Warning: Hash mode %d is not stable with OSX.", hash_mode);
- log_info ("You can use --force to override this but do not post error reports if you do so");
- log_info ("");
-
- continue;
- }
- }
- #endif
-
#ifdef DEBUG
if (benchmark == 1) log_info ("Hashmode: %d", data.hash_mode);
#endif
* some algorithms need a fixed kernel-loops count
*/
- u32 kernel_loops_min = 1;
- u32 kernel_loops_max = 1024;
-
if (hash_mode == 1500)
{
const u32 kernel_loops_fixed = 1024;
- kernel_loops_min = kernel_loops_fixed;
- kernel_loops_max = kernel_loops_fixed;
+ device_param->kernel_loops_min = kernel_loops_fixed;
+ device_param->kernel_loops_max = kernel_loops_fixed;
}
if (hash_mode == 3000)
{
const u32 kernel_loops_fixed = 1024;
- kernel_loops_min = kernel_loops_fixed;
- kernel_loops_max = kernel_loops_fixed;
+ device_param->kernel_loops_min = kernel_loops_fixed;
+ device_param->kernel_loops_max = kernel_loops_fixed;
}
if (hash_mode == 8900)
{
const u32 kernel_loops_fixed = 1;
- kernel_loops_min = kernel_loops_fixed;
- kernel_loops_max = kernel_loops_fixed;
+ device_param->kernel_loops_min = kernel_loops_fixed;
+ device_param->kernel_loops_max = kernel_loops_fixed;
}
if (hash_mode == 9300)
{
const u32 kernel_loops_fixed = 1;
- kernel_loops_min = kernel_loops_fixed;
- kernel_loops_max = kernel_loops_fixed;
+ device_param->kernel_loops_min = kernel_loops_fixed;
+ device_param->kernel_loops_max = kernel_loops_fixed;
}
if (hash_mode == 12500)
{
const u32 kernel_loops_fixed = ROUNDS_RAR3 / 16;
- kernel_loops_min = kernel_loops_fixed;
- kernel_loops_max = kernel_loops_fixed;
+ device_param->kernel_loops_min = kernel_loops_fixed;
+ device_param->kernel_loops_max = kernel_loops_fixed;
}
+ /**
+ * some algorithms have a maximum kernel-loops count
+ */
+
if (attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
{
- if (data.salts_buf[0].salt_iter < kernel_loops_max)
+ if (data.salts_buf[0].salt_iter < device_param->kernel_loops_max)
{
- kernel_loops_max = data.salts_buf[0].salt_iter;
+ device_param->kernel_loops_max = data.salts_buf[0].salt_iter;
}
}
- device_param->kernel_loops_min = kernel_loops_min;
- device_param->kernel_loops_max = kernel_loops_max;
-
- // find out if we would request too much memory on memory blocks which are based on kernel_accel
-
- uint size_pws = 4;
- uint size_tmps = 4;
- uint size_hooks = 4;
-
- uint kernel_accel_min = 1;
- uint kernel_accel_max = 1024;
-
/**
* some algorithms need a special kernel-accel
*/
if (hash_mode == 8900)
{
- kernel_accel_max = 64;
+ device_param->kernel_accel_min = 1;
+ device_param->kernel_accel_max = 64;
}
if (hash_mode == 9300)
{
- kernel_accel_max = 64;
+ device_param->kernel_accel_min = 1;
+ device_param->kernel_accel_max = 64;
}
- while (kernel_accel_max)
+ u32 kernel_accel_min = device_param->kernel_accel_min;
+ u32 kernel_accel_max = device_param->kernel_accel_max;
+
+ // find out if we would request too much memory on memory blocks which are based on kernel_accel
+
+ uint size_pws = 4;
+ uint size_tmps = 4;
+ uint size_hooks = 4;
+
+ while (kernel_accel_max >= kernel_accel_min)
{
uint kernel_power_max = device_processors * kernel_threads * kernel_accel_max;
break;
}
+ /*
if (kernel_accel_max == 0)
{
log_error ("Device #%u: Device does not provide enough allocatable device-memory to handle hash-type %u", device_id + 1, data.hash_mode);
return -1;
}
+ */
device_param->kernel_accel_min = kernel_accel_min;
device_param->kernel_accel_max = kernel_accel_max;
+ /*
if (kernel_accel_max < kernel_accel)
{
if (quiet == 0) log_info ("Device #%u: Reduced maximum kernel-accel to %u", device_id + 1, kernel_accel_max);
device_param->kernel_accel = kernel_accel_max;
}
-
- const u32 kernel_accel = device_param->kernel_accel;
+ */
device_param->size_pws = size_pws;
device_param->size_tmps = size_tmps;
// do not confuse kernel_accel_max with kernel_accel here
- const u32 kernel_power = device_processors * kernel_threads * kernel_accel;
+ const u32 kernel_power = device_processors * kernel_threads * kernel_accel_max;
device_param->kernel_threads = kernel_threads;
device_param->kernel_power_user = kernel_power;