#define MARKOV_DISABLE 0
#define MARKOV_CLASSIC 0
#define BENCHMARK 0
-#define BENCHMARK_REPEATS 2
#define RESTORE 0
#define RESTORE_TIMER 60
#define RESTORE_DISABLE 0
#define KERNEL_RULES 1024
#define KERNEL_COMBS 1024
#define KERNEL_BFS 1024
-#define KERNEL_THREADS 64
+#define KERNEL_THREADS_MAX 256
+#define KERNEL_THREADS_MAX_CPU 16
#define POWERTUNE_ENABLE 0
#define LOGFILE_DISABLE 0
#define SCRYPT_TMTO 0
#define MAX_DICTSTAT 10000
-#define NUM_DEFAULT_BENCHMARK_ALGORITHMS 134
+#define NUM_DEFAULT_BENCHMARK_ALGORITHMS 136
#define global_free(attr) \
{ \
8700,
9100,
133,
+ 13500,
11600,
12500,
13000,
8200,
11300,
12700,
- 13400
+ 13400,
+ 125
};
/**
" --remove Enable remove of hash once it is cracked",
" --remove-timer=NUM Update input hash file each NUM seconds",
" --potfile-disable Do not write potfile",
+ " --potfile-path Specific path to potfile",
" --debug-mode=NUM Defines the debug mode (hybrid only by using rules), see references below",
" --debug-file=FILE Output file for debugging rules (see also --debug-mode)",
" --induction-dir=FOLDER Specify induction directory to use, default is $session.induct",
" 8500 = RACF",
" 7200 = GRUB 2",
" 9900 = Radmin2",
+ " 125 = ArubaOS",
"",
"[[ Enterprise Application Software (EAS) ]]",
"",
" 8700 = Lotus Notes/Domino 6",
" 9100 = Lotus Notes/Domino 8",
" 133 = PeopleSoft",
+ " 13500 = PeopleSoft Token",
"",
"[[ Archives ]]",
"",
if (device_param->skipped) continue;
- u64 speed_cnt = 0;
- float speed_ms = 0;
+ u64 speed_cnt = 0;
+ double speed_ms = 0;
for (int i = 0; i < SPEED_CACHE; i++)
{
- float rec_ms;
-
- hc_timer_get (device_param->speed_rec[i], rec_ms);
-
- if (rec_ms > SPEED_MAXAGE) continue;
-
speed_cnt += device_param->speed_cnt[i];
speed_ms += device_param->speed_ms[i];
}
{
wpa_t *wpa = (wpa_t *) data.esalts_buf;
- uint pke[25] = { 0 };
-
- char *pke_ptr = (char *) pke;
-
- for (uint i = 0; i < 25; i++)
- {
- pke[i] = byte_swap_32 (wpa->pke[i]);
- }
-
- char mac1[6] = { 0 };
- char mac2[6] = { 0 };
-
- memcpy (mac1, pke_ptr + 23, 6);
- memcpy (mac2, pke_ptr + 29, 6);
-
log_info ("Hash.Target....: %s (%02x:%02x:%02x:%02x:%02x:%02x <-> %02x:%02x:%02x:%02x:%02x:%02x)",
(char *) data.salts_buf[0].salt_buf,
- mac1[0] & 0xff,
- mac1[1] & 0xff,
- mac1[2] & 0xff,
- mac1[3] & 0xff,
- mac1[4] & 0xff,
- mac1[5] & 0xff,
- mac2[0] & 0xff,
- mac2[1] & 0xff,
- mac2[2] & 0xff,
- mac2[3] & 0xff,
- mac2[4] & 0xff,
- mac2[5] & 0xff);
+ wpa->orig_mac1[0],
+ wpa->orig_mac1[1],
+ wpa->orig_mac1[2],
+ wpa->orig_mac1[3],
+ wpa->orig_mac1[4],
+ wpa->orig_mac1[5],
+ wpa->orig_mac2[0],
+ wpa->orig_mac2[1],
+ wpa->orig_mac2[2],
+ wpa->orig_mac2[3],
+ wpa->orig_mac2[4],
+ wpa->orig_mac2[5]);
}
else if (data.hash_mode == 5200)
{
* speed new
*/
- u64 speed_cnt[DEVICES_MAX] = { 0 };
- float speed_ms[DEVICES_MAX] = { 0 };
+ u64 speed_cnt[DEVICES_MAX] = { 0 };
+ double speed_ms[DEVICES_MAX] = { 0 };
for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
{
if (device_param->skipped) continue;
- // we need to clear values (set to 0) because in case the device does
- // not get new candidates it idles around but speed display would
- // show it as working.
- // if we instantly set it to 0 after reading it happens that the
- // speed can be shown as zero if the users refreshes too fast.
- // therefore, we add a timestamp when a stat was recorded and if its
- // too old we will not use it
-
speed_cnt[device_id] = 0;
speed_ms[device_id] = 0;
for (int i = 0; i < SPEED_CACHE; i++)
{
- float rec_ms;
-
- hc_timer_get (device_param->speed_rec[i], rec_ms);
-
- if (rec_ms > SPEED_MAXAGE) continue;
-
speed_cnt[device_id] += device_param->speed_cnt[i];
speed_ms[device_id] += device_param->speed_ms[i];
}
* timers
*/
- float ms_running = 0;
+ double ms_running = 0;
hc_timer_get (data.timer_running, ms_running);
- float ms_paused = data.ms_paused;
+ double ms_paused = data.ms_paused;
if (data.devices_status == STATUS_PAUSED)
{
- float ms_paused_tmp = 0;
+ double ms_paused_tmp = 0;
hc_timer_get (data.timer_paused, ms_paused_tmp);
}
}
- float ms_real = ms_running - ms_paused;
+ double ms_real = ms_running - ms_paused;
float cpt_avg_min = (float) data.cpt_total / ((ms_real / 1000) / 60);
float cpt_avg_hour = (float) data.cpt_total / ((ms_real / 1000) / 3600);
static void status_benchmark ()
{
- if (data.devices_status == STATUS_INIT) return;
+ if (data.devices_status == STATUS_INIT) return;
if (data.devices_status == STATUS_STARTING) return;
if (data.words_cnt == 0) return;
- u64 speed_cnt[DEVICES_MAX] = { 0 };
- float speed_ms[DEVICES_MAX] = { 0 };
+ u64 speed_cnt[DEVICES_MAX] = { 0 };
+ double speed_ms[DEVICES_MAX] = { 0 };
for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
{
if (device_param->skipped) continue;
- speed_cnt[device_id] = 0;
- speed_ms[device_id] = 0;
-
- for (int i = 0; i < SPEED_CACHE; i++)
- {
- speed_cnt[device_id] += device_param->speed_cnt[i];
- speed_ms[device_id] += device_param->speed_ms[i];
- }
-
- speed_cnt[device_id] /= SPEED_CACHE;
- speed_ms[device_id] /= SPEED_CACHE;
+ speed_cnt[device_id] = device_param->speed_cnt[0];
+ speed_ms[device_id] = device_param->speed_ms[0];
}
float hashes_all_ms = 0;
hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_result, CL_TRUE, 0, device_param->size_results, device_param->result, 0, NULL, NULL);
- for (uint i = 0; i < KERNEL_THREADS; i++) if (device_param->result[i] == 1) found = 1;
+ for (uint i = 0; i < device_param->kernel_threads; i++) if (device_param->result[i] == 1) found = 1;
if (found == 1)
{
hc_clGetKernelWorkGroupInfo (data.ocl, kernel, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &workgroup_size, NULL);
+ if (kern_run == KERN_RUN_2)
+ {
+ if (data.opti_type & OPTI_TYPE_SLOW_HASH_SIMD)
+ {
+ num_elements = CEIL ((float) num_elements / device_param->vector_width);
+ }
+ }
+
if (kernel_threads > workgroup_size) kernel_threads = workgroup_size;
+ while (num_elements % kernel_threads) num_elements++;
+
const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
if (event_update)
{
- float exec_time;
+ double exec_time;
hc_timer_get (timer, exec_time);
// causes problems with special threads like in bcrypt
// const uint kernel_threads = device_param->kernel_threads;
- uint kernel_threads = KERNEL_THREADS;
+ uint kernel_threads = device_param->kernel_threads;
while (num_elements % kernel_threads) num_elements++;
}
size_t workgroup_size = 0;
+
hc_clGetKernelWorkGroupInfo (data.ocl, kernel, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
+
if (kernel_threads > workgroup_size) kernel_threads = workgroup_size;
- const size_t global_work_size[3] = { num_elements, 1, 1 };
+ const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
cl_kernel kernel = device_param->kernel_tm;
size_t workgroup_size = 0;
+
hc_clGetKernelWorkGroupInfo (data.ocl, kernel, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &workgroup_size, NULL);
+
if (kernel_threads > workgroup_size) kernel_threads = workgroup_size;
- const size_t global_work_size[3] = { num_elements, 1, 1 };
+ const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
// causes problems with special threads like in bcrypt
// const uint kernel_threads = device_param->kernel_threads;
- uint kernel_threads = KERNEL_THREADS;
+ uint kernel_threads = device_param->kernel_threads;
while (num_elements % kernel_threads) num_elements++;
hc_clSetKernelArg (data.ocl, kernel, 6, sizeof (cl_uint), device_param->kernel_params_amp[6]);
size_t workgroup_size = 0;
+
hc_clGetKernelWorkGroupInfo (data.ocl, kernel, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &workgroup_size, NULL);
+
if (kernel_threads > workgroup_size) kernel_threads = workgroup_size;
- const size_t global_work_size[3] = { num_elements, 1, 1 };
+ const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
hc_clFinish (data.ocl, device_param->command_queue);
}
-static void run_kernel_bzero (hc_device_param_t *device_param, cl_mem buf, const uint size)
+static void run_kernel_bzero (hc_device_param_t *device_param, cl_mem buf, const size_t size)
{
int rc = -1;
if (rc != 0)
{
// NOTE: clEnqueueFillBuffer () always fails with -59
- // IOW, it's not supported by Nvidia ForceWare <= 352.21, also pocl segfaults, also on apple
+ // IOW, it's not supported by Nvidia drivers <= 352.21, also pocl segfaults, also on apple
// How's that possible, OpenCL 1.2 support is advertised??
// We need to workaround...
char *tmp = (char *) mymalloc (FILLSZ);
- for (uint i = 0; i < size; i += FILLSZ)
+ for (size_t i = 0; i < size; i += FILLSZ)
{
- const int left = size - i;
+ const size_t left = size - i;
- const int fillsz = MIN (FILLSZ, left);
+ const size_t fillsz = MIN (FILLSZ, left);
hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, buf, CL_TRUE, i, fillsz, tmp, 0, NULL, NULL);
}
if (data.devices_status == STATUS_CRACKED) break;
if (data.devices_status == STATUS_ABORTED) break;
if (data.devices_status == STATUS_QUIT) break;
+
+ /**
+ * speed
+ */
+
+ const float iter_part = (float) (loop_pos + loop_left) / iter;
+
+ const u64 perf_sum_all = pws_cnt * iter_part;
+
+ double speed_ms;
+
+ hc_timer_get (device_param->timer_speed, speed_ms);
+
+ const u32 speed_pos = device_param->speed_pos;
+
+ device_param->speed_cnt[speed_pos] = perf_sum_all;
+
+ device_param->speed_ms[speed_pos] = speed_ms;
}
if (opts_type & OPTS_TYPE_HOOK23)
}
}
-static double try_run (hc_device_param_t *device_param, const u32 kernel_accel, const u32 kernel_loops, const int repeat)
+static double try_run (hc_device_param_t *device_param, const u32 kernel_accel, const u32 kernel_loops)
{
const u32 kernel_power = device_param->device_processors * device_param->kernel_threads * kernel_accel;
- device_param->kernel_params_buf32[26] = kernel_loops;
- device_param->kernel_params_buf32[27] = kernel_loops;
+ device_param->kernel_params_buf32[25] = 0;
+ device_param->kernel_params_buf32[26] = kernel_loops; // not a bug, both need to be set
+ device_param->kernel_params_buf32[27] = kernel_loops; // because there's two variables for inner iters for slow and fast hashes
// init some fake words
- for (u32 i = 0; i < kernel_power; i++)
+ if (data.hash_mode == 10700)
{
- device_param->pws_buf[i].i[0] = i;
- device_param->pws_buf[i].i[1] = 0x01234567;
- device_param->pws_buf[i].pw_len = 4 + (i & 3);
- }
+ // hash mode 10700 hangs on length 0 (unlimited loop)
- hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, kernel_power * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
+ for (u32 i = 0; i < kernel_power; i++)
+ {
+ device_param->pws_buf[i].i[0] = i;
+ device_param->pws_buf[i].i[1] = i + 0x01234567;
+ device_param->pws_buf[i].i[2] = i + 0x89abcdef;
+ device_param->pws_buf[i].i[3] = 0xffffffff;
+ device_param->pws_buf[i].pw_len = 4 + (i & 3);
+ }
- if (data.attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
- {
- run_kernel_amp (device_param, kernel_power);
- }
+ hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, kernel_power * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
- // caching run
+ if (data.attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
+ {
+ run_kernel_amp (device_param, kernel_power);
+ }
+ }
if (data.attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
{
- run_kernel (KERN_RUN_1, device_param, kernel_power, false);
+ run_kernel (KERN_RUN_1, device_param, kernel_power, true);
}
else
{
- run_kernel (KERN_RUN_2, device_param, kernel_power, false);
+ run_kernel (KERN_RUN_2, device_param, kernel_power, true);
}
- // now user repeats
-
- for (int i = 0; i < repeat; i++)
- {
- if (data.attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
- {
- run_kernel (KERN_RUN_1, device_param, kernel_power, true);
- }
- else
- {
- run_kernel (KERN_RUN_2, device_param, kernel_power, true);
- }
- }
-
- const double exec_ms_prev = get_avg_exec_time (device_param, repeat);
+ const double exec_ms_prev = get_avg_exec_time (device_param, 1);
// reset fake words
- memset (device_param->pws_buf, 0, kernel_power * sizeof (pw_t));
+ if (data.hash_mode == 10700)
+ {
+ memset (device_param->pws_buf, 0, kernel_power * sizeof (pw_t));
- hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, kernel_power * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
- hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_amp_buf, CL_TRUE, 0, kernel_power * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
+ hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, kernel_power * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
+ hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_amp_buf, CL_TRUE, 0, kernel_power * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
+ }
return exec_ms_prev;
}
u32 kernel_accel = kernel_accel_min;
u32 kernel_loops = kernel_loops_min;
- // steps
-
#define STEPS_CNT 10
- #define STEPS_ACCEL_CNT (STEPS_CNT + 2)
- #define STEPS_LOOPS_CNT (STEPS_CNT + 2)
-
- u32 steps_accel[STEPS_ACCEL_CNT];
- u32 steps_loops[STEPS_LOOPS_CNT];
-
- for (int i = 0; i < STEPS_ACCEL_CNT; i++)
- {
- steps_accel[i] = 1 << i;
- }
+ #define MAX_RETRIES 1
- for (int i = 0; i < STEPS_LOOPS_CNT; i++)
+ if ((kernel_loops_min == kernel_loops_max) || (kernel_accel_min == kernel_accel_max))
{
- steps_loops[i] = 1 << i;
+ // we do this in case the user specified a fixed -u and -n on the commandline
+ // so we have a cached kernel for benchmark
+
+ try_run (device_param, kernel_accel, kernel_loops);
+ try_run (device_param, kernel_accel, kernel_loops);
+ try_run (device_param, kernel_accel, kernel_loops);
+ try_run (device_param, kernel_accel, kernel_loops);
+ try_run (device_param, kernel_accel, kernel_loops);
}
- steps_accel[STEPS_CNT + 0] = kernel_accel_min;
- steps_accel[STEPS_CNT + 1] = kernel_accel_max;
+ double exec_ms_final = try_run (device_param, kernel_accel, kernel_loops);
- steps_loops[STEPS_CNT + 0] = kernel_loops_min;
- steps_loops[STEPS_CNT + 1] = kernel_loops_max;
+ // first find out highest kernel-loops that stays below target_ms
- qsort (steps_accel, STEPS_ACCEL_CNT, sizeof (u32), sort_by_u32);
- qsort (steps_loops, STEPS_LOOPS_CNT, sizeof (u32), sort_by_u32);
+ for (kernel_loops = kernel_loops_max; kernel_loops > kernel_loops_min; kernel_loops >>= 1)
+ {
+ double exec_ms_best = try_run (device_param, kernel_accel_min, kernel_loops);
- // find out highest kernel-loops that stays below target_ms, we can use it later for multiplication as this is a linear function
+ for (int i = 0; i < MAX_RETRIES; i++)
+ {
+ const double exec_ms_cur = try_run (device_param, kernel_accel_min, kernel_loops);
- u32 kernel_loops_tmp;
+ exec_ms_best = MIN (exec_ms_best, exec_ms_cur);
+ }
- for (kernel_loops_tmp = kernel_loops_max; kernel_loops_tmp > kernel_loops_min; kernel_loops_tmp >>= 1)
- {
- const double exec_ms = try_run (device_param, kernel_accel_min, kernel_loops_tmp, 1);
+ if (exec_ms_final == 0) exec_ms_final = exec_ms_best;
- if (exec_ms < target_ms) break;
+ if (exec_ms_best < target_ms) break;
}
- // kernel-accel
+ // now the same for kernel-accel but with the new kernel-loops from previous loop set
if (kernel_accel_min < kernel_accel_max)
{
- double e_best = 0;
-
- for (int i = 0; i < STEPS_ACCEL_CNT; i++)
+ for (int i = 0; i < STEPS_CNT; i++)
{
- const u32 kernel_accel_try = steps_accel[i];
+ const u32 kernel_accel_try = 1 << i;
if (kernel_accel_try < kernel_accel_min) continue;
if (kernel_accel_try > kernel_accel_max) break;
- const double exec_ms = try_run (device_param, kernel_accel_try, kernel_loops_tmp, 1);
+ double exec_ms_best = try_run (device_param, kernel_accel_try, kernel_loops);
- if (exec_ms > target_ms) break;
-
- const double e = kernel_accel_try / exec_ms;
-
- if (e > e_best)
+ for (int i = 0; i < MAX_RETRIES; i++)
{
- kernel_accel = kernel_accel_try;
+ const double exec_ms_cur = try_run (device_param, kernel_accel_try, kernel_loops);
- e_best = e;
+ exec_ms_best = MIN (exec_ms_best, exec_ms_cur);
}
- }
- }
-
- // kernel-loops final
- if (kernel_loops_min < kernel_loops_max)
- {
- double e_best = 0;
-
- for (int i = 0; i < STEPS_LOOPS_CNT; i++)
- {
- const u32 kernel_loops_try = steps_loops[i];
-
- if (kernel_loops_try < kernel_loops_min) continue;
- if (kernel_loops_try > kernel_loops_max) break;
-
- const double exec_ms = try_run (device_param, kernel_accel, kernel_loops_try, 1);
+ if (exec_ms_best > target_ms) break;
- if (exec_ms > target_ms) break;
+ exec_ms_final = exec_ms_best;
- const double e = kernel_loops_try / exec_ms;
-
- if (e > e_best)
- {
- kernel_loops = kernel_loops_try;
-
- e_best = e;
- }
+ kernel_accel = kernel_accel_try;
}
}
- // final balance
-
- u32 kernel_accel_best = kernel_accel;
- u32 kernel_loops_best = kernel_loops;
-
- u32 exec_best = -1;
-
- if ((kernel_accel_min < kernel_accel_max) || (kernel_loops_min < kernel_loops_max))
- {
- const double exec_ms = try_run (device_param, kernel_accel_best, kernel_loops_best, 1);
-
- exec_best = exec_ms;
- }
-
- // reset
+ // sometimes we're in a bad situation that the algorithm is so slow that we can not
+ // create enough kernel_accel to do both, keep the gpu busy and stay below target_ms.
+ // however, we need to have a minimum kernel_accel and kernel_loops of 32.
+ // luckily, at this level of workload, it became a linear function
- if (kernel_accel_min < kernel_accel_max)
+ while (kernel_accel < 32 && kernel_loops >= 32)
{
- u32 kernel_accel_try = kernel_accel;
- u32 kernel_loops_try = kernel_loops;
-
- for (int i = 0; i < 2; i++)
- {
- kernel_accel_try >>= 1;
- kernel_loops_try <<= 1;
-
- if (kernel_accel_try < kernel_accel_min) break;
- if (kernel_loops_try > kernel_loops_max) break;
+ const u32 kernel_accel_try = kernel_accel * 2;
+ const u32 kernel_loops_try = kernel_loops / 2;
- const double exec_ms = try_run (device_param, kernel_accel_try, kernel_loops_try, 1);
+ if (kernel_accel_try > kernel_accel_max) break;
+ if (kernel_loops_try < kernel_loops_min) break;
- if (exec_ms < exec_best)
- {
- kernel_accel_best = kernel_accel_try;
- kernel_loops_best = kernel_loops_try;
-
- exec_best = exec_ms;
- }
- }
+ kernel_accel = kernel_accel_try;
+ kernel_loops = kernel_loops_try;
}
- // reset
+ // finally there's a chance that we have a fixed kernel_loops but not a fixed kernel_accel
+ // in such a case the above function would not create any change
+ // we'll use the runtime to find out if we're allow to do last improvement
- if (kernel_loops_min < kernel_loops_max)
+ if (exec_ms_final > 0)
{
- u32 kernel_accel_try = kernel_accel;
- u32 kernel_loops_try = kernel_loops;
-
- for (int i = 0; i < 2; i++)
+ if (exec_ms_final < target_ms)
{
- kernel_accel_try <<= 1;
- kernel_loops_try >>= 1;
+ const double exec_left = target_ms / exec_ms_final;
- if (kernel_accel_try > kernel_accel_max) break;
- if (kernel_loops_try < kernel_loops_min) break;
+ const double accel_left = kernel_accel_max / kernel_accel;
- const double exec_ms = try_run (device_param, kernel_accel_try, kernel_loops_try, 1);
+ const double exec_accel_min = MIN (exec_left, accel_left);
- if (exec_ms < exec_best)
+ if (exec_accel_min >= 2)
{
- kernel_accel_best = kernel_accel_try;
- kernel_loops_best = kernel_loops_try;
-
- exec_best = exec_ms;
+ kernel_accel *= exec_accel_min;
}
}
}
// store
- kernel_accel = kernel_accel_best;
- kernel_loops = kernel_loops_best;
-
device_param->kernel_accel = kernel_accel;
device_param->kernel_loops = kernel_loops;
log_info ("Device #%u: autotuned kernel-accel to %u\n"
"Device #%u: autotuned kernel-loops to %u\n",
- device_param->device_id + 1,
- kernel_accel,
- device_param->device_id + 1,
- kernel_loops);
+ device_param->device_id + 1, kernel_accel,
+ device_param->device_id + 1, kernel_loops);
fprintf (stdout, "%s", PROMPT);
+
fflush (stdout);
}
hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (comb_t), 0, NULL, NULL);
}
- choose_kernel (device_param, data.attack_exec, data.attack_mode, data.opts_type, salt_buf, highest_pw_len, pws_cnt);
-
if (data.benchmark == 1)
{
- for (u32 i = 0; i < data.benchmark_repeats; i++)
- {
- choose_kernel (device_param, data.attack_exec, data.attack_mode, data.opts_type, salt_buf, highest_pw_len, pws_cnt);
- }
+ hc_timer_set (&device_param->timer_speed);
}
+ choose_kernel (device_param, data.attack_exec, data.attack_mode, data.opts_type, salt_buf, highest_pw_len, pws_cnt);
+
if (data.devices_status == STATUS_STOP_AT_CHECKPOINT) check_checkpoint ();
if (data.devices_status == STATUS_CRACKED) break;
u64 perf_sum_all = (u64) pws_cnt * (u64) innerloop_left;
- if (data.benchmark == 1)
- {
- perf_sum_all = (perf_sum_all * data.benchmark_repeats) + perf_sum_all;
- }
-
hc_thread_mutex_lock (mux_counter);
data.words_progress_done[salt_pos] += perf_sum_all;
* speed
*/
- float speed_ms;
+ double speed_ms;
hc_timer_get (device_param->timer_speed, speed_ms);
hc_thread_mutex_lock (mux_display);
+ // current speed
+
device_param->speed_cnt[speed_pos] = perf_sum_all;
device_param->speed_ms[speed_pos] = speed_ms;
- device_param->speed_rec[speed_pos] = device_param->timer_speed;
-
hc_thread_mutex_unlock (mux_display);
speed_pos++;
wpa_t *wpas = (wpa_t *) data.esalts_buf;
wpa_t *wpa = &wpas[salt_pos];
- uint pke[25] = { 0 };
-
- char *pke_ptr = (char *) pke;
-
- for (uint i = 0; i < 25; i++)
- {
- pke[i] = byte_swap_32 (wpa->pke[i]);
- }
-
- u8 mac1[6] = { 0 };
- u8 mac2[6] = { 0 };
-
- memcpy (mac1, pke_ptr + 23, 6);
- memcpy (mac2, pke_ptr + 29, 6);
-
// compare hex string(s) vs binary MAC address(es)
for (uint i = 0, j = 0; i < 6; i++, j += 2)
{
- if (mac1[i] != hex_to_u8 ((const u8 *) &mac1_pos[j]))
+ if (wpa->orig_mac1[i] != hex_to_u8 ((const u8 *) &mac1_pos[j]))
{
cracked = 0;
+
break;
}
}
for (uint i = 0, j = 0; i < 6; i++, j += 2)
{
- if (mac2[i] != hex_to_u8 ((const u8 *) &mac2_pos[j]))
+ if (wpa->orig_mac2[i] != hex_to_u8 ((const u8 *) &mac2_pos[j]))
{
cracked = 0;
+
break;
}
}
if (getenv ("GPU_USE_SYNC_OBJECTS") == NULL)
putenv ((char *) "GPU_USE_SYNC_OBJECTS=1");
+ if (getenv ("CUDA_CACHE_DISABLE") == NULL)
+ putenv ((char *) "CUDA_CACHE_DISABLE=1");
+
+ if (getenv ("POCL_KERNEL_CACHE") == NULL)
+ putenv ((char *) "POCL_KERNEL_CACHE=0");
+
/**
* Real init
*/
uint version = VERSION;
uint quiet = QUIET;
uint benchmark = BENCHMARK;
- uint benchmark_repeats = BENCHMARK_REPEATS;
uint show = SHOW;
uint left = LEFT;
uint username = USERNAME;
u64 limit = LIMIT;
uint keyspace = KEYSPACE;
uint potfile_disable = POTFILE_DISABLE;
+ char *potfile_path = NULL;
uint debug_mode = DEBUG_MODE;
char *debug_file = NULL;
char *induction_dir = NULL;
#define IDX_LIMIT 'l'
#define IDX_KEYSPACE 0xff35
#define IDX_POTFILE_DISABLE 0xff06
+ #define IDX_POTFILE_PATH 0xffe0
#define IDX_DEBUG_MODE 0xff43
#define IDX_DEBUG_FILE 0xff44
#define IDX_INDUCTION_DIR 0xff46
#define IDX_FORCE 0xff08
#define IDX_RUNTIME 0xff09
#define IDX_BENCHMARK 'b'
- #define IDX_BENCHMARK_REPEATS 0xff78
#define IDX_HASH_MODE 'm'
#define IDX_ATTACK_MODE 'a'
#define IDX_RP_FILE 'r'
{"limit", required_argument, 0, IDX_LIMIT},
{"keyspace", no_argument, 0, IDX_KEYSPACE},
{"potfile-disable", no_argument, 0, IDX_POTFILE_DISABLE},
+ {"potfile-path", required_argument, 0, IDX_POTFILE_PATH},
{"debug-mode", required_argument, 0, IDX_DEBUG_MODE},
{"debug-file", required_argument, 0, IDX_DEBUG_FILE},
{"induction-dir", required_argument, 0, IDX_INDUCTION_DIR},
{"outfile-check-dir", required_argument, 0, IDX_OUTFILE_CHECK_DIR},
{"force", no_argument, 0, IDX_FORCE},
{"benchmark", no_argument, 0, IDX_BENCHMARK},
- {"benchmark-repeats", required_argument, 0, IDX_BENCHMARK_REPEATS},
{"restore", no_argument, 0, IDX_RESTORE},
{"restore-disable", no_argument, 0, IDX_RESTORE_DISABLE},
{"status", no_argument, 0, IDX_STATUS},
case IDX_REMOVE_TIMER: remove_timer = atoi (optarg);
remove_timer_chgd = 1; break;
case IDX_POTFILE_DISABLE: potfile_disable = 1; break;
+ case IDX_POTFILE_PATH: potfile_path = optarg; break;
case IDX_DEBUG_MODE: debug_mode = atoi (optarg); break;
case IDX_DEBUG_FILE: debug_file = optarg; break;
case IDX_INDUCTION_DIR: induction_dir = optarg; break;
case IDX_LIMIT: limit = atoll (optarg); break;
case IDX_KEYSPACE: keyspace = 1; break;
case IDX_BENCHMARK: benchmark = 1; break;
- case IDX_BENCHMARK_REPEATS: benchmark_repeats = atoi (optarg); break;
case IDX_RESTORE: break;
case IDX_RESTORE_DISABLE: restore_disable = 1; break;
case IDX_STATUS: status = 1; break;
return (-1);
}
- if (hash_mode_chgd && hash_mode > 13400) // just added to remove compiler warnings for hash_mode_chgd
+ if (hash_mode_chgd && hash_mode > 13500) // just added to remove compiler warnings for hash_mode_chgd
{
log_error ("ERROR: Invalid hash-type specified");
if (loopback == 1)
{
- if (attack_mode == ATTACK_MODE_BF)
- {
- log_error ("ERROR: Parameter loopback not allowed in attack-mode 3");
-
- return (-1);
- }
- else if (attack_mode == ATTACK_MODE_STRAIGHT)
+ if (attack_mode == ATTACK_MODE_STRAIGHT)
{
if ((rp_files_cnt == 0) && (rp_gen == 0))
{
return (-1);
}
}
+ else
+ {
+ log_error ("ERROR: Parameter loopback allowed in attack-mode 0 only");
+
+ return (-1);
+ }
}
if (debug_mode > 0)
data.rp_gen_seed = rp_gen_seed;
data.force = force;
data.benchmark = benchmark;
- data.benchmark_repeats = benchmark_repeats;
data.skip = skip;
data.limit = limit;
#if defined(HAVE_HWMON) && defined(HAVE_ADL)
logfile_top_uint (attack_mode);
logfile_top_uint (attack_kern);
logfile_top_uint (benchmark);
- logfile_top_uint (benchmark_repeats);
logfile_top_uint (bitmap_min);
logfile_top_uint (bitmap_max);
logfile_top_uint (debug_mode);
logfile_top_uint (outfile_check_timer);
logfile_top_uint (outfile_format);
logfile_top_uint (potfile_disable);
+ logfile_top_string (potfile_path);
#if defined(HAVE_HWMON) && defined(HAVE_ADL)
logfile_top_uint (powertune_enable);
#endif
dgst_pos3 = 1;
break;
+ case 125: hash_type = HASH_TYPE_SHA1;
+ salt_type = SALT_TYPE_EMBEDDED;
+ attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
+ opts_type = OPTS_TYPE_PT_GENERATE_BE
+ | OPTS_TYPE_PT_ADD80
+ | OPTS_TYPE_PT_ADDBITS15
+ | OPTS_TYPE_ST_HEX;
+ kern_type = KERN_TYPE_SHA1_SLTPW;
+ dgst_size = DGST_SIZE_4_5;
+ parse_func = arubaos_parse_hash;
+ sort_by_digest = sort_by_digest_4_5;
+ opti_type = OPTI_TYPE_ZERO_BYTE
+ | OPTI_TYPE_PRECOMPUTE_INIT
+ | OPTI_TYPE_PRECOMPUTE_MERKLE
+ | OPTI_TYPE_EARLY_SKIP
+ | OPTI_TYPE_NOT_ITERATED
+ | OPTI_TYPE_PREPENDED_SALT
+ | OPTI_TYPE_RAW_HASH;
+ dgst_pos0 = 3;
+ dgst_pos1 = 4;
+ dgst_pos2 = 2;
+ dgst_pos3 = 1;
+ break;
+
case 130: hash_type = HASH_TYPE_SHA1;
salt_type = SALT_TYPE_INTERN;
attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
dgst_size = DGST_SIZE_4_4;
parse_func = phpass_parse_hash;
sort_by_digest = sort_by_digest_4_4;
- opti_type = OPTI_TYPE_ZERO_BYTE;
+ opti_type = OPTI_TYPE_ZERO_BYTE
+ | OPTI_TYPE_SLOW_HASH_SIMD;
dgst_pos0 = 0;
dgst_pos1 = 1;
dgst_pos2 = 2;
case 8300: hash_type = HASH_TYPE_SHA1;
salt_type = SALT_TYPE_EMBEDDED;
attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
- opts_type = OPTS_TYPE_PT_GENERATE_LE
+ opts_type = OPTS_TYPE_PT_GENERATE_BE
| OPTS_TYPE_ST_HEX
| OPTS_TYPE_ST_ADD80;
kern_type = KERN_TYPE_NSEC3;
dgst_pos3 = 3;
break;
+ case 13500: hash_type = HASH_TYPE_SHA1;
+ salt_type = SALT_TYPE_EMBEDDED;
+ attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
+ opts_type = OPTS_TYPE_PT_GENERATE_BE
+ | OPTS_TYPE_PT_UNICODE
+ | OPTS_TYPE_PT_ADD80;
+ kern_type = KERN_TYPE_PSTOKEN;
+ dgst_size = DGST_SIZE_4_5;
+ parse_func = pstoken_parse_hash;
+ sort_by_digest = sort_by_digest_4_5;
+ opti_type = OPTI_TYPE_ZERO_BYTE
+ | OPTI_TYPE_PRECOMPUTE_INIT
+ | OPTI_TYPE_EARLY_SKIP
+ | OPTI_TYPE_NOT_ITERATED
+ | OPTI_TYPE_PREPENDED_SALT
+ | OPTI_TYPE_RAW_HASH;
+ dgst_pos0 = 3;
+ dgst_pos1 = 4;
+ dgst_pos2 = 2;
+ dgst_pos3 = 1;
+ break;
+
default: usage_mini_print (PROGNAME); return (-1);
}
case 13000: esalt_size = sizeof (rar5_t); break;
case 13100: esalt_size = sizeof (krb5tgs_t); break;
case 13400: esalt_size = sizeof (keepass_t); break;
+ case 13500: esalt_size = sizeof (pstoken_t); break;
}
data.esalt_size = esalt_size;
if (keyspace == 0)
{
- snprintf (dictstat, sizeof (dictstat) - 1, "%s/hashcat.dictstat", profile_dir);
+ snprintf (dictstat, sizeof (dictstat) - 1, "%s/%s", profile_dir, DICTSTAT_FILENAME);
dictstat_fp = fopen (dictstat, "rb");
char potfile[256] = { 0 };
- snprintf (potfile, sizeof (potfile) - 1, "%s/%s.pot", session_dir, session);
+ if (potfile_path == NULL)
+ {
+ snprintf (potfile, sizeof (potfile) - 1, "%s/%s", profile_dir, POTFILE_FILENAME);
+ }
+ else
+ {
+ strncpy (potfile, potfile_path, sizeof (potfile) - 1);
+ }
data.pot_fp = NULL;
switch (hash_mode)
{
+ case 125: if (pw_max > 32) pw_max = 32;
+ break;
case 400: if (pw_max > 40) pw_max = 40;
break;
case 500: if (pw_max > 16) pw_max = 16;
wpa_t *wpa = (wpa_t *) hashes_buf[hashes_cnt].esalt;
- u8 *pke_ptr = (u8 *) wpa->pke;
-
// do the appending task
snprintf (salt_ptr + cur_pos,
rem_len,
":%02x%02x%02x%02x%02x%02x:%02x%02x%02x%02x%02x%02x",
- pke_ptr[20], pke_ptr[27], pke_ptr[26], pke_ptr[25], pke_ptr[24], pke_ptr[31], // MAC1
- pke_ptr[30], pke_ptr[29], pke_ptr[28], pke_ptr[35], pke_ptr[34], pke_ptr[33]); // MAC2
-
+ wpa->orig_mac1[0],
+ wpa->orig_mac1[1],
+ wpa->orig_mac1[2],
+ wpa->orig_mac1[3],
+ wpa->orig_mac1[4],
+ wpa->orig_mac1[5],
+ wpa->orig_mac2[0],
+ wpa->orig_mac2[1],
+ wpa->orig_mac2[2],
+ wpa->orig_mac2[3],
+ wpa->orig_mac2[4],
+ wpa->orig_mac2[5]);
// memset () the remaining part of the salt
switch (hash_mode)
{
- case 1500: hashes_buf[0].salt->salt_len = 2;
+ case 1500: hashes_buf[0].salt->salt_len = 2;
+ hashes_buf[0].salt->salt_buf[0] = 388; // pure magic
break;
case 1731: hashes_buf[0].salt->salt_len = 4;
break;
break;
case 13400: ((keepass_t *) hashes_buf[0].esalt)->version = 2;
break;
+ case 13500: ((pstoken_t *) hashes_buf[0].esalt)->salt_len = 113;
+ break;
}
}
{
wpa_t *wpa = (wpa_t *) found->esalt;
- uint pke[25] = { 0 };
-
- char *pke_ptr = (char *) pke;
-
- for (uint i = 0; i < 25; i++)
- {
- pke[i] = byte_swap_32 (wpa->pke[i]);
- }
-
- u8 mac1[6] = { 0 };
- u8 mac2[6] = { 0 };
-
- memcpy (mac1, pke_ptr + 23, 6);
- memcpy (mac2, pke_ptr + 29, 6);
-
// compare hex string(s) vs binary MAC address(es)
for (uint i = 0, j = 0; i < 6; i++, j += 2)
{
- if (mac1[i] != hex_to_u8 ((const u8 *) &mac1_pos[j]))
+ if (wpa->orig_mac1[i] != hex_to_u8 ((const u8 *) &mac1_pos[j]))
{
found = NULL;
+
break;
}
}
for (uint i = 0, j = 0; i < 6; i++, j += 2)
{
- if (mac2[i] != hex_to_u8 ((const u8 *) &mac2_pos[j]))
+ if (wpa->orig_mac2[i] != hex_to_u8 ((const u8 *) &mac2_pos[j]))
{
found = NULL;
+
break;
}
}
uint digests_cnt = hashes_cnt;
uint digests_done = 0;
- uint size_digests = digests_cnt * dgst_size;
- uint size_shown = digests_cnt * sizeof (uint);
+ size_t size_digests = digests_cnt * dgst_size;
+ size_t size_shown = digests_cnt * sizeof (uint);
uint *digests_shown = (uint *) mymalloc (size_shown);
uint *digests_shown_tmp = (uint *) mymalloc (size_shown);
hc_clGetDeviceIDs (data.ocl, platform, CL_DEVICE_TYPE_ALL, DEVICES_MAX, platform_devices, &platform_devices_cnt);
+ char platform_vendor[INFOSZ] = { 0 };
+
+ hc_clGetPlatformInfo (data.ocl, platform, CL_PLATFORM_VENDOR, sizeof (platform_vendor), platform_vendor, NULL);
+
+ // find our own platform vendor because pocl and mesa are pushing original vendor_id through opencl
+ // this causes trouble with vendor id based macros
+ // we'll assign generic to those without special optimization available
+
+ cl_uint vendor_id = 0;
+
+ if (strcmp (platform_vendor, CL_VENDOR_AMD) == 0)
+ {
+ vendor_id = VENDOR_ID_AMD;
+ }
+ else if (strcmp (platform_vendor, CL_VENDOR_APPLE) == 0)
+ {
+ vendor_id = VENDOR_ID_GENERIC;
+ }
+ else if (strcmp (platform_vendor, CL_VENDOR_INTEL_BEIGNET) == 0)
+ {
+ vendor_id = VENDOR_ID_GENERIC;
+ }
+ else if (strcmp (platform_vendor, CL_VENDOR_INTEL_SDK) == 0)
+ {
+ vendor_id = VENDOR_ID_GENERIC;
+ }
+ else if (strcmp (platform_vendor, CL_VENDOR_MESA) == 0)
+ {
+ vendor_id = VENDOR_ID_GENERIC;
+ }
+ else if (strcmp (platform_vendor, CL_VENDOR_NV) == 0)
+ {
+ vendor_id = VENDOR_ID_NV;
+ }
+ else if (strcmp (platform_vendor, CL_VENDOR_POCL) == 0)
+ {
+ vendor_id = VENDOR_ID_GENERIC;
+ }
+ else
+ {
+ vendor_id = VENDOR_ID_GENERIC;
+ }
+
for (uint platform_devices_id = 0; platform_devices_id < platform_devices_cnt; platform_devices_id++)
{
size_t param_value_size = 0;
hc_device_param_t *device_param = &data.devices_param[device_id];
+ device_param->vendor_id = vendor_id;
+
device_param->device = platform_devices[platform_devices_id];
device_param->device_id = device_id;
device_param->device_type = device_type;
- // vendor_id
-
- cl_uint vendor_id = 0;
-
- hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_VENDOR_ID, sizeof (vendor_id), &vendor_id, NULL);
-
- device_param->vendor_id = vendor_id;
-
// device_name
hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_NAME, 0, NULL, ¶m_value_size);
myfree (device_opencl_version);
- if (strstr (device_version, "pocl"))
- {
- // pocl returns the real vendor_id in CL_DEVICE_VENDOR_ID which causes many problems because of hms and missing amd_bfe () etc
- // we need to overwrite vendor_id to avoid this. maybe open pocl issue?
-
- cl_uint vendor_id = VENDOR_ID_GENERIC;
-
- device_param->vendor_id = vendor_id;
- }
-
// vector_width
cl_uint vector_width;
device_param->device_processors = device_processors;
- // max_mem_alloc_size
+ // device_maxmem_alloc
+ // note we'll limit to 2gb, otherwise this causes all kinds of weird errors because of possible integer overflows in opencl runtimes
cl_ulong device_maxmem_alloc;
hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof (device_maxmem_alloc), &device_maxmem_alloc, NULL);
- device_param->device_maxmem_alloc = device_maxmem_alloc;
+ device_param->device_maxmem_alloc = MIN (device_maxmem_alloc, 0x7fffffff);
- // max_mem_alloc_size
+ // device_global_mem
cl_ulong device_global_mem;
device_param->device_global_mem = device_global_mem;
+ // max_work_group_size
+
+ size_t device_maxworkgroup_size;
+
+ hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof (device_maxworkgroup_size), &device_maxworkgroup_size, NULL);
+
+ device_param->device_maxworkgroup_size = device_maxworkgroup_size;
+
// max_clock_frequency
cl_uint device_maxclock_frequency;
if (device_param->skipped == 0)
{
- if (strstr (device_version, "pocl"))
- {
- if (force == 0)
- {
- log_info ("");
- log_info ("ATTENTION! All pocl drivers are known to be broken due to broken LLVM <= 3.7");
- log_info ("You are STRONGLY encouraged not to use it");
- log_info ("You can use --force to override this but do not post error reports if you do so");
- log_info ("");
-
- return (-1);
- }
- }
-
if (device_type & CL_DEVICE_TYPE_GPU)
{
- if (vendor_id == VENDOR_ID_NV)
- {
- if (device_param->kernel_exec_timeout != 0)
- {
- if (data.quiet == 0) log_info ("Device #%u: WARNING! Kernel exec timeout is not disabled, it might cause you errors of code 702", device_id + 1);
- if (data.quiet == 0) log_info (" See the wiki on how to disable it: https://hashcat.net/wiki/doku.php?id=timeout_patch");
- }
- }
- else if (vendor_id == VENDOR_ID_AMD)
+ if (vendor_id == VENDOR_ID_AMD)
{
int catalyst_check = (force == 1) ? 0 : 1;
log_info ("You can use --force to override this but do not post error reports if you do so");
log_info ("");
+ return (-1);
+ }
+ }
+ else if (vendor_id == VENDOR_ID_NV)
+ {
+ if (device_param->kernel_exec_timeout != 0)
+ {
+ if (data.quiet == 0) log_info ("Device #%u: WARNING! Kernel exec timeout is not disabled, it might cause you errors of code 702", device_id + 1);
+ if (data.quiet == 0) log_info (" See the wiki on how to disable it: https://hashcat.net/wiki/doku.php?id=timeout_patch");
+ }
+ }
+ }
+
+ if (device_type & CL_DEVICE_TYPE_CPU)
+ {
+ if (vendor_id == VENDOR_ID_AMD)
+ {
+ if (force == 0)
+ {
+ log_info ("");
+ log_info ("ATTENTION! OpenCL support for CPU of catalyst driver is not reliable.");
+ log_info ("You are STRONGLY encouraged not to use it");
+ log_info ("You can use --force to override this but do not post error reports if you do so");
+ log_info ("A good alternative is the free pocl, but make sure to use a version >= 3.8");
+ log_info ("");
+
return (-1);
}
}
device_param->command_queue = hc_clCreateCommandQueue (data.ocl, device_param->context, device_param->device, CL_QUEUE_PROFILING_ENABLE);
+ /**
+ * kernel threads: some algorithms need a fixed kernel-threads count
+ * because of shared memory usage or bitslice
+ * there needs to be some upper limit, otherwise there's too much overhead
+ */
+
+ uint kernel_threads = MIN (KERNEL_THREADS_MAX, device_param->device_maxworkgroup_size);
+
+ if (device_param->device_type & CL_DEVICE_TYPE_CPU)
+ {
+ kernel_threads = KERNEL_THREADS_MAX_CPU;
+ }
+
+ if (hash_mode == 1500) kernel_threads = 64; // DES
+ if (hash_mode == 3000) kernel_threads = 64; // DES
+ if (hash_mode == 3200) kernel_threads = 8; // Blowfish
+ if (hash_mode == 7500) kernel_threads = 64; // RC4
+ if (hash_mode == 9000) kernel_threads = 8; // Blowfish
+ if (hash_mode == 9700) kernel_threads = 64; // RC4
+ if (hash_mode == 9710) kernel_threads = 64; // RC4
+ if (hash_mode == 9800) kernel_threads = 64; // RC4
+ if (hash_mode == 9810) kernel_threads = 64; // RC4
+ if (hash_mode == 10400) kernel_threads = 64; // RC4
+ if (hash_mode == 10410) kernel_threads = 64; // RC4
+ if (hash_mode == 10500) kernel_threads = 64; // RC4
+ if (hash_mode == 13100) kernel_threads = 64; // RC4
+
/**
* create input buffers on device : calculate size of fixed memory buffers
*/
- uint size_root_css = SP_PW_MAX * sizeof (cs_t);
- uint size_markov_css = SP_PW_MAX * CHARSIZ * sizeof (cs_t);
+ size_t size_root_css = SP_PW_MAX * sizeof (cs_t);
+ size_t size_markov_css = SP_PW_MAX * CHARSIZ * sizeof (cs_t);
device_param->size_root_css = size_root_css;
device_param->size_markov_css = size_markov_css;
- uint size_results = KERNEL_THREADS * sizeof (uint);
+ size_t size_results = kernel_threads * sizeof (uint);
device_param->size_results = size_results;
- uint size_rules = kernel_rules_cnt * sizeof (kernel_rule_t);
- uint size_rules_c = KERNEL_RULES * sizeof (kernel_rule_t);
+ size_t size_rules = kernel_rules_cnt * sizeof (kernel_rule_t);
+ size_t size_rules_c = KERNEL_RULES * sizeof (kernel_rule_t);
- uint size_plains = digests_cnt * sizeof (plain_t);
- uint size_salts = salts_cnt * sizeof (salt_t);
- uint size_esalts = salts_cnt * esalt_size;
+ size_t size_plains = digests_cnt * sizeof (plain_t);
+ size_t size_salts = salts_cnt * sizeof (salt_t);
+ size_t size_esalts = salts_cnt * esalt_size;
device_param->size_plains = size_plains;
device_param->size_digests = size_digests;
device_param->size_shown = size_shown;
device_param->size_salts = size_salts;
- uint size_combs = KERNEL_COMBS * sizeof (comb_t);
- uint size_bfs = KERNEL_BFS * sizeof (bf_t);
- uint size_tm = 32 * sizeof (bs_word_t);
+ size_t size_combs = KERNEL_COMBS * sizeof (comb_t);
+ size_t size_bfs = KERNEL_BFS * sizeof (bf_t);
+ size_t size_tm = 32 * sizeof (bs_word_t);
// scryptV stuff
- u64 size_scryptV = 1;
+ size_t size_scryptV = 1;
if ((hash_mode == 8900) || (hash_mode == 9300))
{
}
else if (device_param->vendor_id == VENDOR_ID_NV)
{
- tmto_start = 3;
+ tmto_start = 2;
}
}
else if (hash_mode == 9300)
{
if (device_param->vendor_id == VENDOR_ID_AMD)
{
- tmto_start = 3;
+ tmto_start = 2;
}
else if (device_param->vendor_id == VENDOR_ID_NV)
{
- tmto_start = 5;
+ tmto_start = 2;
}
}
}
if (quiet == 0) log_info ("");
- uint shader_per_mp = 1;
-
- if (device_param->vendor_id == VENDOR_ID_AMD)
- {
- shader_per_mp = 8;
- }
- else if (device_param->vendor_id == VENDOR_ID_NV)
- {
- shader_per_mp = 32;
- }
-
for (uint tmto = tmto_start; tmto < tmto_stop; tmto++)
{
// TODO: in theory the following calculation needs to be done per salt, not global
size_scryptV /= 1 << tmto;
- size_scryptV *= device_processors * device_processor_cores * shader_per_mp;
+ size_scryptV *= device_processors * device_processor_cores;
if (size_scryptV > device_param->device_maxmem_alloc)
{
for (uint salts_pos = 0; salts_pos < data.salts_cnt; salts_pos++)
{
data.salts_buf[salts_pos].scrypt_tmto = tmto;
- data.salts_buf[salts_pos].scrypt_phy = device_processors * device_processor_cores * shader_per_mp;
+ data.salts_buf[salts_pos].scrypt_phy = device_processors * device_processor_cores;
}
break;
if (quiet == 0) log_info ("SCRYPT tmto optimizer value set to: %u, mem: %u\n", data.salts_buf[0].scrypt_tmto, size_scryptV);
}
- /**
- * create input buffers on device : calculate size of dynamic size memory buffers
- */
-
- uint kernel_threads = KERNEL_THREADS;
-
- // some algorithms need a fixed kernel-threads count (mostly because of shared memory usage)
-
- if (hash_mode == 3200) kernel_threads = 8;
- if (hash_mode == 9000) kernel_threads = 8;
-
/**
* some algorithms need a fixed kernel-loops count
*/
* some algorithms have a maximum kernel-loops count
*/
+ /*
if (attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
{
if (data.salts_buf[0].salt_iter < device_param->kernel_loops_max)
device_param->kernel_loops_max = data.salts_buf[0].salt_iter;
}
}
+ */
/**
* some algorithms need a special kernel-accel
// find out if we would request too much memory on memory blocks which are based on kernel_accel
- uint size_pws = 4;
- uint size_tmps = 4;
- uint size_hooks = 4;
+ size_t size_pws = 4;
+ size_t size_tmps = 4;
+ size_t size_hooks = 4;
while (kernel_accel_max >= kernel_accel_min)
{
- uint kernel_power_max = device_processors * kernel_threads * kernel_accel_max;
+ const u32 kernel_power_max = device_processors * kernel_threads * kernel_accel_max;
// size_pws
+ size_markov_css
+ size_plains
+ size_pws
+ + size_pws // not a bug
+ size_results
+ size_root_css
+ size_rules
// we don't have sm_* on vendors not NV but it doesn't matter
- snprintf (build_opts, sizeof (build_opts) - 1, "-I\"%s/\" -DVENDOR_ID=%u -DCUDA_ARCH=%d -DVECT_SIZE=%u -DDEVICE_TYPE=%u", shared_dir, device_param->vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->vector_width, (u32) device_param->device_type);
+ snprintf (build_opts, sizeof (build_opts) - 1, "-cl-std=CL1.1 -I\"%s/\" -DVENDOR_ID=%u -DCUDA_ARCH=%d -DVECT_SIZE=%u -DDEVICE_TYPE=%u", shared_dir, device_param->vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->vector_width, (u32) device_param->device_type);
/**
* main kernel
if (data.quiet == 0) log_info ("");
/**
- * Inform user which algorithm is checked and at which workload setting
+ * In benchmark-mode, inform user which algorithm is checked
*/
if (benchmark == 1)
device_param->speed_pos = 0;
memset (device_param->speed_cnt, 0, SPEED_CACHE * sizeof (u64));
- memset (device_param->speed_ms, 0, SPEED_CACHE * sizeof (float));
- memset (device_param->speed_rec, 0, SPEED_CACHE * sizeof (hc_timer_t));
+ memset (device_param->speed_ms, 0, SPEED_CACHE * sizeof (double));
device_param->exec_pos = 0;