#define MAX_RETRIES 1
- double exec_ms_final = 0;
+ if ((kernel_loops_min == kernel_loops_max) || (kernel_accel_min == kernel_accel_max))
+ {
+ // we do this in case the user specified a fixed -u and -n on the command line
+ // so we have a cached kernel for benchmark
+
+ try_run (device_param, kernel_accel, kernel_loops);
+ try_run (device_param, kernel_accel, kernel_loops);
+ try_run (device_param, kernel_accel, kernel_loops);
+ try_run (device_param, kernel_accel, kernel_loops);
+ try_run (device_param, kernel_accel, kernel_loops);
+ }
+
+ double exec_ms_final = try_run (device_param, kernel_accel, kernel_loops);
// first find out highest kernel-loops that stays below target_ms
// sometimes we're in a bad situation that the algorithm is so slow that we cannot
// create enough kernel_accel to do both, keep the gpu busy and stay below target_ms.
- // however, we need to have a minimum kernel_accel of 64.
+ // however, we need to have a minimum kernel_accel and kernel_loops of 32.
// luckily, at this level of workload, it became a linear function
- while (kernel_accel < 64)
+ while (kernel_accel < 32 && kernel_loops >= 32)
{
const u32 kernel_accel_try = kernel_accel * 2;
const u32 kernel_loops_try = kernel_loops / 2;
* some algorithms have a maximum kernel-loops count
*/
+ /*
if (attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
{
if (data.salts_buf[0].salt_iter < device_param->kernel_loops_max)
device_param->kernel_loops_max = data.salts_buf[0].salt_iter;
}
}
+ */
/**
* some algorithms need a special kernel-accel