if ((kernel_loops_min < kernel_loops_max) && (kernel_accel_min < kernel_accel_max))
{
- for (u32 f = 2; f < 1024; f++)
+ u32 kernel_accel_orig = kernel_accel;
+ u32 kernel_loops_orig = kernel_loops;
+
+ for (u32 f = 1; f < 1024; f++)
{
- const u32 kernel_accel_try = kernel_accel * f;
- const u32 kernel_loops_try = kernel_loops / f;
+ const u32 kernel_accel_try = (float) kernel_accel_orig * f;
+ const u32 kernel_loops_try = (float) kernel_loops_orig / f;
if (kernel_accel_try > kernel_accel_max) break;
if (kernel_loops_try < kernel_loops_min) break;
if (diff_new > diff) break;
+ diff_new = diff;
+
double exec_ms = try_run (device_param, kernel_accel_try, kernel_loops_try);
for (int i = 0; i < VERIFIER_CNT; i++)