hc_clFlush (data.ocl, device_param->command_queue);
+ if (data.devices_status == STATUS_RUNNING)
+ {
+ switch (kern_run)
+ {
+ case KERN_RUN_1: usleep (device_param->exec_us_prev1); break;
+ case KERN_RUN_2: usleep (device_param->exec_us_prev2); break;
+ case KERN_RUN_3: usleep (device_param->exec_us_prev3); break;
+ }
+ }
+
hc_clWaitForEvents (data.ocl, 1, &event);
- if (event_update)
- {
- cl_ulong time_start;
- cl_ulong time_end;
+ cl_ulong time_start;
+ cl_ulong time_end;
- hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL);
- hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL);
+ hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL);
+ hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL);
- const double exec_time = (double) (time_end - time_start) / 1000000.0;
+ const double exec_us = (double) (time_end - time_start) / 1000;
+ if (data.devices_status == STATUS_RUNNING)
+ {
+ switch (kern_run)
+ {
+ case KERN_RUN_1: device_param->exec_us_prev1 = exec_us; break;
+ case KERN_RUN_2: device_param->exec_us_prev2 = exec_us; break;
+ case KERN_RUN_3: device_param->exec_us_prev3 = exec_us; break;
+ }
+ }
+
+ if (event_update)
+ {
uint exec_pos = device_param->exec_pos;
- device_param->exec_ms[exec_pos] = exec_time;
+ device_param->exec_ms[exec_pos] = exec_us / 1000;
exec_pos++;
memset (device_param->exec_ms, 0, EXEC_CACHE * sizeof (double));
+ device_param->exec_us_prev1 = 0;
+ device_param->exec_us_prev2 = 0;
+ device_param->exec_us_prev3 = 0;
+
// store
device_param->kernel_accel = kernel_accel;
* main
*/
-#ifdef _WIN
+#ifdef WIN
void SetConsoleWindowSize (const int x)
{
HANDLE h = GetStdHandle (STD_OUTPUT_HANDLE);
}
#endif
-#ifdef _POSIX
-int (*clock_gettime_orig) (clockid_t clk_id, struct timespec *tp);
-
-int clock_gettime (clockid_t clk_id, struct timespec *tp)
-{
- int r = clock_gettime_orig (clk_id, tp);
-
- usleep (NVIDIA_100PERCENTCPU_WORKAROUND);
-
- return r;
-}
-#endif
-
int main (int argc, char **argv)
{
- #ifdef _POSIX
- clock_gettime_orig = dlsym (RTLD_NEXT, "clock_gettime");
- #endif
-
- #ifdef _WIN
+ #ifdef WIN
SetConsoleWindowSize (132);
#endif