From: jsteube <jens.steube@gmail.com>
Date: Wed, 23 Dec 2015 14:51:55 +0000 (+0100)
Subject: - Fixed PHY memory handling for scrypt based algorithms
X-Git-Tag: v3.00-beta~584^2~28
X-Git-Url: https://www.flypig.org.uk/git/?a=commitdiff_plain;h=c17bf5e865307fbc723430590bb06c3f5b7554c0;p=hashcat.git

- Fixed PHY memory handling for scrypt based algorithms
- Bring back kernel exec timeout checking for NV
---

diff --git a/include/ext_OpenCL.h b/include/ext_OpenCL.h
index 937d31f..d86b124 100644
--- a/include/ext_OpenCL.h
+++ b/include/ext_OpenCL.h
@@ -49,5 +49,6 @@ void hc_clSetKernelArg (cl_kernel kernel, cl_uint arg_index, size_t arg_size, co
 void *hc_clEnqueueMapBuffer (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event);
 void hc_clEnqueueUnmapMemObject (cl_command_queue command_queue, cl_mem memobj, void *mapped_ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event);
 void hc_clEnqueueFillBuffer (cl_command_queue command_queue, cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event);
+void hc_clGetKernelWorkGroupInfo (cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret);
 
 #endif
diff --git a/include/types.h b/include/types.h
index 78798fd..55873eb 100644
--- a/include/types.h
+++ b/include/types.h
@@ -816,8 +816,10 @@ struct __hc_device_param
 
   uint              sm_major;
   uint              sm_minor;
+  uint              kernel_exec_timeout;
 
   uint              gpu_processors;
+  uint              gpu_processor_cores;
   uint              gpu_threads;
   uint              gpu_accel;
   uint64_t          gpu_maxmem_alloc;
diff --git a/src/ext_OpenCL.c b/src/ext_OpenCL.c
index 79a090f..dae654e 100644
--- a/src/ext_OpenCL.c
+++ b/src/ext_OpenCL.c
@@ -376,3 +376,15 @@ void hc_clEnqueueFillBuffer (cl_command_queue command_queue, cl_mem buffer, cons
     exit (-1);
   }
 }
+
+void hc_clGetKernelWorkGroupInfo (cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
+{
+  cl_int CL_err = clGetKernelWorkGroupInfo (kernel, device, param_name, param_value_size, param_value, param_value_size_ret);
+  // All arguments are forwarded unchanged above; any status other than CL_SUCCESS is treated as fatal.
+  if (CL_err != CL_SUCCESS)
+  {
+    log_error ("ERROR: %s %d\n", "clGetKernelWorkGroupInfo()", CL_err);
+    // No status is returned to the caller: the error is logged and the process exits here.
+    exit (-1);
+  }
+}
diff --git a/src/oclHashcat.c b/src/oclHashcat.c
index cb02675..b212d2a 100644
--- a/src/oclHashcat.c
+++ b/src/oclHashcat.c
@@ -12636,8 +12636,35 @@ int main (int argc, char **argv)
 
       device_param->driver_version = mystrdup (tmp);
 
+      if (vendor_id == VENDOR_ID_AMD)
+      {
+        cl_uint gpu_processor_cores = 0;
+
+        #define CL_DEVICE_WAVEFRONT_WIDTH_AMD               0x4043
+
+        hc_clGetDeviceInfo (device, CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof (gpu_processor_cores), &gpu_processor_cores, NULL);
+
+        device_param->gpu_processor_cores = gpu_processor_cores;
+      }
+
       if (vendor_id == VENDOR_ID_NV)
       {
+        cl_uint kernel_exec_timeout = 0;
+
+        #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV            0x4005
+
+        hc_clGetDeviceInfo (device, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, sizeof (kernel_exec_timeout), &kernel_exec_timeout, NULL);
+
+        device_param->kernel_exec_timeout = kernel_exec_timeout;
+
+        cl_uint gpu_processor_cores = 0;
+
+        #define CL_DEVICE_WARP_SIZE_NV                      0x4003
+
+        hc_clGetDeviceInfo (device, CL_DEVICE_WARP_SIZE_NV, sizeof (gpu_processor_cores), &gpu_processor_cores, NULL);
+
+        device_param->gpu_processor_cores = gpu_processor_cores;
+
         cl_uint sm_minor = 0;
         cl_uint sm_major = 0;
 
@@ -12652,9 +12679,21 @@ int main (int argc, char **argv)
       }
 
       /**
-       * catalyst driver check
+       * common driver check
        */
 
+      if (vendor_id == VENDOR_ID_NV)
+      {
+        if (device_param->kernel_exec_timeout != 0)
+        {
+          if (data.quiet == 0) log_info ("Device #%u: WARNING! Kernel exec timeout is not disabled, it might cause you errors of code 702", device_id + 1);
+
+          #if _WIN
+          if (data.quiet == 0) log_info ("           You can disable it with a regpatch, see here: http://hashcat.net/wiki/doku.php?id=timeout_patch");
+          #endif
+        }
+      }
+
       if (vendor_id == VENDOR_ID_AMD)
       {
         int catalyst_check = (force == 1) ? 0 : 1;
@@ -12674,43 +12713,6 @@ int main (int argc, char **argv)
             catalyst_warn = 0;
           }
 
-          /*
-          // v14.9
-          if ((strstr (device_param->device_version, "1573.") != NULL)
-           && (strstr (device_param->driver_version, "1573.") != NULL))
-          {
-            catalyst_warn = 0;
-          }
-
-          // v14.12 -- version overlaps with v15.4 beta
-          if ((strstr (device_param->device_version, "1642.") != NULL)
-           && (strstr (device_param->driver_version, "1642.") != NULL))
-          {
-            catalyst_broken = 1;
-          }
-
-          // v15.4 (Beta, Windows only release)
-          if ((strstr (device_param->device_version, "1642.") != NULL)
-           && (strstr (device_param->driver_version, "1642.") != NULL))
-          {
-            catalyst_warn = 0;
-          }
-
-          // v15.5 (Release, Linux)
-          if ((strstr (device_param->device_version, "1702.") != NULL)
-           && (strstr (device_param->driver_version, "1702.") != NULL))
-          {
-            catalyst_warn = 0;
-          }
-
-          // v15.3 (Beta, Ubuntu repository release)
-          if ((strstr (device_param->device_version, "1729.") != NULL)
-           && (strstr (device_param->driver_version, "1729.") != NULL))
-          {
-            catalyst_warn = 0;
-          }
-          */
-
           catalyst_check = 0;
         }
 
@@ -12818,6 +12820,8 @@ int main (int argc, char **argv)
 
       uint gpu_processors   = device_param->gpu_processors;
 
+      uint gpu_processor_cores = device_param->gpu_processor_cores;
+
       /**
        * create context for each device
        */
@@ -12839,6 +12843,7 @@ int main (int argc, char **argv)
 
       uint gpu_threads = GPU_THREADS;
 
+      // bcrypt
       if (hash_mode == 3200) gpu_threads = 8;
       if (hash_mode == 9000) gpu_threads = 8;
 
@@ -12960,24 +12965,6 @@ int main (int argc, char **argv)
 
       if ((hash_mode == 8900) || (hash_mode == 9300))
       {
-        uint m = 0;
-
-        if (vendor_id == VENDOR_ID_NV)
-        {
-          #define NV_SHADER_PER_MP 32
-          #define NV_WARPS         32
-
-          m = NV_SHADER_PER_MP * NV_WARPS;
-        }
-
-        else if (vendor_id == VENDOR_ID_AMD)
-        {
-          #define AMD_SHADER_PER_MP 8
-          #define AMD_WAVEFRONTS    64
-
-          m = AMD_SHADER_PER_MP * AMD_WAVEFRONTS;
-        }
-
         uint tmto_start = 2;
         uint tmto_stop  = 1024;
 
@@ -12995,15 +12982,14 @@ int main (int argc, char **argv)
 
           size_scryptV /= tmto;
 
-          size_scryptV *= gpu_processors * m;
+          size_scryptV *= gpu_processors * gpu_processor_cores * gpu_threads;
 
-//          if (size_scryptV > (device_param->gpu_maxmem_alloc / 2)) continue;
           if (size_scryptV > device_param->gpu_maxmem_alloc) continue;
 
           for (uint salts_pos = 0; salts_pos < data.salts_cnt; salts_pos++)
           {
             data.salts_buf[salts_pos].scrypt_tmto = tmto;
-            data.salts_buf[salts_pos].scrypt_phy  = gpu_processors * m;
+            data.salts_buf[salts_pos].scrypt_phy  = gpu_processors * gpu_processor_cores * gpu_threads;
           }
 
           break;