From: jsteube Date: Sun, 26 Jun 2016 20:59:52 +0000 (+0200) Subject: Don't use device_processor_cores for scrypt X-Git-Tag: v3.00~9 X-Git-Url: https://www.flypig.org.uk/git/?p=hashcat.git;a=commitdiff_plain;h=0e68b2af2510adbb1faffe8ee84eee578462f29d Don't use device_processor_cores for scrypt --- diff --git a/OpenCL/m08900.cl b/OpenCL/m08900.cl index 59e5bd8..a79c5fc 100644 --- a/OpenCL/m08900.cl +++ b/OpenCL/m08900.cl @@ -683,9 +683,11 @@ void scrypt_smix (uint4 *X, uint4 *T, const u32 phy, __global uint4 *V) const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO; const u32 zSIZE = STATE_CNT4; - const u32 gid = get_global_id (0); + const u32 lid = get_local_id (0); + const u32 lsz = get_local_size (0); + const u32 rid = get_group_id (0); - const u32 x = gid % xSIZE; + const u32 x = (rid * lsz) + lid; #ifdef _unroll #pragma unroll diff --git a/include/types.h b/include/types.h index 8578347..b021556 100644 --- a/include/types.h +++ b/include/types.h @@ -936,7 +936,6 @@ struct __hc_device_param uint kernel_exec_timeout; uint device_processors; - uint device_processor_cores; u64 device_maxmem_alloc; u64 device_global_mem; u32 device_maxclock_frequency; diff --git a/src/hashcat.c b/src/hashcat.c index fb38398..1e9b566 100644 --- a/src/hashcat.c +++ b/src/hashcat.c @@ -14188,7 +14188,7 @@ int main (int argc, char **argv) device_param->device_name_chksum = device_name_chksum; - // device_processor_cores + // vendor specific if (device_param->device_type & CL_DEVICE_TYPE_GPU) { @@ -14211,28 +14211,9 @@ int main (int argc, char **argv) } } - // device_processor_cores - - if (device_type & CL_DEVICE_TYPE_CPU) - { - cl_uint device_processor_cores = 1; - - device_param->device_processor_cores = device_processor_cores; - } - if (device_type & CL_DEVICE_TYPE_GPU) { - if (device_vendor_id == VENDOR_ID_AMD) - { - cl_uint device_processor_cores = 0; - - #define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 - - hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof (device_processor_cores), &device_processor_cores, NULL); - - device_param->device_processor_cores = device_processor_cores; - } - else if (device_vendor_id == VENDOR_ID_NV) + if (device_vendor_id == VENDOR_ID_NV) { cl_uint kernel_exec_timeout = 0; @@ -14242,14 +14223,6 @@ int main (int argc, char **argv) device_param->kernel_exec_timeout = kernel_exec_timeout; - cl_uint device_processor_cores = 0; - - #define CL_DEVICE_WARP_SIZE_NV 0x4003 - - hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_WARP_SIZE_NV, sizeof (device_processor_cores), &device_processor_cores, NULL); - - device_param->device_processor_cores = device_processor_cores; - cl_uint sm_minor = 0; cl_uint sm_major = 0; @@ -14285,12 +14258,6 @@ int main (int argc, char **argv) device_param->nvidia_spin_damp /= 100; } - else - { - cl_uint device_processor_cores = 1; - - device_param->device_processor_cores = device_processor_cores; - } } // display results @@ -15019,7 +14986,6 @@ int main (int argc, char **argv) const char *device_name_chksum = device_param->device_name_chksum; const u32 device_processors = device_param->device_processors; - const u32 device_processor_cores = device_param->device_processor_cores; /** * create context for each device @@ -15053,7 +15019,9 @@ int main (int argc, char **argv) if (hash_mode == 3000) kernel_threads = 64; // DES if (hash_mode == 3200) kernel_threads = 8; // Blowfish if (hash_mode == 7500) kernel_threads = 64; // RC4 + if (hash_mode == 8900) kernel_threads = 32; // scrypt if (hash_mode == 9000) kernel_threads = 8; // Blowfish + if (hash_mode == 9300) kernel_threads = 32; // scrypt if (hash_mode == 9700) kernel_threads = 64; // RC4 if (hash_mode == 9710) kernel_threads = 64; // RC4 if (hash_mode == 9800) kernel_threads = 64; // RC4 @@ -15140,6 +15108,9 @@ int main (int argc, char **argv) } } + device_param->kernel_accel_min = 1; + device_param->kernel_accel_max = 8; + for (uint tmto = tmto_start; tmto < tmto_stop; tmto++) { // TODO: in theory the following calculation needs to be done per salt, not global @@ -15149,7 +15120,7 @@ int main (int argc, char **argv) size_scryptV /= 1 << tmto; - size_scryptV *= device_processors * device_processor_cores; + size_scryptV *= device_param->device_processors * device_param->kernel_threads * device_param->kernel_accel_max; if (size_scryptV > device_param->device_maxmem_alloc) { @@ -15161,7 +15132,7 @@ int main (int argc, char **argv) for (uint salts_pos = 0; salts_pos < data.salts_cnt; salts_pos++) { data.salts_buf[salts_pos].scrypt_tmto = tmto; - data.salts_buf[salts_pos].scrypt_phy = device_processors * device_processor_cores; + data.salts_buf[salts_pos].scrypt_phy = device_param->device_processors * device_param->kernel_threads * device_param->kernel_accel_max; } break;