#define RESTORE_DISABLE 0
#define STATUS 0
#define STATUS_TIMER 10
-#define STATUS_AUTOMAT 0
+#define STATUS_AUTOMATE 0
#define LOOPBACK 0
#define WEAK_HASH_THRESHOLD 100
#define SHOW 0
"",
"Usage: %s [options]... hash|hashfile|hccapfile [dictionary|mask|directory]...",
"",
- "###########",
- "# Options #",
- "###########",
+ "- [ Options ] -",
"",
" Options Short / Long | Type | Description | Example",
"===============================|======|======================================================|=======================",
" --force | | Ignore warnings |",
" --status | | Enable automatic update of the status-screen |",
" --status-timer | Num | Sets seconds between status-screen update to X | --status-timer=1",
- " --status-automat | | Display the status view in a machine readable format |",
+ " --status-automate | | Display the status view in a machine readable format |",
" --loopback | | Add new plains to induct directory |",
" --weak-hash-threshold | Num | Threshold X when to stop checking for weak hashes | --weak=0",
" --markov-hcstat | File | Specify hcstat file to use | --markov-hc=my.hcstat",
" --increment-min | Num | Start mask incrementing at X | --increment-min=4",
" --increment-max | Num | Stop mask incrementing at X | --increment-max=8",
"",
- "################",
- "## Hash modes ##",
- "################",
+ "- [ Hash modes ] -",
"",
" # | Name | Category",
" ------+--------------------------------------------------+--------------------------------------",
" 12700 | Blockchain, My Wallet | Password Managers",
" 13400 | Keepass 1 (AES/Twofish) and Keepass 2 (AES) | Password Managers",
"",
- "#####################",
- "## Outfile Formats ##",
- "#####################",
+ "- [ Outfile Formats ] -",
"",
" # | Format",
" ---+--------",
" 14 | plain:hex_plain:crack_pos",
" 15 | hash[:salt]:plain:hex_plain:crack_pos",
"",
- "##########################",
- "## Rule Debugging Modes ##",
- "##########################",
+ "- [ Rule Debugging Modes ] -",
"",
" # | Format",
" ---+--------",
" 3 | Original-Word:Finding-Rule",
" 4 | Original-Word:Finding-Rule:Processed-Word",
"",
- "##################",
- "## Attack Modes ##",
- "##################",
+ "- [ Attack Modes ] -",
"",
" # | Mode",
" ---+------",
" 6 | Hybrid Wordlist + Mask",
" 7 | Hybrid Mask + Wordlist",
"",
- "#######################",
- "## Built-in Charsets ##",
- "#######################",
+ "- [ Built-in Charsets ] -",
"",
" ? | Charset",
" ---+---------",
" a | ?l?u?d?s",
" b | 0x00 - 0xff",
"",
- "#########################",
- "## OpenCL Device Types ##",
- "#########################",
+ "- [ OpenCL Device Types ] -",
"",
" # | Device Type",
" ---+-------------",
" 2 | GPU",
" 3 | FPGA, DSP, Co-Processor",
"",
- "#######################",
- "## Workload Profiles ##",
- "#######################",
+ "- [ Workload Profiles ] -",
"",
" # | Performance | Runtime | Power Consumption | Desktop Impact",
" ---+-------------+---------+-------------------+----------------",
return exec_ms_sum / exec_ms_cnt;
}
-void status_display_automat ()
+void status_display_automate ()
{
FILE *out = stdout;
if (data.devices_status == STATUS_STARTING) return;
if (data.devices_status == STATUS_BYPASS) return;
- if (data.status_automat == 1)
+ if (data.status_automate == 1)
{
- status_display_automat ();
+ status_display_automate ();
return;
}
hm_device_val_to_str ((char *) utilization, HM_STR_BUF_SIZE, "%", hm_get_utilization_with_device_id (device_id));
hm_device_val_to_str ((char *) temperature, HM_STR_BUF_SIZE, "c", hm_get_temperature_with_device_id (device_id));
- if (device_param->vendor_id == VENDOR_ID_AMD)
+ if (device_param->device_vendor_id == VENDOR_ID_AMD)
{
hm_device_val_to_str ((char *) fanspeed, HM_STR_BUF_SIZE, "%", hm_get_fanspeed_with_device_id (device_id));
}
- else if (device_param->vendor_id == VENDOR_ID_NV)
+ else if (device_param->device_vendor_id == VENDOR_ID_NV)
{
hm_device_val_to_str ((char *) fanspeed, HM_STR_BUF_SIZE, "%", hm_get_fanspeed_with_device_id (device_id));
}
#endif // HAVE_HWMON
}
-static void status_benchmark_automat ()
+static void status_benchmark_automate ()
{
u64 speed_cnt[DEVICES_MAX] = { 0 };
double speed_ms[DEVICES_MAX] = { 0 };
if (data.devices_status == STATUS_STARTING) return;
if (data.devices_status == STATUS_BYPASS) return;
- if (data.status_automat == 1)
+ if (data.status_automate == 1)
{
- status_benchmark_automat ();
+ status_benchmark_automate ();
return;
}
hc_clFinish (data.ocl, device_param->command_queue);
}
+// Fill `num` bytes of device buffer `buf` with `value` replicated as 32-bit
+// words (callers in this patch pass 0 or 7). The bulk is done on-device in
+// 16-byte chunks by the "gpu_memset" kernel; any sub-16-byte tail is patched
+// with a blocking host-side write so the buffer is fully set on return.
+static void run_kernel_memset (hc_device_param_t *device_param, cl_mem buf, const uint value, const uint num)
+{
+  const u32 num16d = num / 16; // whole 16-byte chunks handled by the kernel
+  const u32 num16m = num % 16; // leftover tail bytes (< 16) written from host
+
+  if (num16d)
+  {
+    // kernel_params_memset[1]/[2] point at these slots: arg 1 = fill value,
+    // arg 2 = chunk count (gid bound) -- assumes gpu_memset reads them that
+    // way; TODO confirm against the kernel source
+    device_param->kernel_params_memset_buf32[1] = value;
+    device_param->kernel_params_memset_buf32[2] = num16d;
+
+    uint kernel_threads = device_param->kernel_threads;
+
+    uint num_elements = num16d;
+
+    // round global size up to a multiple of the work-group size, as required
+    // for a uniform NDRange launch
+    while (num_elements % kernel_threads) num_elements++;
+
+    cl_kernel kernel = device_param->kernel_memset;
+
+    hc_clSetKernelArg (data.ocl, kernel, 0, sizeof (cl_mem), (void *) &buf);
+    hc_clSetKernelArg (data.ocl, kernel, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]);
+    hc_clSetKernelArg (data.ocl, kernel, 2, sizeof (cl_uint), device_param->kernel_params_memset[2]);
+
+    const size_t global_work_size[3] = { num_elements, 1, 1 };
+    const size_t local_work_size[3] = { kernel_threads, 1, 1 };
+
+    hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+
+    hc_clFlush (data.ocl, device_param->command_queue);
+
+    // block until the fill completes so callers can rely on the contents
+    hc_clFinish (data.ocl, device_param->command_queue);
+  }
+
+  if (num16m)
+  {
+    // 16-byte staging pattern for the tail; only the first num16m bytes are used
+    u32 tmp[4];
+
+    tmp[0] = value;
+    tmp[1] = value;
+    tmp[2] = value;
+    tmp[3] = value;
+
+    // blocking write (CL_TRUE) of the tail at the first byte the kernel left unset
+    hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, buf, CL_TRUE, num16d * 16, num16m, tmp, 0, NULL, NULL);
+  }
+}
+
+// Zero out `size` bytes of device buffer `buf`; now a thin wrapper over
+// run_kernel_memset with a fill value of 0.
static void run_kernel_bzero (hc_device_param_t *device_param, cl_mem buf, const size_t size)
{
+  run_kernel_memset (device_param, buf, 0, size);
+
+  // NOTE(review): the previous vendor-specific implementation is retained
+  // below inside a block comment (partially elided in this view); consider
+  // deleting the dead code outright instead of keeping it commented out.
+  /*
  int rc = -1;
-  if (device_param->opencl_v12 && device_param->vendor_id == VENDOR_ID_AMD)
+  if (device_param->opencl_v12 && device_param->platform_vendor_id == VENDOR_ID_AMD)
  {
    // So far tested, amd is the only supporting this OpenCL 1.2 function without segfaulting
    myfree (tmp);
  }
+  */
}
static void choose_kernel (hc_device_param_t *device_param, const uint attack_exec, const uint attack_mode, const uint opts_type, const salt_t *salt_buf, const uint highest_pw_len, const uint pws_cnt)
const u32 kernel_power_max = device_param->device_processors * device_param->kernel_threads * kernel_accel_max;
+ run_kernel_memset (device_param, device_param->d_pws_buf, 7, kernel_power_max * sizeof (pw_t));
+
+ if (data.attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
+ {
+ run_kernel_memset (device_param, device_param->d_pws_amp_buf, 7, kernel_power_max * sizeof (pw_t));
+ }
+
+ /*
for (u32 i = 0; i < kernel_power_max; i++)
{
device_param->pws_buf[i].i[0] = i;
{
run_kernel_amp (device_param, kernel_power_max);
}
+ */
#define VERIFIER_CNT 1
exec_ms_pre_final = MIN (exec_ms_pre_final, exec_ms_pre_final_v);
}
+ u32 diff = kernel_loops - kernel_accel;
+
if ((kernel_loops_min < kernel_loops_max) && (kernel_accel_min < kernel_accel_max))
{
- for (u32 f = 2; f < 1024; f++)
+ u32 kernel_accel_orig = kernel_accel;
+ u32 kernel_loops_orig = kernel_loops;
+
+ for (u32 f = 1; f < 1024; f++)
{
- const u32 kernel_accel_try = kernel_accel * f;
- const u32 kernel_loops_try = kernel_loops / f;
+ const u32 kernel_accel_try = (float) kernel_accel_orig * f;
+ const u32 kernel_loops_try = (float) kernel_loops_orig / f;
if (kernel_accel_try > kernel_accel_max) break;
if (kernel_loops_try < kernel_loops_min) break;
+ u32 diff_new = kernel_loops_try - kernel_accel_try;
+
+ if (diff_new > diff) break;
+
+ diff_new = diff;
+
double exec_ms = try_run (device_param, kernel_accel_try, kernel_loops_try);
for (int i = 0; i < VERIFIER_CNT; i++)
// reset them fake words
+ /*
memset (device_param->pws_buf, 0, kernel_power_max * sizeof (pw_t));
hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, kernel_power_max * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_amp_buf, CL_TRUE, 0, kernel_power_max * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
+ */
+
+ run_kernel_memset (device_param, device_param->d_pws_buf, 0, kernel_power_max * sizeof (pw_t));
+
+ if (data.attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
+ {
+ run_kernel_memset (device_param, device_param->d_pws_amp_buf, 0, kernel_power_max * sizeof (pw_t));
+ }
// reset timer
uint restore_disable = RESTORE_DISABLE;
uint status = STATUS;
uint status_timer = STATUS_TIMER;
- uint status_automat = STATUS_AUTOMAT;
+ uint status_automate = STATUS_AUTOMATE;
uint loopback = LOOPBACK;
uint weak_hash_threshold = WEAK_HASH_THRESHOLD;
char *session = NULL;
#define IDX_RESTORE_DISABLE 0xff27
#define IDX_STATUS 0xff17
#define IDX_STATUS_TIMER 0xff18
- #define IDX_STATUS_AUTOMAT 0xff50
+ #define IDX_STATUS_AUTOMATE 0xff50
#define IDX_LOOPBACK 0xff38
#define IDX_WEAK_HASH_THRESHOLD 0xff42
#define IDX_SESSION 0xff19
{"restore-disable", no_argument, 0, IDX_RESTORE_DISABLE},
{"status", no_argument, 0, IDX_STATUS},
{"status-timer", required_argument, 0, IDX_STATUS_TIMER},
- {"status-automat", no_argument, 0, IDX_STATUS_AUTOMAT},
+ {"status-automate", no_argument, 0, IDX_STATUS_AUTOMATE},
{"loopback", no_argument, 0, IDX_LOOPBACK},
{"weak-hash-threshold", required_argument, 0, IDX_WEAK_HASH_THRESHOLD},
{"session", required_argument, 0, IDX_SESSION},
case IDX_RESTORE_DISABLE: restore_disable = 1; break;
case IDX_STATUS: status = 1; break;
case IDX_STATUS_TIMER: status_timer = atoi (optarg); break;
- case IDX_STATUS_AUTOMAT: status_automat = 1; break;
+ case IDX_STATUS_AUTOMATE: status_automate = 1; break;
case IDX_LOOPBACK: loopback = 1; break;
case IDX_WEAK_HASH_THRESHOLD: weak_hash_threshold = atoi (optarg); break;
//case IDX_SESSION: session = optarg; break;
{
if (benchmark == 1)
{
- if (status_automat == 0)
+ if (status_automate == 0)
{
log_info ("%s (%s) starting in benchmark-mode...", PROGNAME, VERSION_TAG);
log_info ("");
data.restore_disable = restore_disable;
data.status = status;
data.status_timer = status_timer;
- data.status_automat = status_automat;
+ data.status_automate = status_automate;
data.loopback = loopback;
data.runtime = runtime;
data.remove = remove;
logfile_top_uint (segment_size);
logfile_top_uint (show);
logfile_top_uint (status);
- logfile_top_uint (status_automat);
+ logfile_top_uint (status_automate);
logfile_top_uint (status_timer);
logfile_top_uint (usage);
logfile_top_uint (username);
break;
case 7400: if (pw_max > 16) pw_max = 16;
break;
+ case 7500: if (pw_max > 8) pw_max = 8;
+ break;
case 7900: if (pw_max > 48) pw_max = 48;
break;
case 8500: if (pw_max > 8) pw_max = 8;
// this causes trouble with vendor id based macros
// we'll assign generic to those without special optimization available
- cl_uint vendor_id = 0;
+ cl_uint platform_vendor_id = 0;
if (strcmp (platform_vendor, CL_VENDOR_AMD) == 0)
{
- vendor_id = VENDOR_ID_AMD;
+ platform_vendor_id = VENDOR_ID_AMD;
+ }
+ else if (strcmp (platform_vendor, CL_VENDOR_AMD_USE_INTEL) == 0)
+ {
+ platform_vendor_id = VENDOR_ID_AMD_USE_INTEL;
}
else if (strcmp (platform_vendor, CL_VENDOR_APPLE) == 0)
{
- vendor_id = VENDOR_ID_APPLE;
+ platform_vendor_id = VENDOR_ID_APPLE;
}
else if (strcmp (platform_vendor, CL_VENDOR_INTEL_BEIGNET) == 0)
{
- vendor_id = VENDOR_ID_INTEL_BEIGNET;
+ platform_vendor_id = VENDOR_ID_INTEL_BEIGNET;
}
else if (strcmp (platform_vendor, CL_VENDOR_INTEL_SDK) == 0)
{
- vendor_id = VENDOR_ID_INTEL_SDK;
+ platform_vendor_id = VENDOR_ID_INTEL_SDK;
}
else if (strcmp (platform_vendor, CL_VENDOR_MESA) == 0)
{
- vendor_id = VENDOR_ID_MESA;
+ platform_vendor_id = VENDOR_ID_MESA;
}
else if (strcmp (platform_vendor, CL_VENDOR_NV) == 0)
{
- vendor_id = VENDOR_ID_NV;
+ platform_vendor_id = VENDOR_ID_NV;
}
else if (strcmp (platform_vendor, CL_VENDOR_POCL) == 0)
{
- vendor_id = VENDOR_ID_POCL;
+ platform_vendor_id = VENDOR_ID_POCL;
}
else
{
- vendor_id = VENDOR_ID_GENERIC;
+ platform_vendor_id = VENDOR_ID_GENERIC;
}
for (uint platform_devices_id = 0; platform_devices_id < platform_devices_cnt; platform_devices_id++)
hc_device_param_t *device_param = &data.devices_param[device_id];
- device_param->vendor_id = vendor_id;
+ device_param->platform_vendor_id = platform_vendor_id;
device_param->device = platform_devices[platform_devices_id];
device_param->device_name = device_name;
+ // device_vendor
+
+ hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_VENDOR, 0, NULL, ¶m_value_size);
+
+ char *device_vendor = (char *) mymalloc (param_value_size);
+
+ hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_VENDOR, param_value_size, device_vendor, NULL);
+
+ device_param->device_vendor = device_vendor;
+
+ cl_uint device_vendor_id = 0;
+
+ if (strcmp (device_vendor, CL_VENDOR_AMD) == 0)
+ {
+ device_vendor_id = VENDOR_ID_AMD;
+ }
+ else if (strcmp (device_vendor, CL_VENDOR_AMD_USE_INTEL) == 0)
+ {
+ device_vendor_id = VENDOR_ID_AMD_USE_INTEL;
+ }
+ else if (strcmp (device_vendor, CL_VENDOR_APPLE) == 0)
+ {
+ device_vendor_id = VENDOR_ID_APPLE;
+ }
+ else if (strcmp (device_vendor, CL_VENDOR_INTEL_BEIGNET) == 0)
+ {
+ device_vendor_id = VENDOR_ID_INTEL_BEIGNET;
+ }
+ else if (strcmp (device_vendor, CL_VENDOR_INTEL_SDK) == 0)
+ {
+ device_vendor_id = VENDOR_ID_INTEL_SDK;
+ }
+ else if (strcmp (device_vendor, CL_VENDOR_MESA) == 0)
+ {
+ device_vendor_id = VENDOR_ID_MESA;
+ }
+ else if (strcmp (device_vendor, CL_VENDOR_NV) == 0)
+ {
+ device_vendor_id = VENDOR_ID_NV;
+ }
+ else if (strcmp (device_vendor, CL_VENDOR_POCL) == 0)
+ {
+ device_vendor_id = VENDOR_ID_POCL;
+ }
+ else
+ {
+ device_vendor_id = VENDOR_ID_GENERIC;
+ }
+
+ device_param->device_vendor_id = device_vendor_id;
+
// tuning db
tuning_db_entry_t *tuningdb_entry = tuning_db_search (tuning_db, device_param, attack_mode, hash_mode);
if (device_endian_little == CL_FALSE)
{
- log_info ("Device #%u: WARNING: not little endian device", device_id + 1);
+ if (data.quiet == 0) log_info ("Device #%u: WARNING: not little endian device", device_id + 1);
device_param->skipped = 1;
}
if (device_available == CL_FALSE)
{
- log_info ("Device #%u: WARNING: device not available", device_id + 1);
+ if (data.quiet == 0) log_info ("Device #%u: WARNING: device not available", device_id + 1);
device_param->skipped = 1;
}
if (device_compiler_available == CL_FALSE)
{
- log_info ("Device #%u: WARNING: device no compiler available", device_id + 1);
+ if (data.quiet == 0) log_info ("Device #%u: WARNING: device no compiler available", device_id + 1);
device_param->skipped = 1;
}
if ((device_execution_capabilities & CL_EXEC_KERNEL) == 0)
{
- log_info ("Device #%u: WARNING: device does not support executing kernels", device_id + 1);
+ if (data.quiet == 0) log_info ("Device #%u: WARNING: device does not support executing kernels", device_id + 1);
device_param->skipped = 1;
}
if (strstr (device_extensions, "base_atomics") == 0)
{
- log_info ("Device #%u: WARNING: device does not support base atomics", device_id + 1);
+ if (data.quiet == 0) log_info ("Device #%u: WARNING: device does not support base atomics", device_id + 1);
device_param->skipped = 1;
}
if (strstr (device_extensions, "byte_addressable_store") == 0)
{
- log_info ("Device #%u: WARNING: device does not support byte addressable store", device_id + 1);
+ if (data.quiet == 0) log_info ("Device #%u: WARNING: device does not support byte addressable store", device_id + 1);
device_param->skipped = 1;
}
if (device_local_mem_size < 32768)
{
- log_info ("Device #%u: WARNING: device local mem size is too small", device_id + 1);
+ if (data.quiet == 0) log_info ("Device #%u: WARNING: device local mem size is too small", device_id + 1);
device_param->skipped = 1;
}
+ // If there's both an Intel CPU and an AMD OpenCL runtime it's a tricky situation
+ // Both platforms support CPU device types and therefore both will try to use 100% of the physical resources
+ // This results in both utilizing it for 50%
+ // However, Intel has much better SIMD control over their own hardware
+ // It makes sense to give them full control over their own hardware
+
+ if (device_type & CL_DEVICE_TYPE_CPU)
+ {
+ if (device_param->device_vendor_id == VENDOR_ID_AMD_USE_INTEL)
+ {
+ if (data.quiet == 0) log_info ("Device #%u: WARNING: not native intel opencl platform", device_id + 1);
+
+ device_param->skipped = 1;
+ }
+ }
// skipped
char *device_name_chksum = (char *) mymalloc (INFOSZ);
#if __x86_64__
- snprintf (device_name_chksum, INFOSZ - 1, "%u-%u-%u-%s-%s-%s-%u", 64, device_param->vendor_id, device_param->vector_width, device_param->device_name, device_param->device_version, device_param->driver_version, COMPTIME);
+ snprintf (device_name_chksum, INFOSZ - 1, "%u-%u-%u-%s-%s-%s-%u", 64, device_param->platform_vendor_id, device_param->vector_width, device_param->device_name, device_param->device_version, device_param->driver_version, COMPTIME);
#else
- snprintf (device_name_chksum, INFOSZ - 1, "%u-%u-%u-%s-%s-%s-%u", 32, device_param->vendor_id, device_param->vector_width, device_param->device_name, device_param->device_version, device_param->driver_version, COMPTIME);
+ snprintf (device_name_chksum, INFOSZ - 1, "%u-%u-%u-%s-%s-%s-%u", 32, device_param->platform_vendor_id, device_param->vector_width, device_param->device_name, device_param->device_version, device_param->driver_version, COMPTIME);
#endif
uint device_name_digest[4] = { 0 };
if (device_type & CL_DEVICE_TYPE_GPU)
{
- if (vendor_id == VENDOR_ID_AMD)
+ if (device_vendor_id == VENDOR_ID_AMD)
{
cl_uint device_processor_cores = 0;
device_param->device_processor_cores = device_processor_cores;
}
- else if (vendor_id == VENDOR_ID_NV)
+ else if (device_vendor_id == VENDOR_ID_NV)
{
cl_uint kernel_exec_timeout = 0;
if ((benchmark == 1 || quiet == 0) && (algorithm_pos == 0))
{
- if (status_automat == 0)
+ if (status_automate == 0)
{
if (device_param->skipped == 0)
{
{
if (device_type & CL_DEVICE_TYPE_GPU)
{
- if (vendor_id == VENDOR_ID_AMD)
+ if (platform_vendor_id == VENDOR_ID_AMD)
{
int catalyst_check = (force == 1) ? 0 : 1;
return (-1);
}
}
- else if (vendor_id == VENDOR_ID_NV)
+ else if (platform_vendor_id == VENDOR_ID_NV)
{
if (device_param->kernel_exec_timeout != 0)
{
}
}
+ /* turns out pocl still creates segfaults (because of llvm)
if (device_type & CL_DEVICE_TYPE_CPU)
{
- if (vendor_id == VENDOR_ID_AMD)
+ if (platform_vendor_id == VENDOR_ID_AMD)
{
if (force == 0)
{
}
}
}
+ */
/**
* kernel accel and loops tuning db adjustment
if ((benchmark == 1 || quiet == 0) && (algorithm_pos == 0))
{
- if (status_automat == 0)
+ if (status_automate == 0)
{
log_info ("");
}
const uint platform_devices_id = device_param->platform_devices_id;
#if defined(HAVE_NVML) || defined(HAVE_NVAPI)
- if (device_param->vendor_id == VENDOR_ID_NV)
+ if (device_param->device_vendor_id == VENDOR_ID_NV)
{
memcpy (&data.hm_device[device_id], &hm_adapters_nv[platform_devices_id], sizeof (hm_attrs_t));
}
#endif
#ifdef HAVE_ADL
- if (device_param->vendor_id == VENDOR_ID_AMD)
+ if (device_param->device_vendor_id == VENDOR_ID_AMD)
{
memcpy (&data.hm_device[device_id], &hm_adapters_amd[platform_devices_id], sizeof (hm_attrs_t));
}
if (hash_mode == 8900)
{
- if (device_param->vendor_id == VENDOR_ID_AMD)
+ if (device_param->device_vendor_id == VENDOR_ID_AMD)
{
tmto_start = 1;
}
- else if (device_param->vendor_id == VENDOR_ID_NV)
+ else if (device_param->device_vendor_id == VENDOR_ID_NV)
{
tmto_start = 2;
}
}
else if (hash_mode == 9300)
{
- if (device_param->vendor_id == VENDOR_ID_AMD)
+ if (device_param->device_vendor_id == VENDOR_ID_AMD)
{
tmto_start = 2;
}
- else if (device_param->vendor_id == VENDOR_ID_NV)
+ else if (device_param->device_vendor_id == VENDOR_ID_NV)
{
tmto_start = 2;
}
int skip = 0;
- if (size_pws > device_param->device_maxmem_alloc) skip = 1;
- if (size_tmps > device_param->device_maxmem_alloc) skip = 1;
- if (size_hooks > device_param->device_maxmem_alloc) skip = 1;
-
- if (( bitmap_size
- + bitmap_size
- + bitmap_size
- + bitmap_size
- + bitmap_size
- + bitmap_size
- + bitmap_size
- + bitmap_size
- + size_bfs
- + size_combs
- + size_digests
- + size_esalts
- + size_hooks
- + size_markov_css
- + size_plains
- + size_pws
- + size_pws // not a bug
- + size_results
- + size_root_css
- + size_rules
- + size_rules_c
- + size_salts
- + size_scryptV
- + size_shown
- + size_tm
- + size_tmps) > device_param->device_global_mem) skip = 1;
+ const u64 size_total
+ = bitmap_size
+ + bitmap_size
+ + bitmap_size
+ + bitmap_size
+ + bitmap_size
+ + bitmap_size
+ + bitmap_size
+ + bitmap_size
+ + size_bfs
+ + size_combs
+ + size_digests
+ + size_esalts
+ + size_hooks
+ + size_markov_css
+ + size_plains
+ + size_pws
+ + size_pws // not a bug
+ + size_results
+ + size_root_css
+ + size_rules
+ + size_rules_c
+ + size_salts
+ + size_scryptV
+ + size_shown
+ + size_tm
+ + size_tmps;
+
+ // Don't ask me, ask AMD!
+
+ if (size_total > device_param->device_maxmem_alloc) skip = 1;
+ if (size_total > device_param->device_global_mem) skip = 1;
if (skip == 1)
{
// we don't have sm_* on vendors not NV but it doesn't matter
- snprintf (build_opts, sizeof (build_opts) - 1, "-cl-std=CL1.1 -I\"%s/OpenCL\" -DVENDOR_ID=%u -DCUDA_ARCH=%d -DVECT_SIZE=%u -DDEVICE_TYPE=%u -DKERN_TYPE=%u -D_unroll", shared_dir, device_param->vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->vector_width, (u32) device_param->device_type, kern_type);
+ #if _WIN
+ snprintf (build_opts, sizeof (build_opts) - 1, "-I \"%s\\OpenCL\\\" -I '%s\\OpenCL\\' -I %s\\OpenCL\\ -I\"%s\\OpenCL\\\" -I'%s\\OpenCL\\' -I%s\\OpenCL\\", shared_dir, shared_dir, shared_dir, shared_dir, shared_dir, shared_dir);
+ #else
+ snprintf (build_opts, sizeof (build_opts) - 1, "-I \"%s/OpenCL/\" -I '%s/OpenCL/' -I %s/OpenCL/ -I\"%s/OpenCL/\" -I'%s/OpenCL/' -I%s/OpenCL/", shared_dir, shared_dir, shared_dir, shared_dir, shared_dir, shared_dir);
+ #endif
+
+ char build_opts_new[1024] = { 0 };
+
+ snprintf (build_opts_new, sizeof (build_opts_new) - 1, "%s -DVENDOR_ID=%u -DCUDA_ARCH=%d -DVECT_SIZE=%u -DDEVICE_TYPE=%u -DKERN_TYPE=%u -D_unroll -cl-std=CL1.1", build_opts, device_param->device_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->vector_width, (u32) device_param->device_type, kern_type);
+
+ strncpy (build_opts, build_opts_new, sizeof (build_opts) - 1);
- if (device_param->vendor_id == VENDOR_ID_INTEL_SDK)
+ /*
+ if (device_param->device_vendor_id == VENDOR_ID_INTEL_SDK)
{
// we do vectorizing much better than the auto-vectorizer
- char build_opts_new[1024] = { 0 };
-
snprintf (build_opts_new, sizeof (build_opts_new) - 1, "%s -cl-opt-disable", build_opts);
strncpy (build_opts, build_opts_new, sizeof (build_opts) - 1);
}
+ */
#ifdef DEBUG
log_info ("Device #%u: build_opts '%s'\n", device_id + 1, build_opts);
hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_digests_shown, CL_TRUE, 0, size_shown, data.digests_shown, 0, NULL, NULL);
hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_salt_bufs, CL_TRUE, 0, size_salts, data.salts_buf, 0, NULL, NULL);
- run_kernel_bzero (device_param, device_param->d_pws_buf, size_pws);
- run_kernel_bzero (device_param, device_param->d_pws_amp_buf, size_pws);
- run_kernel_bzero (device_param, device_param->d_tmps, size_tmps);
- run_kernel_bzero (device_param, device_param->d_hooks, size_hooks);
- run_kernel_bzero (device_param, device_param->d_plain_bufs, size_plains);
- run_kernel_bzero (device_param, device_param->d_result, size_results);
-
/**
* special buffers
*/
device_param->d_rules_c = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_rules_c, NULL);
hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_rules, CL_TRUE, 0, size_rules, kernel_rules_buf, 0, NULL, NULL);
-
- run_kernel_bzero (device_param, device_param->d_rules_c, size_rules_c);
}
else if (attack_kern == ATTACK_KERN_COMBI)
{
device_param->d_combs_c = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_combs, NULL);
device_param->d_root_css_buf = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_root_css, NULL);
device_param->d_markov_css_buf = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_markov_css, NULL);
-
- run_kernel_bzero (device_param, device_param->d_combs, size_combs);
- run_kernel_bzero (device_param, device_param->d_combs_c, size_combs);
- run_kernel_bzero (device_param, device_param->d_root_css_buf, size_root_css);
- run_kernel_bzero (device_param, device_param->d_markov_css_buf, size_markov_css);
}
else if (attack_kern == ATTACK_KERN_BF)
{
device_param->d_tm_c = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_tm, NULL);
device_param->d_root_css_buf = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_root_css, NULL);
device_param->d_markov_css_buf = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_markov_css, NULL);
-
- run_kernel_bzero (device_param, device_param->d_bfs, size_bfs);
- run_kernel_bzero (device_param, device_param->d_bfs_c, size_bfs);
- run_kernel_bzero (device_param, device_param->d_tm_c, size_tm);
- run_kernel_bzero (device_param, device_param->d_root_css_buf, size_root_css);
- run_kernel_bzero (device_param, device_param->d_markov_css_buf, size_markov_css);
}
if (size_esalts)
device_param->kernel_params_tm[0] = &device_param->d_bfs_c;
device_param->kernel_params_tm[1] = &device_param->d_tm_c;
+ device_param->kernel_params_memset_buf32[1] = 0; // value
+ device_param->kernel_params_memset_buf32[2] = 0; // gid_max
+
+ device_param->kernel_params_memset[0] = NULL;
+ device_param->kernel_params_memset[1] = &device_param->kernel_params_memset_buf32[1];
+ device_param->kernel_params_memset[2] = &device_param->kernel_params_memset_buf32[2];
+
/**
* kernel name
*/
if (opts_type & OPTS_TYPE_HOOK23) hc_clSetKernelArg (data.ocl, device_param->kernel23, i, sizeof (cl_uint), device_param->kernel_params[i]);
}
+ // GPU memset
+
+ device_param->kernel_memset = hc_clCreateKernel (data.ocl, device_param->program, "gpu_memset");
+
+ hc_clGetKernelWorkGroupInfo (data.ocl, device_param->kernel_memset, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp);
+
+ hc_clSetKernelArg (data.ocl, device_param->kernel_memset, 0, sizeof (cl_mem), device_param->kernel_params_memset[0]);
+ hc_clSetKernelArg (data.ocl, device_param->kernel_memset, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]);
+ hc_clSetKernelArg (data.ocl, device_param->kernel_memset, 2, sizeof (cl_uint), device_param->kernel_params_memset[2]);
+
+ // MP start
+
if (attack_mode == ATTACK_MODE_BF)
{
device_param->kernel_mp_l = hc_clCreateKernel (data.ocl, device_param->program_mp, "l_markov");
device_param->kernel_threads = kernel_threads;
+ // zero some data buffers
+
+ run_kernel_bzero (device_param, device_param->d_pws_buf, size_pws);
+ run_kernel_bzero (device_param, device_param->d_pws_amp_buf, size_pws);
+ run_kernel_bzero (device_param, device_param->d_tmps, size_tmps);
+ run_kernel_bzero (device_param, device_param->d_hooks, size_hooks);
+ run_kernel_bzero (device_param, device_param->d_plain_bufs, size_plains);
+ run_kernel_bzero (device_param, device_param->d_result, size_results);
+
+ /**
+ * special buffers
+ */
+
+ if (attack_kern == ATTACK_KERN_STRAIGHT)
+ {
+ run_kernel_bzero (device_param, device_param->d_rules_c, size_rules_c);
+ }
+ else if (attack_kern == ATTACK_KERN_COMBI)
+ {
+ run_kernel_bzero (device_param, device_param->d_combs, size_combs);
+ run_kernel_bzero (device_param, device_param->d_combs_c, size_combs);
+ run_kernel_bzero (device_param, device_param->d_root_css_buf, size_root_css);
+ run_kernel_bzero (device_param, device_param->d_markov_css_buf, size_markov_css);
+ }
+ else if (attack_kern == ATTACK_KERN_BF)
+ {
+ run_kernel_bzero (device_param, device_param->d_bfs, size_bfs);
+ run_kernel_bzero (device_param, device_param->d_bfs_c, size_bfs);
+ run_kernel_bzero (device_param, device_param->d_tm_c, size_tm);
+ run_kernel_bzero (device_param, device_param->d_root_css_buf, size_root_css);
+ run_kernel_bzero (device_param, device_param->d_markov_css_buf, size_markov_css);
+ }
+
/**
* Store initial fanspeed if gpu_temp_retain is enabled
*/
if (benchmark == 1)
{
- if (status_automat == 0)
+ if (status_automate == 0)
{
quiet = 0;
{
status_benchmark ();
- if (status_automat == 0)
+ if (status_automate == 0)
{
log_info ("");
}
if (device_param->kernel_mp_r) hc_clReleaseKernel (data.ocl, device_param->kernel_mp_r);
if (device_param->kernel_tm) hc_clReleaseKernel (data.ocl, device_param->kernel_tm);
if (device_param->kernel_amp) hc_clReleaseKernel (data.ocl, device_param->kernel_amp);
+ if (device_param->kernel_memset) hc_clReleaseKernel (data.ocl, device_param->kernel_memset);
if (device_param->program) hc_clReleaseProgram (data.ocl, device_param->program);
if (device_param->program_mp) hc_clReleaseProgram (data.ocl, device_param->program_mp);