Fixed some macros

[hashcat.git] / src / hashcat.c
diff --git a/src/hashcat.c b/src/hashcat.c

index 45f5496..3c816fe 100644 (file)
--- a/src/hashcat.c
+++ b/src/hashcat.c
@@ -1,4 +1,4 @@
-/**
+ /**
   * Authors.....: Jens Steube <jens.steube@gmail.com>
   *               Gabriele Gristina <matrix@hashcat.net>
   *               magnum <john.magnum@hushmail.com>
@@ -33,12 +33,13 @@ double TARGET_MS_PROFILE[4]     = { 2, 12, 96, 480 };
  #define MARKOV_DISABLE          0
  #define MARKOV_CLASSIC          0
  #define BENCHMARK               0
+#define STDOUT_FLAG             0
  #define RESTORE                 0
  #define RESTORE_TIMER           60
  #define RESTORE_DISABLE         0
  #define STATUS                  0
  #define STATUS_TIMER            10
-#define STATUS_AUTOMAT          0
+#define MACHINE_READABLE        0
  #define LOOPBACK                0
  #define WEAK_HASH_THRESHOLD     100
  #define SHOW                    0
@@ -74,9 +75,10 @@ double TARGET_MS_PROFILE[4]     = { 2, 12, 96, 480 };
  #define SEPARATOR               ':'
  #define BITMAP_MIN              16
  #define BITMAP_MAX              24
+#define NVIDIA_SPIN_DAMP        100
  #define GPU_TEMP_DISABLE        0
  #define GPU_TEMP_ABORT          90
-#define GPU_TEMP_RETAIN         80
+#define GPU_TEMP_RETAIN         65
  #define WORKLOAD_PROFILE        2
  #define KERNEL_ACCEL            0
  #define KERNEL_LOOPS            0
@@ -150,6 +152,8 @@ double TARGET_MS_PROFILE[4]     = { 2, 12, 96, 480 };
  
  #define NUM_DEFAULT_BENCHMARK_ALGORITHMS 143
  
+#define NVIDIA_100PERCENTCPU_WORKAROUND 100
+
  #define global_free(attr)       \
  {                               \
    myfree ((void *) data.attr);  \
@@ -164,6 +168,12 @@ double TARGET_MS_PROFILE[4]     = { 2, 12, 96, 480 };
    attr = NULL;            \
  }
  
+#if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) // Windows / Cygwin builds
+#define HC_API_CALL __stdcall
+#else // every other target: HC_API_CALL expands to nothing
+#define HC_API_CALL
+#endif // HC_API_CALL
+
  static uint default_benchmark_algorithms[NUM_DEFAULT_BENCHMARK_ALGORITHMS] =
  {
    900,
@@ -349,9 +359,7 @@ const char *USAGE_BIG[] =
    "",
    "Usage: %s [options]... hash|hashfile|hccapfile [dictionary|mask|directory]...",
    "",
-  "###########",
-  "# Options #",
-  "###########",
+  "- [ Options ] -",
    "",
    " Options Short / Long          | Type | Description                                          | Example",
    "===============================|======|======================================================|=======================",
@@ -366,7 +374,7 @@ const char *USAGE_BIG[] =
    "     --force                   |      | Ignore warnings                                      |",
    "     --status                  |      | Enable automatic update of the status-screen         |",
    "     --status-timer            | Num  | Sets seconds between status-screen update to X       | --status-timer=1",
-  "     --status-automat          |      | Display the status view in a machine readable format |",
+  "     --machine-readable        |      | Display the status view in a machine readable format |",
    "     --loopback                |      | Add new plains to induct directory                   |",
    "     --weak-hash-threshold     | Num  | Threshold X when to stop checking for weak hashes    | --weak=0",
    "     --markov-hcstat           | File | Specify hcstat file to use                           | --markov-hc=my.hcstat",
@@ -382,6 +390,7 @@ const char *USAGE_BIG[] =
    "     --outfile-autohex-disable |      | Disable the use of $HEX[] in output plains           |",
    "     --outfile-check-timer     | Num  | Sets seconds between outfile checks to X             | --outfile-check=30",
    " -p, --separator               | Char | Separator char for hashlists and outfile             | -p :",
+  "     --stdout                  |      | Do not crack a hash, instead print candidates only   |",
    "     --show                    |      | Show cracked passwords only                          |",
    "     --left                    |      | Show un-cracked passwords only                       |",
    "     --username                |      | Enable ignoring of usernames in hashfile             |",
@@ -409,13 +418,12 @@ const char *USAGE_BIG[] =
    " -w, --workload-profile        | Num  | Enable a specific workload profile, see pool below   | -w 3",
    " -n, --kernel-accel            | Num  | Manual workload tuning, set outerloop step size to X | -n 64",
    " -u, --kernel-loops            | Num  | Manual workload tuning, set innerloop step size to X | -u 256",
+  "     --nvidia-spin-damp        | Num  | Workaround NVidias CPU burning loop bug, in percent  | --nvidia-spin-damp=50",
    "     --gpu-temp-disable        |      | Disable temperature and fanspeed reads and triggers  |",
    #ifdef HAVE_HWMON
    "     --gpu-temp-abort          | Num  | Abort if GPU temperature reaches X degrees celsius   | --gpu-temp-abort=100",
    "     --gpu-temp-retain         | Num  | Try to retain GPU temperature at X degrees celsius   | --gpu-temp-retain=95",
-  #ifdef HAVE_ADL
-  "     --powertune-enable        |      | Enable automatic power tuning (AMD OverDrive 6 only) |",
-  #endif
+  "     --powertune-enable        |      | Enable power tuning, restores settings when finished |",
    #endif
    "     --scrypt-tmto             | Num  | Manually override TMTO value for scrypt to X         | --scrypt-tmto=3",
    " -s, --skip                    | Num  | Skip X words from the start                          | -s 1000000",
@@ -436,12 +444,10 @@ const char *USAGE_BIG[] =
    "     --increment-min           | Num  | Start mask incrementing at X                         | --increment-min=4",
    "     --increment-max           | Num  | Stop mask incrementing at X                          | --increment-max=8",
    "",
-  "################",
-  "## Hash modes ##",
-  "################",
+  "- [ Hash modes ] -",
    "",
    "      # | Name                                             | Category",
-  "  ------+--------------------------------------------------+--------------------------------------",
+  "  ======+==================================================+======================================",
    "    900 | MD4                                              | Raw Hash",
    "      0 | MD5                                              | Raw Hash",
    "   5100 | Half MD5                                         | Raw Hash",
@@ -576,6 +582,7 @@ const char *USAGE_BIG[] =
    "     22 | Juniper Netscreen/SSG (ScreenOS)                 | Operating-Systems",
    "    501 | Juniper IVE                                      | Operating-Systems",
    "   5800 | Android PIN                                      | Operating-Systems",
+  "  13800 | Windows 8+ phone PIN/Password                    | Operating-Systems",
    "   8100 | Citrix Netscaler                                 | Operating-Systems",
    "   8500 | RACF                                             | Operating-Systems",
    "   7200 | GRUB 2                                           | Operating-Systems",
@@ -654,12 +661,10 @@ const char *USAGE_BIG[] =
    "  12700 | Blockchain, My Wallet                            | Password Managers",
    "  13400 | Keepass 1 (AES/Twofish) and Keepass 2 (AES)      | Password Managers",
    "",
-  "#####################",
-  "## Outfile Formats ##",
-  "#####################",
+  "- [ Outfile Formats ] -",
    "",
    "  # | Format",
-  " ---+--------",
+  " ===+========",
    "  1 | hash[:salt]",
    "  2 | plain",
    "  3 | hash[:salt]:plain",
@@ -676,35 +681,29 @@ const char *USAGE_BIG[] =
    " 14 | plain:hex_plain:crack_pos",
    " 15 | hash[:salt]:plain:hex_plain:crack_pos",
    "",
-  "##########################",
-  "## Rule Debugging Modes ##",
-  "##########################",
+  "- [ Rule Debugging Modes ] -",
    "",
    "  # | Format",
-  " ---+--------",
+  " ===+========",
    "  1 | Finding-Rule",
    "  2 | Original-Word",
    "  3 | Original-Word:Finding-Rule",
    "  4 | Original-Word:Finding-Rule:Processed-Word",
    "",
-  "##################",
-  "## Attack Modes ##",
-  "##################",
+  "- [ Attack Modes ] -",
    "",
    "  # | Mode",
-  " ---+------",
+  " ===+======",
    "  0 | Straight",
    "  1 | Combination",
    "  3 | Brute-force",
    "  6 | Hybrid Wordlist + Mask",
    "  7 | Hybrid Mask + Wordlist",
    "",
-  "#######################",
-  "## Built-in Charsets ##",
-  "#######################",
+  "- [ Built-in Charsets ] -",
    "",
    "  ? | Charset",
-  " ---+---------",
+  " ===+=========",
    "  l | abcdefghijklmnopqrstuvwxyz",
    "  u | ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    "  d | 0123456789",
@@ -712,22 +711,18 @@ const char *USAGE_BIG[] =
    "  a | ?l?u?d?s",
    "  b | 0x00 - 0xff",
    "",
-  "#########################",
-  "## OpenCL Device Types ##",
-  "#########################",
+  "- [ OpenCL Device Types ] -",
    "",
    "  # | Device Type",
-  " ---+-------------",
+  " ===+=============",
    "  1 | CPU",
    "  2 | GPU",
    "  3 | FPGA, DSP, Co-Processor",
    "",
-  "#######################",
-  "## Workload Profiles ##",
-  "#######################",
+  "- [ Workload Profiles ] -",
    "",
    "  # | Performance | Runtime | Power Consumption | Desktop Impact",
-  " ---+-------------+---------+-------------------+----------------",
+  " ===+=============+=========+===================+=================",
    "  1 | Low         |   2 ms  | Low               | Minimal",
    "  2 | Default     |  12 ms  | Economic          | Noticeable",
    "  3 | High        |  96 ms  | High              | Unresponsive",
@@ -772,7 +767,7 @@ static double get_avg_exec_time (hc_device_param_t *device_param, const int last
    return exec_ms_sum / exec_ms_cnt;
  }
  
-void status_display_automat ()
+void status_display_machine_readable ()
  {
    FILE *out = stdout;
  
@@ -930,9 +925,9 @@ void status_display ()
    if (data.devices_status == STATUS_STARTING) return;
    if (data.devices_status == STATUS_BYPASS)   return;
  
-  if (data.status_automat == 1)
+  if (data.machine_readable == 1)
    {
-    status_display_automat ();
+    status_display_machine_readable ();
  
      return;
    }
@@ -1549,6 +1544,12 @@ void status_display ()
    }
  
    #ifdef HAVE_HWMON
+
+  if (data.devices_status == STATUS_EXHAUSTED)  return;
+  if (data.devices_status == STATUS_CRACKED)    return;
+  if (data.devices_status == STATUS_ABORTED)    return;
+  if (data.devices_status == STATUS_QUIT)       return;
+
    if (data.gpu_temp_disable == 0)
    {
      hc_thread_mutex_lock (mux_adl);
@@ -1559,46 +1560,84 @@ void status_display ()
  
        if (device_param->skipped) continue;
  
-      #define HM_STR_BUF_SIZE 255
+      const int num_temperature = hm_get_temperature_with_device_id (device_id);
+      const int num_fanspeed    = hm_get_fanspeed_with_device_id    (device_id);
+      const int num_utilization = hm_get_utilization_with_device_id (device_id);
+      const int num_corespeed   = hm_get_corespeed_with_device_id   (device_id);
+      const int num_memoryspeed = hm_get_memoryspeed_with_device_id (device_id);
+      const int num_buslanes    = hm_get_buslanes_with_device_id    (device_id);
+      const int num_throttle    = hm_get_throttle_with_device_id    (device_id);
+
+      char output_buf[256] = { 0 };
  
-      if (data.hm_device[device_id].fan_supported == 1)
+      int output_len = 0;
+
+      if (num_temperature >= 0)
        {
-        char utilization[HM_STR_BUF_SIZE] = { 0 };
-        char temperature[HM_STR_BUF_SIZE] = { 0 };
-        char fanspeed[HM_STR_BUF_SIZE] = { 0 };
+        snprintf (output_buf + output_len, sizeof (output_buf) - output_len, " Temp:%3uc", num_temperature);
  
-        hm_device_val_to_str ((char *) utilization, HM_STR_BUF_SIZE, "%", hm_get_utilization_with_device_id (device_id));
-        hm_device_val_to_str ((char *) temperature, HM_STR_BUF_SIZE, "c", hm_get_temperature_with_device_id (device_id));
+        output_len = strlen (output_buf);
+      }
  
-        if (device_param->vendor_id == VENDOR_ID_AMD)
-        {
-          hm_device_val_to_str ((char *) fanspeed, HM_STR_BUF_SIZE, "%", hm_get_fanspeed_with_device_id (device_id));
-        }
-        else if (device_param->vendor_id == VENDOR_ID_NV)
-        {
-          hm_device_val_to_str ((char *) fanspeed, HM_STR_BUF_SIZE, "%", hm_get_fanspeed_with_device_id (device_id));
-        }
+      if (num_fanspeed >= 0)
+      {
+        snprintf (output_buf + output_len, sizeof (output_buf) - output_len, " Fan:%3u%%", num_fanspeed);
  
-        log_info ("HWMon.GPU.#%d...: %s Util, %s Temp, %s Fan", device_id + 1, utilization, temperature, fanspeed);
+        output_len = strlen (output_buf);
        }
-      else
+
+      if (num_utilization >= 0)
+      {
+        snprintf (output_buf + output_len, sizeof (output_buf) - output_len, " Util:%3u%%", num_utilization);
+
+        output_len = strlen (output_buf);
+      }
+
+      if (num_corespeed >= 0)
+      {
+        snprintf (output_buf + output_len, sizeof (output_buf) - output_len, " Core:%4uMhz", num_corespeed);
+
+        output_len = strlen (output_buf);
+      }
+
+      if (num_memoryspeed >= 0)
+      {
+        snprintf (output_buf + output_len, sizeof (output_buf) - output_len, " Mem:%4uMhz", num_memoryspeed);
+
+        output_len = strlen (output_buf);
+      }
+
+      if (num_buslanes >= 0)
+      {
+        snprintf (output_buf + output_len, sizeof (output_buf) - output_len, " Lanes:%u", num_buslanes);
+
+        output_len = strlen (output_buf);
+      }
+
+      if (num_throttle == 1)
        {
-        char utilization[HM_STR_BUF_SIZE] = { 0 };
-        char temperature[HM_STR_BUF_SIZE] = { 0 };
+        snprintf (output_buf + output_len, sizeof (output_buf) - output_len, " *Throttled*");
+
+        output_len = strlen (output_buf);
+      }
  
-        hm_device_val_to_str ((char *) utilization, HM_STR_BUF_SIZE, "%", hm_get_utilization_with_device_id (device_id));
-        hm_device_val_to_str ((char *) temperature, HM_STR_BUF_SIZE, "c", hm_get_temperature_with_device_id (device_id));
+      if (output_len == 0)
+      {
+        snprintf (output_buf + output_len, sizeof (output_buf) - output_len, " N/A");
  
-        log_info ("HWMon.GPU.#%d...: %s Util, %s Temp, N/A Fan", device_id + 1, utilization, temperature);
+        output_len = strlen (output_buf);
        }
+
+      log_info ("HWMon.Dev.#%d...:%s", device_id + 1, output_buf);
      }
  
      hc_thread_mutex_unlock (mux_adl);
    }
+
    #endif // HAVE_HWMON
  }
  
-static void status_benchmark_automat ()
+static void status_benchmark_automate ()
  {
    u64    speed_cnt[DEVICES_MAX] = { 0 };
    double speed_ms[DEVICES_MAX]  = { 0 };
@@ -1645,9 +1684,9 @@ static void status_benchmark ()
    if (data.devices_status == STATUS_STARTING) return;
    if (data.devices_status == STATUS_BYPASS)   return;
  
-  if (data.status_automat == 1)
+  if (data.machine_readable == 1)
    {
-    status_benchmark_automat ();
+    status_benchmark_automate ();
  
      return;
    }
@@ -2116,6 +2155,7 @@ static void check_hash (hc_device_param_t *device_param, plain_t *plain)
  
        out_fp = stdout;
      }
+
      lock_file (out_fp);
    }
    else
@@ -2277,6 +2317,175 @@ static void check_cracked (hc_device_param_t *device_param, const uint salt_pos)
    }
  }
  
+static void process_stdout (hc_device_param_t *device_param, const uint pws_cnt)
+{ // Rebuilds each candidate (pws_cnt base entries x il_cnt inner-loop positions) on the host and hands it to format_output () on stdout.
+  char out_buf[HCBUFSIZ] = { 0 };
+  // 16-word scratch area in which the candidate is assembled; plain_ptr is a byte view of the same storage
+  uint plain_buf[16] = { 0 };
+
+  u8 *plain_ptr = (u8 *) plain_buf;
+
+  uint plain_len = 0;
+
+  const uint il_cnt = device_param->kernel_params_buf32[27]; // inner-loop bound, read back from kernel parameter slot 27 (ugly, i know)
+
+  if (data.attack_mode == ATTACK_MODE_STRAIGHT)
+  {
+    pw_t pw;
+
+    for (uint gidvid = 0; gidvid < pws_cnt; gidvid++)
+    {
+      gidd_to_pw_t (device_param, gidvid, &pw);
+
+      const uint pos = device_param->innerloop_pos;
+      // one output line per rule: kernel_rules_buf[pos + il_pos] is applied to a fresh copy of the word
+      for (uint il_pos = 0; il_pos < il_cnt; il_pos++)
+      {
+        for (int i = 0; i < 8; i++)
+        {
+          plain_buf[i] = pw.i[i];
+        }
+
+        plain_len = pw.pw_len;
+
+        plain_len = apply_rules (data.kernel_rules_buf[pos + il_pos].cmds, &plain_buf[0], &plain_buf[4], plain_len);
+
+        if (plain_len > data.pw_max) plain_len = data.pw_max;
+
+        format_output (stdout, out_buf, plain_ptr, plain_len, 0, NULL, 0);
+      }
+    }
+  }
+  else if (data.attack_mode == ATTACK_MODE_COMBI)
+  {
+    pw_t pw;
+
+    for (uint gidvid = 0; gidvid < pws_cnt; gidvid++)
+    {
+      gidd_to_pw_t (device_param, gidvid, &pw);
+
+      for (uint il_pos = 0; il_pos < il_cnt; il_pos++)
+      {
+        for (int i = 0; i < 8; i++)
+        {
+          plain_buf[i] = pw.i[i];
+        }
+
+        plain_len = pw.pw_len;
+
+        char *comb_buf = (char *) device_param->combs_buf[il_pos].i;
+        uint  comb_len =          device_param->combs_buf[il_pos].pw_len;
+        // BASE_LEFT: append the combinator word; otherwise shift the base word right and prepend it
+        if (data.combs_mode == COMBINATOR_MODE_BASE_LEFT)
+        {
+          memcpy (plain_ptr + plain_len, comb_buf, comb_len);
+        }
+        else
+        {
+          memmove (plain_ptr + comb_len, plain_ptr, plain_len);
+
+          memcpy (plain_ptr, comb_buf, comb_len);
+        }
+
+        plain_len += comb_len;
+        // the combined length is clamped to data.pw_max unless pw_max equals PW_DICTMAX1
+        if (data.pw_max != PW_DICTMAX1)
+        {
+          if (plain_len > data.pw_max) plain_len = data.pw_max;
+        }
+
+        format_output (stdout, out_buf, plain_ptr, plain_len, 0, NULL, 0);
+      }
+    }
+  }
+  else if (data.attack_mode == ATTACK_MODE_BF)
+  {
+    for (uint gidvid = 0; gidvid < pws_cnt; gidvid++)
+    {
+      for (uint il_pos = 0; il_pos < il_cnt; il_pos++)
+      {
+        u64 l_off = device_param->kernel_params_mp_l_buf64[3] + gidvid;
+        u64 r_off = device_param->kernel_params_mp_r_buf64[3] + il_pos;
+
+        uint l_start = device_param->kernel_params_mp_l_buf32[5];
+        uint r_start = device_param->kernel_params_mp_r_buf32[5];
+
+        uint l_stop = device_param->kernel_params_mp_l_buf32[4];
+        uint r_stop = device_param->kernel_params_mp_r_buf32[4];
+        // left part is selected by gidvid, right part by il_pos; each sp_exec () call is handed plain_ptr + its own start offset
+        sp_exec (l_off, (char *) plain_ptr + l_start, data.root_css_buf, data.markov_css_buf, l_start, l_start + l_stop);
+        sp_exec (r_off, (char *) plain_ptr + r_start, data.root_css_buf, data.markov_css_buf, r_start, r_start + r_stop);
+
+        plain_len = data.css_cnt;
+
+        format_output (stdout, out_buf, plain_ptr, plain_len, 0, NULL, 0);
+      }
+    }
+  }
+  else if (data.attack_mode == ATTACK_MODE_HYBRID1)
+  {
+    pw_t pw;
+
+    for (uint gidvid = 0; gidvid < pws_cnt; gidvid++)
+    {
+      gidd_to_pw_t (device_param, gidvid, &pw);
+
+      for (uint il_pos = 0; il_pos < il_cnt; il_pos++)
+      {
+        for (int i = 0; i < 8; i++)
+        {
+          plain_buf[i] = pw.i[i];
+        }
+
+        plain_len = pw.pw_len;
+
+        u64 off = device_param->kernel_params_mp_buf64[3] + il_pos;
+
+        uint start = 0;
+        uint stop  = device_param->kernel_params_mp_buf32[4];
+        // the mask part is placed directly behind the dictionary word
+        sp_exec (off, (char *) plain_ptr + plain_len, data.root_css_buf, data.markov_css_buf, start, start + stop);
+
+        plain_len += start + stop;
+
+        format_output (stdout, out_buf, plain_ptr, plain_len, 0, NULL, 0);
+      }
+    }
+  }
+  else if (data.attack_mode == ATTACK_MODE_HYBRID2)
+  {
+    pw_t pw;
+
+    for (uint gidvid = 0; gidvid < pws_cnt; gidvid++)
+    {
+      gidd_to_pw_t (device_param, gidvid, &pw);
+
+      for (uint il_pos = 0; il_pos < il_cnt; il_pos++)
+      {
+        for (int i = 0; i < 8; i++)
+        {
+          plain_buf[i] = pw.i[i];
+        }
+
+        plain_len = pw.pw_len;
+
+        u64 off = device_param->kernel_params_mp_buf64[3] + il_pos;
+
+        uint start = 0;
+        uint stop  = device_param->kernel_params_mp_buf32[4];
+        // shift the dictionary word right by stop bytes so the mask part can go in front of it
+        memmove (plain_ptr + stop, plain_ptr, plain_len);
+
+        sp_exec (off, (char *) plain_ptr, data.root_css_buf, data.markov_css_buf, start, start + stop);
+
+        plain_len += start + stop;
+
+        format_output (stdout, out_buf, plain_ptr, plain_len, 0, NULL, 0);
+      }
+    }
+  }
+}
+
  static void save_hash ()
  {
    char *hashfile = data.hashfile;
@@ -2314,8 +2523,6 @@ static void save_hash ()
  
        if (data.hash_mode != 2500)
        {
-        char out_buf[HCBUFSIZ] = { 0 };
-
          if (data.username == 1)
          {
            user_t *user = data.hash_info[idx]->user;
@@ -2327,11 +2534,15 @@ static void save_hash ()
            fputc (separator, fp);
          }
  
+        char out_buf[HCBUFSIZ]; // scratch buffer
+
+        out_buf[0] = 0;
+
          ascii_digest (out_buf, salt_pos, digest_pos);
  
          fputs (out_buf, fp);
  
-        log_out (fp, "");
+        fputc ('\n', fp);
        }
        else
        {
@@ -2369,43 +2580,7 @@ static void save_hash ()
    unlink (old_hashfile);
  }
  
-static float find_kernel_power_div (const u64 total_left, const uint kernel_power_all)
-{
-  // function called only in case kernel_power_all > words_left
-
-  float kernel_power_div = (float) (total_left) / kernel_power_all;
-
-  kernel_power_div += kernel_power_div / 100;
-
-  u32 kernel_power_new = (u32) (kernel_power_all * kernel_power_div);
-
-  while (kernel_power_new < total_left)
-  {
-    kernel_power_div += kernel_power_div / 100;
-
-    kernel_power_new = (u32) (kernel_power_all * kernel_power_div);
-  }
-
-  if (data.quiet == 0)
-  {
-    clear_prompt ();
-
-    //log_info ("");
-
-    log_info ("INFO: approaching final keyspace, workload adjusted");
-    log_info ("");
-
-    fprintf (stdout, "%s", PROMPT);
-
-    fflush (stdout);
-  }
-
-  if ((kernel_power_all * kernel_power_div) < 8) return 1;
-
-  return kernel_power_div;
-}
-
-static void run_kernel (const uint kern_run, hc_device_param_t *device_param, const uint num, const uint event_update)
+static void run_kernel (const uint kern_run, hc_device_param_t *device_param, const uint num, const uint event_update, const uint iteration)
  {
    uint num_elements = num;
  
@@ -2468,21 +2643,50 @@ static void run_kernel (const uint kern_run, hc_device_param_t *device_param, co
  
    hc_clFlush (data.ocl, device_param->command_queue);
  
+  if (device_param->nvidia_spin_damp)
+  {
+    if (data.devices_status == STATUS_RUNNING)
+    {
+      if (iteration < EXPECTED_ITERATIONS)
+      {
+        switch (kern_run)
+        {
+          case KERN_RUN_1: if (device_param->exec_us_prev1[iteration]) usleep (device_param->exec_us_prev1[iteration] * device_param->nvidia_spin_damp); break;
+          case KERN_RUN_2: if (device_param->exec_us_prev2[iteration]) usleep (device_param->exec_us_prev2[iteration] * device_param->nvidia_spin_damp); break;
+          case KERN_RUN_3: if (device_param->exec_us_prev3[iteration]) usleep (device_param->exec_us_prev3[iteration] * device_param->nvidia_spin_damp); break;
+        }
+      }
+    }
+  }
+
    hc_clWaitForEvents (data.ocl, 1, &event);
  
-  if (event_update)
-  {
-    cl_ulong time_start;
-    cl_ulong time_end;
+  cl_ulong time_start;
+  cl_ulong time_end;
  
-    hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL);
-    hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_END,   sizeof (time_end),   &time_end,   NULL);
+  hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL);
+  hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_END,   sizeof (time_end),   &time_end,   NULL);
  
-    const double exec_time = (double) (time_end - time_start) / 1000000.0;
+  const double exec_us = (double) (time_end - time_start) / 1000;
+
+  if (data.devices_status == STATUS_RUNNING)
+  {
+    if (iteration < EXPECTED_ITERATIONS)
+    {
+      switch (kern_run)
+      {
+        case KERN_RUN_1: device_param->exec_us_prev1[iteration] = exec_us; break;
+        case KERN_RUN_2: device_param->exec_us_prev2[iteration] = exec_us; break;
+        case KERN_RUN_3: device_param->exec_us_prev3[iteration] = exec_us; break;
+      }
+    }
+  }
  
+  if (event_update)
+  {
      uint exec_pos = device_param->exec_pos;
  
-    device_param->exec_ms[exec_pos] = exec_time;
+    device_param->exec_ms[exec_pos] = exec_us / 1000;
  
      exec_pos++;
  
@@ -2609,11 +2813,59 @@ static void run_kernel_amp (hc_device_param_t *device_param, const uint num)
    hc_clFinish (data.ocl, device_param->command_queue);
  }
  
+static void run_kernel_memset (hc_device_param_t *device_param, cl_mem buf, const uint value, const uint num)
+{ // Fills device buffer buf with the 32-bit pattern value; callers in this file pass num as a size in bytes.
+  const u32 num16d = num / 16;
+  const u32 num16m = num % 16;
+  // num is split into whole 16-byte chunks (handled by kernel_memset) and a remainder of up to 15 bytes (host write below)
+  if (num16d)
+  {
+    device_param->kernel_params_memset_buf32[1] = value;
+    device_param->kernel_params_memset_buf32[2] = num16d;
+
+    uint kernel_threads = device_param->kernel_threads;
+
+    uint num_elements = num16d;
+    // round the global work size up to the next multiple of kernel_threads
+    while (num_elements % kernel_threads) num_elements++;
+
+    cl_kernel kernel = device_param->kernel_memset;
+    // NOTE(review): kernel_params_memset[1] / [2] presumably point at the kernel_params_memset_buf32 slots set above - confirm at init
+    hc_clSetKernelArg (data.ocl, kernel, 0, sizeof (cl_mem),  (void *) &buf);
+    hc_clSetKernelArg (data.ocl, kernel, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]);
+    hc_clSetKernelArg (data.ocl, kernel, 2, sizeof (cl_uint), device_param->kernel_params_memset[2]);
+
+    const size_t global_work_size[3] = { num_elements,   1, 1 };
+    const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
+
+    hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+
+    hc_clFlush (data.ocl, device_param->command_queue);
+
+    hc_clFinish (data.ocl, device_param->command_queue);
+  }
+
+  if (num16m)
+  {
+    u32 tmp[4];
+
+    tmp[0] = value;
+    tmp[1] = value;
+    tmp[2] = value;
+    tmp[3] = value;
+    // blocking write (CL_TRUE) of the num16m leftover bytes, starting at byte offset num16d * 16
+    hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, buf, CL_TRUE, num16d * 16, num16m, tmp, 0, NULL, NULL);
+  }
+}
+
  static void run_kernel_bzero (hc_device_param_t *device_param, cl_mem buf, const size_t size)
  {
+  run_kernel_memset (device_param, buf, 0, size);
+
+  /*
    int rc = -1;
  
-  if (device_param->opencl_v12 && device_param->vendor_id == VENDOR_ID_AMD)
+  if (device_param->opencl_v12 && device_param->platform_vendor_id == VENDOR_ID_AMD)
    {
      // So far tested, amd is the only supporting this OpenCL 1.2 function without segfaulting
  
@@ -2644,10 +2896,18 @@ static void run_kernel_bzero (hc_device_param_t *device_param, cl_mem buf, const
  
      myfree (tmp);
    }
+  */
  }
  
-static void choose_kernel (hc_device_param_t *device_param, const uint attack_exec, const uint attack_mode, const uint opts_type, const salt_t *salt_buf, const uint highest_pw_len, const uint pws_cnt)
+static void choose_kernel (hc_device_param_t *device_param, const uint attack_exec, const uint attack_mode, const uint opts_type, const salt_t *salt_buf, const uint highest_pw_len, const uint pws_cnt, const uint fast_iteration)
  {
+  if (data.hash_mode == 2000)
+  {
+    process_stdout (device_param, pws_cnt);
+
+    return;
+  }
+
    if (attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
    {
      if (attack_mode == ATTACK_MODE_BF)
@@ -2666,33 +2926,39 @@ static void choose_kernel (hc_device_param_t *device_param, const uint attack_ex
  
      if (highest_pw_len < 16)
      {
-      run_kernel (KERN_RUN_1, device_param, pws_cnt, true);
+      run_kernel (KERN_RUN_1, device_param, pws_cnt, true, fast_iteration);
      }
      else if (highest_pw_len < 32)
      {
-      run_kernel (KERN_RUN_2, device_param, pws_cnt, true);
+      run_kernel (KERN_RUN_2, device_param, pws_cnt, true, fast_iteration);
      }
      else
      {
-      run_kernel (KERN_RUN_3, device_param, pws_cnt, true);
+      run_kernel (KERN_RUN_3, device_param, pws_cnt, true, fast_iteration);
      }
    }
    else
    {
      run_kernel_amp (device_param, pws_cnt);
  
-    run_kernel (KERN_RUN_1, device_param, pws_cnt, false);
+    run_kernel (KERN_RUN_1, device_param, pws_cnt, false, 0);
  
      if (opts_type & OPTS_TYPE_HOOK12)
      {
-      run_kernel (KERN_RUN_12, device_param, pws_cnt, false);
+      run_kernel (KERN_RUN_12, device_param, pws_cnt, false, 0);
+
+      hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+
+      // do something with data
+
+      hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
      }
  
      uint iter = salt_buf->salt_iter;
  
      uint loop_step = device_param->kernel_loops;
  
-    for (uint loop_pos = 0; loop_pos < iter; loop_pos += loop_step)
+    for (uint loop_pos = 0, slow_iteration = 0; loop_pos < iter; loop_pos += loop_step, slow_iteration++)
      {
        uint loop_left = iter - loop_pos;
  
@@ -2701,7 +2967,7 @@ static void choose_kernel (hc_device_param_t *device_param, const uint attack_ex
        device_param->kernel_params_buf32[25] = loop_pos;
        device_param->kernel_params_buf32[26] = loop_left;
  
-      run_kernel (KERN_RUN_2, device_param, pws_cnt, true);
+      run_kernel (KERN_RUN_2, device_param, pws_cnt, true, slow_iteration);
  
        if (data.devices_status == STATUS_CRACKED) break;
        if (data.devices_status == STATUS_ABORTED) break;
@@ -2733,7 +2999,7 @@ static void choose_kernel (hc_device_param_t *device_param, const uint attack_ex
  
      if (opts_type & OPTS_TYPE_HOOK23)
      {
-      run_kernel (KERN_RUN_23, device_param, pws_cnt, false);
+      run_kernel (KERN_RUN_23, device_param, pws_cnt, false, 0);
  
        hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
  
@@ -2742,7 +3008,7 @@ static void choose_kernel (hc_device_param_t *device_param, const uint attack_ex
        hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
      }
  
-    run_kernel (KERN_RUN_3, device_param, pws_cnt, false);
+    run_kernel (KERN_RUN_3, device_param, pws_cnt, false, 0);
    }
  }
  
@@ -2836,7 +3102,7 @@ static void run_copy (hc_device_param_t *device_param, const uint pws_cnt)
  
  static double try_run (hc_device_param_t *device_param, const u32 kernel_accel, const u32 kernel_loops)
  {
-  const u32 kernel_power = device_param->device_processors * device_param->kernel_threads * kernel_accel;
+  const u32 kernel_power_try = device_param->device_processors * device_param->kernel_threads * kernel_accel;
  
    device_param->kernel_params_buf32[25] = 0;
    device_param->kernel_params_buf32[26] = kernel_loops; // not a bug, both need to be set
@@ -2844,11 +3110,11 @@ static double try_run (hc_device_param_t *device_param, const u32 kernel_accel,
  
    if (data.attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
    {
-    run_kernel (KERN_RUN_1, device_param, kernel_power, true);
+    run_kernel (KERN_RUN_1, device_param, kernel_power_try, true, 0);
    }
    else
    {
-    run_kernel (KERN_RUN_2, device_param, kernel_power, true);
+    run_kernel (KERN_RUN_2, device_param, kernel_power_try, true, 0);
    }
  
    const double exec_ms_prev = get_avg_exec_time (device_param, 1);
@@ -2875,10 +3141,13 @@ static void autotune (hc_device_param_t *device_param)
  
    if ((kernel_loops_min == kernel_loops_max) && (kernel_accel_min == kernel_accel_max))
    {
-    try_run (device_param, kernel_accel, kernel_loops);
-    try_run (device_param, kernel_accel, kernel_loops);
-    try_run (device_param, kernel_accel, kernel_loops);
-    try_run (device_param, kernel_accel, kernel_loops);
+    if (data.hash_mode != 2000)
+    {
+      try_run (device_param, kernel_accel, kernel_loops);
+      try_run (device_param, kernel_accel, kernel_loops);
+      try_run (device_param, kernel_accel, kernel_loops);
+      try_run (device_param, kernel_accel, kernel_loops);
+    }
  
      device_param->kernel_accel = kernel_accel;
      device_param->kernel_loops = kernel_loops;
@@ -2895,16 +3164,30 @@ static void autotune (hc_device_param_t *device_param)
  
    const u32 kernel_power_max = device_param->device_processors * device_param->kernel_threads * kernel_accel_max;
  
-  for (u32 i = 0; i < kernel_power_max; i++)
+  if (data.attack_kern == ATTACK_KERN_BF)
    {
-    device_param->pws_buf[i].i[0]   = i;
-    device_param->pws_buf[i].i[1]   = 0x01234567;
-    device_param->pws_buf[i].pw_len = 7;
+    run_kernel_memset (device_param, device_param->d_pws_buf, 7, kernel_power_max * sizeof (pw_t));
    }
+  else
+  {
+    for (u32 i = 0; i < kernel_power_max; i++)
+    {
+      device_param->pws_buf[i].i[0]   = i;
+      device_param->pws_buf[i].i[1]   = 0x01234567;
+      device_param->pws_buf[i].pw_len = 7 + (i & 7);
+    }
  
-  hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, kernel_power_max * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
+    hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, kernel_power_max * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
+  }
  
-  if (data.attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
+  if (data.attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+  {
+    if (data.kernel_rules_cnt > 1)
+    {
+      hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_rules, device_param->d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL);
+    }
+  }
+  else
    {
      run_kernel_amp (device_param, kernel_power_max);
    }
@@ -3030,10 +3313,19 @@ static void autotune (hc_device_param_t *device_param)
  
    // reset them fake words
  
+  /*
    memset (device_param->pws_buf, 0, kernel_power_max * sizeof (pw_t));
  
    hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf,     CL_TRUE, 0, kernel_power_max * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
    hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_amp_buf, CL_TRUE, 0, kernel_power_max * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
+  */
+
+  run_kernel_memset (device_param, device_param->d_pws_buf, 0, kernel_power_max * sizeof (pw_t));
+
+  if (data.attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
+  {
+    run_kernel_memset (device_param, device_param->d_pws_amp_buf, 0, kernel_power_max * sizeof (pw_t));
+  }
  
    // reset timer
  
@@ -3041,6 +3333,10 @@ static void autotune (hc_device_param_t *device_param)
  
    memset (device_param->exec_ms, 0, EXEC_CACHE * sizeof (double));
  
+  memset (device_param->exec_us_prev1, 0, EXPECTED_ITERATIONS * sizeof (double));
+  memset (device_param->exec_us_prev2, 0, EXPECTED_ITERATIONS * sizeof (double));
+  memset (device_param->exec_us_prev3, 0, EXPECTED_ITERATIONS * sizeof (double));
+
    // store
  
    device_param->kernel_accel = kernel_accel;
@@ -3158,9 +3454,16 @@ static void run_cracker (hc_device_param_t *device_param, const uint pws_cnt)
        if (data.devices_status == STATUS_QUIT)    break;
        if (data.devices_status == STATUS_BYPASS)  break;
  
+      uint fast_iteration = 0;
+
        uint innerloop_left = innerloop_cnt - innerloop_pos;
  
-      if (innerloop_left > innerloop_step) innerloop_left = innerloop_step;
+      if (innerloop_left > innerloop_step)
+      {
+        innerloop_left = innerloop_step;
+
+        fast_iteration = 1;
+      }
  
        device_param->innerloop_pos  = innerloop_pos;
        device_param->innerloop_left = innerloop_left;
@@ -3318,7 +3621,7 @@ static void run_cracker (hc_device_param_t *device_param, const uint pws_cnt)
          hc_timer_set (&device_param->timer_speed);
        }
  
-      choose_kernel (device_param, data.attack_exec, data.attack_mode, data.opts_type, salt_buf, highest_pw_len, pws_cnt);
+      choose_kernel (device_param, data.attack_exec, data.attack_mode, data.opts_type, salt_buf, highest_pw_len, pws_cnt, fast_iteration);
  
        if (data.devices_status == STATUS_STOP_AT_CHECKPOINT) check_checkpoint ();
  
@@ -3330,7 +3633,10 @@ static void run_cracker (hc_device_param_t *device_param, const uint pws_cnt)
         * result
         */
  
-      check_cracked (device_param, salt_pos);
+      if (data.benchmark == 0)
+      {
+        check_cracked (device_param, salt_pos);
+      }
  
        /**
         * progress
@@ -3724,9 +4030,11 @@ static void *thread_monitor (void *p)
    uint status_left  = data.status_timer;
  
    #ifdef HAVE_HWMON
-  uint hwmon_check   = 0;
+  uint hwmon_check = 0;
+
+  int slowdown_warnings = 0;
  
-  // these variables are mainly used for fan control (AMD only)
+  // these variables are mainly used for fan control
  
    int *fan_speed_chgd = (int *) mycalloc (data.devices_cnt, sizeof (int));
  
@@ -3735,12 +4043,10 @@ static void *thread_monitor (void *p)
    int *temp_diff_old = (int *) mycalloc (data.devices_cnt, sizeof (int));
    int *temp_diff_sum = (int *) mycalloc (data.devices_cnt, sizeof (int));
  
-  #ifdef HAVE_ADL
    int temp_threshold = 1; // degrees celcius
  
    int fan_speed_min =  15; // in percentage
    int fan_speed_max = 100;
-  #endif // HAVE_ADL
  
    time_t last_temp_check_time;
    #endif // HAVE_HWMON
@@ -3791,11 +4097,68 @@ static void *thread_monitor (void *p)
      if (data.devices_status != STATUS_RUNNING) continue;
  
      #ifdef HAVE_HWMON
+
      if (hwmon_check == 1)
      {
        hc_thread_mutex_lock (mux_adl);
  
-      time_t temp_check_time;
+      for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
+      {
+        hc_device_param_t *device_param = &data.devices_param[device_id];
+
+        if (device_param->skipped) continue;
+
+        if (device_param->device_vendor_id == VENDOR_ID_NV)
+        {
+          if (data.hm_nvapi)
+          {
+            NV_GPU_PERF_POLICIES_INFO_PARAMS_V1   perfPolicies_info   = { 0 };
+            NV_GPU_PERF_POLICIES_STATUS_PARAMS_V1 perfPolicies_status = { 0 };
+
+            perfPolicies_info.version   = MAKE_NVAPI_VERSION (NV_GPU_PERF_POLICIES_INFO_PARAMS_V1, 1);
+            perfPolicies_status.version = MAKE_NVAPI_VERSION (NV_GPU_PERF_POLICIES_STATUS_PARAMS_V1, 1);
+
+            hm_NvAPI_GPU_GetPerfPoliciesInfo (data.hm_nvapi, data.hm_device[device_id].nvapi, &perfPolicies_info);
+
+            perfPolicies_status.info_value = perfPolicies_info.info_value;
+
+            hm_NvAPI_GPU_GetPerfPoliciesStatus (data.hm_nvapi, data.hm_device[device_id].nvapi, &perfPolicies_status);
+
+            if (perfPolicies_status.throttle & 2)
+            {
+              if (slowdown_warnings < 3)
+              {
+                if (data.quiet == 0) clear_prompt ();
+
+                log_info ("WARNING: Drivers temperature threshold hit on GPU #%d, expect performance to drop...", device_id + 1);
+
+                if (slowdown_warnings == 2)
+                {
+                  log_info ("");
+                }
+
+                if (data.quiet == 0) fprintf (stdout, "%s", PROMPT);
+                if (data.quiet == 0) fflush (stdout);
+
+                slowdown_warnings++;
+              }
+            }
+            else
+            {
+              slowdown_warnings = 0;
+            }
+          }
+        }
+      }
+
+      hc_thread_mutex_unlock (mux_adl);
+    }
+
+    if (hwmon_check == 1)
+    {
+      hc_thread_mutex_lock (mux_adl);
+
+      time_t temp_check_time;
  
        time (&temp_check_time);
  
@@ -3822,12 +4185,11 @@ static void *thread_monitor (void *p)
            break;
          }
  
-        #ifdef HAVE_ADL
          const int gpu_temp_retain = data.gpu_temp_retain;
  
-        if (gpu_temp_retain) // VENDOR_ID_AMD implied
+        if (gpu_temp_retain)
          {
-          if (data.hm_device[device_id].fan_supported == 1)
+          if (data.hm_device[device_id].fan_set_supported == 1)
            {
              int temp_cur = temperature;
  
@@ -3867,7 +4229,20 @@ static void *thread_monitor (void *p)
  
                  if ((freely_change_fan_speed == 1) || (fan_speed_must_change == 1))
                  {
-                  hm_set_fanspeed_with_device_id_amd (device_id, fan_speed_new);
+                  if (device_param->device_vendor_id == VENDOR_ID_AMD)
+                  {
+                    hm_set_fanspeed_with_device_id_adl (device_id, fan_speed_new, 1);
+                  }
+                  else if (device_param->device_vendor_id == VENDOR_ID_NV)
+                  {
+                    #ifdef WIN
+                    hm_set_fanspeed_with_device_id_nvapi (device_id, fan_speed_new, 1);
+                    #endif
+
+                    #ifdef LINUX
+                    hm_set_fanspeed_with_device_id_xnvctrl (device_id, fan_speed_new);
+                    #endif
+                  }
  
                    fan_speed_chgd[device_id] = 1;
                  }
@@ -3877,7 +4252,6 @@ static void *thread_monitor (void *p)
              }
            }
          }
-        #endif // HAVE_ADL
        }
  
        hc_thread_mutex_unlock (mux_adl);
@@ -4267,8 +4641,8 @@ static void *thread_outfile_remove (void *p)
  
  static void pw_add (hc_device_param_t *device_param, const u8 *pw_buf, const int pw_len)
  {
-  if (device_param->pws_cnt < device_param->kernel_power)
-  {
+  //if (device_param->pws_cnt < device_param->kernel_power)
+  //{
      pw_t *pw = (pw_t *) device_param->pws_buf + device_param->pws_cnt;
  
      u8 *ptr = (u8 *) pw->i;
@@ -4280,16 +4654,55 @@ static void pw_add (hc_device_param_t *device_param, const u8 *pw_buf, const int
      pw->pw_len = pw_len;
  
      device_param->pws_cnt++;
+  //}
+  //else
+  //{
+  //  fprintf (stderr, "BUG pw_add()!!\n");
+  //
+  //  return;
+  //}
+}
+
+// Stores kernel_power_final in data.kernel_power_final, the value get_power ()
+// checks to decide whether the per-device workload has to be scaled down.
+// Unless data.quiet is set, the prompt is cleared, a notice is logged and
+// PROMPT is printed again on stdout before the value is stored.
+static void set_kernel_power_final (const u64 kernel_power_final)
+{
+  if (data.quiet == 0)
+  {
+    clear_prompt ();
+
+    //log_info ("");
+
+    log_info ("INFO: approaching final keyspace, workload adjusted");
+    log_info ("");
+
+    fprintf (stdout, "%s", PROMPT);
+
+    // push the re-printed prompt out of the stdio buffer right away
+    fflush (stdout);
    }
-  else
+
+  data.kernel_power_final = kernel_power_final;
+}
+
+// Returns the workload size to use for device_param.
+// While data.kernel_power_final is 0 this is simply device_param->kernel_power.
+// Once it is non-zero, the device gets a share of kernel_power_final that is
+// proportional to its hardware_power relative to data.hardware_power_all,
+// rounded up via CEIL and never smaller than its own hardware_power.
+// NOTE(review): work is a u64 but the function returns u32, so the value is
+// narrowed implicitly on return - confirm it always fits into 32 bits.
+static u32 get_power (hc_device_param_t *device_param)
+{
+  const u64 kernel_power_final = data.kernel_power_final;
+
+  if (kernel_power_final)
    {
-    fprintf (stderr, "BUG pw_add()!!\n");
+    // fraction of the total hardware power contributed by this device
+    const double device_factor = (double) device_param->hardware_power / data.hardware_power_all;
  
-    return;
+    const u64 words_left_device = CEIL ((double) kernel_power_final * device_factor);
+
+    // work should be at least the hardware power available without any accelerator
+
+    const u64 work = MAX (words_left_device, device_param->hardware_power);
+
+    return work;
    }
+
+  return device_param->kernel_power;
  }
  
-static uint get_work (hc_device_param_t *device_param, const u64 max, const bool allow_div)
+static uint get_work (hc_device_param_t *device_param, const u64 max)
  {
    hc_thread_mutex_lock (mux_dispatcher);
  
@@ -4298,33 +4711,19 @@ static uint get_work (hc_device_param_t *device_param, const u64 max, const bool
  
    device_param->words_off = words_cur;
  
+  const u64 kernel_power_all = data.kernel_power_all;
+
    const u64 words_left = words_base - words_cur;
  
-  if (allow_div)
+  if (words_left < kernel_power_all)
    {
-    if (data.kernel_power_all > words_left)
-    {
-      if (data.kernel_power_div == 0)
-      {
-        data.kernel_power_div = find_kernel_power_div (words_left, data.kernel_power_all);
-      }
-    }
-
-    if (data.kernel_power_div)
+    if (data.kernel_power_final == 0)
      {
-      if (device_param->kernel_power == device_param->kernel_power_user)
-      {
-        const u32 kernel_power_new = (float) device_param->kernel_power * data.kernel_power_div;
-
-        if (kernel_power_new < device_param->kernel_power)
-        {
-          device_param->kernel_power = kernel_power_new;
-        }
-      }
+      set_kernel_power_final (words_left);
      }
    }
  
-  const uint kernel_power = device_param->kernel_power;
+  const u32 kernel_power = get_power (device_param);
  
    uint work = MIN (words_left, kernel_power);
  
@@ -4337,7 +4736,7 @@ static uint get_work (hc_device_param_t *device_param, const u64 max, const bool
    return work;
  }
  
-static void *thread_calc_stdin (void *p)
+static void *thread_autotune (void *p)
  {
    hc_device_param_t *device_param = (hc_device_param_t *) p;
  
@@ -4345,12 +4744,19 @@ static void *thread_calc_stdin (void *p)
  
    autotune (device_param);
  
+  return NULL;
+}
+
+static void *thread_calc_stdin (void *p)
+{
+  hc_device_param_t *device_param = (hc_device_param_t *) p;
+
+  if (device_param->skipped) return NULL;
+
    char *buf = (char *) mymalloc (HCBUFSIZ);
  
    const uint attack_kern = data.attack_kern;
  
-  const uint kernel_power = device_param->kernel_power;
-
    while ((data.devices_status != STATUS_EXHAUSTED) && (data.devices_status != STATUS_CRACKED) && (data.devices_status != STATUS_ABORTED) && (data.devices_status != STATUS_QUIT))
    {
      hc_thread_mutex_lock (mux_dispatcher);
@@ -4364,7 +4770,7 @@ static void *thread_calc_stdin (void *p)
  
      uint words_cur = 0;
  
-    while (words_cur < kernel_power)
+    while (words_cur < device_param->kernel_power)
      {
        char *line_buf = fgets (buf, HCBUFSIZ - 1, stdin);
  
@@ -4398,6 +4804,8 @@ static void *thread_calc_stdin (void *p)
          continue;
        }
  
+      // hmm that's always the case, or?
+
        if (attack_kern == ATTACK_KERN_STRAIGHT)
        {
          if ((line_len < data.pw_min) || (line_len > data.pw_max))
@@ -4414,25 +4822,6 @@ static void *thread_calc_stdin (void *p)
            continue;
          }
        }
-      else if (attack_kern == ATTACK_KERN_COMBI)
-      {
-        // do not check if minimum restriction is satisfied (line_len >= data.pw_min) here
-        // since we still need to combine the plains
-
-        if (line_len > data.pw_max)
-        {
-          hc_thread_mutex_lock (mux_counter);
-
-          for (uint salt_pos = 0; salt_pos < data.salts_cnt; salt_pos++)
-          {
-            data.words_progress_rejected[salt_pos] += data.combs_cnt;
-          }
-
-          hc_thread_mutex_unlock (mux_counter);
-
-          continue;
-        }
-      }
  
        pw_add (device_param, (u8 *) line_buf, line_len);
  
@@ -4491,8 +4880,6 @@ static void *thread_calc (void *p)
  
    if (device_param->skipped) return NULL;
  
-  autotune (device_param);
-
    const uint attack_mode = data.attack_mode;
    const uint attack_kern = data.attack_kern;
  
@@ -4500,7 +4887,7 @@ static void *thread_calc (void *p)
    {
      while ((data.devices_status != STATUS_EXHAUSTED) && (data.devices_status != STATUS_CRACKED) && (data.devices_status != STATUS_ABORTED) && (data.devices_status != STATUS_QUIT))
      {
-      const uint work = get_work (device_param, -1, true);
+      const uint work = get_work (device_param, -1);
  
        if (work == 0) break;
  
@@ -4615,18 +5002,16 @@ static void *thread_calc (void *p)
        u64 words_off = 0;
        u64 words_fin = 0;
  
-      bool allow_div = true;
-
        u64 max = -1;
  
        while (max)
        {
-        const uint work = get_work (device_param, max, allow_div);
-
-        allow_div = false;
+        const uint work = get_work (device_param, max);
  
          if (work == 0) break;
  
+        max = 0;
+
          words_off = device_param->words_off;
          words_fin = words_off + work;
  
@@ -4635,8 +5020,6 @@ static void *thread_calc (void *p)
  
          for ( ; words_cur < words_off; words_cur++) get_next_word (wl_data, fd, &line_buf, &line_len);
  
-        max = 0;
-
          for ( ; words_cur < words_fin; words_cur++)
          {
            get_next_word (wl_data, fd, &line_buf, &line_len);
@@ -4817,11 +5200,11 @@ static void weak_hash_check (hc_device_param_t *device_param, const uint salt_po
  
    if (data.attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
    {
-    run_kernel (KERN_RUN_1, device_param, 1, false);
+    run_kernel (KERN_RUN_1, device_param, 1, false, 0);
    }
    else
    {
-    run_kernel (KERN_RUN_1, device_param, 1, false);
+    run_kernel (KERN_RUN_1, device_param, 1, false, 0);
  
      uint loop_step = 16;
  
@@ -4836,10 +5219,10 @@ static void weak_hash_check (hc_device_param_t *device_param, const uint salt_po
        device_param->kernel_params_buf32[25] = loop_pos;
        device_param->kernel_params_buf32[26] = loop_left;
  
-      run_kernel (KERN_RUN_2, device_param, 1, false);
+      run_kernel (KERN_RUN_2, device_param, 1, false, 0);
      }
  
-    run_kernel (KERN_RUN_3, device_param, 1, false);
+    run_kernel (KERN_RUN_3, device_param, 1, false, 0);
    }
  
    /**
@@ -5222,8 +5605,8 @@ static uint hlfmt_detect (FILE *fp, uint max_check)
  
  // wrapper around mymalloc for ADL
  
-#if defined(HAVE_HWMON) && defined(HAVE_ADL)
-void *__stdcall ADL_Main_Memory_Alloc (const int iSize)
+#if defined(HAVE_HWMON)
+// Forwards iSize to mymalloc () and returns the resulting pointer.
+// HC_API_CALL supplies the calling convention: it expands to __stdcall when
+// _WIN32, __WIN32__ or __CYGWIN__ is defined and to nothing otherwise.
+void *HC_API_CALL ADL_Main_Memory_Alloc (const int iSize)
  {
    return mymalloc (iSize);
  }
@@ -5245,8 +5628,6 @@ static uint generate_bitmaps (const uint digests_cnt, const uint dgst_size, cons
  
    for (uint i = 0; i < digests_cnt; i++)
    {
-    if (data.digests_shown[i] == 1) continue; // can happen with potfile
-
      uint *digest_ptr = (uint *) digests_buf_ptr;
  
      digests_buf_ptr += dgst_size;
@@ -5281,8 +5662,38 @@ static uint generate_bitmaps (const uint digests_cnt, const uint dgst_size, cons
   * main
   */
  
+#ifdef WIN
+// Widens the console behind the standard output handle so that the right edge
+// of its window lies at column x - 1 or beyond; an already wider window keeps
+// its width. The screen buffer is resized to the same width with 9999 rows.
+// Every failing console call makes the function return silently, keeping
+// whatever had been applied up to that point.
+void SetConsoleWindowSize (const int x)
+{
+  HANDLE h = GetStdHandle (STD_OUTPUT_HANDLE);
+
+  if (h == INVALID_HANDLE_VALUE) return;
+
+  CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
+
+  if (!GetConsoleScreenBufferInfo (h, &bufferInfo)) return;
+
+  // work on the current window rectangle in place
+  SMALL_RECT *sr = &bufferInfo.srWindow;
+
+  // only ever move the right edge outward, never shrink the window
+  sr->Right = MAX (sr->Right, x - 1);
+
+  COORD co;
+
+  // buffer width = right edge index + 1; 9999 is the buffer height in rows
+  co.X = sr->Right + 1;
+  co.Y = 9999;
+
+  // the buffer is resized first, then the (possibly wider) window is applied
+  if (!SetConsoleScreenBufferSize (h, co)) return;
+
+  // TRUE: the rectangle holds absolute coordinates
+  if (!SetConsoleWindowInfo (h, TRUE, sr)) return;
+}
+#endif
+
  int main (int argc, char **argv)
  {
+  #ifdef WIN
+  SetConsoleWindowSize (132);
+  #endif
+
    /**
     * To help users a bit
     */
@@ -5344,87 +5755,87 @@ int main (int argc, char **argv)
     * commandline parameters
     */
  
-  uint  usage                 = USAGE;
-  uint  version               = VERSION;
-  uint  quiet                 = QUIET;
-  uint  benchmark             = BENCHMARK;
-  uint  show                  = SHOW;
-  uint  left                  = LEFT;
-  uint  username              = USERNAME;
-  uint  remove                = REMOVE;
-  uint  remove_timer          = REMOVE_TIMER;
-  u64   skip                  = SKIP;
-  u64   limit                 = LIMIT;
-  uint  keyspace              = KEYSPACE;
-  uint  potfile_disable       = POTFILE_DISABLE;
-  char *potfile_path          = NULL;
-  uint  debug_mode            = DEBUG_MODE;
-  char *debug_file            = NULL;
-  char *induction_dir         = NULL;
-  char *outfile_check_dir     = NULL;
-  uint  force                 = FORCE;
-  uint  runtime               = RUNTIME;
-  uint  hash_mode             = HASH_MODE;
-  uint  attack_mode           = ATTACK_MODE;
-  uint  markov_disable        = MARKOV_DISABLE;
-  uint  markov_classic        = MARKOV_CLASSIC;
-  uint  markov_threshold      = MARKOV_THRESHOLD;
-  char *markov_hcstat         = NULL;
-  char *outfile               = NULL;
-  uint  outfile_format        = OUTFILE_FORMAT;
-  uint  outfile_autohex       = OUTFILE_AUTOHEX;
-  uint  outfile_check_timer   = OUTFILE_CHECK_TIMER;
-  uint  restore               = RESTORE;
-  uint  restore_timer         = RESTORE_TIMER;
-  uint  restore_disable       = RESTORE_DISABLE;
-  uint  status                = STATUS;
-  uint  status_timer          = STATUS_TIMER;
-  uint  status_automat        = STATUS_AUTOMAT;
-  uint  loopback              = LOOPBACK;
-  uint  weak_hash_threshold   = WEAK_HASH_THRESHOLD;
-  char *session               = NULL;
-  uint  hex_charset           = HEX_CHARSET;
-  uint  hex_salt              = HEX_SALT;
-  uint  hex_wordlist          = HEX_WORDLIST;
-  uint  rp_gen                = RP_GEN;
-  uint  rp_gen_func_min       = RP_GEN_FUNC_MIN;
-  uint  rp_gen_func_max       = RP_GEN_FUNC_MAX;
-  uint  rp_gen_seed           = RP_GEN_SEED;
-  char *rule_buf_l            = (char *) RULE_BUF_L;
-  char *rule_buf_r            = (char *) RULE_BUF_R;
-  uint  increment             = INCREMENT;
-  uint  increment_min         = INCREMENT_MIN;
-  uint  increment_max         = INCREMENT_MAX;
-  char *cpu_affinity          = NULL;
-  OCL_PTR *ocl                = NULL;
-  char *opencl_devices        = NULL;
-  char *opencl_platforms      = NULL;
-  char *opencl_device_types   = NULL;
-  uint  opencl_vector_width   = OPENCL_VECTOR_WIDTH;
-  char *truecrypt_keyfiles    = NULL;
-  char *veracrypt_keyfiles    = NULL;
-  uint  veracrypt_pim         = 0;
-  uint  workload_profile      = WORKLOAD_PROFILE;
-  uint  kernel_accel          = KERNEL_ACCEL;
-  uint  kernel_loops          = KERNEL_LOOPS;
-  uint  gpu_temp_disable      = GPU_TEMP_DISABLE;
+  uint  usage                     = USAGE;
+  uint  version                   = VERSION;
+  uint  quiet                     = QUIET;
+  uint  benchmark                 = BENCHMARK;
+  uint  stdout_flag               = STDOUT_FLAG;
+  uint  show                      = SHOW;
+  uint  left                      = LEFT;
+  uint  username                  = USERNAME;
+  uint  remove                    = REMOVE;
+  uint  remove_timer              = REMOVE_TIMER;
+  u64   skip                      = SKIP;
+  u64   limit                     = LIMIT;
+  uint  keyspace                  = KEYSPACE;
+  uint  potfile_disable           = POTFILE_DISABLE;
+  char *potfile_path              = NULL;
+  uint  debug_mode                = DEBUG_MODE;
+  char *debug_file                = NULL;
+  char *induction_dir             = NULL;
+  char *outfile_check_dir         = NULL;
+  uint  force                     = FORCE;
+  uint  runtime                   = RUNTIME;
+  uint  hash_mode                 = HASH_MODE;
+  uint  attack_mode               = ATTACK_MODE;
+  uint  markov_disable            = MARKOV_DISABLE;
+  uint  markov_classic            = MARKOV_CLASSIC;
+  uint  markov_threshold          = MARKOV_THRESHOLD;
+  char *markov_hcstat             = NULL;
+  char *outfile                   = NULL;
+  uint  outfile_format            = OUTFILE_FORMAT;
+  uint  outfile_autohex           = OUTFILE_AUTOHEX;
+  uint  outfile_check_timer       = OUTFILE_CHECK_TIMER;
+  uint  restore                   = RESTORE;
+  uint  restore_timer             = RESTORE_TIMER;
+  uint  restore_disable           = RESTORE_DISABLE;
+  uint  status                    = STATUS;
+  uint  status_timer              = STATUS_TIMER;
+  uint  machine_readable          = MACHINE_READABLE;
+  uint  loopback                  = LOOPBACK;
+  uint  weak_hash_threshold       = WEAK_HASH_THRESHOLD;
+  char *session                   = NULL;
+  uint  hex_charset               = HEX_CHARSET;
+  uint  hex_salt                  = HEX_SALT;
+  uint  hex_wordlist              = HEX_WORDLIST;
+  uint  rp_gen                    = RP_GEN;
+  uint  rp_gen_func_min           = RP_GEN_FUNC_MIN;
+  uint  rp_gen_func_max           = RP_GEN_FUNC_MAX;
+  uint  rp_gen_seed               = RP_GEN_SEED;
+  char *rule_buf_l                = (char *) RULE_BUF_L;
+  char *rule_buf_r                = (char *) RULE_BUF_R;
+  uint  increment                 = INCREMENT;
+  uint  increment_min             = INCREMENT_MIN;
+  uint  increment_max             = INCREMENT_MAX;
+  char *cpu_affinity              = NULL;
+  OCL_PTR *ocl                    = NULL;
+  char *opencl_devices            = NULL;
+  char *opencl_platforms          = NULL;
+  char *opencl_device_types       = NULL;
+  uint  opencl_vector_width       = OPENCL_VECTOR_WIDTH;
+  char *truecrypt_keyfiles        = NULL;
+  char *veracrypt_keyfiles        = NULL;
+  uint  veracrypt_pim             = 0;
+  uint  workload_profile          = WORKLOAD_PROFILE;
+  uint  kernel_accel              = KERNEL_ACCEL;
+  uint  kernel_loops              = KERNEL_LOOPS;
+  uint  nvidia_spin_damp          = NVIDIA_SPIN_DAMP;
+  uint  gpu_temp_disable          = GPU_TEMP_DISABLE;
    #ifdef HAVE_HWMON
-  uint  gpu_temp_abort        = GPU_TEMP_ABORT;
-  uint  gpu_temp_retain       = GPU_TEMP_RETAIN;
-  #ifdef HAVE_ADL
-  uint  powertune_enable      = POWERTUNE_ENABLE;
-  #endif
+  uint  gpu_temp_abort            = GPU_TEMP_ABORT;
+  uint  gpu_temp_retain           = GPU_TEMP_RETAIN;
+  uint  powertune_enable          = POWERTUNE_ENABLE;
    #endif
-  uint  logfile_disable       = LOGFILE_DISABLE;
-  uint  segment_size          = SEGMENT_SIZE;
-  uint  scrypt_tmto           = SCRYPT_TMTO;
-  char  separator             = SEPARATOR;
-  uint  bitmap_min            = BITMAP_MIN;
-  uint  bitmap_max            = BITMAP_MAX;
-  char *custom_charset_1      = NULL;
-  char *custom_charset_2      = NULL;
-  char *custom_charset_3      = NULL;
-  char *custom_charset_4      = NULL;
+  uint  logfile_disable           = LOGFILE_DISABLE;
+  uint  segment_size              = SEGMENT_SIZE;
+  uint  scrypt_tmto               = SCRYPT_TMTO;
+  char  separator                 = SEPARATOR;
+  uint  bitmap_min                = BITMAP_MIN;
+  uint  bitmap_max                = BITMAP_MAX;
+  char *custom_charset_1          = NULL;
+  char *custom_charset_2          = NULL;
+  char *custom_charset_3          = NULL;
+  char *custom_charset_4          = NULL;
  
    #define IDX_HELP                      'h'
    #define IDX_VERSION                   'V'
@@ -5447,6 +5858,7 @@ int main (int argc, char **argv)
    #define IDX_FORCE                     0xff08
    #define IDX_RUNTIME                   0xff09
    #define IDX_BENCHMARK                 'b'
+  #define IDX_STDOUT_FLAG               0xff77
    #define IDX_HASH_MODE                 'm'
    #define IDX_ATTACK_MODE               'a'
    #define IDX_RP_FILE                   'r'
@@ -5467,7 +5879,7 @@ int main (int argc, char **argv)
    #define IDX_RESTORE_DISABLE           0xff27
    #define IDX_STATUS                    0xff17
    #define IDX_STATUS_TIMER              0xff18
-  #define IDX_STATUS_AUTOMAT            0xff50
+  #define IDX_MACHINE_READABLE          0xff50
    #define IDX_LOOPBACK                  0xff38
    #define IDX_WEAK_HASH_THRESHOLD       0xff42
    #define IDX_SESSION                   0xff19
@@ -5486,6 +5898,7 @@ int main (int argc, char **argv)
    #define IDX_WORKLOAD_PROFILE          'w'
    #define IDX_KERNEL_ACCEL              'n'
    #define IDX_KERNEL_LOOPS              'u'
+  #define IDX_NVIDIA_SPIN_DAMP          0xff79
    #define IDX_GPU_TEMP_DISABLE          0xff29
    #define IDX_GPU_TEMP_ABORT            0xff30
    #define IDX_GPU_TEMP_RETAIN           0xff31
@@ -5527,11 +5940,12 @@ int main (int argc, char **argv)
      {"outfile-check-dir",         required_argument, 0, IDX_OUTFILE_CHECK_DIR},
      {"force",                     no_argument,       0, IDX_FORCE},
      {"benchmark",                 no_argument,       0, IDX_BENCHMARK},
+    {"stdout",                    no_argument,       0, IDX_STDOUT_FLAG},
      {"restore",                   no_argument,       0, IDX_RESTORE},
      {"restore-disable",           no_argument,       0, IDX_RESTORE_DISABLE},
      {"status",                    no_argument,       0, IDX_STATUS},
      {"status-timer",              required_argument, 0, IDX_STATUS_TIMER},
-    {"status-automat",            no_argument,       0, IDX_STATUS_AUTOMAT},
+    {"machine-readable",          no_argument,       0, IDX_MACHINE_READABLE},
      {"loopback",                  no_argument,       0, IDX_LOOPBACK},
      {"weak-hash-threshold",       required_argument, 0, IDX_WEAK_HASH_THRESHOLD},
      {"session",                   required_argument, 0, IDX_SESSION},
@@ -5564,13 +5978,12 @@ int main (int argc, char **argv)
      {"workload-profile",          required_argument, 0, IDX_WORKLOAD_PROFILE},
      {"kernel-accel",              required_argument, 0, IDX_KERNEL_ACCEL},
      {"kernel-loops",              required_argument, 0, IDX_KERNEL_LOOPS},
+    {"nvidia-spin-damp",          required_argument, 0, IDX_NVIDIA_SPIN_DAMP},
      {"gpu-temp-disable",          no_argument,       0, IDX_GPU_TEMP_DISABLE},
      #ifdef HAVE_HWMON
      {"gpu-temp-abort",            required_argument, 0, IDX_GPU_TEMP_ABORT},
      {"gpu-temp-retain",           required_argument, 0, IDX_GPU_TEMP_RETAIN},
-    #ifdef HAVE_ADL
      {"powertune-enable",          no_argument,       0, IDX_POWERTUNE_ENABLE},
-    #endif
      #endif // HAVE_HWMON
      {"logfile-disable",           no_argument,       0, IDX_LOGFILE_DISABLE},
      {"truecrypt-keyfiles",        required_argument, 0, IDX_TRUECRYPT_KEYFILES},
@@ -5785,6 +6198,7 @@ int main (int argc, char **argv)
    uint runtime_chgd             = 0;
    uint kernel_loops_chgd        = 0;
    uint kernel_accel_chgd        = 0;
+  uint nvidia_spin_damp_chgd    = 0;
    uint attack_mode_chgd         = 0;
    uint outfile_format_chgd      = 0;
    uint rp_gen_seed_chgd         = 0;
@@ -5794,11 +6208,6 @@ int main (int argc, char **argv)
    uint workload_profile_chgd    = 0;
    uint opencl_vector_width_chgd = 0;
  
-  #if defined(HAVE_HWMON) && defined(HAVE_ADL)
-  uint gpu_temp_retain_chgd   = 0;
-  uint gpu_temp_abort_chgd    = 0;
-  #endif
-
    optind = 1;
    optopt = 0;
    option_index = 0;
@@ -5830,11 +6239,12 @@ int main (int argc, char **argv)
        case IDX_LIMIT:                     limit                     = atoll (optarg); break;
        case IDX_KEYSPACE:                  keyspace                  = 1;              break;
        case IDX_BENCHMARK:                 benchmark                 = 1;              break;
+      case IDX_STDOUT_FLAG:               stdout_flag               = 1;              break;
        case IDX_RESTORE:                                                               break;
        case IDX_RESTORE_DISABLE:           restore_disable           = 1;              break;
        case IDX_STATUS:                    status                    = 1;              break;
        case IDX_STATUS_TIMER:              status_timer              = atoi (optarg);  break;
-      case IDX_STATUS_AUTOMAT:            status_automat            = 1;              break;
+      case IDX_MACHINE_READABLE:          machine_readable          = 1;              break;
        case IDX_LOOPBACK:                  loopback                  = 1;              break;
        case IDX_WEAK_HASH_THRESHOLD:       weak_hash_threshold       = atoi (optarg);  break;
      //case IDX_SESSION:                   session                   = optarg;         break;
@@ -5877,21 +6287,13 @@ int main (int argc, char **argv)
                                            kernel_accel_chgd         = 1;              break;
        case IDX_KERNEL_LOOPS:              kernel_loops              = atoi (optarg);
                                            kernel_loops_chgd         = 1;              break;
+      case IDX_NVIDIA_SPIN_DAMP:          nvidia_spin_damp          = atoi (optarg);
+                                          nvidia_spin_damp_chgd     = 1;              break;
        case IDX_GPU_TEMP_DISABLE:          gpu_temp_disable          = 1;              break;
        #ifdef HAVE_HWMON
-      case IDX_GPU_TEMP_ABORT:            gpu_temp_abort            = atoi (optarg);
-      #ifdef HAVE_ADL
-                                          gpu_temp_abort_chgd       = 1;
-      #endif
-                                                                                      break;
-      case IDX_GPU_TEMP_RETAIN:           gpu_temp_retain           = atoi (optarg);
-      #ifdef HAVE_ADL
-                                          gpu_temp_retain_chgd      = 1;
-      #endif
-                                                                                      break;
-      #ifdef HAVE_ADL
+      case IDX_GPU_TEMP_ABORT:            gpu_temp_abort            = atoi (optarg);  break;
+      case IDX_GPU_TEMP_RETAIN:           gpu_temp_retain           = atoi (optarg);  break;
        case IDX_POWERTUNE_ENABLE:          powertune_enable          = 1;              break;
-      #endif
        #endif // HAVE_HWMON
        case IDX_LOGFILE_DISABLE:           logfile_disable           = 1;              break;
        case IDX_TRUECRYPT_KEYFILES:        truecrypt_keyfiles        = optarg;         break;
@@ -5935,10 +6337,13 @@ int main (int argc, char **argv)
    {
      if (benchmark == 1)
      {
-      if (status_automat == 0)
+      if (machine_readable == 0)
        {
          log_info ("%s (%s) starting in benchmark-mode...", PROGNAME, VERSION_TAG);
          log_info ("");
+        log_info ("Note: Reported benchmark cracking speed = real cracking speed");
+        log_info ("To verify, run hashcat like this: only_one_hash.txt -a 3 -w 3 ?b?b?b?b?b?b?b");
+        log_info ("");
        }
        else
        {
@@ -5950,6 +6355,10 @@ int main (int argc, char **argv)
        log_info ("%s (%s) starting in restore-mode...", PROGNAME, VERSION_TAG);
        log_info ("");
      }
+    else if (stdout_flag == 1)
+    {
+      // do nothing
+    }
      else
      {
        log_info ("%s (%s) starting...", PROGNAME, VERSION_TAG);
@@ -5975,7 +6384,7 @@ int main (int argc, char **argv)
      return (-1);
    }
  
-  if (hash_mode_chgd && hash_mode > 13799) // just added to remove compiler warnings for hash_mode_chgd
+  if (hash_mode_chgd && hash_mode > 13800) // just added to remove compiler warnings for hash_mode_chgd
    {
      log_error ("ERROR: Invalid hash-type specified");
  
@@ -6213,8 +6622,32 @@ int main (int argc, char **argv)
      case ATTACK_MODE_HYBRID2:  attack_kern = ATTACK_KERN_COMBI;    break;
    }
  
-  if (benchmark == 0)
+  if (benchmark == 1)
+  {
+    if (myargv[optind] != 0)
+    {
+      log_error ("ERROR: Invalid argument for benchmark mode specified");
+
+      return (-1);
+    }
+
+    if (attack_mode_chgd == 1)
+    {
+      if (attack_mode != ATTACK_MODE_BF)
+      {
+        log_error ("ERROR: Only attack-mode 3 allowed in benchmark mode");
+
+        return (-1);
+      }
+    }
+  }
+  else
    {
+    if (stdout_flag == 1) // no hash here
+    {
+      optind--;
+    }
+
      if (keyspace == 1)
      {
        int num_additional_params = 1;
@@ -6272,25 +6705,6 @@ int main (int argc, char **argv)
        return (-1);
      }
    }
-  else
-  {
-    if (myargv[optind] != 0)
-    {
-      log_error ("ERROR: Invalid argument for benchmark mode specified");
-
-      return (-1);
-    }
-
-    if (attack_mode_chgd == 1)
-    {
-      if (attack_mode != ATTACK_MODE_BF)
-      {
-        log_error ("ERROR: Only attack-mode 3 allowed in benchmark mode");
-
-        return (-1);
-      }
-    }
-  }
  
    if (skip != 0 && limit != 0)
    {
@@ -6323,6 +6737,24 @@ int main (int argc, char **argv)
      quiet = 1;
    }
  
+  if (stdout_flag == 1)
+  {
+    status_timer          = 0;
+    restore_timer         = 0;
+    restore_disable       = 1;
+    restore               = 0;
+    potfile_disable       = 1;
+    weak_hash_threshold   = 0;
+    gpu_temp_disable      = 1;
+    hash_mode             = 2000;
+    quiet                 = 1;
+    outfile_format        = OUTFILE_FMT_PLAIN;
+    kernel_accel          = 1024;
+    kernel_loops          = 1024;
+    force                 = 1;
+    outfile_check_timer   = 0;
+  }
+
    if (remove_timer_chgd == 1)
    {
      if (remove == 0)
@@ -6415,6 +6847,14 @@ int main (int argc, char **argv)
      weak_hash_threshold = 0;
    }
  
+  if (nvidia_spin_damp > 100)
+  {
+    log_error ("ERROR: setting --nvidia-spin-damp must be between 0 and 100 (inclusive)");
+
+    return (-1);
+  }
+
+
    /**
     * induction directory
     */
@@ -6565,47 +7005,45 @@ int main (int argc, char **argv)
     * store stuff
     */
  
-  data.hash_mode          = hash_mode;
-  data.restore            = restore;
-  data.restore_timer      = restore_timer;
-  data.restore_disable    = restore_disable;
-  data.status             = status;
-  data.status_timer       = status_timer;
-  data.status_automat     = status_automat;
-  data.loopback           = loopback;
-  data.runtime            = runtime;
-  data.remove             = remove;
-  data.remove_timer       = remove_timer;
-  data.debug_mode         = debug_mode;
-  data.debug_file         = debug_file;
-  data.username           = username;
-  data.quiet              = quiet;
-  data.outfile            = outfile;
-  data.outfile_format     = outfile_format;
-  data.outfile_autohex    = outfile_autohex;
-  data.hex_charset        = hex_charset;
-  data.hex_salt           = hex_salt;
-  data.hex_wordlist       = hex_wordlist;
-  data.separator          = separator;
-  data.rp_files           = rp_files;
-  data.rp_files_cnt       = rp_files_cnt;
-  data.rp_gen             = rp_gen;
-  data.rp_gen_seed        = rp_gen_seed;
-  data.force              = force;
-  data.benchmark          = benchmark;
-  data.skip               = skip;
-  data.limit              = limit;
+  data.hash_mode               = hash_mode;
+  data.restore                 = restore;
+  data.restore_timer           = restore_timer;
+  data.restore_disable         = restore_disable;
+  data.status                  = status;
+  data.status_timer            = status_timer;
+  data.machine_readable        = machine_readable;
+  data.loopback                = loopback;
+  data.runtime                 = runtime;
+  data.remove                  = remove;
+  data.remove_timer            = remove_timer;
+  data.debug_mode              = debug_mode;
+  data.debug_file              = debug_file;
+  data.username                = username;
+  data.quiet                   = quiet;
+  data.outfile                 = outfile;
+  data.outfile_format          = outfile_format;
+  data.outfile_autohex         = outfile_autohex;
+  data.hex_charset             = hex_charset;
+  data.hex_salt                = hex_salt;
+  data.hex_wordlist            = hex_wordlist;
+  data.separator               = separator;
+  data.rp_files                = rp_files;
+  data.rp_files_cnt            = rp_files_cnt;
+  data.rp_gen                  = rp_gen;
+  data.rp_gen_seed             = rp_gen_seed;
+  data.force                   = force;
+  data.benchmark               = benchmark;
+  data.skip                    = skip;
+  data.limit                   = limit;
    #ifdef HAVE_HWMON
-  #ifdef HAVE_ADL
-  data.powertune_enable   = powertune_enable;
+  data.powertune_enable        = powertune_enable;
    #endif
-  #endif
-  data.logfile_disable    = logfile_disable;
-  data.truecrypt_keyfiles = truecrypt_keyfiles;
-  data.veracrypt_keyfiles = veracrypt_keyfiles;
-  data.veracrypt_pim      = veracrypt_pim;
-  data.scrypt_tmto        = scrypt_tmto;
-  data.workload_profile   = workload_profile;
+  data.logfile_disable         = logfile_disable;
+  data.truecrypt_keyfiles      = truecrypt_keyfiles;
+  data.veracrypt_keyfiles      = veracrypt_keyfiles;
+  data.veracrypt_pim           = veracrypt_pim;
+  data.scrypt_tmto             = scrypt_tmto;
+  data.workload_profile        = workload_profile;
  
    /**
     * cpu affinity
@@ -6671,12 +7109,14 @@ int main (int argc, char **argv)
    logfile_top_uint   (attack_mode);
    logfile_top_uint   (attack_kern);
    logfile_top_uint   (benchmark);
+  logfile_top_uint   (stdout_flag);
    logfile_top_uint   (bitmap_min);
    logfile_top_uint   (bitmap_max);
    logfile_top_uint   (debug_mode);
    logfile_top_uint   (force);
    logfile_top_uint   (kernel_accel);
    logfile_top_uint   (kernel_loops);
+  logfile_top_uint   (nvidia_spin_damp);
    logfile_top_uint   (gpu_temp_disable);
    #ifdef HAVE_HWMON
    logfile_top_uint   (gpu_temp_abort);
@@ -6701,7 +7141,7 @@ int main (int argc, char **argv)
    logfile_top_uint   (outfile_format);
    logfile_top_uint   (potfile_disable);
    logfile_top_string (potfile_path);
-  #if defined(HAVE_HWMON) && defined(HAVE_ADL)
+  #if defined(HAVE_HWMON)
    logfile_top_uint   (powertune_enable);
    #endif
    logfile_top_uint   (scrypt_tmto);
@@ -6719,7 +7159,7 @@ int main (int argc, char **argv)
    logfile_top_uint   (segment_size);
    logfile_top_uint   (show);
    logfile_top_uint   (status);
-  logfile_top_uint   (status_automat);
+  logfile_top_uint   (machine_readable);
    logfile_top_uint   (status_timer);
    logfile_top_uint   (usage);
    logfile_top_uint   (username);
@@ -6796,11 +7236,21 @@ int main (int argc, char **argv)
      restore_disable       = 1;
      potfile_disable       = 1;
      weak_hash_threshold   = 0;
+    nvidia_spin_damp      = 0;
      gpu_temp_disable      = 1;
+    outfile_check_timer   = 0;
+
+    #ifdef HAVE_HWMON
+    if (powertune_enable == 1)
+    {
+      gpu_temp_disable = 0;
+    }
+    #endif
  
-    data.status_timer     = status_timer;
-    data.restore_timer    = restore_timer;
-    data.restore_disable  = restore_disable;
+    data.status_timer         = status_timer;
+    data.restore_timer        = restore_timer;
+    data.restore_disable      = restore_disable;
+    data.outfile_check_timer  = outfile_check_timer;
  
      /**
       * force attack mode to be bruteforce
@@ -8232,6 +8682,21 @@ int main (int argc, char **argv)
                     dgst_pos3   = 3;
                     break;
  
+      case  2000:  hash_type   = HASH_TYPE_STDOUT;
+                   salt_type   = SALT_TYPE_NONE;
+                   attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
+                   opts_type   = OPTS_TYPE_PT_GENERATE_LE;
+                   kern_type   = 0;
+                   dgst_size   = DGST_SIZE_4_4;
+                   parse_func  = NULL;
+                   sort_by_digest = NULL;
+                   opti_type   = 0;
+                   dgst_pos0   = 0;
+                   dgst_pos1   = 0;
+                   dgst_pos2   = 0;
+                   dgst_pos3   = 0;
+                   break;
+
        case  2100:  hash_type   = HASH_TYPE_DCC2;
                     salt_type   = SALT_TYPE_EMBEDDED;
                     attack_exec = ATTACK_EXEC_OUTSIDE_KERNEL;
@@ -10646,6 +11111,25 @@ int main (int argc, char **argv)
                     dgst_pos3   = 3;
                     break;
  
+      case 13800:  hash_type   = HASH_TYPE_SHA256;
+                   salt_type   = SALT_TYPE_EMBEDDED;
+                   attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
+                   opts_type   = OPTS_TYPE_PT_GENERATE_BE
+                               | OPTS_TYPE_PT_UNICODE;
+                   kern_type   = KERN_TYPE_WIN8PHONE;
+                   dgst_size   = DGST_SIZE_4_8;
+                   parse_func  = win8phone_parse_hash;
+                   sort_by_digest = sort_by_digest_4_8;
+                   opti_type   = OPTI_TYPE_ZERO_BYTE
+                               | OPTI_TYPE_PRECOMPUTE_INIT
+                               | OPTI_TYPE_EARLY_SKIP
+                               | OPTI_TYPE_NOT_ITERATED
+                               | OPTI_TYPE_RAW_HASH;
+                   dgst_pos0   = 3;
+                   dgst_pos1   = 7;
+                   dgst_pos2   = 2;
+                   dgst_pos3   = 6;
+                   break;
  
        default:     usage_mini_print (PROGNAME); return (-1);
      }
@@ -10772,6 +11256,7 @@ int main (int argc, char **argv)
        case 13761:  esalt_size = sizeof (tc_t);            break;
        case 13762:  esalt_size = sizeof (tc_t);            break;
        case 13763:  esalt_size = sizeof (tc_t);            break;
+      case 13800:  esalt_size = sizeof (win8phone_t);     break;
      }
  
      data.esalt_size = esalt_size;
@@ -11105,6 +11590,8 @@ int main (int argc, char **argv)
                    break;
        case  7400: if (pw_max > 16) pw_max = 16;
                    break;
+      case  7700: if (pw_max >  8) pw_max =  8;
+                  break;
        case  7900: if (pw_max > 48) pw_max = 48;
                    break;
        case  8500: if (pw_max >  8) pw_max =  8;
@@ -11172,7 +11659,7 @@ int main (int argc, char **argv)
  
      uint hashes_avail = 0;
  
-    if (benchmark == 0)
+    if ((benchmark == 0) && (stdout_flag == 0))
      {
        struct stat f;
  
@@ -11354,6 +11841,10 @@ int main (int argc, char **argv)
        {
          // useless to read hash file for keyspace, cheat a little bit w/ optind
        }
+      else if (stdout_flag == 1)
+      {
+        // useless to read hash file for stdout, cheat a little bit w/ optind
+      }
        else if (hashes_avail == 0)
        {
        }
@@ -12163,7 +12654,7 @@ int main (int argc, char **argv)
        return (0);
      }
  
-    if (keyspace == 0)
+    if ((keyspace == 0) && (stdout_flag == 0))
      {
        if (hashes_cnt == 0)
        {
@@ -13126,32 +13617,6 @@ int main (int argc, char **argv)
        }
      }
  
-    /**
-     * OpenCL platforms: For each platform check if we need to unset features that we can not use, eg: temp_retain
-     */
-
-    for (uint platform_id = 0; platform_id < platforms_cnt; platform_id++)
-    {
-      cl_platform_id platform = platforms[platform_id];
-
-      char platform_vendor[INFOSZ] = { 0 };
-
-      hc_clGetPlatformInfo (data.ocl, platform, CL_PLATFORM_VENDOR, sizeof (platform_vendor), platform_vendor, NULL);
-
-      #ifdef HAVE_HWMON
-      #if defined(HAVE_NVML) || defined(HAVE_NVAPI)
-      if (strcmp (platform_vendor, CL_VENDOR_NV) == 0)
-      {
-        // make sure that we do not directly control the fan for NVidia
-
-        gpu_temp_retain = 0;
-
-        data.gpu_temp_retain = gpu_temp_retain;
-      }
-      #endif // HAVE_NVML || HAVE_NVAPI
-      #endif
-    }
-
      /**
       * OpenCL device types:
       *   In case the user did not specify --opencl-device-types and the user runs hashcat in a system with only a CPU only he probably want to use that CPU.
@@ -13189,36 +13654,13 @@ int main (int argc, char **argv)
      }
  
      /**
-     * Detect if the user has both an Intel and and AMD OpenCL runtime and not filtered any of them
-     * In this case mark it for later use
+     * OpenCL devices: simply push all devices from all platforms into the same device array
       */
  
-    int has_intel = 0;
-    int has_amd   = 0;
-
-    for (uint platform_id = 0; platform_id < platforms_cnt; platform_id++)
-    {
-      if ((opencl_platforms_filter & (1 << platform_id)) == 0) continue;
-
-      cl_platform_id platform = platforms[platform_id];
-
-      char platform_vendor[INFOSZ] = { 0 };
-
-      hc_clGetPlatformInfo (data.ocl, platform, CL_PLATFORM_VENDOR, sizeof (platform_vendor), platform_vendor, NULL);
-
-      if (strcmp (platform_vendor, CL_VENDOR_AMD) == 0)
-      {
-        has_intel = 1;
-      }
-      else if (strcmp (platform_vendor, CL_VENDOR_INTEL_SDK) == 0)
-      {
-        has_amd = 1;
-      }
-    }
-
-    /**
-     * OpenCL devices: simply push all devices from all platforms into the same device array
-     */
+    int need_adl     = 0;
+    int need_nvapi   = 0;
+    int need_nvml    = 0;
+    int need_xnvctrl = 0;
  
      hc_device_param_t *devices_param = (hc_device_param_t *) mycalloc (DEVICES_MAX, sizeof (hc_device_param_t));
  
@@ -13244,39 +13686,43 @@ int main (int argc, char **argv)
        // this causes trouble with vendor id based macros
        // we'll assign generic to those without special optimization available
  
-      cl_uint vendor_id = 0;
+      cl_uint platform_vendor_id = 0;
  
        if (strcmp (platform_vendor, CL_VENDOR_AMD) == 0)
        {
-        vendor_id = VENDOR_ID_AMD;
+        platform_vendor_id = VENDOR_ID_AMD;
+      }
+      else if (strcmp (platform_vendor, CL_VENDOR_AMD_USE_INTEL) == 0)
+      {
+        platform_vendor_id = VENDOR_ID_AMD_USE_INTEL;
        }
        else if (strcmp (platform_vendor, CL_VENDOR_APPLE) == 0)
        {
-        vendor_id = VENDOR_ID_APPLE;
+        platform_vendor_id = VENDOR_ID_APPLE;
        }
        else if (strcmp (platform_vendor, CL_VENDOR_INTEL_BEIGNET) == 0)
        {
-        vendor_id = VENDOR_ID_INTEL_BEIGNET;
+        platform_vendor_id = VENDOR_ID_INTEL_BEIGNET;
        }
        else if (strcmp (platform_vendor, CL_VENDOR_INTEL_SDK) == 0)
        {
-        vendor_id = VENDOR_ID_INTEL_SDK;
+        platform_vendor_id = VENDOR_ID_INTEL_SDK;
        }
        else if (strcmp (platform_vendor, CL_VENDOR_MESA) == 0)
        {
-        vendor_id = VENDOR_ID_MESA;
+        platform_vendor_id = VENDOR_ID_MESA;
        }
        else if (strcmp (platform_vendor, CL_VENDOR_NV) == 0)
        {
-        vendor_id = VENDOR_ID_NV;
+        platform_vendor_id = VENDOR_ID_NV;
        }
        else if (strcmp (platform_vendor, CL_VENDOR_POCL) == 0)
        {
-        vendor_id = VENDOR_ID_POCL;
+        platform_vendor_id = VENDOR_ID_POCL;
        }
        else
        {
-        vendor_id = VENDOR_ID_GENERIC;
+        platform_vendor_id = VENDOR_ID_GENERIC;
        }
  
        for (uint platform_devices_id = 0; platform_devices_id < platform_devices_cnt; platform_devices_id++)
@@ -13287,7 +13733,7 @@ int main (int argc, char **argv)
  
          hc_device_param_t *device_param = &data.devices_param[device_id];
  
-        device_param->vendor_id = vendor_id;
+        device_param->platform_vendor_id = platform_vendor_id;
  
          device_param->device = platform_devices[platform_devices_id];
  
@@ -13315,6 +13761,57 @@ int main (int argc, char **argv)
  
          device_param->device_name = device_name;
  
+        // device_vendor
+
+        hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_VENDOR, 0, NULL, &param_value_size);
+
+        char *device_vendor = (char *) mymalloc (param_value_size);
+
+        hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_VENDOR, param_value_size, device_vendor, NULL);
+
+        device_param->device_vendor = device_vendor;
+
+        cl_uint device_vendor_id = 0;
+
+        if (strcmp (device_vendor, CL_VENDOR_AMD) == 0)
+        {
+          device_vendor_id = VENDOR_ID_AMD;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_AMD_USE_INTEL) == 0)
+        {
+          device_vendor_id = VENDOR_ID_AMD_USE_INTEL;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_APPLE) == 0)
+        {
+          device_vendor_id = VENDOR_ID_APPLE;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_INTEL_BEIGNET) == 0)
+        {
+          device_vendor_id = VENDOR_ID_INTEL_BEIGNET;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_INTEL_SDK) == 0)
+        {
+          device_vendor_id = VENDOR_ID_INTEL_SDK;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_MESA) == 0)
+        {
+          device_vendor_id = VENDOR_ID_MESA;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_NV) == 0)
+        {
+          device_vendor_id = VENDOR_ID_NV;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_POCL) == 0)
+        {
+          device_vendor_id = VENDOR_ID_POCL;
+        }
+        else
+        {
+          device_vendor_id = VENDOR_ID_GENERIC;
+        }
+
+        device_param->device_vendor_id = device_vendor_id;
+
          // tuning db
  
          tuning_db_entry_t *tuningdb_entry = tuning_db_search (tuning_db, device_param, attack_mode, hash_mode);
@@ -13421,7 +13918,7 @@ int main (int argc, char **argv)
  
          if (device_endian_little == CL_FALSE)
          {
-          if (data.quiet == 0) log_info ("Device #%u: WARNING: not little endian device", device_id + 1);
+          log_info ("Device #%u: WARNING: not little endian device", device_id + 1);
  
            device_param->skipped = 1;
          }
@@ -13434,7 +13931,7 @@ int main (int argc, char **argv)
  
          if (device_available == CL_FALSE)
          {
-          if (data.quiet == 0) log_info ("Device #%u: WARNING: device not available", device_id + 1);
+          log_info ("Device #%u: WARNING: device not available", device_id + 1);
  
            device_param->skipped = 1;
          }
@@ -13447,7 +13944,7 @@ int main (int argc, char **argv)
  
          if (device_compiler_available == CL_FALSE)
          {
-          if (data.quiet == 0) log_info ("Device #%u: WARNING: device no compiler available", device_id + 1);
+          log_info ("Device #%u: WARNING: device no compiler available", device_id + 1);
  
            device_param->skipped = 1;
          }
@@ -13460,7 +13957,7 @@ int main (int argc, char **argv)
  
          if ((device_execution_capabilities & CL_EXEC_KERNEL) == 0)
          {
-          if (data.quiet == 0) log_info ("Device #%u: WARNING: device does not support executing kernels", device_id + 1);
+          log_info ("Device #%u: WARNING: device does not support executing kernels", device_id + 1);
  
            device_param->skipped = 1;
          }
@@ -13477,14 +13974,14 @@ int main (int argc, char **argv)
  
          if (strstr (device_extensions, "base_atomics") == 0)
          {
-          if (data.quiet == 0) log_info ("Device #%u: WARNING: device does not support base atomics", device_id + 1);
+          log_info ("Device #%u: WARNING: device does not support base atomics", device_id + 1);
  
            device_param->skipped = 1;
          }
  
          if (strstr (device_extensions, "byte_addressable_store") == 0)
          {
-          if (data.quiet == 0) log_info ("Device #%u: WARNING: device does not support byte addressable store", device_id + 1);
+          log_info ("Device #%u: WARNING: device does not support byte addressable store", device_id + 1);
  
            device_param->skipped = 1;
          }
@@ -13499,23 +13996,31 @@ int main (int argc, char **argv)
  
          if (device_local_mem_size < 32768)
          {
-          if (data.quiet == 0) log_info ("Device #%u: WARNING: device local mem size is too small", device_id + 1);
+          log_info ("Device #%u: WARNING: device local mem size is too small", device_id + 1);
  
            device_param->skipped = 1;
          }
  
-        // if we have both intel and amd opencl runtime we want they share the same cpu
-        // so both virtual cpu share the same resources and run at 50%
-        // however, intel has better control over their own hardware so it makes sense
-        // to give them full control over their own hardware
+        // If there's both an Intel CPU and an AMD OpenCL runtime it's a tricky situation
+        // Both platforms support CPU device types and therefore both will try to use 100% of the physical resources
+        // This results in both utilizing it for 50%
+        // However, Intel has much better SIMD control over their own hardware
+        // It makes sense to give them full control over their own hardware
  
          if (device_type & CL_DEVICE_TYPE_CPU)
          {
-          if ((has_intel == 1) && (has_amd == 1) && (vendor_id == VENDOR_ID_AMD))
+          if (device_param->device_vendor_id == VENDOR_ID_AMD_USE_INTEL)
            {
-            if (data.quiet == 0) log_info ("Device #%u: WARNING: Not its native platform vendor", device_id + 1);
+            if (data.force == 0)
+            {
+              if (algorithm_pos == 0)
+              {
+                log_info ("Device #%u: WARNING: not native intel opencl runtime, expect massive speed loss", device_id + 1);
+                log_info ("           You can use --force to override this but do not post error reports if you do so");
+              }
  
-            device_param->skipped = 1;
+              device_param->skipped = 1;
+            }
            }
          }
  
@@ -13539,9 +14044,9 @@ int main (int argc, char **argv)
          char *device_name_chksum = (char *) mymalloc (INFOSZ);
  
          #if __x86_64__
-        snprintf (device_name_chksum, INFOSZ - 1, "%u-%u-%u-%s-%s-%s-%u", 64, device_param->vendor_id, device_param->vector_width, device_param->device_name, device_param->device_version, device_param->driver_version, COMPTIME);
+        snprintf (device_name_chksum, INFOSZ - 1, "%u-%u-%u-%s-%s-%s-%u", 64, device_param->platform_vendor_id, device_param->vector_width, device_param->device_name, device_param->device_version, device_param->driver_version, COMPTIME);
          #else
-        snprintf (device_name_chksum, INFOSZ - 1, "%u-%u-%u-%s-%s-%s-%u", 32, device_param->vendor_id, device_param->vector_width, device_param->device_name, device_param->device_version, device_param->driver_version, COMPTIME);
+        snprintf (device_name_chksum, INFOSZ - 1, "%u-%u-%u-%s-%s-%s-%u", 32, device_param->platform_vendor_id, device_param->vector_width, device_param->device_name, device_param->device_version, device_param->driver_version, COMPTIME);
          #endif
  
          uint device_name_digest[4] = { 0 };
@@ -13554,6 +14059,29 @@ int main (int argc, char **argv)
  
          // device_processor_cores
  
+        if (device_param->device_type & CL_DEVICE_TYPE_GPU)
+        {
+          if ((device_param->platform_vendor_id == VENDOR_ID_AMD) && (device_param->device_vendor_id == VENDOR_ID_AMD))
+          {
+            need_adl = 1;
+          }
+
+          if ((device_param->platform_vendor_id == VENDOR_ID_NV) && (device_param->device_vendor_id == VENDOR_ID_NV))
+          {
+            need_nvml = 1;
+
+            #ifdef LINUX
+            need_xnvctrl = 1;
+            #endif
+
+            #ifdef WIN
+            need_nvapi = 1;
+            #endif
+          }
+        }
+
+        // device_processor_cores
+
          if (device_type & CL_DEVICE_TYPE_CPU)
          {
            cl_uint device_processor_cores = 1;
@@ -13563,7 +14091,7 @@ int main (int argc, char **argv)
  
          if (device_type & CL_DEVICE_TYPE_GPU)
          {
-          if (vendor_id == VENDOR_ID_AMD)
+          if (device_vendor_id == VENDOR_ID_AMD)
            {
              cl_uint device_processor_cores = 0;
  
@@ -13573,7 +14101,7 @@ int main (int argc, char **argv)
  
              device_param->device_processor_cores = device_processor_cores;
            }
-          else if (vendor_id == VENDOR_ID_NV)
+          else if (device_vendor_id == VENDOR_ID_NV)
            {
              cl_uint kernel_exec_timeout = 0;
  
@@ -13602,6 +14130,29 @@ int main (int argc, char **argv)
  
              device_param->sm_minor = sm_minor;
              device_param->sm_major = sm_major;
+
+            // CPU burning loop damper
+            // Value is given as number between 0-100
+            // By default 100%
+
+            device_param->nvidia_spin_damp = (double) nvidia_spin_damp;
+
+            if (nvidia_spin_damp_chgd == 0)
+            {
+              if (data.attack_mode == ATTACK_MODE_STRAIGHT)
+              {
+                /**
+                 * the workaround is not a friend of rule based attacks
+                 * the words from the wordlist combined with fast and slow rules cause
+                 * fluctuations which cause inaccurate wait time estimations
+                 * using a reduced damping percentage almost compensates this
+                 */
+
+                device_param->nvidia_spin_damp = 64;
+              }
+            }
+
+            device_param->nvidia_spin_damp /= 100;
            }
            else
            {
@@ -13615,16 +14166,15 @@ int main (int argc, char **argv)
  
          if ((benchmark == 1 || quiet == 0) && (algorithm_pos == 0))
          {
-          if (status_automat == 0)
+          if (machine_readable == 0)
            {
              if (device_param->skipped == 0)
              {
-              log_info ("Device #%u: %s, %lu/%lu MB allocatable, %dMhz, %uMCU",
+              log_info ("Device #%u: %s, %lu/%lu MB allocatable, %uMCU",
                          device_id + 1,
                          device_name,
                          (unsigned int) (device_maxmem_alloc / 1024 / 1024),
                          (unsigned int) (device_global_mem   / 1024 / 1024),
-                        (unsigned int) (device_maxclock_frequency),
                          (unsigned int)  device_processors);
              }
              else
@@ -13642,7 +14192,7 @@ int main (int argc, char **argv)
          {
            if (device_type & CL_DEVICE_TYPE_GPU)
            {
-            if (vendor_id == VENDOR_ID_AMD)
+            if (platform_vendor_id == VENDOR_ID_AMD)
              {
                int catalyst_check = (force == 1) ? 0 : 1;
  
@@ -13690,7 +14240,7 @@ int main (int argc, char **argv)
                  return (-1);
                }
              }
-            else if (vendor_id == VENDOR_ID_NV)
+            else if (platform_vendor_id == VENDOR_ID_NV)
              {
                if (device_param->kernel_exec_timeout != 0)
                {
@@ -13703,7 +14253,7 @@ int main (int argc, char **argv)
            /* turns out pocl still creates segfaults (because of llvm)
            if (device_type & CL_DEVICE_TYPE_CPU)
            {
-            if (vendor_id == VENDOR_ID_AMD)
+            if (platform_vendor_id == VENDOR_ID_AMD)
              {
                if (force == 0)
                {
@@ -13813,7 +14363,7 @@ int main (int argc, char **argv)
  
      if ((benchmark == 1 || quiet == 0) && (algorithm_pos == 0))
      {
-      if (status_automat == 0)
+      if (machine_readable == 0)
        {
          log_info ("");
        }
@@ -13824,101 +14374,120 @@ int main (int argc, char **argv)
       */
  
      #ifdef HAVE_HWMON
-    #if defined(HAVE_NVML) || defined(HAVE_NVAPI)
-    hm_attrs_t hm_adapters_nv[DEVICES_MAX]  = { { { 0 }, 0, 0 } };
-    #endif
-
-    #ifdef HAVE_ADL
-    hm_attrs_t hm_adapters_amd[DEVICES_MAX] = { { { 0 }, 0, 0 } };
-    #endif
+    hm_attrs_t hm_adapters_adl[DEVICES_MAX]     = { { 0 } };
+    hm_attrs_t hm_adapters_nvapi[DEVICES_MAX]   = { { 0 } };
+    hm_attrs_t hm_adapters_nvml[DEVICES_MAX]    = { { 0 } };
+    hm_attrs_t hm_adapters_xnvctrl[DEVICES_MAX] = { { 0 } };
  
      if (gpu_temp_disable == 0)
      {
-      #if defined(WIN) && defined(HAVE_NVAPI)
-      NVAPI_PTR *nvapi = (NVAPI_PTR *) mymalloc (sizeof (NVAPI_PTR));
+      ADL_PTR     *adl     = (ADL_PTR *)     mymalloc (sizeof (ADL_PTR));
+      NVAPI_PTR   *nvapi   = (NVAPI_PTR *)   mymalloc (sizeof (NVAPI_PTR));
+      NVML_PTR    *nvml    = (NVML_PTR *)    mymalloc (sizeof (NVML_PTR));
+      XNVCTRL_PTR *xnvctrl = (XNVCTRL_PTR *) mymalloc (sizeof (XNVCTRL_PTR));
  
-      if (nvapi_init (nvapi) == 0)
-        data.hm_nv = nvapi;
+      data.hm_adl     = NULL;
+      data.hm_nvapi   = NULL;
+      data.hm_nvml    = NULL;
+      data.hm_xnvctrl = NULL;
  
-      if (data.hm_nv)
+      if ((need_nvml == 1) && (nvml_init (nvml) == 0))
        {
-        if (hm_NvAPI_Initialize (data.hm_nv) == NVAPI_OK)
+        data.hm_nvml = nvml;
+      }
+
+      if (data.hm_nvml)
+      {
+        if (hm_NVML_nvmlInit (data.hm_nvml) == NVML_SUCCESS)
          {
-          HM_ADAPTER_NV nvGPUHandle[DEVICES_MAX] = { 0 };
+          HM_ADAPTER_NVML nvmlGPUHandle[DEVICES_MAX] = { 0 };
  
-          int tmp_in = hm_get_adapter_index_nv (nvGPUHandle);
+          int tmp_in = hm_get_adapter_index_nvml (nvmlGPUHandle);
  
            int tmp_out = 0;
  
            for (int i = 0; i < tmp_in; i++)
            {
-            hm_adapters_nv[tmp_out++].adapter_index.nv = nvGPUHandle[i];
+            hm_adapters_nvml[tmp_out++].nvml = nvmlGPUHandle[i];
            }
  
            for (int i = 0; i < tmp_out; i++)
            {
-            NV_GPU_COOLER_SETTINGS pCoolerSettings;
+            unsigned int speed;
+
+            if (hm_NVML_nvmlDeviceGetFanSpeed (data.hm_nvml, 0, hm_adapters_nvml[i].nvml, &speed) == NVML_SUCCESS) hm_adapters_nvml[i].fan_get_supported = 1;
  
-            pCoolerSettings.Version = GPU_COOLER_SETTINGS_VER | sizeof (NV_GPU_COOLER_SETTINGS);
+            hm_NVML_nvmlDeviceSetComputeMode (data.hm_nvml, 1, hm_adapters_nvml[i].nvml, NVML_COMPUTEMODE_EXCLUSIVE_PROCESS);
  
-            if (hm_NvAPI_GPU_GetCoolerSettings (data.hm_nv, hm_adapters_nv[i].adapter_index.nv, 0, &pCoolerSettings) != NVAPI_NOT_SUPPORTED) hm_adapters_nv[i].fan_supported = 1;
+            hm_NVML_nvmlDeviceSetGpuOperationMode (data.hm_nvml, 1, hm_adapters_nvml[i].nvml, NVML_GOM_ALL_ON);
            }
          }
        }
-      #endif // WIN && HAVE_NVAPI
  
-      #if defined(LINUX) && defined(HAVE_NVML)
-      NVML_PTR *nvml = (NVML_PTR *) mymalloc (sizeof (NVML_PTR));
-
-      if (nvml_init (nvml) == 0)
-        data.hm_nv = nvml;
+      if ((need_nvapi == 1) && (nvapi_init (nvapi) == 0))
+      {
+        data.hm_nvapi = nvapi;
+      }
  
-      if (data.hm_nv)
+      if (data.hm_nvapi)
        {
-        if (hm_NVML_nvmlInit (data.hm_nv) == NVML_SUCCESS)
+        if (hm_NvAPI_Initialize (data.hm_nvapi) == NVAPI_OK)
          {
-          HM_ADAPTER_NV nvGPUHandle[DEVICES_MAX] = { 0 };
+          HM_ADAPTER_NVAPI nvGPUHandle[DEVICES_MAX] = { 0 };
  
-          int tmp_in = hm_get_adapter_index_nv (nvGPUHandle);
+          int tmp_in = hm_get_adapter_index_nvapi (nvGPUHandle);
  
            int tmp_out = 0;
  
            for (int i = 0; i < tmp_in; i++)
            {
-            hm_adapters_nv[tmp_out++].adapter_index.nv = nvGPUHandle[i];
+            hm_adapters_nvapi[tmp_out++].nvapi = nvGPUHandle[i];
            }
+        }
+      }
  
-          for (int i = 0; i < tmp_out; i++)
+      if ((need_xnvctrl == 1) && (xnvctrl_init (xnvctrl) == 0))
+      {
+        data.hm_xnvctrl = xnvctrl;
+      }
+
+      if (data.hm_xnvctrl)
+      {
+        if (hm_XNVCTRL_XOpenDisplay (data.hm_xnvctrl) == 0)
+        {
+          for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
            {
-            unsigned int speed;
+            hc_device_param_t *device_param = &data.devices_param[device_id];
+
+            if ((device_param->device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
  
-            if (hm_NVML_nvmlDeviceGetFanSpeed (data.hm_nv, 1, hm_adapters_nv[i].adapter_index.nv, &speed) != NVML_ERROR_NOT_SUPPORTED) hm_adapters_nv[i].fan_supported = 1;
+            hm_adapters_xnvctrl[device_id].xnvctrl = device_id;
+
+            int speed = 0;
+
+            if (get_fan_speed_current (data.hm_xnvctrl, device_id, &speed) == 0) hm_adapters_xnvctrl[device_id].fan_get_supported = 1;
            }
          }
        }
-      #endif // LINUX && HAVE_NVML
-
-      data.hm_amd = NULL;
  
-      #ifdef HAVE_ADL
-      ADL_PTR *adl = (ADL_PTR *) mymalloc (sizeof (ADL_PTR));
-
-      if (adl_init (adl) == 0)
-        data.hm_amd = adl;
+      if ((need_adl == 1) && (adl_init (adl) == 0))
+      {
+        data.hm_adl = adl;
+      }
  
-      if (data.hm_amd)
+      if (data.hm_adl)
        {
-        if (hm_ADL_Main_Control_Create (data.hm_amd, ADL_Main_Memory_Alloc, 0) == ADL_OK)
+        if (hm_ADL_Main_Control_Create (data.hm_adl, ADL_Main_Memory_Alloc, 0) == ADL_OK)
          {
            // total number of adapters
  
            int hm_adapters_num;
  
-          if (get_adapters_num_amd (data.hm_amd, &hm_adapters_num) != 0) return (-1);
+          if (get_adapters_num_adl (data.hm_adl, &hm_adapters_num) != 0) return (-1);
  
            // adapter info
  
-          LPAdapterInfo lpAdapterInfo = hm_get_adapter_info_amd (data.hm_amd, hm_adapters_num);
+          LPAdapterInfo lpAdapterInfo = hm_get_adapter_info_adl (data.hm_adl, hm_adapters_num);
  
            if (lpAdapterInfo == NULL) return (-1);
  
@@ -13932,12 +14501,12 @@ int main (int argc, char **argv)
            {
              hc_thread_mutex_lock (mux_adl);
  
-            // hm_get_opencl_busid_devid (hm_adapters_amd, devices_all_cnt, devices_all);
+            // hm_get_opencl_busid_devid (hm_adapters_adl, devices_all_cnt, devices_all);
  
-            hm_get_adapter_index_amd (hm_adapters_amd, valid_adl_device_list, num_adl_adapters, lpAdapterInfo);
+            hm_get_adapter_index_adl (hm_adapters_adl, valid_adl_device_list, num_adl_adapters, lpAdapterInfo);
  
-            hm_get_overdrive_version  (data.hm_amd, hm_adapters_amd, valid_adl_device_list, num_adl_adapters, lpAdapterInfo);
-            hm_check_fanspeed_control (data.hm_amd, hm_adapters_amd, valid_adl_device_list, num_adl_adapters, lpAdapterInfo);
+            hm_get_overdrive_version  (data.hm_adl, hm_adapters_adl, valid_adl_device_list, num_adl_adapters, lpAdapterInfo);
+            hm_check_fanspeed_control (data.hm_adl, hm_adapters_adl, valid_adl_device_list, num_adl_adapters, lpAdapterInfo);
  
              hc_thread_mutex_unlock (mux_adl);
            }
@@ -13946,9 +14515,8 @@ int main (int argc, char **argv)
            myfree (lpAdapterInfo);
          }
        }
-      #endif // HAVE_ADL
  
-      if (data.hm_amd == NULL && data.hm_nv == NULL)
+      if (data.hm_adl == NULL && data.hm_nvml == NULL && data.hm_xnvctrl == NULL)
        {
          gpu_temp_disable = 1;
        }
@@ -13958,34 +14526,19 @@ int main (int argc, char **argv)
       * OpenCL devices: allocate buffer for device specific information
       */
  
-    #ifdef HAVE_HWMON
-    int *temp_retain_fanspeed_value = (int *) mycalloc (data.devices_cnt, sizeof (int));
+    int *temp_retain_fanspeed_value  = (int *) mycalloc (data.devices_cnt, sizeof (int));
+    int *temp_retain_fanpolicy_value = (int *) mycalloc (data.devices_cnt, sizeof (int));
  
-    #ifdef HAVE_ADL
      ADLOD6MemClockState *od_clock_mem_status = (ADLOD6MemClockState *) mycalloc (data.devices_cnt, sizeof (ADLOD6MemClockState));
  
      int *od_power_control_status = (int *) mycalloc (data.devices_cnt, sizeof (int));
-    #endif // ADL
-    #endif
  
-    /**
-     * enable custom signal handler(s)
-     */
-
-    if (benchmark == 0)
-    {
-      hc_signal (sigHandler_default);
-    }
-    else
-    {
-      hc_signal (sigHandler_benchmark);
-    }
+    unsigned int *nvml_power_limit = (unsigned int *) mycalloc (data.devices_cnt, sizeof (unsigned int));
  
      /**
       * User-defined GPU temp handling
       */
  
-    #ifdef HAVE_HWMON
      if (gpu_temp_disable == 1)
      {
        gpu_temp_abort  = 0;
@@ -14007,6 +14560,19 @@ int main (int argc, char **argv)
      data.gpu_temp_retain  = gpu_temp_retain;
      #endif
  
+    /**
+     * enable custom signal handler(s)
+     */
+
+    if (benchmark == 0)
+    {
+      hc_signal (sigHandler_default);
+    }
+    else
+    {
+      hc_signal (sigHandler_benchmark);
+    }
+
      /**
       * inform the user
       */
@@ -14039,7 +14605,7 @@ int main (int argc, char **argv)
         */
  
        #ifdef HAVE_HWMON
-      if (gpu_temp_disable == 0 && data.hm_amd == NULL && data.hm_nv == NULL)
+      if (gpu_temp_disable == 0 && data.hm_adl == NULL && data.hm_nvml == NULL && data.hm_xnvctrl == NULL)
        {
          log_info ("Watchdog: Hardware Monitoring Interface not found on your system");
        }
@@ -14066,6 +14632,8 @@ int main (int argc, char **argv)
        #endif
      }
  
+    #ifdef HAVE_HWMON
+
      /**
       * HM devices: copy
       */
@@ -14082,31 +14650,34 @@ int main (int argc, char **argv)
  
          const uint platform_devices_id = device_param->platform_devices_id;
  
-        #if defined(HAVE_NVML) || defined(HAVE_NVAPI)
-        if (device_param->vendor_id == VENDOR_ID_NV)
+        if (device_param->device_vendor_id == VENDOR_ID_AMD)
          {
-          memcpy (&data.hm_device[device_id], &hm_adapters_nv[platform_devices_id], sizeof (hm_attrs_t));
+          data.hm_device[device_id].adl               = hm_adapters_adl[platform_devices_id].adl;
+          data.hm_device[device_id].nvapi             = 0;
+          data.hm_device[device_id].nvml              = 0;
+          data.hm_device[device_id].xnvctrl           = 0;
+          data.hm_device[device_id].od_version        = hm_adapters_adl[platform_devices_id].od_version;
+          data.hm_device[device_id].fan_get_supported = hm_adapters_adl[platform_devices_id].fan_get_supported;
+          data.hm_device[device_id].fan_set_supported = 0;
          }
-        #endif
  
-        #ifdef HAVE_ADL
-        if (device_param->vendor_id == VENDOR_ID_AMD)
+        if (device_param->device_vendor_id == VENDOR_ID_NV)
          {
-          memcpy (&data.hm_device[device_id], &hm_adapters_amd[platform_devices_id], sizeof (hm_attrs_t));
+          data.hm_device[device_id].adl               = 0;
+          data.hm_device[device_id].nvapi             = hm_adapters_nvapi[platform_devices_id].nvapi;
+          data.hm_device[device_id].nvml              = hm_adapters_nvml[platform_devices_id].nvml;
+          data.hm_device[device_id].xnvctrl           = hm_adapters_xnvctrl[platform_devices_id].xnvctrl;
+          data.hm_device[device_id].od_version        = 0;
+          data.hm_device[device_id].fan_get_supported = hm_adapters_nvml[platform_devices_id].fan_get_supported;
+          data.hm_device[device_id].fan_set_supported = 0;
          }
-        #endif
        }
      }
  
-   /*
-    * Temporary fix:
-    * with AMD r9 295x cards it seems that we need to set the powertune value just AFTER the ocl init stuff
-    * otherwise after hc_clCreateContext () etc, powertune value was set back to "normal" and cards unfortunately
-    * were not working @ full speed (setting hm_ADL_Overdrive_PowerControl_Set () here seems to fix the problem)
-    * Driver / ADL bug?
-    */
+    /**
+     * powertune on user request
+     */
  
-    #ifdef HAVE_ADL
      if (powertune_enable == 1)
      {
        hc_thread_mutex_lock (mux_adl);
@@ -14117,38 +14688,177 @@ int main (int argc, char **argv)
  
          if (device_param->skipped) continue;
  
-        if (data.hm_device[device_id].od_version == 6)
+        if (data.devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
          {
-          // set powertune value only
-
-          int powertune_supported = 0;
-
-          int ADL_rc = 0;
+          /**
+           * Temporary fix:
+           * with AMD r9 295x cards it seems that we need to set the powertune value just AFTER the ocl init stuff
+           * otherwise after hc_clCreateContext () etc, powertune value was set back to "normal" and cards unfortunately
+           * were not working @ full speed (setting hm_ADL_Overdrive_PowerControl_Set () here seems to fix the problem)
+           * Driver / ADL bug?
+           */
  
-          if ((ADL_rc = hm_ADL_Overdrive6_PowerControl_Caps (data.hm_amd, data.hm_device[device_id].adapter_index.amd, &powertune_supported)) != ADL_OK)
+          if (data.hm_device[device_id].od_version == 6)
            {
-            log_error ("ERROR: Failed to get ADL PowerControl Capabilities");
+            int ADL_rc;
  
-            return (-1);
-          }
+            // check powertune capabilities first, if not available then skip device
  
-          if (powertune_supported != 0)
-          {
-            // powertune set
-            ADLOD6PowerControlInfo powertune = {0, 0, 0, 0, 0};
+            int powertune_supported = 0;
  
-            if ((ADL_rc = hm_ADL_Overdrive_PowerControlInfo_Get (data.hm_amd, data.hm_device[device_id].adapter_index.amd, &powertune)) != ADL_OK)
+            if ((ADL_rc = hm_ADL_Overdrive6_PowerControl_Caps (data.hm_adl, data.hm_device[device_id].adl, &powertune_supported)) != ADL_OK)
              {
-              log_error ("ERROR: Failed to get current ADL PowerControl settings");
+              log_error ("ERROR: Failed to get ADL PowerControl Capabilities");
  
                return (-1);
              }
  
-            if ((ADL_rc = hm_ADL_Overdrive_PowerControl_Set (data.hm_amd, data.hm_device[device_id].adapter_index.amd, powertune.iMaxValue)) != ADL_OK)
+            // first backup current value, we will restore it later
+
+            if (powertune_supported != 0)
              {
-              log_error ("ERROR: Failed to set new ADL PowerControl values");
+              // powercontrol settings
  
-              return (-1);
+              ADLOD6PowerControlInfo powertune = {0, 0, 0, 0, 0};
+
+              if ((ADL_rc = hm_ADL_Overdrive_PowerControlInfo_Get (data.hm_adl, data.hm_device[device_id].adl, &powertune)) == ADL_OK)
+              {
+                ADL_rc = hm_ADL_Overdrive_PowerControl_Get (data.hm_adl, data.hm_device[device_id].adl, &od_power_control_status[device_id]);
+              }
+
+              if (ADL_rc != ADL_OK)
+              {
+                log_error ("ERROR: Failed to get current ADL PowerControl settings");
+
+                return (-1);
+              }
+
+              if ((ADL_rc = hm_ADL_Overdrive_PowerControl_Set (data.hm_adl, data.hm_device[device_id].adl, powertune.iMaxValue)) != ADL_OK)
+              {
+                log_error ("ERROR: Failed to set new ADL PowerControl values");
+
+                return (-1);
+              }
+
+              // clocks
+
+              memset (&od_clock_mem_status[device_id], 0, sizeof (ADLOD6MemClockState));
+
+              od_clock_mem_status[device_id].state.iNumberOfPerformanceLevels = 2;
+
+              if ((ADL_rc = hm_ADL_Overdrive_StateInfo_Get (data.hm_adl, data.hm_device[device_id].adl, ADL_OD6_GETSTATEINFO_CUSTOM_PERFORMANCE, &od_clock_mem_status[device_id])) != ADL_OK)
+              {
+                log_error ("ERROR: Failed to get ADL memory and engine clock frequency");
+
+                return (-1);
+              }
+
+              // Query capabilities only to check whether the profile was "damaged"; if it was, output a warning but still accept the user's profile settings
+
+              ADLOD6Capabilities caps = {0, 0, 0, {0, 0, 0}, {0, 0, 0}, 0, 0};
+
+              if ((ADL_rc = hm_ADL_Overdrive_Capabilities_Get (data.hm_adl, data.hm_device[device_id].adl, &caps)) != ADL_OK)
+              {
+                log_error ("ERROR: Failed to get ADL device capabilities");
+
+                return (-1);
+              }
+
+              int engine_clock_max = caps.sEngineClockRange.iMax * 0.6666;
+              int memory_clock_max = caps.sMemoryClockRange.iMax * 0.6250;
+
+              int warning_trigger_engine = (int) (0.25 * (float) engine_clock_max);
+              int warning_trigger_memory = (int) (0.25 * (float) memory_clock_max);
+
+              int engine_clock_profile_max = od_clock_mem_status[device_id].state.aLevels[1].iEngineClock;
+              int memory_clock_profile_max = od_clock_mem_status[device_id].state.aLevels[1].iMemoryClock;
+
+              // warning if profile has too low max values
+
+              if ((engine_clock_max - engine_clock_profile_max) > warning_trigger_engine)
+              {
+                log_info ("WARN: the custom profile seems to have too low maximum engine clock values. You therefore may not reach full performance");
+              }
+
+              if ((memory_clock_max - memory_clock_profile_max) > warning_trigger_memory)
+              {
+                log_info ("WARN: the custom profile seems to have too low maximum memory clock values. You therefore may not reach full performance");
+              }
+
+              ADLOD6StateInfo *performance_state = (ADLOD6StateInfo*) mycalloc (1, sizeof (ADLOD6StateInfo) + sizeof (ADLOD6PerformanceLevel));
+
+              performance_state->iNumberOfPerformanceLevels = 2;
+
+              performance_state->aLevels[0].iEngineClock = engine_clock_profile_max;
+              performance_state->aLevels[1].iEngineClock = engine_clock_profile_max;
+              performance_state->aLevels[0].iMemoryClock = memory_clock_profile_max;
+              performance_state->aLevels[1].iMemoryClock = memory_clock_profile_max;
+
+              if ((ADL_rc = hm_ADL_Overdrive_State_Set (data.hm_adl, data.hm_device[device_id].adl, ADL_OD6_SETSTATE_PERFORMANCE, performance_state)) != ADL_OK)
+              {
+                log_info ("ERROR: Failed to set ADL performance state");
+
+                return (-1);
+              }
+
+              local_free (performance_state);
+            }
+
+            // set powertune value only
+
+            if (powertune_supported != 0)
+            {
+              // powertune set
+              ADLOD6PowerControlInfo powertune = {0, 0, 0, 0, 0};
+
+              if ((ADL_rc = hm_ADL_Overdrive_PowerControlInfo_Get (data.hm_adl, data.hm_device[device_id].adl, &powertune)) != ADL_OK)
+              {
+                log_error ("ERROR: Failed to get current ADL PowerControl settings");
+
+                return (-1);
+              }
+
+              if ((ADL_rc = hm_ADL_Overdrive_PowerControl_Set (data.hm_adl, data.hm_device[device_id].adl, powertune.iMaxValue)) != ADL_OK)
+              {
+                log_error ("ERROR: Failed to set new ADL PowerControl values");
+
+                return (-1);
+              }
+            }
+          }
+        }
+
+        if (data.devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+        {
+          // first backup current value, we will restore it later
+
+          unsigned int limit;
+
+          int powertune_supported = 0;
+
+          if (hm_NVML_nvmlDeviceGetPowerManagementLimit (data.hm_nvml, 0, data.hm_device[device_id].nvml, &limit) == NVML_SUCCESS)
+          {
+            powertune_supported = 1;
+          }
+
+          // if backup worked, activate the maximum allowed
+
+          if (powertune_supported != 0)
+          {
+            unsigned int minLimit;
+            unsigned int maxLimit;
+
+            if (hm_NVML_nvmlDeviceGetPowerManagementLimitConstraints (data.hm_nvml, 0, data.hm_device[device_id].nvml, &minLimit, &maxLimit) == NVML_SUCCESS)
+            {
+              if (maxLimit > 0)
+              {
+                if (hm_NVML_nvmlDeviceSetPowerManagementLimit (data.hm_nvml, 0, data.hm_device[device_id].nvml, maxLimit) == NVML_SUCCESS)
+                {
+                  // now we can be sure we need to reset later
+
+                  nvml_power_limit[device_id] = limit;
+                }
+              }
              }
            }
          }
@@ -14156,7 +14866,7 @@ int main (int argc, char **argv)
  
        hc_thread_mutex_unlock (mux_adl);
      }
-    #endif // HAVE_ADK
+
      #endif // HAVE_HWMON
  
      #ifdef DEBUG
@@ -14165,8 +14875,6 @@ int main (int argc, char **argv)
  
      if (data.quiet == 0) log_info_nn ("Initializing device kernels and memory...");
  
-    uint kernel_power_all = 0;
-
      for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
      {
        /**
@@ -14227,6 +14935,10 @@ int main (int argc, char **argv)
        if (hash_mode == 10500) kernel_threads = 64; // RC4
        if (hash_mode == 13100) kernel_threads = 64; // RC4
  
+      device_param->kernel_threads = kernel_threads;
+
+      device_param->hardware_power = device_processors * kernel_threads;
+
        /**
         * create input buffers on device : calculate size of fixed memory buffers
         */
@@ -14278,22 +14990,22 @@ int main (int argc, char **argv)
  
            if (hash_mode == 8900)
            {
-            if (device_param->vendor_id == VENDOR_ID_AMD)
+            if (device_param->device_vendor_id == VENDOR_ID_AMD)
              {
                tmto_start = 1;
              }
-            else if (device_param->vendor_id == VENDOR_ID_NV)
+            else if (device_param->device_vendor_id == VENDOR_ID_NV)
              {
                tmto_start = 2;
              }
            }
            else if (hash_mode == 9300)
            {
-            if (device_param->vendor_id == VENDOR_ID_AMD)
+            if (device_param->device_vendor_id == VENDOR_ID_AMD)
              {
                tmto_start = 2;
              }
-            else if (device_param->vendor_id == VENDOR_ID_NV)
+            else if (device_param->device_vendor_id == VENDOR_ID_NV)
              {
                tmto_start = 2;
              }
@@ -14341,7 +15053,7 @@ int main (int argc, char **argv)
         * some algorithms need a fixed kernel-loops count
         */
  
-      if (hash_mode == 1500)
+      if (hash_mode == 1500 && attack_mode == ATTACK_MODE_BF)
        {
          const u32 kernel_loops_fixed = 1024;
  
@@ -14349,7 +15061,7 @@ int main (int argc, char **argv)
          device_param->kernel_loops_max = kernel_loops_fixed;
        }
  
-      if (hash_mode == 3000)
+      if (hash_mode == 3000 && attack_mode == ATTACK_MODE_BF)
        {
          const u32 kernel_loops_fixed = 1024;
  
@@ -14516,7 +15228,9 @@ int main (int argc, char **argv)
  
          if ((opts_type & OPTS_TYPE_HOOK12) || (opts_type & OPTS_TYPE_HOOK23))
          {
-          // none yet
+          switch (hash_mode)
+          {
+          }
          }
  
          // now check if all device-memory sizes which depend on the kernel_accel_max amplifier are within its boundaries
@@ -14596,15 +15310,6 @@ int main (int argc, char **argv)
        device_param->size_tmps    = size_tmps;
        device_param->size_hooks   = size_hooks;
  
-      // do not confuse kernel_accel_max with kernel_accel here
-
-      const u32 kernel_power = device_processors * kernel_threads * kernel_accel_max;
-
-      device_param->kernel_threads    = kernel_threads;
-      device_param->kernel_power_user = kernel_power;
-
-      kernel_power_all += kernel_power;
-
        /**
         * default building options
         */
@@ -14621,12 +15326,12 @@ int main (int argc, char **argv)
  
        char build_opts_new[1024] = { 0 };
  
-      snprintf (build_opts_new, sizeof (build_opts_new) - 1, "%s -DVENDOR_ID=%u -DCUDA_ARCH=%d -DVECT_SIZE=%u -DDEVICE_TYPE=%u -DKERN_TYPE=%u -D_unroll -cl-std=CL1.1", build_opts, device_param->vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->vector_width, (u32) device_param->device_type, kern_type);
+      snprintf (build_opts_new, sizeof (build_opts_new) - 1, "%s -DVENDOR_ID=%u -DCUDA_ARCH=%d -DVECT_SIZE=%u -DDEVICE_TYPE=%u -DKERN_TYPE=%u -D_unroll -cl-std=CL1.1", build_opts, device_param->device_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->vector_width, (u32) device_param->device_type, kern_type);
  
        strncpy (build_opts, build_opts_new, sizeof (build_opts) - 1);
  
-
-      if (device_param->vendor_id == VENDOR_ID_INTEL_SDK)
+      /*
+      if (device_param->device_vendor_id == VENDOR_ID_INTEL_SDK)
        {
          // we do vectorizing much better than the auto-vectorizer
  
@@ -14634,6 +15339,7 @@ int main (int argc, char **argv)
  
          strncpy (build_opts, build_opts_new, sizeof (build_opts) - 1);
        }
+      */
  
        #ifdef DEBUG
        log_info ("Device #%u: build_opts '%s'\n", device_id + 1, build_opts);
@@ -15055,13 +15761,6 @@ int main (int argc, char **argv)
        hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_digests_shown,  CL_TRUE, 0, size_shown,   data.digests_shown, 0, NULL, NULL);
        hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_salt_bufs,      CL_TRUE, 0, size_salts,   data.salts_buf,     0, NULL, NULL);
  
-      run_kernel_bzero (device_param, device_param->d_pws_buf,        size_pws);
-      run_kernel_bzero (device_param, device_param->d_pws_amp_buf,    size_pws);
-      run_kernel_bzero (device_param, device_param->d_tmps,           size_tmps);
-      run_kernel_bzero (device_param, device_param->d_hooks,          size_hooks);
-      run_kernel_bzero (device_param, device_param->d_plain_bufs,     size_plains);
-      run_kernel_bzero (device_param, device_param->d_result,         size_results);
-
        /**
         * special buffers
         */
@@ -15072,8 +15771,6 @@ int main (int argc, char **argv)
          device_param->d_rules_c = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_rules_c, NULL);
  
          hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_rules, CL_TRUE, 0, size_rules, kernel_rules_buf, 0, NULL, NULL);
-
-        run_kernel_bzero (device_param, device_param->d_rules_c, size_rules_c);
        }
        else if (attack_kern == ATTACK_KERN_COMBI)
        {
@@ -15081,11 +15778,6 @@ int main (int argc, char **argv)
          device_param->d_combs_c         = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_combs,      NULL);
          device_param->d_root_css_buf    = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_root_css,   NULL);
          device_param->d_markov_css_buf  = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_markov_css, NULL);
-
-        run_kernel_bzero (device_param, device_param->d_combs,          size_combs);
-        run_kernel_bzero (device_param, device_param->d_combs_c,        size_combs);
-        run_kernel_bzero (device_param, device_param->d_root_css_buf,   size_root_css);
-        run_kernel_bzero (device_param, device_param->d_markov_css_buf, size_markov_css);
        }
        else if (attack_kern == ATTACK_KERN_BF)
        {
@@ -15094,12 +15786,6 @@ int main (int argc, char **argv)
          device_param->d_tm_c            = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_tm,         NULL);
          device_param->d_root_css_buf    = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_root_css,   NULL);
          device_param->d_markov_css_buf  = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY, size_markov_css, NULL);
-
-        run_kernel_bzero (device_param, device_param->d_bfs,            size_bfs);
-        run_kernel_bzero (device_param, device_param->d_bfs_c,          size_bfs);
-        run_kernel_bzero (device_param, device_param->d_tm_c,           size_tm);
-        run_kernel_bzero (device_param, device_param->d_root_css_buf,   size_root_css);
-        run_kernel_bzero (device_param, device_param->d_markov_css_buf, size_markov_css);
        }
  
        if (size_esalts)
@@ -15243,6 +15929,13 @@ int main (int argc, char **argv)
        device_param->kernel_params_tm[0] = &device_param->d_bfs_c;
        device_param->kernel_params_tm[1] = &device_param->d_tm_c;
  
+      device_param->kernel_params_memset_buf32[1] = 0; // value
+      device_param->kernel_params_memset_buf32[2] = 0; // gid_max
+
+      device_param->kernel_params_memset[0] = NULL;
+      device_param->kernel_params_memset[1] = &device_param->kernel_params_memset_buf32[1];
+      device_param->kernel_params_memset[2] = &device_param->kernel_params_memset_buf32[2];
+
        /**
         * kernel name
         */
@@ -15351,6 +16044,18 @@ int main (int argc, char **argv)
          if (opts_type & OPTS_TYPE_HOOK23) hc_clSetKernelArg (data.ocl, device_param->kernel23, i, sizeof (cl_uint), device_param->kernel_params[i]);
        }
  
+      // GPU memset
+
+      device_param->kernel_memset = hc_clCreateKernel (data.ocl, device_param->program, "gpu_memset");
+
+      hc_clGetKernelWorkGroupInfo (data.ocl, device_param->kernel_memset, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp);
+
+      hc_clSetKernelArg (data.ocl, device_param->kernel_memset, 0, sizeof (cl_mem),  device_param->kernel_params_memset[0]);
+      hc_clSetKernelArg (data.ocl, device_param->kernel_memset, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]);
+      hc_clSetKernelArg (data.ocl, device_param->kernel_memset, 2, sizeof (cl_uint), device_param->kernel_params_memset[2]);
+
+      // MP start
+
        if (attack_mode == ATTACK_MODE_BF)
        {
          device_param->kernel_mp_l = hc_clCreateKernel (data.ocl, device_param->program_mp, "l_markov");
@@ -15388,82 +16093,120 @@ int main (int argc, char **argv)
  
          hc_clGetKernelWorkGroupInfo (data.ocl, device_param->kernel_amp, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp);
        }
-
-      if (attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+
+      if (attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+      {
+        // nothing to do
+      }
+      else
+      {
+        for (uint i = 0; i < 5; i++)
+        {
+          hc_clSetKernelArg (data.ocl, device_param->kernel_amp, i, sizeof (cl_mem), device_param->kernel_params_amp[i]);
+        }
+
+        for (uint i = 5; i < 7; i++)
+        {
+          hc_clSetKernelArg (data.ocl, device_param->kernel_amp, i, sizeof (cl_uint), device_param->kernel_params_amp[i]);
+        }
+      }
+
+      // maybe this has been updated by clGetKernelWorkGroupInfo()
+      // value can only be decreased, so we don't need to reallocate buffers
+
+      device_param->kernel_threads = kernel_threads;
+
+      // zero some data buffers
+
+      run_kernel_bzero (device_param, device_param->d_pws_buf,        size_pws);
+      run_kernel_bzero (device_param, device_param->d_pws_amp_buf,    size_pws);
+      run_kernel_bzero (device_param, device_param->d_tmps,           size_tmps);
+      run_kernel_bzero (device_param, device_param->d_hooks,          size_hooks);
+      run_kernel_bzero (device_param, device_param->d_plain_bufs,     size_plains);
+      run_kernel_bzero (device_param, device_param->d_result,         size_results);
+
+      /**
+       * special buffers
+       */
+
+      if (attack_kern == ATTACK_KERN_STRAIGHT)
+      {
+        run_kernel_bzero (device_param, device_param->d_rules_c, size_rules_c);
+      }
+      else if (attack_kern == ATTACK_KERN_COMBI)
        {
-        // nothing to do
+        run_kernel_bzero (device_param, device_param->d_combs,          size_combs);
+        run_kernel_bzero (device_param, device_param->d_combs_c,        size_combs);
+        run_kernel_bzero (device_param, device_param->d_root_css_buf,   size_root_css);
+        run_kernel_bzero (device_param, device_param->d_markov_css_buf, size_markov_css);
        }
-      else
+      else if (attack_kern == ATTACK_KERN_BF)
        {
-        for (uint i = 0; i < 5; i++)
-        {
-          hc_clSetKernelArg (data.ocl, device_param->kernel_amp, i, sizeof (cl_mem), device_param->kernel_params_amp[i]);
-        }
-
-        for (uint i = 5; i < 7; i++)
-        {
-          hc_clSetKernelArg (data.ocl, device_param->kernel_amp, i, sizeof (cl_uint), device_param->kernel_params_amp[i]);
-        }
+        run_kernel_bzero (device_param, device_param->d_bfs,            size_bfs);
+        run_kernel_bzero (device_param, device_param->d_bfs_c,          size_bfs);
+        run_kernel_bzero (device_param, device_param->d_tm_c,           size_tm);
+        run_kernel_bzero (device_param, device_param->d_root_css_buf,   size_root_css);
+        run_kernel_bzero (device_param, device_param->d_markov_css_buf, size_markov_css);
        }
  
-      // maybe this has been updated by clGetKernelWorkGroupInfo()
-      // value can only be decreased, so we don't need to reallocate buffers
-
-      device_param->kernel_threads = kernel_threads;
+      #if defined(HAVE_HWMON)
  
        /**
         * Store initial fanspeed if gpu_temp_retain is enabled
         */
  
-      #if defined(HAVE_HWMON) && defined(HAVE_ADL)
-      int gpu_temp_retain_set = 0;
-
        if (gpu_temp_disable == 0)
        {
-        if (gpu_temp_retain != 0) // VENDOR_ID_AMD implied
+        if (gpu_temp_retain != 0)
          {
            hc_thread_mutex_lock (mux_adl);
  
-          if (data.hm_device[device_id].fan_supported == 1)
+          if (data.hm_device[device_id].fan_get_supported == 1)
            {
-            if (gpu_temp_retain_chgd == 0)
-            {
-              uint cur_temp = 0;
-              uint default_temp = 0;
+            const int fanspeed  = hm_get_fanspeed_with_device_id  (device_id);
+            const int fanpolicy = hm_get_fanpolicy_with_device_id (device_id);
  
-              int ADL_rc = hm_ADL_Overdrive6_TargetTemperatureData_Get (data.hm_amd, data.hm_device[device_id].adapter_index.amd, (int *) &cur_temp, (int *) &default_temp);
-
-              if (ADL_rc == ADL_OK)
-              {
-                #define GPU_TEMP_RETAIN_ABORT_DIFF 15
+            temp_retain_fanspeed_value[device_id]  = fanspeed;
+            temp_retain_fanpolicy_value[device_id] = fanpolicy;
  
-                const uint gpu_temp_retain_target = default_temp - GPU_TEMP_RETAIN_ABORT_DIFF;
+            // we also set it to tell the OS we take control over the fan and it's automatic controller
+            // if it was set to automatic. we do not control user-defined fanspeeds.
  
-                // special case with multi gpu setups: always use minimum retain
+            if (fanpolicy == 1)
+            {
+              data.hm_device[device_id].fan_set_supported = 1;
  
-                if (gpu_temp_retain_set == 0)
-                {
-                  gpu_temp_retain = gpu_temp_retain_target;
-                  gpu_temp_retain_set = 1;
-                }
-                else
-                {
-                  gpu_temp_retain = MIN (gpu_temp_retain, gpu_temp_retain_target);
-                }
+              int rc = -1;
  
-                if (gpu_temp_abort_chgd == 0) gpu_temp_abort = gpu_temp_retain + GPU_TEMP_RETAIN_ABORT_DIFF;
+              if (device_param->device_vendor_id == VENDOR_ID_AMD)
+              {
+                rc = hm_set_fanspeed_with_device_id_adl (device_id, fanspeed, 1);
                }
-            }
+              else if (device_param->device_vendor_id == VENDOR_ID_NV)
+              {
+                #ifdef LINUX
+                rc = set_fan_control (data.hm_xnvctrl, data.hm_device[device_id].xnvctrl, NV_CTRL_GPU_COOLER_MANUAL_CONTROL_TRUE);
+                #endif
  
-            const int fan_speed = hm_get_fanspeed_with_device_id (device_id);
+                #ifdef WIN
+                rc = hm_set_fanspeed_with_device_id_nvapi (device_id, fanspeed, 1);
+                #endif
+              }
  
-            temp_retain_fanspeed_value[device_id] = fan_speed;
+              if (rc == 0)
+              {
+                data.hm_device[device_id].fan_set_supported = 1;
+              }
+              else
+              {
+                log_info ("WARNING: Failed to set initial fan speed for device #%u", device_id + 1);
  
-            if (fan_speed == -1)
+                data.hm_device[device_id].fan_set_supported = 0;
+              }
+            }
+            else
              {
-              log_info ("WARNING: Failed to get current fan speed settings for gpu number: %i:", device_id + 1);
-
-              temp_retain_fanspeed_value[device_id] = 0;
+              data.hm_device[device_id].fan_set_supported = 0;
              }
            }
  
@@ -15471,126 +16214,9 @@ int main (int argc, char **argv)
          }
        }
  
-      /**
-       * Store original powercontrol/clocks settings, set overdrive 6 performance tuning settings
-       */
-
-      if (powertune_enable == 1) // VENDOR_ID_AMD implied
-      {
-        hc_thread_mutex_lock (mux_adl);
-
-        if (data.hm_device[device_id].od_version == 6)
-        {
-          int ADL_rc;
-
-          // check powertune capabilities first, if not available then skip device
-
-          int powertune_supported = 0;
-
-          if ((ADL_rc = hm_ADL_Overdrive6_PowerControl_Caps (data.hm_amd, data.hm_device[device_id].adapter_index.amd, &powertune_supported)) != ADL_OK)
-          {
-            log_error ("ERROR: Failed to get ADL PowerControl Capabilities");
-
-            return (-1);
-          }
-
-          if (powertune_supported != 0)
-          {
-            // powercontrol settings
-
-            ADLOD6PowerControlInfo powertune = {0, 0, 0, 0, 0};
-
-            if ((ADL_rc = hm_ADL_Overdrive_PowerControlInfo_Get (data.hm_amd, data.hm_device[device_id].adapter_index.amd, &powertune)) == ADL_OK)
-            {
-              ADL_rc = hm_ADL_Overdrive_PowerControl_Get (data.hm_amd, data.hm_device[device_id].adapter_index.amd, &od_power_control_status[device_id]);
-            }
-
-            if (ADL_rc != ADL_OK)
-            {
-              log_error ("ERROR: Failed to get current ADL PowerControl settings");
-
-              return (-1);
-            }
-
-            if ((ADL_rc = hm_ADL_Overdrive_PowerControl_Set (data.hm_amd, data.hm_device[device_id].adapter_index.amd, powertune.iMaxValue)) != ADL_OK)
-            {
-              log_error ("ERROR: Failed to set new ADL PowerControl values");
-
-              return (-1);
-            }
-
-            // clocks
-
-            memset (&od_clock_mem_status[device_id], 0, sizeof (ADLOD6MemClockState));
-
-            od_clock_mem_status[device_id].state.iNumberOfPerformanceLevels = 2;
-
-            if ((ADL_rc = hm_ADL_Overdrive_StateInfo_Get (data.hm_amd, data.hm_device[device_id].adapter_index.amd, ADL_OD6_GETSTATEINFO_CUSTOM_PERFORMANCE, &od_clock_mem_status[device_id])) != ADL_OK)
-            {
-              log_error ("ERROR: Failed to get ADL memory and engine clock frequency");
-
-              return (-1);
-            }
-
-            // Query capabilities only to see if profiles were not "damaged", if so output a warning but do accept the users profile settings
-
-            ADLOD6Capabilities caps = {0, 0, 0, {0, 0, 0}, {0, 0, 0}, 0, 0};
-
-            if ((ADL_rc = hm_ADL_Overdrive_Capabilities_Get (data.hm_amd, data.hm_device[device_id].adapter_index.amd, &caps)) != ADL_OK)
-            {
-              log_error ("ERROR: Failed to get ADL device capabilities");
-
-              return (-1);
-            }
-
-            int engine_clock_max = caps.sEngineClockRange.iMax * 0.6666;
-            int memory_clock_max = caps.sMemoryClockRange.iMax * 0.6250;
-
-            int warning_trigger_engine = (int) (0.25 * (float) engine_clock_max);
-            int warning_trigger_memory = (int) (0.25 * (float) memory_clock_max);
-
-            int engine_clock_profile_max = od_clock_mem_status[device_id].state.aLevels[1].iEngineClock;
-            int memory_clock_profile_max = od_clock_mem_status[device_id].state.aLevels[1].iMemoryClock;
-
-            // warning if profile has too low max values
-
-            if ((engine_clock_max - engine_clock_profile_max) > warning_trigger_engine)
-            {
-              log_info ("WARN: the custom profile seems to have too low maximum engine clock values. You therefore may not reach full performance");
-            }
-
-            if ((memory_clock_max - memory_clock_profile_max) > warning_trigger_memory)
-            {
-              log_info ("WARN: the custom profile seems to have too low maximum memory clock values. You therefore may not reach full performance");
-            }
-
-            ADLOD6StateInfo *performance_state = (ADLOD6StateInfo*) mycalloc (1, sizeof (ADLOD6StateInfo) + sizeof (ADLOD6PerformanceLevel));
-
-            performance_state->iNumberOfPerformanceLevels = 2;
-
-            performance_state->aLevels[0].iEngineClock = engine_clock_profile_max;
-            performance_state->aLevels[1].iEngineClock = engine_clock_profile_max;
-            performance_state->aLevels[0].iMemoryClock = memory_clock_profile_max;
-            performance_state->aLevels[1].iMemoryClock = memory_clock_profile_max;
-
-            if ((ADL_rc = hm_ADL_Overdrive_State_Set (data.hm_amd, data.hm_device[device_id].adapter_index.amd, ADL_OD6_SETSTATE_PERFORMANCE, performance_state)) != ADL_OK)
-            {
-              log_info ("ERROR: Failed to set ADL performance state");
-
-              return (-1);
-            }
-
-            local_free (performance_state);
-          }
-        }
-
-        hc_thread_mutex_unlock (mux_adl);
-      }
-      #endif // HAVE_HWMON && HAVE_ADL
+      #endif // HAVE_HWMON
      }
  
-    data.kernel_power_all = kernel_power_all;
-
      if (data.quiet == 0) log_info_nn ("");
  
      /**
@@ -15599,7 +16225,7 @@ int main (int argc, char **argv)
  
      if (benchmark == 1)
      {
-      if (status_automat == 0)
+      if (machine_readable == 0)
        {
          quiet = 0;
  
@@ -16461,11 +17087,18 @@ int main (int argc, char **argv)
  
      if (data.devices_status != STATUS_CRACKED) data.devices_status = STATUS_STARTING;
  
-    hc_thread_t i_thread = 0;
+    uint i_threads_cnt = 0;
+
+    hc_thread_t *i_threads = (hc_thread_t *) mycalloc (10, sizeof (hc_thread_t));
  
      if ((data.wordlist_mode == WL_MODE_FILE) || (data.wordlist_mode == WL_MODE_MASK))
      {
-      hc_thread_create (i_thread, thread_keypress, &benchmark);
+      if (stdout_flag == 0)
+      {
+        hc_thread_create (i_threads[i_threads_cnt], thread_keypress, &benchmark);
+
+        i_threads_cnt++;
+      }
      }
  
      if (wordlist_mode == WL_MODE_STDIN) data.status = 1;
@@ -16474,9 +17107,12 @@ int main (int argc, char **argv)
  
      hc_thread_t *ni_threads = (hc_thread_t *) mycalloc (10, sizeof (hc_thread_t));
  
-    hc_thread_create (ni_threads[ni_threads_cnt], thread_monitor, NULL);
+    if (stdout_flag == 0)
+    {
+      hc_thread_create (ni_threads[ni_threads_cnt], thread_monitor, NULL);
  
-    ni_threads_cnt++;
+      ni_threads_cnt++;
+    }
  
      /**
        * Outfile remove
@@ -16815,6 +17451,8 @@ int main (int argc, char **argv)
  
          data.ms_paused = 0;
  
+        data.kernel_power_final = 0;
+
          data.words_cur = rd->words_cur;
  
          for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
@@ -16832,8 +17470,6 @@ int main (int argc, char **argv)
  
            memset (device_param->exec_ms, 0, EXEC_CACHE * sizeof (double));
  
-          device_param->kernel_power = device_param->kernel_power_user;
-
            device_param->outerloop_pos  = 0;
            device_param->outerloop_left = 0;
            device_param->innerloop_pos  = 0;
@@ -16849,8 +17485,6 @@ int main (int argc, char **argv)
            device_param->words_done = 0;
          }
  
-        data.kernel_power_div = 0;
-
          // figure out some workload
  
          if (attack_mode == ATTACK_MODE_STRAIGHT)
@@ -17292,26 +17926,6 @@ int main (int argc, char **argv)
            }
          }
  
-        /*
-         * Inform user about possible slow speeds
-         */
-
-        if ((wordlist_mode == WL_MODE_FILE) || (wordlist_mode == WL_MODE_MASK))
-        {
-          if (data.words_base < kernel_power_all)
-          {
-            if (quiet == 0)
-            {
-              log_info ("ATTENTION!");
-              log_info ("  The wordlist or mask you are using is too small.");
-              log_info ("  Therefore, hashcat is unable to utilize the full parallelization power of your device(s).");
-              log_info ("  The cracking speed will drop.");
-              log_info ("  Workaround: https://hashcat.net/wiki/doku.php?id=frequently_asked_questions#how_to_create_more_work_for_full_speed");
-              log_info ("");
-            }
-          }
-        }
-
          /*
           * Update loopback file
           */
@@ -17347,6 +17961,64 @@ int main (int argc, char **argv)
            }
          }
  
+        /**
+         * create autotune threads
+         */
+
+        hc_thread_t *c_threads = (hc_thread_t *) mycalloc (data.devices_cnt, sizeof (hc_thread_t));
+
+        data.devices_status = STATUS_AUTOTUNE;
+
+        for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
+        {
+          hc_device_param_t *device_param = &devices_param[device_id];
+
+          hc_thread_create (c_threads[device_id], thread_autotune, device_param);
+        }
+
+        hc_thread_wait (data.devices_cnt, c_threads);
+
+        /*
+         * Inform user about possible slow speeds
+         */
+
+        uint hardware_power_all = 0;
+
+        uint kernel_power_all = 0;
+
+        for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
+        {
+          hc_device_param_t *device_param = &devices_param[device_id];
+
+          hardware_power_all += device_param->hardware_power;
+
+          kernel_power_all += device_param->kernel_power;
+        }
+
+        data.hardware_power_all = hardware_power_all; // hardware_power_all is the same as kernel_power_all but without the influence of kernel_accel on the devices
+
+        data.kernel_power_all = kernel_power_all;
+
+        if ((wordlist_mode == WL_MODE_FILE) || (wordlist_mode == WL_MODE_MASK))
+        {
+          if (data.words_base < kernel_power_all)
+          {
+            if (quiet == 0)
+            {
+              log_info ("ATTENTION!");
+              log_info ("  The wordlist or mask you are using is too small.");
+              log_info ("  Therefore, hashcat is unable to utilize the full parallelization power of your device(s).");
+              log_info ("  The cracking speed will drop.");
+              log_info ("  Workaround: https://hashcat.net/wiki/doku.php?id=frequently_asked_questions#how_to_create_more_work_for_full_speed");
+              log_info ("");
+            }
+          }
+        }
+
+        /**
+         * create cracker threads
+         */
+
          data.devices_status = STATUS_RUNNING;
  
          if (initial_restore_done == 0)
@@ -17378,12 +18050,6 @@ int main (int argc, char **argv)
  
          data.runtime_start = runtime_start;
  
-        /**
-         * create cracker threads
-         */
-
-        hc_thread_t *c_threads = (hc_thread_t *) mycalloc (data.devices_cnt, sizeof (hc_thread_t));
-
          for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
          {
            hc_device_param_t *device_param = &devices_param[device_id];
@@ -17398,8 +18064,6 @@ int main (int argc, char **argv)
            }
          }
  
-        // wait for crack threads to exit
-
          hc_thread_wait (data.devices_cnt, c_threads);
  
          local_free (c_threads);
@@ -17569,11 +18233,13 @@ int main (int argc, char **argv)
  
      // wait for interactive threads
  
-    if ((data.wordlist_mode == WL_MODE_FILE) || (data.wordlist_mode == WL_MODE_MASK))
+    for (uint thread_idx = 0; thread_idx < i_threads_cnt; thread_idx++)
      {
-      hc_thread_wait (1, &i_thread);
+      hc_thread_wait (1, &i_threads[thread_idx]);
      }
  
+    local_free (i_threads);
+
      // we dont need restore file anymore
      if (data.restore_disable == 0)
      {
@@ -17603,7 +18269,7 @@ int main (int argc, char **argv)
      {
        status_benchmark ();
  
-      if (status_automat == 0)
+      if (machine_readable == 0)
        {
          log_info ("");
        }
@@ -17684,6 +18350,7 @@ int main (int argc, char **argv)
        if (device_param->kernel_mp_r)        hc_clReleaseKernel        (data.ocl, device_param->kernel_mp_r);
        if (device_param->kernel_tm)          hc_clReleaseKernel        (data.ocl, device_param->kernel_tm);
        if (device_param->kernel_amp)         hc_clReleaseKernel        (data.ocl, device_param->kernel_amp);
+      if (device_param->kernel_memset)      hc_clReleaseKernel        (data.ocl, device_param->kernel_memset);
  
        if (device_param->program)            hc_clReleaseProgram       (data.ocl, device_param->program);
        if (device_param->program_mp)         hc_clReleaseProgram       (data.ocl, device_param->program_mp);
@@ -17698,7 +18365,6 @@ int main (int argc, char **argv)
      #ifdef HAVE_HWMON
      if (gpu_temp_disable == 0)
      {
-      #ifdef HAVE_ADL
        if (gpu_temp_retain != 0) // VENDOR_ID_AMD is implied here
        {
          hc_thread_mutex_lock (mux_adl);
@@ -17709,24 +18375,39 @@ int main (int argc, char **argv)
  
            if (device_param->skipped) continue;
  
-          if (data.hm_device[device_id].fan_supported == 1)
+          if (data.hm_device[device_id].fan_set_supported == 1)
            {
-            int fanspeed = temp_retain_fanspeed_value[device_id];
+            int fanspeed  = temp_retain_fanspeed_value[device_id];
+            int fanpolicy = temp_retain_fanpolicy_value[device_id];
  
-            if (fanspeed == -1) continue;
+            if (fanpolicy == 1)
+            {
+              int rc = -1;
+
+              if (device_param->device_vendor_id == VENDOR_ID_AMD)
+              {
+                rc = hm_set_fanspeed_with_device_id_adl (device_id, fanspeed, 0);
+              }
+              else if (device_param->device_vendor_id == VENDOR_ID_NV)
+              {
+                #ifdef LINUX
+                rc = set_fan_control (data.hm_xnvctrl, data.hm_device[device_id].xnvctrl, NV_CTRL_GPU_COOLER_MANUAL_CONTROL_FALSE);
+                #endif
  
-            int rc = hm_set_fanspeed_with_device_id_amd (device_id, fanspeed);
+                #ifdef WIN
+                rc = hm_set_fanspeed_with_device_id_nvapi (device_id, fanspeed, fanpolicy);
+                #endif
+              }
  
-            if (rc == -1) log_info ("WARNING: Failed to restore default fan speed for gpu number: %i:", device_id);
+              if (rc == -1) log_info ("WARNING: Failed to restore default fan speed and policy for device #%u", device_id + 1); // 1-based device number, same form as the initial fan speed warning
+            }
            }
          }
  
          hc_thread_mutex_unlock (mux_adl);
        }
-      #endif // HAVE_ADL
      }
  
-    #ifdef HAVE_ADL
      // reset power tuning
  
      if (powertune_enable == 1) // VENDOR_ID_AMD is implied here
@@ -17739,89 +18420,106 @@ int main (int argc, char **argv)
  
          if (device_param->skipped) continue;
  
-        if (data.hm_device[device_id].od_version == 6)
+        if (data.devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
          {
-          // check powertune capabilities first, if not available then skip device
-
-          int powertune_supported = 0;
-
-          if ((hm_ADL_Overdrive6_PowerControl_Caps (data.hm_amd, data.hm_device[device_id].adapter_index.amd, &powertune_supported)) != ADL_OK)
+          if (data.hm_device[device_id].od_version == 6)
            {
-            log_error ("ERROR: Failed to get ADL PowerControl Capabilities");
+            // check powertune capabilities first, if not available then skip device
  
-            return (-1);
-          }
-
-          if (powertune_supported != 0)
-          {
-            // powercontrol settings
+            int powertune_supported = 0;
  
-            if ((hm_ADL_Overdrive_PowerControl_Set (data.hm_amd, data.hm_device[device_id].adapter_index.amd, od_power_control_status[device_id])) != ADL_OK)
+            if ((hm_ADL_Overdrive6_PowerControl_Caps (data.hm_adl, data.hm_device[device_id].adl, &powertune_supported)) != ADL_OK)
              {
-              log_info ("ERROR: Failed to restore the ADL PowerControl values");
+              log_error ("ERROR: Failed to get ADL PowerControl Capabilities");
  
                return (-1);
              }
  
-            // clocks
+            if (powertune_supported != 0)
+            {
+              // powercontrol settings
+
+              if ((hm_ADL_Overdrive_PowerControl_Set (data.hm_adl, data.hm_device[device_id].adl, od_power_control_status[device_id])) != ADL_OK)
+              {
+                log_info ("ERROR: Failed to restore the ADL PowerControl values");
+
+                return (-1);
+              }
  
-            ADLOD6StateInfo *performance_state = (ADLOD6StateInfo*) mycalloc (1, sizeof (ADLOD6StateInfo) + sizeof (ADLOD6PerformanceLevel));
+              // clocks
  
-            performance_state->iNumberOfPerformanceLevels = 2;
+              ADLOD6StateInfo *performance_state = (ADLOD6StateInfo*) mycalloc (1, sizeof (ADLOD6StateInfo) + sizeof (ADLOD6PerformanceLevel));
  
-            performance_state->aLevels[0].iEngineClock = od_clock_mem_status[device_id].state.aLevels[0].iEngineClock;
-            performance_state->aLevels[1].iEngineClock = od_clock_mem_status[device_id].state.aLevels[1].iEngineClock;
-            performance_state->aLevels[0].iMemoryClock = od_clock_mem_status[device_id].state.aLevels[0].iMemoryClock;
-            performance_state->aLevels[1].iMemoryClock = od_clock_mem_status[device_id].state.aLevels[1].iMemoryClock;
+              performance_state->iNumberOfPerformanceLevels = 2;
  
-            if ((hm_ADL_Overdrive_State_Set (data.hm_amd, data.hm_device[device_id].adapter_index.amd, ADL_OD6_SETSTATE_PERFORMANCE, performance_state)) != ADL_OK)
-            {
-              log_info ("ERROR: Failed to restore ADL performance state");
+              performance_state->aLevels[0].iEngineClock = od_clock_mem_status[device_id].state.aLevels[0].iEngineClock;
+              performance_state->aLevels[1].iEngineClock = od_clock_mem_status[device_id].state.aLevels[1].iEngineClock;
+              performance_state->aLevels[0].iMemoryClock = od_clock_mem_status[device_id].state.aLevels[0].iMemoryClock;
+              performance_state->aLevels[1].iMemoryClock = od_clock_mem_status[device_id].state.aLevels[1].iMemoryClock;
  
-              return (-1);
+              if ((hm_ADL_Overdrive_State_Set (data.hm_adl, data.hm_device[device_id].adl, ADL_OD6_SETSTATE_PERFORMANCE, performance_state)) != ADL_OK)
+              {
+                log_info ("ERROR: Failed to restore ADL performance state");
+
+                return (-1);
+              }
+
+              local_free (performance_state);
              }
+          }
+        }
  
-            local_free (performance_state);
+        if (data.devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+        {
+          unsigned int limit = nvml_power_limit[device_id];
+
+          if (limit > 0)
+          {
+            hm_NVML_nvmlDeviceSetPowerManagementLimit (data.hm_nvml, 0, data.hm_device[device_id].nvml, limit);
            }
          }
        }
  
        hc_thread_mutex_unlock (mux_adl);
      }
-    #endif // HAVE_ADL
  
      if (gpu_temp_disable == 0)
      {
-      #if defined(HAVE_NVML) || defined(HAVE_NVAPI)
-      if (data.hm_nv)
+      if (data.hm_nvml)
        {
-        #if defined(LINUX) && defined(HAVE_NVML)
+        hm_NVML_nvmlShutdown (data.hm_nvml);
+
+        nvml_close (data.hm_nvml);
  
-        hm_NVML_nvmlShutdown (data.hm_nv);
+        data.hm_nvml = NULL;
+      }
  
-        nvml_close (data.hm_nv);
+      if (data.hm_nvapi)
+      {
+        hm_NvAPI_Unload (data.hm_nvapi);
  
-        #elif defined(WIN) && (HAVE_NVAPI)
+        nvapi_close (data.hm_nvapi);
  
-        hm_NvAPI_Unload (data.hm_nv);
+        data.hm_nvapi = NULL;
+      }
  
-        nvapi_close (data.hm_nv);
+      if (data.hm_xnvctrl)
+      {
+        hm_XNVCTRL_XCloseDisplay (data.hm_xnvctrl);
  
-        #endif
+        xnvctrl_close (data.hm_xnvctrl);
  
-        data.hm_nv = NULL;
+        data.hm_xnvctrl = NULL;
        }
-      #endif
  
-      #ifdef HAVE_ADL
-      if (data.hm_amd)
+      if (data.hm_adl)
        {
-        hm_ADL_Main_Control_Destroy (data.hm_amd);
+        hm_ADL_Main_Control_Destroy (data.hm_adl);
  
-        adl_close (data.hm_amd);
-        data.hm_amd = NULL;
+        adl_close (data.hm_adl);
+
+        data.hm_adl = NULL;
        }
-      #endif
      }
      #endif // HAVE_HWMON
  
@@ -17864,10 +18562,9 @@ int main (int argc, char **argv)
  
      #ifdef HAVE_HWMON
      local_free (temp_retain_fanspeed_value);
-    #ifdef HAVE_ADL
      local_free (od_clock_mem_status);
      local_free (od_power_control_status);
-    #endif // ADL
+    local_free (nvml_power_limit);
      #endif
  
      global_free (devices_param);