Limiting scrypt to 64 threads has no performance impact but makes it easier to handle
[hashcat.git] / src / hashcat.c
index 8d78174..41b6144 100644 (file)
@@ -2390,7 +2390,7 @@ static void process_stdout (hc_device_param_t *device_param, const uint pws_cnt)
 
   uint plain_len = 0;
 
-  const uint il_cnt = device_param->kernel_params_buf32[27]; // ugly, i know
+  const uint il_cnt = device_param->kernel_params_buf32[30]; // ugly, i know
 
   if (data.attack_mode == ATTACK_MODE_STRAIGHT)
   {
@@ -2649,8 +2649,8 @@ static void run_kernel (const uint kern_run, hc_device_param_t *device_param, co
 {
   uint num_elements = num;
 
-  device_param->kernel_params_buf32[30] = data.combs_mode;
-  device_param->kernel_params_buf32[31] = num;
+  device_param->kernel_params_buf32[33] = data.combs_mode;
+  device_param->kernel_params_buf32[34] = num;
 
   uint kernel_threads = device_param->kernel_threads;
 
@@ -2667,9 +2667,6 @@ static void run_kernel (const uint kern_run, hc_device_param_t *device_param, co
     case KERN_RUN_3:    kernel = device_param->kernel3;     break;
   }
 
-  hc_clSetKernelArg (data.ocl, kernel, 21, sizeof (cl_uint), device_param->kernel_params[21]);
-  hc_clSetKernelArg (data.ocl, kernel, 22, sizeof (cl_uint), device_param->kernel_params[22]);
-  hc_clSetKernelArg (data.ocl, kernel, 23, sizeof (cl_uint), device_param->kernel_params[23]);
   hc_clSetKernelArg (data.ocl, kernel, 24, sizeof (cl_uint), device_param->kernel_params[24]);
   hc_clSetKernelArg (data.ocl, kernel, 25, sizeof (cl_uint), device_param->kernel_params[25]);
   hc_clSetKernelArg (data.ocl, kernel, 26, sizeof (cl_uint), device_param->kernel_params[26]);
@@ -2678,6 +2675,9 @@ static void run_kernel (const uint kern_run, hc_device_param_t *device_param, co
   hc_clSetKernelArg (data.ocl, kernel, 29, sizeof (cl_uint), device_param->kernel_params[29]);
   hc_clSetKernelArg (data.ocl, kernel, 30, sizeof (cl_uint), device_param->kernel_params[30]);
   hc_clSetKernelArg (data.ocl, kernel, 31, sizeof (cl_uint), device_param->kernel_params[31]);
+  hc_clSetKernelArg (data.ocl, kernel, 32, sizeof (cl_uint), device_param->kernel_params[32]);
+  hc_clSetKernelArg (data.ocl, kernel, 33, sizeof (cl_uint), device_param->kernel_params[33]);
+  hc_clSetKernelArg (data.ocl, kernel, 34, sizeof (cl_uint), device_param->kernel_params[34]);
 
   cl_event event;
 
@@ -3029,8 +3029,8 @@ static void choose_kernel (hc_device_param_t *device_param, const uint attack_ex
 
       loop_left = MIN (loop_left, loop_step);
 
-      device_param->kernel_params_buf32[25] = loop_pos;
-      device_param->kernel_params_buf32[26] = loop_left;
+      device_param->kernel_params_buf32[28] = loop_pos;
+      device_param->kernel_params_buf32[29] = loop_left;
 
       run_kernel (KERN_RUN_2, device_param, pws_cnt, true, slow_iteration);
 
@@ -3170,9 +3170,9 @@ static double try_run (hc_device_param_t *device_param, const u32 kernel_accel,
 {
   const u32 kernel_power_try = device_param->device_processors * device_param->kernel_threads * kernel_accel;
 
-  device_param->kernel_params_buf32[25] = 0;
-  device_param->kernel_params_buf32[26] = kernel_loops; // not a bug, both need to be set
-  device_param->kernel_params_buf32[27] = kernel_loops; // because there's two variables for inner iters for slow and fast hashes
+  device_param->kernel_params_buf32[28] = 0;
+  device_param->kernel_params_buf32[29] = kernel_loops; // not a bug, both need to be set
+  device_param->kernel_params_buf32[30] = kernel_loops; // because there's two variables for inner iters for slow and fast hashes
 
   if (data.attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
   {
@@ -3496,9 +3496,9 @@ static void run_cracker (hc_device_param_t *device_param, const uint pws_cnt)
 
     salt_t *salt_buf = &data.salts_buf[salt_pos];
 
-    device_param->kernel_params_buf32[24] = salt_pos;
-    device_param->kernel_params_buf32[28] = salt_buf->digests_cnt;
-    device_param->kernel_params_buf32[29] = salt_buf->digests_offset;
+    device_param->kernel_params_buf32[27] = salt_pos;
+    device_param->kernel_params_buf32[31] = salt_buf->digests_cnt;
+    device_param->kernel_params_buf32[32] = salt_buf->digests_offset;
 
     FILE *combs_fp = device_param->combs_fp;
 
@@ -3534,7 +3534,7 @@ static void run_cracker (hc_device_param_t *device_param, const uint pws_cnt)
       device_param->innerloop_pos  = innerloop_pos;
       device_param->innerloop_left = innerloop_left;
 
-      device_param->kernel_params_buf32[27] = innerloop_left;
+      device_param->kernel_params_buf32[30] = innerloop_left;
 
       // i think we can get rid of this
       if (innerloop_left == 0)
@@ -5244,12 +5244,12 @@ static void weak_hash_check (hc_device_param_t *device_param, const uint salt_po
 
   salt_t *salt_buf = &data.salts_buf[salt_pos];
 
-  device_param->kernel_params_buf32[24] = salt_pos;
-  device_param->kernel_params_buf32[27] = 1;
-  device_param->kernel_params_buf32[28] = salt_buf->digests_cnt;
-  device_param->kernel_params_buf32[29] = salt_buf->digests_offset;
-  device_param->kernel_params_buf32[30] = 0;
-  device_param->kernel_params_buf32[31] = 1;
+  device_param->kernel_params_buf32[27] = salt_pos;
+  device_param->kernel_params_buf32[30] = 1;
+  device_param->kernel_params_buf32[31] = salt_buf->digests_cnt;
+  device_param->kernel_params_buf32[32] = salt_buf->digests_offset;
+  device_param->kernel_params_buf32[33] = 0;
+  device_param->kernel_params_buf32[34] = 1;
 
   char *dictfile_old = data.dictfile;
 
@@ -5283,8 +5283,8 @@ static void weak_hash_check (hc_device_param_t *device_param, const uint salt_po
 
       loop_left = MIN (loop_left, loop_step);
 
-      device_param->kernel_params_buf32[25] = loop_pos;
-      device_param->kernel_params_buf32[26] = loop_left;
+      device_param->kernel_params_buf32[28] = loop_pos;
+      device_param->kernel_params_buf32[29] = loop_left;
 
       run_kernel (KERN_RUN_2, device_param, 1, false, 0);
     }
@@ -5302,14 +5302,14 @@ static void weak_hash_check (hc_device_param_t *device_param, const uint salt_po
    * cleanup
    */
 
-  device_param->kernel_params_buf32[24] = 0;
-  device_param->kernel_params_buf32[25] = 0;
-  device_param->kernel_params_buf32[26] = 0;
   device_param->kernel_params_buf32[27] = 0;
   device_param->kernel_params_buf32[28] = 0;
   device_param->kernel_params_buf32[29] = 0;
   device_param->kernel_params_buf32[30] = 0;
   device_param->kernel_params_buf32[31] = 0;
+  device_param->kernel_params_buf32[32] = 0;
+  device_param->kernel_params_buf32[33] = 0;
+  device_param->kernel_params_buf32[34] = 0;
 
   data.dictfile = dictfile_old;
 
@@ -15011,9 +15011,9 @@ int main (int argc, char **argv)
       if (hash_mode ==  3000) kernel_threads = 64; // DES
       if (hash_mode ==  3200) kernel_threads = 8;  // Blowfish
       if (hash_mode ==  7500) kernel_threads = 64; // RC4
-      if (hash_mode ==  8900) kernel_threads = 32; // scrypt
+      if (hash_mode ==  8900) kernel_threads = 64; // Scrypt
       if (hash_mode ==  9000) kernel_threads = 8;  // Blowfish
-      if (hash_mode ==  9300) kernel_threads = 32; // scrypt
+      if (hash_mode ==  9300) kernel_threads = 64; // Scrypt
       if (hash_mode ==  9700) kernel_threads = 64; // RC4
       if (hash_mode ==  9710) kernel_threads = 64; // RC4
       if (hash_mode ==  9800) kernel_threads = 64; // RC4
@@ -15059,10 +15059,28 @@ int main (int argc, char **argv)
 
       // scryptV stuff
 
-      size_t size_scryptV = 1;
+      size_t size_scrypt = 4;
 
       if ((hash_mode == 8900) || (hash_mode == 9300))
       {
+        // we need to check that all hashes have the same scrypt settings
+
+        const u32 scrypt_N = data.salts_buf[0].scrypt_N;
+        const u32 scrypt_r = data.salts_buf[0].scrypt_r;
+        const u32 scrypt_p = data.salts_buf[0].scrypt_p;
+
+        for (uint i = 1; i < salts_cnt; i++)
+        {
+          if ((data.salts_buf[i].scrypt_N != scrypt_N)
+           || (data.salts_buf[i].scrypt_r != scrypt_r)
+           || (data.salts_buf[i].scrypt_p != scrypt_p))
+          {
+            log_error ("ERROR: Mixed scrypt settings not supported");
+
+            return -1;
+          }
+        }
+
         uint tmto_start = 0;
         uint tmto_stop  = 10;
 
@@ -15073,14 +15091,13 @@ int main (int argc, char **argv)
         else
         {
           // in case the user did not specify the tmto manually
-          // use some values known to run best (tested on 290x for AMD and 980ti for NV)
-          // but set the lower end only in case the user has a device with too less memory
+          // use some values known to run best (tested on 290x for AMD and GTX1080 for NV)
 
           if (hash_mode == 8900)
           {
             if (device_param->device_vendor_id == VENDOR_ID_AMD)
             {
-              tmto_start = 1;
+              tmto_start = 3;
             }
             else if (device_param->device_vendor_id == VENDOR_ID_NV)
             {
@@ -15095,51 +15112,60 @@ int main (int argc, char **argv)
             }
             else if (device_param->device_vendor_id == VENDOR_ID_NV)
             {
-              tmto_start = 2;
+              tmto_start = 4;
             }
           }
         }
 
+        data.scrypt_tmp_size = (128 * scrypt_r);
+
         device_param->kernel_accel_min = 1;
         device_param->kernel_accel_max = 8;
 
-        for (uint tmto = tmto_start; tmto < tmto_stop; tmto++)
+        uint tmto;
+
+        for (tmto = tmto_start; tmto < tmto_stop; tmto++)
         {
-          // TODO: in theory the following calculation needs to be done per salt, not global
-          //       we assume all hashes have the same scrypt settings
+          size_scrypt = (128 * scrypt_r) * scrypt_N;
+
+          size_scrypt /= 1 << tmto;
 
-          size_scryptV = (128 * data.salts_buf[0].scrypt_r) * data.salts_buf[0].scrypt_N;
+          size_scrypt *= device_param->device_processors * device_param->kernel_threads * device_param->kernel_accel_max;
 
-          size_scryptV /= 1 << tmto;
+          if ((size_scrypt / 4) > device_param->device_maxmem_alloc)
+          {
+            if (quiet == 0) log_info ("WARNING: Not enough single-block device memory allocatable to use --scrypt-tmto %d, increasing...", tmto);
 
-          size_scryptV *= device_param->device_processors * device_param->kernel_threads * device_param->kernel_accel_max;
+            continue;
+          }
 
-          if (size_scryptV > device_param->device_maxmem_alloc)
+          if (size_scrypt > device_param->device_global_mem)
           {
-            if (quiet == 0) log_info ("WARNING: Not enough device memory allocatable to use --scrypt-tmto %d, increasing...", tmto);
+            if (quiet == 0) log_info ("WARNING: Not enough total device memory allocatable to use --scrypt-tmto %d, increasing...", tmto);
 
             continue;
           }
 
           for (uint salts_pos = 0; salts_pos < data.salts_cnt; salts_pos++)
           {
-            data.salts_buf[salts_pos].scrypt_tmto = tmto;
-            data.salts_buf[salts_pos].scrypt_phy  = device_param->device_processors * device_param->kernel_threads * device_param->kernel_accel_max;
+            data.scrypt_tmto_final = tmto;
           }
 
           break;
         }
 
-        if (data.salts_buf[0].scrypt_phy == 0)
+        if (tmto == tmto_stop)
         {
           log_error ("ERROR: Can't allocate enough device memory");
 
           return -1;
         }
 
-        if (quiet == 0) log_info ("SCRYPT tmto optimizer value set to: %u, mem: %u\n", data.salts_buf[0].scrypt_tmto, size_scryptV);
+        if (quiet == 0) log_info ("SCRYPT tmto optimizer value set to: %u, mem: %u\n", data.scrypt_tmto_final, size_scrypt);
       }
 
+      size_t size_scrypt4 = size_scrypt / 4;
+
       /**
        * some algorithms need a fixed kernel-loops count
        */
@@ -15265,11 +15291,11 @@ int main (int argc, char **argv)
           case  7900: size_tmps = kernel_power_max * sizeof (drupal7_tmp_t);         break;
           case  8200: size_tmps = kernel_power_max * sizeof (pbkdf2_sha512_tmp_t);   break;
           case  8800: size_tmps = kernel_power_max * sizeof (androidfde_tmp_t);      break;
-          case  8900: size_tmps = kernel_power_max * sizeof (scrypt_tmp_t);          break;
+          case  8900: size_tmps = kernel_power_max * data.scrypt_tmp_size;           break;
           case  9000: size_tmps = kernel_power_max * sizeof (pwsafe2_tmp_t);         break;
           case  9100: size_tmps = kernel_power_max * sizeof (lotus8_tmp_t);          break;
           case  9200: size_tmps = kernel_power_max * sizeof (pbkdf2_sha256_tmp_t);   break;
-          case  9300: size_tmps = kernel_power_max * sizeof (scrypt_tmp_t);          break;
+          case  9300: size_tmps = kernel_power_max * data.scrypt_tmp_size;           break;
           case  9400: size_tmps = kernel_power_max * sizeof (office2007_tmp_t);      break;
           case  9500: size_tmps = kernel_power_max * sizeof (office2010_tmp_t);      break;
           case  9600: size_tmps = kernel_power_max * sizeof (office2013_tmp_t);      break;
@@ -15356,7 +15382,10 @@ int main (int argc, char **argv)
           + size_rules
           + size_rules_c
           + size_salts
-          + size_scryptV
+          + size_scrypt4
+          + size_scrypt4
+          + size_scrypt4
+          + size_scrypt4
           + size_shown
           + size_tm
           + size_tmps;
@@ -15427,8 +15456,6 @@ int main (int argc, char **argv)
 
       snprintf (build_opts, sizeof (build_opts) - 1, "-I \"%s\"", cpath_real);
 
-      myfree (cpath_real);
-
       #else
 
       snprintf (cpath, sizeof (cpath) - 1, "%s/OpenCL/", shared_dir);
@@ -15446,10 +15473,64 @@ int main (int argc, char **argv)
 
       snprintf (build_opts, sizeof (build_opts) - 1, "-I %s", cpath_real);
 
-      myfree (cpath_real);
-
       #endif
 
+      // include check
+      // this test needs to be done manually because of osx opencl runtime
+      // if there's a problem with permissions, it does not report back and errors out silently
+
+      #define files_cnt 15
+
+      const char *files_names[files_cnt] =
+      {
+        "inc_cipher_aes256.cl",
+        "inc_cipher_serpent256.cl",
+        "inc_cipher_twofish256.cl",
+        "inc_common.cl",
+        "inc_comp_multi_bs.cl",
+        "inc_comp_multi.cl",
+        "inc_comp_single_bs.cl",
+        "inc_comp_single.cl",
+        "inc_hash_constants.h",
+        "inc_hash_functions.cl",
+        "inc_rp.cl",
+        "inc_rp.h",
+        "inc_simd.cl",
+        "inc_types.cl",
+        "inc_vendor.cl",
+      };
+
+      for (int i = 0; i < files_cnt; i++)
+      {
+        char path[1024] = { 0 };
+
+        snprintf (path, sizeof (path) - 1, "%s/%s", cpath_real, files_names[i]);
+
+        FILE *fd = fopen (path, "r");
+
+        if (fd == NULL)
+        {
+          log_error ("ERROR: %s: fopen(): %s", path, strerror (errno));
+
+          return -1;
+        }
+
+        char buf[1];
+
+        size_t n = fread (buf, 1, 1, fd);
+
+        if (n != 1)
+        {
+          log_error ("ERROR: %s: fread(): %s", path, strerror (errno));
+
+          return -1;
+        }
+
+        fclose (fd);
+      }
+
+      myfree (cpath_real);
+
       // we don't have sm_* on vendors not NV but it doesn't matter
 
       char build_opts_new[1024] = { 0 };
@@ -15588,11 +15669,11 @@ int main (int argc, char **argv)
 
           if (force_jit_compilation == 1500)
           {
-            snprintf (build_opts_update, sizeof (build_opts_update) - 1, "%s -DDESCRYPT_SALT=%d", build_opts, data.salts_buf[0].salt_buf[0]);
+            snprintf (build_opts_update, sizeof (build_opts_update) - 1, "%s -DDESCRYPT_SALT=%u", build_opts, data.salts_buf[0].salt_buf[0]);
           }
           else if (force_jit_compilation == 8900)
           {
-            snprintf (build_opts_update, sizeof (build_opts_update) - 1, "%s -DSCRYPT_N=%d -DSCRYPT_R=%d -DSCRYPT_P=%d -DSCRYPT_TMTO=%d", build_opts, data.salts_buf[0].scrypt_N, data.salts_buf[0].scrypt_r, data.salts_buf[0].scrypt_p, 1 << data.salts_buf[0].scrypt_tmto);
+            snprintf (build_opts_update, sizeof (build_opts_update) - 1, "%s -DSCRYPT_N=%u -DSCRYPT_R=%u -DSCRYPT_P=%u -DSCRYPT_TMTO=%u -DSCRYPT_TMP_ELEM=%u", build_opts, data.salts_buf[0].scrypt_N, data.salts_buf[0].scrypt_r, data.salts_buf[0].scrypt_p, 1 << data.scrypt_tmto_final, data.scrypt_tmp_size / 16);
           }
           else
           {
@@ -15864,7 +15945,10 @@ int main (int argc, char **argv)
       device_param->d_digests_shown = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_WRITE,  size_shown,   NULL);
       device_param->d_salt_bufs     = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_ONLY,   size_salts,   NULL);
       device_param->d_result        = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_WRITE,  size_results, NULL);
-      device_param->d_scryptV_buf   = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_WRITE,  size_scryptV, NULL);
+      device_param->d_scryptV0_buf  = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_WRITE,  size_scrypt4, NULL);
+      device_param->d_scryptV1_buf  = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_WRITE,  size_scrypt4, NULL);
+      device_param->d_scryptV2_buf  = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_WRITE,  size_scrypt4, NULL);
+      device_param->d_scryptV3_buf  = hc_clCreateBuffer (data.ocl, device_param->context, CL_MEM_READ_WRITE,  size_scrypt4, NULL);
 
       hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_bitmap_s1_a,    CL_TRUE, 0, bitmap_size,  bitmap_s1_a,        0, NULL, NULL);
       hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_bitmap_s1_b,    CL_TRUE, 0, bitmap_size,  bitmap_s1_b,        0, NULL, NULL);
@@ -15932,17 +16016,17 @@ int main (int argc, char **argv)
        * kernel args
        */
 
-      device_param->kernel_params_buf32[21] = bitmap_mask;
-      device_param->kernel_params_buf32[22] = bitmap_shift1;
-      device_param->kernel_params_buf32[23] = bitmap_shift2;
-      device_param->kernel_params_buf32[24] = 0; // salt_pos
-      device_param->kernel_params_buf32[25] = 0; // loop_pos
-      device_param->kernel_params_buf32[26] = 0; // loop_cnt
-      device_param->kernel_params_buf32[27] = 0; // kernel_rules_cnt
-      device_param->kernel_params_buf32[28] = 0; // digests_cnt
-      device_param->kernel_params_buf32[29] = 0; // digests_offset
-      device_param->kernel_params_buf32[30] = 0; // combs_mode
-      device_param->kernel_params_buf32[31] = 0; // gid_max
+      device_param->kernel_params_buf32[24] = bitmap_mask;
+      device_param->kernel_params_buf32[25] = bitmap_shift1;
+      device_param->kernel_params_buf32[26] = bitmap_shift2;
+      device_param->kernel_params_buf32[27] = 0; // salt_pos
+      device_param->kernel_params_buf32[28] = 0; // loop_pos
+      device_param->kernel_params_buf32[29] = 0; // loop_cnt
+      device_param->kernel_params_buf32[30] = 0; // kernel_rules_cnt
+      device_param->kernel_params_buf32[31] = 0; // digests_cnt
+      device_param->kernel_params_buf32[32] = 0; // digests_offset
+      device_param->kernel_params_buf32[33] = 0; // combs_mode
+      device_param->kernel_params_buf32[34] = 0; // gid_max
 
       device_param->kernel_params[ 0] = (attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
                                       ? &device_param->d_pws_buf
@@ -15966,10 +16050,10 @@ int main (int argc, char **argv)
       device_param->kernel_params[17] = &device_param->d_salt_bufs;
       device_param->kernel_params[18] = &device_param->d_esalt_bufs;
       device_param->kernel_params[19] = &device_param->d_result;
-      device_param->kernel_params[20] = &device_param->d_scryptV_buf;
-      device_param->kernel_params[21] = &device_param->kernel_params_buf32[21];
-      device_param->kernel_params[22] = &device_param->kernel_params_buf32[22];
-      device_param->kernel_params[23] = &device_param->kernel_params_buf32[23];
+      device_param->kernel_params[20] = &device_param->d_scryptV0_buf;
+      device_param->kernel_params[21] = &device_param->d_scryptV1_buf;
+      device_param->kernel_params[22] = &device_param->d_scryptV2_buf;
+      device_param->kernel_params[23] = &device_param->d_scryptV3_buf;
       device_param->kernel_params[24] = &device_param->kernel_params_buf32[24];
       device_param->kernel_params[25] = &device_param->kernel_params_buf32[25];
       device_param->kernel_params[26] = &device_param->kernel_params_buf32[26];
@@ -15978,6 +16062,9 @@ int main (int argc, char **argv)
       device_param->kernel_params[29] = &device_param->kernel_params_buf32[29];
       device_param->kernel_params[30] = &device_param->kernel_params_buf32[30];
       device_param->kernel_params[31] = &device_param->kernel_params_buf32[31];
+      device_param->kernel_params[32] = &device_param->kernel_params_buf32[32];
+      device_param->kernel_params[33] = &device_param->kernel_params_buf32[33];
+      device_param->kernel_params[34] = &device_param->kernel_params_buf32[34];
 
       device_param->kernel_params_mp_buf64[3] = 0;
       device_param->kernel_params_mp_buf32[4] = 0;
@@ -16141,7 +16228,7 @@ int main (int argc, char **argv)
       hc_clGetKernelWorkGroupInfo (data.ocl, device_param->kernel2, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp);
       hc_clGetKernelWorkGroupInfo (data.ocl, device_param->kernel3, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp);
 
-      for (uint i = 0; i <= 20; i++)
+      for (uint i = 0; i <= 23; i++)
       {
         hc_clSetKernelArg (data.ocl, device_param->kernel1, i, sizeof (cl_mem), device_param->kernel_params[i]);
         hc_clSetKernelArg (data.ocl, device_param->kernel2, i, sizeof (cl_mem), device_param->kernel_params[i]);
@@ -16151,7 +16238,7 @@ int main (int argc, char **argv)
         if (opts_type & OPTS_TYPE_HOOK23) hc_clSetKernelArg (data.ocl, device_param->kernel23, i, sizeof (cl_mem), device_param->kernel_params[i]);
       }
 
-      for (uint i = 21; i <= 31; i++)
+      for (uint i = 24; i <= 34; i++)
       {
         hc_clSetKernelArg (data.ocl, device_param->kernel1, i, sizeof (cl_uint), device_param->kernel_params[i]);
         hc_clSetKernelArg (data.ocl, device_param->kernel2, i, sizeof (cl_uint), device_param->kernel_params[i]);
@@ -18386,7 +18473,10 @@ int main (int argc, char **argv)
       if (device_param->d_tmps)             hc_clReleaseMemObject     (data.ocl, device_param->d_tmps);
       if (device_param->d_hooks)            hc_clReleaseMemObject     (data.ocl, device_param->d_hooks);
       if (device_param->d_result)           hc_clReleaseMemObject     (data.ocl, device_param->d_result);
-      if (device_param->d_scryptV_buf)      hc_clReleaseMemObject     (data.ocl, device_param->d_scryptV_buf);
+      if (device_param->d_scryptV0_buf)     hc_clReleaseMemObject     (data.ocl, device_param->d_scryptV0_buf);
+      if (device_param->d_scryptV1_buf)     hc_clReleaseMemObject     (data.ocl, device_param->d_scryptV1_buf);
+      if (device_param->d_scryptV2_buf)     hc_clReleaseMemObject     (data.ocl, device_param->d_scryptV2_buf);
+      if (device_param->d_scryptV3_buf)     hc_clReleaseMemObject     (data.ocl, device_param->d_scryptV3_buf);
       if (device_param->d_root_css_buf)     hc_clReleaseMemObject     (data.ocl, device_param->d_root_css_buf);
       if (device_param->d_markov_css_buf)   hc_clReleaseMemObject     (data.ocl, device_param->d_markov_css_buf);
       if (device_param->d_tm_c)             hc_clReleaseMemObject     (data.ocl, device_param->d_tm_c);