X-Git-Url: https://www.flypig.org.uk/git/?a=blobdiff_plain;f=OpenCL%2Fm08900.cl;h=598d578ef6b6fba3a2c5399c860674ed45249f31;hb=161a6eb4bc643d8e636e96eda613f5137d30da59;hp=59e5bd8e29f6cb83df832ed331f807657b6f4108;hpb=069634ae77dea7e7e2579868e70e4a26d30318aa;p=hashcat.git diff --git a/OpenCL/m08900.cl b/OpenCL/m08900.cl index 59e5bd8..598d578 100644 --- a/OpenCL/m08900.cl +++ b/OpenCL/m08900.cl @@ -5,14 +5,8 @@ #define _SCRYPT_ -#include "inc_hash_constants.h" #include "inc_vendor.cl" - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - +#include "inc_hash_constants.h" #include "inc_hash_functions.cl" #include "inc_types.cl" #include "inc_common.cl" @@ -674,18 +668,18 @@ void salsa_r (uint4 *TI) } } -void scrypt_smix (uint4 *X, uint4 *T, const u32 phy, __global uint4 *V) +void scrypt_smix (uint4 *X, uint4 *T, __global uint4 *V0, __global uint4 *V1, __global uint4 *V2, __global uint4 *V3) { - #define Coord(x,y,z) (((x) * zSIZE) + ((y) * zSIZE * xSIZE) + (z)) - #define CO Coord(x,y,z) + #define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z)) + #define CO Coord(xd4,y,z) - const u32 xSIZE = phy; const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO; const u32 zSIZE = STATE_CNT4; - const u32 gid = get_global_id (0); + const u32 x = get_global_id (0); - const u32 x = gid % xSIZE; + const u32 xd4 = x / 4; + const u32 xm4 = x & 3; #ifdef _unroll #pragma unroll @@ -705,7 +699,13 @@ void scrypt_smix (uint4 *X, uint4 *T, const u32 phy, __global uint4 *V) for (u32 y = 0; y < ySIZE; y++) { - for (u32 z = 0; z < zSIZE; z++) V[CO] = X[z]; + switch (xm4) + { + case 0: for (u32 z = 0; z < zSIZE; z++) V0[CO] = X[z]; break; + case 1: for (u32 z = 0; z < zSIZE; z++) V1[CO] = X[z]; break; + case 2: for (u32 z = 0; z < zSIZE; z++) V2[CO] = X[z]; break; + case 3: for (u32 z = 0; z < zSIZE; z++) V3[CO] = X[z]; break; + } for (u32 i = 0; i < SCRYPT_TMTO; i++) salsa_r (X); } @@ -718,7 +718,13 @@ void scrypt_smix (uint4 *X, uint4 *T, const u32 phy, __global uint4 *V) const u32 km = k - (y * SCRYPT_TMTO); - for (u32 z = 0; z < zSIZE; z++) T[z] = V[CO]; + switch (xm4) + { + case 0: for (u32 z = 0; z < zSIZE; z++) T[z] = V0[CO]; break; + case 1: for (u32 z = 0; z < zSIZE; z++) T[z] = V1[CO]; break; + case 2: for (u32 z = 0; z < zSIZE; z++) T[z] = V2[CO]; break; + case 3: for (u32 z = 0; z < zSIZE; z++) T[z] = V3[CO]; break; + } for (u32 i = 0; i < km; i++) salsa_r (T); @@ -744,7 +750,7 @@ void scrypt_smix (uint4 *X, uint4 *T, const u32 phy, __global uint4 *V) } } -__kernel void m08900_init (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global scrypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global uint4 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m08900_init (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global scrypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global uint4 *d_scryptV0_buf, __global uint4 *d_scryptV1_buf, __global uint4 *d_scryptV2_buf, __global uint4 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -885,14 +891,12 @@ __kernel void m08900_init (__global pw_t *pws, __global kernel_rule_t *rules_buf } } -__kernel void m08900_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global scrypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global uint4 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m08900_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global scrypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global uint4 *d_scryptV0_buf, __global uint4 *d_scryptV1_buf, __global uint4 *d_scryptV2_buf, __global uint4 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { const u32 gid = get_global_id (0); if (gid >= gid_max) return; - const u32 scrypt_phy = salt_bufs[salt_pos].scrypt_phy; - uint4 X[STATE_CNT4]; uint4 T[STATE_CNT4]; @@ -901,7 +905,7 @@ __kernel void m08900_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf #endif for (int z = 0; z < STATE_CNT4; z++) X[z] = swap32_4 (tmps[gid].P[z]); - scrypt_smix (X, T, scrypt_phy, d_scryptV_buf); + scrypt_smix (X, T, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf); #ifdef _unroll #pragma unroll @@ -913,14 +917,14 @@ __kernel void m08900_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf { for (int z = 0; z < STATE_CNT4; z++) X[z] = swap32_4 (tmps[gid].P[i + z]); - scrypt_smix (X, T, scrypt_phy, d_scryptV_buf); + scrypt_smix (X, T, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf); for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[i + z] = swap32_4 (X[z]); } #endif } -__kernel void m08900_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global scrypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global uint4 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void m08900_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global scrypt_tmp_t *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global uint4 *d_scryptV0_buf, __global uint4 *d_scryptV1_buf, __global uint4 *d_scryptV2_buf, __global uint4 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base