-/**
- * Replace four consecutive bytes of the word buffer `sw` with the four bytes
- * of `w0`, starting at byte offset `salt_len`.
- *
- * Byte k of a word occupies bits 8k..8k+7 (least-significant byte first), so
- * an offset that is a multiple of four replaces one whole word, while any
- * other offset splits `w0` across two neighbouring words:
- *
- *   - the lower word keeps its first `off` bytes and takes the low bytes of w0
- *   - the upper word takes the remaining high bytes of w0 and keeps the rest
- *
- * sw        word buffer, modified in place; only sw[0]..sw[8] can be written
- * w0        32-bit value to store
- * salt_len  byte offset of the first byte to replace; offsets 0..31 are
- *           handled, any larger value leaves `sw` untouched
- *
- * NOTE(review): the parameter name suggests the offset is the length of a
- * salt already stored in `sw` - confirm against the callers.
- */
-
-#if defined cl_amd_media_ops
-
-// AMD path: amd_bytealign (hi, lo, n) is expected to return the low 32 bits of
-// the 64-bit value hi:lo shifted right by n bytes (see the cl_amd_media_ops
-// extension), which produces the same words as the mask/shift variant below.
-
-#define OVERWRITE_AT_LO(old,val,off) amd_bytealign ((val), (old) << (32 - ((off) * 8)), 4 - (off))
-#define OVERWRITE_AT_HI(old,val,off) amd_bytealign ((old) >> ((off) * 8), (val), 4 - (off))
-
-#else
-
-// Portable path: mask out the bytes being replaced, then OR in the matching
-// part of the new value.
-
-#define OVERWRITE_AT_LO(old,val,off) (((old) & (0xffffffff >> (32 - ((off) * 8)))) | ((val) << ((off) * 8)))
-#define OVERWRITE_AT_HI(old,val,off) (((old) & (0xffffffff << ((off) * 8))) | ((val) >> (32 - ((off) * 8))))
-
-#endif
-
-// One switch label per destination word. Expects `sw`, `w0` and `byte_off` to
-// be in scope. The splice macros are only evaluated for byte_off 1..3, so all
-// shift counts stay within 8..24.
-//
-// NOTE(review): every index into `sw` is a compile-time constant, exactly as
-// in the fully unrolled original; presumably this lets the device compiler
-// keep `sw` in registers - confirm before replacing it with sw[word_idx].
-
-#define OVERWRITE_AT_WORD(i)                                              \
-  case (i):                                                               \
-    if (byte_off == 0)                                                    \
-    {                                                                     \
-      sw[(i)] = w0;                                                       \
-    }                                                                     \
-    else                                                                  \
-    {                                                                     \
-      sw[(i)]     = OVERWRITE_AT_LO (sw[(i)],     w0, byte_off);          \
-      sw[(i) + 1] = OVERWRITE_AT_HI (sw[(i) + 1], w0, byte_off);          \
-    }                                                                     \
-    break;
-
-static void overwrite_at (u32 sw[16], const u32 w0, const u32 salt_len)
-{
-  const u32 word_idx = salt_len / 4; // word holding the first replaced byte
-  const u32 byte_off = salt_len & 3; // position of that byte inside the word
-
-  switch (word_idx)
-  {
-    OVERWRITE_AT_WORD (0)
-    OVERWRITE_AT_WORD (1)
-    OVERWRITE_AT_WORD (2)
-    OVERWRITE_AT_WORD (3)
-    OVERWRITE_AT_WORD (4)
-    OVERWRITE_AT_WORD (5)
-    OVERWRITE_AT_WORD (6)
-    OVERWRITE_AT_WORD (7)
-
-    // salt_len >= 32: nothing is written
-    default:
-      break;
-  }
-}
-
-#undef OVERWRITE_AT_WORD
-#undef OVERWRITE_AT_HI
-#undef OVERWRITE_AT_LO
-
-static void m03100m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)